{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 34251, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.9196227847362122e-05, "grad_norm": 1.8227965050993833, "learning_rate": 5.8377116170461185e-09, "loss": 1.1658, "step": 1 }, { "epoch": 5.8392455694724244e-05, "grad_norm": 1.8990170420791797, "learning_rate": 1.1675423234092237e-08, "loss": 1.0618, "step": 2 }, { "epoch": 8.758868354208637e-05, "grad_norm": 1.8115085806591897, "learning_rate": 1.7513134851138356e-08, "loss": 1.1539, "step": 3 }, { "epoch": 0.00011678491138944849, "grad_norm": 1.9249454129553887, "learning_rate": 2.3350846468184474e-08, "loss": 1.2511, "step": 4 }, { "epoch": 0.0001459811392368106, "grad_norm": 1.8200924442664077, "learning_rate": 2.918855808523059e-08, "loss": 1.2494, "step": 5 }, { "epoch": 0.00017517736708417273, "grad_norm": 1.8548081040661013, "learning_rate": 3.502626970227671e-08, "loss": 1.3521, "step": 6 }, { "epoch": 0.00020437359493153485, "grad_norm": 2.092170875441007, "learning_rate": 4.086398131932283e-08, "loss": 1.3483, "step": 7 }, { "epoch": 0.00023356982277889697, "grad_norm": 1.6894694732330184, "learning_rate": 4.670169293636895e-08, "loss": 1.1197, "step": 8 }, { "epoch": 0.0002627660506262591, "grad_norm": 1.8082668368271368, "learning_rate": 5.2539404553415065e-08, "loss": 1.1901, "step": 9 }, { "epoch": 0.0002919622784736212, "grad_norm": 2.1405395937127376, "learning_rate": 5.837711617046118e-08, "loss": 1.2931, "step": 10 }, { "epoch": 0.00032115850632098334, "grad_norm": 1.9193838879975844, "learning_rate": 6.42148277875073e-08, "loss": 1.0624, "step": 11 }, { "epoch": 0.00035035473416834546, "grad_norm": 1.712633056292196, "learning_rate": 7.005253940455342e-08, "loss": 1.1184, "step": 12 }, { "epoch": 0.0003795509620157076, "grad_norm": 2.1467193057222054, "learning_rate": 7.589025102159954e-08, "loss": 1.2312, "step": 13 }, { "epoch": 0.0004087471898630697, "grad_norm": 2.2743823240500314, "learning_rate": 8.172796263864566e-08, "loss": 1.2128, "step": 14 }, { "epoch": 0.00043794341771043183, "grad_norm": 1.9212335581657247, "learning_rate": 8.756567425569177e-08, "loss": 1.1206, "step": 15 }, { "epoch": 0.00046713964555779395, "grad_norm": 1.7309627730170731, "learning_rate": 9.34033858727379e-08, "loss": 1.203, "step": 16 }, { "epoch": 0.000496335873405156, "grad_norm": 1.6690461187161556, "learning_rate": 9.924109748978402e-08, "loss": 1.0673, "step": 17 }, { "epoch": 0.0005255321012525182, "grad_norm": 1.88718965672847, "learning_rate": 1.0507880910683013e-07, "loss": 1.1872, "step": 18 }, { "epoch": 0.0005547283290998803, "grad_norm": 1.8974518920443837, "learning_rate": 1.1091652072387625e-07, "loss": 1.2765, "step": 19 }, { "epoch": 0.0005839245569472424, "grad_norm": 1.8869562122135572, "learning_rate": 1.1675423234092237e-07, "loss": 1.2386, "step": 20 }, { "epoch": 0.0006131207847946045, "grad_norm": 1.8347106003583862, "learning_rate": 1.2259194395796848e-07, "loss": 1.2225, "step": 21 }, { "epoch": 0.0006423170126419667, "grad_norm": 2.0050488904193475, "learning_rate": 1.284296555750146e-07, "loss": 1.2702, "step": 22 }, { "epoch": 0.0006715132404893287, "grad_norm": 1.860116627017748, "learning_rate": 1.3426736719206073e-07, "loss": 1.2417, "step": 23 }, { "epoch": 0.0007007094683366909, "grad_norm": 1.7363741307732015, "learning_rate": 1.4010507880910685e-07, "loss": 1.1251, "step": 24 }, { "epoch": 0.000729905696184053, "grad_norm": 1.9943510168705594, "learning_rate": 1.4594279042615297e-07, "loss": 1.145, "step": 25 }, { "epoch": 0.0007591019240314152, "grad_norm": 2.107888594647293, "learning_rate": 1.5178050204319907e-07, "loss": 1.34, "step": 26 }, { "epoch": 0.0007882981518787772, "grad_norm": 2.291097153786375, "learning_rate": 1.576182136602452e-07, "loss": 1.3656, "step": 27 }, { "epoch": 0.0008174943797261394, "grad_norm": 1.7833691451623168, "learning_rate": 1.6345592527729132e-07, "loss": 1.1555, "step": 28 }, { "epoch": 0.0008466906075735015, "grad_norm": 1.981379263539085, "learning_rate": 1.6929363689433744e-07, "loss": 1.1715, "step": 29 }, { "epoch": 0.0008758868354208637, "grad_norm": 1.8546861021917735, "learning_rate": 1.7513134851138354e-07, "loss": 1.2205, "step": 30 }, { "epoch": 0.0009050830632682257, "grad_norm": 1.7428881082591574, "learning_rate": 1.8096906012842967e-07, "loss": 1.1393, "step": 31 }, { "epoch": 0.0009342792911155879, "grad_norm": 1.7989244929286232, "learning_rate": 1.868067717454758e-07, "loss": 1.2035, "step": 32 }, { "epoch": 0.00096347551896295, "grad_norm": 1.914156817730302, "learning_rate": 1.9264448336252192e-07, "loss": 1.3285, "step": 33 }, { "epoch": 0.000992671746810312, "grad_norm": 1.863384359944999, "learning_rate": 1.9848219497956804e-07, "loss": 1.1907, "step": 34 }, { "epoch": 0.0010218679746576743, "grad_norm": 2.0429089542215046, "learning_rate": 2.0431990659661414e-07, "loss": 1.356, "step": 35 }, { "epoch": 0.0010510642025050364, "grad_norm": 1.8069550944959627, "learning_rate": 2.1015761821366026e-07, "loss": 1.1975, "step": 36 }, { "epoch": 0.0010802604303523985, "grad_norm": 2.1607632957444376, "learning_rate": 2.1599532983070639e-07, "loss": 1.4072, "step": 37 }, { "epoch": 0.0011094566581997605, "grad_norm": 1.8283270860396785, "learning_rate": 2.218330414477525e-07, "loss": 1.1012, "step": 38 }, { "epoch": 0.0011386528860471228, "grad_norm": 1.7874126067144478, "learning_rate": 2.276707530647986e-07, "loss": 1.2201, "step": 39 }, { "epoch": 0.0011678491138944849, "grad_norm": 1.791442233218619, "learning_rate": 2.3350846468184473e-07, "loss": 1.0392, "step": 40 }, { "epoch": 0.001197045341741847, "grad_norm": 2.0212895008966316, "learning_rate": 2.3934617629889086e-07, "loss": 1.2345, "step": 41 }, { "epoch": 0.001226241569589209, "grad_norm": 1.7556282749031462, "learning_rate": 2.4518388791593695e-07, "loss": 1.1782, "step": 42 }, { "epoch": 0.0012554377974365713, "grad_norm": 1.6160790353607735, "learning_rate": 2.510215995329831e-07, "loss": 1.1719, "step": 43 }, { "epoch": 0.0012846340252839334, "grad_norm": 1.8821595525733568, "learning_rate": 2.568593111500292e-07, "loss": 1.2081, "step": 44 }, { "epoch": 0.0013138302531312954, "grad_norm": 2.1653766293049665, "learning_rate": 2.6269702276707535e-07, "loss": 1.3154, "step": 45 }, { "epoch": 0.0013430264809786575, "grad_norm": 2.066128724626591, "learning_rate": 2.6853473438412145e-07, "loss": 1.175, "step": 46 }, { "epoch": 0.0013722227088260198, "grad_norm": 1.8133505690235268, "learning_rate": 2.7437244600116755e-07, "loss": 1.0951, "step": 47 }, { "epoch": 0.0014014189366733818, "grad_norm": 1.9163614380704521, "learning_rate": 2.802101576182137e-07, "loss": 1.2647, "step": 48 }, { "epoch": 0.001430615164520744, "grad_norm": 1.696321610911465, "learning_rate": 2.860478692352598e-07, "loss": 1.135, "step": 49 }, { "epoch": 0.001459811392368106, "grad_norm": 1.8600178371820175, "learning_rate": 2.9188558085230595e-07, "loss": 1.1989, "step": 50 }, { "epoch": 0.0014890076202154683, "grad_norm": 2.0691443141336814, "learning_rate": 2.9772329246935205e-07, "loss": 1.3653, "step": 51 }, { "epoch": 0.0015182038480628303, "grad_norm": 1.9856413765332843, "learning_rate": 3.0356100408639814e-07, "loss": 1.2669, "step": 52 }, { "epoch": 0.0015474000759101924, "grad_norm": 1.8278614149343815, "learning_rate": 3.093987157034443e-07, "loss": 1.1984, "step": 53 }, { "epoch": 0.0015765963037575545, "grad_norm": 4.766526897938263, "learning_rate": 3.152364273204904e-07, "loss": 1.2052, "step": 54 }, { "epoch": 0.0016057925316049165, "grad_norm": 1.938684027273011, "learning_rate": 3.210741389375365e-07, "loss": 1.1757, "step": 55 }, { "epoch": 0.0016349887594522788, "grad_norm": 1.8616046295733517, "learning_rate": 3.2691185055458264e-07, "loss": 1.2346, "step": 56 }, { "epoch": 0.0016641849872996409, "grad_norm": 1.9340429709602287, "learning_rate": 3.3274956217162874e-07, "loss": 1.2829, "step": 57 }, { "epoch": 0.001693381215147003, "grad_norm": 1.9074697107189484, "learning_rate": 3.385872737886749e-07, "loss": 1.1825, "step": 58 }, { "epoch": 0.001722577442994365, "grad_norm": 1.7003933600459653, "learning_rate": 3.44424985405721e-07, "loss": 1.1543, "step": 59 }, { "epoch": 0.0017517736708417273, "grad_norm": 1.8135883688799974, "learning_rate": 3.502626970227671e-07, "loss": 1.2038, "step": 60 }, { "epoch": 0.0017809698986890894, "grad_norm": 1.7402288813815288, "learning_rate": 3.5610040863981324e-07, "loss": 1.2275, "step": 61 }, { "epoch": 0.0018101661265364514, "grad_norm": 1.7620001087412027, "learning_rate": 3.6193812025685933e-07, "loss": 1.2217, "step": 62 }, { "epoch": 0.0018393623543838135, "grad_norm": 1.968553996332111, "learning_rate": 3.677758318739055e-07, "loss": 1.2868, "step": 63 }, { "epoch": 0.0018685585822311758, "grad_norm": 1.997683220486403, "learning_rate": 3.736135434909516e-07, "loss": 1.2396, "step": 64 }, { "epoch": 0.0018977548100785379, "grad_norm": 1.9353172408717114, "learning_rate": 3.794512551079977e-07, "loss": 1.2988, "step": 65 }, { "epoch": 0.0019269510379259, "grad_norm": 2.0197195581202467, "learning_rate": 3.8528896672504383e-07, "loss": 1.1609, "step": 66 }, { "epoch": 0.001956147265773262, "grad_norm": 1.624579456443438, "learning_rate": 3.9112667834208993e-07, "loss": 1.0286, "step": 67 }, { "epoch": 0.001985343493620624, "grad_norm": 1.8052961706408166, "learning_rate": 3.969643899591361e-07, "loss": 1.2236, "step": 68 }, { "epoch": 0.002014539721467986, "grad_norm": 1.976868590594262, "learning_rate": 4.028021015761822e-07, "loss": 1.3276, "step": 69 }, { "epoch": 0.0020437359493153486, "grad_norm": 1.836771199894099, "learning_rate": 4.086398131932283e-07, "loss": 1.1732, "step": 70 }, { "epoch": 0.0020729321771627107, "grad_norm": 1.7996395197314914, "learning_rate": 4.144775248102744e-07, "loss": 1.1347, "step": 71 }, { "epoch": 0.0021021284050100728, "grad_norm": 1.6884154312194182, "learning_rate": 4.203152364273205e-07, "loss": 1.2029, "step": 72 }, { "epoch": 0.002131324632857435, "grad_norm": 1.7637517419200428, "learning_rate": 4.261529480443666e-07, "loss": 1.1319, "step": 73 }, { "epoch": 0.002160520860704797, "grad_norm": 1.7091275395921166, "learning_rate": 4.3199065966141277e-07, "loss": 1.2339, "step": 74 }, { "epoch": 0.002189717088552159, "grad_norm": 1.7118928030286933, "learning_rate": 4.3782837127845887e-07, "loss": 1.011, "step": 75 }, { "epoch": 0.002218913316399521, "grad_norm": 1.6846640402056652, "learning_rate": 4.43666082895505e-07, "loss": 1.1558, "step": 76 }, { "epoch": 0.002248109544246883, "grad_norm": 1.79612899089581, "learning_rate": 4.495037945125511e-07, "loss": 1.201, "step": 77 }, { "epoch": 0.0022773057720942456, "grad_norm": 1.7649828644626553, "learning_rate": 4.553415061295972e-07, "loss": 1.2441, "step": 78 }, { "epoch": 0.0023065019999416077, "grad_norm": 1.7717267254214526, "learning_rate": 4.6117921774664337e-07, "loss": 1.2299, "step": 79 }, { "epoch": 0.0023356982277889697, "grad_norm": 1.6929512837143679, "learning_rate": 4.6701692936368946e-07, "loss": 1.1, "step": 80 }, { "epoch": 0.002364894455636332, "grad_norm": 1.7207398557859221, "learning_rate": 4.728546409807356e-07, "loss": 1.2405, "step": 81 }, { "epoch": 0.002394090683483694, "grad_norm": 1.6092917826758226, "learning_rate": 4.786923525977817e-07, "loss": 1.0692, "step": 82 }, { "epoch": 0.002423286911331056, "grad_norm": 1.6963581357086637, "learning_rate": 4.845300642148279e-07, "loss": 1.1201, "step": 83 }, { "epoch": 0.002452483139178418, "grad_norm": 1.664981305092467, "learning_rate": 4.903677758318739e-07, "loss": 1.1958, "step": 84 }, { "epoch": 0.00248167936702578, "grad_norm": 1.820816121962994, "learning_rate": 4.962054874489201e-07, "loss": 1.1025, "step": 85 }, { "epoch": 0.0025108755948731426, "grad_norm": 1.9165094493384172, "learning_rate": 5.020431990659662e-07, "loss": 1.1471, "step": 86 }, { "epoch": 0.0025400718227205047, "grad_norm": 1.7129106895822028, "learning_rate": 5.078809106830123e-07, "loss": 1.2285, "step": 87 }, { "epoch": 0.0025692680505678667, "grad_norm": 1.6249544106281584, "learning_rate": 5.137186223000584e-07, "loss": 1.1316, "step": 88 }, { "epoch": 0.002598464278415229, "grad_norm": 1.8097159870703767, "learning_rate": 5.195563339171046e-07, "loss": 1.0751, "step": 89 }, { "epoch": 0.002627660506262591, "grad_norm": 1.6883853729529903, "learning_rate": 5.253940455341507e-07, "loss": 1.1716, "step": 90 }, { "epoch": 0.002656856734109953, "grad_norm": 1.8386655388869897, "learning_rate": 5.312317571511968e-07, "loss": 1.2576, "step": 91 }, { "epoch": 0.002686052961957315, "grad_norm": 2.0515518384152642, "learning_rate": 5.370694687682429e-07, "loss": 1.1308, "step": 92 }, { "epoch": 0.002715249189804677, "grad_norm": 1.7639771629021626, "learning_rate": 5.42907180385289e-07, "loss": 1.2315, "step": 93 }, { "epoch": 0.0027444454176520396, "grad_norm": 1.6062431294214667, "learning_rate": 5.487448920023351e-07, "loss": 1.1351, "step": 94 }, { "epoch": 0.0027736416454994016, "grad_norm": 1.5801289538853962, "learning_rate": 5.545826036193812e-07, "loss": 1.0574, "step": 95 }, { "epoch": 0.0028028378733467637, "grad_norm": 1.8170587368146467, "learning_rate": 5.604203152364274e-07, "loss": 1.1452, "step": 96 }, { "epoch": 0.0028320341011941258, "grad_norm": 1.7564832166246473, "learning_rate": 5.662580268534734e-07, "loss": 1.1228, "step": 97 }, { "epoch": 0.002861230329041488, "grad_norm": 1.5097893399230775, "learning_rate": 5.720957384705196e-07, "loss": 1.0269, "step": 98 }, { "epoch": 0.00289042655688885, "grad_norm": 1.5219736807018867, "learning_rate": 5.779334500875657e-07, "loss": 1.0336, "step": 99 }, { "epoch": 0.002919622784736212, "grad_norm": 1.6922825639524792, "learning_rate": 5.837711617046119e-07, "loss": 1.0924, "step": 100 }, { "epoch": 0.002948819012583574, "grad_norm": 2.1008927859201716, "learning_rate": 5.896088733216579e-07, "loss": 1.2413, "step": 101 }, { "epoch": 0.0029780152404309365, "grad_norm": 2.4482485651811796, "learning_rate": 5.954465849387041e-07, "loss": 1.1981, "step": 102 }, { "epoch": 0.0030072114682782986, "grad_norm": 1.5139016795151863, "learning_rate": 6.012842965557502e-07, "loss": 1.0101, "step": 103 }, { "epoch": 0.0030364076961256607, "grad_norm": 1.884923414422728, "learning_rate": 6.071220081727963e-07, "loss": 1.3037, "step": 104 }, { "epoch": 0.0030656039239730227, "grad_norm": 1.7663303284431033, "learning_rate": 6.129597197898424e-07, "loss": 1.1915, "step": 105 }, { "epoch": 0.003094800151820385, "grad_norm": 1.7232634117893169, "learning_rate": 6.187974314068886e-07, "loss": 1.1954, "step": 106 }, { "epoch": 0.003123996379667747, "grad_norm": 1.7469914699986069, "learning_rate": 6.246351430239346e-07, "loss": 1.1685, "step": 107 }, { "epoch": 0.003153192607515109, "grad_norm": 1.8181370069780205, "learning_rate": 6.304728546409808e-07, "loss": 1.1888, "step": 108 }, { "epoch": 0.003182388835362471, "grad_norm": 1.5623358349245473, "learning_rate": 6.363105662580268e-07, "loss": 1.1516, "step": 109 }, { "epoch": 0.003211585063209833, "grad_norm": 1.6745025719908218, "learning_rate": 6.42148277875073e-07, "loss": 1.0834, "step": 110 }, { "epoch": 0.0032407812910571956, "grad_norm": 1.7473862452177025, "learning_rate": 6.47985989492119e-07, "loss": 1.1449, "step": 111 }, { "epoch": 0.0032699775189045576, "grad_norm": 1.5646056420512227, "learning_rate": 6.538237011091653e-07, "loss": 1.1313, "step": 112 }, { "epoch": 0.0032991737467519197, "grad_norm": 1.5640603853014856, "learning_rate": 6.596614127262113e-07, "loss": 1.1959, "step": 113 }, { "epoch": 0.0033283699745992818, "grad_norm": 1.505405145737405, "learning_rate": 6.654991243432575e-07, "loss": 1.083, "step": 114 }, { "epoch": 0.003357566202446644, "grad_norm": 1.5753569356606179, "learning_rate": 6.713368359603035e-07, "loss": 1.1199, "step": 115 }, { "epoch": 0.003386762430294006, "grad_norm": 1.5026576223541221, "learning_rate": 6.771745475773498e-07, "loss": 1.1185, "step": 116 }, { "epoch": 0.003415958658141368, "grad_norm": 1.701737222865328, "learning_rate": 6.830122591943958e-07, "loss": 1.1572, "step": 117 }, { "epoch": 0.00344515488598873, "grad_norm": 1.429466415047004, "learning_rate": 6.88849970811442e-07, "loss": 1.1678, "step": 118 }, { "epoch": 0.0034743511138360926, "grad_norm": 1.5477698090755236, "learning_rate": 6.94687682428488e-07, "loss": 1.1609, "step": 119 }, { "epoch": 0.0035035473416834546, "grad_norm": 1.4888268724790004, "learning_rate": 7.005253940455342e-07, "loss": 1.0776, "step": 120 }, { "epoch": 0.0035327435695308167, "grad_norm": 2.004359902951542, "learning_rate": 7.063631056625802e-07, "loss": 1.0907, "step": 121 }, { "epoch": 0.0035619397973781788, "grad_norm": 1.5881162590673976, "learning_rate": 7.122008172796265e-07, "loss": 1.232, "step": 122 }, { "epoch": 0.003591136025225541, "grad_norm": 1.6514094879957337, "learning_rate": 7.180385288966725e-07, "loss": 1.1908, "step": 123 }, { "epoch": 0.003620332253072903, "grad_norm": 1.7754019171064972, "learning_rate": 7.238762405137187e-07, "loss": 1.1964, "step": 124 }, { "epoch": 0.003649528480920265, "grad_norm": 1.4588239236097347, "learning_rate": 7.297139521307647e-07, "loss": 1.1495, "step": 125 }, { "epoch": 0.003678724708767627, "grad_norm": 1.6411250856276052, "learning_rate": 7.35551663747811e-07, "loss": 1.2413, "step": 126 }, { "epoch": 0.0037079209366149895, "grad_norm": 1.4789036239552769, "learning_rate": 7.41389375364857e-07, "loss": 1.1565, "step": 127 }, { "epoch": 0.0037371171644623516, "grad_norm": 1.3987171133477911, "learning_rate": 7.472270869819032e-07, "loss": 1.0804, "step": 128 }, { "epoch": 0.0037663133923097137, "grad_norm": 1.5253352863466278, "learning_rate": 7.530647985989492e-07, "loss": 1.1247, "step": 129 }, { "epoch": 0.0037955096201570757, "grad_norm": 1.3693087512813684, "learning_rate": 7.589025102159954e-07, "loss": 1.1056, "step": 130 }, { "epoch": 0.003824705848004438, "grad_norm": 1.4653092440845596, "learning_rate": 7.647402218330414e-07, "loss": 1.0053, "step": 131 }, { "epoch": 0.0038539020758518, "grad_norm": 2.075525653555299, "learning_rate": 7.705779334500877e-07, "loss": 1.1359, "step": 132 }, { "epoch": 0.003883098303699162, "grad_norm": 1.265965669938543, "learning_rate": 7.764156450671337e-07, "loss": 1.0402, "step": 133 }, { "epoch": 0.003912294531546524, "grad_norm": 1.441457339676015, "learning_rate": 7.822533566841799e-07, "loss": 1.0906, "step": 134 }, { "epoch": 0.003941490759393886, "grad_norm": 1.3413746684121863, "learning_rate": 7.880910683012259e-07, "loss": 1.125, "step": 135 }, { "epoch": 0.003970686987241248, "grad_norm": 1.6603068325655053, "learning_rate": 7.939287799182722e-07, "loss": 1.2127, "step": 136 }, { "epoch": 0.00399988321508861, "grad_norm": 1.304810796221854, "learning_rate": 7.997664915353182e-07, "loss": 1.1187, "step": 137 }, { "epoch": 0.004029079442935972, "grad_norm": 1.5082994312897942, "learning_rate": 8.056042031523644e-07, "loss": 1.0629, "step": 138 }, { "epoch": 0.004058275670783335, "grad_norm": 1.4526461270352147, "learning_rate": 8.114419147694104e-07, "loss": 1.0867, "step": 139 }, { "epoch": 0.004087471898630697, "grad_norm": 1.5247114625097309, "learning_rate": 8.172796263864565e-07, "loss": 1.1826, "step": 140 }, { "epoch": 0.004116668126478059, "grad_norm": 1.397624467429916, "learning_rate": 8.231173380035026e-07, "loss": 1.1316, "step": 141 }, { "epoch": 0.004145864354325421, "grad_norm": 1.3042090766108523, "learning_rate": 8.289550496205489e-07, "loss": 1.1156, "step": 142 }, { "epoch": 0.0041750605821727835, "grad_norm": 1.3027640040585695, "learning_rate": 8.347927612375949e-07, "loss": 1.1052, "step": 143 }, { "epoch": 0.0042042568100201455, "grad_norm": 1.4882341392042433, "learning_rate": 8.40630472854641e-07, "loss": 1.1408, "step": 144 }, { "epoch": 0.004233453037867508, "grad_norm": 1.4815940536587082, "learning_rate": 8.464681844716871e-07, "loss": 1.1338, "step": 145 }, { "epoch": 0.00426264926571487, "grad_norm": 1.267832922514256, "learning_rate": 8.523058960887332e-07, "loss": 1.0486, "step": 146 }, { "epoch": 0.004291845493562232, "grad_norm": 1.4448822708972493, "learning_rate": 8.581436077057794e-07, "loss": 0.9908, "step": 147 }, { "epoch": 0.004321041721409594, "grad_norm": 1.2636578554788374, "learning_rate": 8.639813193228255e-07, "loss": 1.0963, "step": 148 }, { "epoch": 0.004350237949256956, "grad_norm": 1.5121051846987095, "learning_rate": 8.698190309398716e-07, "loss": 1.1039, "step": 149 }, { "epoch": 0.004379434177104318, "grad_norm": 1.5449711763194895, "learning_rate": 8.756567425569177e-07, "loss": 1.0776, "step": 150 }, { "epoch": 0.00440863040495168, "grad_norm": 1.2100952716801547, "learning_rate": 8.814944541739638e-07, "loss": 0.9685, "step": 151 }, { "epoch": 0.004437826632799042, "grad_norm": 1.4162575061354905, "learning_rate": 8.8733216579101e-07, "loss": 1.1947, "step": 152 }, { "epoch": 0.004467022860646404, "grad_norm": 1.2207480588598472, "learning_rate": 8.931698774080561e-07, "loss": 1.1048, "step": 153 }, { "epoch": 0.004496219088493766, "grad_norm": 1.311353088952776, "learning_rate": 8.990075890251022e-07, "loss": 1.1495, "step": 154 }, { "epoch": 0.004525415316341129, "grad_norm": 1.3649033627560465, "learning_rate": 9.048453006421483e-07, "loss": 1.0664, "step": 155 }, { "epoch": 0.004554611544188491, "grad_norm": 1.5373947465435485, "learning_rate": 9.106830122591944e-07, "loss": 1.1739, "step": 156 }, { "epoch": 0.004583807772035853, "grad_norm": 1.713961096120162, "learning_rate": 9.165207238762405e-07, "loss": 1.0138, "step": 157 }, { "epoch": 0.004613003999883215, "grad_norm": 1.2219126785601233, "learning_rate": 9.223584354932867e-07, "loss": 1.0098, "step": 158 }, { "epoch": 0.004642200227730577, "grad_norm": 1.4798368002364835, "learning_rate": 9.281961471103328e-07, "loss": 1.1186, "step": 159 }, { "epoch": 0.0046713964555779395, "grad_norm": 1.1535239152179488, "learning_rate": 9.340338587273789e-07, "loss": 0.9401, "step": 160 }, { "epoch": 0.0047005926834253016, "grad_norm": 1.1874921699018393, "learning_rate": 9.39871570344425e-07, "loss": 1.0618, "step": 161 }, { "epoch": 0.004729788911272664, "grad_norm": 1.2096881500953156, "learning_rate": 9.457092819614712e-07, "loss": 1.1802, "step": 162 }, { "epoch": 0.004758985139120026, "grad_norm": 1.538130171281637, "learning_rate": 9.515469935785173e-07, "loss": 1.1705, "step": 163 }, { "epoch": 0.004788181366967388, "grad_norm": 1.153488009186267, "learning_rate": 9.573847051955634e-07, "loss": 1.0475, "step": 164 }, { "epoch": 0.00481737759481475, "grad_norm": 1.213131785797461, "learning_rate": 9.632224168126095e-07, "loss": 1.0749, "step": 165 }, { "epoch": 0.004846573822662112, "grad_norm": 1.3011809189542585, "learning_rate": 9.690601284296557e-07, "loss": 1.1061, "step": 166 }, { "epoch": 0.004875770050509474, "grad_norm": 1.021450127341145, "learning_rate": 9.748978400467018e-07, "loss": 0.8899, "step": 167 }, { "epoch": 0.004904966278356836, "grad_norm": 1.341671309344497, "learning_rate": 9.807355516637478e-07, "loss": 1.1796, "step": 168 }, { "epoch": 0.004934162506204198, "grad_norm": 1.249310518230772, "learning_rate": 9.865732632807939e-07, "loss": 1.0805, "step": 169 }, { "epoch": 0.00496335873405156, "grad_norm": 1.6523824329285302, "learning_rate": 9.924109748978401e-07, "loss": 0.9523, "step": 170 }, { "epoch": 0.004992554961898922, "grad_norm": 1.201647375219699, "learning_rate": 9.982486865148862e-07, "loss": 1.1316, "step": 171 }, { "epoch": 0.005021751189746285, "grad_norm": 1.2863404893096486, "learning_rate": 1.0040863981319324e-06, "loss": 1.0677, "step": 172 }, { "epoch": 0.005050947417593647, "grad_norm": 1.2027557831891469, "learning_rate": 1.0099241097489785e-06, "loss": 1.0582, "step": 173 }, { "epoch": 0.005080143645441009, "grad_norm": 1.3669883337840578, "learning_rate": 1.0157618213660245e-06, "loss": 1.0745, "step": 174 }, { "epoch": 0.005109339873288371, "grad_norm": 1.3790210158876124, "learning_rate": 1.0215995329830706e-06, "loss": 1.0281, "step": 175 }, { "epoch": 0.0051385361011357334, "grad_norm": 1.1341224563133376, "learning_rate": 1.0274372446001168e-06, "loss": 1.0471, "step": 176 }, { "epoch": 0.0051677323289830955, "grad_norm": 1.0130612511221633, "learning_rate": 1.0332749562171629e-06, "loss": 0.989, "step": 177 }, { "epoch": 0.005196928556830458, "grad_norm": 1.3895402075442689, "learning_rate": 1.0391126678342091e-06, "loss": 1.0147, "step": 178 }, { "epoch": 0.00522612478467782, "grad_norm": 1.1104867542625296, "learning_rate": 1.0449503794512552e-06, "loss": 1.0232, "step": 179 }, { "epoch": 0.005255321012525182, "grad_norm": 1.1034392822967654, "learning_rate": 1.0507880910683014e-06, "loss": 0.9047, "step": 180 }, { "epoch": 0.005284517240372544, "grad_norm": 1.0762295033925475, "learning_rate": 1.0566258026853475e-06, "loss": 0.901, "step": 181 }, { "epoch": 0.005313713468219906, "grad_norm": 1.202007079130319, "learning_rate": 1.0624635143023935e-06, "loss": 1.041, "step": 182 }, { "epoch": 0.005342909696067268, "grad_norm": 1.104483803652196, "learning_rate": 1.0683012259194395e-06, "loss": 1.1275, "step": 183 }, { "epoch": 0.00537210592391463, "grad_norm": 0.9929362298478933, "learning_rate": 1.0741389375364858e-06, "loss": 0.9152, "step": 184 }, { "epoch": 0.005401302151761992, "grad_norm": 1.096024266285757, "learning_rate": 1.0799766491535318e-06, "loss": 1.0513, "step": 185 }, { "epoch": 0.005430498379609354, "grad_norm": 1.3220337978885586, "learning_rate": 1.085814360770578e-06, "loss": 1.0551, "step": 186 }, { "epoch": 0.005459694607456716, "grad_norm": 1.0054373733196793, "learning_rate": 1.0916520723876242e-06, "loss": 0.9396, "step": 187 }, { "epoch": 0.005488890835304079, "grad_norm": 1.1005609962131577, "learning_rate": 1.0974897840046702e-06, "loss": 1.0564, "step": 188 }, { "epoch": 0.005518087063151441, "grad_norm": 1.007423523943859, "learning_rate": 1.1033274956217162e-06, "loss": 0.961, "step": 189 }, { "epoch": 0.005547283290998803, "grad_norm": 1.293837449153261, "learning_rate": 1.1091652072387625e-06, "loss": 1.1618, "step": 190 }, { "epoch": 0.005576479518846165, "grad_norm": 1.0706740055815076, "learning_rate": 1.1150029188558085e-06, "loss": 0.9621, "step": 191 }, { "epoch": 0.005605675746693527, "grad_norm": 1.0422582179160513, "learning_rate": 1.1208406304728548e-06, "loss": 1.0393, "step": 192 }, { "epoch": 0.0056348719745408895, "grad_norm": 1.1552855898567864, "learning_rate": 1.1266783420899008e-06, "loss": 1.0614, "step": 193 }, { "epoch": 0.0056640682023882515, "grad_norm": 1.1839108854022455, "learning_rate": 1.1325160537069469e-06, "loss": 1.0495, "step": 194 }, { "epoch": 0.005693264430235614, "grad_norm": 1.7809920158646857, "learning_rate": 1.138353765323993e-06, "loss": 0.9989, "step": 195 }, { "epoch": 0.005722460658082976, "grad_norm": 1.03116479933229, "learning_rate": 1.1441914769410392e-06, "loss": 0.9211, "step": 196 }, { "epoch": 0.005751656885930338, "grad_norm": 0.9737340067759925, "learning_rate": 1.1500291885580852e-06, "loss": 0.9347, "step": 197 }, { "epoch": 0.0057808531137777, "grad_norm": 1.0393735317969686, "learning_rate": 1.1558669001751315e-06, "loss": 1.0122, "step": 198 }, { "epoch": 0.005810049341625062, "grad_norm": 1.3990532974791134, "learning_rate": 1.1617046117921775e-06, "loss": 0.9711, "step": 199 }, { "epoch": 0.005839245569472424, "grad_norm": 1.3433398983784588, "learning_rate": 1.1675423234092238e-06, "loss": 1.0016, "step": 200 }, { "epoch": 0.005868441797319786, "grad_norm": 1.135240476003403, "learning_rate": 1.1733800350262698e-06, "loss": 1.1404, "step": 201 }, { "epoch": 0.005897638025167148, "grad_norm": 1.1446454220887925, "learning_rate": 1.1792177466433159e-06, "loss": 1.0591, "step": 202 }, { "epoch": 0.00592683425301451, "grad_norm": 1.6930409873542742, "learning_rate": 1.185055458260362e-06, "loss": 1.1132, "step": 203 }, { "epoch": 0.005956030480861873, "grad_norm": 1.1181637026348472, "learning_rate": 1.1908931698774082e-06, "loss": 0.9972, "step": 204 }, { "epoch": 0.005985226708709235, "grad_norm": 1.0851559017950787, "learning_rate": 1.1967308814944542e-06, "loss": 1.1516, "step": 205 }, { "epoch": 0.006014422936556597, "grad_norm": 0.9440576356184981, "learning_rate": 1.2025685931115005e-06, "loss": 0.9751, "step": 206 }, { "epoch": 0.006043619164403959, "grad_norm": 1.1682980137093562, "learning_rate": 1.2084063047285465e-06, "loss": 1.1823, "step": 207 }, { "epoch": 0.006072815392251321, "grad_norm": 1.079195146312769, "learning_rate": 1.2142440163455926e-06, "loss": 1.0071, "step": 208 }, { "epoch": 0.006102011620098683, "grad_norm": 1.2575968203465302, "learning_rate": 1.2200817279626386e-06, "loss": 1.0997, "step": 209 }, { "epoch": 0.0061312078479460455, "grad_norm": 1.1694773356313104, "learning_rate": 1.2259194395796849e-06, "loss": 0.897, "step": 210 }, { "epoch": 0.0061604040757934075, "grad_norm": 0.8746209523247543, "learning_rate": 1.231757151196731e-06, "loss": 0.8865, "step": 211 }, { "epoch": 0.00618960030364077, "grad_norm": 1.5678027331782223, "learning_rate": 1.2375948628137772e-06, "loss": 1.0, "step": 212 }, { "epoch": 0.006218796531488132, "grad_norm": 1.0536958969849177, "learning_rate": 1.2434325744308232e-06, "loss": 1.009, "step": 213 }, { "epoch": 0.006247992759335494, "grad_norm": 1.0432180396486774, "learning_rate": 1.2492702860478693e-06, "loss": 1.0565, "step": 214 }, { "epoch": 0.006277188987182856, "grad_norm": 1.210435812557432, "learning_rate": 1.2551079976649155e-06, "loss": 1.0562, "step": 215 }, { "epoch": 0.006306385215030218, "grad_norm": 1.1444212342708973, "learning_rate": 1.2609457092819616e-06, "loss": 1.1423, "step": 216 }, { "epoch": 0.00633558144287758, "grad_norm": 0.9481231404959325, "learning_rate": 1.2667834208990076e-06, "loss": 0.9028, "step": 217 }, { "epoch": 0.006364777670724942, "grad_norm": 0.9245837924310368, "learning_rate": 1.2726211325160537e-06, "loss": 0.8836, "step": 218 }, { "epoch": 0.006393973898572304, "grad_norm": 0.9172071618263796, "learning_rate": 1.2784588441331e-06, "loss": 0.8381, "step": 219 }, { "epoch": 0.006423170126419666, "grad_norm": 1.1425156116518584, "learning_rate": 1.284296555750146e-06, "loss": 1.108, "step": 220 }, { "epoch": 0.006452366354267029, "grad_norm": 0.9803276398229784, "learning_rate": 1.290134267367192e-06, "loss": 1.0158, "step": 221 }, { "epoch": 0.006481562582114391, "grad_norm": 1.1122284488034206, "learning_rate": 1.295971978984238e-06, "loss": 0.9444, "step": 222 }, { "epoch": 0.006510758809961753, "grad_norm": 1.0372520525618285, "learning_rate": 1.3018096906012845e-06, "loss": 1.0536, "step": 223 }, { "epoch": 0.006539955037809115, "grad_norm": 1.0340232620781498, "learning_rate": 1.3076474022183306e-06, "loss": 0.9218, "step": 224 }, { "epoch": 0.006569151265656477, "grad_norm": 0.9858282971425091, "learning_rate": 1.3134851138353766e-06, "loss": 0.9304, "step": 225 }, { "epoch": 0.006598347493503839, "grad_norm": 0.9585176758935511, "learning_rate": 1.3193228254524227e-06, "loss": 0.9546, "step": 226 }, { "epoch": 0.0066275437213512015, "grad_norm": 0.9223181288006701, "learning_rate": 1.325160537069469e-06, "loss": 0.9344, "step": 227 }, { "epoch": 0.0066567399491985636, "grad_norm": 1.1720760610373568, "learning_rate": 1.330998248686515e-06, "loss": 1.0167, "step": 228 }, { "epoch": 0.006685936177045926, "grad_norm": 0.9931724759857586, "learning_rate": 1.336835960303561e-06, "loss": 1.0718, "step": 229 }, { "epoch": 0.006715132404893288, "grad_norm": 1.0440556052570058, "learning_rate": 1.342673671920607e-06, "loss": 0.9934, "step": 230 }, { "epoch": 0.00674432863274065, "grad_norm": 1.1277399466933355, "learning_rate": 1.3485113835376535e-06, "loss": 1.0714, "step": 231 }, { "epoch": 0.006773524860588012, "grad_norm": 0.8998121078855315, "learning_rate": 1.3543490951546996e-06, "loss": 0.9417, "step": 232 }, { "epoch": 0.006802721088435374, "grad_norm": 0.8771264454938926, "learning_rate": 1.3601868067717456e-06, "loss": 0.8881, "step": 233 }, { "epoch": 0.006831917316282736, "grad_norm": 0.9306270708271693, "learning_rate": 1.3660245183887916e-06, "loss": 0.907, "step": 234 }, { "epoch": 0.006861113544130098, "grad_norm": 0.9367129609252215, "learning_rate": 1.371862230005838e-06, "loss": 0.8592, "step": 235 }, { "epoch": 0.00689030977197746, "grad_norm": 1.0178990642569605, "learning_rate": 1.377699941622884e-06, "loss": 0.8741, "step": 236 }, { "epoch": 0.006919505999824823, "grad_norm": 0.9851171836475681, "learning_rate": 1.38353765323993e-06, "loss": 1.0135, "step": 237 }, { "epoch": 0.006948702227672185, "grad_norm": 0.9754591986922079, "learning_rate": 1.389375364856976e-06, "loss": 0.9919, "step": 238 }, { "epoch": 0.006977898455519547, "grad_norm": 1.0091796386620704, "learning_rate": 1.3952130764740223e-06, "loss": 0.9128, "step": 239 }, { "epoch": 0.007007094683366909, "grad_norm": 1.1401642273424255, "learning_rate": 1.4010507880910683e-06, "loss": 1.0727, "step": 240 }, { "epoch": 0.007036290911214271, "grad_norm": 1.3097384528900937, "learning_rate": 1.4068884997081144e-06, "loss": 1.041, "step": 241 }, { "epoch": 0.007065487139061633, "grad_norm": 0.855457832687522, "learning_rate": 1.4127262113251604e-06, "loss": 0.8611, "step": 242 }, { "epoch": 0.0070946833669089954, "grad_norm": 1.0407366917535814, "learning_rate": 1.418563922942207e-06, "loss": 1.02, "step": 243 }, { "epoch": 0.0071238795947563575, "grad_norm": 1.0668460938420834, "learning_rate": 1.424401634559253e-06, "loss": 0.9338, "step": 244 }, { "epoch": 0.00715307582260372, "grad_norm": 1.54457700951801, "learning_rate": 1.430239346176299e-06, "loss": 1.1169, "step": 245 }, { "epoch": 0.007182272050451082, "grad_norm": 1.0060060106460644, "learning_rate": 1.436077057793345e-06, "loss": 1.0343, "step": 246 }, { "epoch": 0.007211468278298444, "grad_norm": 1.0642935062973489, "learning_rate": 1.4419147694103913e-06, "loss": 1.0138, "step": 247 }, { "epoch": 0.007240664506145806, "grad_norm": 0.9524235630556971, "learning_rate": 1.4477524810274373e-06, "loss": 0.8551, "step": 248 }, { "epoch": 0.007269860733993168, "grad_norm": 0.8914914993189518, "learning_rate": 1.4535901926444834e-06, "loss": 0.9306, "step": 249 }, { "epoch": 0.00729905696184053, "grad_norm": 0.9185541654125997, "learning_rate": 1.4594279042615294e-06, "loss": 0.9054, "step": 250 }, { "epoch": 0.007328253189687892, "grad_norm": 1.1594976807897037, "learning_rate": 1.4652656158785759e-06, "loss": 1.0977, "step": 251 }, { "epoch": 0.007357449417535254, "grad_norm": 1.0374610859533293, "learning_rate": 1.471103327495622e-06, "loss": 1.0754, "step": 252 }, { "epoch": 0.007386645645382617, "grad_norm": 0.9060245237515538, "learning_rate": 1.476941039112668e-06, "loss": 0.9727, "step": 253 }, { "epoch": 0.007415841873229979, "grad_norm": 1.165198596106424, "learning_rate": 1.482778750729714e-06, "loss": 1.014, "step": 254 }, { "epoch": 0.007445038101077341, "grad_norm": 0.8229467404247373, "learning_rate": 1.4886164623467603e-06, "loss": 0.7942, "step": 255 }, { "epoch": 0.007474234328924703, "grad_norm": 1.0647391152298022, "learning_rate": 1.4944541739638063e-06, "loss": 0.8258, "step": 256 }, { "epoch": 0.007503430556772065, "grad_norm": 1.2518837820623059, "learning_rate": 1.5002918855808524e-06, "loss": 1.0171, "step": 257 }, { "epoch": 0.007532626784619427, "grad_norm": 0.9822425066083865, "learning_rate": 1.5061295971978984e-06, "loss": 0.8841, "step": 258 }, { "epoch": 0.007561823012466789, "grad_norm": 1.0002818931132882, "learning_rate": 1.5119673088149447e-06, "loss": 0.9225, "step": 259 }, { "epoch": 0.0075910192403141515, "grad_norm": 0.9988362442019577, "learning_rate": 1.5178050204319907e-06, "loss": 0.9657, "step": 260 }, { "epoch": 0.0076202154681615135, "grad_norm": 0.9599252578493955, "learning_rate": 1.5236427320490368e-06, "loss": 1.0073, "step": 261 }, { "epoch": 0.007649411696008876, "grad_norm": 0.9513311828447052, "learning_rate": 1.5294804436660828e-06, "loss": 0.9461, "step": 262 }, { "epoch": 0.007678607923856238, "grad_norm": 1.061907135569551, "learning_rate": 1.5353181552831293e-06, "loss": 1.1033, "step": 263 }, { "epoch": 0.0077078041517036, "grad_norm": 0.7996650941345956, "learning_rate": 1.5411558669001753e-06, "loss": 0.8186, "step": 264 }, { "epoch": 0.007737000379550962, "grad_norm": 1.2011464704361885, "learning_rate": 1.5469935785172214e-06, "loss": 0.8561, "step": 265 }, { "epoch": 0.007766196607398324, "grad_norm": 1.0894008138395876, "learning_rate": 1.5528312901342674e-06, "loss": 1.1118, "step": 266 }, { "epoch": 0.007795392835245686, "grad_norm": 0.9465485419756541, "learning_rate": 1.5586690017513137e-06, "loss": 0.9098, "step": 267 }, { "epoch": 0.007824589063093048, "grad_norm": 1.1706007023922116, "learning_rate": 1.5645067133683597e-06, "loss": 0.8496, "step": 268 }, { "epoch": 0.007853785290940411, "grad_norm": 0.9009689439595011, "learning_rate": 1.5703444249854058e-06, "loss": 0.8626, "step": 269 }, { "epoch": 0.007882981518787772, "grad_norm": 0.8956690570970204, "learning_rate": 1.5761821366024518e-06, "loss": 0.9372, "step": 270 }, { "epoch": 0.007912177746635135, "grad_norm": 1.0074672574026164, "learning_rate": 1.5820198482194983e-06, "loss": 0.8389, "step": 271 }, { "epoch": 0.007941373974482496, "grad_norm": 0.9505759260012017, "learning_rate": 1.5878575598365443e-06, "loss": 0.8915, "step": 272 }, { "epoch": 0.00797057020232986, "grad_norm": 0.901513111567268, "learning_rate": 1.5936952714535904e-06, "loss": 0.8932, "step": 273 }, { "epoch": 0.00799976643017722, "grad_norm": 0.8345171844228145, "learning_rate": 1.5995329830706364e-06, "loss": 0.8479, "step": 274 }, { "epoch": 0.008028962658024583, "grad_norm": 1.0176522674887363, "learning_rate": 1.6053706946876827e-06, "loss": 0.9644, "step": 275 }, { "epoch": 0.008058158885871945, "grad_norm": 1.0414768085232393, "learning_rate": 1.6112084063047287e-06, "loss": 0.9959, "step": 276 }, { "epoch": 0.008087355113719307, "grad_norm": 1.008909786511875, "learning_rate": 1.6170461179217748e-06, "loss": 0.966, "step": 277 }, { "epoch": 0.00811655134156667, "grad_norm": 0.9709855892454993, "learning_rate": 1.6228838295388208e-06, "loss": 0.9113, "step": 278 }, { "epoch": 0.008145747569414032, "grad_norm": 1.4058869075828404, "learning_rate": 1.628721541155867e-06, "loss": 0.9876, "step": 279 }, { "epoch": 0.008174943797261395, "grad_norm": 1.0239110727644312, "learning_rate": 1.634559252772913e-06, "loss": 0.9331, "step": 280 }, { "epoch": 0.008204140025108756, "grad_norm": 1.1462152633793823, "learning_rate": 1.6403969643899591e-06, "loss": 0.9901, "step": 281 }, { "epoch": 0.008233336252956119, "grad_norm": 1.0191120280474677, "learning_rate": 1.6462346760070052e-06, "loss": 1.057, "step": 282 }, { "epoch": 0.00826253248080348, "grad_norm": 0.9914522479386205, "learning_rate": 1.6520723876240517e-06, "loss": 1.0859, "step": 283 }, { "epoch": 0.008291728708650843, "grad_norm": 0.9713987380902325, "learning_rate": 1.6579100992410977e-06, "loss": 0.8752, "step": 284 }, { "epoch": 0.008320924936498204, "grad_norm": 0.9319510629944984, "learning_rate": 1.6637478108581437e-06, "loss": 0.9816, "step": 285 }, { "epoch": 0.008350121164345567, "grad_norm": 1.017881157846065, "learning_rate": 1.6695855224751898e-06, "loss": 0.8215, "step": 286 }, { "epoch": 0.008379317392192928, "grad_norm": 0.9225247721333799, "learning_rate": 1.675423234092236e-06, "loss": 0.8558, "step": 287 }, { "epoch": 0.008408513620040291, "grad_norm": 1.0825522828902616, "learning_rate": 1.681260945709282e-06, "loss": 0.9696, "step": 288 }, { "epoch": 0.008437709847887652, "grad_norm": 1.290623460413601, "learning_rate": 1.6870986573263281e-06, "loss": 0.9231, "step": 289 }, { "epoch": 0.008466906075735015, "grad_norm": 0.8760969737217951, "learning_rate": 1.6929363689433742e-06, "loss": 0.966, "step": 290 }, { "epoch": 0.008496102303582376, "grad_norm": 0.8994220740789761, "learning_rate": 1.6987740805604204e-06, "loss": 0.8076, "step": 291 }, { "epoch": 0.00852529853142974, "grad_norm": 0.9598522778239489, "learning_rate": 1.7046117921774665e-06, "loss": 0.9772, "step": 292 }, { "epoch": 0.0085544947592771, "grad_norm": 0.9419215017829345, "learning_rate": 1.7104495037945127e-06, "loss": 0.8829, "step": 293 }, { "epoch": 0.008583690987124463, "grad_norm": 0.7860580308971029, "learning_rate": 1.7162872154115588e-06, "loss": 0.7902, "step": 294 }, { "epoch": 0.008612887214971826, "grad_norm": 0.9985612782508207, "learning_rate": 1.722124927028605e-06, "loss": 0.863, "step": 295 }, { "epoch": 0.008642083442819188, "grad_norm": 0.9829671226555253, "learning_rate": 1.727962638645651e-06, "loss": 0.938, "step": 296 }, { "epoch": 0.00867127967066655, "grad_norm": 0.9276928811149278, "learning_rate": 1.7338003502626971e-06, "loss": 0.8586, "step": 297 }, { "epoch": 0.008700475898513912, "grad_norm": 0.9146044565293817, "learning_rate": 1.7396380618797432e-06, "loss": 0.8946, "step": 298 }, { "epoch": 0.008729672126361275, "grad_norm": 0.9397715811758675, "learning_rate": 1.7454757734967894e-06, "loss": 0.9335, "step": 299 }, { "epoch": 0.008758868354208636, "grad_norm": 0.9482284042119437, "learning_rate": 1.7513134851138355e-06, "loss": 0.9351, "step": 300 }, { "epoch": 0.008788064582055999, "grad_norm": 0.9219011446547525, "learning_rate": 1.7571511967308815e-06, "loss": 1.0248, "step": 301 }, { "epoch": 0.00881726080990336, "grad_norm": 0.883804356030714, "learning_rate": 1.7629889083479276e-06, "loss": 0.9053, "step": 302 }, { "epoch": 0.008846457037750723, "grad_norm": 1.0104916502688526, "learning_rate": 1.768826619964974e-06, "loss": 1.0491, "step": 303 }, { "epoch": 0.008875653265598084, "grad_norm": 0.9432875965771902, "learning_rate": 1.77466433158202e-06, "loss": 0.9879, "step": 304 }, { "epoch": 0.008904849493445447, "grad_norm": 1.0274344495087475, "learning_rate": 1.7805020431990661e-06, "loss": 0.9691, "step": 305 }, { "epoch": 0.008934045721292808, "grad_norm": 1.0006321104289748, "learning_rate": 1.7863397548161122e-06, "loss": 0.9415, "step": 306 }, { "epoch": 0.008963241949140171, "grad_norm": 0.950466563200022, "learning_rate": 1.7921774664331584e-06, "loss": 0.9656, "step": 307 }, { "epoch": 0.008992438176987532, "grad_norm": 0.9061754231141844, "learning_rate": 1.7980151780502045e-06, "loss": 0.9327, "step": 308 }, { "epoch": 0.009021634404834895, "grad_norm": 0.7830159914466153, "learning_rate": 1.8038528896672505e-06, "loss": 0.8145, "step": 309 }, { "epoch": 0.009050830632682258, "grad_norm": 0.9815141530682946, "learning_rate": 1.8096906012842966e-06, "loss": 0.9676, "step": 310 }, { "epoch": 0.00908002686052962, "grad_norm": 0.9905209049180846, "learning_rate": 1.8155283129013428e-06, "loss": 0.9486, "step": 311 }, { "epoch": 0.009109223088376982, "grad_norm": 0.9016219070452046, "learning_rate": 1.8213660245183889e-06, "loss": 0.847, "step": 312 }, { "epoch": 0.009138419316224344, "grad_norm": 1.1344437064887398, "learning_rate": 1.827203736135435e-06, "loss": 0.9948, "step": 313 }, { "epoch": 0.009167615544071707, "grad_norm": 0.9427262258757622, "learning_rate": 1.833041447752481e-06, "loss": 0.8411, "step": 314 }, { "epoch": 0.009196811771919068, "grad_norm": 0.8959584049945696, "learning_rate": 1.8388791593695274e-06, "loss": 0.9785, "step": 315 }, { "epoch": 0.00922600799976643, "grad_norm": 0.8100394290591116, "learning_rate": 1.8447168709865735e-06, "loss": 0.8501, "step": 316 }, { "epoch": 0.009255204227613792, "grad_norm": 0.8580257725378865, "learning_rate": 1.8505545826036195e-06, "loss": 0.8601, "step": 317 }, { "epoch": 0.009284400455461155, "grad_norm": 0.8655480544238017, "learning_rate": 1.8563922942206656e-06, "loss": 0.8901, "step": 318 }, { "epoch": 0.009313596683308516, "grad_norm": 0.9296632087608777, "learning_rate": 1.8622300058377118e-06, "loss": 0.9606, "step": 319 }, { "epoch": 0.009342792911155879, "grad_norm": 0.8634276876273677, "learning_rate": 1.8680677174547579e-06, "loss": 0.9327, "step": 320 }, { "epoch": 0.00937198913900324, "grad_norm": 0.9253102779342595, "learning_rate": 1.873905429071804e-06, "loss": 0.8126, "step": 321 }, { "epoch": 0.009401185366850603, "grad_norm": 1.0146159038680798, "learning_rate": 1.87974314068885e-06, "loss": 0.865, "step": 322 }, { "epoch": 0.009430381594697964, "grad_norm": 1.125161385885965, "learning_rate": 1.8855808523058964e-06, "loss": 0.9095, "step": 323 }, { "epoch": 0.009459577822545327, "grad_norm": 1.0137812938635957, "learning_rate": 1.8914185639229425e-06, "loss": 0.7997, "step": 324 }, { "epoch": 0.009488774050392688, "grad_norm": 0.8711972649763168, "learning_rate": 1.8972562755399885e-06, "loss": 0.9416, "step": 325 }, { "epoch": 0.009517970278240051, "grad_norm": 0.8311685469062691, "learning_rate": 1.9030939871570345e-06, "loss": 0.7751, "step": 326 }, { "epoch": 0.009547166506087414, "grad_norm": 0.9110115750778978, "learning_rate": 1.9089316987740806e-06, "loss": 0.9111, "step": 327 }, { "epoch": 0.009576362733934776, "grad_norm": 0.895780766044245, "learning_rate": 1.914769410391127e-06, "loss": 0.9241, "step": 328 }, { "epoch": 0.009605558961782138, "grad_norm": 1.0414260783616627, "learning_rate": 1.920607122008173e-06, "loss": 0.9359, "step": 329 }, { "epoch": 0.0096347551896295, "grad_norm": 0.9991790979417394, "learning_rate": 1.926444833625219e-06, "loss": 0.8903, "step": 330 }, { "epoch": 0.009663951417476863, "grad_norm": 0.8432389827307792, "learning_rate": 1.932282545242265e-06, "loss": 0.7731, "step": 331 }, { "epoch": 0.009693147645324224, "grad_norm": 0.9738492415152746, "learning_rate": 1.9381202568593115e-06, "loss": 0.9426, "step": 332 }, { "epoch": 0.009722343873171587, "grad_norm": 0.8318823357841882, "learning_rate": 1.9439579684763573e-06, "loss": 0.8728, "step": 333 }, { "epoch": 0.009751540101018948, "grad_norm": 1.0191699987953409, "learning_rate": 1.9497956800934035e-06, "loss": 0.9724, "step": 334 }, { "epoch": 0.00978073632886631, "grad_norm": 0.919077628752596, "learning_rate": 1.95563339171045e-06, "loss": 0.8843, "step": 335 }, { "epoch": 0.009809932556713672, "grad_norm": 0.882556959306734, "learning_rate": 1.9614711033274956e-06, "loss": 0.8376, "step": 336 }, { "epoch": 0.009839128784561035, "grad_norm": 0.9421168654779857, "learning_rate": 1.967308814944542e-06, "loss": 0.844, "step": 337 }, { "epoch": 0.009868325012408396, "grad_norm": 0.8141206103398646, "learning_rate": 1.9731465265615877e-06, "loss": 0.9053, "step": 338 }, { "epoch": 0.00989752124025576, "grad_norm": 0.9072315426460384, "learning_rate": 1.9789842381786344e-06, "loss": 0.9506, "step": 339 }, { "epoch": 0.00992671746810312, "grad_norm": 0.8983942097974588, "learning_rate": 1.9848219497956802e-06, "loss": 0.9008, "step": 340 }, { "epoch": 0.009955913695950483, "grad_norm": 0.9236762484561089, "learning_rate": 1.9906596614127265e-06, "loss": 0.9292, "step": 341 }, { "epoch": 0.009985109923797844, "grad_norm": 0.8254775682730304, "learning_rate": 1.9964973730297723e-06, "loss": 0.8258, "step": 342 }, { "epoch": 0.010014306151645207, "grad_norm": 0.843675391632378, "learning_rate": 2.0023350846468186e-06, "loss": 0.8256, "step": 343 }, { "epoch": 0.01004350237949257, "grad_norm": 0.9095437437388865, "learning_rate": 2.008172796263865e-06, "loss": 0.8698, "step": 344 }, { "epoch": 0.010072698607339932, "grad_norm": 0.9236721589296342, "learning_rate": 2.0140105078809107e-06, "loss": 0.8676, "step": 345 }, { "epoch": 0.010101894835187294, "grad_norm": 0.7974097851164044, "learning_rate": 2.019848219497957e-06, "loss": 0.7235, "step": 346 }, { "epoch": 0.010131091063034656, "grad_norm": 0.8761992984457071, "learning_rate": 2.025685931115003e-06, "loss": 0.9654, "step": 347 }, { "epoch": 0.010160287290882019, "grad_norm": 0.9154631444014527, "learning_rate": 2.031523642732049e-06, "loss": 0.8989, "step": 348 }, { "epoch": 0.01018948351872938, "grad_norm": 0.9001943310905238, "learning_rate": 2.0373613543490953e-06, "loss": 0.8519, "step": 349 }, { "epoch": 0.010218679746576743, "grad_norm": 0.9298816271431926, "learning_rate": 2.043199065966141e-06, "loss": 0.9372, "step": 350 }, { "epoch": 0.010247875974424104, "grad_norm": 0.9939541098806246, "learning_rate": 2.0490367775831878e-06, "loss": 0.9651, "step": 351 }, { "epoch": 0.010277072202271467, "grad_norm": 0.9893531957956126, "learning_rate": 2.0548744892002336e-06, "loss": 0.8979, "step": 352 }, { "epoch": 0.010306268430118828, "grad_norm": 0.997104762495363, "learning_rate": 2.06071220081728e-06, "loss": 1.0391, "step": 353 }, { "epoch": 0.010335464657966191, "grad_norm": 1.0292730752130521, "learning_rate": 2.0665499124343257e-06, "loss": 0.9205, "step": 354 }, { "epoch": 0.010364660885813552, "grad_norm": 0.9420982895150892, "learning_rate": 2.072387624051372e-06, "loss": 0.8926, "step": 355 }, { "epoch": 0.010393857113660915, "grad_norm": 0.93457694329397, "learning_rate": 2.0782253356684182e-06, "loss": 0.8863, "step": 356 }, { "epoch": 0.010423053341508276, "grad_norm": 0.8751974758869683, "learning_rate": 2.084063047285464e-06, "loss": 0.8342, "step": 357 }, { "epoch": 0.01045224956935564, "grad_norm": 0.8967808247831943, "learning_rate": 2.0899007589025103e-06, "loss": 0.8929, "step": 358 }, { "epoch": 0.010481445797203002, "grad_norm": 1.0624966041804653, "learning_rate": 2.0957384705195566e-06, "loss": 0.9207, "step": 359 }, { "epoch": 0.010510642025050363, "grad_norm": 0.8910048514359361, "learning_rate": 2.101576182136603e-06, "loss": 0.7937, "step": 360 }, { "epoch": 0.010539838252897726, "grad_norm": 0.8338349063122279, "learning_rate": 2.1074138937536487e-06, "loss": 0.877, "step": 361 }, { "epoch": 0.010569034480745088, "grad_norm": 0.8595532115978617, "learning_rate": 2.113251605370695e-06, "loss": 0.9558, "step": 362 }, { "epoch": 0.01059823070859245, "grad_norm": 0.9904906783672525, "learning_rate": 2.119089316987741e-06, "loss": 1.0112, "step": 363 }, { "epoch": 0.010627426936439812, "grad_norm": 0.8834594392364598, "learning_rate": 2.124927028604787e-06, "loss": 0.9565, "step": 364 }, { "epoch": 0.010656623164287175, "grad_norm": 0.7497243143458556, "learning_rate": 2.1307647402218333e-06, "loss": 0.7252, "step": 365 }, { "epoch": 0.010685819392134536, "grad_norm": 0.9764179851206904, "learning_rate": 2.136602451838879e-06, "loss": 0.8959, "step": 366 }, { "epoch": 0.010715015619981899, "grad_norm": 0.8501847909600313, "learning_rate": 2.1424401634559254e-06, "loss": 0.6924, "step": 367 }, { "epoch": 0.01074421184782926, "grad_norm": 0.9834934326152533, "learning_rate": 2.1482778750729716e-06, "loss": 0.9217, "step": 368 }, { "epoch": 0.010773408075676623, "grad_norm": 0.8908362085891894, "learning_rate": 2.1541155866900174e-06, "loss": 0.8671, "step": 369 }, { "epoch": 0.010802604303523984, "grad_norm": 0.8735310394441298, "learning_rate": 2.1599532983070637e-06, "loss": 0.7252, "step": 370 }, { "epoch": 0.010831800531371347, "grad_norm": 0.9797877841158172, "learning_rate": 2.16579100992411e-06, "loss": 0.8329, "step": 371 }, { "epoch": 0.010860996759218708, "grad_norm": 0.9125289794498919, "learning_rate": 2.171628721541156e-06, "loss": 0.9048, "step": 372 }, { "epoch": 0.010890192987066071, "grad_norm": 0.8673292831671929, "learning_rate": 2.177466433158202e-06, "loss": 0.783, "step": 373 }, { "epoch": 0.010919389214913432, "grad_norm": 0.8269229478140581, "learning_rate": 2.1833041447752483e-06, "loss": 0.9033, "step": 374 }, { "epoch": 0.010948585442760795, "grad_norm": 0.8658105818039156, "learning_rate": 2.1891418563922946e-06, "loss": 0.9199, "step": 375 }, { "epoch": 0.010977781670608158, "grad_norm": 0.8925155146961229, "learning_rate": 2.1949795680093404e-06, "loss": 0.9942, "step": 376 }, { "epoch": 0.01100697789845552, "grad_norm": 1.1074193309132743, "learning_rate": 2.2008172796263866e-06, "loss": 0.9442, "step": 377 }, { "epoch": 0.011036174126302882, "grad_norm": 0.8492773377279728, "learning_rate": 2.2066549912434325e-06, "loss": 0.8767, "step": 378 }, { "epoch": 0.011065370354150244, "grad_norm": 1.047050880040489, "learning_rate": 2.212492702860479e-06, "loss": 0.8522, "step": 379 }, { "epoch": 0.011094566581997607, "grad_norm": 0.8479168692652069, "learning_rate": 2.218330414477525e-06, "loss": 0.7972, "step": 380 }, { "epoch": 0.011123762809844968, "grad_norm": 2.2925355516040744, "learning_rate": 2.2241681260945713e-06, "loss": 0.9392, "step": 381 }, { "epoch": 0.01115295903769233, "grad_norm": 0.8592544830342819, "learning_rate": 2.230005837711617e-06, "loss": 0.7524, "step": 382 }, { "epoch": 0.011182155265539692, "grad_norm": 2.767685101526022, "learning_rate": 2.2358435493286633e-06, "loss": 1.0021, "step": 383 }, { "epoch": 0.011211351493387055, "grad_norm": 0.8716176846398372, "learning_rate": 2.2416812609457096e-06, "loss": 0.8661, "step": 384 }, { "epoch": 0.011240547721234416, "grad_norm": 1.0148391858884835, "learning_rate": 2.2475189725627554e-06, "loss": 0.9081, "step": 385 }, { "epoch": 0.011269743949081779, "grad_norm": 0.9078684933152272, "learning_rate": 2.2533566841798017e-06, "loss": 0.7901, "step": 386 }, { "epoch": 0.01129894017692914, "grad_norm": 1.033735489781494, "learning_rate": 2.259194395796848e-06, "loss": 0.9025, "step": 387 }, { "epoch": 0.011328136404776503, "grad_norm": 0.8769275383843129, "learning_rate": 2.2650321074138938e-06, "loss": 0.8473, "step": 388 }, { "epoch": 0.011357332632623864, "grad_norm": 0.8508735522187713, "learning_rate": 2.27086981903094e-06, "loss": 0.7442, "step": 389 }, { "epoch": 0.011386528860471227, "grad_norm": 1.0287124301497002, "learning_rate": 2.276707530647986e-06, "loss": 0.8959, "step": 390 }, { "epoch": 0.011415725088318588, "grad_norm": 0.8606734255979019, "learning_rate": 2.2825452422650325e-06, "loss": 0.8426, "step": 391 }, { "epoch": 0.011444921316165951, "grad_norm": 0.8102365759376544, "learning_rate": 2.2883829538820784e-06, "loss": 0.8071, "step": 392 }, { "epoch": 0.011474117544013314, "grad_norm": 1.312172986358446, "learning_rate": 2.2942206654991246e-06, "loss": 0.8846, "step": 393 }, { "epoch": 0.011503313771860675, "grad_norm": 0.9976177808315185, "learning_rate": 2.3000583771161705e-06, "loss": 0.9555, "step": 394 }, { "epoch": 0.011532509999708038, "grad_norm": 0.9517808644852238, "learning_rate": 2.3058960887332167e-06, "loss": 0.9041, "step": 395 }, { "epoch": 0.0115617062275554, "grad_norm": 0.9755149951437361, "learning_rate": 2.311733800350263e-06, "loss": 0.8409, "step": 396 }, { "epoch": 0.011590902455402763, "grad_norm": 0.8069713757242652, "learning_rate": 2.317571511967309e-06, "loss": 0.7869, "step": 397 }, { "epoch": 0.011620098683250124, "grad_norm": 0.8830703274771119, "learning_rate": 2.323409223584355e-06, "loss": 0.8866, "step": 398 }, { "epoch": 0.011649294911097487, "grad_norm": 0.899636101187675, "learning_rate": 2.3292469352014013e-06, "loss": 0.9488, "step": 399 }, { "epoch": 0.011678491138944848, "grad_norm": 0.9263442369096205, "learning_rate": 2.3350846468184476e-06, "loss": 0.8581, "step": 400 }, { "epoch": 0.01170768736679221, "grad_norm": 0.937457419875038, "learning_rate": 2.3409223584354934e-06, "loss": 0.8038, "step": 401 }, { "epoch": 0.011736883594639572, "grad_norm": 0.9722248255999869, "learning_rate": 2.3467600700525397e-06, "loss": 0.9028, "step": 402 }, { "epoch": 0.011766079822486935, "grad_norm": 0.8596316942374389, "learning_rate": 2.352597781669586e-06, "loss": 0.8551, "step": 403 }, { "epoch": 0.011795276050334296, "grad_norm": 0.8055372493512049, "learning_rate": 2.3584354932866318e-06, "loss": 0.7503, "step": 404 }, { "epoch": 0.011824472278181659, "grad_norm": 0.8598347451701431, "learning_rate": 2.364273204903678e-06, "loss": 0.8048, "step": 405 }, { "epoch": 0.01185366850602902, "grad_norm": 1.1779623042951137, "learning_rate": 2.370110916520724e-06, "loss": 0.9118, "step": 406 }, { "epoch": 0.011882864733876383, "grad_norm": 1.0165690502530826, "learning_rate": 2.37594862813777e-06, "loss": 0.9033, "step": 407 }, { "epoch": 0.011912060961723746, "grad_norm": 0.8433748832835458, "learning_rate": 2.3817863397548164e-06, "loss": 0.7491, "step": 408 }, { "epoch": 0.011941257189571107, "grad_norm": 0.9262811223855624, "learning_rate": 2.387624051371862e-06, "loss": 0.9524, "step": 409 }, { "epoch": 0.01197045341741847, "grad_norm": 1.7584064014248213, "learning_rate": 2.3934617629889085e-06, "loss": 0.9407, "step": 410 }, { "epoch": 0.011999649645265831, "grad_norm": 0.9570646356659741, "learning_rate": 2.3992994746059547e-06, "loss": 0.9811, "step": 411 }, { "epoch": 0.012028845873113194, "grad_norm": 0.8465981888689016, "learning_rate": 2.405137186223001e-06, "loss": 0.8312, "step": 412 }, { "epoch": 0.012058042100960556, "grad_norm": 0.8131557960491468, "learning_rate": 2.410974897840047e-06, "loss": 0.7941, "step": 413 }, { "epoch": 0.012087238328807919, "grad_norm": 0.9186442066328906, "learning_rate": 2.416812609457093e-06, "loss": 0.8616, "step": 414 }, { "epoch": 0.01211643455665528, "grad_norm": 1.0302485598563296, "learning_rate": 2.4226503210741393e-06, "loss": 0.8909, "step": 415 }, { "epoch": 0.012145630784502643, "grad_norm": 0.9447009074658232, "learning_rate": 2.428488032691185e-06, "loss": 0.8919, "step": 416 }, { "epoch": 0.012174827012350004, "grad_norm": 0.859615353027868, "learning_rate": 2.4343257443082314e-06, "loss": 0.8581, "step": 417 }, { "epoch": 0.012204023240197367, "grad_norm": 0.8318514978226194, "learning_rate": 2.4401634559252772e-06, "loss": 0.7704, "step": 418 }, { "epoch": 0.012233219468044728, "grad_norm": 0.8806343751503829, "learning_rate": 2.4460011675423235e-06, "loss": 0.9104, "step": 419 }, { "epoch": 0.012262415695892091, "grad_norm": 0.8660754995977452, "learning_rate": 2.4518388791593698e-06, "loss": 0.9773, "step": 420 }, { "epoch": 0.012291611923739452, "grad_norm": 1.0581788879267713, "learning_rate": 2.4576765907764156e-06, "loss": 0.8376, "step": 421 }, { "epoch": 0.012320808151586815, "grad_norm": 0.8851143186111946, "learning_rate": 2.463514302393462e-06, "loss": 0.7899, "step": 422 }, { "epoch": 0.012350004379434176, "grad_norm": 0.944423170011862, "learning_rate": 2.469352014010508e-06, "loss": 0.8487, "step": 423 }, { "epoch": 0.01237920060728154, "grad_norm": 0.86560461498229, "learning_rate": 2.4751897256275544e-06, "loss": 0.774, "step": 424 }, { "epoch": 0.012408396835128902, "grad_norm": 0.9474505205249122, "learning_rate": 2.4810274372446e-06, "loss": 0.8707, "step": 425 }, { "epoch": 0.012437593062976263, "grad_norm": 0.7632067285215304, "learning_rate": 2.4868651488616464e-06, "loss": 0.7393, "step": 426 }, { "epoch": 0.012466789290823626, "grad_norm": 0.8295954365240872, "learning_rate": 2.4927028604786927e-06, "loss": 0.7856, "step": 427 }, { "epoch": 0.012495985518670987, "grad_norm": 0.8117240670960679, "learning_rate": 2.4985405720957385e-06, "loss": 0.8409, "step": 428 }, { "epoch": 0.01252518174651835, "grad_norm": 0.9663579113605155, "learning_rate": 2.5043782837127852e-06, "loss": 0.8818, "step": 429 }, { "epoch": 0.012554377974365712, "grad_norm": 0.9325294482597967, "learning_rate": 2.510215995329831e-06, "loss": 0.8025, "step": 430 }, { "epoch": 0.012583574202213075, "grad_norm": 0.8939470962913886, "learning_rate": 2.5160537069468773e-06, "loss": 0.9928, "step": 431 }, { "epoch": 0.012612770430060436, "grad_norm": 0.7980002121542029, "learning_rate": 2.521891418563923e-06, "loss": 0.7741, "step": 432 }, { "epoch": 0.012641966657907799, "grad_norm": 0.8850385346994889, "learning_rate": 2.5277291301809694e-06, "loss": 0.8252, "step": 433 }, { "epoch": 0.01267116288575516, "grad_norm": 0.906173948375056, "learning_rate": 2.5335668417980152e-06, "loss": 0.8109, "step": 434 }, { "epoch": 0.012700359113602523, "grad_norm": 0.8125065393693467, "learning_rate": 2.5394045534150615e-06, "loss": 0.8119, "step": 435 }, { "epoch": 0.012729555341449884, "grad_norm": 0.7810946158933539, "learning_rate": 2.5452422650321073e-06, "loss": 0.723, "step": 436 }, { "epoch": 0.012758751569297247, "grad_norm": 0.761633542015362, "learning_rate": 2.551079976649154e-06, "loss": 0.7323, "step": 437 }, { "epoch": 0.012787947797144608, "grad_norm": 0.8026265433703633, "learning_rate": 2.5569176882662e-06, "loss": 0.7922, "step": 438 }, { "epoch": 0.012817144024991971, "grad_norm": 1.1225734160827208, "learning_rate": 2.562755399883246e-06, "loss": 0.8993, "step": 439 }, { "epoch": 0.012846340252839332, "grad_norm": 0.9021289088088885, "learning_rate": 2.568593111500292e-06, "loss": 0.7786, "step": 440 }, { "epoch": 0.012875536480686695, "grad_norm": 0.9669727631837267, "learning_rate": 2.574430823117338e-06, "loss": 0.7618, "step": 441 }, { "epoch": 0.012904732708534058, "grad_norm": 0.8277601189488677, "learning_rate": 2.580268534734384e-06, "loss": 0.8973, "step": 442 }, { "epoch": 0.01293392893638142, "grad_norm": 1.0509793603943502, "learning_rate": 2.5861062463514303e-06, "loss": 0.9167, "step": 443 }, { "epoch": 0.012963125164228782, "grad_norm": 0.8239962250506521, "learning_rate": 2.591943957968476e-06, "loss": 0.7488, "step": 444 }, { "epoch": 0.012992321392076144, "grad_norm": 0.9592630568841112, "learning_rate": 2.5977816695855228e-06, "loss": 0.9879, "step": 445 }, { "epoch": 0.013021517619923506, "grad_norm": 0.9976665815077327, "learning_rate": 2.603619381202569e-06, "loss": 0.9047, "step": 446 }, { "epoch": 0.013050713847770868, "grad_norm": 0.9478942648882857, "learning_rate": 2.609457092819615e-06, "loss": 0.7537, "step": 447 }, { "epoch": 0.01307991007561823, "grad_norm": 0.8511144824897788, "learning_rate": 2.615294804436661e-06, "loss": 0.8541, "step": 448 }, { "epoch": 0.013109106303465592, "grad_norm": 0.8628719483210101, "learning_rate": 2.621132516053707e-06, "loss": 0.841, "step": 449 }, { "epoch": 0.013138302531312955, "grad_norm": 0.8715422511444778, "learning_rate": 2.6269702276707532e-06, "loss": 0.9068, "step": 450 }, { "epoch": 0.013167498759160316, "grad_norm": 0.8996934607044351, "learning_rate": 2.632807939287799e-06, "loss": 0.796, "step": 451 }, { "epoch": 0.013196694987007679, "grad_norm": 0.8719468136483846, "learning_rate": 2.6386456509048453e-06, "loss": 0.7796, "step": 452 }, { "epoch": 0.01322589121485504, "grad_norm": 0.8193115577403514, "learning_rate": 2.644483362521892e-06, "loss": 0.8968, "step": 453 }, { "epoch": 0.013255087442702403, "grad_norm": 0.8899507177540211, "learning_rate": 2.650321074138938e-06, "loss": 0.8899, "step": 454 }, { "epoch": 0.013284283670549764, "grad_norm": 0.8023715905076416, "learning_rate": 2.656158785755984e-06, "loss": 0.8348, "step": 455 }, { "epoch": 0.013313479898397127, "grad_norm": 0.89159990864182, "learning_rate": 2.66199649737303e-06, "loss": 0.8567, "step": 456 }, { "epoch": 0.01334267612624449, "grad_norm": 1.0842521991476233, "learning_rate": 2.667834208990076e-06, "loss": 1.0052, "step": 457 }, { "epoch": 0.013371872354091851, "grad_norm": 0.9494627246392208, "learning_rate": 2.673671920607122e-06, "loss": 0.8262, "step": 458 }, { "epoch": 0.013401068581939214, "grad_norm": 0.8684972743449401, "learning_rate": 2.6795096322241683e-06, "loss": 0.8196, "step": 459 }, { "epoch": 0.013430264809786575, "grad_norm": 0.9101407839839202, "learning_rate": 2.685347343841214e-06, "loss": 0.9205, "step": 460 }, { "epoch": 0.013459461037633938, "grad_norm": 1.0142648612906455, "learning_rate": 2.6911850554582608e-06, "loss": 0.868, "step": 461 }, { "epoch": 0.0134886572654813, "grad_norm": 0.7829168063138872, "learning_rate": 2.697022767075307e-06, "loss": 0.812, "step": 462 }, { "epoch": 0.013517853493328662, "grad_norm": 1.8733451179634915, "learning_rate": 2.702860478692353e-06, "loss": 0.9111, "step": 463 }, { "epoch": 0.013547049721176024, "grad_norm": 0.8497216452607567, "learning_rate": 2.708698190309399e-06, "loss": 0.8125, "step": 464 }, { "epoch": 0.013576245949023387, "grad_norm": 0.8111746670120503, "learning_rate": 2.714535901926445e-06, "loss": 0.7874, "step": 465 }, { "epoch": 0.013605442176870748, "grad_norm": 0.9127658195853968, "learning_rate": 2.720373613543491e-06, "loss": 0.8043, "step": 466 }, { "epoch": 0.01363463840471811, "grad_norm": 0.8628996036965582, "learning_rate": 2.726211325160537e-06, "loss": 0.869, "step": 467 }, { "epoch": 0.013663834632565472, "grad_norm": 0.9125040240927438, "learning_rate": 2.7320490367775833e-06, "loss": 0.8662, "step": 468 }, { "epoch": 0.013693030860412835, "grad_norm": 1.199376413599211, "learning_rate": 2.7378867483946295e-06, "loss": 0.9643, "step": 469 }, { "epoch": 0.013722227088260196, "grad_norm": 0.8295432031496704, "learning_rate": 2.743724460011676e-06, "loss": 0.8968, "step": 470 }, { "epoch": 0.013751423316107559, "grad_norm": 0.8257525828859047, "learning_rate": 2.749562171628722e-06, "loss": 0.8423, "step": 471 }, { "epoch": 0.01378061954395492, "grad_norm": 1.360985213477728, "learning_rate": 2.755399883245768e-06, "loss": 0.7841, "step": 472 }, { "epoch": 0.013809815771802283, "grad_norm": 0.8362964740053056, "learning_rate": 2.761237594862814e-06, "loss": 0.862, "step": 473 }, { "epoch": 0.013839011999649646, "grad_norm": 0.9216110581284865, "learning_rate": 2.76707530647986e-06, "loss": 0.9284, "step": 474 }, { "epoch": 0.013868208227497007, "grad_norm": 1.4903839073503204, "learning_rate": 2.7729130180969062e-06, "loss": 0.8126, "step": 475 }, { "epoch": 0.01389740445534437, "grad_norm": 0.8960663762461852, "learning_rate": 2.778750729713952e-06, "loss": 0.8907, "step": 476 }, { "epoch": 0.013926600683191731, "grad_norm": 0.8510878948403667, "learning_rate": 2.7845884413309988e-06, "loss": 0.8443, "step": 477 }, { "epoch": 0.013955796911039094, "grad_norm": 0.8856143307654518, "learning_rate": 2.7904261529480446e-06, "loss": 0.9341, "step": 478 }, { "epoch": 0.013984993138886456, "grad_norm": 1.2497482091433818, "learning_rate": 2.796263864565091e-06, "loss": 0.804, "step": 479 }, { "epoch": 0.014014189366733818, "grad_norm": 0.7991288853467147, "learning_rate": 2.8021015761821367e-06, "loss": 0.7854, "step": 480 }, { "epoch": 0.01404338559458118, "grad_norm": 0.8689411883472324, "learning_rate": 2.807939287799183e-06, "loss": 0.8179, "step": 481 }, { "epoch": 0.014072581822428543, "grad_norm": 1.016489287111674, "learning_rate": 2.8137769994162288e-06, "loss": 0.7477, "step": 482 }, { "epoch": 0.014101778050275904, "grad_norm": 0.899498780499001, "learning_rate": 2.819614711033275e-06, "loss": 0.8843, "step": 483 }, { "epoch": 0.014130974278123267, "grad_norm": 0.9796931011304469, "learning_rate": 2.825452422650321e-06, "loss": 0.8765, "step": 484 }, { "epoch": 0.014160170505970628, "grad_norm": 0.9308817596491076, "learning_rate": 2.8312901342673675e-06, "loss": 0.8146, "step": 485 }, { "epoch": 0.014189366733817991, "grad_norm": 0.8059539818525788, "learning_rate": 2.837127845884414e-06, "loss": 0.7916, "step": 486 }, { "epoch": 0.014218562961665352, "grad_norm": 0.9144470312923392, "learning_rate": 2.8429655575014596e-06, "loss": 0.842, "step": 487 }, { "epoch": 0.014247759189512715, "grad_norm": 0.8752115056785349, "learning_rate": 2.848803269118506e-06, "loss": 0.7978, "step": 488 }, { "epoch": 0.014276955417360076, "grad_norm": 0.8337966501304522, "learning_rate": 2.8546409807355517e-06, "loss": 0.8498, "step": 489 }, { "epoch": 0.01430615164520744, "grad_norm": 0.7895443490568608, "learning_rate": 2.860478692352598e-06, "loss": 0.7911, "step": 490 }, { "epoch": 0.014335347873054802, "grad_norm": 1.1233512297887422, "learning_rate": 2.866316403969644e-06, "loss": 0.8906, "step": 491 }, { "epoch": 0.014364544100902163, "grad_norm": 0.9416286706123671, "learning_rate": 2.87215411558669e-06, "loss": 0.8833, "step": 492 }, { "epoch": 0.014393740328749526, "grad_norm": 0.8728153102022475, "learning_rate": 2.8779918272037367e-06, "loss": 0.8791, "step": 493 }, { "epoch": 0.014422936556596887, "grad_norm": 1.3123111772883322, "learning_rate": 2.8838295388207826e-06, "loss": 0.9011, "step": 494 }, { "epoch": 0.01445213278444425, "grad_norm": 0.8700118945471857, "learning_rate": 2.889667250437829e-06, "loss": 0.8885, "step": 495 }, { "epoch": 0.014481329012291612, "grad_norm": 0.8661364510128851, "learning_rate": 2.8955049620548747e-06, "loss": 0.867, "step": 496 }, { "epoch": 0.014510525240138974, "grad_norm": 0.9950384873197491, "learning_rate": 2.901342673671921e-06, "loss": 0.7468, "step": 497 }, { "epoch": 0.014539721467986336, "grad_norm": 1.0513920402188144, "learning_rate": 2.9071803852889668e-06, "loss": 0.8297, "step": 498 }, { "epoch": 0.014568917695833699, "grad_norm": 0.8642170236166499, "learning_rate": 2.913018096906013e-06, "loss": 0.8736, "step": 499 }, { "epoch": 0.01459811392368106, "grad_norm": 0.8353170821350999, "learning_rate": 2.918855808523059e-06, "loss": 0.7953, "step": 500 }, { "epoch": 0.014627310151528423, "grad_norm": 0.9322108535091165, "learning_rate": 2.9246935201401055e-06, "loss": 0.9745, "step": 501 }, { "epoch": 0.014656506379375784, "grad_norm": 0.826987902973724, "learning_rate": 2.9305312317571518e-06, "loss": 0.8445, "step": 502 }, { "epoch": 0.014685702607223147, "grad_norm": 1.1527339642118746, "learning_rate": 2.9363689433741976e-06, "loss": 0.9125, "step": 503 }, { "epoch": 0.014714898835070508, "grad_norm": 0.8368345454198015, "learning_rate": 2.942206654991244e-06, "loss": 0.8719, "step": 504 }, { "epoch": 0.014744095062917871, "grad_norm": 0.9044431315917746, "learning_rate": 2.9480443666082897e-06, "loss": 0.9309, "step": 505 }, { "epoch": 0.014773291290765234, "grad_norm": 0.8816875150635503, "learning_rate": 2.953882078225336e-06, "loss": 0.7891, "step": 506 }, { "epoch": 0.014802487518612595, "grad_norm": 0.9667241871287072, "learning_rate": 2.959719789842382e-06, "loss": 0.8447, "step": 507 }, { "epoch": 0.014831683746459958, "grad_norm": 0.8641873367703411, "learning_rate": 2.965557501459428e-06, "loss": 0.8354, "step": 508 }, { "epoch": 0.01486087997430732, "grad_norm": 0.860121356971133, "learning_rate": 2.9713952130764743e-06, "loss": 0.8915, "step": 509 }, { "epoch": 0.014890076202154682, "grad_norm": 0.8356087232591345, "learning_rate": 2.9772329246935206e-06, "loss": 0.8442, "step": 510 }, { "epoch": 0.014919272430002043, "grad_norm": 0.8000332780173348, "learning_rate": 2.9830706363105664e-06, "loss": 0.7642, "step": 511 }, { "epoch": 0.014948468657849406, "grad_norm": 0.8241007107418141, "learning_rate": 2.9889083479276127e-06, "loss": 0.8282, "step": 512 }, { "epoch": 0.014977664885696768, "grad_norm": 0.8504297673420246, "learning_rate": 2.9947460595446585e-06, "loss": 0.8559, "step": 513 }, { "epoch": 0.01500686111354413, "grad_norm": 0.9884389350995736, "learning_rate": 3.0005837711617047e-06, "loss": 0.8444, "step": 514 }, { "epoch": 0.015036057341391492, "grad_norm": 1.1412136046186203, "learning_rate": 3.0064214827787506e-06, "loss": 0.9161, "step": 515 }, { "epoch": 0.015065253569238855, "grad_norm": 0.8265296700361615, "learning_rate": 3.012259194395797e-06, "loss": 0.7645, "step": 516 }, { "epoch": 0.015094449797086216, "grad_norm": 0.8288285991203104, "learning_rate": 3.0180969060128435e-06, "loss": 0.8528, "step": 517 }, { "epoch": 0.015123646024933579, "grad_norm": 0.8233002670864503, "learning_rate": 3.0239346176298893e-06, "loss": 0.8285, "step": 518 }, { "epoch": 0.01515284225278094, "grad_norm": 0.8921512921759286, "learning_rate": 3.0297723292469356e-06, "loss": 0.9288, "step": 519 }, { "epoch": 0.015182038480628303, "grad_norm": 0.8953282660296475, "learning_rate": 3.0356100408639814e-06, "loss": 0.8136, "step": 520 }, { "epoch": 0.015211234708475664, "grad_norm": 1.064732984135508, "learning_rate": 3.0414477524810277e-06, "loss": 0.7923, "step": 521 }, { "epoch": 0.015240430936323027, "grad_norm": 1.0943867975059371, "learning_rate": 3.0472854640980735e-06, "loss": 0.7495, "step": 522 }, { "epoch": 0.01526962716417039, "grad_norm": 0.913028047425031, "learning_rate": 3.0531231757151198e-06, "loss": 0.8687, "step": 523 }, { "epoch": 0.015298823392017751, "grad_norm": 0.8166642771487623, "learning_rate": 3.0589608873321656e-06, "loss": 0.7678, "step": 524 }, { "epoch": 0.015328019619865114, "grad_norm": 0.8432818991976362, "learning_rate": 3.0647985989492123e-06, "loss": 0.7602, "step": 525 }, { "epoch": 0.015357215847712475, "grad_norm": 0.8785000495516601, "learning_rate": 3.0706363105662586e-06, "loss": 0.8355, "step": 526 }, { "epoch": 0.015386412075559838, "grad_norm": 0.8625099396725925, "learning_rate": 3.0764740221833044e-06, "loss": 0.851, "step": 527 }, { "epoch": 0.0154156083034072, "grad_norm": 0.79234625214427, "learning_rate": 3.0823117338003506e-06, "loss": 0.7986, "step": 528 }, { "epoch": 0.015444804531254562, "grad_norm": 1.1501408089840068, "learning_rate": 3.0881494454173965e-06, "loss": 0.8717, "step": 529 }, { "epoch": 0.015474000759101924, "grad_norm": 0.8478605238006927, "learning_rate": 3.0939871570344427e-06, "loss": 0.7979, "step": 530 }, { "epoch": 0.015503196986949287, "grad_norm": 1.0311843465566106, "learning_rate": 3.0998248686514886e-06, "loss": 0.7897, "step": 531 }, { "epoch": 0.015532393214796648, "grad_norm": 0.892906512281672, "learning_rate": 3.105662580268535e-06, "loss": 0.961, "step": 532 }, { "epoch": 0.01556158944264401, "grad_norm": 0.7790962640998681, "learning_rate": 3.1115002918855815e-06, "loss": 0.7816, "step": 533 }, { "epoch": 0.015590785670491372, "grad_norm": 0.9386200291005877, "learning_rate": 3.1173380035026273e-06, "loss": 0.9176, "step": 534 }, { "epoch": 0.015619981898338735, "grad_norm": 0.8937638659133861, "learning_rate": 3.1231757151196736e-06, "loss": 0.8684, "step": 535 }, { "epoch": 0.015649178126186096, "grad_norm": 0.8319544400390886, "learning_rate": 3.1290134267367194e-06, "loss": 0.7977, "step": 536 }, { "epoch": 0.015678374354033457, "grad_norm": 0.8284520409988937, "learning_rate": 3.1348511383537657e-06, "loss": 0.8663, "step": 537 }, { "epoch": 0.015707570581880822, "grad_norm": 0.8292862481842664, "learning_rate": 3.1406888499708115e-06, "loss": 0.7517, "step": 538 }, { "epoch": 0.015736766809728183, "grad_norm": 0.7974764228132206, "learning_rate": 3.1465265615878578e-06, "loss": 0.8128, "step": 539 }, { "epoch": 0.015765963037575544, "grad_norm": 1.3146549034003943, "learning_rate": 3.1523642732049036e-06, "loss": 0.9234, "step": 540 }, { "epoch": 0.01579515926542291, "grad_norm": 0.8393727050653442, "learning_rate": 3.1582019848219503e-06, "loss": 0.7891, "step": 541 }, { "epoch": 0.01582435549327027, "grad_norm": 0.8396107906783961, "learning_rate": 3.1640396964389965e-06, "loss": 0.7904, "step": 542 }, { "epoch": 0.01585355172111763, "grad_norm": 0.7685224307168573, "learning_rate": 3.1698774080560424e-06, "loss": 0.6996, "step": 543 }, { "epoch": 0.015882747948964993, "grad_norm": 1.5890698012140687, "learning_rate": 3.1757151196730886e-06, "loss": 0.7896, "step": 544 }, { "epoch": 0.015911944176812357, "grad_norm": 0.8405830722587609, "learning_rate": 3.1815528312901345e-06, "loss": 0.8507, "step": 545 }, { "epoch": 0.01594114040465972, "grad_norm": 0.8736704356221298, "learning_rate": 3.1873905429071807e-06, "loss": 0.8588, "step": 546 }, { "epoch": 0.01597033663250708, "grad_norm": 0.8200331240255861, "learning_rate": 3.1932282545242266e-06, "loss": 0.8093, "step": 547 }, { "epoch": 0.01599953286035444, "grad_norm": 0.8897829887016183, "learning_rate": 3.199065966141273e-06, "loss": 0.8715, "step": 548 }, { "epoch": 0.016028729088201805, "grad_norm": 0.9016324102947624, "learning_rate": 3.204903677758319e-06, "loss": 0.7996, "step": 549 }, { "epoch": 0.016057925316049167, "grad_norm": 0.8534815460313361, "learning_rate": 3.2107413893753653e-06, "loss": 0.8148, "step": 550 }, { "epoch": 0.016087121543896528, "grad_norm": 0.8290038262355958, "learning_rate": 3.216579100992411e-06, "loss": 0.8039, "step": 551 }, { "epoch": 0.01611631777174389, "grad_norm": 0.8255390650241937, "learning_rate": 3.2224168126094574e-06, "loss": 0.8473, "step": 552 }, { "epoch": 0.016145513999591254, "grad_norm": 0.8449933221153941, "learning_rate": 3.2282545242265032e-06, "loss": 0.8428, "step": 553 }, { "epoch": 0.016174710227438615, "grad_norm": 1.0468430066433638, "learning_rate": 3.2340922358435495e-06, "loss": 0.7819, "step": 554 }, { "epoch": 0.016203906455285976, "grad_norm": 0.872209818395027, "learning_rate": 3.2399299474605953e-06, "loss": 0.8575, "step": 555 }, { "epoch": 0.01623310268313334, "grad_norm": 1.4697285976507357, "learning_rate": 3.2457676590776416e-06, "loss": 0.8611, "step": 556 }, { "epoch": 0.016262298910980702, "grad_norm": 1.0668472688701833, "learning_rate": 3.2516053706946883e-06, "loss": 0.984, "step": 557 }, { "epoch": 0.016291495138828063, "grad_norm": 0.8495025836132231, "learning_rate": 3.257443082311734e-06, "loss": 0.804, "step": 558 }, { "epoch": 0.016320691366675424, "grad_norm": 0.9557390287477415, "learning_rate": 3.2632807939287804e-06, "loss": 0.8369, "step": 559 }, { "epoch": 0.01634988759452279, "grad_norm": 0.8378733640561236, "learning_rate": 3.269118505545826e-06, "loss": 0.8944, "step": 560 }, { "epoch": 0.01637908382237015, "grad_norm": 0.903813848037212, "learning_rate": 3.2749562171628725e-06, "loss": 0.8783, "step": 561 }, { "epoch": 0.01640828005021751, "grad_norm": 0.9207416045910332, "learning_rate": 3.2807939287799183e-06, "loss": 0.8491, "step": 562 }, { "epoch": 0.016437476278064873, "grad_norm": 1.9023331782686623, "learning_rate": 3.2866316403969645e-06, "loss": 0.8342, "step": 563 }, { "epoch": 0.016466672505912237, "grad_norm": 0.8711534539600193, "learning_rate": 3.2924693520140104e-06, "loss": 0.9284, "step": 564 }, { "epoch": 0.0164958687337596, "grad_norm": 0.9490717960864818, "learning_rate": 3.298307063631057e-06, "loss": 0.7785, "step": 565 }, { "epoch": 0.01652506496160696, "grad_norm": 0.9717350304571959, "learning_rate": 3.3041447752481033e-06, "loss": 0.8607, "step": 566 }, { "epoch": 0.01655426118945432, "grad_norm": 0.7644562603546977, "learning_rate": 3.309982486865149e-06, "loss": 0.7408, "step": 567 }, { "epoch": 0.016583457417301686, "grad_norm": 0.8559359510224347, "learning_rate": 3.3158201984821954e-06, "loss": 0.8393, "step": 568 }, { "epoch": 0.016612653645149047, "grad_norm": 0.9809784894451858, "learning_rate": 3.3216579100992412e-06, "loss": 0.8431, "step": 569 }, { "epoch": 0.016641849872996408, "grad_norm": 0.8166384671174033, "learning_rate": 3.3274956217162875e-06, "loss": 0.7586, "step": 570 }, { "epoch": 0.016671046100843773, "grad_norm": 0.805928209398, "learning_rate": 3.3333333333333333e-06, "loss": 0.8036, "step": 571 }, { "epoch": 0.016700242328691134, "grad_norm": 1.065954940327243, "learning_rate": 3.3391710449503796e-06, "loss": 0.9038, "step": 572 }, { "epoch": 0.016729438556538495, "grad_norm": 0.8573223754558247, "learning_rate": 3.3450087565674263e-06, "loss": 0.8416, "step": 573 }, { "epoch": 0.016758634784385856, "grad_norm": 0.7527072517389516, "learning_rate": 3.350846468184472e-06, "loss": 0.7882, "step": 574 }, { "epoch": 0.01678783101223322, "grad_norm": 1.2770686164078697, "learning_rate": 3.3566841798015184e-06, "loss": 0.9547, "step": 575 }, { "epoch": 0.016817027240080582, "grad_norm": 0.9559678398802737, "learning_rate": 3.362521891418564e-06, "loss": 0.7813, "step": 576 }, { "epoch": 0.016846223467927943, "grad_norm": 0.8670945145160569, "learning_rate": 3.3683596030356104e-06, "loss": 0.836, "step": 577 }, { "epoch": 0.016875419695775305, "grad_norm": 0.7925949603852497, "learning_rate": 3.3741973146526563e-06, "loss": 0.758, "step": 578 }, { "epoch": 0.01690461592362267, "grad_norm": 1.418274691802406, "learning_rate": 3.3800350262697025e-06, "loss": 0.7464, "step": 579 }, { "epoch": 0.01693381215147003, "grad_norm": 0.8378679706412435, "learning_rate": 3.3858727378867484e-06, "loss": 0.8842, "step": 580 }, { "epoch": 0.01696300837931739, "grad_norm": 0.8417018616440531, "learning_rate": 3.391710449503795e-06, "loss": 0.7946, "step": 581 }, { "epoch": 0.016992204607164753, "grad_norm": 0.8470870489228963, "learning_rate": 3.397548161120841e-06, "loss": 0.9132, "step": 582 }, { "epoch": 0.017021400835012118, "grad_norm": 0.9030190871144109, "learning_rate": 3.403385872737887e-06, "loss": 0.8411, "step": 583 }, { "epoch": 0.01705059706285948, "grad_norm": 0.8638536659759057, "learning_rate": 3.409223584354933e-06, "loss": 0.8708, "step": 584 }, { "epoch": 0.01707979329070684, "grad_norm": 0.8234801088244166, "learning_rate": 3.4150612959719792e-06, "loss": 0.8082, "step": 585 }, { "epoch": 0.0171089895185542, "grad_norm": 0.8505679993601526, "learning_rate": 3.4208990075890255e-06, "loss": 0.8176, "step": 586 }, { "epoch": 0.017138185746401566, "grad_norm": 0.8301131039395624, "learning_rate": 3.4267367192060713e-06, "loss": 0.7548, "step": 587 }, { "epoch": 0.017167381974248927, "grad_norm": 0.9277479202160516, "learning_rate": 3.4325744308231176e-06, "loss": 0.9441, "step": 588 }, { "epoch": 0.017196578202096288, "grad_norm": 0.7975311009270187, "learning_rate": 3.438412142440164e-06, "loss": 0.7755, "step": 589 }, { "epoch": 0.017225774429943653, "grad_norm": 0.8620836218561081, "learning_rate": 3.44424985405721e-06, "loss": 0.8296, "step": 590 }, { "epoch": 0.017254970657791014, "grad_norm": 0.8523888084493545, "learning_rate": 3.450087565674256e-06, "loss": 0.8405, "step": 591 }, { "epoch": 0.017284166885638375, "grad_norm": 1.2847870549191844, "learning_rate": 3.455925277291302e-06, "loss": 0.7341, "step": 592 }, { "epoch": 0.017313363113485736, "grad_norm": 0.8300952468006869, "learning_rate": 3.461762988908348e-06, "loss": 0.8696, "step": 593 }, { "epoch": 0.0173425593413331, "grad_norm": 1.0137268466393197, "learning_rate": 3.4676007005253943e-06, "loss": 0.9125, "step": 594 }, { "epoch": 0.017371755569180462, "grad_norm": 0.8017962589504087, "learning_rate": 3.47343841214244e-06, "loss": 0.7553, "step": 595 }, { "epoch": 0.017400951797027824, "grad_norm": 0.774687241336385, "learning_rate": 3.4792761237594864e-06, "loss": 0.7381, "step": 596 }, { "epoch": 0.017430148024875185, "grad_norm": 0.865557863479969, "learning_rate": 3.485113835376533e-06, "loss": 0.7465, "step": 597 }, { "epoch": 0.01745934425272255, "grad_norm": 0.9031195397716577, "learning_rate": 3.490951546993579e-06, "loss": 0.766, "step": 598 }, { "epoch": 0.01748854048056991, "grad_norm": 0.8034114393021186, "learning_rate": 3.496789258610625e-06, "loss": 0.7643, "step": 599 }, { "epoch": 0.017517736708417272, "grad_norm": 0.9563522515812858, "learning_rate": 3.502626970227671e-06, "loss": 0.797, "step": 600 }, { "epoch": 0.017546932936264633, "grad_norm": 0.877667278189678, "learning_rate": 3.508464681844717e-06, "loss": 0.848, "step": 601 }, { "epoch": 0.017576129164111998, "grad_norm": 0.778662869079439, "learning_rate": 3.514302393461763e-06, "loss": 0.7271, "step": 602 }, { "epoch": 0.01760532539195936, "grad_norm": 1.0659381825645222, "learning_rate": 3.5201401050788093e-06, "loss": 0.7055, "step": 603 }, { "epoch": 0.01763452161980672, "grad_norm": 0.8762314827637705, "learning_rate": 3.525977816695855e-06, "loss": 0.8694, "step": 604 }, { "epoch": 0.017663717847654085, "grad_norm": 0.9483960771175317, "learning_rate": 3.531815528312902e-06, "loss": 0.9453, "step": 605 }, { "epoch": 0.017692914075501446, "grad_norm": 0.823767152164166, "learning_rate": 3.537653239929948e-06, "loss": 0.8224, "step": 606 }, { "epoch": 0.017722110303348807, "grad_norm": 0.8742236675531365, "learning_rate": 3.543490951546994e-06, "loss": 0.8373, "step": 607 }, { "epoch": 0.01775130653119617, "grad_norm": 0.7374775074256383, "learning_rate": 3.54932866316404e-06, "loss": 0.6721, "step": 608 }, { "epoch": 0.017780502759043533, "grad_norm": 1.3078885464069663, "learning_rate": 3.555166374781086e-06, "loss": 0.7129, "step": 609 }, { "epoch": 0.017809698986890894, "grad_norm": 0.8456902944349215, "learning_rate": 3.5610040863981322e-06, "loss": 0.8101, "step": 610 }, { "epoch": 0.017838895214738255, "grad_norm": 0.8602378102765988, "learning_rate": 3.566841798015178e-06, "loss": 0.8313, "step": 611 }, { "epoch": 0.017868091442585617, "grad_norm": 0.9468955909625191, "learning_rate": 3.5726795096322243e-06, "loss": 0.8017, "step": 612 }, { "epoch": 0.01789728767043298, "grad_norm": 0.867066316923945, "learning_rate": 3.578517221249271e-06, "loss": 0.828, "step": 613 }, { "epoch": 0.017926483898280342, "grad_norm": 0.8148581589065501, "learning_rate": 3.584354932866317e-06, "loss": 0.8122, "step": 614 }, { "epoch": 0.017955680126127704, "grad_norm": 0.8374148232725352, "learning_rate": 3.590192644483363e-06, "loss": 0.7919, "step": 615 }, { "epoch": 0.017984876353975065, "grad_norm": 0.870180694155847, "learning_rate": 3.596030356100409e-06, "loss": 0.8427, "step": 616 }, { "epoch": 0.01801407258182243, "grad_norm": 1.146730830935308, "learning_rate": 3.601868067717455e-06, "loss": 0.8718, "step": 617 }, { "epoch": 0.01804326880966979, "grad_norm": 0.7312393242989018, "learning_rate": 3.607705779334501e-06, "loss": 0.7057, "step": 618 }, { "epoch": 0.018072465037517152, "grad_norm": 0.8391129975617021, "learning_rate": 3.6135434909515473e-06, "loss": 0.8094, "step": 619 }, { "epoch": 0.018101661265364517, "grad_norm": 1.002158064083821, "learning_rate": 3.619381202568593e-06, "loss": 0.8506, "step": 620 }, { "epoch": 0.018130857493211878, "grad_norm": 0.8047040514288685, "learning_rate": 3.62521891418564e-06, "loss": 0.8164, "step": 621 }, { "epoch": 0.01816005372105924, "grad_norm": 0.7795820345719939, "learning_rate": 3.6310566258026856e-06, "loss": 0.7447, "step": 622 }, { "epoch": 0.0181892499489066, "grad_norm": 0.9235169098320659, "learning_rate": 3.636894337419732e-06, "loss": 0.8956, "step": 623 }, { "epoch": 0.018218446176753965, "grad_norm": 0.9917585896698509, "learning_rate": 3.6427320490367777e-06, "loss": 0.8551, "step": 624 }, { "epoch": 0.018247642404601326, "grad_norm": 0.8208580430354655, "learning_rate": 3.648569760653824e-06, "loss": 0.8146, "step": 625 }, { "epoch": 0.018276838632448687, "grad_norm": 0.7768624460578145, "learning_rate": 3.65440747227087e-06, "loss": 0.7033, "step": 626 }, { "epoch": 0.01830603486029605, "grad_norm": 0.8711826151110372, "learning_rate": 3.660245183887916e-06, "loss": 0.7762, "step": 627 }, { "epoch": 0.018335231088143413, "grad_norm": 0.9759421911998584, "learning_rate": 3.666082895504962e-06, "loss": 0.9036, "step": 628 }, { "epoch": 0.018364427315990774, "grad_norm": 0.9496805758068722, "learning_rate": 3.6719206071220086e-06, "loss": 0.8161, "step": 629 }, { "epoch": 0.018393623543838136, "grad_norm": 1.105934933506677, "learning_rate": 3.677758318739055e-06, "loss": 0.7799, "step": 630 }, { "epoch": 0.018422819771685497, "grad_norm": 0.8054295733054088, "learning_rate": 3.6835960303561007e-06, "loss": 0.7606, "step": 631 }, { "epoch": 0.01845201599953286, "grad_norm": 0.9268831342527709, "learning_rate": 3.689433741973147e-06, "loss": 0.8562, "step": 632 }, { "epoch": 0.018481212227380223, "grad_norm": 0.8586640382637812, "learning_rate": 3.6952714535901928e-06, "loss": 0.8472, "step": 633 }, { "epoch": 0.018510408455227584, "grad_norm": 0.885159301593847, "learning_rate": 3.701109165207239e-06, "loss": 0.7924, "step": 634 }, { "epoch": 0.018539604683074945, "grad_norm": 0.9171806744239678, "learning_rate": 3.706946876824285e-06, "loss": 0.8075, "step": 635 }, { "epoch": 0.01856880091092231, "grad_norm": 0.8719106921833991, "learning_rate": 3.712784588441331e-06, "loss": 0.8105, "step": 636 }, { "epoch": 0.01859799713876967, "grad_norm": 0.8654716184184762, "learning_rate": 3.718622300058378e-06, "loss": 0.774, "step": 637 }, { "epoch": 0.018627193366617032, "grad_norm": 0.872839116623353, "learning_rate": 3.7244600116754236e-06, "loss": 0.7624, "step": 638 }, { "epoch": 0.018656389594464397, "grad_norm": 1.2844710127373307, "learning_rate": 3.73029772329247e-06, "loss": 0.8304, "step": 639 }, { "epoch": 0.018685585822311758, "grad_norm": 0.8580566719400848, "learning_rate": 3.7361354349095157e-06, "loss": 0.8506, "step": 640 }, { "epoch": 0.01871478205015912, "grad_norm": 0.8306520689000474, "learning_rate": 3.741973146526562e-06, "loss": 0.8059, "step": 641 }, { "epoch": 0.01874397827800648, "grad_norm": 0.7764876558743075, "learning_rate": 3.747810858143608e-06, "loss": 0.7071, "step": 642 }, { "epoch": 0.018773174505853845, "grad_norm": 0.7968167843545949, "learning_rate": 3.753648569760654e-06, "loss": 0.7826, "step": 643 }, { "epoch": 0.018802370733701206, "grad_norm": 0.7556466729765196, "learning_rate": 3.7594862813777e-06, "loss": 0.6872, "step": 644 }, { "epoch": 0.018831566961548567, "grad_norm": 0.7836664673653787, "learning_rate": 3.7653239929947466e-06, "loss": 0.7596, "step": 645 }, { "epoch": 0.01886076318939593, "grad_norm": 0.9049443037831677, "learning_rate": 3.771161704611793e-06, "loss": 0.7962, "step": 646 }, { "epoch": 0.018889959417243293, "grad_norm": 0.8976392897353493, "learning_rate": 3.7769994162288387e-06, "loss": 0.8318, "step": 647 }, { "epoch": 0.018919155645090655, "grad_norm": 1.1013952279334371, "learning_rate": 3.782837127845885e-06, "loss": 0.8791, "step": 648 }, { "epoch": 0.018948351872938016, "grad_norm": 0.9032068230438992, "learning_rate": 3.7886748394629308e-06, "loss": 0.7871, "step": 649 }, { "epoch": 0.018977548100785377, "grad_norm": 0.9925756777421477, "learning_rate": 3.794512551079977e-06, "loss": 0.9998, "step": 650 }, { "epoch": 0.01900674432863274, "grad_norm": 1.1951008100044618, "learning_rate": 3.800350262697023e-06, "loss": 0.836, "step": 651 }, { "epoch": 0.019035940556480103, "grad_norm": 0.7878935059550516, "learning_rate": 3.806187974314069e-06, "loss": 0.7896, "step": 652 }, { "epoch": 0.019065136784327464, "grad_norm": 0.758676452298372, "learning_rate": 3.8120256859311154e-06, "loss": 0.705, "step": 653 }, { "epoch": 0.01909433301217483, "grad_norm": 0.8655627215197849, "learning_rate": 3.817863397548161e-06, "loss": 0.806, "step": 654 }, { "epoch": 0.01912352924002219, "grad_norm": 0.7993264446697882, "learning_rate": 3.823701109165208e-06, "loss": 0.7919, "step": 655 }, { "epoch": 0.01915272546786955, "grad_norm": 1.241539396847497, "learning_rate": 3.829538820782254e-06, "loss": 0.7784, "step": 656 }, { "epoch": 0.019181921695716912, "grad_norm": 0.9882093654909032, "learning_rate": 3.8353765323992995e-06, "loss": 0.8759, "step": 657 }, { "epoch": 0.019211117923564277, "grad_norm": 0.8488885096798514, "learning_rate": 3.841214244016346e-06, "loss": 0.8622, "step": 658 }, { "epoch": 0.019240314151411638, "grad_norm": 0.8293580197381404, "learning_rate": 3.847051955633392e-06, "loss": 0.7981, "step": 659 }, { "epoch": 0.019269510379259, "grad_norm": 0.8454262012249666, "learning_rate": 3.852889667250438e-06, "loss": 0.8385, "step": 660 }, { "epoch": 0.01929870660710636, "grad_norm": 0.8997888385684651, "learning_rate": 3.8587273788674846e-06, "loss": 0.8186, "step": 661 }, { "epoch": 0.019327902834953725, "grad_norm": 0.8252913537254153, "learning_rate": 3.86456509048453e-06, "loss": 0.7509, "step": 662 }, { "epoch": 0.019357099062801086, "grad_norm": 1.0022405733521462, "learning_rate": 3.870402802101576e-06, "loss": 0.7763, "step": 663 }, { "epoch": 0.019386295290648448, "grad_norm": 0.8190832505013742, "learning_rate": 3.876240513718623e-06, "loss": 0.8917, "step": 664 }, { "epoch": 0.01941549151849581, "grad_norm": 0.768547477569382, "learning_rate": 3.882078225335669e-06, "loss": 0.7148, "step": 665 }, { "epoch": 0.019444687746343173, "grad_norm": 0.8906567377879886, "learning_rate": 3.8879159369527146e-06, "loss": 0.7619, "step": 666 }, { "epoch": 0.019473883974190535, "grad_norm": 0.8123792403295087, "learning_rate": 3.89375364856976e-06, "loss": 0.8065, "step": 667 }, { "epoch": 0.019503080202037896, "grad_norm": 0.8349350657567052, "learning_rate": 3.899591360186807e-06, "loss": 0.7957, "step": 668 }, { "epoch": 0.01953227642988526, "grad_norm": 0.7933369883783337, "learning_rate": 3.905429071803854e-06, "loss": 0.6995, "step": 669 }, { "epoch": 0.01956147265773262, "grad_norm": 0.9263584480961852, "learning_rate": 3.9112667834209e-06, "loss": 0.7988, "step": 670 }, { "epoch": 0.019590668885579983, "grad_norm": 0.9404965397069227, "learning_rate": 3.9171044950379454e-06, "loss": 0.7628, "step": 671 }, { "epoch": 0.019619865113427344, "grad_norm": 0.846350467252358, "learning_rate": 3.922942206654991e-06, "loss": 0.8228, "step": 672 }, { "epoch": 0.01964906134127471, "grad_norm": 0.8556732693589622, "learning_rate": 3.928779918272038e-06, "loss": 0.8226, "step": 673 }, { "epoch": 0.01967825756912207, "grad_norm": 0.87370320935904, "learning_rate": 3.934617629889084e-06, "loss": 0.8441, "step": 674 }, { "epoch": 0.01970745379696943, "grad_norm": 0.8770750581664353, "learning_rate": 3.94045534150613e-06, "loss": 0.7773, "step": 675 }, { "epoch": 0.019736650024816792, "grad_norm": 0.7962532904513457, "learning_rate": 3.9462930531231754e-06, "loss": 0.7485, "step": 676 }, { "epoch": 0.019765846252664157, "grad_norm": 0.71717009755733, "learning_rate": 3.952130764740222e-06, "loss": 0.6404, "step": 677 }, { "epoch": 0.01979504248051152, "grad_norm": 0.900042377068275, "learning_rate": 3.957968476357269e-06, "loss": 0.8349, "step": 678 }, { "epoch": 0.01982423870835888, "grad_norm": 0.8222367676107581, "learning_rate": 3.963806187974315e-06, "loss": 0.7923, "step": 679 }, { "epoch": 0.01985343493620624, "grad_norm": 0.7816462159254123, "learning_rate": 3.9696438995913605e-06, "loss": 0.7211, "step": 680 }, { "epoch": 0.019882631164053605, "grad_norm": 0.8194529502370782, "learning_rate": 3.975481611208406e-06, "loss": 0.7523, "step": 681 }, { "epoch": 0.019911827391900967, "grad_norm": 0.9331793510719626, "learning_rate": 3.981319322825453e-06, "loss": 0.7723, "step": 682 }, { "epoch": 0.019941023619748328, "grad_norm": 0.889644326131974, "learning_rate": 3.987157034442499e-06, "loss": 0.751, "step": 683 }, { "epoch": 0.01997021984759569, "grad_norm": 0.9697899512046428, "learning_rate": 3.992994746059545e-06, "loss": 0.8353, "step": 684 }, { "epoch": 0.019999416075443054, "grad_norm": 0.7779978116759915, "learning_rate": 3.998832457676591e-06, "loss": 0.7811, "step": 685 }, { "epoch": 0.020028612303290415, "grad_norm": 1.3125898880444518, "learning_rate": 4.004670169293637e-06, "loss": 0.9425, "step": 686 }, { "epoch": 0.020057808531137776, "grad_norm": 0.8503933888816293, "learning_rate": 4.010507880910684e-06, "loss": 0.7986, "step": 687 }, { "epoch": 0.02008700475898514, "grad_norm": 0.8297288426839076, "learning_rate": 4.01634559252773e-06, "loss": 0.7974, "step": 688 }, { "epoch": 0.020116200986832502, "grad_norm": 0.8842525793892474, "learning_rate": 4.0221833041447755e-06, "loss": 0.8613, "step": 689 }, { "epoch": 0.020145397214679863, "grad_norm": 0.7876672341836208, "learning_rate": 4.028021015761821e-06, "loss": 0.7308, "step": 690 }, { "epoch": 0.020174593442527224, "grad_norm": 0.7779149682835875, "learning_rate": 4.033858727378868e-06, "loss": 0.7574, "step": 691 }, { "epoch": 0.02020378967037459, "grad_norm": 0.9625089379384056, "learning_rate": 4.039696438995914e-06, "loss": 0.8867, "step": 692 }, { "epoch": 0.02023298589822195, "grad_norm": 0.8537173269832322, "learning_rate": 4.0455341506129605e-06, "loss": 0.875, "step": 693 }, { "epoch": 0.02026218212606931, "grad_norm": 0.8160381782603692, "learning_rate": 4.051371862230006e-06, "loss": 0.7559, "step": 694 }, { "epoch": 0.020291378353916673, "grad_norm": 0.9727893110802304, "learning_rate": 4.057209573847052e-06, "loss": 0.7774, "step": 695 }, { "epoch": 0.020320574581764037, "grad_norm": 0.8554545005340922, "learning_rate": 4.063047285464098e-06, "loss": 0.888, "step": 696 }, { "epoch": 0.0203497708096114, "grad_norm": 0.847870809173948, "learning_rate": 4.068884997081145e-06, "loss": 0.7825, "step": 697 }, { "epoch": 0.02037896703745876, "grad_norm": 0.9978145736419982, "learning_rate": 4.0747227086981905e-06, "loss": 0.7382, "step": 698 }, { "epoch": 0.02040816326530612, "grad_norm": 0.8921098972237529, "learning_rate": 4.080560420315236e-06, "loss": 0.8291, "step": 699 }, { "epoch": 0.020437359493153485, "grad_norm": 0.7807377207322683, "learning_rate": 4.086398131932282e-06, "loss": 0.7379, "step": 700 }, { "epoch": 0.020466555721000847, "grad_norm": 0.9539935896190126, "learning_rate": 4.092235843549329e-06, "loss": 0.8318, "step": 701 }, { "epoch": 0.020495751948848208, "grad_norm": 0.8204274958084666, "learning_rate": 4.0980735551663756e-06, "loss": 0.7535, "step": 702 }, { "epoch": 0.020524948176695573, "grad_norm": 0.792298188208643, "learning_rate": 4.103911266783421e-06, "loss": 0.7849, "step": 703 }, { "epoch": 0.020554144404542934, "grad_norm": 0.8815856223261531, "learning_rate": 4.109748978400467e-06, "loss": 0.8303, "step": 704 }, { "epoch": 0.020583340632390295, "grad_norm": 0.8939641505844537, "learning_rate": 4.115586690017513e-06, "loss": 0.8058, "step": 705 }, { "epoch": 0.020612536860237656, "grad_norm": 0.8135180219663393, "learning_rate": 4.12142440163456e-06, "loss": 0.8115, "step": 706 }, { "epoch": 0.02064173308808502, "grad_norm": 0.8695052098410904, "learning_rate": 4.127262113251606e-06, "loss": 0.8364, "step": 707 }, { "epoch": 0.020670929315932382, "grad_norm": 0.8416720731332392, "learning_rate": 4.133099824868651e-06, "loss": 0.8386, "step": 708 }, { "epoch": 0.020700125543779743, "grad_norm": 1.0019413987825614, "learning_rate": 4.138937536485698e-06, "loss": 0.7787, "step": 709 }, { "epoch": 0.020729321771627104, "grad_norm": 0.8854514741939146, "learning_rate": 4.144775248102744e-06, "loss": 0.8181, "step": 710 }, { "epoch": 0.02075851799947447, "grad_norm": 0.7951273604896211, "learning_rate": 4.150612959719791e-06, "loss": 0.8049, "step": 711 }, { "epoch": 0.02078771422732183, "grad_norm": 0.7794678876788329, "learning_rate": 4.1564506713368364e-06, "loss": 0.7746, "step": 712 }, { "epoch": 0.02081691045516919, "grad_norm": 0.7458733973063848, "learning_rate": 4.162288382953882e-06, "loss": 0.7284, "step": 713 }, { "epoch": 0.020846106683016553, "grad_norm": 0.7652020382636301, "learning_rate": 4.168126094570928e-06, "loss": 0.7478, "step": 714 }, { "epoch": 0.020875302910863917, "grad_norm": 0.7143627548691045, "learning_rate": 4.173963806187975e-06, "loss": 0.7253, "step": 715 }, { "epoch": 0.02090449913871128, "grad_norm": 0.9563939273213026, "learning_rate": 4.179801517805021e-06, "loss": 0.8576, "step": 716 }, { "epoch": 0.02093369536655864, "grad_norm": 0.8567320850131741, "learning_rate": 4.185639229422067e-06, "loss": 0.8112, "step": 717 }, { "epoch": 0.020962891594406004, "grad_norm": 0.8196822174718407, "learning_rate": 4.191476941039113e-06, "loss": 0.7569, "step": 718 }, { "epoch": 0.020992087822253366, "grad_norm": 1.770099711720323, "learning_rate": 4.197314652656159e-06, "loss": 0.6885, "step": 719 }, { "epoch": 0.021021284050100727, "grad_norm": 0.7231297386683663, "learning_rate": 4.203152364273206e-06, "loss": 0.6832, "step": 720 }, { "epoch": 0.021050480277948088, "grad_norm": 0.9017490461879722, "learning_rate": 4.2089900758902515e-06, "loss": 0.9096, "step": 721 }, { "epoch": 0.021079676505795453, "grad_norm": 0.7836017642530628, "learning_rate": 4.214827787507297e-06, "loss": 0.722, "step": 722 }, { "epoch": 0.021108872733642814, "grad_norm": 0.7776909068266974, "learning_rate": 4.220665499124343e-06, "loss": 0.7788, "step": 723 }, { "epoch": 0.021138068961490175, "grad_norm": 0.7668729545259783, "learning_rate": 4.22650321074139e-06, "loss": 0.7378, "step": 724 }, { "epoch": 0.021167265189337536, "grad_norm": 0.8427914509225941, "learning_rate": 4.232340922358436e-06, "loss": 0.7909, "step": 725 }, { "epoch": 0.0211964614171849, "grad_norm": 0.8631371140715862, "learning_rate": 4.238178633975482e-06, "loss": 0.8348, "step": 726 }, { "epoch": 0.021225657645032262, "grad_norm": 0.8675407949590223, "learning_rate": 4.244016345592528e-06, "loss": 0.7715, "step": 727 }, { "epoch": 0.021254853872879623, "grad_norm": 0.9107843351481739, "learning_rate": 4.249854057209574e-06, "loss": 0.7997, "step": 728 }, { "epoch": 0.021284050100726985, "grad_norm": 0.9744924973477838, "learning_rate": 4.255691768826621e-06, "loss": 0.8329, "step": 729 }, { "epoch": 0.02131324632857435, "grad_norm": 0.7987291127741163, "learning_rate": 4.2615294804436665e-06, "loss": 0.7734, "step": 730 }, { "epoch": 0.02134244255642171, "grad_norm": 0.8039499564350819, "learning_rate": 4.267367192060712e-06, "loss": 0.775, "step": 731 }, { "epoch": 0.02137163878426907, "grad_norm": 0.7527425421150665, "learning_rate": 4.273204903677758e-06, "loss": 0.7166, "step": 732 }, { "epoch": 0.021400835012116433, "grad_norm": 0.8999492765947504, "learning_rate": 4.279042615294805e-06, "loss": 0.8389, "step": 733 }, { "epoch": 0.021430031239963798, "grad_norm": 1.1854024392979892, "learning_rate": 4.284880326911851e-06, "loss": 0.8349, "step": 734 }, { "epoch": 0.02145922746781116, "grad_norm": 0.9424912321819716, "learning_rate": 4.290718038528897e-06, "loss": 0.812, "step": 735 }, { "epoch": 0.02148842369565852, "grad_norm": 0.8774485451924353, "learning_rate": 4.296555750145943e-06, "loss": 0.812, "step": 736 }, { "epoch": 0.021517619923505885, "grad_norm": 0.8935964939206945, "learning_rate": 4.302393461762989e-06, "loss": 0.7798, "step": 737 }, { "epoch": 0.021546816151353246, "grad_norm": 0.8369789292757109, "learning_rate": 4.308231173380035e-06, "loss": 0.7457, "step": 738 }, { "epoch": 0.021576012379200607, "grad_norm": 0.7890518165884953, "learning_rate": 4.3140688849970816e-06, "loss": 0.7647, "step": 739 }, { "epoch": 0.021605208607047968, "grad_norm": 0.8808562592404812, "learning_rate": 4.319906596614127e-06, "loss": 0.785, "step": 740 }, { "epoch": 0.021634404834895333, "grad_norm": 0.8129473337943147, "learning_rate": 4.325744308231174e-06, "loss": 0.7504, "step": 741 }, { "epoch": 0.021663601062742694, "grad_norm": 0.8412054729525281, "learning_rate": 4.33158201984822e-06, "loss": 0.7578, "step": 742 }, { "epoch": 0.021692797290590055, "grad_norm": 0.8080761623944667, "learning_rate": 4.337419731465266e-06, "loss": 0.7629, "step": 743 }, { "epoch": 0.021721993518437416, "grad_norm": 0.8849556337441726, "learning_rate": 4.343257443082312e-06, "loss": 0.8283, "step": 744 }, { "epoch": 0.02175118974628478, "grad_norm": 0.8173362595670287, "learning_rate": 4.349095154699358e-06, "loss": 0.7839, "step": 745 }, { "epoch": 0.021780385974132142, "grad_norm": 1.324290783895376, "learning_rate": 4.354932866316404e-06, "loss": 0.7157, "step": 746 }, { "epoch": 0.021809582201979504, "grad_norm": 1.1927225339375338, "learning_rate": 4.36077057793345e-06, "loss": 0.9199, "step": 747 }, { "epoch": 0.021838778429826865, "grad_norm": 0.7461975184059046, "learning_rate": 4.366608289550497e-06, "loss": 0.6952, "step": 748 }, { "epoch": 0.02186797465767423, "grad_norm": 0.8213784948198444, "learning_rate": 4.372446001167543e-06, "loss": 0.8236, "step": 749 }, { "epoch": 0.02189717088552159, "grad_norm": 0.8101802622918147, "learning_rate": 4.378283712784589e-06, "loss": 0.7754, "step": 750 }, { "epoch": 0.021926367113368952, "grad_norm": 0.9007995894740938, "learning_rate": 4.384121424401635e-06, "loss": 0.8311, "step": 751 }, { "epoch": 0.021955563341216316, "grad_norm": 0.8442740360824945, "learning_rate": 4.389959136018681e-06, "loss": 0.824, "step": 752 }, { "epoch": 0.021984759569063678, "grad_norm": 1.059188385872112, "learning_rate": 4.3957968476357275e-06, "loss": 0.7909, "step": 753 }, { "epoch": 0.02201395579691104, "grad_norm": 0.905850725724152, "learning_rate": 4.401634559252773e-06, "loss": 0.8318, "step": 754 }, { "epoch": 0.0220431520247584, "grad_norm": 0.9186914419013129, "learning_rate": 4.407472270869819e-06, "loss": 0.8692, "step": 755 }, { "epoch": 0.022072348252605765, "grad_norm": 0.8391454916831989, "learning_rate": 4.413309982486865e-06, "loss": 0.8131, "step": 756 }, { "epoch": 0.022101544480453126, "grad_norm": 0.8254996581520324, "learning_rate": 4.419147694103912e-06, "loss": 0.6965, "step": 757 }, { "epoch": 0.022130740708300487, "grad_norm": 0.8599033092064141, "learning_rate": 4.424985405720958e-06, "loss": 0.8258, "step": 758 }, { "epoch": 0.02215993693614785, "grad_norm": 0.7209359107271075, "learning_rate": 4.430823117338004e-06, "loss": 0.6073, "step": 759 }, { "epoch": 0.022189133163995213, "grad_norm": 0.8301534589711509, "learning_rate": 4.43666082895505e-06, "loss": 0.7375, "step": 760 }, { "epoch": 0.022218329391842574, "grad_norm": 2.4031099441830457, "learning_rate": 4.442498540572096e-06, "loss": 0.8544, "step": 761 }, { "epoch": 0.022247525619689935, "grad_norm": 0.9519901327653697, "learning_rate": 4.4483362521891425e-06, "loss": 0.735, "step": 762 }, { "epoch": 0.022276721847537297, "grad_norm": 0.9745575074819212, "learning_rate": 4.454173963806188e-06, "loss": 0.8503, "step": 763 }, { "epoch": 0.02230591807538466, "grad_norm": 0.8126684763809636, "learning_rate": 4.460011675423234e-06, "loss": 0.8021, "step": 764 }, { "epoch": 0.022335114303232022, "grad_norm": 0.9363588949741178, "learning_rate": 4.465849387040281e-06, "loss": 0.7582, "step": 765 }, { "epoch": 0.022364310531079384, "grad_norm": 0.8807480934703842, "learning_rate": 4.471687098657327e-06, "loss": 0.8386, "step": 766 }, { "epoch": 0.02239350675892675, "grad_norm": 0.8162461808953861, "learning_rate": 4.4775248102743725e-06, "loss": 0.7765, "step": 767 }, { "epoch": 0.02242270298677411, "grad_norm": 0.870718518656943, "learning_rate": 4.483362521891419e-06, "loss": 0.8025, "step": 768 }, { "epoch": 0.02245189921462147, "grad_norm": 0.8495031895041806, "learning_rate": 4.489200233508465e-06, "loss": 0.7421, "step": 769 }, { "epoch": 0.022481095442468832, "grad_norm": 0.9492914530616855, "learning_rate": 4.495037945125511e-06, "loss": 0.8711, "step": 770 }, { "epoch": 0.022510291670316197, "grad_norm": 1.11372718978747, "learning_rate": 4.500875656742557e-06, "loss": 0.8784, "step": 771 }, { "epoch": 0.022539487898163558, "grad_norm": 0.7919287753169946, "learning_rate": 4.506713368359603e-06, "loss": 0.7325, "step": 772 }, { "epoch": 0.02256868412601092, "grad_norm": 0.8323745740775601, "learning_rate": 4.51255107997665e-06, "loss": 0.7559, "step": 773 }, { "epoch": 0.02259788035385828, "grad_norm": 0.885229323866263, "learning_rate": 4.518388791593696e-06, "loss": 0.8134, "step": 774 }, { "epoch": 0.022627076581705645, "grad_norm": 0.845752870407203, "learning_rate": 4.524226503210742e-06, "loss": 0.737, "step": 775 }, { "epoch": 0.022656272809553006, "grad_norm": 0.7767174807733381, "learning_rate": 4.5300642148277876e-06, "loss": 0.7438, "step": 776 }, { "epoch": 0.022685469037400367, "grad_norm": 0.9405360704114266, "learning_rate": 4.535901926444834e-06, "loss": 0.7921, "step": 777 }, { "epoch": 0.02271466526524773, "grad_norm": 0.8375285116180363, "learning_rate": 4.54173963806188e-06, "loss": 0.8174, "step": 778 }, { "epoch": 0.022743861493095093, "grad_norm": 0.8535817450239066, "learning_rate": 4.547577349678926e-06, "loss": 0.8572, "step": 779 }, { "epoch": 0.022773057720942454, "grad_norm": 0.7524496653639892, "learning_rate": 4.553415061295972e-06, "loss": 0.6559, "step": 780 }, { "epoch": 0.022802253948789816, "grad_norm": 1.112405328838428, "learning_rate": 4.559252772913018e-06, "loss": 0.8041, "step": 781 }, { "epoch": 0.022831450176637177, "grad_norm": 1.156180969415571, "learning_rate": 4.565090484530065e-06, "loss": 0.9133, "step": 782 }, { "epoch": 0.02286064640448454, "grad_norm": 0.8655525013289257, "learning_rate": 4.570928196147111e-06, "loss": 0.8215, "step": 783 }, { "epoch": 0.022889842632331903, "grad_norm": 0.8252959434997407, "learning_rate": 4.576765907764157e-06, "loss": 0.755, "step": 784 }, { "epoch": 0.022919038860179264, "grad_norm": 0.8387777344132993, "learning_rate": 4.582603619381203e-06, "loss": 0.7797, "step": 785 }, { "epoch": 0.02294823508802663, "grad_norm": 0.8175511320702659, "learning_rate": 4.588441330998249e-06, "loss": 0.8334, "step": 786 }, { "epoch": 0.02297743131587399, "grad_norm": 0.9403056229503003, "learning_rate": 4.594279042615295e-06, "loss": 0.8887, "step": 787 }, { "epoch": 0.02300662754372135, "grad_norm": 1.1419884825233264, "learning_rate": 4.600116754232341e-06, "loss": 0.871, "step": 788 }, { "epoch": 0.023035823771568712, "grad_norm": 0.8960805467022387, "learning_rate": 4.605954465849388e-06, "loss": 0.7952, "step": 789 }, { "epoch": 0.023065019999416077, "grad_norm": 0.8234010613900077, "learning_rate": 4.6117921774664335e-06, "loss": 0.799, "step": 790 }, { "epoch": 0.023094216227263438, "grad_norm": 0.8902121611041514, "learning_rate": 4.61762988908348e-06, "loss": 0.7981, "step": 791 }, { "epoch": 0.0231234124551108, "grad_norm": 0.7882493581413269, "learning_rate": 4.623467600700526e-06, "loss": 0.8229, "step": 792 }, { "epoch": 0.02315260868295816, "grad_norm": 0.9023430558952431, "learning_rate": 4.629305312317572e-06, "loss": 0.9079, "step": 793 }, { "epoch": 0.023181804910805525, "grad_norm": 0.7553154556974544, "learning_rate": 4.635143023934618e-06, "loss": 0.6467, "step": 794 }, { "epoch": 0.023211001138652886, "grad_norm": 0.8559378603562147, "learning_rate": 4.640980735551664e-06, "loss": 0.7624, "step": 795 }, { "epoch": 0.023240197366500247, "grad_norm": 0.9008020451234339, "learning_rate": 4.64681844716871e-06, "loss": 0.8346, "step": 796 }, { "epoch": 0.02326939359434761, "grad_norm": 0.8475830141326589, "learning_rate": 4.652656158785757e-06, "loss": 0.8028, "step": 797 }, { "epoch": 0.023298589822194973, "grad_norm": 0.7915531256931171, "learning_rate": 4.658493870402803e-06, "loss": 0.7888, "step": 798 }, { "epoch": 0.023327786050042335, "grad_norm": 0.8062849798182556, "learning_rate": 4.6643315820198485e-06, "loss": 0.7899, "step": 799 }, { "epoch": 0.023356982277889696, "grad_norm": 0.8522308758069833, "learning_rate": 4.670169293636895e-06, "loss": 0.8109, "step": 800 }, { "epoch": 0.02338617850573706, "grad_norm": 0.90696915850295, "learning_rate": 4.676007005253941e-06, "loss": 0.6891, "step": 801 }, { "epoch": 0.02341537473358442, "grad_norm": 0.8320503187934932, "learning_rate": 4.681844716870987e-06, "loss": 0.8396, "step": 802 }, { "epoch": 0.023444570961431783, "grad_norm": 0.9308424813356145, "learning_rate": 4.687682428488033e-06, "loss": 0.7888, "step": 803 }, { "epoch": 0.023473767189279144, "grad_norm": 0.8616182545239437, "learning_rate": 4.693520140105079e-06, "loss": 0.8321, "step": 804 }, { "epoch": 0.02350296341712651, "grad_norm": 0.8365571640229997, "learning_rate": 4.699357851722125e-06, "loss": 0.7116, "step": 805 }, { "epoch": 0.02353215964497387, "grad_norm": 0.8791052794078429, "learning_rate": 4.705195563339172e-06, "loss": 0.8123, "step": 806 }, { "epoch": 0.02356135587282123, "grad_norm": 0.7717844139304559, "learning_rate": 4.711033274956218e-06, "loss": 0.7589, "step": 807 }, { "epoch": 0.023590552100668592, "grad_norm": 0.7483474717015017, "learning_rate": 4.7168709865732635e-06, "loss": 0.6849, "step": 808 }, { "epoch": 0.023619748328515957, "grad_norm": 0.9103953862355613, "learning_rate": 4.722708698190309e-06, "loss": 0.787, "step": 809 }, { "epoch": 0.023648944556363318, "grad_norm": 1.0089462673008225, "learning_rate": 4.728546409807356e-06, "loss": 0.8636, "step": 810 }, { "epoch": 0.02367814078421068, "grad_norm": 0.868236427687412, "learning_rate": 4.734384121424402e-06, "loss": 0.8707, "step": 811 }, { "epoch": 0.02370733701205804, "grad_norm": 0.8127302884102787, "learning_rate": 4.740221833041448e-06, "loss": 0.7347, "step": 812 }, { "epoch": 0.023736533239905405, "grad_norm": 0.7888845791505726, "learning_rate": 4.746059544658494e-06, "loss": 0.7347, "step": 813 }, { "epoch": 0.023765729467752766, "grad_norm": 0.8248432651013637, "learning_rate": 4.75189725627554e-06, "loss": 0.8153, "step": 814 }, { "epoch": 0.023794925695600128, "grad_norm": 0.8039847917936834, "learning_rate": 4.757734967892587e-06, "loss": 0.7686, "step": 815 }, { "epoch": 0.023824121923447492, "grad_norm": 0.9716428533203455, "learning_rate": 4.763572679509633e-06, "loss": 0.7976, "step": 816 }, { "epoch": 0.023853318151294853, "grad_norm": 1.0321979214055463, "learning_rate": 4.7694103911266786e-06, "loss": 0.8061, "step": 817 }, { "epoch": 0.023882514379142215, "grad_norm": 0.8045557145377803, "learning_rate": 4.775248102743724e-06, "loss": 0.7442, "step": 818 }, { "epoch": 0.023911710606989576, "grad_norm": 0.7946732723836759, "learning_rate": 4.781085814360771e-06, "loss": 0.7971, "step": 819 }, { "epoch": 0.02394090683483694, "grad_norm": 0.7444257633834778, "learning_rate": 4.786923525977817e-06, "loss": 0.6391, "step": 820 }, { "epoch": 0.023970103062684302, "grad_norm": 0.7487689141626156, "learning_rate": 4.792761237594864e-06, "loss": 0.6313, "step": 821 }, { "epoch": 0.023999299290531663, "grad_norm": 0.946964387930108, "learning_rate": 4.7985989492119094e-06, "loss": 0.7769, "step": 822 }, { "epoch": 0.024028495518379024, "grad_norm": 0.8523305869812338, "learning_rate": 4.804436660828955e-06, "loss": 0.7667, "step": 823 }, { "epoch": 0.02405769174622639, "grad_norm": 0.761013838605807, "learning_rate": 4.810274372446002e-06, "loss": 0.7619, "step": 824 }, { "epoch": 0.02408688797407375, "grad_norm": 0.796772230736654, "learning_rate": 4.816112084063048e-06, "loss": 0.7323, "step": 825 }, { "epoch": 0.02411608420192111, "grad_norm": 0.8522973750269099, "learning_rate": 4.821949795680094e-06, "loss": 0.8084, "step": 826 }, { "epoch": 0.024145280429768472, "grad_norm": 0.8340015262182333, "learning_rate": 4.8277875072971394e-06, "loss": 0.7869, "step": 827 }, { "epoch": 0.024174476657615837, "grad_norm": 0.8232337679373121, "learning_rate": 4.833625218914186e-06, "loss": 0.7753, "step": 828 }, { "epoch": 0.0242036728854632, "grad_norm": 0.8106772575346382, "learning_rate": 4.839462930531233e-06, "loss": 0.8074, "step": 829 }, { "epoch": 0.02423286911331056, "grad_norm": 0.7998152447819277, "learning_rate": 4.845300642148279e-06, "loss": 0.7457, "step": 830 }, { "epoch": 0.02426206534115792, "grad_norm": 0.7810349993920641, "learning_rate": 4.8511383537653245e-06, "loss": 0.732, "step": 831 }, { "epoch": 0.024291261569005285, "grad_norm": 0.8185644933252598, "learning_rate": 4.85697606538237e-06, "loss": 0.7655, "step": 832 }, { "epoch": 0.024320457796852647, "grad_norm": 0.7943465602170499, "learning_rate": 4.862813776999417e-06, "loss": 0.7131, "step": 833 }, { "epoch": 0.024349654024700008, "grad_norm": 0.8612344548683113, "learning_rate": 4.868651488616463e-06, "loss": 0.7368, "step": 834 }, { "epoch": 0.024378850252547372, "grad_norm": 0.8948467590250098, "learning_rate": 4.874489200233509e-06, "loss": 0.8366, "step": 835 }, { "epoch": 0.024408046480394734, "grad_norm": 0.8139207289661449, "learning_rate": 4.8803269118505545e-06, "loss": 0.7599, "step": 836 }, { "epoch": 0.024437242708242095, "grad_norm": 0.8258009847737101, "learning_rate": 4.886164623467601e-06, "loss": 0.8378, "step": 837 }, { "epoch": 0.024466438936089456, "grad_norm": 1.061860959634065, "learning_rate": 4.892002335084647e-06, "loss": 0.8029, "step": 838 }, { "epoch": 0.02449563516393682, "grad_norm": 0.950396291399263, "learning_rate": 4.897840046701694e-06, "loss": 0.8791, "step": 839 }, { "epoch": 0.024524831391784182, "grad_norm": 0.7683535324126776, "learning_rate": 4.9036777583187395e-06, "loss": 0.809, "step": 840 }, { "epoch": 0.024554027619631543, "grad_norm": 1.2529557486061806, "learning_rate": 4.909515469935785e-06, "loss": 0.8825, "step": 841 }, { "epoch": 0.024583223847478904, "grad_norm": 0.8631755320438411, "learning_rate": 4.915353181552831e-06, "loss": 0.8265, "step": 842 }, { "epoch": 0.02461242007532627, "grad_norm": 0.78354783640571, "learning_rate": 4.921190893169878e-06, "loss": 0.8199, "step": 843 }, { "epoch": 0.02464161630317363, "grad_norm": 0.7899589124391064, "learning_rate": 4.927028604786924e-06, "loss": 0.8031, "step": 844 }, { "epoch": 0.02467081253102099, "grad_norm": 0.8673926885526506, "learning_rate": 4.93286631640397e-06, "loss": 0.8441, "step": 845 }, { "epoch": 0.024700008758868353, "grad_norm": 0.8070574458918119, "learning_rate": 4.938704028021016e-06, "loss": 0.7696, "step": 846 }, { "epoch": 0.024729204986715717, "grad_norm": 0.7498142769540546, "learning_rate": 4.944541739638062e-06, "loss": 0.7021, "step": 847 }, { "epoch": 0.02475840121456308, "grad_norm": 0.991707549024212, "learning_rate": 4.950379451255109e-06, "loss": 0.8923, "step": 848 }, { "epoch": 0.02478759744241044, "grad_norm": 0.882556491562639, "learning_rate": 4.9562171628721545e-06, "loss": 0.8563, "step": 849 }, { "epoch": 0.024816793670257804, "grad_norm": 0.8644647392373351, "learning_rate": 4.9620548744892e-06, "loss": 0.7527, "step": 850 }, { "epoch": 0.024845989898105166, "grad_norm": 0.9948288690742758, "learning_rate": 4.967892586106246e-06, "loss": 0.7932, "step": 851 }, { "epoch": 0.024875186125952527, "grad_norm": 0.875483646302961, "learning_rate": 4.973730297723293e-06, "loss": 0.7725, "step": 852 }, { "epoch": 0.024904382353799888, "grad_norm": 0.9838933518671846, "learning_rate": 4.9795680093403396e-06, "loss": 0.9389, "step": 853 }, { "epoch": 0.024933578581647253, "grad_norm": 0.8245994638705136, "learning_rate": 4.985405720957385e-06, "loss": 0.8183, "step": 854 }, { "epoch": 0.024962774809494614, "grad_norm": 0.79933827323707, "learning_rate": 4.991243432574431e-06, "loss": 0.7347, "step": 855 }, { "epoch": 0.024991971037341975, "grad_norm": 0.9452664480934503, "learning_rate": 4.997081144191477e-06, "loss": 0.8315, "step": 856 }, { "epoch": 0.025021167265189336, "grad_norm": 1.051658752955085, "learning_rate": 5.002918855808524e-06, "loss": 0.9092, "step": 857 }, { "epoch": 0.0250503634930367, "grad_norm": 0.7929673374942497, "learning_rate": 5.0087565674255704e-06, "loss": 0.7247, "step": 858 }, { "epoch": 0.025079559720884062, "grad_norm": 0.78082655069239, "learning_rate": 5.014594279042616e-06, "loss": 0.7265, "step": 859 }, { "epoch": 0.025108755948731423, "grad_norm": 0.776921455112756, "learning_rate": 5.020431990659662e-06, "loss": 0.8018, "step": 860 }, { "epoch": 0.025137952176578784, "grad_norm": 0.8363755481794446, "learning_rate": 5.026269702276708e-06, "loss": 0.7243, "step": 861 }, { "epoch": 0.02516714840442615, "grad_norm": 0.7535169735295123, "learning_rate": 5.032107413893755e-06, "loss": 0.784, "step": 862 }, { "epoch": 0.02519634463227351, "grad_norm": 0.7968591961220256, "learning_rate": 5.0379451255108004e-06, "loss": 0.7165, "step": 863 }, { "epoch": 0.02522554086012087, "grad_norm": 0.9297223315989275, "learning_rate": 5.043782837127846e-06, "loss": 0.8254, "step": 864 }, { "epoch": 0.025254737087968236, "grad_norm": 0.8445764472662123, "learning_rate": 5.049620548744892e-06, "loss": 0.7222, "step": 865 }, { "epoch": 0.025283933315815597, "grad_norm": 0.896573270587015, "learning_rate": 5.055458260361939e-06, "loss": 0.8124, "step": 866 }, { "epoch": 0.02531312954366296, "grad_norm": 0.9359382180493944, "learning_rate": 5.061295971978985e-06, "loss": 0.9728, "step": 867 }, { "epoch": 0.02534232577151032, "grad_norm": 0.7818230990424968, "learning_rate": 5.0671336835960305e-06, "loss": 0.8047, "step": 868 }, { "epoch": 0.025371521999357684, "grad_norm": 0.8583276190791488, "learning_rate": 5.072971395213076e-06, "loss": 0.7159, "step": 869 }, { "epoch": 0.025400718227205046, "grad_norm": 0.9068763565653893, "learning_rate": 5.078809106830123e-06, "loss": 0.8373, "step": 870 }, { "epoch": 0.025429914455052407, "grad_norm": 0.8780056750564207, "learning_rate": 5.084646818447169e-06, "loss": 0.7634, "step": 871 }, { "epoch": 0.025459110682899768, "grad_norm": 0.7866307098463916, "learning_rate": 5.090484530064215e-06, "loss": 0.762, "step": 872 }, { "epoch": 0.025488306910747133, "grad_norm": 0.7834327357629658, "learning_rate": 5.096322241681262e-06, "loss": 0.7426, "step": 873 }, { "epoch": 0.025517503138594494, "grad_norm": 0.7850622527043111, "learning_rate": 5.102159953298308e-06, "loss": 0.7055, "step": 874 }, { "epoch": 0.025546699366441855, "grad_norm": 0.838246485541101, "learning_rate": 5.107997664915354e-06, "loss": 0.8054, "step": 875 }, { "epoch": 0.025575895594289216, "grad_norm": 0.7707790783813018, "learning_rate": 5.1138353765324e-06, "loss": 0.7191, "step": 876 }, { "epoch": 0.02560509182213658, "grad_norm": 0.8785912060388666, "learning_rate": 5.119673088149446e-06, "loss": 0.8141, "step": 877 }, { "epoch": 0.025634288049983942, "grad_norm": 0.786466639760328, "learning_rate": 5.125510799766492e-06, "loss": 0.7152, "step": 878 }, { "epoch": 0.025663484277831303, "grad_norm": 0.8627592680073912, "learning_rate": 5.131348511383538e-06, "loss": 0.8082, "step": 879 }, { "epoch": 0.025692680505678665, "grad_norm": 0.7625406307589245, "learning_rate": 5.137186223000584e-06, "loss": 0.7327, "step": 880 }, { "epoch": 0.02572187673352603, "grad_norm": 0.7040185450987981, "learning_rate": 5.1430239346176305e-06, "loss": 0.6079, "step": 881 }, { "epoch": 0.02575107296137339, "grad_norm": 0.8985119319540202, "learning_rate": 5.148861646234676e-06, "loss": 0.8139, "step": 882 }, { "epoch": 0.02578026918922075, "grad_norm": 0.7539655984203286, "learning_rate": 5.154699357851722e-06, "loss": 0.6714, "step": 883 }, { "epoch": 0.025809465417068116, "grad_norm": 0.7879026695365021, "learning_rate": 5.160537069468768e-06, "loss": 0.7857, "step": 884 }, { "epoch": 0.025838661644915478, "grad_norm": 0.8687084532685282, "learning_rate": 5.166374781085815e-06, "loss": 0.7349, "step": 885 }, { "epoch": 0.02586785787276284, "grad_norm": 0.7910881357214989, "learning_rate": 5.1722124927028605e-06, "loss": 0.7023, "step": 886 }, { "epoch": 0.0258970541006102, "grad_norm": 0.8452585625430994, "learning_rate": 5.178050204319906e-06, "loss": 0.7334, "step": 887 }, { "epoch": 0.025926250328457565, "grad_norm": 0.7731276924708111, "learning_rate": 5.183887915936952e-06, "loss": 0.7223, "step": 888 }, { "epoch": 0.025955446556304926, "grad_norm": 0.742445308850094, "learning_rate": 5.189725627554e-06, "loss": 0.6918, "step": 889 }, { "epoch": 0.025984642784152287, "grad_norm": 0.7809375518677178, "learning_rate": 5.1955633391710456e-06, "loss": 0.726, "step": 890 }, { "epoch": 0.026013839011999648, "grad_norm": 1.0089018593434589, "learning_rate": 5.201401050788092e-06, "loss": 0.7744, "step": 891 }, { "epoch": 0.026043035239847013, "grad_norm": 0.8169483829727545, "learning_rate": 5.207238762405138e-06, "loss": 0.8101, "step": 892 }, { "epoch": 0.026072231467694374, "grad_norm": 0.7995411512263761, "learning_rate": 5.213076474022184e-06, "loss": 0.748, "step": 893 }, { "epoch": 0.026101427695541735, "grad_norm": 0.7892311076843453, "learning_rate": 5.21891418563923e-06, "loss": 0.7331, "step": 894 }, { "epoch": 0.026130623923389096, "grad_norm": 1.0799246302150363, "learning_rate": 5.224751897256276e-06, "loss": 0.8359, "step": 895 }, { "epoch": 0.02615982015123646, "grad_norm": 0.7747507104294555, "learning_rate": 5.230589608873322e-06, "loss": 0.7845, "step": 896 }, { "epoch": 0.026189016379083822, "grad_norm": 0.9171780707026127, "learning_rate": 5.236427320490368e-06, "loss": 0.8303, "step": 897 }, { "epoch": 0.026218212606931184, "grad_norm": 0.7694185822538869, "learning_rate": 5.242265032107414e-06, "loss": 0.7102, "step": 898 }, { "epoch": 0.026247408834778548, "grad_norm": 0.7749855790390859, "learning_rate": 5.248102743724461e-06, "loss": 0.6558, "step": 899 }, { "epoch": 0.02627660506262591, "grad_norm": 0.8132619946621182, "learning_rate": 5.2539404553415064e-06, "loss": 0.7566, "step": 900 }, { "epoch": 0.02630580129047327, "grad_norm": 1.4694054028355024, "learning_rate": 5.259778166958552e-06, "loss": 0.891, "step": 901 }, { "epoch": 0.026334997518320632, "grad_norm": 0.8041071623311109, "learning_rate": 5.265615878575598e-06, "loss": 0.7355, "step": 902 }, { "epoch": 0.026364193746167996, "grad_norm": 0.8136843557449224, "learning_rate": 5.271453590192645e-06, "loss": 0.7705, "step": 903 }, { "epoch": 0.026393389974015358, "grad_norm": 0.8098269541467666, "learning_rate": 5.277291301809691e-06, "loss": 0.762, "step": 904 }, { "epoch": 0.02642258620186272, "grad_norm": 0.8958135438586189, "learning_rate": 5.283129013426737e-06, "loss": 0.7988, "step": 905 }, { "epoch": 0.02645178242971008, "grad_norm": 0.8326918383202806, "learning_rate": 5.288966725043784e-06, "loss": 0.7575, "step": 906 }, { "epoch": 0.026480978657557445, "grad_norm": 0.8683493330145541, "learning_rate": 5.29480443666083e-06, "loss": 0.724, "step": 907 }, { "epoch": 0.026510174885404806, "grad_norm": 0.8260635051459112, "learning_rate": 5.300642148277876e-06, "loss": 0.8092, "step": 908 }, { "epoch": 0.026539371113252167, "grad_norm": 1.2219827485835084, "learning_rate": 5.3064798598949215e-06, "loss": 0.9465, "step": 909 }, { "epoch": 0.02656856734109953, "grad_norm": 0.8114517140691411, "learning_rate": 5.312317571511968e-06, "loss": 0.6679, "step": 910 }, { "epoch": 0.026597763568946893, "grad_norm": 0.7434188388132242, "learning_rate": 5.318155283129014e-06, "loss": 0.6952, "step": 911 }, { "epoch": 0.026626959796794254, "grad_norm": 0.8871440175803664, "learning_rate": 5.32399299474606e-06, "loss": 0.762, "step": 912 }, { "epoch": 0.026656156024641615, "grad_norm": 0.7685415682625882, "learning_rate": 5.329830706363106e-06, "loss": 0.7887, "step": 913 }, { "epoch": 0.02668535225248898, "grad_norm": 0.8476406532657279, "learning_rate": 5.335668417980152e-06, "loss": 0.8554, "step": 914 }, { "epoch": 0.02671454848033634, "grad_norm": 0.8300614763788092, "learning_rate": 5.341506129597198e-06, "loss": 0.7777, "step": 915 }, { "epoch": 0.026743744708183703, "grad_norm": 1.1288005860422075, "learning_rate": 5.347343841214244e-06, "loss": 0.811, "step": 916 }, { "epoch": 0.026772940936031064, "grad_norm": 0.9447589878837841, "learning_rate": 5.353181552831291e-06, "loss": 0.7553, "step": 917 }, { "epoch": 0.02680213716387843, "grad_norm": 0.8397822639045598, "learning_rate": 5.3590192644483365e-06, "loss": 0.8111, "step": 918 }, { "epoch": 0.02683133339172579, "grad_norm": 0.8169198342562662, "learning_rate": 5.364856976065382e-06, "loss": 0.8164, "step": 919 }, { "epoch": 0.02686052961957315, "grad_norm": 0.7927447978408886, "learning_rate": 5.370694687682428e-06, "loss": 0.6656, "step": 920 }, { "epoch": 0.026889725847420512, "grad_norm": 0.7233425872123116, "learning_rate": 5.376532399299476e-06, "loss": 0.6738, "step": 921 }, { "epoch": 0.026918922075267877, "grad_norm": 0.8713561661943813, "learning_rate": 5.3823701109165215e-06, "loss": 0.8865, "step": 922 }, { "epoch": 0.026948118303115238, "grad_norm": 0.7658182703171971, "learning_rate": 5.388207822533567e-06, "loss": 0.7541, "step": 923 }, { "epoch": 0.0269773145309626, "grad_norm": 0.7332545643899883, "learning_rate": 5.394045534150614e-06, "loss": 0.7085, "step": 924 }, { "epoch": 0.02700651075880996, "grad_norm": 0.8139409045564986, "learning_rate": 5.39988324576766e-06, "loss": 0.7856, "step": 925 }, { "epoch": 0.027035706986657325, "grad_norm": 1.0218460754825873, "learning_rate": 5.405720957384706e-06, "loss": 0.7269, "step": 926 }, { "epoch": 0.027064903214504686, "grad_norm": 0.794447487068216, "learning_rate": 5.4115586690017515e-06, "loss": 0.7582, "step": 927 }, { "epoch": 0.027094099442352047, "grad_norm": 0.8039973544904279, "learning_rate": 5.417396380618798e-06, "loss": 0.7814, "step": 928 }, { "epoch": 0.02712329567019941, "grad_norm": 0.7732396912328477, "learning_rate": 5.423234092235844e-06, "loss": 0.7817, "step": 929 }, { "epoch": 0.027152491898046773, "grad_norm": 3.194399597015439, "learning_rate": 5.42907180385289e-06, "loss": 0.9018, "step": 930 }, { "epoch": 0.027181688125894134, "grad_norm": 0.8506378953433954, "learning_rate": 5.434909515469936e-06, "loss": 0.865, "step": 931 }, { "epoch": 0.027210884353741496, "grad_norm": 0.9470663457510532, "learning_rate": 5.440747227086982e-06, "loss": 0.7613, "step": 932 }, { "epoch": 0.02724008058158886, "grad_norm": 0.7247593472328796, "learning_rate": 5.446584938704028e-06, "loss": 0.6655, "step": 933 }, { "epoch": 0.02726927680943622, "grad_norm": 0.9061290613927946, "learning_rate": 5.452422650321074e-06, "loss": 0.872, "step": 934 }, { "epoch": 0.027298473037283583, "grad_norm": 0.7839130805578038, "learning_rate": 5.45826036193812e-06, "loss": 0.7346, "step": 935 }, { "epoch": 0.027327669265130944, "grad_norm": 0.8960811275017274, "learning_rate": 5.464098073555167e-06, "loss": 0.826, "step": 936 }, { "epoch": 0.02735686549297831, "grad_norm": 0.7790398647202731, "learning_rate": 5.469935785172213e-06, "loss": 0.6222, "step": 937 }, { "epoch": 0.02738606172082567, "grad_norm": 0.9357625497777345, "learning_rate": 5.475773496789259e-06, "loss": 0.8092, "step": 938 }, { "epoch": 0.02741525794867303, "grad_norm": 0.9905404758514224, "learning_rate": 5.481611208406306e-06, "loss": 0.7987, "step": 939 }, { "epoch": 0.027444454176520392, "grad_norm": 0.8320362573640656, "learning_rate": 5.487448920023352e-06, "loss": 0.7192, "step": 940 }, { "epoch": 0.027473650404367757, "grad_norm": 0.8550436388573671, "learning_rate": 5.4932866316403974e-06, "loss": 0.7632, "step": 941 }, { "epoch": 0.027502846632215118, "grad_norm": 1.0646727942988647, "learning_rate": 5.499124343257444e-06, "loss": 0.8931, "step": 942 }, { "epoch": 0.02753204286006248, "grad_norm": 0.811395713363799, "learning_rate": 5.50496205487449e-06, "loss": 0.8374, "step": 943 }, { "epoch": 0.02756123908790984, "grad_norm": 0.8432171143209308, "learning_rate": 5.510799766491536e-06, "loss": 0.7435, "step": 944 }, { "epoch": 0.027590435315757205, "grad_norm": 0.8095617368186871, "learning_rate": 5.516637478108582e-06, "loss": 0.7888, "step": 945 }, { "epoch": 0.027619631543604566, "grad_norm": 0.8400286907017996, "learning_rate": 5.522475189725628e-06, "loss": 0.7684, "step": 946 }, { "epoch": 0.027648827771451927, "grad_norm": 0.7819529019645477, "learning_rate": 5.528312901342674e-06, "loss": 0.7911, "step": 947 }, { "epoch": 0.027678023999299292, "grad_norm": 0.8224849648465071, "learning_rate": 5.53415061295972e-06, "loss": 0.7852, "step": 948 }, { "epoch": 0.027707220227146653, "grad_norm": 0.7316439752013962, "learning_rate": 5.539988324576766e-06, "loss": 0.6541, "step": 949 }, { "epoch": 0.027736416454994015, "grad_norm": 0.8448637767911725, "learning_rate": 5.5458260361938125e-06, "loss": 0.856, "step": 950 }, { "epoch": 0.027765612682841376, "grad_norm": 0.7584095318566301, "learning_rate": 5.551663747810858e-06, "loss": 0.6913, "step": 951 }, { "epoch": 0.02779480891068874, "grad_norm": 0.8485581614552405, "learning_rate": 5.557501459427904e-06, "loss": 0.8606, "step": 952 }, { "epoch": 0.0278240051385361, "grad_norm": 0.7971699866397618, "learning_rate": 5.563339171044952e-06, "loss": 0.824, "step": 953 }, { "epoch": 0.027853201366383463, "grad_norm": 1.0695938273981072, "learning_rate": 5.5691768826619975e-06, "loss": 0.9245, "step": 954 }, { "epoch": 0.027882397594230824, "grad_norm": 0.8569096442988396, "learning_rate": 5.575014594279043e-06, "loss": 0.8444, "step": 955 }, { "epoch": 0.02791159382207819, "grad_norm": 0.802083075342316, "learning_rate": 5.580852305896089e-06, "loss": 0.7549, "step": 956 }, { "epoch": 0.02794079004992555, "grad_norm": 0.7955416254362276, "learning_rate": 5.586690017513136e-06, "loss": 0.744, "step": 957 }, { "epoch": 0.02796998627777291, "grad_norm": 0.7772816783563355, "learning_rate": 5.592527729130182e-06, "loss": 0.7653, "step": 958 }, { "epoch": 0.027999182505620272, "grad_norm": 0.8366818909479259, "learning_rate": 5.5983654407472275e-06, "loss": 0.7356, "step": 959 }, { "epoch": 0.028028378733467637, "grad_norm": 0.8836786747681377, "learning_rate": 5.604203152364273e-06, "loss": 0.7428, "step": 960 }, { "epoch": 0.028057574961314998, "grad_norm": 0.7624435858407653, "learning_rate": 5.61004086398132e-06, "loss": 0.7431, "step": 961 }, { "epoch": 0.02808677118916236, "grad_norm": 0.8053261700847405, "learning_rate": 5.615878575598366e-06, "loss": 0.7975, "step": 962 }, { "epoch": 0.028115967417009724, "grad_norm": 0.8423611630877086, "learning_rate": 5.621716287215412e-06, "loss": 0.7502, "step": 963 }, { "epoch": 0.028145163644857085, "grad_norm": 0.8340316866231043, "learning_rate": 5.6275539988324575e-06, "loss": 0.7933, "step": 964 }, { "epoch": 0.028174359872704446, "grad_norm": 0.7881713369411603, "learning_rate": 5.633391710449504e-06, "loss": 0.749, "step": 965 }, { "epoch": 0.028203556100551808, "grad_norm": 0.8711728136946126, "learning_rate": 5.63922942206655e-06, "loss": 0.8591, "step": 966 }, { "epoch": 0.028232752328399172, "grad_norm": 0.7867242824214487, "learning_rate": 5.645067133683596e-06, "loss": 0.7788, "step": 967 }, { "epoch": 0.028261948556246533, "grad_norm": 0.9613988213492863, "learning_rate": 5.650904845300642e-06, "loss": 0.8155, "step": 968 }, { "epoch": 0.028291144784093895, "grad_norm": 0.9006237552628891, "learning_rate": 5.656742556917689e-06, "loss": 0.7952, "step": 969 }, { "epoch": 0.028320341011941256, "grad_norm": 0.8386966691189959, "learning_rate": 5.662580268534735e-06, "loss": 0.7809, "step": 970 }, { "epoch": 0.02834953723978862, "grad_norm": 0.774336365766048, "learning_rate": 5.668417980151782e-06, "loss": 0.7381, "step": 971 }, { "epoch": 0.028378733467635982, "grad_norm": 0.8322791220774532, "learning_rate": 5.674255691768828e-06, "loss": 0.8524, "step": 972 }, { "epoch": 0.028407929695483343, "grad_norm": 0.8425243665746769, "learning_rate": 5.680093403385873e-06, "loss": 0.8368, "step": 973 }, { "epoch": 0.028437125923330704, "grad_norm": 0.8577407158550109, "learning_rate": 5.685931115002919e-06, "loss": 0.8012, "step": 974 }, { "epoch": 0.02846632215117807, "grad_norm": 0.7770696193271431, "learning_rate": 5.691768826619966e-06, "loss": 0.8038, "step": 975 }, { "epoch": 0.02849551837902543, "grad_norm": 0.7734183876383772, "learning_rate": 5.697606538237012e-06, "loss": 0.7158, "step": 976 }, { "epoch": 0.02852471460687279, "grad_norm": 0.9941518958347457, "learning_rate": 5.703444249854058e-06, "loss": 0.8066, "step": 977 }, { "epoch": 0.028553910834720152, "grad_norm": 0.8214690690165739, "learning_rate": 5.7092819614711034e-06, "loss": 0.7767, "step": 978 }, { "epoch": 0.028583107062567517, "grad_norm": 0.9971504898680313, "learning_rate": 5.71511967308815e-06, "loss": 0.9112, "step": 979 }, { "epoch": 0.02861230329041488, "grad_norm": 0.7637938107679187, "learning_rate": 5.720957384705196e-06, "loss": 0.6865, "step": 980 }, { "epoch": 0.02864149951826224, "grad_norm": 0.7899740342297679, "learning_rate": 5.726795096322242e-06, "loss": 0.7719, "step": 981 }, { "epoch": 0.028670695746109604, "grad_norm": 0.8738799350093002, "learning_rate": 5.732632807939288e-06, "loss": 0.8559, "step": 982 }, { "epoch": 0.028699891973956965, "grad_norm": 1.1064042905892773, "learning_rate": 5.738470519556334e-06, "loss": 0.7425, "step": 983 }, { "epoch": 0.028729088201804327, "grad_norm": 0.8301315190635923, "learning_rate": 5.74430823117338e-06, "loss": 0.7303, "step": 984 }, { "epoch": 0.028758284429651688, "grad_norm": 0.8489163654795845, "learning_rate": 5.750145942790427e-06, "loss": 0.8026, "step": 985 }, { "epoch": 0.028787480657499052, "grad_norm": 0.7994993617680403, "learning_rate": 5.7559836544074735e-06, "loss": 0.7767, "step": 986 }, { "epoch": 0.028816676885346414, "grad_norm": 0.7404662819674268, "learning_rate": 5.761821366024519e-06, "loss": 0.6842, "step": 987 }, { "epoch": 0.028845873113193775, "grad_norm": 0.8118788256854849, "learning_rate": 5.767659077641565e-06, "loss": 0.8388, "step": 988 }, { "epoch": 0.028875069341041136, "grad_norm": 0.7643212835053182, "learning_rate": 5.773496789258611e-06, "loss": 0.6739, "step": 989 }, { "epoch": 0.0289042655688885, "grad_norm": 0.8054985069419528, "learning_rate": 5.779334500875658e-06, "loss": 0.7832, "step": 990 }, { "epoch": 0.028933461796735862, "grad_norm": 0.7530590877226683, "learning_rate": 5.7851722124927035e-06, "loss": 0.7099, "step": 991 }, { "epoch": 0.028962658024583223, "grad_norm": 0.8329996677485317, "learning_rate": 5.791009924109749e-06, "loss": 0.7575, "step": 992 }, { "epoch": 0.028991854252430584, "grad_norm": 0.7972572443883638, "learning_rate": 5.796847635726795e-06, "loss": 0.7807, "step": 993 }, { "epoch": 0.02902105048027795, "grad_norm": 0.9278485775181281, "learning_rate": 5.802685347343842e-06, "loss": 0.7254, "step": 994 }, { "epoch": 0.02905024670812531, "grad_norm": 0.8296447359787906, "learning_rate": 5.808523058960888e-06, "loss": 0.7795, "step": 995 }, { "epoch": 0.02907944293597267, "grad_norm": 1.0755944554945056, "learning_rate": 5.8143607705779335e-06, "loss": 0.8307, "step": 996 }, { "epoch": 0.029108639163820036, "grad_norm": 0.8565132748241755, "learning_rate": 5.820198482194979e-06, "loss": 0.7936, "step": 997 }, { "epoch": 0.029137835391667397, "grad_norm": 0.8576045652404868, "learning_rate": 5.826036193812026e-06, "loss": 0.8475, "step": 998 }, { "epoch": 0.02916703161951476, "grad_norm": 0.8695108646614281, "learning_rate": 5.831873905429072e-06, "loss": 0.7895, "step": 999 }, { "epoch": 0.02919622784736212, "grad_norm": 0.7296935379066872, "learning_rate": 5.837711617046118e-06, "loss": 0.641, "step": 1000 }, { "epoch": 0.029225424075209484, "grad_norm": 0.840673045100782, "learning_rate": 5.843549328663165e-06, "loss": 0.6362, "step": 1001 }, { "epoch": 0.029254620303056846, "grad_norm": 1.945830552564916, "learning_rate": 5.849387040280211e-06, "loss": 0.7842, "step": 1002 }, { "epoch": 0.029283816530904207, "grad_norm": 0.7891703330829679, "learning_rate": 5.855224751897257e-06, "loss": 0.7196, "step": 1003 }, { "epoch": 0.029313012758751568, "grad_norm": 0.8284509012700669, "learning_rate": 5.8610624635143036e-06, "loss": 0.785, "step": 1004 }, { "epoch": 0.029342208986598933, "grad_norm": 0.7512548467260903, "learning_rate": 5.866900175131349e-06, "loss": 0.6553, "step": 1005 }, { "epoch": 0.029371405214446294, "grad_norm": 0.7723458697470882, "learning_rate": 5.872737886748395e-06, "loss": 0.7367, "step": 1006 }, { "epoch": 0.029400601442293655, "grad_norm": 0.9195946456134794, "learning_rate": 5.878575598365441e-06, "loss": 0.7204, "step": 1007 }, { "epoch": 0.029429797670141016, "grad_norm": 1.0040210890406238, "learning_rate": 5.884413309982488e-06, "loss": 0.8245, "step": 1008 }, { "epoch": 0.02945899389798838, "grad_norm": 0.7309735632839272, "learning_rate": 5.890251021599534e-06, "loss": 0.644, "step": 1009 }, { "epoch": 0.029488190125835742, "grad_norm": 1.0492283720935967, "learning_rate": 5.896088733216579e-06, "loss": 0.8126, "step": 1010 }, { "epoch": 0.029517386353683103, "grad_norm": 0.8632029235064369, "learning_rate": 5.901926444833625e-06, "loss": 0.7285, "step": 1011 }, { "epoch": 0.029546582581530468, "grad_norm": 0.8961023725790624, "learning_rate": 5.907764156450672e-06, "loss": 0.8989, "step": 1012 }, { "epoch": 0.02957577880937783, "grad_norm": 0.7984467578369864, "learning_rate": 5.913601868067718e-06, "loss": 0.7705, "step": 1013 }, { "epoch": 0.02960497503722519, "grad_norm": 1.1318048291204148, "learning_rate": 5.919439579684764e-06, "loss": 0.9185, "step": 1014 }, { "epoch": 0.02963417126507255, "grad_norm": 0.844421761509739, "learning_rate": 5.925277291301809e-06, "loss": 0.7938, "step": 1015 }, { "epoch": 0.029663367492919916, "grad_norm": 0.8520789520955838, "learning_rate": 5.931115002918856e-06, "loss": 0.7528, "step": 1016 }, { "epoch": 0.029692563720767277, "grad_norm": 0.7900318559996375, "learning_rate": 5.936952714535903e-06, "loss": 0.6412, "step": 1017 }, { "epoch": 0.02972175994861464, "grad_norm": 0.8509625221435697, "learning_rate": 5.942790426152949e-06, "loss": 0.8288, "step": 1018 }, { "epoch": 0.029750956176462, "grad_norm": 0.7692707011752614, "learning_rate": 5.948628137769995e-06, "loss": 0.7604, "step": 1019 }, { "epoch": 0.029780152404309364, "grad_norm": 0.7562621740918288, "learning_rate": 5.954465849387041e-06, "loss": 0.7444, "step": 1020 }, { "epoch": 0.029809348632156726, "grad_norm": 0.7143100064263, "learning_rate": 5.960303561004087e-06, "loss": 0.6618, "step": 1021 }, { "epoch": 0.029838544860004087, "grad_norm": 0.9869921396359517, "learning_rate": 5.966141272621133e-06, "loss": 0.7865, "step": 1022 }, { "epoch": 0.029867741087851448, "grad_norm": 0.7987909943798136, "learning_rate": 5.9719789842381795e-06, "loss": 0.7727, "step": 1023 }, { "epoch": 0.029896937315698813, "grad_norm": 0.852197913220237, "learning_rate": 5.977816695855225e-06, "loss": 0.8236, "step": 1024 }, { "epoch": 0.029926133543546174, "grad_norm": 0.7717620456932284, "learning_rate": 5.983654407472271e-06, "loss": 0.7084, "step": 1025 }, { "epoch": 0.029955329771393535, "grad_norm": 0.8323387607210996, "learning_rate": 5.989492119089317e-06, "loss": 0.7661, "step": 1026 }, { "epoch": 0.029984525999240896, "grad_norm": 0.9857313797623114, "learning_rate": 5.995329830706364e-06, "loss": 0.8634, "step": 1027 }, { "epoch": 0.03001372222708826, "grad_norm": 0.8924682786251014, "learning_rate": 6.0011675423234095e-06, "loss": 0.7333, "step": 1028 }, { "epoch": 0.030042918454935622, "grad_norm": 0.7651610466215347, "learning_rate": 6.007005253940455e-06, "loss": 0.6877, "step": 1029 }, { "epoch": 0.030072114682782983, "grad_norm": 0.8294046460204879, "learning_rate": 6.012842965557501e-06, "loss": 0.8093, "step": 1030 }, { "epoch": 0.030101310910630348, "grad_norm": 0.7495755428670269, "learning_rate": 6.018680677174548e-06, "loss": 0.6796, "step": 1031 }, { "epoch": 0.03013050713847771, "grad_norm": 0.8278795051516673, "learning_rate": 6.024518388791594e-06, "loss": 0.7462, "step": 1032 }, { "epoch": 0.03015970336632507, "grad_norm": 0.8772340070630216, "learning_rate": 6.030356100408641e-06, "loss": 0.9079, "step": 1033 }, { "epoch": 0.03018889959417243, "grad_norm": 0.7438247933805626, "learning_rate": 6.036193812025687e-06, "loss": 0.6923, "step": 1034 }, { "epoch": 0.030218095822019796, "grad_norm": 0.8332316400896833, "learning_rate": 6.042031523642733e-06, "loss": 0.7923, "step": 1035 }, { "epoch": 0.030247292049867158, "grad_norm": 0.8654881394336035, "learning_rate": 6.047869235259779e-06, "loss": 0.7341, "step": 1036 }, { "epoch": 0.03027648827771452, "grad_norm": 0.8449919191078946, "learning_rate": 6.053706946876825e-06, "loss": 0.8059, "step": 1037 }, { "epoch": 0.03030568450556188, "grad_norm": 0.8473214175616298, "learning_rate": 6.059544658493871e-06, "loss": 0.7609, "step": 1038 }, { "epoch": 0.030334880733409245, "grad_norm": 0.7422932036943696, "learning_rate": 6.065382370110917e-06, "loss": 0.6818, "step": 1039 }, { "epoch": 0.030364076961256606, "grad_norm": 0.8290227320556808, "learning_rate": 6.071220081727963e-06, "loss": 0.7801, "step": 1040 }, { "epoch": 0.030393273189103967, "grad_norm": 0.8933599808151825, "learning_rate": 6.0770577933450096e-06, "loss": 0.7687, "step": 1041 }, { "epoch": 0.030422469416951328, "grad_norm": 0.9458009282289546, "learning_rate": 6.082895504962055e-06, "loss": 0.8587, "step": 1042 }, { "epoch": 0.030451665644798693, "grad_norm": 0.9571266070647307, "learning_rate": 6.088733216579101e-06, "loss": 0.7467, "step": 1043 }, { "epoch": 0.030480861872646054, "grad_norm": 0.9327830726497854, "learning_rate": 6.094570928196147e-06, "loss": 0.8611, "step": 1044 }, { "epoch": 0.030510058100493415, "grad_norm": 0.8727852281285677, "learning_rate": 6.100408639813194e-06, "loss": 0.8209, "step": 1045 }, { "epoch": 0.03053925432834078, "grad_norm": 0.753742860714136, "learning_rate": 6.1062463514302396e-06, "loss": 0.6713, "step": 1046 }, { "epoch": 0.03056845055618814, "grad_norm": 0.7674065348046177, "learning_rate": 6.112084063047285e-06, "loss": 0.7084, "step": 1047 }, { "epoch": 0.030597646784035502, "grad_norm": 0.7985868010336015, "learning_rate": 6.117921774664331e-06, "loss": 0.7973, "step": 1048 }, { "epoch": 0.030626843011882864, "grad_norm": 0.7899673113607514, "learning_rate": 6.123759486281379e-06, "loss": 0.8151, "step": 1049 }, { "epoch": 0.030656039239730228, "grad_norm": 0.9172084361223293, "learning_rate": 6.129597197898425e-06, "loss": 0.7421, "step": 1050 }, { "epoch": 0.03068523546757759, "grad_norm": 0.7257270979891349, "learning_rate": 6.1354349095154704e-06, "loss": 0.6395, "step": 1051 }, { "epoch": 0.03071443169542495, "grad_norm": 1.1465606227649272, "learning_rate": 6.141272621132517e-06, "loss": 0.8286, "step": 1052 }, { "epoch": 0.030743627923272312, "grad_norm": 0.8247887657438306, "learning_rate": 6.147110332749563e-06, "loss": 0.8235, "step": 1053 }, { "epoch": 0.030772824151119677, "grad_norm": 0.9244687920209792, "learning_rate": 6.152948044366609e-06, "loss": 0.8488, "step": 1054 }, { "epoch": 0.030802020378967038, "grad_norm": 0.9280146046939621, "learning_rate": 6.158785755983655e-06, "loss": 0.8511, "step": 1055 }, { "epoch": 0.0308312166068144, "grad_norm": 0.8372799322219463, "learning_rate": 6.164623467600701e-06, "loss": 0.7801, "step": 1056 }, { "epoch": 0.03086041283466176, "grad_norm": 0.7474503372587908, "learning_rate": 6.170461179217747e-06, "loss": 0.6968, "step": 1057 }, { "epoch": 0.030889609062509125, "grad_norm": 0.7300661717571988, "learning_rate": 6.176298890834793e-06, "loss": 0.6416, "step": 1058 }, { "epoch": 0.030918805290356486, "grad_norm": 0.8111211062784959, "learning_rate": 6.18213660245184e-06, "loss": 0.8065, "step": 1059 }, { "epoch": 0.030948001518203847, "grad_norm": 0.9399515747204843, "learning_rate": 6.1879743140688855e-06, "loss": 0.7226, "step": 1060 }, { "epoch": 0.030977197746051212, "grad_norm": 0.8946012520077751, "learning_rate": 6.193812025685931e-06, "loss": 0.6743, "step": 1061 }, { "epoch": 0.031006393973898573, "grad_norm": 0.7855200259761491, "learning_rate": 6.199649737302977e-06, "loss": 0.7139, "step": 1062 }, { "epoch": 0.031035590201745934, "grad_norm": 0.7614557389897457, "learning_rate": 6.205487448920024e-06, "loss": 0.6732, "step": 1063 }, { "epoch": 0.031064786429593295, "grad_norm": 0.8057588939868134, "learning_rate": 6.21132516053707e-06, "loss": 0.8425, "step": 1064 }, { "epoch": 0.03109398265744066, "grad_norm": 0.8299643464194636, "learning_rate": 6.217162872154116e-06, "loss": 0.7308, "step": 1065 }, { "epoch": 0.03112317888528802, "grad_norm": 0.7511621870358042, "learning_rate": 6.223000583771163e-06, "loss": 0.6899, "step": 1066 }, { "epoch": 0.031152375113135383, "grad_norm": 0.8588235362374104, "learning_rate": 6.228838295388209e-06, "loss": 0.8024, "step": 1067 }, { "epoch": 0.031181571340982744, "grad_norm": 0.7730143111528505, "learning_rate": 6.234676007005255e-06, "loss": 0.7755, "step": 1068 }, { "epoch": 0.03121076756883011, "grad_norm": 0.8304622453229216, "learning_rate": 6.2405137186223005e-06, "loss": 0.8258, "step": 1069 }, { "epoch": 0.03123996379667747, "grad_norm": 0.8560733465618802, "learning_rate": 6.246351430239347e-06, "loss": 0.8049, "step": 1070 }, { "epoch": 0.03126916002452483, "grad_norm": 1.017169175956159, "learning_rate": 6.252189141856393e-06, "loss": 0.7138, "step": 1071 }, { "epoch": 0.03129835625237219, "grad_norm": 0.799910088598411, "learning_rate": 6.258026853473439e-06, "loss": 0.7048, "step": 1072 }, { "epoch": 0.03132755248021955, "grad_norm": 1.0198104628424005, "learning_rate": 6.263864565090485e-06, "loss": 0.8216, "step": 1073 }, { "epoch": 0.031356748708066914, "grad_norm": 0.8464427460496057, "learning_rate": 6.269702276707531e-06, "loss": 0.7519, "step": 1074 }, { "epoch": 0.03138594493591428, "grad_norm": 0.834496132560239, "learning_rate": 6.275539988324577e-06, "loss": 0.7687, "step": 1075 }, { "epoch": 0.031415141163761644, "grad_norm": 0.7423516690465306, "learning_rate": 6.281377699941623e-06, "loss": 0.7078, "step": 1076 }, { "epoch": 0.031444337391609005, "grad_norm": 0.8555178517938725, "learning_rate": 6.287215411558669e-06, "loss": 0.8839, "step": 1077 }, { "epoch": 0.031473533619456366, "grad_norm": 0.7326313053690415, "learning_rate": 6.2930531231757155e-06, "loss": 0.646, "step": 1078 }, { "epoch": 0.03150272984730373, "grad_norm": 0.8543382106072577, "learning_rate": 6.298890834792761e-06, "loss": 0.8067, "step": 1079 }, { "epoch": 0.03153192607515109, "grad_norm": 0.7918203774524861, "learning_rate": 6.304728546409807e-06, "loss": 0.712, "step": 1080 }, { "epoch": 0.03156112230299845, "grad_norm": 0.9210615460286742, "learning_rate": 6.310566258026855e-06, "loss": 0.7819, "step": 1081 }, { "epoch": 0.03159031853084582, "grad_norm": 0.8168310100250304, "learning_rate": 6.3164039696439006e-06, "loss": 0.7735, "step": 1082 }, { "epoch": 0.03161951475869318, "grad_norm": 0.8347835809833275, "learning_rate": 6.322241681260946e-06, "loss": 0.7222, "step": 1083 }, { "epoch": 0.03164871098654054, "grad_norm": 1.064661267804226, "learning_rate": 6.328079392877993e-06, "loss": 0.831, "step": 1084 }, { "epoch": 0.0316779072143879, "grad_norm": 0.8758828056367266, "learning_rate": 6.333917104495039e-06, "loss": 0.721, "step": 1085 }, { "epoch": 0.03170710344223526, "grad_norm": 0.7626818571350044, "learning_rate": 6.339754816112085e-06, "loss": 0.7231, "step": 1086 }, { "epoch": 0.031736299670082624, "grad_norm": 0.7902164178707007, "learning_rate": 6.345592527729131e-06, "loss": 0.7919, "step": 1087 }, { "epoch": 0.031765495897929985, "grad_norm": 0.7801809309108649, "learning_rate": 6.351430239346177e-06, "loss": 0.7568, "step": 1088 }, { "epoch": 0.031794692125777346, "grad_norm": 0.7545767620093139, "learning_rate": 6.357267950963223e-06, "loss": 0.759, "step": 1089 }, { "epoch": 0.031823888353624714, "grad_norm": 0.8531777843935553, "learning_rate": 6.363105662580269e-06, "loss": 0.8068, "step": 1090 }, { "epoch": 0.031853084581472076, "grad_norm": 0.7910767696400812, "learning_rate": 6.368943374197315e-06, "loss": 0.7404, "step": 1091 }, { "epoch": 0.03188228080931944, "grad_norm": 0.8329245371675231, "learning_rate": 6.3747810858143614e-06, "loss": 0.7749, "step": 1092 }, { "epoch": 0.0319114770371668, "grad_norm": 0.7953102649368066, "learning_rate": 6.380618797431407e-06, "loss": 0.7674, "step": 1093 }, { "epoch": 0.03194067326501416, "grad_norm": 0.7854923968858543, "learning_rate": 6.386456509048453e-06, "loss": 0.7227, "step": 1094 }, { "epoch": 0.03196986949286152, "grad_norm": 0.8812243278716021, "learning_rate": 6.392294220665499e-06, "loss": 0.7749, "step": 1095 }, { "epoch": 0.03199906572070888, "grad_norm": 0.8289766521294039, "learning_rate": 6.398131932282546e-06, "loss": 0.6695, "step": 1096 }, { "epoch": 0.03202826194855625, "grad_norm": 0.848755225776088, "learning_rate": 6.403969643899592e-06, "loss": 0.7042, "step": 1097 }, { "epoch": 0.03205745817640361, "grad_norm": 0.8151274649599357, "learning_rate": 6.409807355516638e-06, "loss": 0.7473, "step": 1098 }, { "epoch": 0.03208665440425097, "grad_norm": 0.8364664588478465, "learning_rate": 6.415645067133685e-06, "loss": 0.7339, "step": 1099 }, { "epoch": 0.03211585063209833, "grad_norm": 1.7814082099307884, "learning_rate": 6.421482778750731e-06, "loss": 0.8247, "step": 1100 }, { "epoch": 0.032145046859945695, "grad_norm": 0.7568991481088291, "learning_rate": 6.4273204903677765e-06, "loss": 0.6986, "step": 1101 }, { "epoch": 0.032174243087793056, "grad_norm": 0.8348821992735378, "learning_rate": 6.433158201984822e-06, "loss": 0.7694, "step": 1102 }, { "epoch": 0.03220343931564042, "grad_norm": 0.813049804890688, "learning_rate": 6.438995913601869e-06, "loss": 0.7451, "step": 1103 }, { "epoch": 0.03223263554348778, "grad_norm": 0.7544344683085792, "learning_rate": 6.444833625218915e-06, "loss": 0.7085, "step": 1104 }, { "epoch": 0.032261831771335146, "grad_norm": 0.8915677399660843, "learning_rate": 6.450671336835961e-06, "loss": 0.8055, "step": 1105 }, { "epoch": 0.03229102799918251, "grad_norm": 0.7589353167976591, "learning_rate": 6.4565090484530065e-06, "loss": 0.7097, "step": 1106 }, { "epoch": 0.03232022422702987, "grad_norm": 0.8434668935757139, "learning_rate": 6.462346760070053e-06, "loss": 0.8194, "step": 1107 }, { "epoch": 0.03234942045487723, "grad_norm": 1.5144015722060604, "learning_rate": 6.468184471687099e-06, "loss": 0.8001, "step": 1108 }, { "epoch": 0.03237861668272459, "grad_norm": 0.7596580837275845, "learning_rate": 6.474022183304145e-06, "loss": 0.7106, "step": 1109 }, { "epoch": 0.03240781291057195, "grad_norm": 0.8317857223605404, "learning_rate": 6.479859894921191e-06, "loss": 0.7421, "step": 1110 }, { "epoch": 0.032437009138419313, "grad_norm": 0.7685309305363451, "learning_rate": 6.485697606538237e-06, "loss": 0.6404, "step": 1111 }, { "epoch": 0.03246620536626668, "grad_norm": 0.8122886818622844, "learning_rate": 6.491535318155283e-06, "loss": 0.7807, "step": 1112 }, { "epoch": 0.03249540159411404, "grad_norm": 0.7698422699529807, "learning_rate": 6.497373029772331e-06, "loss": 0.683, "step": 1113 }, { "epoch": 0.032524597821961404, "grad_norm": 0.9730478784729788, "learning_rate": 6.5032107413893765e-06, "loss": 0.7374, "step": 1114 }, { "epoch": 0.032553794049808765, "grad_norm": 0.8118854352548374, "learning_rate": 6.509048453006422e-06, "loss": 0.7382, "step": 1115 }, { "epoch": 0.032582990277656126, "grad_norm": 0.8213970325185165, "learning_rate": 6.514886164623468e-06, "loss": 0.7712, "step": 1116 }, { "epoch": 0.03261218650550349, "grad_norm": 0.8488352234651573, "learning_rate": 6.520723876240515e-06, "loss": 0.7573, "step": 1117 }, { "epoch": 0.03264138273335085, "grad_norm": 0.7551035836651422, "learning_rate": 6.526561587857561e-06, "loss": 0.7424, "step": 1118 }, { "epoch": 0.03267057896119821, "grad_norm": 0.7449994245316974, "learning_rate": 6.5323992994746066e-06, "loss": 0.664, "step": 1119 }, { "epoch": 0.03269977518904558, "grad_norm": 0.7642019733874323, "learning_rate": 6.538237011091652e-06, "loss": 0.755, "step": 1120 }, { "epoch": 0.03272897141689294, "grad_norm": 0.7841530178663231, "learning_rate": 6.544074722708699e-06, "loss": 0.7906, "step": 1121 }, { "epoch": 0.0327581676447403, "grad_norm": 0.7699979204635914, "learning_rate": 6.549912434325745e-06, "loss": 0.6988, "step": 1122 }, { "epoch": 0.03278736387258766, "grad_norm": 0.8380862657762593, "learning_rate": 6.555750145942791e-06, "loss": 0.7734, "step": 1123 }, { "epoch": 0.03281656010043502, "grad_norm": 0.7885268120428749, "learning_rate": 6.5615878575598366e-06, "loss": 0.7959, "step": 1124 }, { "epoch": 0.032845756328282384, "grad_norm": 0.7654926264994374, "learning_rate": 6.567425569176883e-06, "loss": 0.7205, "step": 1125 }, { "epoch": 0.032874952556129745, "grad_norm": 0.8021754595414419, "learning_rate": 6.573263280793929e-06, "loss": 0.7632, "step": 1126 }, { "epoch": 0.032904148783977114, "grad_norm": 0.7982952350338834, "learning_rate": 6.579100992410975e-06, "loss": 0.7804, "step": 1127 }, { "epoch": 0.032933345011824475, "grad_norm": 0.7901063667734229, "learning_rate": 6.584938704028021e-06, "loss": 0.7305, "step": 1128 }, { "epoch": 0.032962541239671836, "grad_norm": 0.918068800003285, "learning_rate": 6.590776415645068e-06, "loss": 0.7452, "step": 1129 }, { "epoch": 0.0329917374675192, "grad_norm": 0.7433045748122489, "learning_rate": 6.596614127262114e-06, "loss": 0.6591, "step": 1130 }, { "epoch": 0.03302093369536656, "grad_norm": 0.8493156834040005, "learning_rate": 6.60245183887916e-06, "loss": 0.752, "step": 1131 }, { "epoch": 0.03305012992321392, "grad_norm": 0.7572015778944706, "learning_rate": 6.608289550496207e-06, "loss": 0.713, "step": 1132 }, { "epoch": 0.03307932615106128, "grad_norm": 1.0703821294898734, "learning_rate": 6.6141272621132525e-06, "loss": 0.6423, "step": 1133 }, { "epoch": 0.03310852237890864, "grad_norm": 0.8319425406786215, "learning_rate": 6.619964973730298e-06, "loss": 0.8346, "step": 1134 }, { "epoch": 0.03313771860675601, "grad_norm": 0.7428097949571666, "learning_rate": 6.625802685347344e-06, "loss": 0.6926, "step": 1135 }, { "epoch": 0.03316691483460337, "grad_norm": 0.7483234194359136, "learning_rate": 6.631640396964391e-06, "loss": 0.7426, "step": 1136 }, { "epoch": 0.03319611106245073, "grad_norm": 0.812126214702625, "learning_rate": 6.637478108581437e-06, "loss": 0.6935, "step": 1137 }, { "epoch": 0.033225307290298094, "grad_norm": 0.8353643911563414, "learning_rate": 6.6433158201984825e-06, "loss": 0.8202, "step": 1138 }, { "epoch": 0.033254503518145455, "grad_norm": 0.8781530759510775, "learning_rate": 6.649153531815528e-06, "loss": 0.8659, "step": 1139 }, { "epoch": 0.033283699745992816, "grad_norm": 0.803427742803306, "learning_rate": 6.654991243432575e-06, "loss": 0.7165, "step": 1140 }, { "epoch": 0.03331289597384018, "grad_norm": 0.8980376412567537, "learning_rate": 6.660828955049621e-06, "loss": 0.8348, "step": 1141 }, { "epoch": 0.033342092201687545, "grad_norm": 0.8730681569991176, "learning_rate": 6.666666666666667e-06, "loss": 0.7563, "step": 1142 }, { "epoch": 0.03337128842953491, "grad_norm": 1.0576413466922008, "learning_rate": 6.6725043782837125e-06, "loss": 0.7251, "step": 1143 }, { "epoch": 0.03340048465738227, "grad_norm": 0.8719864293098841, "learning_rate": 6.678342089900759e-06, "loss": 0.7729, "step": 1144 }, { "epoch": 0.03342968088522963, "grad_norm": 0.7967035531580503, "learning_rate": 6.684179801517806e-06, "loss": 0.6856, "step": 1145 }, { "epoch": 0.03345887711307699, "grad_norm": 0.8454338346979647, "learning_rate": 6.6900175131348525e-06, "loss": 0.8863, "step": 1146 }, { "epoch": 0.03348807334092435, "grad_norm": 0.8463444096907531, "learning_rate": 6.695855224751898e-06, "loss": 0.8137, "step": 1147 }, { "epoch": 0.03351726956877171, "grad_norm": 0.7976213614351084, "learning_rate": 6.701692936368944e-06, "loss": 0.7576, "step": 1148 }, { "epoch": 0.033546465796619074, "grad_norm": 0.7626894655114794, "learning_rate": 6.70753064798599e-06, "loss": 0.6701, "step": 1149 }, { "epoch": 0.03357566202446644, "grad_norm": 0.8092352293378658, "learning_rate": 6.713368359603037e-06, "loss": 0.7242, "step": 1150 }, { "epoch": 0.0336048582523138, "grad_norm": 0.8358068825488434, "learning_rate": 6.7192060712200825e-06, "loss": 0.7838, "step": 1151 }, { "epoch": 0.033634054480161164, "grad_norm": 0.8072634639982539, "learning_rate": 6.725043782837128e-06, "loss": 0.7236, "step": 1152 }, { "epoch": 0.033663250708008526, "grad_norm": 0.7853668773062533, "learning_rate": 6.730881494454174e-06, "loss": 0.7383, "step": 1153 }, { "epoch": 0.03369244693585589, "grad_norm": 0.7838914755941877, "learning_rate": 6.736719206071221e-06, "loss": 0.7357, "step": 1154 }, { "epoch": 0.03372164316370325, "grad_norm": 0.7534862542550068, "learning_rate": 6.742556917688267e-06, "loss": 0.6676, "step": 1155 }, { "epoch": 0.03375083939155061, "grad_norm": 0.7926248625105606, "learning_rate": 6.7483946293053125e-06, "loss": 0.8276, "step": 1156 }, { "epoch": 0.03378003561939797, "grad_norm": 0.9052026440527073, "learning_rate": 6.754232340922358e-06, "loss": 0.8268, "step": 1157 }, { "epoch": 0.03380923184724534, "grad_norm": 0.7929173382063532, "learning_rate": 6.760070052539405e-06, "loss": 0.7314, "step": 1158 }, { "epoch": 0.0338384280750927, "grad_norm": 0.7658674576400905, "learning_rate": 6.765907764156451e-06, "loss": 0.7107, "step": 1159 }, { "epoch": 0.03386762430294006, "grad_norm": 0.9140185648739381, "learning_rate": 6.771745475773497e-06, "loss": 0.8175, "step": 1160 }, { "epoch": 0.03389682053078742, "grad_norm": 0.7827737455792846, "learning_rate": 6.777583187390544e-06, "loss": 0.7434, "step": 1161 }, { "epoch": 0.03392601675863478, "grad_norm": 0.875327171347952, "learning_rate": 6.78342089900759e-06, "loss": 0.842, "step": 1162 }, { "epoch": 0.033955212986482144, "grad_norm": 0.800070706268685, "learning_rate": 6.789258610624636e-06, "loss": 0.796, "step": 1163 }, { "epoch": 0.033984409214329506, "grad_norm": 0.8537462961260984, "learning_rate": 6.795096322241682e-06, "loss": 0.8836, "step": 1164 }, { "epoch": 0.034013605442176874, "grad_norm": 0.8420792460244977, "learning_rate": 6.8009340338587284e-06, "loss": 0.808, "step": 1165 }, { "epoch": 0.034042801670024235, "grad_norm": 0.8837865636006417, "learning_rate": 6.806771745475774e-06, "loss": 0.7942, "step": 1166 }, { "epoch": 0.034071997897871596, "grad_norm": 0.8178086976889147, "learning_rate": 6.81260945709282e-06, "loss": 0.7634, "step": 1167 }, { "epoch": 0.03410119412571896, "grad_norm": 0.7723385023086611, "learning_rate": 6.818447168709866e-06, "loss": 0.7132, "step": 1168 }, { "epoch": 0.03413039035356632, "grad_norm": 0.7644227234592447, "learning_rate": 6.824284880326913e-06, "loss": 0.7022, "step": 1169 }, { "epoch": 0.03415958658141368, "grad_norm": 0.7736562777914048, "learning_rate": 6.8301225919439584e-06, "loss": 0.6899, "step": 1170 }, { "epoch": 0.03418878280926104, "grad_norm": 0.9459182821935509, "learning_rate": 6.835960303561004e-06, "loss": 0.8433, "step": 1171 }, { "epoch": 0.0342179790371084, "grad_norm": 0.8710300480188373, "learning_rate": 6.841798015178051e-06, "loss": 0.86, "step": 1172 }, { "epoch": 0.03424717526495577, "grad_norm": 0.7409851923363697, "learning_rate": 6.847635726795097e-06, "loss": 0.6399, "step": 1173 }, { "epoch": 0.03427637149280313, "grad_norm": 0.7832472608622543, "learning_rate": 6.853473438412143e-06, "loss": 0.7514, "step": 1174 }, { "epoch": 0.03430556772065049, "grad_norm": 0.9217004202898094, "learning_rate": 6.8593111500291885e-06, "loss": 0.7648, "step": 1175 }, { "epoch": 0.034334763948497854, "grad_norm": 0.7706707430795116, "learning_rate": 6.865148861646235e-06, "loss": 0.7717, "step": 1176 }, { "epoch": 0.034363960176345215, "grad_norm": 0.8446041776945036, "learning_rate": 6.870986573263282e-06, "loss": 0.8211, "step": 1177 }, { "epoch": 0.034393156404192576, "grad_norm": 0.7616184906813828, "learning_rate": 6.876824284880328e-06, "loss": 0.686, "step": 1178 }, { "epoch": 0.03442235263203994, "grad_norm": 0.7619594115470817, "learning_rate": 6.882661996497374e-06, "loss": 0.7443, "step": 1179 }, { "epoch": 0.034451548859887306, "grad_norm": 0.7554881413344187, "learning_rate": 6.88849970811442e-06, "loss": 0.7068, "step": 1180 }, { "epoch": 0.03448074508773467, "grad_norm": 0.859392655826647, "learning_rate": 6.894337419731466e-06, "loss": 0.8161, "step": 1181 }, { "epoch": 0.03450994131558203, "grad_norm": 0.8208236682122604, "learning_rate": 6.900175131348512e-06, "loss": 0.7788, "step": 1182 }, { "epoch": 0.03453913754342939, "grad_norm": 0.8027505887540526, "learning_rate": 6.9060128429655585e-06, "loss": 0.7842, "step": 1183 }, { "epoch": 0.03456833377127675, "grad_norm": 0.8429165429658872, "learning_rate": 6.911850554582604e-06, "loss": 0.7627, "step": 1184 }, { "epoch": 0.03459752999912411, "grad_norm": 0.8031830331917102, "learning_rate": 6.91768826619965e-06, "loss": 0.7717, "step": 1185 }, { "epoch": 0.03462672622697147, "grad_norm": 0.8965381093894436, "learning_rate": 6.923525977816696e-06, "loss": 0.7718, "step": 1186 }, { "epoch": 0.034655922454818834, "grad_norm": 0.7406321655888118, "learning_rate": 6.929363689433743e-06, "loss": 0.6892, "step": 1187 }, { "epoch": 0.0346851186826662, "grad_norm": 0.9993716222233757, "learning_rate": 6.9352014010507885e-06, "loss": 0.8113, "step": 1188 }, { "epoch": 0.03471431491051356, "grad_norm": 0.8229967983552107, "learning_rate": 6.941039112667834e-06, "loss": 0.8048, "step": 1189 }, { "epoch": 0.034743511138360925, "grad_norm": 0.7904069464876303, "learning_rate": 6.94687682428488e-06, "loss": 0.8482, "step": 1190 }, { "epoch": 0.034772707366208286, "grad_norm": 0.764461326322466, "learning_rate": 6.952714535901927e-06, "loss": 0.6306, "step": 1191 }, { "epoch": 0.03480190359405565, "grad_norm": 0.8867998911956243, "learning_rate": 6.958552247518973e-06, "loss": 0.8318, "step": 1192 }, { "epoch": 0.03483109982190301, "grad_norm": 0.8483400185074935, "learning_rate": 6.964389959136019e-06, "loss": 0.8029, "step": 1193 }, { "epoch": 0.03486029604975037, "grad_norm": 0.8028327026788192, "learning_rate": 6.970227670753066e-06, "loss": 0.713, "step": 1194 }, { "epoch": 0.03488949227759774, "grad_norm": 0.8100410637954667, "learning_rate": 6.976065382370112e-06, "loss": 0.7593, "step": 1195 }, { "epoch": 0.0349186885054451, "grad_norm": 0.8525994029589039, "learning_rate": 6.981903093987158e-06, "loss": 0.9525, "step": 1196 }, { "epoch": 0.03494788473329246, "grad_norm": 0.7831645941080175, "learning_rate": 6.9877408056042036e-06, "loss": 0.7274, "step": 1197 }, { "epoch": 0.03497708096113982, "grad_norm": 0.7722376816723453, "learning_rate": 6.99357851722125e-06, "loss": 0.7092, "step": 1198 }, { "epoch": 0.03500627718898718, "grad_norm": 0.9958291776405843, "learning_rate": 6.999416228838296e-06, "loss": 0.734, "step": 1199 }, { "epoch": 0.035035473416834544, "grad_norm": 0.7932881425790624, "learning_rate": 7.005253940455342e-06, "loss": 0.7443, "step": 1200 }, { "epoch": 0.035064669644681905, "grad_norm": 0.8210839531057006, "learning_rate": 7.011091652072389e-06, "loss": 0.8369, "step": 1201 }, { "epoch": 0.035093865872529266, "grad_norm": 0.7878929032098221, "learning_rate": 7.016929363689434e-06, "loss": 0.6939, "step": 1202 }, { "epoch": 0.035123062100376634, "grad_norm": 0.7922322158593647, "learning_rate": 7.02276707530648e-06, "loss": 0.6594, "step": 1203 }, { "epoch": 0.035152258328223995, "grad_norm": 0.8550896779280001, "learning_rate": 7.028604786923526e-06, "loss": 0.8339, "step": 1204 }, { "epoch": 0.035181454556071357, "grad_norm": 0.7977900064380046, "learning_rate": 7.034442498540573e-06, "loss": 0.7514, "step": 1205 }, { "epoch": 0.03521065078391872, "grad_norm": 0.8048210871378543, "learning_rate": 7.040280210157619e-06, "loss": 0.8517, "step": 1206 }, { "epoch": 0.03523984701176608, "grad_norm": 0.7550072639383945, "learning_rate": 7.0461179217746644e-06, "loss": 0.7542, "step": 1207 }, { "epoch": 0.03526904323961344, "grad_norm": 0.8616077999317235, "learning_rate": 7.05195563339171e-06, "loss": 0.8455, "step": 1208 }, { "epoch": 0.0352982394674608, "grad_norm": 0.8977728320753586, "learning_rate": 7.057793345008758e-06, "loss": 0.8677, "step": 1209 }, { "epoch": 0.03532743569530817, "grad_norm": 0.7294394731684676, "learning_rate": 7.063631056625804e-06, "loss": 0.6727, "step": 1210 }, { "epoch": 0.03535663192315553, "grad_norm": 0.7620591617318032, "learning_rate": 7.0694687682428495e-06, "loss": 0.7158, "step": 1211 }, { "epoch": 0.03538582815100289, "grad_norm": 0.8693128102467064, "learning_rate": 7.075306479859896e-06, "loss": 0.8453, "step": 1212 }, { "epoch": 0.03541502437885025, "grad_norm": 0.8727138865678458, "learning_rate": 7.081144191476942e-06, "loss": 0.8346, "step": 1213 }, { "epoch": 0.035444220606697614, "grad_norm": 1.4954056324790332, "learning_rate": 7.086981903093988e-06, "loss": 0.7241, "step": 1214 }, { "epoch": 0.035473416834544975, "grad_norm": 0.7993099190067611, "learning_rate": 7.092819614711034e-06, "loss": 0.7752, "step": 1215 }, { "epoch": 0.03550261306239234, "grad_norm": 0.8383561345805719, "learning_rate": 7.09865732632808e-06, "loss": 0.7958, "step": 1216 }, { "epoch": 0.0355318092902397, "grad_norm": 0.878323694502007, "learning_rate": 7.104495037945126e-06, "loss": 0.7591, "step": 1217 }, { "epoch": 0.035561005518087066, "grad_norm": 0.9645676216952824, "learning_rate": 7.110332749562172e-06, "loss": 0.6982, "step": 1218 }, { "epoch": 0.03559020174593443, "grad_norm": 0.9808026479758022, "learning_rate": 7.116170461179218e-06, "loss": 0.743, "step": 1219 }, { "epoch": 0.03561939797378179, "grad_norm": 0.9583363486480938, "learning_rate": 7.1220081727962645e-06, "loss": 0.814, "step": 1220 }, { "epoch": 0.03564859420162915, "grad_norm": 0.8943567747832629, "learning_rate": 7.12784588441331e-06, "loss": 0.6851, "step": 1221 }, { "epoch": 0.03567779042947651, "grad_norm": 0.7506935611945578, "learning_rate": 7.133683596030356e-06, "loss": 0.7012, "step": 1222 }, { "epoch": 0.03570698665732387, "grad_norm": 0.8663412914334528, "learning_rate": 7.139521307647402e-06, "loss": 0.7978, "step": 1223 }, { "epoch": 0.03573618288517123, "grad_norm": 0.7824233379329191, "learning_rate": 7.145359019264449e-06, "loss": 0.6845, "step": 1224 }, { "epoch": 0.0357653791130186, "grad_norm": 0.7328123267022477, "learning_rate": 7.151196730881495e-06, "loss": 0.6911, "step": 1225 }, { "epoch": 0.03579457534086596, "grad_norm": 0.8666249859421016, "learning_rate": 7.157034442498542e-06, "loss": 0.7427, "step": 1226 }, { "epoch": 0.035823771568713324, "grad_norm": 0.8273756186975686, "learning_rate": 7.162872154115588e-06, "loss": 0.7851, "step": 1227 }, { "epoch": 0.035852967796560685, "grad_norm": 0.8765220456629793, "learning_rate": 7.168709865732634e-06, "loss": 0.8523, "step": 1228 }, { "epoch": 0.035882164024408046, "grad_norm": 0.743213178020602, "learning_rate": 7.1745475773496795e-06, "loss": 0.7192, "step": 1229 }, { "epoch": 0.03591136025225541, "grad_norm": 0.7773105347942078, "learning_rate": 7.180385288966726e-06, "loss": 0.7488, "step": 1230 }, { "epoch": 0.03594055648010277, "grad_norm": 0.8234202062856614, "learning_rate": 7.186223000583772e-06, "loss": 0.7844, "step": 1231 }, { "epoch": 0.03596975270795013, "grad_norm": 0.788349964689763, "learning_rate": 7.192060712200818e-06, "loss": 0.7488, "step": 1232 }, { "epoch": 0.0359989489357975, "grad_norm": 0.8135789210208199, "learning_rate": 7.197898423817864e-06, "loss": 0.7758, "step": 1233 }, { "epoch": 0.03602814516364486, "grad_norm": 0.846369946544568, "learning_rate": 7.20373613543491e-06, "loss": 0.7916, "step": 1234 }, { "epoch": 0.03605734139149222, "grad_norm": 1.6809187287583713, "learning_rate": 7.209573847051956e-06, "loss": 0.8191, "step": 1235 }, { "epoch": 0.03608653761933958, "grad_norm": 0.750749297783256, "learning_rate": 7.215411558669002e-06, "loss": 0.7285, "step": 1236 }, { "epoch": 0.03611573384718694, "grad_norm": 0.8893591728091338, "learning_rate": 7.221249270286048e-06, "loss": 0.7373, "step": 1237 }, { "epoch": 0.036144930075034304, "grad_norm": 0.812953545729256, "learning_rate": 7.2270869819030946e-06, "loss": 0.6915, "step": 1238 }, { "epoch": 0.036174126302881665, "grad_norm": 0.83628754534089, "learning_rate": 7.23292469352014e-06, "loss": 0.8148, "step": 1239 }, { "epoch": 0.03620332253072903, "grad_norm": 0.794866312712887, "learning_rate": 7.238762405137186e-06, "loss": 0.7818, "step": 1240 }, { "epoch": 0.036232518758576394, "grad_norm": 0.8783802324089608, "learning_rate": 7.244600116754234e-06, "loss": 0.715, "step": 1241 }, { "epoch": 0.036261714986423756, "grad_norm": 0.7260989131212434, "learning_rate": 7.25043782837128e-06, "loss": 0.685, "step": 1242 }, { "epoch": 0.03629091121427112, "grad_norm": 0.8301917868609165, "learning_rate": 7.2562755399883254e-06, "loss": 0.7378, "step": 1243 }, { "epoch": 0.03632010744211848, "grad_norm": 0.7703511522320337, "learning_rate": 7.262113251605371e-06, "loss": 0.6814, "step": 1244 }, { "epoch": 0.03634930366996584, "grad_norm": 0.8579995195617137, "learning_rate": 7.267950963222418e-06, "loss": 0.732, "step": 1245 }, { "epoch": 0.0363784998978132, "grad_norm": 0.9245192548042016, "learning_rate": 7.273788674839464e-06, "loss": 0.7817, "step": 1246 }, { "epoch": 0.03640769612566056, "grad_norm": 0.8758535914699664, "learning_rate": 7.27962638645651e-06, "loss": 0.8104, "step": 1247 }, { "epoch": 0.03643689235350793, "grad_norm": 0.8120447470334871, "learning_rate": 7.2854640980735554e-06, "loss": 0.7425, "step": 1248 }, { "epoch": 0.03646608858135529, "grad_norm": 0.7412658932088133, "learning_rate": 7.291301809690602e-06, "loss": 0.6542, "step": 1249 }, { "epoch": 0.03649528480920265, "grad_norm": 0.8194500686626168, "learning_rate": 7.297139521307648e-06, "loss": 0.6872, "step": 1250 }, { "epoch": 0.03652448103705001, "grad_norm": 0.803736842788195, "learning_rate": 7.302977232924694e-06, "loss": 0.7253, "step": 1251 }, { "epoch": 0.036553677264897375, "grad_norm": 0.8152024716677888, "learning_rate": 7.30881494454174e-06, "loss": 0.7697, "step": 1252 }, { "epoch": 0.036582873492744736, "grad_norm": 0.8842348083012175, "learning_rate": 7.314652656158786e-06, "loss": 0.7689, "step": 1253 }, { "epoch": 0.0366120697205921, "grad_norm": 0.8478614569544441, "learning_rate": 7.320490367775832e-06, "loss": 0.805, "step": 1254 }, { "epoch": 0.03664126594843946, "grad_norm": 0.8319295499633794, "learning_rate": 7.326328079392878e-06, "loss": 0.8193, "step": 1255 }, { "epoch": 0.036670462176286826, "grad_norm": 0.7552510812512322, "learning_rate": 7.332165791009924e-06, "loss": 0.6864, "step": 1256 }, { "epoch": 0.03669965840413419, "grad_norm": 0.8468527623850192, "learning_rate": 7.338003502626971e-06, "loss": 0.8211, "step": 1257 }, { "epoch": 0.03672885463198155, "grad_norm": 0.756764734589851, "learning_rate": 7.343841214244017e-06, "loss": 0.6651, "step": 1258 }, { "epoch": 0.03675805085982891, "grad_norm": 0.877104662369969, "learning_rate": 7.349678925861064e-06, "loss": 0.8227, "step": 1259 }, { "epoch": 0.03678724708767627, "grad_norm": 0.8574719619662013, "learning_rate": 7.35551663747811e-06, "loss": 0.7807, "step": 1260 }, { "epoch": 0.03681644331552363, "grad_norm": 0.8508017252535964, "learning_rate": 7.3613543490951555e-06, "loss": 0.7538, "step": 1261 }, { "epoch": 0.036845639543370994, "grad_norm": 0.7870041587080508, "learning_rate": 7.367192060712201e-06, "loss": 0.6959, "step": 1262 }, { "epoch": 0.03687483577121836, "grad_norm": 0.799312037334394, "learning_rate": 7.373029772329248e-06, "loss": 0.753, "step": 1263 }, { "epoch": 0.03690403199906572, "grad_norm": 0.7243419013775011, "learning_rate": 7.378867483946294e-06, "loss": 0.6621, "step": 1264 }, { "epoch": 0.036933228226913084, "grad_norm": 0.8595058478079035, "learning_rate": 7.38470519556334e-06, "loss": 0.7819, "step": 1265 }, { "epoch": 0.036962424454760445, "grad_norm": 0.8615675957042641, "learning_rate": 7.3905429071803855e-06, "loss": 0.7298, "step": 1266 }, { "epoch": 0.036991620682607806, "grad_norm": 0.7933435734196227, "learning_rate": 7.396380618797432e-06, "loss": 0.786, "step": 1267 }, { "epoch": 0.03702081691045517, "grad_norm": 0.8976780155452443, "learning_rate": 7.402218330414478e-06, "loss": 0.8188, "step": 1268 }, { "epoch": 0.03705001313830253, "grad_norm": 1.0849571481908076, "learning_rate": 7.408056042031524e-06, "loss": 0.7199, "step": 1269 }, { "epoch": 0.03707920936614989, "grad_norm": 0.7463143582370031, "learning_rate": 7.41389375364857e-06, "loss": 0.6843, "step": 1270 }, { "epoch": 0.03710840559399726, "grad_norm": 0.844311663721927, "learning_rate": 7.419731465265616e-06, "loss": 0.8262, "step": 1271 }, { "epoch": 0.03713760182184462, "grad_norm": 0.8867858871703305, "learning_rate": 7.425569176882662e-06, "loss": 0.7426, "step": 1272 }, { "epoch": 0.03716679804969198, "grad_norm": 0.770345949254158, "learning_rate": 7.431406888499709e-06, "loss": 0.6492, "step": 1273 }, { "epoch": 0.03719599427753934, "grad_norm": 0.8240735209264467, "learning_rate": 7.437244600116756e-06, "loss": 0.7577, "step": 1274 }, { "epoch": 0.0372251905053867, "grad_norm": 0.701426888489933, "learning_rate": 7.443082311733801e-06, "loss": 0.6276, "step": 1275 }, { "epoch": 0.037254386733234064, "grad_norm": 0.9426607527408168, "learning_rate": 7.448920023350847e-06, "loss": 0.815, "step": 1276 }, { "epoch": 0.037283582961081425, "grad_norm": 0.8993247980525926, "learning_rate": 7.454757734967893e-06, "loss": 0.7908, "step": 1277 }, { "epoch": 0.037312779188928794, "grad_norm": 0.9333020514897011, "learning_rate": 7.46059544658494e-06, "loss": 0.7651, "step": 1278 }, { "epoch": 0.037341975416776155, "grad_norm": 0.8069109518752066, "learning_rate": 7.466433158201986e-06, "loss": 0.7528, "step": 1279 }, { "epoch": 0.037371171644623516, "grad_norm": 0.8495307979744717, "learning_rate": 7.4722708698190314e-06, "loss": 0.6868, "step": 1280 }, { "epoch": 0.03740036787247088, "grad_norm": 0.8975235554638804, "learning_rate": 7.478108581436077e-06, "loss": 0.76, "step": 1281 }, { "epoch": 0.03742956410031824, "grad_norm": 0.8244669116773758, "learning_rate": 7.483946293053124e-06, "loss": 0.7657, "step": 1282 }, { "epoch": 0.0374587603281656, "grad_norm": 0.793370948140443, "learning_rate": 7.48978400467017e-06, "loss": 0.7384, "step": 1283 }, { "epoch": 0.03748795655601296, "grad_norm": 0.7691318970630642, "learning_rate": 7.495621716287216e-06, "loss": 0.6917, "step": 1284 }, { "epoch": 0.03751715278386032, "grad_norm": 0.7758729505964883, "learning_rate": 7.5014594279042614e-06, "loss": 0.7058, "step": 1285 }, { "epoch": 0.03754634901170769, "grad_norm": 0.7882402089271809, "learning_rate": 7.507297139521308e-06, "loss": 0.7787, "step": 1286 }, { "epoch": 0.03757554523955505, "grad_norm": 0.8192962358547194, "learning_rate": 7.513134851138354e-06, "loss": 0.6987, "step": 1287 }, { "epoch": 0.03760474146740241, "grad_norm": 0.87796105604853, "learning_rate": 7.5189725627554e-06, "loss": 0.7298, "step": 1288 }, { "epoch": 0.037633937695249774, "grad_norm": 0.7502017015743796, "learning_rate": 7.524810274372447e-06, "loss": 0.6933, "step": 1289 }, { "epoch": 0.037663133923097135, "grad_norm": 0.7892740731189947, "learning_rate": 7.530647985989493e-06, "loss": 0.7551, "step": 1290 }, { "epoch": 0.037692330150944496, "grad_norm": 0.8014418035885654, "learning_rate": 7.536485697606539e-06, "loss": 0.6886, "step": 1291 }, { "epoch": 0.03772152637879186, "grad_norm": 0.7882817010052707, "learning_rate": 7.542323409223586e-06, "loss": 0.7646, "step": 1292 }, { "epoch": 0.037750722606639225, "grad_norm": 1.4261965079547967, "learning_rate": 7.5481611208406315e-06, "loss": 0.8214, "step": 1293 }, { "epoch": 0.03777991883448659, "grad_norm": 1.108332922464957, "learning_rate": 7.553998832457677e-06, "loss": 0.675, "step": 1294 }, { "epoch": 0.03780911506233395, "grad_norm": 0.8247751333779908, "learning_rate": 7.559836544074723e-06, "loss": 0.739, "step": 1295 }, { "epoch": 0.03783831129018131, "grad_norm": 0.7489574093288938, "learning_rate": 7.56567425569177e-06, "loss": 0.6602, "step": 1296 }, { "epoch": 0.03786750751802867, "grad_norm": 0.8251732945665272, "learning_rate": 7.571511967308816e-06, "loss": 0.7829, "step": 1297 }, { "epoch": 0.03789670374587603, "grad_norm": 0.7443564338319405, "learning_rate": 7.5773496789258615e-06, "loss": 0.722, "step": 1298 }, { "epoch": 0.03792589997372339, "grad_norm": 0.7517982723128105, "learning_rate": 7.583187390542907e-06, "loss": 0.6708, "step": 1299 }, { "epoch": 0.037955096201570754, "grad_norm": 0.7294144328380326, "learning_rate": 7.589025102159954e-06, "loss": 0.6507, "step": 1300 }, { "epoch": 0.03798429242941812, "grad_norm": 0.8736595127300255, "learning_rate": 7.594862813777e-06, "loss": 0.7968, "step": 1301 }, { "epoch": 0.03801348865726548, "grad_norm": 0.904483012740478, "learning_rate": 7.600700525394046e-06, "loss": 0.809, "step": 1302 }, { "epoch": 0.038042684885112844, "grad_norm": 0.8260730372997399, "learning_rate": 7.6065382370110915e-06, "loss": 0.7438, "step": 1303 }, { "epoch": 0.038071881112960206, "grad_norm": 1.0010938689190763, "learning_rate": 7.612375948628138e-06, "loss": 0.6508, "step": 1304 }, { "epoch": 0.03810107734080757, "grad_norm": 0.8272871286623843, "learning_rate": 7.618213660245185e-06, "loss": 0.8015, "step": 1305 }, { "epoch": 0.03813027356865493, "grad_norm": 0.8708327547904046, "learning_rate": 7.624051371862231e-06, "loss": 0.8032, "step": 1306 }, { "epoch": 0.03815946979650229, "grad_norm": 0.7511916195179719, "learning_rate": 7.629889083479277e-06, "loss": 0.6511, "step": 1307 }, { "epoch": 0.03818866602434966, "grad_norm": 0.735843264646682, "learning_rate": 7.635726795096322e-06, "loss": 0.7169, "step": 1308 }, { "epoch": 0.03821786225219702, "grad_norm": 0.8035750719135982, "learning_rate": 7.641564506713369e-06, "loss": 0.7723, "step": 1309 }, { "epoch": 0.03824705848004438, "grad_norm": 0.8172892491513639, "learning_rate": 7.647402218330416e-06, "loss": 0.7335, "step": 1310 }, { "epoch": 0.03827625470789174, "grad_norm": 1.0738895309727503, "learning_rate": 7.65323992994746e-06, "loss": 0.7879, "step": 1311 }, { "epoch": 0.0383054509357391, "grad_norm": 0.8321459245423289, "learning_rate": 7.659077641564507e-06, "loss": 0.6716, "step": 1312 }, { "epoch": 0.03833464716358646, "grad_norm": 1.0311794834554122, "learning_rate": 7.664915353181554e-06, "loss": 0.7877, "step": 1313 }, { "epoch": 0.038363843391433824, "grad_norm": 0.7873250101251447, "learning_rate": 7.670753064798599e-06, "loss": 0.685, "step": 1314 }, { "epoch": 0.038393039619281186, "grad_norm": 1.1661846712318555, "learning_rate": 7.676590776415646e-06, "loss": 0.7608, "step": 1315 }, { "epoch": 0.038422235847128554, "grad_norm": 0.7873508492572545, "learning_rate": 7.682428488032692e-06, "loss": 0.7439, "step": 1316 }, { "epoch": 0.038451432074975915, "grad_norm": 0.8628745086977825, "learning_rate": 7.688266199649737e-06, "loss": 0.843, "step": 1317 }, { "epoch": 0.038480628302823276, "grad_norm": 0.7708665073943111, "learning_rate": 7.694103911266784e-06, "loss": 0.6842, "step": 1318 }, { "epoch": 0.03850982453067064, "grad_norm": 0.7663842807635664, "learning_rate": 7.699941622883829e-06, "loss": 0.6879, "step": 1319 }, { "epoch": 0.038539020758518, "grad_norm": 0.8024656317060156, "learning_rate": 7.705779334500876e-06, "loss": 0.7581, "step": 1320 }, { "epoch": 0.03856821698636536, "grad_norm": 0.8916281570783461, "learning_rate": 7.711617046117922e-06, "loss": 0.7544, "step": 1321 }, { "epoch": 0.03859741321421272, "grad_norm": 0.8260785103199263, "learning_rate": 7.717454757734969e-06, "loss": 0.7145, "step": 1322 }, { "epoch": 0.03862660944206009, "grad_norm": 0.79005941374801, "learning_rate": 7.723292469352016e-06, "loss": 0.7337, "step": 1323 }, { "epoch": 0.03865580566990745, "grad_norm": 0.8221526117216786, "learning_rate": 7.72913018096906e-06, "loss": 0.7744, "step": 1324 }, { "epoch": 0.03868500189775481, "grad_norm": 0.7931930500070595, "learning_rate": 7.734967892586107e-06, "loss": 0.7801, "step": 1325 }, { "epoch": 0.03871419812560217, "grad_norm": 0.896947803826291, "learning_rate": 7.740805604203152e-06, "loss": 0.8109, "step": 1326 }, { "epoch": 0.038743394353449534, "grad_norm": 0.8129666704311664, "learning_rate": 7.746643315820199e-06, "loss": 0.8038, "step": 1327 }, { "epoch": 0.038772590581296895, "grad_norm": 0.853108243599024, "learning_rate": 7.752481027437246e-06, "loss": 0.8161, "step": 1328 }, { "epoch": 0.038801786809144256, "grad_norm": 0.767353383502276, "learning_rate": 7.75831873905429e-06, "loss": 0.7371, "step": 1329 }, { "epoch": 0.03883098303699162, "grad_norm": 0.7695211770979914, "learning_rate": 7.764156450671337e-06, "loss": 0.6435, "step": 1330 }, { "epoch": 0.038860179264838986, "grad_norm": 0.7887583923797711, "learning_rate": 7.769994162288384e-06, "loss": 0.7337, "step": 1331 }, { "epoch": 0.03888937549268635, "grad_norm": 0.9680049009331287, "learning_rate": 7.775831873905429e-06, "loss": 0.6858, "step": 1332 }, { "epoch": 0.03891857172053371, "grad_norm": 0.7921346609431683, "learning_rate": 7.781669585522476e-06, "loss": 0.7995, "step": 1333 }, { "epoch": 0.03894776794838107, "grad_norm": 0.9401648979868893, "learning_rate": 7.78750729713952e-06, "loss": 0.8181, "step": 1334 }, { "epoch": 0.03897696417622843, "grad_norm": 0.780302104290248, "learning_rate": 7.793345008756567e-06, "loss": 0.7568, "step": 1335 }, { "epoch": 0.03900616040407579, "grad_norm": 1.5378977331650168, "learning_rate": 7.799182720373614e-06, "loss": 0.7697, "step": 1336 }, { "epoch": 0.03903535663192315, "grad_norm": 0.8764730778708689, "learning_rate": 7.805020431990661e-06, "loss": 0.7775, "step": 1337 }, { "epoch": 0.03906455285977052, "grad_norm": 0.7439355160202906, "learning_rate": 7.810858143607708e-06, "loss": 0.7399, "step": 1338 }, { "epoch": 0.03909374908761788, "grad_norm": 0.9134104576841513, "learning_rate": 7.816695855224753e-06, "loss": 0.7802, "step": 1339 }, { "epoch": 0.03912294531546524, "grad_norm": 0.8331389953884607, "learning_rate": 7.8225335668418e-06, "loss": 0.8032, "step": 1340 }, { "epoch": 0.039152141543312605, "grad_norm": 0.9210405240964529, "learning_rate": 7.828371278458846e-06, "loss": 0.7264, "step": 1341 }, { "epoch": 0.039181337771159966, "grad_norm": 0.7536314591648602, "learning_rate": 7.834208990075891e-06, "loss": 0.6933, "step": 1342 }, { "epoch": 0.03921053399900733, "grad_norm": 1.1115049482059483, "learning_rate": 7.840046701692938e-06, "loss": 0.6945, "step": 1343 }, { "epoch": 0.03923973022685469, "grad_norm": 0.7775638424614446, "learning_rate": 7.845884413309983e-06, "loss": 0.7246, "step": 1344 }, { "epoch": 0.03926892645470205, "grad_norm": 0.7980396478147407, "learning_rate": 7.85172212492703e-06, "loss": 0.7951, "step": 1345 }, { "epoch": 0.03929812268254942, "grad_norm": 1.756642910309964, "learning_rate": 7.857559836544076e-06, "loss": 0.7408, "step": 1346 }, { "epoch": 0.03932731891039678, "grad_norm": 0.9749312154492471, "learning_rate": 7.863397548161121e-06, "loss": 0.7856, "step": 1347 }, { "epoch": 0.03935651513824414, "grad_norm": 0.8809867062771443, "learning_rate": 7.869235259778168e-06, "loss": 0.8339, "step": 1348 }, { "epoch": 0.0393857113660915, "grad_norm": 0.8944878482509201, "learning_rate": 7.875072971395214e-06, "loss": 0.721, "step": 1349 }, { "epoch": 0.03941490759393886, "grad_norm": 0.8192047296476096, "learning_rate": 7.88091068301226e-06, "loss": 0.8224, "step": 1350 }, { "epoch": 0.039444103821786224, "grad_norm": 0.7949375316041798, "learning_rate": 7.886748394629306e-06, "loss": 0.7638, "step": 1351 }, { "epoch": 0.039473300049633585, "grad_norm": 0.7859321897741389, "learning_rate": 7.892586106246351e-06, "loss": 0.8424, "step": 1352 }, { "epoch": 0.039502496277480946, "grad_norm": 0.7805265216157318, "learning_rate": 7.8984238178634e-06, "loss": 0.7343, "step": 1353 }, { "epoch": 0.039531692505328314, "grad_norm": 0.8296431492770963, "learning_rate": 7.904261529480444e-06, "loss": 0.87, "step": 1354 }, { "epoch": 0.039560888733175675, "grad_norm": 0.7983304761373796, "learning_rate": 7.910099241097491e-06, "loss": 0.717, "step": 1355 }, { "epoch": 0.03959008496102304, "grad_norm": 0.7623808503343127, "learning_rate": 7.915936952714538e-06, "loss": 0.6565, "step": 1356 }, { "epoch": 0.0396192811888704, "grad_norm": 0.9633878805474196, "learning_rate": 7.921774664331583e-06, "loss": 0.8211, "step": 1357 }, { "epoch": 0.03964847741671776, "grad_norm": 0.7796020363281165, "learning_rate": 7.92761237594863e-06, "loss": 0.7885, "step": 1358 }, { "epoch": 0.03967767364456512, "grad_norm": 0.7905979536548454, "learning_rate": 7.933450087565674e-06, "loss": 0.7101, "step": 1359 }, { "epoch": 0.03970686987241248, "grad_norm": 0.7720117785209758, "learning_rate": 7.939287799182721e-06, "loss": 0.7186, "step": 1360 }, { "epoch": 0.03973606610025985, "grad_norm": 0.8364681725004303, "learning_rate": 7.945125510799768e-06, "loss": 0.7704, "step": 1361 }, { "epoch": 0.03976526232810721, "grad_norm": 0.7874085326202034, "learning_rate": 7.950963222416813e-06, "loss": 0.7928, "step": 1362 }, { "epoch": 0.03979445855595457, "grad_norm": 0.8720949388034377, "learning_rate": 7.95680093403386e-06, "loss": 0.7733, "step": 1363 }, { "epoch": 0.03982365478380193, "grad_norm": 0.8116963229925508, "learning_rate": 7.962638645650906e-06, "loss": 0.7683, "step": 1364 }, { "epoch": 0.039852851011649294, "grad_norm": 0.8007126419381506, "learning_rate": 7.968476357267951e-06, "loss": 0.7692, "step": 1365 }, { "epoch": 0.039882047239496655, "grad_norm": 0.8444286323163971, "learning_rate": 7.974314068884998e-06, "loss": 0.7981, "step": 1366 }, { "epoch": 0.03991124346734402, "grad_norm": 0.7104963144269192, "learning_rate": 7.980151780502043e-06, "loss": 0.6299, "step": 1367 }, { "epoch": 0.03994043969519138, "grad_norm": 0.783955299778348, "learning_rate": 7.98598949211909e-06, "loss": 0.6923, "step": 1368 }, { "epoch": 0.039969635923038746, "grad_norm": 0.7653612267828557, "learning_rate": 7.991827203736136e-06, "loss": 0.7621, "step": 1369 }, { "epoch": 0.03999883215088611, "grad_norm": 0.8714262825240552, "learning_rate": 7.997664915353183e-06, "loss": 0.8562, "step": 1370 }, { "epoch": 0.04002802837873347, "grad_norm": 0.7822012977122481, "learning_rate": 8.00350262697023e-06, "loss": 0.78, "step": 1371 }, { "epoch": 0.04005722460658083, "grad_norm": 0.9302558366441458, "learning_rate": 8.009340338587274e-06, "loss": 0.825, "step": 1372 }, { "epoch": 0.04008642083442819, "grad_norm": 0.7906212641915359, "learning_rate": 8.015178050204321e-06, "loss": 0.7824, "step": 1373 }, { "epoch": 0.04011561706227555, "grad_norm": 0.7848764091628491, "learning_rate": 8.021015761821368e-06, "loss": 0.6773, "step": 1374 }, { "epoch": 0.04014481329012291, "grad_norm": 0.9255062533665788, "learning_rate": 8.026853473438413e-06, "loss": 0.8004, "step": 1375 }, { "epoch": 0.04017400951797028, "grad_norm": 0.7593621442968495, "learning_rate": 8.03269118505546e-06, "loss": 0.7403, "step": 1376 }, { "epoch": 0.04020320574581764, "grad_norm": 0.7241841466266414, "learning_rate": 8.038528896672504e-06, "loss": 0.6859, "step": 1377 }, { "epoch": 0.040232401973665004, "grad_norm": 0.8316796051553252, "learning_rate": 8.044366608289551e-06, "loss": 0.8093, "step": 1378 }, { "epoch": 0.040261598201512365, "grad_norm": 0.8209428982286078, "learning_rate": 8.050204319906598e-06, "loss": 0.7794, "step": 1379 }, { "epoch": 0.040290794429359726, "grad_norm": 0.8636582298731016, "learning_rate": 8.056042031523643e-06, "loss": 0.7836, "step": 1380 }, { "epoch": 0.04031999065720709, "grad_norm": 0.7341951230474113, "learning_rate": 8.06187974314069e-06, "loss": 0.6401, "step": 1381 }, { "epoch": 0.04034918688505445, "grad_norm": 0.9030402981414763, "learning_rate": 8.067717454757736e-06, "loss": 0.8364, "step": 1382 }, { "epoch": 0.04037838311290181, "grad_norm": 0.8757123487766116, "learning_rate": 8.073555166374781e-06, "loss": 0.7156, "step": 1383 }, { "epoch": 0.04040757934074918, "grad_norm": 0.8641332217525075, "learning_rate": 8.079392877991828e-06, "loss": 0.7033, "step": 1384 }, { "epoch": 0.04043677556859654, "grad_norm": 1.7080154933225253, "learning_rate": 8.085230589608874e-06, "loss": 0.6826, "step": 1385 }, { "epoch": 0.0404659717964439, "grad_norm": 1.0246875137188391, "learning_rate": 8.091068301225921e-06, "loss": 0.8678, "step": 1386 }, { "epoch": 0.04049516802429126, "grad_norm": 0.7608907353563167, "learning_rate": 8.096906012842966e-06, "loss": 0.7227, "step": 1387 }, { "epoch": 0.04052436425213862, "grad_norm": 0.9819080749918562, "learning_rate": 8.102743724460013e-06, "loss": 0.8022, "step": 1388 }, { "epoch": 0.040553560479985984, "grad_norm": 0.8005780647034364, "learning_rate": 8.10858143607706e-06, "loss": 0.7319, "step": 1389 }, { "epoch": 0.040582756707833345, "grad_norm": 0.8013370825388137, "learning_rate": 8.114419147694104e-06, "loss": 0.7772, "step": 1390 }, { "epoch": 0.04061195293568071, "grad_norm": 0.8266878964352536, "learning_rate": 8.120256859311151e-06, "loss": 0.695, "step": 1391 }, { "epoch": 0.040641149163528074, "grad_norm": 1.1259010481046863, "learning_rate": 8.126094570928196e-06, "loss": 0.8096, "step": 1392 }, { "epoch": 0.040670345391375436, "grad_norm": 0.8165331524089807, "learning_rate": 8.131932282545243e-06, "loss": 0.7652, "step": 1393 }, { "epoch": 0.0406995416192228, "grad_norm": 0.8960000535055502, "learning_rate": 8.13776999416229e-06, "loss": 0.7749, "step": 1394 }, { "epoch": 0.04072873784707016, "grad_norm": 0.8611808714524035, "learning_rate": 8.143607705779334e-06, "loss": 0.8872, "step": 1395 }, { "epoch": 0.04075793407491752, "grad_norm": 0.7897367982641128, "learning_rate": 8.149445417396381e-06, "loss": 0.7838, "step": 1396 }, { "epoch": 0.04078713030276488, "grad_norm": 0.777208248174194, "learning_rate": 8.155283129013428e-06, "loss": 0.6594, "step": 1397 }, { "epoch": 0.04081632653061224, "grad_norm": 0.7574461533760428, "learning_rate": 8.161120840630473e-06, "loss": 0.7573, "step": 1398 }, { "epoch": 0.04084552275845961, "grad_norm": 0.8072291801364033, "learning_rate": 8.16695855224752e-06, "loss": 0.7621, "step": 1399 }, { "epoch": 0.04087471898630697, "grad_norm": 0.8142430294159292, "learning_rate": 8.172796263864564e-06, "loss": 0.7381, "step": 1400 }, { "epoch": 0.04090391521415433, "grad_norm": 0.7983789588350152, "learning_rate": 8.178633975481613e-06, "loss": 0.8482, "step": 1401 }, { "epoch": 0.04093311144200169, "grad_norm": 0.7339577928273855, "learning_rate": 8.184471687098658e-06, "loss": 0.659, "step": 1402 }, { "epoch": 0.040962307669849055, "grad_norm": 0.942588407266008, "learning_rate": 8.190309398715704e-06, "loss": 0.726, "step": 1403 }, { "epoch": 0.040991503897696416, "grad_norm": 0.8506721329760091, "learning_rate": 8.196147110332751e-06, "loss": 0.7419, "step": 1404 }, { "epoch": 0.04102070012554378, "grad_norm": 0.8279943195786174, "learning_rate": 8.201984821949796e-06, "loss": 0.7119, "step": 1405 }, { "epoch": 0.041049896353391145, "grad_norm": 1.337200453908007, "learning_rate": 8.207822533566843e-06, "loss": 0.7721, "step": 1406 }, { "epoch": 0.041079092581238506, "grad_norm": 1.3826938059089267, "learning_rate": 8.21366024518389e-06, "loss": 0.6638, "step": 1407 }, { "epoch": 0.04110828880908587, "grad_norm": 0.7999047710009533, "learning_rate": 8.219497956800934e-06, "loss": 0.7676, "step": 1408 }, { "epoch": 0.04113748503693323, "grad_norm": 0.7682810761028462, "learning_rate": 8.225335668417981e-06, "loss": 0.6724, "step": 1409 }, { "epoch": 0.04116668126478059, "grad_norm": 0.878377099402929, "learning_rate": 8.231173380035026e-06, "loss": 0.7995, "step": 1410 }, { "epoch": 0.04119587749262795, "grad_norm": 0.807579316718956, "learning_rate": 8.237011091652073e-06, "loss": 0.7171, "step": 1411 }, { "epoch": 0.04122507372047531, "grad_norm": 0.8129922172865558, "learning_rate": 8.24284880326912e-06, "loss": 0.773, "step": 1412 }, { "epoch": 0.041254269948322674, "grad_norm": 0.7697465722285373, "learning_rate": 8.248686514886164e-06, "loss": 0.707, "step": 1413 }, { "epoch": 0.04128346617617004, "grad_norm": 0.7290912497344546, "learning_rate": 8.254524226503211e-06, "loss": 0.6167, "step": 1414 }, { "epoch": 0.0413126624040174, "grad_norm": 0.8495835147080939, "learning_rate": 8.260361938120258e-06, "loss": 0.7809, "step": 1415 }, { "epoch": 0.041341858631864764, "grad_norm": 0.7789644405846877, "learning_rate": 8.266199649737303e-06, "loss": 0.7691, "step": 1416 }, { "epoch": 0.041371054859712125, "grad_norm": 0.8452661962820123, "learning_rate": 8.27203736135435e-06, "loss": 0.7427, "step": 1417 }, { "epoch": 0.041400251087559486, "grad_norm": 0.7378991175448365, "learning_rate": 8.277875072971396e-06, "loss": 0.6752, "step": 1418 }, { "epoch": 0.04142944731540685, "grad_norm": 0.8211276497214647, "learning_rate": 8.283712784588443e-06, "loss": 0.8476, "step": 1419 }, { "epoch": 0.04145864354325421, "grad_norm": 0.8033988766243118, "learning_rate": 8.289550496205488e-06, "loss": 0.6989, "step": 1420 }, { "epoch": 0.04148783977110158, "grad_norm": 0.808652938723162, "learning_rate": 8.295388207822535e-06, "loss": 0.7571, "step": 1421 }, { "epoch": 0.04151703599894894, "grad_norm": 0.8148410564033277, "learning_rate": 8.301225919439581e-06, "loss": 0.7131, "step": 1422 }, { "epoch": 0.0415462322267963, "grad_norm": 0.793907270552468, "learning_rate": 8.307063631056626e-06, "loss": 0.7504, "step": 1423 }, { "epoch": 0.04157542845464366, "grad_norm": 0.7467565367802133, "learning_rate": 8.312901342673673e-06, "loss": 0.7042, "step": 1424 }, { "epoch": 0.04160462468249102, "grad_norm": 1.3742418979234072, "learning_rate": 8.318739054290718e-06, "loss": 0.7882, "step": 1425 }, { "epoch": 0.04163382091033838, "grad_norm": 0.7782482454063785, "learning_rate": 8.324576765907765e-06, "loss": 0.692, "step": 1426 }, { "epoch": 0.041663017138185744, "grad_norm": 0.7760974407278889, "learning_rate": 8.330414477524811e-06, "loss": 0.7432, "step": 1427 }, { "epoch": 0.041692213366033105, "grad_norm": 0.8396461793440014, "learning_rate": 8.336252189141856e-06, "loss": 0.8535, "step": 1428 }, { "epoch": 0.041721409593880474, "grad_norm": 0.8526699055432965, "learning_rate": 8.342089900758903e-06, "loss": 0.7761, "step": 1429 }, { "epoch": 0.041750605821727835, "grad_norm": 0.7243689253090017, "learning_rate": 8.34792761237595e-06, "loss": 0.6981, "step": 1430 }, { "epoch": 0.041779802049575196, "grad_norm": 0.8110959423283739, "learning_rate": 8.353765323992995e-06, "loss": 0.6906, "step": 1431 }, { "epoch": 0.04180899827742256, "grad_norm": 0.794543639580011, "learning_rate": 8.359603035610041e-06, "loss": 0.6901, "step": 1432 }, { "epoch": 0.04183819450526992, "grad_norm": 0.8463262827528639, "learning_rate": 8.365440747227088e-06, "loss": 0.8594, "step": 1433 }, { "epoch": 0.04186739073311728, "grad_norm": 0.9375597594720395, "learning_rate": 8.371278458844135e-06, "loss": 0.8482, "step": 1434 }, { "epoch": 0.04189658696096464, "grad_norm": 0.7279049920789823, "learning_rate": 8.37711617046118e-06, "loss": 0.6741, "step": 1435 }, { "epoch": 0.04192578318881201, "grad_norm": 0.8693306589299165, "learning_rate": 8.382953882078226e-06, "loss": 0.7423, "step": 1436 }, { "epoch": 0.04195497941665937, "grad_norm": 1.0443165084837127, "learning_rate": 8.388791593695273e-06, "loss": 0.7618, "step": 1437 }, { "epoch": 0.04198417564450673, "grad_norm": 0.9083755236358112, "learning_rate": 8.394629305312318e-06, "loss": 0.7878, "step": 1438 }, { "epoch": 0.04201337187235409, "grad_norm": 1.2513361026232812, "learning_rate": 8.400467016929365e-06, "loss": 0.9194, "step": 1439 }, { "epoch": 0.042042568100201454, "grad_norm": 0.8294680573942073, "learning_rate": 8.406304728546411e-06, "loss": 0.7183, "step": 1440 }, { "epoch": 0.042071764328048815, "grad_norm": 0.7626734034560262, "learning_rate": 8.412142440163456e-06, "loss": 0.7295, "step": 1441 }, { "epoch": 0.042100960555896176, "grad_norm": 0.8436252779386227, "learning_rate": 8.417980151780503e-06, "loss": 0.7935, "step": 1442 }, { "epoch": 0.04213015678374354, "grad_norm": 1.1065961846016446, "learning_rate": 8.423817863397548e-06, "loss": 0.7974, "step": 1443 }, { "epoch": 0.042159353011590905, "grad_norm": 0.7775477829989024, "learning_rate": 8.429655575014595e-06, "loss": 0.7163, "step": 1444 }, { "epoch": 0.04218854923943827, "grad_norm": 0.7504306950100286, "learning_rate": 8.435493286631641e-06, "loss": 0.7109, "step": 1445 }, { "epoch": 0.04221774546728563, "grad_norm": 0.8980727589770906, "learning_rate": 8.441330998248686e-06, "loss": 0.8333, "step": 1446 }, { "epoch": 0.04224694169513299, "grad_norm": 0.7227429345927268, "learning_rate": 8.447168709865733e-06, "loss": 0.6815, "step": 1447 }, { "epoch": 0.04227613792298035, "grad_norm": 0.8466132324161925, "learning_rate": 8.45300642148278e-06, "loss": 0.7035, "step": 1448 }, { "epoch": 0.04230533415082771, "grad_norm": 0.7886696243417693, "learning_rate": 8.458844133099826e-06, "loss": 0.7606, "step": 1449 }, { "epoch": 0.04233453037867507, "grad_norm": 0.8130733677543278, "learning_rate": 8.464681844716871e-06, "loss": 0.7871, "step": 1450 }, { "epoch": 0.04236372660652244, "grad_norm": 0.7593575284524205, "learning_rate": 8.470519556333918e-06, "loss": 0.7455, "step": 1451 }, { "epoch": 0.0423929228343698, "grad_norm": 0.7859942977325834, "learning_rate": 8.476357267950965e-06, "loss": 0.7146, "step": 1452 }, { "epoch": 0.04242211906221716, "grad_norm": 0.937229213431457, "learning_rate": 8.48219497956801e-06, "loss": 0.8653, "step": 1453 }, { "epoch": 0.042451315290064524, "grad_norm": 0.9591583425914171, "learning_rate": 8.488032691185056e-06, "loss": 0.7337, "step": 1454 }, { "epoch": 0.042480511517911886, "grad_norm": 0.8050648607471753, "learning_rate": 8.493870402802103e-06, "loss": 0.6873, "step": 1455 }, { "epoch": 0.04250970774575925, "grad_norm": 0.9148379182652429, "learning_rate": 8.499708114419148e-06, "loss": 0.8364, "step": 1456 }, { "epoch": 0.04253890397360661, "grad_norm": 0.8450630157601707, "learning_rate": 8.505545826036195e-06, "loss": 0.7931, "step": 1457 }, { "epoch": 0.04256810020145397, "grad_norm": 0.9084327850439536, "learning_rate": 8.511383537653241e-06, "loss": 0.6793, "step": 1458 }, { "epoch": 0.04259729642930134, "grad_norm": 0.7955107280638815, "learning_rate": 8.517221249270286e-06, "loss": 0.7103, "step": 1459 }, { "epoch": 0.0426264926571487, "grad_norm": 0.7922664883324047, "learning_rate": 8.523058960887333e-06, "loss": 0.7028, "step": 1460 }, { "epoch": 0.04265568888499606, "grad_norm": 0.8636999310847776, "learning_rate": 8.528896672504378e-06, "loss": 0.7805, "step": 1461 }, { "epoch": 0.04268488511284342, "grad_norm": 0.7373205049184297, "learning_rate": 8.534734384121425e-06, "loss": 0.6505, "step": 1462 }, { "epoch": 0.04271408134069078, "grad_norm": 0.8017050273041287, "learning_rate": 8.540572095738471e-06, "loss": 0.6893, "step": 1463 }, { "epoch": 0.04274327756853814, "grad_norm": 0.826998837564235, "learning_rate": 8.546409807355516e-06, "loss": 0.7479, "step": 1464 }, { "epoch": 0.042772473796385505, "grad_norm": 1.057249715997519, "learning_rate": 8.552247518972565e-06, "loss": 0.7256, "step": 1465 }, { "epoch": 0.042801670024232866, "grad_norm": 0.7744586897091829, "learning_rate": 8.55808523058961e-06, "loss": 0.7713, "step": 1466 }, { "epoch": 0.042830866252080234, "grad_norm": 0.8529221504840213, "learning_rate": 8.563922942206656e-06, "loss": 0.7018, "step": 1467 }, { "epoch": 0.042860062479927595, "grad_norm": 0.707410843645828, "learning_rate": 8.569760653823701e-06, "loss": 0.6377, "step": 1468 }, { "epoch": 0.042889258707774956, "grad_norm": 0.7611611085803776, "learning_rate": 8.575598365440748e-06, "loss": 0.6802, "step": 1469 }, { "epoch": 0.04291845493562232, "grad_norm": 1.0688155286773973, "learning_rate": 8.581436077057795e-06, "loss": 0.8678, "step": 1470 }, { "epoch": 0.04294765116346968, "grad_norm": 0.7126071813655372, "learning_rate": 8.58727378867484e-06, "loss": 0.6479, "step": 1471 }, { "epoch": 0.04297684739131704, "grad_norm": 0.770287719352261, "learning_rate": 8.593111500291886e-06, "loss": 0.6959, "step": 1472 }, { "epoch": 0.0430060436191644, "grad_norm": 0.8099440226013587, "learning_rate": 8.598949211908933e-06, "loss": 0.6269, "step": 1473 }, { "epoch": 0.04303523984701177, "grad_norm": 0.8976833547556357, "learning_rate": 8.604786923525978e-06, "loss": 0.751, "step": 1474 }, { "epoch": 0.04306443607485913, "grad_norm": 0.8064759571919026, "learning_rate": 8.610624635143025e-06, "loss": 0.7391, "step": 1475 }, { "epoch": 0.04309363230270649, "grad_norm": 0.7824855542534955, "learning_rate": 8.61646234676007e-06, "loss": 0.6611, "step": 1476 }, { "epoch": 0.04312282853055385, "grad_norm": 0.9088808153600407, "learning_rate": 8.622300058377116e-06, "loss": 0.6981, "step": 1477 }, { "epoch": 0.043152024758401214, "grad_norm": 0.8422684006622617, "learning_rate": 8.628137769994163e-06, "loss": 0.6923, "step": 1478 }, { "epoch": 0.043181220986248575, "grad_norm": 0.779137029763653, "learning_rate": 8.633975481611208e-06, "loss": 0.6932, "step": 1479 }, { "epoch": 0.043210417214095936, "grad_norm": 0.8000008734599847, "learning_rate": 8.639813193228255e-06, "loss": 0.7016, "step": 1480 }, { "epoch": 0.0432396134419433, "grad_norm": 0.8954243956966456, "learning_rate": 8.645650904845301e-06, "loss": 0.7453, "step": 1481 }, { "epoch": 0.043268809669790666, "grad_norm": 0.8169773902987116, "learning_rate": 8.651488616462348e-06, "loss": 0.8676, "step": 1482 }, { "epoch": 0.04329800589763803, "grad_norm": 0.7700583735698421, "learning_rate": 8.657326328079395e-06, "loss": 0.7385, "step": 1483 }, { "epoch": 0.04332720212548539, "grad_norm": 0.7594704626970998, "learning_rate": 8.66316403969644e-06, "loss": 0.752, "step": 1484 }, { "epoch": 0.04335639835333275, "grad_norm": 0.754291542680247, "learning_rate": 8.669001751313486e-06, "loss": 0.7208, "step": 1485 }, { "epoch": 0.04338559458118011, "grad_norm": 0.8478232399207088, "learning_rate": 8.674839462930531e-06, "loss": 0.8194, "step": 1486 }, { "epoch": 0.04341479080902747, "grad_norm": 0.900777386485295, "learning_rate": 8.680677174547578e-06, "loss": 0.8247, "step": 1487 }, { "epoch": 0.04344398703687483, "grad_norm": 0.7554799542751339, "learning_rate": 8.686514886164625e-06, "loss": 0.7024, "step": 1488 }, { "epoch": 0.0434731832647222, "grad_norm": 0.8321761154433202, "learning_rate": 8.69235259778167e-06, "loss": 0.7226, "step": 1489 }, { "epoch": 0.04350237949256956, "grad_norm": 0.7738304361555493, "learning_rate": 8.698190309398717e-06, "loss": 0.6572, "step": 1490 }, { "epoch": 0.043531575720416923, "grad_norm": 0.7961524909580303, "learning_rate": 8.704028021015763e-06, "loss": 0.758, "step": 1491 }, { "epoch": 0.043560771948264285, "grad_norm": 0.8672897385824679, "learning_rate": 8.709865732632808e-06, "loss": 0.7257, "step": 1492 }, { "epoch": 0.043589968176111646, "grad_norm": 0.8094038383960865, "learning_rate": 8.715703444249855e-06, "loss": 0.7641, "step": 1493 }, { "epoch": 0.04361916440395901, "grad_norm": 0.8688696348172439, "learning_rate": 8.7215411558669e-06, "loss": 0.7816, "step": 1494 }, { "epoch": 0.04364836063180637, "grad_norm": 0.7179825999193078, "learning_rate": 8.727378867483947e-06, "loss": 0.6348, "step": 1495 }, { "epoch": 0.04367755685965373, "grad_norm": 0.7726156398725831, "learning_rate": 8.733216579100993e-06, "loss": 0.758, "step": 1496 }, { "epoch": 0.0437067530875011, "grad_norm": 0.7631369779091381, "learning_rate": 8.73905429071804e-06, "loss": 0.693, "step": 1497 }, { "epoch": 0.04373594931534846, "grad_norm": 1.0302609213197575, "learning_rate": 8.744892002335087e-06, "loss": 0.7619, "step": 1498 }, { "epoch": 0.04376514554319582, "grad_norm": 0.8226924455891942, "learning_rate": 8.750729713952132e-06, "loss": 0.7789, "step": 1499 }, { "epoch": 0.04379434177104318, "grad_norm": 0.8365228829679012, "learning_rate": 8.756567425569178e-06, "loss": 0.7768, "step": 1500 }, { "epoch": 0.04382353799889054, "grad_norm": 0.7505159346617767, "learning_rate": 8.762405137186223e-06, "loss": 0.715, "step": 1501 }, { "epoch": 0.043852734226737904, "grad_norm": 0.8557781138669127, "learning_rate": 8.76824284880327e-06, "loss": 0.8555, "step": 1502 }, { "epoch": 0.043881930454585265, "grad_norm": 0.8462522161738857, "learning_rate": 8.774080560420317e-06, "loss": 0.7281, "step": 1503 }, { "epoch": 0.04391112668243263, "grad_norm": 0.7710189887299623, "learning_rate": 8.779918272037362e-06, "loss": 0.7293, "step": 1504 }, { "epoch": 0.043940322910279994, "grad_norm": 0.9108630442050677, "learning_rate": 8.785755983654408e-06, "loss": 0.7469, "step": 1505 }, { "epoch": 0.043969519138127355, "grad_norm": 0.7894101827951279, "learning_rate": 8.791593695271455e-06, "loss": 0.6907, "step": 1506 }, { "epoch": 0.04399871536597472, "grad_norm": 0.7939225580721656, "learning_rate": 8.7974314068885e-06, "loss": 0.6912, "step": 1507 }, { "epoch": 0.04402791159382208, "grad_norm": 0.795623165855014, "learning_rate": 8.803269118505547e-06, "loss": 0.6563, "step": 1508 }, { "epoch": 0.04405710782166944, "grad_norm": 0.7585242310985802, "learning_rate": 8.809106830122592e-06, "loss": 0.7463, "step": 1509 }, { "epoch": 0.0440863040495168, "grad_norm": 0.7394679625074773, "learning_rate": 8.814944541739638e-06, "loss": 0.6835, "step": 1510 }, { "epoch": 0.04411550027736416, "grad_norm": 0.8397970110597824, "learning_rate": 8.820782253356685e-06, "loss": 0.7804, "step": 1511 }, { "epoch": 0.04414469650521153, "grad_norm": 0.9071309885438047, "learning_rate": 8.82661996497373e-06, "loss": 0.8457, "step": 1512 }, { "epoch": 0.04417389273305889, "grad_norm": 0.8370547041360814, "learning_rate": 8.832457676590778e-06, "loss": 0.727, "step": 1513 }, { "epoch": 0.04420308896090625, "grad_norm": 0.7644211315713156, "learning_rate": 8.838295388207823e-06, "loss": 0.6843, "step": 1514 }, { "epoch": 0.04423228518875361, "grad_norm": 0.7598737322325966, "learning_rate": 8.84413309982487e-06, "loss": 0.7137, "step": 1515 }, { "epoch": 0.044261481416600974, "grad_norm": 0.8516836289080758, "learning_rate": 8.849970811441917e-06, "loss": 0.8566, "step": 1516 }, { "epoch": 0.044290677644448335, "grad_norm": 0.7674094029292298, "learning_rate": 8.855808523058962e-06, "loss": 0.7039, "step": 1517 }, { "epoch": 0.0443198738722957, "grad_norm": 0.7892433630085797, "learning_rate": 8.861646234676008e-06, "loss": 0.6805, "step": 1518 }, { "epoch": 0.044349070100143065, "grad_norm": 0.7455424841040291, "learning_rate": 8.867483946293053e-06, "loss": 0.6556, "step": 1519 }, { "epoch": 0.044378266327990426, "grad_norm": 0.8058072621024998, "learning_rate": 8.8733216579101e-06, "loss": 0.7381, "step": 1520 }, { "epoch": 0.04440746255583779, "grad_norm": 0.7911346134672111, "learning_rate": 8.879159369527147e-06, "loss": 0.74, "step": 1521 }, { "epoch": 0.04443665878368515, "grad_norm": 0.7823940980181263, "learning_rate": 8.884997081144192e-06, "loss": 0.6865, "step": 1522 }, { "epoch": 0.04446585501153251, "grad_norm": 0.8111633697741442, "learning_rate": 8.890834792761238e-06, "loss": 0.7464, "step": 1523 }, { "epoch": 0.04449505123937987, "grad_norm": 0.7439457403372058, "learning_rate": 8.896672504378285e-06, "loss": 0.6763, "step": 1524 }, { "epoch": 0.04452424746722723, "grad_norm": 0.8100256134136585, "learning_rate": 8.90251021599533e-06, "loss": 0.7131, "step": 1525 }, { "epoch": 0.04455344369507459, "grad_norm": 0.7759364411721844, "learning_rate": 8.908347927612377e-06, "loss": 0.7281, "step": 1526 }, { "epoch": 0.04458263992292196, "grad_norm": 0.8863625766429137, "learning_rate": 8.914185639229422e-06, "loss": 0.7379, "step": 1527 }, { "epoch": 0.04461183615076932, "grad_norm": 0.9397523696230973, "learning_rate": 8.920023350846468e-06, "loss": 0.7468, "step": 1528 }, { "epoch": 0.044641032378616684, "grad_norm": 0.8504164752417961, "learning_rate": 8.925861062463515e-06, "loss": 0.731, "step": 1529 }, { "epoch": 0.044670228606464045, "grad_norm": 0.7732658206179193, "learning_rate": 8.931698774080562e-06, "loss": 0.8179, "step": 1530 }, { "epoch": 0.044699424834311406, "grad_norm": 0.7855687736329713, "learning_rate": 8.937536485697608e-06, "loss": 0.8348, "step": 1531 }, { "epoch": 0.04472862106215877, "grad_norm": 0.7873469623685988, "learning_rate": 8.943374197314653e-06, "loss": 0.8174, "step": 1532 }, { "epoch": 0.04475781729000613, "grad_norm": 0.7841849733927296, "learning_rate": 8.9492119089317e-06, "loss": 0.6788, "step": 1533 }, { "epoch": 0.0447870135178535, "grad_norm": 0.8081328279501331, "learning_rate": 8.955049620548745e-06, "loss": 0.7466, "step": 1534 }, { "epoch": 0.04481620974570086, "grad_norm": 0.8425047012785084, "learning_rate": 8.960887332165792e-06, "loss": 0.6887, "step": 1535 }, { "epoch": 0.04484540597354822, "grad_norm": 0.7868716364832725, "learning_rate": 8.966725043782838e-06, "loss": 0.6764, "step": 1536 }, { "epoch": 0.04487460220139558, "grad_norm": 0.7735309067586045, "learning_rate": 8.972562755399883e-06, "loss": 0.7548, "step": 1537 }, { "epoch": 0.04490379842924294, "grad_norm": 0.7009396282414112, "learning_rate": 8.97840046701693e-06, "loss": 0.6553, "step": 1538 }, { "epoch": 0.0449329946570903, "grad_norm": 0.7424804506091435, "learning_rate": 8.984238178633977e-06, "loss": 0.6872, "step": 1539 }, { "epoch": 0.044962190884937664, "grad_norm": 0.7706961615484116, "learning_rate": 8.990075890251022e-06, "loss": 0.6681, "step": 1540 }, { "epoch": 0.044991387112785025, "grad_norm": 0.791447158006375, "learning_rate": 8.995913601868068e-06, "loss": 0.6963, "step": 1541 }, { "epoch": 0.04502058334063239, "grad_norm": 0.8245950082166463, "learning_rate": 9.001751313485113e-06, "loss": 0.8107, "step": 1542 }, { "epoch": 0.045049779568479754, "grad_norm": 0.8620216041206643, "learning_rate": 9.00758902510216e-06, "loss": 0.8278, "step": 1543 }, { "epoch": 0.045078975796327116, "grad_norm": 0.7575409921138369, "learning_rate": 9.013426736719207e-06, "loss": 0.6291, "step": 1544 }, { "epoch": 0.04510817202417448, "grad_norm": 0.8667869396020044, "learning_rate": 9.019264448336253e-06, "loss": 0.754, "step": 1545 }, { "epoch": 0.04513736825202184, "grad_norm": 0.785812178549636, "learning_rate": 9.0251021599533e-06, "loss": 0.7222, "step": 1546 }, { "epoch": 0.0451665644798692, "grad_norm": 0.7803867084733727, "learning_rate": 9.030939871570345e-06, "loss": 0.7174, "step": 1547 }, { "epoch": 0.04519576070771656, "grad_norm": 0.7265159557898598, "learning_rate": 9.036777583187392e-06, "loss": 0.6716, "step": 1548 }, { "epoch": 0.04522495693556393, "grad_norm": 0.7909732495106947, "learning_rate": 9.042615294804438e-06, "loss": 0.7875, "step": 1549 }, { "epoch": 0.04525415316341129, "grad_norm": 0.9740741545737496, "learning_rate": 9.048453006421483e-06, "loss": 0.6993, "step": 1550 }, { "epoch": 0.04528334939125865, "grad_norm": 0.7531906391777292, "learning_rate": 9.05429071803853e-06, "loss": 0.6964, "step": 1551 }, { "epoch": 0.04531254561910601, "grad_norm": 0.859939786760668, "learning_rate": 9.060128429655575e-06, "loss": 0.7312, "step": 1552 }, { "epoch": 0.04534174184695337, "grad_norm": 0.8201437787547057, "learning_rate": 9.065966141272622e-06, "loss": 0.8115, "step": 1553 }, { "epoch": 0.045370938074800735, "grad_norm": 0.8336790984990213, "learning_rate": 9.071803852889668e-06, "loss": 0.7734, "step": 1554 }, { "epoch": 0.045400134302648096, "grad_norm": 0.8279529046319566, "learning_rate": 9.077641564506713e-06, "loss": 0.7088, "step": 1555 }, { "epoch": 0.04542933053049546, "grad_norm": 0.9686044099486065, "learning_rate": 9.08347927612376e-06, "loss": 0.646, "step": 1556 }, { "epoch": 0.045458526758342825, "grad_norm": 0.8230577240757516, "learning_rate": 9.089316987740807e-06, "loss": 0.7807, "step": 1557 }, { "epoch": 0.045487722986190186, "grad_norm": 0.8446926809353954, "learning_rate": 9.095154699357852e-06, "loss": 0.8188, "step": 1558 }, { "epoch": 0.04551691921403755, "grad_norm": 0.8821473215704977, "learning_rate": 9.100992410974898e-06, "loss": 0.7648, "step": 1559 }, { "epoch": 0.04554611544188491, "grad_norm": 0.986252871556141, "learning_rate": 9.106830122591943e-06, "loss": 0.7275, "step": 1560 }, { "epoch": 0.04557531166973227, "grad_norm": 0.7945286697131817, "learning_rate": 9.112667834208992e-06, "loss": 0.698, "step": 1561 }, { "epoch": 0.04560450789757963, "grad_norm": 0.9220758898267372, "learning_rate": 9.118505545826037e-06, "loss": 0.7974, "step": 1562 }, { "epoch": 0.04563370412542699, "grad_norm": 0.8083954915809062, "learning_rate": 9.124343257443084e-06, "loss": 0.7815, "step": 1563 }, { "epoch": 0.045662900353274354, "grad_norm": 0.8021162957561232, "learning_rate": 9.13018096906013e-06, "loss": 0.7034, "step": 1564 }, { "epoch": 0.04569209658112172, "grad_norm": 0.7415847865736341, "learning_rate": 9.136018680677175e-06, "loss": 0.6891, "step": 1565 }, { "epoch": 0.04572129280896908, "grad_norm": 0.7915733899999425, "learning_rate": 9.141856392294222e-06, "loss": 0.7251, "step": 1566 }, { "epoch": 0.045750489036816444, "grad_norm": 0.9791910197717508, "learning_rate": 9.147694103911267e-06, "loss": 0.6917, "step": 1567 }, { "epoch": 0.045779685264663805, "grad_norm": 1.2896504959034003, "learning_rate": 9.153531815528314e-06, "loss": 0.8337, "step": 1568 }, { "epoch": 0.045808881492511166, "grad_norm": 0.7885538323093106, "learning_rate": 9.15936952714536e-06, "loss": 0.7164, "step": 1569 }, { "epoch": 0.04583807772035853, "grad_norm": 0.8987076824411364, "learning_rate": 9.165207238762405e-06, "loss": 0.8098, "step": 1570 }, { "epoch": 0.04586727394820589, "grad_norm": 0.7616030244445114, "learning_rate": 9.171044950379452e-06, "loss": 0.6859, "step": 1571 }, { "epoch": 0.04589647017605326, "grad_norm": 1.227763339945794, "learning_rate": 9.176882661996499e-06, "loss": 0.8663, "step": 1572 }, { "epoch": 0.04592566640390062, "grad_norm": 0.9390963928397074, "learning_rate": 9.182720373613544e-06, "loss": 0.7978, "step": 1573 }, { "epoch": 0.04595486263174798, "grad_norm": 0.7794724454803522, "learning_rate": 9.18855808523059e-06, "loss": 0.7453, "step": 1574 }, { "epoch": 0.04598405885959534, "grad_norm": 0.7616811198589287, "learning_rate": 9.194395796847637e-06, "loss": 0.7032, "step": 1575 }, { "epoch": 0.0460132550874427, "grad_norm": 0.8230117577434442, "learning_rate": 9.200233508464682e-06, "loss": 0.7201, "step": 1576 }, { "epoch": 0.04604245131529006, "grad_norm": 0.7892620763087613, "learning_rate": 9.206071220081729e-06, "loss": 0.6076, "step": 1577 }, { "epoch": 0.046071647543137424, "grad_norm": 0.7986867650440702, "learning_rate": 9.211908931698775e-06, "loss": 0.734, "step": 1578 }, { "epoch": 0.046100843770984785, "grad_norm": 0.793584248222914, "learning_rate": 9.217746643315822e-06, "loss": 0.7402, "step": 1579 }, { "epoch": 0.046130039998832154, "grad_norm": 0.9326047824945953, "learning_rate": 9.223584354932867e-06, "loss": 0.7455, "step": 1580 }, { "epoch": 0.046159236226679515, "grad_norm": 0.7825101329025618, "learning_rate": 9.229422066549914e-06, "loss": 0.6715, "step": 1581 }, { "epoch": 0.046188432454526876, "grad_norm": 0.8175969531103432, "learning_rate": 9.23525977816696e-06, "loss": 0.8052, "step": 1582 }, { "epoch": 0.04621762868237424, "grad_norm": 0.7512849364191413, "learning_rate": 9.241097489784005e-06, "loss": 0.6863, "step": 1583 }, { "epoch": 0.0462468249102216, "grad_norm": 0.7232591737610831, "learning_rate": 9.246935201401052e-06, "loss": 0.6776, "step": 1584 }, { "epoch": 0.04627602113806896, "grad_norm": 0.8351839000232026, "learning_rate": 9.252772913018097e-06, "loss": 0.7547, "step": 1585 }, { "epoch": 0.04630521736591632, "grad_norm": 0.7539274960966261, "learning_rate": 9.258610624635144e-06, "loss": 0.6745, "step": 1586 }, { "epoch": 0.04633441359376369, "grad_norm": 0.7482188458223034, "learning_rate": 9.26444833625219e-06, "loss": 0.6391, "step": 1587 }, { "epoch": 0.04636360982161105, "grad_norm": 0.8381583558902457, "learning_rate": 9.270286047869235e-06, "loss": 0.8299, "step": 1588 }, { "epoch": 0.04639280604945841, "grad_norm": 0.8078891680526303, "learning_rate": 9.276123759486282e-06, "loss": 0.7603, "step": 1589 }, { "epoch": 0.04642200227730577, "grad_norm": 0.8314832801446095, "learning_rate": 9.281961471103329e-06, "loss": 0.7694, "step": 1590 }, { "epoch": 0.046451198505153134, "grad_norm": 1.0882433088743073, "learning_rate": 9.287799182720374e-06, "loss": 0.7742, "step": 1591 }, { "epoch": 0.046480394733000495, "grad_norm": 0.7909830968778057, "learning_rate": 9.29363689433742e-06, "loss": 0.7383, "step": 1592 }, { "epoch": 0.046509590960847856, "grad_norm": 0.8199327994192139, "learning_rate": 9.299474605954467e-06, "loss": 0.7929, "step": 1593 }, { "epoch": 0.04653878718869522, "grad_norm": 0.8113632604156926, "learning_rate": 9.305312317571514e-06, "loss": 0.7585, "step": 1594 }, { "epoch": 0.046567983416542585, "grad_norm": 0.7411469578506225, "learning_rate": 9.311150029188559e-06, "loss": 0.6708, "step": 1595 }, { "epoch": 0.04659717964438995, "grad_norm": 0.7345293830215992, "learning_rate": 9.316987740805605e-06, "loss": 0.6765, "step": 1596 }, { "epoch": 0.04662637587223731, "grad_norm": 0.8190353692596088, "learning_rate": 9.322825452422652e-06, "loss": 0.7776, "step": 1597 }, { "epoch": 0.04665557210008467, "grad_norm": 0.7803503155884932, "learning_rate": 9.328663164039697e-06, "loss": 0.6511, "step": 1598 }, { "epoch": 0.04668476832793203, "grad_norm": 0.9471527807802302, "learning_rate": 9.334500875656744e-06, "loss": 0.6471, "step": 1599 }, { "epoch": 0.04671396455577939, "grad_norm": 0.8733725888195377, "learning_rate": 9.34033858727379e-06, "loss": 0.7725, "step": 1600 }, { "epoch": 0.04674316078362675, "grad_norm": 1.0347868948294514, "learning_rate": 9.346176298890835e-06, "loss": 0.7847, "step": 1601 }, { "epoch": 0.04677235701147412, "grad_norm": 0.7862768168034651, "learning_rate": 9.352014010507882e-06, "loss": 0.7282, "step": 1602 }, { "epoch": 0.04680155323932148, "grad_norm": 0.9289179930375322, "learning_rate": 9.357851722124927e-06, "loss": 0.9359, "step": 1603 }, { "epoch": 0.04683074946716884, "grad_norm": 0.8060159690704684, "learning_rate": 9.363689433741974e-06, "loss": 0.8511, "step": 1604 }, { "epoch": 0.046859945695016204, "grad_norm": 0.8243667788915888, "learning_rate": 9.36952714535902e-06, "loss": 0.7935, "step": 1605 }, { "epoch": 0.046889141922863566, "grad_norm": 0.7974429364597239, "learning_rate": 9.375364856976065e-06, "loss": 0.7532, "step": 1606 }, { "epoch": 0.04691833815071093, "grad_norm": 0.7963014715629638, "learning_rate": 9.381202568593112e-06, "loss": 0.7443, "step": 1607 }, { "epoch": 0.04694753437855829, "grad_norm": 0.8299018765677185, "learning_rate": 9.387040280210159e-06, "loss": 0.7112, "step": 1608 }, { "epoch": 0.04697673060640565, "grad_norm": 0.7647777962093494, "learning_rate": 9.392877991827205e-06, "loss": 0.7384, "step": 1609 }, { "epoch": 0.04700592683425302, "grad_norm": 0.7901500176121399, "learning_rate": 9.39871570344425e-06, "loss": 0.7572, "step": 1610 }, { "epoch": 0.04703512306210038, "grad_norm": 0.9243078665662552, "learning_rate": 9.404553415061297e-06, "loss": 0.7637, "step": 1611 }, { "epoch": 0.04706431928994774, "grad_norm": 0.8048607505641486, "learning_rate": 9.410391126678344e-06, "loss": 0.7688, "step": 1612 }, { "epoch": 0.0470935155177951, "grad_norm": 0.7694327263977797, "learning_rate": 9.416228838295389e-06, "loss": 0.7142, "step": 1613 }, { "epoch": 0.04712271174564246, "grad_norm": 1.0250682220294496, "learning_rate": 9.422066549912435e-06, "loss": 0.7092, "step": 1614 }, { "epoch": 0.04715190797348982, "grad_norm": 0.8217031477923988, "learning_rate": 9.427904261529482e-06, "loss": 0.6833, "step": 1615 }, { "epoch": 0.047181104201337185, "grad_norm": 0.7692324629000523, "learning_rate": 9.433741973146527e-06, "loss": 0.7181, "step": 1616 }, { "epoch": 0.04721030042918455, "grad_norm": 0.796709959562764, "learning_rate": 9.439579684763574e-06, "loss": 0.7973, "step": 1617 }, { "epoch": 0.047239496657031914, "grad_norm": 0.7487845402392322, "learning_rate": 9.445417396380619e-06, "loss": 0.6971, "step": 1618 }, { "epoch": 0.047268692884879275, "grad_norm": 0.744049392759042, "learning_rate": 9.451255107997665e-06, "loss": 0.7007, "step": 1619 }, { "epoch": 0.047297889112726636, "grad_norm": 0.7845776148727038, "learning_rate": 9.457092819614712e-06, "loss": 0.7151, "step": 1620 }, { "epoch": 0.047327085340574, "grad_norm": 0.8250698698954513, "learning_rate": 9.462930531231757e-06, "loss": 0.7548, "step": 1621 }, { "epoch": 0.04735628156842136, "grad_norm": 0.7655109140157325, "learning_rate": 9.468768242848804e-06, "loss": 0.7354, "step": 1622 }, { "epoch": 0.04738547779626872, "grad_norm": 0.8578738487920934, "learning_rate": 9.47460595446585e-06, "loss": 0.7104, "step": 1623 }, { "epoch": 0.04741467402411608, "grad_norm": 0.7908890736003714, "learning_rate": 9.480443666082895e-06, "loss": 0.7123, "step": 1624 }, { "epoch": 0.04744387025196345, "grad_norm": 0.7844679898259539, "learning_rate": 9.486281377699944e-06, "loss": 0.7088, "step": 1625 }, { "epoch": 0.04747306647981081, "grad_norm": 1.7647596776418526, "learning_rate": 9.492119089316989e-06, "loss": 0.8078, "step": 1626 }, { "epoch": 0.04750226270765817, "grad_norm": 0.9406212951909807, "learning_rate": 9.497956800934035e-06, "loss": 0.8779, "step": 1627 }, { "epoch": 0.04753145893550553, "grad_norm": 0.8871204819166005, "learning_rate": 9.50379451255108e-06, "loss": 0.8077, "step": 1628 }, { "epoch": 0.047560655163352894, "grad_norm": 0.7642748560966643, "learning_rate": 9.509632224168127e-06, "loss": 0.7046, "step": 1629 }, { "epoch": 0.047589851391200255, "grad_norm": 0.8149286474715409, "learning_rate": 9.515469935785174e-06, "loss": 0.7689, "step": 1630 }, { "epoch": 0.047619047619047616, "grad_norm": 0.9701928253231513, "learning_rate": 9.521307647402219e-06, "loss": 0.715, "step": 1631 }, { "epoch": 0.047648243846894985, "grad_norm": 0.8335658062142345, "learning_rate": 9.527145359019265e-06, "loss": 0.8034, "step": 1632 }, { "epoch": 0.047677440074742346, "grad_norm": 0.770831521245314, "learning_rate": 9.532983070636312e-06, "loss": 0.7079, "step": 1633 }, { "epoch": 0.04770663630258971, "grad_norm": 0.7971226107815275, "learning_rate": 9.538820782253357e-06, "loss": 0.6952, "step": 1634 }, { "epoch": 0.04773583253043707, "grad_norm": 0.8385284982097554, "learning_rate": 9.544658493870404e-06, "loss": 0.7264, "step": 1635 }, { "epoch": 0.04776502875828443, "grad_norm": 0.7612983991179482, "learning_rate": 9.550496205487449e-06, "loss": 0.6525, "step": 1636 }, { "epoch": 0.04779422498613179, "grad_norm": 0.9734326142051365, "learning_rate": 9.556333917104495e-06, "loss": 0.8167, "step": 1637 }, { "epoch": 0.04782342121397915, "grad_norm": 0.7625062072905898, "learning_rate": 9.562171628721542e-06, "loss": 0.6798, "step": 1638 }, { "epoch": 0.04785261744182651, "grad_norm": 0.7518817793409118, "learning_rate": 9.568009340338587e-06, "loss": 0.6567, "step": 1639 }, { "epoch": 0.04788181366967388, "grad_norm": 0.7891785053081104, "learning_rate": 9.573847051955634e-06, "loss": 0.8116, "step": 1640 }, { "epoch": 0.04791100989752124, "grad_norm": 0.7020244411339602, "learning_rate": 9.57968476357268e-06, "loss": 0.6046, "step": 1641 }, { "epoch": 0.047940206125368603, "grad_norm": 1.029551718228629, "learning_rate": 9.585522475189727e-06, "loss": 0.7711, "step": 1642 }, { "epoch": 0.047969402353215965, "grad_norm": 0.8399267909969631, "learning_rate": 9.591360186806772e-06, "loss": 0.7164, "step": 1643 }, { "epoch": 0.047998598581063326, "grad_norm": 0.7659197643464549, "learning_rate": 9.597197898423819e-06, "loss": 0.7378, "step": 1644 }, { "epoch": 0.04802779480891069, "grad_norm": 0.7603579469781706, "learning_rate": 9.603035610040866e-06, "loss": 0.7262, "step": 1645 }, { "epoch": 0.04805699103675805, "grad_norm": 0.9034620913062148, "learning_rate": 9.60887332165791e-06, "loss": 0.7577, "step": 1646 }, { "epoch": 0.048086187264605416, "grad_norm": 0.7469837183915519, "learning_rate": 9.614711033274957e-06, "loss": 0.6752, "step": 1647 }, { "epoch": 0.04811538349245278, "grad_norm": 0.8066790509605203, "learning_rate": 9.620548744892004e-06, "loss": 0.755, "step": 1648 }, { "epoch": 0.04814457972030014, "grad_norm": 0.830313012361972, "learning_rate": 9.626386456509049e-06, "loss": 0.8304, "step": 1649 }, { "epoch": 0.0481737759481475, "grad_norm": 0.7912325108913849, "learning_rate": 9.632224168126096e-06, "loss": 0.7613, "step": 1650 }, { "epoch": 0.04820297217599486, "grad_norm": 0.7711219287208428, "learning_rate": 9.63806187974314e-06, "loss": 0.6211, "step": 1651 }, { "epoch": 0.04823216840384222, "grad_norm": 0.7952230495853972, "learning_rate": 9.643899591360187e-06, "loss": 0.7983, "step": 1652 }, { "epoch": 0.048261364631689584, "grad_norm": 0.7089172160921303, "learning_rate": 9.649737302977234e-06, "loss": 0.6387, "step": 1653 }, { "epoch": 0.048290560859536945, "grad_norm": 0.7089641309538247, "learning_rate": 9.655575014594279e-06, "loss": 0.6322, "step": 1654 }, { "epoch": 0.04831975708738431, "grad_norm": 0.8443163729900452, "learning_rate": 9.661412726211326e-06, "loss": 0.7778, "step": 1655 }, { "epoch": 0.048348953315231674, "grad_norm": 0.8162562917911255, "learning_rate": 9.667250437828372e-06, "loss": 0.8146, "step": 1656 }, { "epoch": 0.048378149543079035, "grad_norm": 0.8387275634063506, "learning_rate": 9.673088149445419e-06, "loss": 0.7959, "step": 1657 }, { "epoch": 0.0484073457709264, "grad_norm": 0.7453822357884184, "learning_rate": 9.678925861062466e-06, "loss": 0.7315, "step": 1658 }, { "epoch": 0.04843654199877376, "grad_norm": 0.7513067789548036, "learning_rate": 9.68476357267951e-06, "loss": 0.6368, "step": 1659 }, { "epoch": 0.04846573822662112, "grad_norm": 0.7880294300139481, "learning_rate": 9.690601284296557e-06, "loss": 0.8144, "step": 1660 }, { "epoch": 0.04849493445446848, "grad_norm": 0.6593288301414488, "learning_rate": 9.696438995913602e-06, "loss": 0.5424, "step": 1661 }, { "epoch": 0.04852413068231584, "grad_norm": 0.7730526220478153, "learning_rate": 9.702276707530649e-06, "loss": 0.7275, "step": 1662 }, { "epoch": 0.04855332691016321, "grad_norm": 0.8222074248696544, "learning_rate": 9.708114419147696e-06, "loss": 0.6648, "step": 1663 }, { "epoch": 0.04858252313801057, "grad_norm": 0.7301940139071931, "learning_rate": 9.71395213076474e-06, "loss": 0.6038, "step": 1664 }, { "epoch": 0.04861171936585793, "grad_norm": 0.8396049529180837, "learning_rate": 9.719789842381787e-06, "loss": 0.8156, "step": 1665 }, { "epoch": 0.04864091559370529, "grad_norm": 0.8354735536326099, "learning_rate": 9.725627553998834e-06, "loss": 0.8122, "step": 1666 }, { "epoch": 0.048670111821552654, "grad_norm": 0.7220271420105733, "learning_rate": 9.731465265615879e-06, "loss": 0.6071, "step": 1667 }, { "epoch": 0.048699308049400016, "grad_norm": 0.8212209565077299, "learning_rate": 9.737302977232926e-06, "loss": 0.8474, "step": 1668 }, { "epoch": 0.04872850427724738, "grad_norm": 0.8109675974738362, "learning_rate": 9.74314068884997e-06, "loss": 0.7705, "step": 1669 }, { "epoch": 0.048757700505094745, "grad_norm": 0.7880459764103888, "learning_rate": 9.748978400467017e-06, "loss": 0.7845, "step": 1670 }, { "epoch": 0.048786896732942106, "grad_norm": 0.771609151309913, "learning_rate": 9.754816112084064e-06, "loss": 0.7077, "step": 1671 }, { "epoch": 0.04881609296078947, "grad_norm": 1.0317171556311007, "learning_rate": 9.760653823701109e-06, "loss": 0.7274, "step": 1672 }, { "epoch": 0.04884528918863683, "grad_norm": 0.8152441872287503, "learning_rate": 9.766491535318157e-06, "loss": 0.6749, "step": 1673 }, { "epoch": 0.04887448541648419, "grad_norm": 0.7640215879241342, "learning_rate": 9.772329246935202e-06, "loss": 0.6128, "step": 1674 }, { "epoch": 0.04890368164433155, "grad_norm": 0.6848540844862584, "learning_rate": 9.778166958552249e-06, "loss": 0.5972, "step": 1675 }, { "epoch": 0.04893287787217891, "grad_norm": 0.7679953794133885, "learning_rate": 9.784004670169294e-06, "loss": 0.7345, "step": 1676 }, { "epoch": 0.04896207410002627, "grad_norm": 0.8424213664647118, "learning_rate": 9.78984238178634e-06, "loss": 0.8427, "step": 1677 }, { "epoch": 0.04899127032787364, "grad_norm": 0.8890009472515676, "learning_rate": 9.795680093403387e-06, "loss": 0.8129, "step": 1678 }, { "epoch": 0.049020466555721, "grad_norm": 0.8046563199351587, "learning_rate": 9.801517805020432e-06, "loss": 0.6831, "step": 1679 }, { "epoch": 0.049049662783568364, "grad_norm": 0.8170340872405446, "learning_rate": 9.807355516637479e-06, "loss": 0.7862, "step": 1680 }, { "epoch": 0.049078859011415725, "grad_norm": 0.7697729176171548, "learning_rate": 9.813193228254526e-06, "loss": 0.753, "step": 1681 }, { "epoch": 0.049108055239263086, "grad_norm": 1.0667074655987256, "learning_rate": 9.81903093987157e-06, "loss": 0.7364, "step": 1682 }, { "epoch": 0.04913725146711045, "grad_norm": 0.7573201846405372, "learning_rate": 9.824868651488617e-06, "loss": 0.7206, "step": 1683 }, { "epoch": 0.04916644769495781, "grad_norm": 0.7178471309845114, "learning_rate": 9.830706363105662e-06, "loss": 0.6944, "step": 1684 }, { "epoch": 0.04919564392280518, "grad_norm": 0.9262061757982987, "learning_rate": 9.836544074722709e-06, "loss": 0.6566, "step": 1685 }, { "epoch": 0.04922484015065254, "grad_norm": 0.7317158435232085, "learning_rate": 9.842381786339756e-06, "loss": 0.6223, "step": 1686 }, { "epoch": 0.0492540363784999, "grad_norm": 0.7653628989863422, "learning_rate": 9.8482194979568e-06, "loss": 0.7841, "step": 1687 }, { "epoch": 0.04928323260634726, "grad_norm": 0.774790441472545, "learning_rate": 9.854057209573847e-06, "loss": 0.7481, "step": 1688 }, { "epoch": 0.04931242883419462, "grad_norm": 0.674587046227164, "learning_rate": 9.859894921190894e-06, "loss": 0.5671, "step": 1689 }, { "epoch": 0.04934162506204198, "grad_norm": 0.7534919384188059, "learning_rate": 9.86573263280794e-06, "loss": 0.6705, "step": 1690 }, { "epoch": 0.049370821289889344, "grad_norm": 0.752350587590339, "learning_rate": 9.871570344424987e-06, "loss": 0.7348, "step": 1691 }, { "epoch": 0.049400017517736705, "grad_norm": 0.839106620114788, "learning_rate": 9.877408056042032e-06, "loss": 0.749, "step": 1692 }, { "epoch": 0.04942921374558407, "grad_norm": 0.8991011043144381, "learning_rate": 9.883245767659079e-06, "loss": 0.8159, "step": 1693 }, { "epoch": 0.049458409973431434, "grad_norm": 0.7981154108729079, "learning_rate": 9.889083479276124e-06, "loss": 0.7184, "step": 1694 }, { "epoch": 0.049487606201278796, "grad_norm": 0.7410895013857893, "learning_rate": 9.89492119089317e-06, "loss": 0.6745, "step": 1695 }, { "epoch": 0.04951680242912616, "grad_norm": 0.736774417237388, "learning_rate": 9.900758902510217e-06, "loss": 0.7029, "step": 1696 }, { "epoch": 0.04954599865697352, "grad_norm": 0.849261977496499, "learning_rate": 9.906596614127262e-06, "loss": 0.7189, "step": 1697 }, { "epoch": 0.04957519488482088, "grad_norm": 0.8052066405325192, "learning_rate": 9.912434325744309e-06, "loss": 0.7383, "step": 1698 }, { "epoch": 0.04960439111266824, "grad_norm": 0.8855095877124729, "learning_rate": 9.918272037361356e-06, "loss": 0.7654, "step": 1699 }, { "epoch": 0.04963358734051561, "grad_norm": 0.7321972754923246, "learning_rate": 9.9241097489784e-06, "loss": 0.6853, "step": 1700 }, { "epoch": 0.04966278356836297, "grad_norm": 0.8650279800529237, "learning_rate": 9.929947460595447e-06, "loss": 0.8442, "step": 1701 }, { "epoch": 0.04969197979621033, "grad_norm": 0.8277375160944327, "learning_rate": 9.935785172212492e-06, "loss": 0.6928, "step": 1702 }, { "epoch": 0.04972117602405769, "grad_norm": 0.7478366554597506, "learning_rate": 9.941622883829539e-06, "loss": 0.6872, "step": 1703 }, { "epoch": 0.04975037225190505, "grad_norm": 0.7585562344691554, "learning_rate": 9.947460595446586e-06, "loss": 0.6577, "step": 1704 }, { "epoch": 0.049779568479752415, "grad_norm": 1.0567827578889262, "learning_rate": 9.953298307063632e-06, "loss": 0.66, "step": 1705 }, { "epoch": 0.049808764707599776, "grad_norm": 0.7667052312484949, "learning_rate": 9.959136018680679e-06, "loss": 0.7519, "step": 1706 }, { "epoch": 0.04983796093544714, "grad_norm": 0.7517012046555345, "learning_rate": 9.964973730297724e-06, "loss": 0.6887, "step": 1707 }, { "epoch": 0.049867157163294505, "grad_norm": 0.9064341214023905, "learning_rate": 9.97081144191477e-06, "loss": 0.6193, "step": 1708 }, { "epoch": 0.049896353391141866, "grad_norm": 0.8584855011803019, "learning_rate": 9.976649153531816e-06, "loss": 0.75, "step": 1709 }, { "epoch": 0.04992554961898923, "grad_norm": 0.8484736522610691, "learning_rate": 9.982486865148862e-06, "loss": 0.7713, "step": 1710 }, { "epoch": 0.04995474584683659, "grad_norm": 0.8430149927861267, "learning_rate": 9.988324576765909e-06, "loss": 0.8199, "step": 1711 }, { "epoch": 0.04998394207468395, "grad_norm": 0.8582765549143019, "learning_rate": 9.994162288382954e-06, "loss": 0.8154, "step": 1712 }, { "epoch": 0.05001313830253131, "grad_norm": 0.8009813513000679, "learning_rate": 1e-05, "loss": 0.7249, "step": 1713 }, { "epoch": 0.05004233453037867, "grad_norm": 0.8351399049720364, "learning_rate": 1.0005837711617047e-05, "loss": 0.8057, "step": 1714 }, { "epoch": 0.05007153075822604, "grad_norm": 0.7579600812698608, "learning_rate": 1.0011675423234092e-05, "loss": 0.654, "step": 1715 }, { "epoch": 0.0501007269860734, "grad_norm": 0.8080925442652727, "learning_rate": 1.0017513134851141e-05, "loss": 0.7594, "step": 1716 }, { "epoch": 0.05012992321392076, "grad_norm": 0.7986655500461252, "learning_rate": 1.0023350846468186e-05, "loss": 0.7653, "step": 1717 }, { "epoch": 0.050159119441768124, "grad_norm": 0.8365157866624421, "learning_rate": 1.0029188558085233e-05, "loss": 0.7168, "step": 1718 }, { "epoch": 0.050188315669615485, "grad_norm": 0.7374673832569911, "learning_rate": 1.0035026269702278e-05, "loss": 0.6386, "step": 1719 }, { "epoch": 0.050217511897462846, "grad_norm": 0.846577012440781, "learning_rate": 1.0040863981319324e-05, "loss": 0.807, "step": 1720 }, { "epoch": 0.05024670812531021, "grad_norm": 0.7749834101474722, "learning_rate": 1.004670169293637e-05, "loss": 0.6699, "step": 1721 }, { "epoch": 0.05027590435315757, "grad_norm": 0.6910360433393714, "learning_rate": 1.0052539404553416e-05, "loss": 0.6025, "step": 1722 }, { "epoch": 0.05030510058100494, "grad_norm": 0.7740312054918905, "learning_rate": 1.005837711617046e-05, "loss": 0.628, "step": 1723 }, { "epoch": 0.0503342968088523, "grad_norm": 0.7740953838200677, "learning_rate": 1.006421482778751e-05, "loss": 0.7423, "step": 1724 }, { "epoch": 0.05036349303669966, "grad_norm": 0.9486746653872152, "learning_rate": 1.0070052539404554e-05, "loss": 0.7394, "step": 1725 }, { "epoch": 0.05039268926454702, "grad_norm": 0.764814197448783, "learning_rate": 1.0075890251021601e-05, "loss": 0.7017, "step": 1726 }, { "epoch": 0.05042188549239438, "grad_norm": 0.8640291473855268, "learning_rate": 1.0081727962638646e-05, "loss": 0.8191, "step": 1727 }, { "epoch": 0.05045108172024174, "grad_norm": 0.7803284054530971, "learning_rate": 1.0087565674255693e-05, "loss": 0.6875, "step": 1728 }, { "epoch": 0.050480277948089104, "grad_norm": 0.8483838804723003, "learning_rate": 1.0093403385872738e-05, "loss": 0.7094, "step": 1729 }, { "epoch": 0.05050947417593647, "grad_norm": 0.7718268295852568, "learning_rate": 1.0099241097489784e-05, "loss": 0.6947, "step": 1730 }, { "epoch": 0.050538670403783834, "grad_norm": 0.8533633721819891, "learning_rate": 1.0105078809106833e-05, "loss": 0.7649, "step": 1731 }, { "epoch": 0.050567866631631195, "grad_norm": 0.7169373751051582, "learning_rate": 1.0110916520723878e-05, "loss": 0.6288, "step": 1732 }, { "epoch": 0.050597062859478556, "grad_norm": 0.773967636449231, "learning_rate": 1.0116754232340924e-05, "loss": 0.6909, "step": 1733 }, { "epoch": 0.05062625908732592, "grad_norm": 0.8966795235026901, "learning_rate": 1.012259194395797e-05, "loss": 0.7526, "step": 1734 }, { "epoch": 0.05065545531517328, "grad_norm": 0.9100298647756719, "learning_rate": 1.0128429655575016e-05, "loss": 0.6724, "step": 1735 }, { "epoch": 0.05068465154302064, "grad_norm": 0.7545877798024537, "learning_rate": 1.0134267367192061e-05, "loss": 0.6702, "step": 1736 }, { "epoch": 0.050713847770868, "grad_norm": 0.7500261342387441, "learning_rate": 1.0140105078809108e-05, "loss": 0.6942, "step": 1737 }, { "epoch": 0.05074304399871537, "grad_norm": 0.981909412502653, "learning_rate": 1.0145942790426153e-05, "loss": 0.7868, "step": 1738 }, { "epoch": 0.05077224022656273, "grad_norm": 0.9188552951441606, "learning_rate": 1.0151780502043201e-05, "loss": 0.9209, "step": 1739 }, { "epoch": 0.05080143645441009, "grad_norm": 0.7672550662305174, "learning_rate": 1.0157618213660246e-05, "loss": 0.6897, "step": 1740 }, { "epoch": 0.05083063268225745, "grad_norm": 0.8735286155964586, "learning_rate": 1.0163455925277293e-05, "loss": 0.7945, "step": 1741 }, { "epoch": 0.050859828910104814, "grad_norm": 0.7488950348265575, "learning_rate": 1.0169293636894338e-05, "loss": 0.6236, "step": 1742 }, { "epoch": 0.050889025137952175, "grad_norm": 0.7887505474291062, "learning_rate": 1.0175131348511384e-05, "loss": 0.6641, "step": 1743 }, { "epoch": 0.050918221365799536, "grad_norm": 0.8428175632385329, "learning_rate": 1.018096906012843e-05, "loss": 0.7832, "step": 1744 }, { "epoch": 0.050947417593646904, "grad_norm": 0.8090003996430946, "learning_rate": 1.0186806771745476e-05, "loss": 0.7437, "step": 1745 }, { "epoch": 0.050976613821494265, "grad_norm": 0.839158176267354, "learning_rate": 1.0192644483362524e-05, "loss": 0.8219, "step": 1746 }, { "epoch": 0.05100581004934163, "grad_norm": 0.8046274881102583, "learning_rate": 1.019848219497957e-05, "loss": 0.7542, "step": 1747 }, { "epoch": 0.05103500627718899, "grad_norm": 0.7160827997371433, "learning_rate": 1.0204319906596616e-05, "loss": 0.6811, "step": 1748 }, { "epoch": 0.05106420250503635, "grad_norm": 0.7995173145678939, "learning_rate": 1.0210157618213661e-05, "loss": 0.6828, "step": 1749 }, { "epoch": 0.05109339873288371, "grad_norm": 0.8309186599269465, "learning_rate": 1.0215995329830708e-05, "loss": 0.622, "step": 1750 }, { "epoch": 0.05112259496073107, "grad_norm": 0.7417940863419699, "learning_rate": 1.0221833041447753e-05, "loss": 0.7161, "step": 1751 }, { "epoch": 0.05115179118857843, "grad_norm": 0.7968030694457999, "learning_rate": 1.02276707530648e-05, "loss": 0.7545, "step": 1752 }, { "epoch": 0.0511809874164258, "grad_norm": 0.8450902367277625, "learning_rate": 1.0233508464681844e-05, "loss": 0.7383, "step": 1753 }, { "epoch": 0.05121018364427316, "grad_norm": 0.7612100716304093, "learning_rate": 1.0239346176298893e-05, "loss": 0.747, "step": 1754 }, { "epoch": 0.05123937987212052, "grad_norm": 0.7773925871708203, "learning_rate": 1.0245183887915938e-05, "loss": 0.6792, "step": 1755 }, { "epoch": 0.051268576099967884, "grad_norm": 0.7290096892585864, "learning_rate": 1.0251021599532984e-05, "loss": 0.7081, "step": 1756 }, { "epoch": 0.051297772327815246, "grad_norm": 0.9450096118493673, "learning_rate": 1.025685931115003e-05, "loss": 0.7722, "step": 1757 }, { "epoch": 0.05132696855566261, "grad_norm": 0.8952467870128722, "learning_rate": 1.0262697022767076e-05, "loss": 0.7893, "step": 1758 }, { "epoch": 0.05135616478350997, "grad_norm": 0.8199095151140738, "learning_rate": 1.0268534734384121e-05, "loss": 0.7366, "step": 1759 }, { "epoch": 0.05138536101135733, "grad_norm": 0.8052919576312245, "learning_rate": 1.0274372446001168e-05, "loss": 0.664, "step": 1760 }, { "epoch": 0.0514145572392047, "grad_norm": 0.774591921277757, "learning_rate": 1.0280210157618213e-05, "loss": 0.6856, "step": 1761 }, { "epoch": 0.05144375346705206, "grad_norm": 0.8899180054436664, "learning_rate": 1.0286047869235261e-05, "loss": 0.8022, "step": 1762 }, { "epoch": 0.05147294969489942, "grad_norm": 0.766784682759578, "learning_rate": 1.0291885580852308e-05, "loss": 0.755, "step": 1763 }, { "epoch": 0.05150214592274678, "grad_norm": 0.8308329010374804, "learning_rate": 1.0297723292469353e-05, "loss": 0.7445, "step": 1764 }, { "epoch": 0.05153134215059414, "grad_norm": 0.7802256002251313, "learning_rate": 1.03035610040864e-05, "loss": 0.7145, "step": 1765 }, { "epoch": 0.0515605383784415, "grad_norm": 1.0533126678867988, "learning_rate": 1.0309398715703444e-05, "loss": 0.7166, "step": 1766 }, { "epoch": 0.051589734606288865, "grad_norm": 0.9244031623100992, "learning_rate": 1.0315236427320493e-05, "loss": 0.7705, "step": 1767 }, { "epoch": 0.05161893083413623, "grad_norm": 0.7471224687220738, "learning_rate": 1.0321074138937536e-05, "loss": 0.6983, "step": 1768 }, { "epoch": 0.051648127061983594, "grad_norm": 0.8886424447025706, "learning_rate": 1.0326911850554584e-05, "loss": 0.8565, "step": 1769 }, { "epoch": 0.051677323289830955, "grad_norm": 0.892557656680833, "learning_rate": 1.033274956217163e-05, "loss": 0.7412, "step": 1770 }, { "epoch": 0.051706519517678316, "grad_norm": 0.7855776831291085, "learning_rate": 1.0338587273788676e-05, "loss": 0.7958, "step": 1771 }, { "epoch": 0.05173571574552568, "grad_norm": 0.7917561547355882, "learning_rate": 1.0344424985405721e-05, "loss": 0.7222, "step": 1772 }, { "epoch": 0.05176491197337304, "grad_norm": 1.2742147273096518, "learning_rate": 1.0350262697022768e-05, "loss": 0.7435, "step": 1773 }, { "epoch": 0.0517941082012204, "grad_norm": 0.840540964941932, "learning_rate": 1.0356100408639813e-05, "loss": 0.7202, "step": 1774 }, { "epoch": 0.05182330442906776, "grad_norm": 0.7885473433703273, "learning_rate": 1.0361938120256861e-05, "loss": 0.6517, "step": 1775 }, { "epoch": 0.05185250065691513, "grad_norm": 0.7791962605855584, "learning_rate": 1.0367775831873904e-05, "loss": 0.6706, "step": 1776 }, { "epoch": 0.05188169688476249, "grad_norm": 0.7725047401937868, "learning_rate": 1.0373613543490953e-05, "loss": 0.6828, "step": 1777 }, { "epoch": 0.05191089311260985, "grad_norm": 0.819933667661895, "learning_rate": 1.0379451255108e-05, "loss": 0.7086, "step": 1778 }, { "epoch": 0.05194008934045721, "grad_norm": 0.7957745574386194, "learning_rate": 1.0385288966725044e-05, "loss": 0.775, "step": 1779 }, { "epoch": 0.051969285568304574, "grad_norm": 0.9075932182760837, "learning_rate": 1.0391126678342091e-05, "loss": 0.7601, "step": 1780 }, { "epoch": 0.051998481796151935, "grad_norm": 0.8279142301827658, "learning_rate": 1.0396964389959136e-05, "loss": 0.7524, "step": 1781 }, { "epoch": 0.052027678023999296, "grad_norm": 0.7869802206455085, "learning_rate": 1.0402802101576184e-05, "loss": 0.7642, "step": 1782 }, { "epoch": 0.052056874251846665, "grad_norm": 0.8051910126209864, "learning_rate": 1.040863981319323e-05, "loss": 0.7581, "step": 1783 }, { "epoch": 0.052086070479694026, "grad_norm": 0.8578035784371217, "learning_rate": 1.0414477524810276e-05, "loss": 0.7803, "step": 1784 }, { "epoch": 0.05211526670754139, "grad_norm": 0.7825981972952551, "learning_rate": 1.0420315236427321e-05, "loss": 0.7574, "step": 1785 }, { "epoch": 0.05214446293538875, "grad_norm": 1.0692850396182028, "learning_rate": 1.0426152948044368e-05, "loss": 0.8015, "step": 1786 }, { "epoch": 0.05217365916323611, "grad_norm": 0.7864065719014012, "learning_rate": 1.0431990659661413e-05, "loss": 0.7411, "step": 1787 }, { "epoch": 0.05220285539108347, "grad_norm": 0.8847159529915773, "learning_rate": 1.043782837127846e-05, "loss": 0.7626, "step": 1788 }, { "epoch": 0.05223205161893083, "grad_norm": 0.7536432971749947, "learning_rate": 1.0443666082895504e-05, "loss": 0.7073, "step": 1789 }, { "epoch": 0.05226124784677819, "grad_norm": 0.8963833488780828, "learning_rate": 1.0449503794512553e-05, "loss": 0.7767, "step": 1790 }, { "epoch": 0.05229044407462556, "grad_norm": 0.885516495321528, "learning_rate": 1.0455341506129598e-05, "loss": 0.7842, "step": 1791 }, { "epoch": 0.05231964030247292, "grad_norm": 0.8159706026449393, "learning_rate": 1.0461179217746645e-05, "loss": 0.7407, "step": 1792 }, { "epoch": 0.052348836530320283, "grad_norm": 0.8028579429779764, "learning_rate": 1.046701692936369e-05, "loss": 0.6456, "step": 1793 }, { "epoch": 0.052378032758167645, "grad_norm": 0.8093511826480079, "learning_rate": 1.0472854640980736e-05, "loss": 0.7499, "step": 1794 }, { "epoch": 0.052407228986015006, "grad_norm": 0.9262252295339909, "learning_rate": 1.0478692352597783e-05, "loss": 0.6826, "step": 1795 }, { "epoch": 0.05243642521386237, "grad_norm": 0.7444791806035246, "learning_rate": 1.0484530064214828e-05, "loss": 0.6539, "step": 1796 }, { "epoch": 0.05246562144170973, "grad_norm": 0.7975737273544108, "learning_rate": 1.0490367775831876e-05, "loss": 0.7583, "step": 1797 }, { "epoch": 0.052494817669557096, "grad_norm": 0.8193087251084078, "learning_rate": 1.0496205487448921e-05, "loss": 0.7472, "step": 1798 }, { "epoch": 0.05252401389740446, "grad_norm": 0.7894271103295553, "learning_rate": 1.0502043199065968e-05, "loss": 0.7049, "step": 1799 }, { "epoch": 0.05255321012525182, "grad_norm": 0.7639670341450221, "learning_rate": 1.0507880910683013e-05, "loss": 0.7046, "step": 1800 }, { "epoch": 0.05258240635309918, "grad_norm": 0.8020253398644644, "learning_rate": 1.051371862230006e-05, "loss": 0.7628, "step": 1801 }, { "epoch": 0.05261160258094654, "grad_norm": 1.1418015734029436, "learning_rate": 1.0519556333917105e-05, "loss": 0.7159, "step": 1802 }, { "epoch": 0.0526407988087939, "grad_norm": 0.7875570126809605, "learning_rate": 1.0525394045534151e-05, "loss": 0.7413, "step": 1803 }, { "epoch": 0.052669995036641264, "grad_norm": 0.780004078098782, "learning_rate": 1.0531231757151196e-05, "loss": 0.7119, "step": 1804 }, { "epoch": 0.052699191264488625, "grad_norm": 0.7157868827605537, "learning_rate": 1.0537069468768245e-05, "loss": 0.6606, "step": 1805 }, { "epoch": 0.05272838749233599, "grad_norm": 0.7821841464172131, "learning_rate": 1.054290718038529e-05, "loss": 0.7227, "step": 1806 }, { "epoch": 0.052757583720183354, "grad_norm": 0.8250086594035915, "learning_rate": 1.0548744892002336e-05, "loss": 0.7486, "step": 1807 }, { "epoch": 0.052786779948030715, "grad_norm": 0.7392481435598924, "learning_rate": 1.0554582603619381e-05, "loss": 0.6587, "step": 1808 }, { "epoch": 0.05281597617587808, "grad_norm": 0.797081368455408, "learning_rate": 1.0560420315236428e-05, "loss": 0.7717, "step": 1809 }, { "epoch": 0.05284517240372544, "grad_norm": 1.8806693396927392, "learning_rate": 1.0566258026853475e-05, "loss": 0.643, "step": 1810 }, { "epoch": 0.0528743686315728, "grad_norm": 0.7847052369497493, "learning_rate": 1.057209573847052e-05, "loss": 0.7636, "step": 1811 }, { "epoch": 0.05290356485942016, "grad_norm": 0.8777480733775723, "learning_rate": 1.0577933450087568e-05, "loss": 0.8314, "step": 1812 }, { "epoch": 0.05293276108726753, "grad_norm": 0.9941081173204549, "learning_rate": 1.0583771161704613e-05, "loss": 0.7839, "step": 1813 }, { "epoch": 0.05296195731511489, "grad_norm": 0.7647781632991631, "learning_rate": 1.058960887332166e-05, "loss": 0.6548, "step": 1814 }, { "epoch": 0.05299115354296225, "grad_norm": 0.7988215574241327, "learning_rate": 1.0595446584938705e-05, "loss": 0.7587, "step": 1815 }, { "epoch": 0.05302034977080961, "grad_norm": 0.8194933006062239, "learning_rate": 1.0601284296555751e-05, "loss": 0.8028, "step": 1816 }, { "epoch": 0.05304954599865697, "grad_norm": 0.7136637244770372, "learning_rate": 1.0607122008172796e-05, "loss": 0.6394, "step": 1817 }, { "epoch": 0.053078742226504334, "grad_norm": 0.7465430111000865, "learning_rate": 1.0612959719789843e-05, "loss": 0.6562, "step": 1818 }, { "epoch": 0.053107938454351696, "grad_norm": 0.8885488918890353, "learning_rate": 1.0618797431406888e-05, "loss": 0.7643, "step": 1819 }, { "epoch": 0.05313713468219906, "grad_norm": 0.778301540968768, "learning_rate": 1.0624635143023936e-05, "loss": 0.7765, "step": 1820 }, { "epoch": 0.053166330910046425, "grad_norm": 0.6770228692621518, "learning_rate": 1.0630472854640981e-05, "loss": 0.5915, "step": 1821 }, { "epoch": 0.053195527137893786, "grad_norm": 0.8220026670754333, "learning_rate": 1.0636310566258028e-05, "loss": 0.6802, "step": 1822 }, { "epoch": 0.05322472336574115, "grad_norm": 0.8253487365635492, "learning_rate": 1.0642148277875073e-05, "loss": 0.7932, "step": 1823 }, { "epoch": 0.05325391959358851, "grad_norm": 0.7904895040745051, "learning_rate": 1.064798598949212e-05, "loss": 0.7818, "step": 1824 }, { "epoch": 0.05328311582143587, "grad_norm": 0.7550349042023562, "learning_rate": 1.0653823701109165e-05, "loss": 0.7048, "step": 1825 }, { "epoch": 0.05331231204928323, "grad_norm": 0.7849327046281113, "learning_rate": 1.0659661412726211e-05, "loss": 0.7535, "step": 1826 }, { "epoch": 0.05334150827713059, "grad_norm": 0.7158488005267729, "learning_rate": 1.066549912434326e-05, "loss": 0.6793, "step": 1827 }, { "epoch": 0.05337070450497796, "grad_norm": 0.8397469513808355, "learning_rate": 1.0671336835960305e-05, "loss": 0.7833, "step": 1828 }, { "epoch": 0.05339990073282532, "grad_norm": 0.7254276702164008, "learning_rate": 1.0677174547577351e-05, "loss": 0.646, "step": 1829 }, { "epoch": 0.05342909696067268, "grad_norm": 0.8205144498957648, "learning_rate": 1.0683012259194396e-05, "loss": 0.7425, "step": 1830 }, { "epoch": 0.053458293188520044, "grad_norm": 0.7397601453788751, "learning_rate": 1.0688849970811443e-05, "loss": 0.6802, "step": 1831 }, { "epoch": 0.053487489416367405, "grad_norm": 0.8940171266348722, "learning_rate": 1.0694687682428488e-05, "loss": 0.6092, "step": 1832 }, { "epoch": 0.053516685644214766, "grad_norm": 0.952512198251553, "learning_rate": 1.0700525394045536e-05, "loss": 0.7031, "step": 1833 }, { "epoch": 0.05354588187206213, "grad_norm": 0.7985594535322671, "learning_rate": 1.0706363105662581e-05, "loss": 0.7323, "step": 1834 }, { "epoch": 0.05357507809990949, "grad_norm": 0.7237139988898783, "learning_rate": 1.0712200817279628e-05, "loss": 0.6814, "step": 1835 }, { "epoch": 0.05360427432775686, "grad_norm": 0.8410498386714989, "learning_rate": 1.0718038528896673e-05, "loss": 0.8736, "step": 1836 }, { "epoch": 0.05363347055560422, "grad_norm": 0.7880006107325435, "learning_rate": 1.072387624051372e-05, "loss": 0.6568, "step": 1837 }, { "epoch": 0.05366266678345158, "grad_norm": 0.7546721970518714, "learning_rate": 1.0729713952130765e-05, "loss": 0.6656, "step": 1838 }, { "epoch": 0.05369186301129894, "grad_norm": 0.8190712222115081, "learning_rate": 1.0735551663747811e-05, "loss": 0.715, "step": 1839 }, { "epoch": 0.0537210592391463, "grad_norm": 0.8626393497381762, "learning_rate": 1.0741389375364856e-05, "loss": 0.7335, "step": 1840 }, { "epoch": 0.05375025546699366, "grad_norm": 0.7441828117539071, "learning_rate": 1.0747227086981905e-05, "loss": 0.6769, "step": 1841 }, { "epoch": 0.053779451694841024, "grad_norm": 0.718651705432917, "learning_rate": 1.0753064798598951e-05, "loss": 0.6458, "step": 1842 }, { "epoch": 0.05380864792268839, "grad_norm": 0.883840884431195, "learning_rate": 1.0758902510215996e-05, "loss": 0.8024, "step": 1843 }, { "epoch": 0.05383784415053575, "grad_norm": 0.7570922240067846, "learning_rate": 1.0764740221833043e-05, "loss": 0.7046, "step": 1844 }, { "epoch": 0.053867040378383114, "grad_norm": 0.8980962946432117, "learning_rate": 1.0770577933450088e-05, "loss": 0.7652, "step": 1845 }, { "epoch": 0.053896236606230476, "grad_norm": 0.750617439429869, "learning_rate": 1.0776415645067135e-05, "loss": 0.6789, "step": 1846 }, { "epoch": 0.05392543283407784, "grad_norm": 0.862861229801857, "learning_rate": 1.078225335668418e-05, "loss": 0.8153, "step": 1847 }, { "epoch": 0.0539546290619252, "grad_norm": 0.7501195268535783, "learning_rate": 1.0788091068301228e-05, "loss": 0.6745, "step": 1848 }, { "epoch": 0.05398382528977256, "grad_norm": 0.9620898387619244, "learning_rate": 1.0793928779918273e-05, "loss": 0.7599, "step": 1849 }, { "epoch": 0.05401302151761992, "grad_norm": 0.7672772629863673, "learning_rate": 1.079976649153532e-05, "loss": 0.754, "step": 1850 }, { "epoch": 0.05404221774546729, "grad_norm": 0.8281931620273273, "learning_rate": 1.0805604203152365e-05, "loss": 0.7677, "step": 1851 }, { "epoch": 0.05407141397331465, "grad_norm": 0.8264537088462754, "learning_rate": 1.0811441914769411e-05, "loss": 0.7007, "step": 1852 }, { "epoch": 0.05410061020116201, "grad_norm": 0.7965003415992266, "learning_rate": 1.0817279626386456e-05, "loss": 0.758, "step": 1853 }, { "epoch": 0.05412980642900937, "grad_norm": 0.7624195838365946, "learning_rate": 1.0823117338003503e-05, "loss": 0.6861, "step": 1854 }, { "epoch": 0.05415900265685673, "grad_norm": 0.7290805243570552, "learning_rate": 1.0828955049620548e-05, "loss": 0.6593, "step": 1855 }, { "epoch": 0.054188198884704095, "grad_norm": 0.7380135439548637, "learning_rate": 1.0834792761237596e-05, "loss": 0.6326, "step": 1856 }, { "epoch": 0.054217395112551456, "grad_norm": 0.8383244879746411, "learning_rate": 1.0840630472854641e-05, "loss": 0.7948, "step": 1857 }, { "epoch": 0.05424659134039882, "grad_norm": 0.8022961232447909, "learning_rate": 1.0846468184471688e-05, "loss": 0.6945, "step": 1858 }, { "epoch": 0.054275787568246185, "grad_norm": 0.8348158693507993, "learning_rate": 1.0852305896088735e-05, "loss": 0.7847, "step": 1859 }, { "epoch": 0.054304983796093546, "grad_norm": 0.8268502896691896, "learning_rate": 1.085814360770578e-05, "loss": 0.7724, "step": 1860 }, { "epoch": 0.05433418002394091, "grad_norm": 0.7115444682164077, "learning_rate": 1.0863981319322826e-05, "loss": 0.6233, "step": 1861 }, { "epoch": 0.05436337625178827, "grad_norm": 0.7531527300098191, "learning_rate": 1.0869819030939871e-05, "loss": 0.7205, "step": 1862 }, { "epoch": 0.05439257247963563, "grad_norm": 1.3401508969819809, "learning_rate": 1.087565674255692e-05, "loss": 0.7377, "step": 1863 }, { "epoch": 0.05442176870748299, "grad_norm": 0.7284867288369082, "learning_rate": 1.0881494454173965e-05, "loss": 0.6965, "step": 1864 }, { "epoch": 0.05445096493533035, "grad_norm": 0.8258239077067235, "learning_rate": 1.0887332165791011e-05, "loss": 0.7416, "step": 1865 }, { "epoch": 0.05448016116317772, "grad_norm": 1.6920551246816633, "learning_rate": 1.0893169877408056e-05, "loss": 0.7425, "step": 1866 }, { "epoch": 0.05450935739102508, "grad_norm": 0.9126313687304572, "learning_rate": 1.0899007589025103e-05, "loss": 0.8086, "step": 1867 }, { "epoch": 0.05453855361887244, "grad_norm": 0.7850167722139162, "learning_rate": 1.0904845300642148e-05, "loss": 0.7528, "step": 1868 }, { "epoch": 0.054567749846719804, "grad_norm": 0.7836095297752469, "learning_rate": 1.0910683012259195e-05, "loss": 0.7869, "step": 1869 }, { "epoch": 0.054596946074567165, "grad_norm": 0.7145746280106439, "learning_rate": 1.091652072387624e-05, "loss": 0.6421, "step": 1870 }, { "epoch": 0.054626142302414527, "grad_norm": 0.811312460498549, "learning_rate": 1.0922358435493288e-05, "loss": 0.7403, "step": 1871 }, { "epoch": 0.05465533853026189, "grad_norm": 0.8811747461437711, "learning_rate": 1.0928196147110333e-05, "loss": 0.7712, "step": 1872 }, { "epoch": 0.05468453475810925, "grad_norm": 0.8669801476923413, "learning_rate": 1.093403385872738e-05, "loss": 0.7796, "step": 1873 }, { "epoch": 0.05471373098595662, "grad_norm": 0.7742695148254766, "learning_rate": 1.0939871570344427e-05, "loss": 0.7232, "step": 1874 }, { "epoch": 0.05474292721380398, "grad_norm": 0.723852827289498, "learning_rate": 1.0945709281961472e-05, "loss": 0.6124, "step": 1875 }, { "epoch": 0.05477212344165134, "grad_norm": 0.7909762971379347, "learning_rate": 1.0951546993578518e-05, "loss": 0.7185, "step": 1876 }, { "epoch": 0.0548013196694987, "grad_norm": 0.7628884813664317, "learning_rate": 1.0957384705195563e-05, "loss": 0.737, "step": 1877 }, { "epoch": 0.05483051589734606, "grad_norm": 0.7977775732685647, "learning_rate": 1.0963222416812612e-05, "loss": 0.7568, "step": 1878 }, { "epoch": 0.05485971212519342, "grad_norm": 0.8281884900754916, "learning_rate": 1.0969060128429657e-05, "loss": 0.7354, "step": 1879 }, { "epoch": 0.054888908353040784, "grad_norm": 0.8446205662916993, "learning_rate": 1.0974897840046703e-05, "loss": 0.7752, "step": 1880 }, { "epoch": 0.05491810458088815, "grad_norm": 0.8012097661882607, "learning_rate": 1.0980735551663748e-05, "loss": 0.7038, "step": 1881 }, { "epoch": 0.054947300808735514, "grad_norm": 0.783138150731236, "learning_rate": 1.0986573263280795e-05, "loss": 0.7813, "step": 1882 }, { "epoch": 0.054976497036582875, "grad_norm": 0.7866836061947363, "learning_rate": 1.099241097489784e-05, "loss": 0.7917, "step": 1883 }, { "epoch": 0.055005693264430236, "grad_norm": 0.8958201525072367, "learning_rate": 1.0998248686514888e-05, "loss": 0.7179, "step": 1884 }, { "epoch": 0.0550348894922776, "grad_norm": 0.7281084027228958, "learning_rate": 1.1004086398131932e-05, "loss": 0.6674, "step": 1885 }, { "epoch": 0.05506408572012496, "grad_norm": 0.7755319476360062, "learning_rate": 1.100992410974898e-05, "loss": 0.7142, "step": 1886 }, { "epoch": 0.05509328194797232, "grad_norm": 0.7569765926382531, "learning_rate": 1.1015761821366025e-05, "loss": 0.7507, "step": 1887 }, { "epoch": 0.05512247817581968, "grad_norm": 0.8050092697421507, "learning_rate": 1.1021599532983072e-05, "loss": 0.7608, "step": 1888 }, { "epoch": 0.05515167440366705, "grad_norm": 0.8918800380486039, "learning_rate": 1.1027437244600117e-05, "loss": 0.7743, "step": 1889 }, { "epoch": 0.05518087063151441, "grad_norm": 0.8305048125204697, "learning_rate": 1.1033274956217163e-05, "loss": 0.6694, "step": 1890 }, { "epoch": 0.05521006685936177, "grad_norm": 0.7952143148851232, "learning_rate": 1.1039112667834212e-05, "loss": 0.6714, "step": 1891 }, { "epoch": 0.05523926308720913, "grad_norm": 0.73898125963259, "learning_rate": 1.1044950379451257e-05, "loss": 0.6753, "step": 1892 }, { "epoch": 0.055268459315056494, "grad_norm": 0.8852237193709135, "learning_rate": 1.1050788091068303e-05, "loss": 0.7045, "step": 1893 }, { "epoch": 0.055297655542903855, "grad_norm": 0.8972826327823676, "learning_rate": 1.1056625802685348e-05, "loss": 0.7287, "step": 1894 }, { "epoch": 0.055326851770751216, "grad_norm": 0.6945696230782153, "learning_rate": 1.1062463514302395e-05, "loss": 0.5926, "step": 1895 }, { "epoch": 0.055356047998598584, "grad_norm": 0.7400480345884065, "learning_rate": 1.106830122591944e-05, "loss": 0.6847, "step": 1896 }, { "epoch": 0.055385244226445945, "grad_norm": 0.7148075406496082, "learning_rate": 1.1074138937536487e-05, "loss": 0.6208, "step": 1897 }, { "epoch": 0.05541444045429331, "grad_norm": 0.7229937022367914, "learning_rate": 1.1079976649153532e-05, "loss": 0.6792, "step": 1898 }, { "epoch": 0.05544363668214067, "grad_norm": 0.9151470128309742, "learning_rate": 1.108581436077058e-05, "loss": 0.6929, "step": 1899 }, { "epoch": 0.05547283290998803, "grad_norm": 0.7821395949764618, "learning_rate": 1.1091652072387625e-05, "loss": 0.7136, "step": 1900 }, { "epoch": 0.05550202913783539, "grad_norm": 0.7904062049360114, "learning_rate": 1.1097489784004672e-05, "loss": 0.7703, "step": 1901 }, { "epoch": 0.05553122536568275, "grad_norm": 0.7686634537912409, "learning_rate": 1.1103327495621717e-05, "loss": 0.6892, "step": 1902 }, { "epoch": 0.05556042159353011, "grad_norm": 0.837844409328971, "learning_rate": 1.1109165207238763e-05, "loss": 0.791, "step": 1903 }, { "epoch": 0.05558961782137748, "grad_norm": 0.7665714043382823, "learning_rate": 1.1115002918855808e-05, "loss": 0.6554, "step": 1904 }, { "epoch": 0.05561881404922484, "grad_norm": 0.7569204841824453, "learning_rate": 1.1120840630472855e-05, "loss": 0.7128, "step": 1905 }, { "epoch": 0.0556480102770722, "grad_norm": 0.8808471149297507, "learning_rate": 1.1126678342089903e-05, "loss": 0.7604, "step": 1906 }, { "epoch": 0.055677206504919564, "grad_norm": 0.8111957627441794, "learning_rate": 1.1132516053706948e-05, "loss": 0.7321, "step": 1907 }, { "epoch": 0.055706402732766926, "grad_norm": 0.7408514338028014, "learning_rate": 1.1138353765323995e-05, "loss": 0.6788, "step": 1908 }, { "epoch": 0.05573559896061429, "grad_norm": 0.8072029043303279, "learning_rate": 1.114419147694104e-05, "loss": 0.7536, "step": 1909 }, { "epoch": 0.05576479518846165, "grad_norm": 0.7870258040247236, "learning_rate": 1.1150029188558087e-05, "loss": 0.688, "step": 1910 }, { "epoch": 0.055793991416309016, "grad_norm": 0.7308728446263778, "learning_rate": 1.1155866900175132e-05, "loss": 0.6503, "step": 1911 }, { "epoch": 0.05582318764415638, "grad_norm": 0.8636745529964078, "learning_rate": 1.1161704611792178e-05, "loss": 0.8106, "step": 1912 }, { "epoch": 0.05585238387200374, "grad_norm": 0.7827255624295248, "learning_rate": 1.1167542323409223e-05, "loss": 0.6858, "step": 1913 }, { "epoch": 0.0558815800998511, "grad_norm": 0.8408486481466204, "learning_rate": 1.1173380035026272e-05, "loss": 0.6361, "step": 1914 }, { "epoch": 0.05591077632769846, "grad_norm": 0.8369994722953746, "learning_rate": 1.1179217746643317e-05, "loss": 0.7578, "step": 1915 }, { "epoch": 0.05593997255554582, "grad_norm": 0.8509837128209812, "learning_rate": 1.1185055458260363e-05, "loss": 0.7476, "step": 1916 }, { "epoch": 0.05596916878339318, "grad_norm": 0.8040085278804214, "learning_rate": 1.1190893169877408e-05, "loss": 0.7554, "step": 1917 }, { "epoch": 0.055998365011240545, "grad_norm": 0.8486948742111587, "learning_rate": 1.1196730881494455e-05, "loss": 0.7385, "step": 1918 }, { "epoch": 0.05602756123908791, "grad_norm": 0.7404471471439547, "learning_rate": 1.12025685931115e-05, "loss": 0.6966, "step": 1919 }, { "epoch": 0.056056757466935274, "grad_norm": 0.7231209236324977, "learning_rate": 1.1208406304728547e-05, "loss": 0.6706, "step": 1920 }, { "epoch": 0.056085953694782635, "grad_norm": 0.8096305283631563, "learning_rate": 1.1214244016345592e-05, "loss": 0.7048, "step": 1921 }, { "epoch": 0.056115149922629996, "grad_norm": 0.8685602053284386, "learning_rate": 1.122008172796264e-05, "loss": 0.8228, "step": 1922 }, { "epoch": 0.05614434615047736, "grad_norm": 0.7953588544473036, "learning_rate": 1.1225919439579687e-05, "loss": 0.722, "step": 1923 }, { "epoch": 0.05617354237832472, "grad_norm": 0.7950181521853925, "learning_rate": 1.1231757151196732e-05, "loss": 0.6463, "step": 1924 }, { "epoch": 0.05620273860617208, "grad_norm": 0.800867164004802, "learning_rate": 1.1237594862813778e-05, "loss": 0.694, "step": 1925 }, { "epoch": 0.05623193483401945, "grad_norm": 0.758611802676238, "learning_rate": 1.1243432574430823e-05, "loss": 0.7061, "step": 1926 }, { "epoch": 0.05626113106186681, "grad_norm": 0.7108523805433937, "learning_rate": 1.124927028604787e-05, "loss": 0.6981, "step": 1927 }, { "epoch": 0.05629032728971417, "grad_norm": 0.801200991301033, "learning_rate": 1.1255107997664915e-05, "loss": 0.7683, "step": 1928 }, { "epoch": 0.05631952351756153, "grad_norm": 1.0654267894146823, "learning_rate": 1.1260945709281963e-05, "loss": 0.686, "step": 1929 }, { "epoch": 0.05634871974540889, "grad_norm": 0.8326195793043789, "learning_rate": 1.1266783420899008e-05, "loss": 0.7629, "step": 1930 }, { "epoch": 0.056377915973256254, "grad_norm": 0.7610861629418181, "learning_rate": 1.1272621132516055e-05, "loss": 0.7452, "step": 1931 }, { "epoch": 0.056407112201103615, "grad_norm": 0.7099079314129656, "learning_rate": 1.12784588441331e-05, "loss": 0.6626, "step": 1932 }, { "epoch": 0.056436308428950976, "grad_norm": 0.7932504720287382, "learning_rate": 1.1284296555750147e-05, "loss": 0.6733, "step": 1933 }, { "epoch": 0.056465504656798345, "grad_norm": 0.7703078056392306, "learning_rate": 1.1290134267367192e-05, "loss": 0.6917, "step": 1934 }, { "epoch": 0.056494700884645706, "grad_norm": 0.7847334932051818, "learning_rate": 1.1295971978984238e-05, "loss": 0.7679, "step": 1935 }, { "epoch": 0.05652389711249307, "grad_norm": 0.9482780083732747, "learning_rate": 1.1301809690601283e-05, "loss": 0.8135, "step": 1936 }, { "epoch": 0.05655309334034043, "grad_norm": 1.1821232883292632, "learning_rate": 1.1307647402218332e-05, "loss": 0.8085, "step": 1937 }, { "epoch": 0.05658228956818779, "grad_norm": 0.8295378295692267, "learning_rate": 1.1313485113835378e-05, "loss": 0.8008, "step": 1938 }, { "epoch": 0.05661148579603515, "grad_norm": 0.7119109133650218, "learning_rate": 1.1319322825452423e-05, "loss": 0.6141, "step": 1939 }, { "epoch": 0.05664068202388251, "grad_norm": 0.7554928888911011, "learning_rate": 1.132516053706947e-05, "loss": 0.6552, "step": 1940 }, { "epoch": 0.05666987825172988, "grad_norm": 0.8304639336885384, "learning_rate": 1.1330998248686515e-05, "loss": 0.7421, "step": 1941 }, { "epoch": 0.05669907447957724, "grad_norm": 0.7681047287381508, "learning_rate": 1.1336835960303564e-05, "loss": 0.7251, "step": 1942 }, { "epoch": 0.0567282707074246, "grad_norm": 0.8160055818847255, "learning_rate": 1.1342673671920607e-05, "loss": 0.706, "step": 1943 }, { "epoch": 0.056757466935271964, "grad_norm": 0.7385541362032911, "learning_rate": 1.1348511383537655e-05, "loss": 0.7279, "step": 1944 }, { "epoch": 0.056786663163119325, "grad_norm": 0.7356650334584581, "learning_rate": 1.13543490951547e-05, "loss": 0.664, "step": 1945 }, { "epoch": 0.056815859390966686, "grad_norm": 1.4059485035969312, "learning_rate": 1.1360186806771747e-05, "loss": 0.7282, "step": 1946 }, { "epoch": 0.05684505561881405, "grad_norm": 0.7105137136577815, "learning_rate": 1.1366024518388792e-05, "loss": 0.6187, "step": 1947 }, { "epoch": 0.05687425184666141, "grad_norm": 0.7897228820628974, "learning_rate": 1.1371862230005839e-05, "loss": 0.6852, "step": 1948 }, { "epoch": 0.056903448074508776, "grad_norm": 0.6849089039622476, "learning_rate": 1.1377699941622883e-05, "loss": 0.585, "step": 1949 }, { "epoch": 0.05693264430235614, "grad_norm": 0.7046661728271987, "learning_rate": 1.1383537653239932e-05, "loss": 0.5769, "step": 1950 }, { "epoch": 0.0569618405302035, "grad_norm": 0.7276121172599967, "learning_rate": 1.1389375364856977e-05, "loss": 0.678, "step": 1951 }, { "epoch": 0.05699103675805086, "grad_norm": 0.8010440966098618, "learning_rate": 1.1395213076474024e-05, "loss": 0.751, "step": 1952 }, { "epoch": 0.05702023298589822, "grad_norm": 0.8028457846845692, "learning_rate": 1.1401050788091069e-05, "loss": 0.7743, "step": 1953 }, { "epoch": 0.05704942921374558, "grad_norm": 0.7697181647003931, "learning_rate": 1.1406888499708115e-05, "loss": 0.7247, "step": 1954 }, { "epoch": 0.057078625441592944, "grad_norm": 0.7913773425083405, "learning_rate": 1.1412726211325162e-05, "loss": 0.7123, "step": 1955 }, { "epoch": 0.057107821669440305, "grad_norm": 0.7023442724276773, "learning_rate": 1.1418563922942207e-05, "loss": 0.6191, "step": 1956 }, { "epoch": 0.05713701789728767, "grad_norm": 0.8526707571414466, "learning_rate": 1.1424401634559255e-05, "loss": 0.7428, "step": 1957 }, { "epoch": 0.057166214125135034, "grad_norm": 0.7849593123245517, "learning_rate": 1.14302393461763e-05, "loss": 0.7001, "step": 1958 }, { "epoch": 0.057195410352982395, "grad_norm": 0.7615700243133943, "learning_rate": 1.1436077057793347e-05, "loss": 0.7231, "step": 1959 }, { "epoch": 0.05722460658082976, "grad_norm": 0.7184641455276682, "learning_rate": 1.1441914769410392e-05, "loss": 0.7163, "step": 1960 }, { "epoch": 0.05725380280867712, "grad_norm": 0.7167296065256606, "learning_rate": 1.1447752481027439e-05, "loss": 0.625, "step": 1961 }, { "epoch": 0.05728299903652448, "grad_norm": 0.8389175046801735, "learning_rate": 1.1453590192644484e-05, "loss": 0.6605, "step": 1962 }, { "epoch": 0.05731219526437184, "grad_norm": 0.7592725040655804, "learning_rate": 1.145942790426153e-05, "loss": 0.7288, "step": 1963 }, { "epoch": 0.05734139149221921, "grad_norm": 0.7833754756725732, "learning_rate": 1.1465265615878575e-05, "loss": 0.7827, "step": 1964 }, { "epoch": 0.05737058772006657, "grad_norm": 0.9167923863014358, "learning_rate": 1.1471103327495624e-05, "loss": 0.757, "step": 1965 }, { "epoch": 0.05739978394791393, "grad_norm": 0.809680943069489, "learning_rate": 1.1476941039112669e-05, "loss": 0.7735, "step": 1966 }, { "epoch": 0.05742898017576129, "grad_norm": 0.7188151725000417, "learning_rate": 1.1482778750729715e-05, "loss": 0.6497, "step": 1967 }, { "epoch": 0.05745817640360865, "grad_norm": 0.8388808524564941, "learning_rate": 1.148861646234676e-05, "loss": 0.7426, "step": 1968 }, { "epoch": 0.057487372631456014, "grad_norm": 0.770160846873455, "learning_rate": 1.1494454173963807e-05, "loss": 0.7128, "step": 1969 }, { "epoch": 0.057516568859303376, "grad_norm": 0.727182475427761, "learning_rate": 1.1500291885580854e-05, "loss": 0.6454, "step": 1970 }, { "epoch": 0.05754576508715074, "grad_norm": 0.8440708013454996, "learning_rate": 1.1506129597197899e-05, "loss": 0.775, "step": 1971 }, { "epoch": 0.057574961314998105, "grad_norm": 0.7772160677522854, "learning_rate": 1.1511967308814947e-05, "loss": 0.6221, "step": 1972 }, { "epoch": 0.057604157542845466, "grad_norm": 0.7950431171867883, "learning_rate": 1.1517805020431992e-05, "loss": 0.7113, "step": 1973 }, { "epoch": 0.05763335377069283, "grad_norm": 0.7911253697889595, "learning_rate": 1.1523642732049039e-05, "loss": 0.7727, "step": 1974 }, { "epoch": 0.05766254999854019, "grad_norm": 0.7707940507159888, "learning_rate": 1.1529480443666084e-05, "loss": 0.7397, "step": 1975 }, { "epoch": 0.05769174622638755, "grad_norm": 0.7576308094377266, "learning_rate": 1.153531815528313e-05, "loss": 0.6889, "step": 1976 }, { "epoch": 0.05772094245423491, "grad_norm": 0.7748023696258494, "learning_rate": 1.1541155866900175e-05, "loss": 0.6771, "step": 1977 }, { "epoch": 0.05775013868208227, "grad_norm": 0.7549937938419758, "learning_rate": 1.1546993578517222e-05, "loss": 0.7091, "step": 1978 }, { "epoch": 0.05777933490992964, "grad_norm": 0.7669828240044627, "learning_rate": 1.1552831290134267e-05, "loss": 0.7623, "step": 1979 }, { "epoch": 0.057808531137777, "grad_norm": 0.8146590943210694, "learning_rate": 1.1558669001751315e-05, "loss": 0.8048, "step": 1980 }, { "epoch": 0.05783772736562436, "grad_norm": 0.6916532850012981, "learning_rate": 1.156450671336836e-05, "loss": 0.6248, "step": 1981 }, { "epoch": 0.057866923593471724, "grad_norm": 0.7830978510173839, "learning_rate": 1.1570344424985407e-05, "loss": 0.7071, "step": 1982 }, { "epoch": 0.057896119821319085, "grad_norm": 0.8339284714441102, "learning_rate": 1.1576182136602452e-05, "loss": 0.7992, "step": 1983 }, { "epoch": 0.057925316049166446, "grad_norm": 0.7591445981688353, "learning_rate": 1.1582019848219499e-05, "loss": 0.6903, "step": 1984 }, { "epoch": 0.05795451227701381, "grad_norm": 0.7765154599334231, "learning_rate": 1.1587857559836544e-05, "loss": 0.7222, "step": 1985 }, { "epoch": 0.05798370850486117, "grad_norm": 0.8133121746368234, "learning_rate": 1.159369527145359e-05, "loss": 0.7696, "step": 1986 }, { "epoch": 0.05801290473270854, "grad_norm": 0.6877875465346811, "learning_rate": 1.1599532983070639e-05, "loss": 0.6056, "step": 1987 }, { "epoch": 0.0580421009605559, "grad_norm": 0.7489669533197348, "learning_rate": 1.1605370694687684e-05, "loss": 0.6392, "step": 1988 }, { "epoch": 0.05807129718840326, "grad_norm": 0.777582064393268, "learning_rate": 1.161120840630473e-05, "loss": 0.7177, "step": 1989 }, { "epoch": 0.05810049341625062, "grad_norm": 0.7619009625143967, "learning_rate": 1.1617046117921775e-05, "loss": 0.7147, "step": 1990 }, { "epoch": 0.05812968964409798, "grad_norm": 0.8716548681616275, "learning_rate": 1.1622883829538822e-05, "loss": 0.7263, "step": 1991 }, { "epoch": 0.05815888587194534, "grad_norm": 0.6901105310508802, "learning_rate": 1.1628721541155867e-05, "loss": 0.61, "step": 1992 }, { "epoch": 0.058188082099792704, "grad_norm": 0.7197350654600589, "learning_rate": 1.1634559252772914e-05, "loss": 0.6454, "step": 1993 }, { "epoch": 0.05821727832764007, "grad_norm": 0.7723450996872785, "learning_rate": 1.1640396964389959e-05, "loss": 0.6783, "step": 1994 }, { "epoch": 0.05824647455548743, "grad_norm": 0.7180395719820699, "learning_rate": 1.1646234676007007e-05, "loss": 0.6025, "step": 1995 }, { "epoch": 0.058275670783334795, "grad_norm": 0.795453220338147, "learning_rate": 1.1652072387624052e-05, "loss": 0.7427, "step": 1996 }, { "epoch": 0.058304867011182156, "grad_norm": 0.749676287433765, "learning_rate": 1.1657910099241099e-05, "loss": 0.7103, "step": 1997 }, { "epoch": 0.05833406323902952, "grad_norm": 0.8612620718303406, "learning_rate": 1.1663747810858144e-05, "loss": 0.6925, "step": 1998 }, { "epoch": 0.05836325946687688, "grad_norm": 0.737500267156837, "learning_rate": 1.166958552247519e-05, "loss": 0.6689, "step": 1999 }, { "epoch": 0.05839245569472424, "grad_norm": 0.8877422886556574, "learning_rate": 1.1675423234092235e-05, "loss": 0.8547, "step": 2000 }, { "epoch": 0.0584216519225716, "grad_norm": 0.8136135999448624, "learning_rate": 1.1681260945709284e-05, "loss": 0.7266, "step": 2001 }, { "epoch": 0.05845084815041897, "grad_norm": 1.9061694021447557, "learning_rate": 1.168709865732633e-05, "loss": 0.7415, "step": 2002 }, { "epoch": 0.05848004437826633, "grad_norm": 0.7290588689226075, "learning_rate": 1.1692936368943375e-05, "loss": 0.6301, "step": 2003 }, { "epoch": 0.05850924060611369, "grad_norm": 0.8056794757881061, "learning_rate": 1.1698774080560422e-05, "loss": 0.7504, "step": 2004 }, { "epoch": 0.05853843683396105, "grad_norm": 0.8478272171701473, "learning_rate": 1.1704611792177467e-05, "loss": 0.8061, "step": 2005 }, { "epoch": 0.05856763306180841, "grad_norm": 0.8504339447575998, "learning_rate": 1.1710449503794514e-05, "loss": 0.7548, "step": 2006 }, { "epoch": 0.058596829289655775, "grad_norm": 0.7827173890440251, "learning_rate": 1.1716287215411559e-05, "loss": 0.6149, "step": 2007 }, { "epoch": 0.058626025517503136, "grad_norm": 0.7865687372867455, "learning_rate": 1.1722124927028607e-05, "loss": 0.7321, "step": 2008 }, { "epoch": 0.058655221745350504, "grad_norm": 0.8850089595742477, "learning_rate": 1.1727962638645652e-05, "loss": 0.7937, "step": 2009 }, { "epoch": 0.058684417973197865, "grad_norm": 1.0149210015663295, "learning_rate": 1.1733800350262699e-05, "loss": 0.6751, "step": 2010 }, { "epoch": 0.058713614201045226, "grad_norm": 0.8448315959856763, "learning_rate": 1.1739638061879744e-05, "loss": 0.7672, "step": 2011 }, { "epoch": 0.05874281042889259, "grad_norm": 0.807463013725601, "learning_rate": 1.174547577349679e-05, "loss": 0.7114, "step": 2012 }, { "epoch": 0.05877200665673995, "grad_norm": 0.8871235638486199, "learning_rate": 1.1751313485113835e-05, "loss": 0.8203, "step": 2013 }, { "epoch": 0.05880120288458731, "grad_norm": 0.8702847054237678, "learning_rate": 1.1757151196730882e-05, "loss": 0.6587, "step": 2014 }, { "epoch": 0.05883039911243467, "grad_norm": 0.8849804887520728, "learning_rate": 1.1762988908347927e-05, "loss": 0.7375, "step": 2015 }, { "epoch": 0.05885959534028203, "grad_norm": 0.8357016899039731, "learning_rate": 1.1768826619964975e-05, "loss": 0.7686, "step": 2016 }, { "epoch": 0.0588887915681294, "grad_norm": 0.7415067495070137, "learning_rate": 1.177466433158202e-05, "loss": 0.7487, "step": 2017 }, { "epoch": 0.05891798779597676, "grad_norm": 0.7270821543199893, "learning_rate": 1.1780502043199067e-05, "loss": 0.6579, "step": 2018 }, { "epoch": 0.05894718402382412, "grad_norm": 0.9054273837573834, "learning_rate": 1.1786339754816114e-05, "loss": 0.8189, "step": 2019 }, { "epoch": 0.058976380251671484, "grad_norm": 0.7665118476709023, "learning_rate": 1.1792177466433159e-05, "loss": 0.7415, "step": 2020 }, { "epoch": 0.059005576479518845, "grad_norm": 0.8890129809687486, "learning_rate": 1.1798015178050206e-05, "loss": 0.7114, "step": 2021 }, { "epoch": 0.059034772707366207, "grad_norm": 0.8241514242638891, "learning_rate": 1.180385288966725e-05, "loss": 0.7345, "step": 2022 }, { "epoch": 0.05906396893521357, "grad_norm": 0.7248412251034274, "learning_rate": 1.1809690601284299e-05, "loss": 0.6766, "step": 2023 }, { "epoch": 0.059093165163060936, "grad_norm": 0.7645159121026969, "learning_rate": 1.1815528312901344e-05, "loss": 0.7552, "step": 2024 }, { "epoch": 0.0591223613909083, "grad_norm": 0.8783177714321402, "learning_rate": 1.182136602451839e-05, "loss": 0.6557, "step": 2025 }, { "epoch": 0.05915155761875566, "grad_norm": 0.7507401733485674, "learning_rate": 1.1827203736135436e-05, "loss": 0.694, "step": 2026 }, { "epoch": 0.05918075384660302, "grad_norm": 0.7269190278338264, "learning_rate": 1.1833041447752482e-05, "loss": 0.6755, "step": 2027 }, { "epoch": 0.05920995007445038, "grad_norm": 0.7592310953942851, "learning_rate": 1.1838879159369527e-05, "loss": 0.7228, "step": 2028 }, { "epoch": 0.05923914630229774, "grad_norm": 3.1902004652182843, "learning_rate": 1.1844716870986574e-05, "loss": 0.8106, "step": 2029 }, { "epoch": 0.0592683425301451, "grad_norm": 0.7901735665317062, "learning_rate": 1.1850554582603619e-05, "loss": 0.6932, "step": 2030 }, { "epoch": 0.059297538757992464, "grad_norm": 0.7494318855944576, "learning_rate": 1.1856392294220667e-05, "loss": 0.7197, "step": 2031 }, { "epoch": 0.05932673498583983, "grad_norm": 0.778164273026459, "learning_rate": 1.1862230005837712e-05, "loss": 0.7378, "step": 2032 }, { "epoch": 0.059355931213687194, "grad_norm": 0.7481213365256479, "learning_rate": 1.1868067717454759e-05, "loss": 0.6704, "step": 2033 }, { "epoch": 0.059385127441534555, "grad_norm": 0.7559607419096284, "learning_rate": 1.1873905429071806e-05, "loss": 0.7122, "step": 2034 }, { "epoch": 0.059414323669381916, "grad_norm": 0.8531319454042923, "learning_rate": 1.187974314068885e-05, "loss": 0.7775, "step": 2035 }, { "epoch": 0.05944351989722928, "grad_norm": 0.7593961056426238, "learning_rate": 1.1885580852305897e-05, "loss": 0.7608, "step": 2036 }, { "epoch": 0.05947271612507664, "grad_norm": 0.6773304447514068, "learning_rate": 1.1891418563922942e-05, "loss": 0.5456, "step": 2037 }, { "epoch": 0.059501912352924, "grad_norm": 0.771910820952376, "learning_rate": 1.189725627553999e-05, "loss": 0.7456, "step": 2038 }, { "epoch": 0.05953110858077137, "grad_norm": 0.8367402941332494, "learning_rate": 1.1903093987157036e-05, "loss": 0.8166, "step": 2039 }, { "epoch": 0.05956030480861873, "grad_norm": 0.7993701767834998, "learning_rate": 1.1908931698774082e-05, "loss": 0.788, "step": 2040 }, { "epoch": 0.05958950103646609, "grad_norm": 0.777331063540904, "learning_rate": 1.1914769410391127e-05, "loss": 0.7318, "step": 2041 }, { "epoch": 0.05961869726431345, "grad_norm": 0.8044742953360141, "learning_rate": 1.1920607122008174e-05, "loss": 0.7423, "step": 2042 }, { "epoch": 0.05964789349216081, "grad_norm": 0.9226920808820565, "learning_rate": 1.1926444833625219e-05, "loss": 0.6612, "step": 2043 }, { "epoch": 0.059677089720008174, "grad_norm": 0.8082394181403217, "learning_rate": 1.1932282545242266e-05, "loss": 0.6902, "step": 2044 }, { "epoch": 0.059706285947855535, "grad_norm": 0.8394333721484494, "learning_rate": 1.193812025685931e-05, "loss": 0.7226, "step": 2045 }, { "epoch": 0.059735482175702896, "grad_norm": 1.0347727157133881, "learning_rate": 1.1943957968476359e-05, "loss": 0.7158, "step": 2046 }, { "epoch": 0.059764678403550264, "grad_norm": 0.7405961422445645, "learning_rate": 1.1949795680093404e-05, "loss": 0.6236, "step": 2047 }, { "epoch": 0.059793874631397625, "grad_norm": 0.788815325164338, "learning_rate": 1.195563339171045e-05, "loss": 0.722, "step": 2048 }, { "epoch": 0.05982307085924499, "grad_norm": 0.8170153966561983, "learning_rate": 1.1961471103327496e-05, "loss": 0.7909, "step": 2049 }, { "epoch": 0.05985226708709235, "grad_norm": 0.7458673947266379, "learning_rate": 1.1967308814944542e-05, "loss": 0.7215, "step": 2050 }, { "epoch": 0.05988146331493971, "grad_norm": 0.7152581710465433, "learning_rate": 1.197314652656159e-05, "loss": 0.6404, "step": 2051 }, { "epoch": 0.05991065954278707, "grad_norm": 0.7299358118633181, "learning_rate": 1.1978984238178634e-05, "loss": 0.683, "step": 2052 }, { "epoch": 0.05993985577063443, "grad_norm": 0.9032494752529737, "learning_rate": 1.1984821949795682e-05, "loss": 0.6968, "step": 2053 }, { "epoch": 0.05996905199848179, "grad_norm": 0.8021312460894691, "learning_rate": 1.1990659661412727e-05, "loss": 0.7491, "step": 2054 }, { "epoch": 0.05999824822632916, "grad_norm": 0.8203437906307469, "learning_rate": 1.1996497373029774e-05, "loss": 0.7998, "step": 2055 }, { "epoch": 0.06002744445417652, "grad_norm": 0.7620015938027702, "learning_rate": 1.2002335084646819e-05, "loss": 0.7309, "step": 2056 }, { "epoch": 0.06005664068202388, "grad_norm": 0.7413626404637844, "learning_rate": 1.2008172796263866e-05, "loss": 0.7132, "step": 2057 }, { "epoch": 0.060085836909871244, "grad_norm": 0.7126675152090624, "learning_rate": 1.201401050788091e-05, "loss": 0.6582, "step": 2058 }, { "epoch": 0.060115033137718606, "grad_norm": 0.7925896713851334, "learning_rate": 1.2019848219497959e-05, "loss": 0.7429, "step": 2059 }, { "epoch": 0.06014422936556597, "grad_norm": 0.7881627949708438, "learning_rate": 1.2025685931115002e-05, "loss": 0.6816, "step": 2060 }, { "epoch": 0.06017342559341333, "grad_norm": 0.7461153109713995, "learning_rate": 1.203152364273205e-05, "loss": 0.6839, "step": 2061 }, { "epoch": 0.060202621821260696, "grad_norm": 0.8015728110261926, "learning_rate": 1.2037361354349096e-05, "loss": 0.7015, "step": 2062 }, { "epoch": 0.06023181804910806, "grad_norm": 1.3321506802506815, "learning_rate": 1.2043199065966142e-05, "loss": 0.7652, "step": 2063 }, { "epoch": 0.06026101427695542, "grad_norm": 0.9531597255455826, "learning_rate": 1.2049036777583187e-05, "loss": 0.7384, "step": 2064 }, { "epoch": 0.06029021050480278, "grad_norm": 0.8097367185497323, "learning_rate": 1.2054874489200234e-05, "loss": 0.7989, "step": 2065 }, { "epoch": 0.06031940673265014, "grad_norm": 0.8847487486067359, "learning_rate": 1.2060712200817282e-05, "loss": 0.6561, "step": 2066 }, { "epoch": 0.0603486029604975, "grad_norm": 0.7576401715535455, "learning_rate": 1.2066549912434327e-05, "loss": 0.6985, "step": 2067 }, { "epoch": 0.06037779918834486, "grad_norm": 0.7365277895237884, "learning_rate": 1.2072387624051374e-05, "loss": 0.6665, "step": 2068 }, { "epoch": 0.060406995416192225, "grad_norm": 0.8505453232935043, "learning_rate": 1.2078225335668419e-05, "loss": 0.6397, "step": 2069 }, { "epoch": 0.06043619164403959, "grad_norm": 0.78173395297125, "learning_rate": 1.2084063047285466e-05, "loss": 0.7613, "step": 2070 }, { "epoch": 0.060465387871886954, "grad_norm": 0.7847274417847655, "learning_rate": 1.208990075890251e-05, "loss": 0.733, "step": 2071 }, { "epoch": 0.060494584099734315, "grad_norm": 0.7798828383343356, "learning_rate": 1.2095738470519557e-05, "loss": 0.7291, "step": 2072 }, { "epoch": 0.060523780327581676, "grad_norm": 0.8483387218676437, "learning_rate": 1.2101576182136602e-05, "loss": 0.7354, "step": 2073 }, { "epoch": 0.06055297655542904, "grad_norm": 0.7660221449826995, "learning_rate": 1.210741389375365e-05, "loss": 0.7318, "step": 2074 }, { "epoch": 0.0605821727832764, "grad_norm": 0.7639059164746057, "learning_rate": 1.2113251605370696e-05, "loss": 0.7203, "step": 2075 }, { "epoch": 0.06061136901112376, "grad_norm": 0.7922403050459202, "learning_rate": 1.2119089316987742e-05, "loss": 0.6604, "step": 2076 }, { "epoch": 0.06064056523897113, "grad_norm": 0.8528461623654126, "learning_rate": 1.2124927028604787e-05, "loss": 0.7403, "step": 2077 }, { "epoch": 0.06066976146681849, "grad_norm": 0.7761389403399714, "learning_rate": 1.2130764740221834e-05, "loss": 0.7006, "step": 2078 }, { "epoch": 0.06069895769466585, "grad_norm": 2.4773758631745655, "learning_rate": 1.2136602451838879e-05, "loss": 0.7164, "step": 2079 }, { "epoch": 0.06072815392251321, "grad_norm": 0.7949094294601882, "learning_rate": 1.2142440163455926e-05, "loss": 0.6944, "step": 2080 }, { "epoch": 0.06075735015036057, "grad_norm": 0.7086223263750185, "learning_rate": 1.214827787507297e-05, "loss": 0.6124, "step": 2081 }, { "epoch": 0.060786546378207934, "grad_norm": 0.7510386011952295, "learning_rate": 1.2154115586690019e-05, "loss": 0.6883, "step": 2082 }, { "epoch": 0.060815742606055295, "grad_norm": 0.7724246087692835, "learning_rate": 1.2159953298307066e-05, "loss": 0.7117, "step": 2083 }, { "epoch": 0.060844938833902656, "grad_norm": 0.8779812793161502, "learning_rate": 1.216579100992411e-05, "loss": 0.7168, "step": 2084 }, { "epoch": 0.060874135061750025, "grad_norm": 0.7593031663996785, "learning_rate": 1.2171628721541157e-05, "loss": 0.7414, "step": 2085 }, { "epoch": 0.060903331289597386, "grad_norm": 0.8031882451974403, "learning_rate": 1.2177466433158202e-05, "loss": 0.8239, "step": 2086 }, { "epoch": 0.06093252751744475, "grad_norm": 0.7629613700839234, "learning_rate": 1.2183304144775249e-05, "loss": 0.6999, "step": 2087 }, { "epoch": 0.06096172374529211, "grad_norm": 0.738419088598312, "learning_rate": 1.2189141856392294e-05, "loss": 0.6494, "step": 2088 }, { "epoch": 0.06099091997313947, "grad_norm": 0.7968292611893623, "learning_rate": 1.2194979568009342e-05, "loss": 0.7715, "step": 2089 }, { "epoch": 0.06102011620098683, "grad_norm": 0.7177390726384127, "learning_rate": 1.2200817279626387e-05, "loss": 0.6429, "step": 2090 }, { "epoch": 0.06104931242883419, "grad_norm": 0.9464665601278109, "learning_rate": 1.2206654991243434e-05, "loss": 0.7507, "step": 2091 }, { "epoch": 0.06107850865668156, "grad_norm": 0.7612069670369633, "learning_rate": 1.2212492702860479e-05, "loss": 0.6718, "step": 2092 }, { "epoch": 0.06110770488452892, "grad_norm": 0.7313103747259232, "learning_rate": 1.2218330414477526e-05, "loss": 0.636, "step": 2093 }, { "epoch": 0.06113690111237628, "grad_norm": 0.9055205038434115, "learning_rate": 1.222416812609457e-05, "loss": 0.7507, "step": 2094 }, { "epoch": 0.061166097340223644, "grad_norm": 0.7575962752496961, "learning_rate": 1.2230005837711617e-05, "loss": 0.728, "step": 2095 }, { "epoch": 0.061195293568071005, "grad_norm": 0.8954294387164748, "learning_rate": 1.2235843549328662e-05, "loss": 0.7575, "step": 2096 }, { "epoch": 0.061224489795918366, "grad_norm": 0.9268111851596337, "learning_rate": 1.224168126094571e-05, "loss": 0.7495, "step": 2097 }, { "epoch": 0.06125368602376573, "grad_norm": 0.7926122534264983, "learning_rate": 1.2247518972562758e-05, "loss": 0.785, "step": 2098 }, { "epoch": 0.06128288225161309, "grad_norm": 0.8576964592951027, "learning_rate": 1.2253356684179803e-05, "loss": 0.7683, "step": 2099 }, { "epoch": 0.061312078479460456, "grad_norm": 0.789690632623687, "learning_rate": 1.225919439579685e-05, "loss": 0.6753, "step": 2100 }, { "epoch": 0.06134127470730782, "grad_norm": 0.7664944035490147, "learning_rate": 1.2265032107413894e-05, "loss": 0.7449, "step": 2101 }, { "epoch": 0.06137047093515518, "grad_norm": 0.8731351664808462, "learning_rate": 1.2270869819030941e-05, "loss": 0.7421, "step": 2102 }, { "epoch": 0.06139966716300254, "grad_norm": 0.7572789587388904, "learning_rate": 1.2276707530647986e-05, "loss": 0.6873, "step": 2103 }, { "epoch": 0.0614288633908499, "grad_norm": 0.778592059660983, "learning_rate": 1.2282545242265034e-05, "loss": 0.7839, "step": 2104 }, { "epoch": 0.06145805961869726, "grad_norm": 0.8593224201004184, "learning_rate": 1.228838295388208e-05, "loss": 0.704, "step": 2105 }, { "epoch": 0.061487255846544624, "grad_norm": 0.7495229118859617, "learning_rate": 1.2294220665499126e-05, "loss": 0.6409, "step": 2106 }, { "epoch": 0.06151645207439199, "grad_norm": 0.7728187999853876, "learning_rate": 1.2300058377116171e-05, "loss": 0.7432, "step": 2107 }, { "epoch": 0.06154564830223935, "grad_norm": 0.8174650723980171, "learning_rate": 1.2305896088733218e-05, "loss": 0.7767, "step": 2108 }, { "epoch": 0.061574844530086714, "grad_norm": 0.7226435517391466, "learning_rate": 1.2311733800350263e-05, "loss": 0.6237, "step": 2109 }, { "epoch": 0.061604040757934075, "grad_norm": 0.8160308884128014, "learning_rate": 1.231757151196731e-05, "loss": 0.8205, "step": 2110 }, { "epoch": 0.06163323698578144, "grad_norm": 0.7559792963039502, "learning_rate": 1.2323409223584354e-05, "loss": 0.6925, "step": 2111 }, { "epoch": 0.0616624332136288, "grad_norm": 1.683329313789862, "learning_rate": 1.2329246935201403e-05, "loss": 0.8486, "step": 2112 }, { "epoch": 0.06169162944147616, "grad_norm": 0.8332513353044513, "learning_rate": 1.2335084646818448e-05, "loss": 0.84, "step": 2113 }, { "epoch": 0.06172082566932352, "grad_norm": 0.775913160767304, "learning_rate": 1.2340922358435494e-05, "loss": 0.7592, "step": 2114 }, { "epoch": 0.06175002189717089, "grad_norm": 0.8562121088256167, "learning_rate": 1.2346760070052541e-05, "loss": 0.6806, "step": 2115 }, { "epoch": 0.06177921812501825, "grad_norm": 0.7770582819271233, "learning_rate": 1.2352597781669586e-05, "loss": 0.6777, "step": 2116 }, { "epoch": 0.06180841435286561, "grad_norm": 0.8030900553749394, "learning_rate": 1.2358435493286634e-05, "loss": 0.6975, "step": 2117 }, { "epoch": 0.06183761058071297, "grad_norm": 0.7359161748575592, "learning_rate": 1.236427320490368e-05, "loss": 0.6055, "step": 2118 }, { "epoch": 0.06186680680856033, "grad_norm": 0.7747518273959824, "learning_rate": 1.2370110916520726e-05, "loss": 0.7604, "step": 2119 }, { "epoch": 0.061896003036407694, "grad_norm": 0.8042419722378382, "learning_rate": 1.2375948628137771e-05, "loss": 0.7859, "step": 2120 }, { "epoch": 0.061925199264255056, "grad_norm": 0.731194694314883, "learning_rate": 1.2381786339754818e-05, "loss": 0.6986, "step": 2121 }, { "epoch": 0.061954395492102424, "grad_norm": 0.8087705595496772, "learning_rate": 1.2387624051371863e-05, "loss": 0.7875, "step": 2122 }, { "epoch": 0.061983591719949785, "grad_norm": 0.795472562109297, "learning_rate": 1.239346176298891e-05, "loss": 0.7425, "step": 2123 }, { "epoch": 0.062012787947797146, "grad_norm": 0.7711223123590769, "learning_rate": 1.2399299474605954e-05, "loss": 0.7014, "step": 2124 }, { "epoch": 0.06204198417564451, "grad_norm": 1.7135080626890977, "learning_rate": 1.2405137186223003e-05, "loss": 0.7033, "step": 2125 }, { "epoch": 0.06207118040349187, "grad_norm": 0.8450166226604687, "learning_rate": 1.2410974897840048e-05, "loss": 0.7847, "step": 2126 }, { "epoch": 0.06210037663133923, "grad_norm": 0.7892426298599204, "learning_rate": 1.2416812609457094e-05, "loss": 0.7641, "step": 2127 }, { "epoch": 0.06212957285918659, "grad_norm": 0.7965736274623096, "learning_rate": 1.242265032107414e-05, "loss": 0.728, "step": 2128 }, { "epoch": 0.06215876908703395, "grad_norm": 0.8664465911455549, "learning_rate": 1.2428488032691186e-05, "loss": 0.6788, "step": 2129 }, { "epoch": 0.06218796531488132, "grad_norm": 0.8163211907366776, "learning_rate": 1.2434325744308233e-05, "loss": 0.7169, "step": 2130 }, { "epoch": 0.06221716154272868, "grad_norm": 0.7239352974352184, "learning_rate": 1.2440163455925278e-05, "loss": 0.6219, "step": 2131 }, { "epoch": 0.06224635777057604, "grad_norm": 0.7824806541687417, "learning_rate": 1.2446001167542326e-05, "loss": 0.6893, "step": 2132 }, { "epoch": 0.062275553998423404, "grad_norm": 0.8866769658011956, "learning_rate": 1.2451838879159371e-05, "loss": 0.6447, "step": 2133 }, { "epoch": 0.062304750226270765, "grad_norm": 0.7247422211172587, "learning_rate": 1.2457676590776418e-05, "loss": 0.6547, "step": 2134 }, { "epoch": 0.062333946454118126, "grad_norm": 0.7511853608232, "learning_rate": 1.2463514302393463e-05, "loss": 0.7375, "step": 2135 }, { "epoch": 0.06236314268196549, "grad_norm": 0.7694952982660209, "learning_rate": 1.246935201401051e-05, "loss": 0.7954, "step": 2136 }, { "epoch": 0.062392338909812856, "grad_norm": 0.7574499850528897, "learning_rate": 1.2475189725627554e-05, "loss": 0.6825, "step": 2137 }, { "epoch": 0.06242153513766022, "grad_norm": 0.7088165374485823, "learning_rate": 1.2481027437244601e-05, "loss": 0.6458, "step": 2138 }, { "epoch": 0.06245073136550758, "grad_norm": 0.8099815036634799, "learning_rate": 1.2486865148861646e-05, "loss": 0.7747, "step": 2139 }, { "epoch": 0.06247992759335494, "grad_norm": 0.8277818059198881, "learning_rate": 1.2492702860478694e-05, "loss": 0.719, "step": 2140 }, { "epoch": 0.0625091238212023, "grad_norm": 0.7045871887056188, "learning_rate": 1.249854057209574e-05, "loss": 0.6138, "step": 2141 }, { "epoch": 0.06253832004904966, "grad_norm": 0.8090249151384247, "learning_rate": 1.2504378283712786e-05, "loss": 0.7621, "step": 2142 }, { "epoch": 0.06256751627689702, "grad_norm": 11.131776408074003, "learning_rate": 1.2510215995329831e-05, "loss": 1.2775, "step": 2143 }, { "epoch": 0.06259671250474438, "grad_norm": 0.831769642387258, "learning_rate": 1.2516053706946878e-05, "loss": 0.7916, "step": 2144 }, { "epoch": 0.06262590873259175, "grad_norm": 0.925924601503146, "learning_rate": 1.2521891418563923e-05, "loss": 0.7323, "step": 2145 }, { "epoch": 0.0626551049604391, "grad_norm": 0.7695294943589052, "learning_rate": 1.252772913018097e-05, "loss": 0.755, "step": 2146 }, { "epoch": 0.06268430118828647, "grad_norm": 0.7558619464590659, "learning_rate": 1.2533566841798018e-05, "loss": 0.7477, "step": 2147 }, { "epoch": 0.06271349741613383, "grad_norm": 0.7238312960553362, "learning_rate": 1.2539404553415063e-05, "loss": 0.6282, "step": 2148 }, { "epoch": 0.0627426936439812, "grad_norm": 0.8921271509124951, "learning_rate": 1.254524226503211e-05, "loss": 0.7778, "step": 2149 }, { "epoch": 0.06277188987182857, "grad_norm": 0.8579548458973415, "learning_rate": 1.2551079976649154e-05, "loss": 0.8065, "step": 2150 }, { "epoch": 0.06280108609967593, "grad_norm": 0.7382656106286155, "learning_rate": 1.2556917688266201e-05, "loss": 0.7348, "step": 2151 }, { "epoch": 0.06283028232752329, "grad_norm": 0.716458025855731, "learning_rate": 1.2562755399883246e-05, "loss": 0.6463, "step": 2152 }, { "epoch": 0.06285947855537065, "grad_norm": 0.7927132175486843, "learning_rate": 1.2568593111500293e-05, "loss": 0.6915, "step": 2153 }, { "epoch": 0.06288867478321801, "grad_norm": 0.8986567971198082, "learning_rate": 1.2574430823117338e-05, "loss": 0.8244, "step": 2154 }, { "epoch": 0.06291787101106537, "grad_norm": 0.7085535049510511, "learning_rate": 1.2580268534734386e-05, "loss": 0.6948, "step": 2155 }, { "epoch": 0.06294706723891273, "grad_norm": 0.7864289428160424, "learning_rate": 1.2586106246351431e-05, "loss": 0.797, "step": 2156 }, { "epoch": 0.0629762634667601, "grad_norm": 0.7456412756318997, "learning_rate": 1.2591943957968478e-05, "loss": 0.7101, "step": 2157 }, { "epoch": 0.06300545969460745, "grad_norm": 0.7283726252822842, "learning_rate": 1.2597781669585523e-05, "loss": 0.6332, "step": 2158 }, { "epoch": 0.06303465592245482, "grad_norm": 0.7422865610923933, "learning_rate": 1.260361938120257e-05, "loss": 0.6748, "step": 2159 }, { "epoch": 0.06306385215030218, "grad_norm": 0.7980566506872785, "learning_rate": 1.2609457092819614e-05, "loss": 0.7754, "step": 2160 }, { "epoch": 0.06309304837814954, "grad_norm": 0.8119653807801718, "learning_rate": 1.2615294804436661e-05, "loss": 0.6913, "step": 2161 }, { "epoch": 0.0631222446059969, "grad_norm": 0.8314234464585647, "learning_rate": 1.262113251605371e-05, "loss": 0.7983, "step": 2162 }, { "epoch": 0.06315144083384426, "grad_norm": 0.7257695978540676, "learning_rate": 1.2626970227670754e-05, "loss": 0.6077, "step": 2163 }, { "epoch": 0.06318063706169164, "grad_norm": 0.6996973216045863, "learning_rate": 1.2632807939287801e-05, "loss": 0.6514, "step": 2164 }, { "epoch": 0.063209833289539, "grad_norm": 0.7570151422216503, "learning_rate": 1.2638645650904846e-05, "loss": 0.7232, "step": 2165 }, { "epoch": 0.06323902951738636, "grad_norm": 0.9773884742848568, "learning_rate": 1.2644483362521893e-05, "loss": 0.8864, "step": 2166 }, { "epoch": 0.06326822574523372, "grad_norm": 0.8532611648416699, "learning_rate": 1.2650321074138938e-05, "loss": 0.7923, "step": 2167 }, { "epoch": 0.06329742197308108, "grad_norm": 1.008724007925508, "learning_rate": 1.2656158785755986e-05, "loss": 0.6763, "step": 2168 }, { "epoch": 0.06332661820092844, "grad_norm": 0.8707224840125052, "learning_rate": 1.266199649737303e-05, "loss": 0.795, "step": 2169 }, { "epoch": 0.0633558144287758, "grad_norm": 0.6841632159964459, "learning_rate": 1.2667834208990078e-05, "loss": 0.6304, "step": 2170 }, { "epoch": 0.06338501065662316, "grad_norm": 0.7529088140725502, "learning_rate": 1.2673671920607123e-05, "loss": 0.6744, "step": 2171 }, { "epoch": 0.06341420688447053, "grad_norm": 0.877325182723081, "learning_rate": 1.267950963222417e-05, "loss": 0.8317, "step": 2172 }, { "epoch": 0.06344340311231789, "grad_norm": 0.8060428469525573, "learning_rate": 1.2685347343841214e-05, "loss": 0.722, "step": 2173 }, { "epoch": 0.06347259934016525, "grad_norm": 0.8017672604860577, "learning_rate": 1.2691185055458261e-05, "loss": 0.729, "step": 2174 }, { "epoch": 0.06350179556801261, "grad_norm": 0.838763822335704, "learning_rate": 1.2697022767075306e-05, "loss": 0.7553, "step": 2175 }, { "epoch": 0.06353099179585997, "grad_norm": 0.8584700960351812, "learning_rate": 1.2702860478692355e-05, "loss": 0.7417, "step": 2176 }, { "epoch": 0.06356018802370733, "grad_norm": 0.7830830718703939, "learning_rate": 1.27086981903094e-05, "loss": 0.7224, "step": 2177 }, { "epoch": 0.06358938425155469, "grad_norm": 0.9679603338238522, "learning_rate": 1.2714535901926446e-05, "loss": 0.6983, "step": 2178 }, { "epoch": 0.06361858047940207, "grad_norm": 0.8040667675359022, "learning_rate": 1.2720373613543493e-05, "loss": 0.7528, "step": 2179 }, { "epoch": 0.06364777670724943, "grad_norm": 0.8323990392291656, "learning_rate": 1.2726211325160538e-05, "loss": 0.8134, "step": 2180 }, { "epoch": 0.06367697293509679, "grad_norm": 0.7190966207391942, "learning_rate": 1.2732049036777585e-05, "loss": 0.6026, "step": 2181 }, { "epoch": 0.06370616916294415, "grad_norm": 0.8415419038185711, "learning_rate": 1.273788674839463e-05, "loss": 0.7743, "step": 2182 }, { "epoch": 0.06373536539079151, "grad_norm": 0.8670091244432105, "learning_rate": 1.2743724460011678e-05, "loss": 0.785, "step": 2183 }, { "epoch": 0.06376456161863887, "grad_norm": 0.7102550211554212, "learning_rate": 1.2749562171628723e-05, "loss": 0.6712, "step": 2184 }, { "epoch": 0.06379375784648623, "grad_norm": 0.788610171883178, "learning_rate": 1.275539988324577e-05, "loss": 0.7661, "step": 2185 }, { "epoch": 0.0638229540743336, "grad_norm": 1.125187850783591, "learning_rate": 1.2761237594862815e-05, "loss": 0.7669, "step": 2186 }, { "epoch": 0.06385215030218096, "grad_norm": 0.8151551483852517, "learning_rate": 1.2767075306479861e-05, "loss": 0.8584, "step": 2187 }, { "epoch": 0.06388134653002832, "grad_norm": 0.8646580974268476, "learning_rate": 1.2772913018096906e-05, "loss": 0.7977, "step": 2188 }, { "epoch": 0.06391054275787568, "grad_norm": 0.956428643912873, "learning_rate": 1.2778750729713953e-05, "loss": 0.7837, "step": 2189 }, { "epoch": 0.06393973898572304, "grad_norm": 0.736141318436259, "learning_rate": 1.2784588441330998e-05, "loss": 0.6809, "step": 2190 }, { "epoch": 0.0639689352135704, "grad_norm": 0.8654888931180917, "learning_rate": 1.2790426152948046e-05, "loss": 0.8302, "step": 2191 }, { "epoch": 0.06399813144141776, "grad_norm": 0.7759153905379338, "learning_rate": 1.2796263864565091e-05, "loss": 0.7077, "step": 2192 }, { "epoch": 0.06402732766926512, "grad_norm": 0.7784899462949842, "learning_rate": 1.2802101576182138e-05, "loss": 0.7649, "step": 2193 }, { "epoch": 0.0640565238971125, "grad_norm": 0.7764424328027368, "learning_rate": 1.2807939287799185e-05, "loss": 0.7063, "step": 2194 }, { "epoch": 0.06408572012495986, "grad_norm": 0.8582368329093177, "learning_rate": 1.281377699941623e-05, "loss": 0.7648, "step": 2195 }, { "epoch": 0.06411491635280722, "grad_norm": 1.1164376399002849, "learning_rate": 1.2819614711033276e-05, "loss": 0.7964, "step": 2196 }, { "epoch": 0.06414411258065458, "grad_norm": 0.7564094863173745, "learning_rate": 1.2825452422650321e-05, "loss": 0.7023, "step": 2197 }, { "epoch": 0.06417330880850194, "grad_norm": 0.7346871037627136, "learning_rate": 1.283129013426737e-05, "loss": 0.6034, "step": 2198 }, { "epoch": 0.0642025050363493, "grad_norm": 0.8131249653271929, "learning_rate": 1.2837127845884415e-05, "loss": 0.799, "step": 2199 }, { "epoch": 0.06423170126419667, "grad_norm": 0.7408591898610025, "learning_rate": 1.2842965557501461e-05, "loss": 0.6715, "step": 2200 }, { "epoch": 0.06426089749204403, "grad_norm": 0.8028265044035735, "learning_rate": 1.2848803269118506e-05, "loss": 0.7683, "step": 2201 }, { "epoch": 0.06429009371989139, "grad_norm": 0.7593771304422328, "learning_rate": 1.2854640980735553e-05, "loss": 0.6882, "step": 2202 }, { "epoch": 0.06431928994773875, "grad_norm": 0.8178814043603914, "learning_rate": 1.2860478692352598e-05, "loss": 0.7412, "step": 2203 }, { "epoch": 0.06434848617558611, "grad_norm": 0.8090804349429433, "learning_rate": 1.2866316403969645e-05, "loss": 0.7443, "step": 2204 }, { "epoch": 0.06437768240343347, "grad_norm": 0.7469587432212681, "learning_rate": 1.287215411558669e-05, "loss": 0.6696, "step": 2205 }, { "epoch": 0.06440687863128083, "grad_norm": 0.8425093084098872, "learning_rate": 1.2877991827203738e-05, "loss": 0.7038, "step": 2206 }, { "epoch": 0.0644360748591282, "grad_norm": 0.7293607582754852, "learning_rate": 1.2883829538820783e-05, "loss": 0.6316, "step": 2207 }, { "epoch": 0.06446527108697556, "grad_norm": 0.78680444986121, "learning_rate": 1.288966725043783e-05, "loss": 0.7736, "step": 2208 }, { "epoch": 0.06449446731482293, "grad_norm": 0.7504426998152657, "learning_rate": 1.2895504962054875e-05, "loss": 0.7126, "step": 2209 }, { "epoch": 0.06452366354267029, "grad_norm": 0.9524663454156121, "learning_rate": 1.2901342673671921e-05, "loss": 0.7132, "step": 2210 }, { "epoch": 0.06455285977051765, "grad_norm": 0.8220344914941602, "learning_rate": 1.2907180385288968e-05, "loss": 0.795, "step": 2211 }, { "epoch": 0.06458205599836501, "grad_norm": 0.8628957875907233, "learning_rate": 1.2913018096906013e-05, "loss": 0.6704, "step": 2212 }, { "epoch": 0.06461125222621238, "grad_norm": 0.7956121122419629, "learning_rate": 1.2918855808523061e-05, "loss": 0.6821, "step": 2213 }, { "epoch": 0.06464044845405974, "grad_norm": 0.8803610782588829, "learning_rate": 1.2924693520140106e-05, "loss": 0.7599, "step": 2214 }, { "epoch": 0.0646696446819071, "grad_norm": 0.9137602791675632, "learning_rate": 1.2930531231757153e-05, "loss": 0.7643, "step": 2215 }, { "epoch": 0.06469884090975446, "grad_norm": 0.7131545499804988, "learning_rate": 1.2936368943374198e-05, "loss": 0.6218, "step": 2216 }, { "epoch": 0.06472803713760182, "grad_norm": 0.796665312173439, "learning_rate": 1.2942206654991245e-05, "loss": 0.68, "step": 2217 }, { "epoch": 0.06475723336544918, "grad_norm": 0.7892291568374523, "learning_rate": 1.294804436660829e-05, "loss": 0.7103, "step": 2218 }, { "epoch": 0.06478642959329654, "grad_norm": 0.6582464930465391, "learning_rate": 1.2953882078225336e-05, "loss": 0.5842, "step": 2219 }, { "epoch": 0.0648156258211439, "grad_norm": 2.720377497405468, "learning_rate": 1.2959719789842381e-05, "loss": 0.716, "step": 2220 }, { "epoch": 0.06484482204899127, "grad_norm": 0.7693657917497397, "learning_rate": 1.296555750145943e-05, "loss": 0.6993, "step": 2221 }, { "epoch": 0.06487401827683863, "grad_norm": 0.7700484203865811, "learning_rate": 1.2971395213076475e-05, "loss": 0.7279, "step": 2222 }, { "epoch": 0.06490321450468599, "grad_norm": 0.7610456640240018, "learning_rate": 1.2977232924693521e-05, "loss": 0.7176, "step": 2223 }, { "epoch": 0.06493241073253336, "grad_norm": 0.8414393201686311, "learning_rate": 1.2983070636310566e-05, "loss": 0.7323, "step": 2224 }, { "epoch": 0.06496160696038072, "grad_norm": 0.8477643853232935, "learning_rate": 1.2988908347927613e-05, "loss": 0.6935, "step": 2225 }, { "epoch": 0.06499080318822809, "grad_norm": 0.8638425456363497, "learning_rate": 1.2994746059544661e-05, "loss": 0.6979, "step": 2226 }, { "epoch": 0.06501999941607545, "grad_norm": 0.812783423692965, "learning_rate": 1.3000583771161705e-05, "loss": 0.7099, "step": 2227 }, { "epoch": 0.06504919564392281, "grad_norm": 1.1709687924574872, "learning_rate": 1.3006421482778753e-05, "loss": 0.7848, "step": 2228 }, { "epoch": 0.06507839187177017, "grad_norm": 0.7255781413590929, "learning_rate": 1.3012259194395798e-05, "loss": 0.6565, "step": 2229 }, { "epoch": 0.06510758809961753, "grad_norm": 0.8107148954494804, "learning_rate": 1.3018096906012845e-05, "loss": 0.7825, "step": 2230 }, { "epoch": 0.06513678432746489, "grad_norm": 0.7273597667694822, "learning_rate": 1.302393461762989e-05, "loss": 0.6656, "step": 2231 }, { "epoch": 0.06516598055531225, "grad_norm": 0.8856100252929674, "learning_rate": 1.3029772329246936e-05, "loss": 0.792, "step": 2232 }, { "epoch": 0.06519517678315961, "grad_norm": 0.7429163590214322, "learning_rate": 1.3035610040863981e-05, "loss": 0.6344, "step": 2233 }, { "epoch": 0.06522437301100698, "grad_norm": 0.7299508877062127, "learning_rate": 1.304144775248103e-05, "loss": 0.6416, "step": 2234 }, { "epoch": 0.06525356923885434, "grad_norm": 0.7281506149236039, "learning_rate": 1.3047285464098075e-05, "loss": 0.6748, "step": 2235 }, { "epoch": 0.0652827654667017, "grad_norm": 0.7518901322055678, "learning_rate": 1.3053123175715121e-05, "loss": 0.6701, "step": 2236 }, { "epoch": 0.06531196169454906, "grad_norm": 0.6664761758746557, "learning_rate": 1.3058960887332166e-05, "loss": 0.613, "step": 2237 }, { "epoch": 0.06534115792239642, "grad_norm": 0.824489266281077, "learning_rate": 1.3064798598949213e-05, "loss": 0.817, "step": 2238 }, { "epoch": 0.0653703541502438, "grad_norm": 0.8517131832572696, "learning_rate": 1.3070636310566258e-05, "loss": 0.8386, "step": 2239 }, { "epoch": 0.06539955037809116, "grad_norm": 0.8573751069870194, "learning_rate": 1.3076474022183305e-05, "loss": 0.7017, "step": 2240 }, { "epoch": 0.06542874660593852, "grad_norm": 0.8078146920092594, "learning_rate": 1.308231173380035e-05, "loss": 0.7404, "step": 2241 }, { "epoch": 0.06545794283378588, "grad_norm": 0.7498263022080387, "learning_rate": 1.3088149445417398e-05, "loss": 0.6838, "step": 2242 }, { "epoch": 0.06548713906163324, "grad_norm": 0.700914708988966, "learning_rate": 1.3093987157034445e-05, "loss": 0.6269, "step": 2243 }, { "epoch": 0.0655163352894806, "grad_norm": 0.858764932609256, "learning_rate": 1.309982486865149e-05, "loss": 0.8213, "step": 2244 }, { "epoch": 0.06554553151732796, "grad_norm": 0.6961551660534366, "learning_rate": 1.3105662580268536e-05, "loss": 0.5922, "step": 2245 }, { "epoch": 0.06557472774517532, "grad_norm": 0.7820844921249276, "learning_rate": 1.3111500291885581e-05, "loss": 0.6997, "step": 2246 }, { "epoch": 0.06560392397302268, "grad_norm": 0.8977673494517371, "learning_rate": 1.3117338003502628e-05, "loss": 0.6914, "step": 2247 }, { "epoch": 0.06563312020087005, "grad_norm": 1.010376794919809, "learning_rate": 1.3123175715119673e-05, "loss": 0.7014, "step": 2248 }, { "epoch": 0.06566231642871741, "grad_norm": 0.7943260266623309, "learning_rate": 1.3129013426736722e-05, "loss": 0.7418, "step": 2249 }, { "epoch": 0.06569151265656477, "grad_norm": 0.7746194027475869, "learning_rate": 1.3134851138353766e-05, "loss": 0.6728, "step": 2250 }, { "epoch": 0.06572070888441213, "grad_norm": 0.7298408267133457, "learning_rate": 1.3140688849970813e-05, "loss": 0.6697, "step": 2251 }, { "epoch": 0.06574990511225949, "grad_norm": 0.8612084474282462, "learning_rate": 1.3146526561587858e-05, "loss": 0.7104, "step": 2252 }, { "epoch": 0.06577910134010685, "grad_norm": 0.7807857898447362, "learning_rate": 1.3152364273204905e-05, "loss": 0.7003, "step": 2253 }, { "epoch": 0.06580829756795423, "grad_norm": 0.851611892230278, "learning_rate": 1.315820198482195e-05, "loss": 0.753, "step": 2254 }, { "epoch": 0.06583749379580159, "grad_norm": 0.8777529397841786, "learning_rate": 1.3164039696438997e-05, "loss": 0.6498, "step": 2255 }, { "epoch": 0.06586669002364895, "grad_norm": 0.7900863472338625, "learning_rate": 1.3169877408056041e-05, "loss": 0.6782, "step": 2256 }, { "epoch": 0.06589588625149631, "grad_norm": 0.7966358534270424, "learning_rate": 1.317571511967309e-05, "loss": 0.7224, "step": 2257 }, { "epoch": 0.06592508247934367, "grad_norm": 0.7540709238408968, "learning_rate": 1.3181552831290137e-05, "loss": 0.6745, "step": 2258 }, { "epoch": 0.06595427870719103, "grad_norm": 0.7796607519067705, "learning_rate": 1.3187390542907182e-05, "loss": 0.6931, "step": 2259 }, { "epoch": 0.0659834749350384, "grad_norm": 0.7751826013716736, "learning_rate": 1.3193228254524228e-05, "loss": 0.7505, "step": 2260 }, { "epoch": 0.06601267116288576, "grad_norm": 0.7276002271409028, "learning_rate": 1.3199065966141273e-05, "loss": 0.6897, "step": 2261 }, { "epoch": 0.06604186739073312, "grad_norm": 0.7000601147158508, "learning_rate": 1.320490367775832e-05, "loss": 0.6601, "step": 2262 }, { "epoch": 0.06607106361858048, "grad_norm": 0.7154647370932188, "learning_rate": 1.3210741389375365e-05, "loss": 0.6036, "step": 2263 }, { "epoch": 0.06610025984642784, "grad_norm": 0.8130172991823001, "learning_rate": 1.3216579100992413e-05, "loss": 0.773, "step": 2264 }, { "epoch": 0.0661294560742752, "grad_norm": 0.706566412297211, "learning_rate": 1.3222416812609458e-05, "loss": 0.6617, "step": 2265 }, { "epoch": 0.06615865230212256, "grad_norm": 0.6846735024817991, "learning_rate": 1.3228254524226505e-05, "loss": 0.5859, "step": 2266 }, { "epoch": 0.06618784852996992, "grad_norm": 0.7411225966659141, "learning_rate": 1.323409223584355e-05, "loss": 0.6609, "step": 2267 }, { "epoch": 0.06621704475781728, "grad_norm": 0.8465800449474086, "learning_rate": 1.3239929947460597e-05, "loss": 0.663, "step": 2268 }, { "epoch": 0.06624624098566466, "grad_norm": 0.7228572889753152, "learning_rate": 1.3245767659077642e-05, "loss": 0.7076, "step": 2269 }, { "epoch": 0.06627543721351202, "grad_norm": 0.7631264587394352, "learning_rate": 1.3251605370694688e-05, "loss": 0.7606, "step": 2270 }, { "epoch": 0.06630463344135938, "grad_norm": 0.7829778044435631, "learning_rate": 1.3257443082311733e-05, "loss": 0.7177, "step": 2271 }, { "epoch": 0.06633382966920674, "grad_norm": 1.1401073840085476, "learning_rate": 1.3263280793928782e-05, "loss": 0.7742, "step": 2272 }, { "epoch": 0.0663630258970541, "grad_norm": 0.8494432579540887, "learning_rate": 1.3269118505545827e-05, "loss": 0.7257, "step": 2273 }, { "epoch": 0.06639222212490146, "grad_norm": 0.8506742816476921, "learning_rate": 1.3274956217162873e-05, "loss": 0.8476, "step": 2274 }, { "epoch": 0.06642141835274883, "grad_norm": 0.8050259859175997, "learning_rate": 1.328079392877992e-05, "loss": 0.6625, "step": 2275 }, { "epoch": 0.06645061458059619, "grad_norm": 0.7974515228865469, "learning_rate": 1.3286631640396965e-05, "loss": 0.7314, "step": 2276 }, { "epoch": 0.06647981080844355, "grad_norm": 1.0143551981993075, "learning_rate": 1.3292469352014012e-05, "loss": 0.8368, "step": 2277 }, { "epoch": 0.06650900703629091, "grad_norm": 0.706621985264878, "learning_rate": 1.3298307063631057e-05, "loss": 0.6857, "step": 2278 }, { "epoch": 0.06653820326413827, "grad_norm": 0.8544356061907251, "learning_rate": 1.3304144775248105e-05, "loss": 0.7656, "step": 2279 }, { "epoch": 0.06656739949198563, "grad_norm": 0.7800334674656484, "learning_rate": 1.330998248686515e-05, "loss": 0.7302, "step": 2280 }, { "epoch": 0.066596595719833, "grad_norm": 0.7922738463324382, "learning_rate": 1.3315820198482197e-05, "loss": 0.7078, "step": 2281 }, { "epoch": 0.06662579194768035, "grad_norm": 0.7018648947172426, "learning_rate": 1.3321657910099242e-05, "loss": 0.6596, "step": 2282 }, { "epoch": 0.06665498817552772, "grad_norm": 0.7882705044146335, "learning_rate": 1.3327495621716288e-05, "loss": 0.7472, "step": 2283 }, { "epoch": 0.06668418440337509, "grad_norm": 0.906248441591661, "learning_rate": 1.3333333333333333e-05, "loss": 0.7387, "step": 2284 }, { "epoch": 0.06671338063122245, "grad_norm": 0.8199312804878622, "learning_rate": 1.3339171044950382e-05, "loss": 0.6898, "step": 2285 }, { "epoch": 0.06674257685906981, "grad_norm": 0.8153871916230566, "learning_rate": 1.3345008756567425e-05, "loss": 0.6431, "step": 2286 }, { "epoch": 0.06677177308691717, "grad_norm": 0.7382951608557805, "learning_rate": 1.3350846468184473e-05, "loss": 0.661, "step": 2287 }, { "epoch": 0.06680096931476454, "grad_norm": 0.7008383975697292, "learning_rate": 1.3356684179801518e-05, "loss": 0.6503, "step": 2288 }, { "epoch": 0.0668301655426119, "grad_norm": 0.7528859152042049, "learning_rate": 1.3362521891418565e-05, "loss": 0.7021, "step": 2289 }, { "epoch": 0.06685936177045926, "grad_norm": 0.7688215715342059, "learning_rate": 1.3368359603035612e-05, "loss": 0.6952, "step": 2290 }, { "epoch": 0.06688855799830662, "grad_norm": 0.8674939413476807, "learning_rate": 1.3374197314652657e-05, "loss": 0.7927, "step": 2291 }, { "epoch": 0.06691775422615398, "grad_norm": 0.7643253022339079, "learning_rate": 1.3380035026269705e-05, "loss": 0.6031, "step": 2292 }, { "epoch": 0.06694695045400134, "grad_norm": 0.8244021986913562, "learning_rate": 1.338587273788675e-05, "loss": 0.7552, "step": 2293 }, { "epoch": 0.0669761466818487, "grad_norm": 0.7825659091322456, "learning_rate": 1.3391710449503797e-05, "loss": 0.6745, "step": 2294 }, { "epoch": 0.06700534290969606, "grad_norm": 0.9065697700537882, "learning_rate": 1.3397548161120842e-05, "loss": 0.7401, "step": 2295 }, { "epoch": 0.06703453913754343, "grad_norm": 0.7227310697740051, "learning_rate": 1.3403385872737888e-05, "loss": 0.6477, "step": 2296 }, { "epoch": 0.06706373536539079, "grad_norm": 0.7706872570650306, "learning_rate": 1.3409223584354933e-05, "loss": 0.6929, "step": 2297 }, { "epoch": 0.06709293159323815, "grad_norm": 0.8267422460384134, "learning_rate": 1.341506129597198e-05, "loss": 0.7779, "step": 2298 }, { "epoch": 0.06712212782108552, "grad_norm": 0.7328971020688666, "learning_rate": 1.3420899007589025e-05, "loss": 0.6797, "step": 2299 }, { "epoch": 0.06715132404893288, "grad_norm": 0.8008197487638884, "learning_rate": 1.3426736719206073e-05, "loss": 0.7293, "step": 2300 }, { "epoch": 0.06718052027678025, "grad_norm": 1.000825081437246, "learning_rate": 1.3432574430823118e-05, "loss": 0.8187, "step": 2301 }, { "epoch": 0.0672097165046276, "grad_norm": 0.8277236036107083, "learning_rate": 1.3438412142440165e-05, "loss": 0.7024, "step": 2302 }, { "epoch": 0.06723891273247497, "grad_norm": 0.7900842069962228, "learning_rate": 1.344424985405721e-05, "loss": 0.6942, "step": 2303 }, { "epoch": 0.06726810896032233, "grad_norm": 0.7661542066961564, "learning_rate": 1.3450087565674257e-05, "loss": 0.7235, "step": 2304 }, { "epoch": 0.06729730518816969, "grad_norm": 0.7184923698581558, "learning_rate": 1.3455925277291302e-05, "loss": 0.6448, "step": 2305 }, { "epoch": 0.06732650141601705, "grad_norm": 0.823175964607059, "learning_rate": 1.3461762988908348e-05, "loss": 0.7835, "step": 2306 }, { "epoch": 0.06735569764386441, "grad_norm": 0.7590221335472775, "learning_rate": 1.3467600700525397e-05, "loss": 0.6993, "step": 2307 }, { "epoch": 0.06738489387171177, "grad_norm": 0.816048447256651, "learning_rate": 1.3473438412142442e-05, "loss": 0.7259, "step": 2308 }, { "epoch": 0.06741409009955913, "grad_norm": 0.8154601359827576, "learning_rate": 1.3479276123759488e-05, "loss": 0.772, "step": 2309 }, { "epoch": 0.0674432863274065, "grad_norm": 0.9446157971538393, "learning_rate": 1.3485113835376533e-05, "loss": 0.6939, "step": 2310 }, { "epoch": 0.06747248255525386, "grad_norm": 0.7170601270953505, "learning_rate": 1.349095154699358e-05, "loss": 0.6417, "step": 2311 }, { "epoch": 0.06750167878310122, "grad_norm": 0.7681451306605029, "learning_rate": 1.3496789258610625e-05, "loss": 0.7548, "step": 2312 }, { "epoch": 0.06753087501094858, "grad_norm": 0.8015191416367061, "learning_rate": 1.3502626970227672e-05, "loss": 0.74, "step": 2313 }, { "epoch": 0.06756007123879594, "grad_norm": 1.0707702535130428, "learning_rate": 1.3508464681844717e-05, "loss": 0.8659, "step": 2314 }, { "epoch": 0.06758926746664332, "grad_norm": 0.7124848311091002, "learning_rate": 1.3514302393461765e-05, "loss": 0.6127, "step": 2315 }, { "epoch": 0.06761846369449068, "grad_norm": 0.8781362559678534, "learning_rate": 1.352014010507881e-05, "loss": 0.7106, "step": 2316 }, { "epoch": 0.06764765992233804, "grad_norm": 0.8076429359401607, "learning_rate": 1.3525977816695857e-05, "loss": 0.7, "step": 2317 }, { "epoch": 0.0676768561501854, "grad_norm": 2.706656123139569, "learning_rate": 1.3531815528312902e-05, "loss": 0.8168, "step": 2318 }, { "epoch": 0.06770605237803276, "grad_norm": 0.8505428049836038, "learning_rate": 1.3537653239929948e-05, "loss": 0.8615, "step": 2319 }, { "epoch": 0.06773524860588012, "grad_norm": 0.7841337375050417, "learning_rate": 1.3543490951546993e-05, "loss": 0.7136, "step": 2320 }, { "epoch": 0.06776444483372748, "grad_norm": 0.8055479633574495, "learning_rate": 1.354932866316404e-05, "loss": 0.6167, "step": 2321 }, { "epoch": 0.06779364106157484, "grad_norm": 0.8499828788935495, "learning_rate": 1.3555166374781089e-05, "loss": 0.7849, "step": 2322 }, { "epoch": 0.0678228372894222, "grad_norm": 0.7841408618740133, "learning_rate": 1.3561004086398133e-05, "loss": 0.7479, "step": 2323 }, { "epoch": 0.06785203351726957, "grad_norm": 0.7260737906726339, "learning_rate": 1.356684179801518e-05, "loss": 0.6971, "step": 2324 }, { "epoch": 0.06788122974511693, "grad_norm": 0.8367646742640932, "learning_rate": 1.3572679509632225e-05, "loss": 0.8196, "step": 2325 }, { "epoch": 0.06791042597296429, "grad_norm": 0.7625631444111972, "learning_rate": 1.3578517221249272e-05, "loss": 0.6752, "step": 2326 }, { "epoch": 0.06793962220081165, "grad_norm": 0.8133901975873118, "learning_rate": 1.3584354932866317e-05, "loss": 0.7255, "step": 2327 }, { "epoch": 0.06796881842865901, "grad_norm": 0.7605800592298958, "learning_rate": 1.3590192644483364e-05, "loss": 0.6544, "step": 2328 }, { "epoch": 0.06799801465650637, "grad_norm": 0.7764113758563543, "learning_rate": 1.3596030356100408e-05, "loss": 0.6783, "step": 2329 }, { "epoch": 0.06802721088435375, "grad_norm": 0.7231275914392051, "learning_rate": 1.3601868067717457e-05, "loss": 0.6324, "step": 2330 }, { "epoch": 0.06805640711220111, "grad_norm": 0.7750957231118224, "learning_rate": 1.3607705779334502e-05, "loss": 0.702, "step": 2331 }, { "epoch": 0.06808560334004847, "grad_norm": 0.7576123032297656, "learning_rate": 1.3613543490951549e-05, "loss": 0.6426, "step": 2332 }, { "epoch": 0.06811479956789583, "grad_norm": 0.8113280067472584, "learning_rate": 1.3619381202568594e-05, "loss": 0.7752, "step": 2333 }, { "epoch": 0.06814399579574319, "grad_norm": 0.6618105156839369, "learning_rate": 1.362521891418564e-05, "loss": 0.5343, "step": 2334 }, { "epoch": 0.06817319202359055, "grad_norm": 0.7141420643575739, "learning_rate": 1.3631056625802685e-05, "loss": 0.6231, "step": 2335 }, { "epoch": 0.06820238825143791, "grad_norm": 0.729746823827223, "learning_rate": 1.3636894337419732e-05, "loss": 0.6726, "step": 2336 }, { "epoch": 0.06823158447928528, "grad_norm": 0.7489343409272323, "learning_rate": 1.3642732049036777e-05, "loss": 0.6615, "step": 2337 }, { "epoch": 0.06826078070713264, "grad_norm": 0.7482654008047281, "learning_rate": 1.3648569760653825e-05, "loss": 0.7531, "step": 2338 }, { "epoch": 0.06828997693498, "grad_norm": 0.8103921278815137, "learning_rate": 1.3654407472270872e-05, "loss": 0.6401, "step": 2339 }, { "epoch": 0.06831917316282736, "grad_norm": 0.8036656151979163, "learning_rate": 1.3660245183887917e-05, "loss": 0.773, "step": 2340 }, { "epoch": 0.06834836939067472, "grad_norm": 0.7957636081549123, "learning_rate": 1.3666082895504964e-05, "loss": 0.7353, "step": 2341 }, { "epoch": 0.06837756561852208, "grad_norm": 0.7237166566232871, "learning_rate": 1.3671920607122009e-05, "loss": 0.6732, "step": 2342 }, { "epoch": 0.06840676184636944, "grad_norm": 0.7822383420764757, "learning_rate": 1.3677758318739057e-05, "loss": 0.6756, "step": 2343 }, { "epoch": 0.0684359580742168, "grad_norm": 0.7537619917571118, "learning_rate": 1.3683596030356102e-05, "loss": 0.7239, "step": 2344 }, { "epoch": 0.06846515430206418, "grad_norm": 0.8266088940488722, "learning_rate": 1.3689433741973149e-05, "loss": 0.7562, "step": 2345 }, { "epoch": 0.06849435052991154, "grad_norm": 0.7439278268901587, "learning_rate": 1.3695271453590194e-05, "loss": 0.669, "step": 2346 }, { "epoch": 0.0685235467577589, "grad_norm": 0.7272712425154547, "learning_rate": 1.370110916520724e-05, "loss": 0.652, "step": 2347 }, { "epoch": 0.06855274298560626, "grad_norm": 0.7266918513997278, "learning_rate": 1.3706946876824285e-05, "loss": 0.6453, "step": 2348 }, { "epoch": 0.06858193921345362, "grad_norm": 0.854514771924992, "learning_rate": 1.3712784588441332e-05, "loss": 0.775, "step": 2349 }, { "epoch": 0.06861113544130099, "grad_norm": 0.7530756305552171, "learning_rate": 1.3718622300058377e-05, "loss": 0.6766, "step": 2350 }, { "epoch": 0.06864033166914835, "grad_norm": 0.7443798872820266, "learning_rate": 1.3724460011675425e-05, "loss": 0.5906, "step": 2351 }, { "epoch": 0.06866952789699571, "grad_norm": 0.7775459280417635, "learning_rate": 1.373029772329247e-05, "loss": 0.7241, "step": 2352 }, { "epoch": 0.06869872412484307, "grad_norm": 0.9792886708383457, "learning_rate": 1.3736135434909517e-05, "loss": 0.7931, "step": 2353 }, { "epoch": 0.06872792035269043, "grad_norm": 0.7166592838444544, "learning_rate": 1.3741973146526564e-05, "loss": 0.6242, "step": 2354 }, { "epoch": 0.06875711658053779, "grad_norm": 0.7275652246076023, "learning_rate": 1.3747810858143609e-05, "loss": 0.6497, "step": 2355 }, { "epoch": 0.06878631280838515, "grad_norm": 0.8056734536147955, "learning_rate": 1.3753648569760655e-05, "loss": 0.7537, "step": 2356 }, { "epoch": 0.06881550903623251, "grad_norm": 0.7963075082922166, "learning_rate": 1.37594862813777e-05, "loss": 0.8025, "step": 2357 }, { "epoch": 0.06884470526407988, "grad_norm": 0.7717468375398363, "learning_rate": 1.3765323992994749e-05, "loss": 0.743, "step": 2358 }, { "epoch": 0.06887390149192724, "grad_norm": 0.7157388706469736, "learning_rate": 1.3771161704611794e-05, "loss": 0.7115, "step": 2359 }, { "epoch": 0.06890309771977461, "grad_norm": 0.7732114425585764, "learning_rate": 1.377699941622884e-05, "loss": 0.7217, "step": 2360 }, { "epoch": 0.06893229394762197, "grad_norm": 0.8476620541675977, "learning_rate": 1.3782837127845885e-05, "loss": 0.668, "step": 2361 }, { "epoch": 0.06896149017546933, "grad_norm": 0.7419038192470986, "learning_rate": 1.3788674839462932e-05, "loss": 0.6069, "step": 2362 }, { "epoch": 0.0689906864033167, "grad_norm": 0.7352827623135829, "learning_rate": 1.3794512551079977e-05, "loss": 0.6465, "step": 2363 }, { "epoch": 0.06901988263116406, "grad_norm": 0.7839632626619251, "learning_rate": 1.3800350262697024e-05, "loss": 0.7649, "step": 2364 }, { "epoch": 0.06904907885901142, "grad_norm": 1.0274181481260667, "learning_rate": 1.3806187974314069e-05, "loss": 0.7971, "step": 2365 }, { "epoch": 0.06907827508685878, "grad_norm": 0.8285541460839477, "learning_rate": 1.3812025685931117e-05, "loss": 0.77, "step": 2366 }, { "epoch": 0.06910747131470614, "grad_norm": 0.7399596542279089, "learning_rate": 1.3817863397548162e-05, "loss": 0.6565, "step": 2367 }, { "epoch": 0.0691366675425535, "grad_norm": 0.7453961360571045, "learning_rate": 1.3823701109165209e-05, "loss": 0.6471, "step": 2368 }, { "epoch": 0.06916586377040086, "grad_norm": 0.7453476773004595, "learning_rate": 1.3829538820782254e-05, "loss": 0.6784, "step": 2369 }, { "epoch": 0.06919505999824822, "grad_norm": 0.8554992803365888, "learning_rate": 1.38353765323993e-05, "loss": 0.7352, "step": 2370 }, { "epoch": 0.06922425622609558, "grad_norm": 0.7545451065875651, "learning_rate": 1.3841214244016347e-05, "loss": 0.7385, "step": 2371 }, { "epoch": 0.06925345245394295, "grad_norm": 0.8130253657918741, "learning_rate": 1.3847051955633392e-05, "loss": 0.6657, "step": 2372 }, { "epoch": 0.06928264868179031, "grad_norm": 0.7309961441456708, "learning_rate": 1.385288966725044e-05, "loss": 0.6605, "step": 2373 }, { "epoch": 0.06931184490963767, "grad_norm": 0.7881927926293798, "learning_rate": 1.3858727378867485e-05, "loss": 0.6941, "step": 2374 }, { "epoch": 0.06934104113748504, "grad_norm": 0.7892057592213255, "learning_rate": 1.3864565090484532e-05, "loss": 0.7613, "step": 2375 }, { "epoch": 0.0693702373653324, "grad_norm": 0.6974041289892666, "learning_rate": 1.3870402802101577e-05, "loss": 0.5913, "step": 2376 }, { "epoch": 0.06939943359317977, "grad_norm": 1.027761229701501, "learning_rate": 1.3876240513718624e-05, "loss": 0.7179, "step": 2377 }, { "epoch": 0.06942862982102713, "grad_norm": 0.7449053441717349, "learning_rate": 1.3882078225335669e-05, "loss": 0.6393, "step": 2378 }, { "epoch": 0.06945782604887449, "grad_norm": 0.6648216157986947, "learning_rate": 1.3887915936952715e-05, "loss": 0.5782, "step": 2379 }, { "epoch": 0.06948702227672185, "grad_norm": 0.7961086393526559, "learning_rate": 1.389375364856976e-05, "loss": 0.7514, "step": 2380 }, { "epoch": 0.06951621850456921, "grad_norm": 0.8453337257262439, "learning_rate": 1.3899591360186809e-05, "loss": 0.7406, "step": 2381 }, { "epoch": 0.06954541473241657, "grad_norm": 0.8377088504412971, "learning_rate": 1.3905429071803854e-05, "loss": 0.7256, "step": 2382 }, { "epoch": 0.06957461096026393, "grad_norm": 0.8283942921399993, "learning_rate": 1.39112667834209e-05, "loss": 0.7125, "step": 2383 }, { "epoch": 0.0696038071881113, "grad_norm": 0.9365608732343685, "learning_rate": 1.3917104495037945e-05, "loss": 0.805, "step": 2384 }, { "epoch": 0.06963300341595866, "grad_norm": 0.8268697825451307, "learning_rate": 1.3922942206654992e-05, "loss": 0.7134, "step": 2385 }, { "epoch": 0.06966219964380602, "grad_norm": 0.8256990348079353, "learning_rate": 1.3928779918272039e-05, "loss": 0.782, "step": 2386 }, { "epoch": 0.06969139587165338, "grad_norm": 0.7430614026726802, "learning_rate": 1.3934617629889084e-05, "loss": 0.7145, "step": 2387 }, { "epoch": 0.06972059209950074, "grad_norm": 0.7543269377916157, "learning_rate": 1.3940455341506132e-05, "loss": 0.6405, "step": 2388 }, { "epoch": 0.0697497883273481, "grad_norm": 0.7375564365080453, "learning_rate": 1.3946293053123177e-05, "loss": 0.7168, "step": 2389 }, { "epoch": 0.06977898455519548, "grad_norm": 0.7654556147132598, "learning_rate": 1.3952130764740224e-05, "loss": 0.6935, "step": 2390 }, { "epoch": 0.06980818078304284, "grad_norm": 0.7838493796075224, "learning_rate": 1.3957968476357269e-05, "loss": 0.6926, "step": 2391 }, { "epoch": 0.0698373770108902, "grad_norm": 0.8209145547228227, "learning_rate": 1.3963806187974315e-05, "loss": 0.6949, "step": 2392 }, { "epoch": 0.06986657323873756, "grad_norm": 0.7277291334589076, "learning_rate": 1.396964389959136e-05, "loss": 0.6931, "step": 2393 }, { "epoch": 0.06989576946658492, "grad_norm": 0.8007188160509097, "learning_rate": 1.3975481611208407e-05, "loss": 0.7085, "step": 2394 }, { "epoch": 0.06992496569443228, "grad_norm": 0.7386718494976842, "learning_rate": 1.3981319322825452e-05, "loss": 0.6352, "step": 2395 }, { "epoch": 0.06995416192227964, "grad_norm": 0.7737449851544509, "learning_rate": 1.39871570344425e-05, "loss": 0.6547, "step": 2396 }, { "epoch": 0.069983358150127, "grad_norm": 0.7572482370173038, "learning_rate": 1.3992994746059545e-05, "loss": 0.6434, "step": 2397 }, { "epoch": 0.07001255437797436, "grad_norm": 0.9525514472615783, "learning_rate": 1.3998832457676592e-05, "loss": 0.7608, "step": 2398 }, { "epoch": 0.07004175060582173, "grad_norm": 0.7635278458541076, "learning_rate": 1.4004670169293637e-05, "loss": 0.6922, "step": 2399 }, { "epoch": 0.07007094683366909, "grad_norm": 0.8388515227232188, "learning_rate": 1.4010507880910684e-05, "loss": 0.679, "step": 2400 }, { "epoch": 0.07010014306151645, "grad_norm": 0.7698922524398669, "learning_rate": 1.4016345592527729e-05, "loss": 0.7162, "step": 2401 }, { "epoch": 0.07012933928936381, "grad_norm": 0.7365937113266461, "learning_rate": 1.4022183304144777e-05, "loss": 0.6681, "step": 2402 }, { "epoch": 0.07015853551721117, "grad_norm": 0.8133298250401072, "learning_rate": 1.4028021015761824e-05, "loss": 0.7221, "step": 2403 }, { "epoch": 0.07018773174505853, "grad_norm": 0.8699755615597354, "learning_rate": 1.4033858727378869e-05, "loss": 0.7233, "step": 2404 }, { "epoch": 0.07021692797290591, "grad_norm": 0.7364145298945112, "learning_rate": 1.4039696438995916e-05, "loss": 0.6571, "step": 2405 }, { "epoch": 0.07024612420075327, "grad_norm": 0.7278959676863606, "learning_rate": 1.404553415061296e-05, "loss": 0.6549, "step": 2406 }, { "epoch": 0.07027532042860063, "grad_norm": 0.7965897554794874, "learning_rate": 1.4051371862230007e-05, "loss": 0.7163, "step": 2407 }, { "epoch": 0.07030451665644799, "grad_norm": 0.9507200613567298, "learning_rate": 1.4057209573847052e-05, "loss": 0.7722, "step": 2408 }, { "epoch": 0.07033371288429535, "grad_norm": 0.7963055048336004, "learning_rate": 1.40630472854641e-05, "loss": 0.7587, "step": 2409 }, { "epoch": 0.07036290911214271, "grad_norm": 0.8053103300648005, "learning_rate": 1.4068884997081146e-05, "loss": 0.7038, "step": 2410 }, { "epoch": 0.07039210533999007, "grad_norm": 0.734976368415554, "learning_rate": 1.4074722708698192e-05, "loss": 0.7106, "step": 2411 }, { "epoch": 0.07042130156783744, "grad_norm": 0.7363641212393006, "learning_rate": 1.4080560420315237e-05, "loss": 0.6657, "step": 2412 }, { "epoch": 0.0704504977956848, "grad_norm": 0.7712585829045394, "learning_rate": 1.4086398131932284e-05, "loss": 0.7524, "step": 2413 }, { "epoch": 0.07047969402353216, "grad_norm": 0.7510785081135971, "learning_rate": 1.4092235843549329e-05, "loss": 0.6892, "step": 2414 }, { "epoch": 0.07050889025137952, "grad_norm": 0.8619641501214699, "learning_rate": 1.4098073555166376e-05, "loss": 0.8119, "step": 2415 }, { "epoch": 0.07053808647922688, "grad_norm": 0.9799228729369729, "learning_rate": 1.410391126678342e-05, "loss": 0.7343, "step": 2416 }, { "epoch": 0.07056728270707424, "grad_norm": 0.7382274145229164, "learning_rate": 1.4109748978400469e-05, "loss": 0.6462, "step": 2417 }, { "epoch": 0.0705964789349216, "grad_norm": 0.8592076934503463, "learning_rate": 1.4115586690017516e-05, "loss": 0.6654, "step": 2418 }, { "epoch": 0.07062567516276896, "grad_norm": 0.7830745627680183, "learning_rate": 1.412142440163456e-05, "loss": 0.7158, "step": 2419 }, { "epoch": 0.07065487139061634, "grad_norm": 0.6923228999770275, "learning_rate": 1.4127262113251607e-05, "loss": 0.6334, "step": 2420 }, { "epoch": 0.0706840676184637, "grad_norm": 0.7862734161016661, "learning_rate": 1.4133099824868652e-05, "loss": 0.7278, "step": 2421 }, { "epoch": 0.07071326384631106, "grad_norm": 0.7824996702620236, "learning_rate": 1.4138937536485699e-05, "loss": 0.7325, "step": 2422 }, { "epoch": 0.07074246007415842, "grad_norm": 0.7939733436768364, "learning_rate": 1.4144775248102744e-05, "loss": 0.7677, "step": 2423 }, { "epoch": 0.07077165630200578, "grad_norm": 0.8088360038805237, "learning_rate": 1.4150612959719792e-05, "loss": 0.7765, "step": 2424 }, { "epoch": 0.07080085252985314, "grad_norm": 0.8145999088228131, "learning_rate": 1.4156450671336837e-05, "loss": 0.7665, "step": 2425 }, { "epoch": 0.0708300487577005, "grad_norm": 0.7093874798616335, "learning_rate": 1.4162288382953884e-05, "loss": 0.6363, "step": 2426 }, { "epoch": 0.07085924498554787, "grad_norm": 0.8059258433428793, "learning_rate": 1.4168126094570929e-05, "loss": 0.7078, "step": 2427 }, { "epoch": 0.07088844121339523, "grad_norm": 0.7910090228401344, "learning_rate": 1.4173963806187976e-05, "loss": 0.7173, "step": 2428 }, { "epoch": 0.07091763744124259, "grad_norm": 0.7974067524255647, "learning_rate": 1.417980151780502e-05, "loss": 0.5896, "step": 2429 }, { "epoch": 0.07094683366908995, "grad_norm": 0.7796119922923448, "learning_rate": 1.4185639229422067e-05, "loss": 0.7087, "step": 2430 }, { "epoch": 0.07097602989693731, "grad_norm": 1.044193257797898, "learning_rate": 1.4191476941039112e-05, "loss": 0.7277, "step": 2431 }, { "epoch": 0.07100522612478467, "grad_norm": 0.8011930618299644, "learning_rate": 1.419731465265616e-05, "loss": 0.7285, "step": 2432 }, { "epoch": 0.07103442235263203, "grad_norm": 0.781852856380366, "learning_rate": 1.4203152364273206e-05, "loss": 0.669, "step": 2433 }, { "epoch": 0.0710636185804794, "grad_norm": 0.7282364740693644, "learning_rate": 1.4208990075890252e-05, "loss": 0.618, "step": 2434 }, { "epoch": 0.07109281480832677, "grad_norm": 0.7314847393039534, "learning_rate": 1.4214827787507299e-05, "loss": 0.7289, "step": 2435 }, { "epoch": 0.07112201103617413, "grad_norm": 0.889244767879091, "learning_rate": 1.4220665499124344e-05, "loss": 0.745, "step": 2436 }, { "epoch": 0.0711512072640215, "grad_norm": 0.742345143043125, "learning_rate": 1.422650321074139e-05, "loss": 0.7228, "step": 2437 }, { "epoch": 0.07118040349186885, "grad_norm": 0.7610100359571786, "learning_rate": 1.4232340922358436e-05, "loss": 0.6458, "step": 2438 }, { "epoch": 0.07120959971971622, "grad_norm": 0.7986860709367238, "learning_rate": 1.4238178633975484e-05, "loss": 0.6901, "step": 2439 }, { "epoch": 0.07123879594756358, "grad_norm": 0.7484514175974499, "learning_rate": 1.4244016345592529e-05, "loss": 0.666, "step": 2440 }, { "epoch": 0.07126799217541094, "grad_norm": 1.317194915651583, "learning_rate": 1.4249854057209576e-05, "loss": 0.7566, "step": 2441 }, { "epoch": 0.0712971884032583, "grad_norm": 0.8223401308666577, "learning_rate": 1.425569176882662e-05, "loss": 0.7773, "step": 2442 }, { "epoch": 0.07132638463110566, "grad_norm": 0.7995945305793796, "learning_rate": 1.4261529480443667e-05, "loss": 0.6822, "step": 2443 }, { "epoch": 0.07135558085895302, "grad_norm": 1.055502898262398, "learning_rate": 1.4267367192060712e-05, "loss": 0.8074, "step": 2444 }, { "epoch": 0.07138477708680038, "grad_norm": 0.8489528283911891, "learning_rate": 1.4273204903677759e-05, "loss": 0.696, "step": 2445 }, { "epoch": 0.07141397331464774, "grad_norm": 0.7192438908210489, "learning_rate": 1.4279042615294804e-05, "loss": 0.5936, "step": 2446 }, { "epoch": 0.0714431695424951, "grad_norm": 0.7190880561518451, "learning_rate": 1.4284880326911852e-05, "loss": 0.6534, "step": 2447 }, { "epoch": 0.07147236577034247, "grad_norm": 0.8284640833311159, "learning_rate": 1.4290718038528897e-05, "loss": 0.7428, "step": 2448 }, { "epoch": 0.07150156199818983, "grad_norm": 0.9999629723999313, "learning_rate": 1.4296555750145944e-05, "loss": 0.7318, "step": 2449 }, { "epoch": 0.0715307582260372, "grad_norm": 0.7389317500345884, "learning_rate": 1.430239346176299e-05, "loss": 0.6868, "step": 2450 }, { "epoch": 0.07155995445388456, "grad_norm": 0.7434395950657754, "learning_rate": 1.4308231173380036e-05, "loss": 0.6578, "step": 2451 }, { "epoch": 0.07158915068173193, "grad_norm": 0.757235505066115, "learning_rate": 1.4314068884997084e-05, "loss": 0.659, "step": 2452 }, { "epoch": 0.07161834690957929, "grad_norm": 0.8063892454612637, "learning_rate": 1.4319906596614127e-05, "loss": 0.7765, "step": 2453 }, { "epoch": 0.07164754313742665, "grad_norm": 0.7549506553903013, "learning_rate": 1.4325744308231176e-05, "loss": 0.6563, "step": 2454 }, { "epoch": 0.07167673936527401, "grad_norm": 0.7334302452941791, "learning_rate": 1.433158201984822e-05, "loss": 0.6883, "step": 2455 }, { "epoch": 0.07170593559312137, "grad_norm": 0.7591390980962127, "learning_rate": 1.4337419731465267e-05, "loss": 0.6661, "step": 2456 }, { "epoch": 0.07173513182096873, "grad_norm": 0.8025712573537538, "learning_rate": 1.4343257443082312e-05, "loss": 0.7653, "step": 2457 }, { "epoch": 0.07176432804881609, "grad_norm": 0.8146606386817292, "learning_rate": 1.4349095154699359e-05, "loss": 0.7601, "step": 2458 }, { "epoch": 0.07179352427666345, "grad_norm": 0.7256132285688458, "learning_rate": 1.4354932866316404e-05, "loss": 0.6848, "step": 2459 }, { "epoch": 0.07182272050451081, "grad_norm": 0.7593233373538334, "learning_rate": 1.4360770577933452e-05, "loss": 0.746, "step": 2460 }, { "epoch": 0.07185191673235818, "grad_norm": 0.7237629332923551, "learning_rate": 1.4366608289550497e-05, "loss": 0.6436, "step": 2461 }, { "epoch": 0.07188111296020554, "grad_norm": 0.7699515991255902, "learning_rate": 1.4372446001167544e-05, "loss": 0.7089, "step": 2462 }, { "epoch": 0.0719103091880529, "grad_norm": 0.6976444260571544, "learning_rate": 1.4378283712784589e-05, "loss": 0.6077, "step": 2463 }, { "epoch": 0.07193950541590026, "grad_norm": 0.811514034909862, "learning_rate": 1.4384121424401636e-05, "loss": 0.7564, "step": 2464 }, { "epoch": 0.07196870164374763, "grad_norm": 0.8606409605471905, "learning_rate": 1.438995913601868e-05, "loss": 0.6667, "step": 2465 }, { "epoch": 0.071997897871595, "grad_norm": 0.7850227838175435, "learning_rate": 1.4395796847635727e-05, "loss": 0.6747, "step": 2466 }, { "epoch": 0.07202709409944236, "grad_norm": 0.8019198988588601, "learning_rate": 1.4401634559252776e-05, "loss": 0.7642, "step": 2467 }, { "epoch": 0.07205629032728972, "grad_norm": 1.2535738504589413, "learning_rate": 1.440747227086982e-05, "loss": 0.789, "step": 2468 }, { "epoch": 0.07208548655513708, "grad_norm": 0.9249865162025357, "learning_rate": 1.4413309982486867e-05, "loss": 0.7215, "step": 2469 }, { "epoch": 0.07211468278298444, "grad_norm": 0.7203096398298989, "learning_rate": 1.4419147694103912e-05, "loss": 0.7055, "step": 2470 }, { "epoch": 0.0721438790108318, "grad_norm": 0.7370468064130756, "learning_rate": 1.4424985405720959e-05, "loss": 0.6834, "step": 2471 }, { "epoch": 0.07217307523867916, "grad_norm": 0.7761513231973852, "learning_rate": 1.4430823117338004e-05, "loss": 0.7131, "step": 2472 }, { "epoch": 0.07220227146652652, "grad_norm": 0.7600380567234932, "learning_rate": 1.443666082895505e-05, "loss": 0.7217, "step": 2473 }, { "epoch": 0.07223146769437389, "grad_norm": 0.822345761946451, "learning_rate": 1.4442498540572096e-05, "loss": 0.7283, "step": 2474 }, { "epoch": 0.07226066392222125, "grad_norm": 0.7663092424699823, "learning_rate": 1.4448336252189144e-05, "loss": 0.6623, "step": 2475 }, { "epoch": 0.07228986015006861, "grad_norm": 0.7389127289674154, "learning_rate": 1.4454173963806189e-05, "loss": 0.6679, "step": 2476 }, { "epoch": 0.07231905637791597, "grad_norm": 0.7595570022286285, "learning_rate": 1.4460011675423236e-05, "loss": 0.7021, "step": 2477 }, { "epoch": 0.07234825260576333, "grad_norm": 0.9247919415208232, "learning_rate": 1.446584938704028e-05, "loss": 0.8112, "step": 2478 }, { "epoch": 0.07237744883361069, "grad_norm": 0.8110845447859198, "learning_rate": 1.4471687098657327e-05, "loss": 0.6173, "step": 2479 }, { "epoch": 0.07240664506145807, "grad_norm": 0.7526807425592796, "learning_rate": 1.4477524810274372e-05, "loss": 0.645, "step": 2480 }, { "epoch": 0.07243584128930543, "grad_norm": 0.7806675384499892, "learning_rate": 1.448336252189142e-05, "loss": 0.6974, "step": 2481 }, { "epoch": 0.07246503751715279, "grad_norm": 0.920438340819312, "learning_rate": 1.4489200233508468e-05, "loss": 0.7796, "step": 2482 }, { "epoch": 0.07249423374500015, "grad_norm": 0.776130631325111, "learning_rate": 1.4495037945125513e-05, "loss": 0.6991, "step": 2483 }, { "epoch": 0.07252342997284751, "grad_norm": 0.8531207503096662, "learning_rate": 1.450087565674256e-05, "loss": 0.7957, "step": 2484 }, { "epoch": 0.07255262620069487, "grad_norm": 0.7966843454851071, "learning_rate": 1.4506713368359604e-05, "loss": 0.7814, "step": 2485 }, { "epoch": 0.07258182242854223, "grad_norm": 0.8868138804057282, "learning_rate": 1.4512551079976651e-05, "loss": 0.6851, "step": 2486 }, { "epoch": 0.0726110186563896, "grad_norm": 0.7832805925217416, "learning_rate": 1.4518388791593696e-05, "loss": 0.7478, "step": 2487 }, { "epoch": 0.07264021488423696, "grad_norm": 0.7190738736678324, "learning_rate": 1.4524226503210743e-05, "loss": 0.6776, "step": 2488 }, { "epoch": 0.07266941111208432, "grad_norm": 0.7883021435422092, "learning_rate": 1.4530064214827788e-05, "loss": 0.7588, "step": 2489 }, { "epoch": 0.07269860733993168, "grad_norm": 0.7414158771296068, "learning_rate": 1.4535901926444836e-05, "loss": 0.7026, "step": 2490 }, { "epoch": 0.07272780356777904, "grad_norm": 0.7399648994709305, "learning_rate": 1.4541739638061881e-05, "loss": 0.6145, "step": 2491 }, { "epoch": 0.0727569997956264, "grad_norm": 0.7978619230832468, "learning_rate": 1.4547577349678928e-05, "loss": 0.7042, "step": 2492 }, { "epoch": 0.07278619602347376, "grad_norm": 0.7673658847941788, "learning_rate": 1.4553415061295973e-05, "loss": 0.6902, "step": 2493 }, { "epoch": 0.07281539225132112, "grad_norm": 0.7556906625636989, "learning_rate": 1.455925277291302e-05, "loss": 0.731, "step": 2494 }, { "epoch": 0.0728445884791685, "grad_norm": 0.7306914350926238, "learning_rate": 1.4565090484530064e-05, "loss": 0.6497, "step": 2495 }, { "epoch": 0.07287378470701586, "grad_norm": 0.6989148191462724, "learning_rate": 1.4570928196147111e-05, "loss": 0.6456, "step": 2496 }, { "epoch": 0.07290298093486322, "grad_norm": 0.7352471294198691, "learning_rate": 1.4576765907764156e-05, "loss": 0.6053, "step": 2497 }, { "epoch": 0.07293217716271058, "grad_norm": 0.76432688142073, "learning_rate": 1.4582603619381204e-05, "loss": 0.6676, "step": 2498 }, { "epoch": 0.07296137339055794, "grad_norm": 0.753290373436549, "learning_rate": 1.4588441330998251e-05, "loss": 0.6367, "step": 2499 }, { "epoch": 0.0729905696184053, "grad_norm": 0.7787557766739589, "learning_rate": 1.4594279042615296e-05, "loss": 0.6517, "step": 2500 }, { "epoch": 0.07301976584625267, "grad_norm": 0.7468833800969491, "learning_rate": 1.4600116754232343e-05, "loss": 0.6235, "step": 2501 }, { "epoch": 0.07304896207410003, "grad_norm": 0.8762532091632729, "learning_rate": 1.4605954465849388e-05, "loss": 0.8248, "step": 2502 }, { "epoch": 0.07307815830194739, "grad_norm": 0.9299683981808218, "learning_rate": 1.4611792177466434e-05, "loss": 0.7514, "step": 2503 }, { "epoch": 0.07310735452979475, "grad_norm": 0.7744960704584462, "learning_rate": 1.461762988908348e-05, "loss": 0.6321, "step": 2504 }, { "epoch": 0.07313655075764211, "grad_norm": 0.8312042175681156, "learning_rate": 1.4623467600700528e-05, "loss": 0.784, "step": 2505 }, { "epoch": 0.07316574698548947, "grad_norm": 0.8153800529233992, "learning_rate": 1.4629305312317573e-05, "loss": 0.7809, "step": 2506 }, { "epoch": 0.07319494321333683, "grad_norm": 0.7578808087317541, "learning_rate": 1.463514302393462e-05, "loss": 0.7363, "step": 2507 }, { "epoch": 0.0732241394411842, "grad_norm": 0.9318964176324981, "learning_rate": 1.4640980735551664e-05, "loss": 0.6661, "step": 2508 }, { "epoch": 0.07325333566903156, "grad_norm": 0.8688217882638405, "learning_rate": 1.4646818447168711e-05, "loss": 0.7408, "step": 2509 }, { "epoch": 0.07328253189687892, "grad_norm": 0.7708986840610156, "learning_rate": 1.4652656158785756e-05, "loss": 0.7757, "step": 2510 }, { "epoch": 0.07331172812472629, "grad_norm": 0.8235541947047722, "learning_rate": 1.4658493870402803e-05, "loss": 0.7876, "step": 2511 }, { "epoch": 0.07334092435257365, "grad_norm": 0.7143793647974636, "learning_rate": 1.4664331582019848e-05, "loss": 0.6322, "step": 2512 }, { "epoch": 0.07337012058042101, "grad_norm": 0.7990083527215104, "learning_rate": 1.4670169293636896e-05, "loss": 0.6677, "step": 2513 }, { "epoch": 0.07339931680826838, "grad_norm": 0.7608521451998607, "learning_rate": 1.4676007005253943e-05, "loss": 0.6705, "step": 2514 }, { "epoch": 0.07342851303611574, "grad_norm": 0.9239724972539705, "learning_rate": 1.4681844716870988e-05, "loss": 0.71, "step": 2515 }, { "epoch": 0.0734577092639631, "grad_norm": 0.7854578585274093, "learning_rate": 1.4687682428488034e-05, "loss": 0.7719, "step": 2516 }, { "epoch": 0.07348690549181046, "grad_norm": 0.7532832607516381, "learning_rate": 1.469352014010508e-05, "loss": 0.6937, "step": 2517 }, { "epoch": 0.07351610171965782, "grad_norm": 0.6935161671947244, "learning_rate": 1.4699357851722128e-05, "loss": 0.6437, "step": 2518 }, { "epoch": 0.07354529794750518, "grad_norm": 0.7671605122988626, "learning_rate": 1.4705195563339173e-05, "loss": 0.7508, "step": 2519 }, { "epoch": 0.07357449417535254, "grad_norm": 0.7774857329414174, "learning_rate": 1.471103327495622e-05, "loss": 0.6884, "step": 2520 }, { "epoch": 0.0736036904031999, "grad_norm": 0.8196246016396354, "learning_rate": 1.4716870986573264e-05, "loss": 0.7387, "step": 2521 }, { "epoch": 0.07363288663104726, "grad_norm": 0.7358108019685866, "learning_rate": 1.4722708698190311e-05, "loss": 0.6879, "step": 2522 }, { "epoch": 0.07366208285889463, "grad_norm": 0.8069555013180295, "learning_rate": 1.4728546409807356e-05, "loss": 0.7483, "step": 2523 }, { "epoch": 0.07369127908674199, "grad_norm": 0.8200509298844336, "learning_rate": 1.4734384121424403e-05, "loss": 0.7318, "step": 2524 }, { "epoch": 0.07372047531458935, "grad_norm": 0.8078155964237128, "learning_rate": 1.4740221833041448e-05, "loss": 0.7911, "step": 2525 }, { "epoch": 0.07374967154243672, "grad_norm": 0.8177053698774094, "learning_rate": 1.4746059544658496e-05, "loss": 0.7682, "step": 2526 }, { "epoch": 0.07377886777028408, "grad_norm": 0.7662767747958813, "learning_rate": 1.4751897256275541e-05, "loss": 0.7159, "step": 2527 }, { "epoch": 0.07380806399813145, "grad_norm": 0.7996177365934335, "learning_rate": 1.4757734967892588e-05, "loss": 0.7719, "step": 2528 }, { "epoch": 0.0738372602259788, "grad_norm": 0.8184948720965782, "learning_rate": 1.4763572679509633e-05, "loss": 0.7238, "step": 2529 }, { "epoch": 0.07386645645382617, "grad_norm": 0.7621060107919396, "learning_rate": 1.476941039112668e-05, "loss": 0.6865, "step": 2530 }, { "epoch": 0.07389565268167353, "grad_norm": 0.8201548449287743, "learning_rate": 1.4775248102743726e-05, "loss": 0.7399, "step": 2531 }, { "epoch": 0.07392484890952089, "grad_norm": 0.7069260937808366, "learning_rate": 1.4781085814360771e-05, "loss": 0.6108, "step": 2532 }, { "epoch": 0.07395404513736825, "grad_norm": 0.8798632167376685, "learning_rate": 1.478692352597782e-05, "loss": 0.7176, "step": 2533 }, { "epoch": 0.07398324136521561, "grad_norm": 0.7225137767466603, "learning_rate": 1.4792761237594864e-05, "loss": 0.6553, "step": 2534 }, { "epoch": 0.07401243759306297, "grad_norm": 0.7076381583829029, "learning_rate": 1.4798598949211911e-05, "loss": 0.6219, "step": 2535 }, { "epoch": 0.07404163382091034, "grad_norm": 0.758306038325107, "learning_rate": 1.4804436660828956e-05, "loss": 0.6387, "step": 2536 }, { "epoch": 0.0740708300487577, "grad_norm": 0.8741454364034253, "learning_rate": 1.4810274372446003e-05, "loss": 0.7534, "step": 2537 }, { "epoch": 0.07410002627660506, "grad_norm": 0.8201935484789644, "learning_rate": 1.4816112084063048e-05, "loss": 0.749, "step": 2538 }, { "epoch": 0.07412922250445242, "grad_norm": 0.7974953288880277, "learning_rate": 1.4821949795680094e-05, "loss": 0.7252, "step": 2539 }, { "epoch": 0.07415841873229978, "grad_norm": 0.8403070393329328, "learning_rate": 1.482778750729714e-05, "loss": 0.7527, "step": 2540 }, { "epoch": 0.07418761496014716, "grad_norm": 0.7624180559035697, "learning_rate": 1.4833625218914188e-05, "loss": 0.7071, "step": 2541 }, { "epoch": 0.07421681118799452, "grad_norm": 0.7249146611147862, "learning_rate": 1.4839462930531233e-05, "loss": 0.6784, "step": 2542 }, { "epoch": 0.07424600741584188, "grad_norm": 0.7935710881186789, "learning_rate": 1.484530064214828e-05, "loss": 0.7554, "step": 2543 }, { "epoch": 0.07427520364368924, "grad_norm": 0.7580066730507498, "learning_rate": 1.4851138353765324e-05, "loss": 0.72, "step": 2544 }, { "epoch": 0.0743043998715366, "grad_norm": 0.7801682150162358, "learning_rate": 1.4856976065382371e-05, "loss": 0.7492, "step": 2545 }, { "epoch": 0.07433359609938396, "grad_norm": 1.1009135771709364, "learning_rate": 1.4862813776999418e-05, "loss": 0.691, "step": 2546 }, { "epoch": 0.07436279232723132, "grad_norm": 0.7695204503849735, "learning_rate": 1.4868651488616463e-05, "loss": 0.7813, "step": 2547 }, { "epoch": 0.07439198855507868, "grad_norm": 0.779032844750015, "learning_rate": 1.4874489200233511e-05, "loss": 0.813, "step": 2548 }, { "epoch": 0.07442118478292604, "grad_norm": 1.0484930286997192, "learning_rate": 1.4880326911850556e-05, "loss": 0.6766, "step": 2549 }, { "epoch": 0.0744503810107734, "grad_norm": 0.7322520099538584, "learning_rate": 1.4886164623467603e-05, "loss": 0.6994, "step": 2550 }, { "epoch": 0.07447957723862077, "grad_norm": 0.7622963634883662, "learning_rate": 1.4892002335084648e-05, "loss": 0.6716, "step": 2551 }, { "epoch": 0.07450877346646813, "grad_norm": 0.6958778092718462, "learning_rate": 1.4897840046701694e-05, "loss": 0.6428, "step": 2552 }, { "epoch": 0.07453796969431549, "grad_norm": 0.7293016206607218, "learning_rate": 1.490367775831874e-05, "loss": 0.6839, "step": 2553 }, { "epoch": 0.07456716592216285, "grad_norm": 0.7638901600057262, "learning_rate": 1.4909515469935786e-05, "loss": 0.7049, "step": 2554 }, { "epoch": 0.07459636215001021, "grad_norm": 0.8654243286465263, "learning_rate": 1.4915353181552831e-05, "loss": 0.5201, "step": 2555 }, { "epoch": 0.07462555837785759, "grad_norm": 0.7095927682409978, "learning_rate": 1.492119089316988e-05, "loss": 0.6774, "step": 2556 }, { "epoch": 0.07465475460570495, "grad_norm": 0.8069720531365993, "learning_rate": 1.4927028604786925e-05, "loss": 0.7649, "step": 2557 }, { "epoch": 0.07468395083355231, "grad_norm": 0.8578210629714382, "learning_rate": 1.4932866316403971e-05, "loss": 0.7071, "step": 2558 }, { "epoch": 0.07471314706139967, "grad_norm": 0.7542834953700466, "learning_rate": 1.4938704028021016e-05, "loss": 0.6618, "step": 2559 }, { "epoch": 0.07474234328924703, "grad_norm": 1.0178155193311298, "learning_rate": 1.4944541739638063e-05, "loss": 0.7714, "step": 2560 }, { "epoch": 0.07477153951709439, "grad_norm": 0.7941477356676403, "learning_rate": 1.4950379451255108e-05, "loss": 0.7333, "step": 2561 }, { "epoch": 0.07480073574494175, "grad_norm": 0.7014762087815396, "learning_rate": 1.4956217162872155e-05, "loss": 0.5954, "step": 2562 }, { "epoch": 0.07482993197278912, "grad_norm": 0.7982366469405587, "learning_rate": 1.4962054874489203e-05, "loss": 0.7252, "step": 2563 }, { "epoch": 0.07485912820063648, "grad_norm": 0.7267422840746112, "learning_rate": 1.4967892586106248e-05, "loss": 0.6423, "step": 2564 }, { "epoch": 0.07488832442848384, "grad_norm": 0.8204484919104342, "learning_rate": 1.4973730297723295e-05, "loss": 0.7953, "step": 2565 }, { "epoch": 0.0749175206563312, "grad_norm": 0.7484929094412138, "learning_rate": 1.497956800934034e-05, "loss": 0.6752, "step": 2566 }, { "epoch": 0.07494671688417856, "grad_norm": 0.7526366944307182, "learning_rate": 1.4985405720957386e-05, "loss": 0.6838, "step": 2567 }, { "epoch": 0.07497591311202592, "grad_norm": 0.7766632087910055, "learning_rate": 1.4991243432574431e-05, "loss": 0.6729, "step": 2568 }, { "epoch": 0.07500510933987328, "grad_norm": 0.8265035976046095, "learning_rate": 1.499708114419148e-05, "loss": 0.7738, "step": 2569 }, { "epoch": 0.07503430556772064, "grad_norm": 0.7252838035838198, "learning_rate": 1.5002918855808523e-05, "loss": 0.6257, "step": 2570 }, { "epoch": 0.07506350179556802, "grad_norm": 0.7940561582485881, "learning_rate": 1.5008756567425571e-05, "loss": 0.7646, "step": 2571 }, { "epoch": 0.07509269802341538, "grad_norm": 0.7444711140439014, "learning_rate": 1.5014594279042616e-05, "loss": 0.7094, "step": 2572 }, { "epoch": 0.07512189425126274, "grad_norm": 0.7777284461111028, "learning_rate": 1.5020431990659663e-05, "loss": 0.7042, "step": 2573 }, { "epoch": 0.0751510904791101, "grad_norm": 0.6941547230595797, "learning_rate": 1.5026269702276708e-05, "loss": 0.5938, "step": 2574 }, { "epoch": 0.07518028670695746, "grad_norm": 0.7099768483815277, "learning_rate": 1.5032107413893755e-05, "loss": 0.6652, "step": 2575 }, { "epoch": 0.07520948293480482, "grad_norm": 0.7515364166582845, "learning_rate": 1.50379451255108e-05, "loss": 0.6941, "step": 2576 }, { "epoch": 0.07523867916265219, "grad_norm": 1.652520045586942, "learning_rate": 1.5043782837127848e-05, "loss": 0.6751, "step": 2577 }, { "epoch": 0.07526787539049955, "grad_norm": 0.654048259634359, "learning_rate": 1.5049620548744895e-05, "loss": 0.5557, "step": 2578 }, { "epoch": 0.07529707161834691, "grad_norm": 0.8245944834232279, "learning_rate": 1.505545826036194e-05, "loss": 0.7845, "step": 2579 }, { "epoch": 0.07532626784619427, "grad_norm": 0.9998612095812066, "learning_rate": 1.5061295971978986e-05, "loss": 0.7991, "step": 2580 }, { "epoch": 0.07535546407404163, "grad_norm": 0.9509347435019955, "learning_rate": 1.5067133683596031e-05, "loss": 0.8285, "step": 2581 }, { "epoch": 0.07538466030188899, "grad_norm": 0.7619650529147993, "learning_rate": 1.5072971395213078e-05, "loss": 0.7642, "step": 2582 }, { "epoch": 0.07541385652973635, "grad_norm": 0.7473715373285362, "learning_rate": 1.5078809106830123e-05, "loss": 0.7313, "step": 2583 }, { "epoch": 0.07544305275758371, "grad_norm": 0.7017613173162331, "learning_rate": 1.5084646818447171e-05, "loss": 0.6462, "step": 2584 }, { "epoch": 0.07547224898543108, "grad_norm": 0.7103605358017949, "learning_rate": 1.5090484530064216e-05, "loss": 0.6328, "step": 2585 }, { "epoch": 0.07550144521327845, "grad_norm": 0.8832120370447621, "learning_rate": 1.5096322241681263e-05, "loss": 0.7842, "step": 2586 }, { "epoch": 0.07553064144112581, "grad_norm": 1.8671130733547165, "learning_rate": 1.5102159953298308e-05, "loss": 0.7209, "step": 2587 }, { "epoch": 0.07555983766897317, "grad_norm": 0.730418917821092, "learning_rate": 1.5107997664915355e-05, "loss": 0.708, "step": 2588 }, { "epoch": 0.07558903389682053, "grad_norm": 0.728401536562498, "learning_rate": 1.51138353765324e-05, "loss": 0.667, "step": 2589 }, { "epoch": 0.0756182301246679, "grad_norm": 0.7124917354861897, "learning_rate": 1.5119673088149446e-05, "loss": 0.6817, "step": 2590 }, { "epoch": 0.07564742635251526, "grad_norm": 0.7634904386610556, "learning_rate": 1.5125510799766491e-05, "loss": 0.6953, "step": 2591 }, { "epoch": 0.07567662258036262, "grad_norm": 0.9546663315636295, "learning_rate": 1.513134851138354e-05, "loss": 0.7983, "step": 2592 }, { "epoch": 0.07570581880820998, "grad_norm": 0.7904787633192404, "learning_rate": 1.5137186223000585e-05, "loss": 0.7384, "step": 2593 }, { "epoch": 0.07573501503605734, "grad_norm": 0.7207189374423527, "learning_rate": 1.5143023934617631e-05, "loss": 0.5865, "step": 2594 }, { "epoch": 0.0757642112639047, "grad_norm": 0.6851443318174857, "learning_rate": 1.5148861646234678e-05, "loss": 0.6651, "step": 2595 }, { "epoch": 0.07579340749175206, "grad_norm": 0.6801734357833755, "learning_rate": 1.5154699357851723e-05, "loss": 0.6051, "step": 2596 }, { "epoch": 0.07582260371959942, "grad_norm": 0.8406707241127614, "learning_rate": 1.516053706946877e-05, "loss": 0.7393, "step": 2597 }, { "epoch": 0.07585179994744679, "grad_norm": 0.800179484396365, "learning_rate": 1.5166374781085815e-05, "loss": 0.7141, "step": 2598 }, { "epoch": 0.07588099617529415, "grad_norm": 0.7265149124955753, "learning_rate": 1.5172212492702863e-05, "loss": 0.6688, "step": 2599 }, { "epoch": 0.07591019240314151, "grad_norm": 0.7428070990904757, "learning_rate": 1.5178050204319908e-05, "loss": 0.6941, "step": 2600 }, { "epoch": 0.07593938863098888, "grad_norm": 0.8042070063773079, "learning_rate": 1.5183887915936955e-05, "loss": 0.736, "step": 2601 }, { "epoch": 0.07596858485883624, "grad_norm": 0.7212183133412277, "learning_rate": 1.5189725627554e-05, "loss": 0.7135, "step": 2602 }, { "epoch": 0.0759977810866836, "grad_norm": 0.7720073833569902, "learning_rate": 1.5195563339171046e-05, "loss": 0.7005, "step": 2603 }, { "epoch": 0.07602697731453097, "grad_norm": 1.5197013157176638, "learning_rate": 1.5201401050788091e-05, "loss": 0.8181, "step": 2604 }, { "epoch": 0.07605617354237833, "grad_norm": 0.8357188495762254, "learning_rate": 1.5207238762405138e-05, "loss": 0.7787, "step": 2605 }, { "epoch": 0.07608536977022569, "grad_norm": 0.7472144743251279, "learning_rate": 1.5213076474022183e-05, "loss": 0.6375, "step": 2606 }, { "epoch": 0.07611456599807305, "grad_norm": 0.8477881560971534, "learning_rate": 1.5218914185639231e-05, "loss": 0.66, "step": 2607 }, { "epoch": 0.07614376222592041, "grad_norm": 0.7622688219798057, "learning_rate": 1.5224751897256276e-05, "loss": 0.6748, "step": 2608 }, { "epoch": 0.07617295845376777, "grad_norm": 0.8116335932304487, "learning_rate": 1.5230589608873323e-05, "loss": 0.759, "step": 2609 }, { "epoch": 0.07620215468161513, "grad_norm": 0.7641103693613984, "learning_rate": 1.523642732049037e-05, "loss": 0.7299, "step": 2610 }, { "epoch": 0.0762313509094625, "grad_norm": 0.7568175063295254, "learning_rate": 1.5242265032107415e-05, "loss": 0.68, "step": 2611 }, { "epoch": 0.07626054713730986, "grad_norm": 0.8367530913574664, "learning_rate": 1.5248102743724461e-05, "loss": 0.7594, "step": 2612 }, { "epoch": 0.07628974336515722, "grad_norm": 0.8679159411450373, "learning_rate": 1.5253940455341506e-05, "loss": 0.8046, "step": 2613 }, { "epoch": 0.07631893959300458, "grad_norm": 0.9077075676941572, "learning_rate": 1.5259778166958555e-05, "loss": 0.6833, "step": 2614 }, { "epoch": 0.07634813582085194, "grad_norm": 0.7761708796985194, "learning_rate": 1.52656158785756e-05, "loss": 0.6048, "step": 2615 }, { "epoch": 0.07637733204869931, "grad_norm": 0.7972045912114413, "learning_rate": 1.5271453590192645e-05, "loss": 0.7557, "step": 2616 }, { "epoch": 0.07640652827654668, "grad_norm": 0.8492225980862111, "learning_rate": 1.527729130180969e-05, "loss": 0.7377, "step": 2617 }, { "epoch": 0.07643572450439404, "grad_norm": 0.705229057659577, "learning_rate": 1.5283129013426738e-05, "loss": 0.6622, "step": 2618 }, { "epoch": 0.0764649207322414, "grad_norm": 0.89647524697392, "learning_rate": 1.5288966725043783e-05, "loss": 0.7874, "step": 2619 }, { "epoch": 0.07649411696008876, "grad_norm": 0.8014550418898329, "learning_rate": 1.529480443666083e-05, "loss": 0.7927, "step": 2620 }, { "epoch": 0.07652331318793612, "grad_norm": 0.7955824895852784, "learning_rate": 1.5300642148277876e-05, "loss": 0.753, "step": 2621 }, { "epoch": 0.07655250941578348, "grad_norm": 0.6925395633872172, "learning_rate": 1.530647985989492e-05, "loss": 0.6007, "step": 2622 }, { "epoch": 0.07658170564363084, "grad_norm": 0.8593735701714134, "learning_rate": 1.5312317571511966e-05, "loss": 0.7807, "step": 2623 }, { "epoch": 0.0766109018714782, "grad_norm": 0.8322201613749953, "learning_rate": 1.5318155283129015e-05, "loss": 0.7817, "step": 2624 }, { "epoch": 0.07664009809932557, "grad_norm": 0.7525887940697042, "learning_rate": 1.532399299474606e-05, "loss": 0.7114, "step": 2625 }, { "epoch": 0.07666929432717293, "grad_norm": 0.7815839699808339, "learning_rate": 1.5329830706363108e-05, "loss": 0.7695, "step": 2626 }, { "epoch": 0.07669849055502029, "grad_norm": 0.8979863812223344, "learning_rate": 1.5335668417980153e-05, "loss": 0.6886, "step": 2627 }, { "epoch": 0.07672768678286765, "grad_norm": 0.7267249589411459, "learning_rate": 1.5341506129597198e-05, "loss": 0.6898, "step": 2628 }, { "epoch": 0.07675688301071501, "grad_norm": 0.8604803979917971, "learning_rate": 1.5347343841214247e-05, "loss": 0.7636, "step": 2629 }, { "epoch": 0.07678607923856237, "grad_norm": 0.897025127777002, "learning_rate": 1.535318155283129e-05, "loss": 0.8033, "step": 2630 }, { "epoch": 0.07681527546640975, "grad_norm": 0.7608223632871678, "learning_rate": 1.535901926444834e-05, "loss": 0.6149, "step": 2631 }, { "epoch": 0.07684447169425711, "grad_norm": 0.8670143924388231, "learning_rate": 1.5364856976065385e-05, "loss": 0.7117, "step": 2632 }, { "epoch": 0.07687366792210447, "grad_norm": 0.7531167729176835, "learning_rate": 1.537069468768243e-05, "loss": 0.6989, "step": 2633 }, { "epoch": 0.07690286414995183, "grad_norm": 0.7464458029544627, "learning_rate": 1.5376532399299475e-05, "loss": 0.7134, "step": 2634 }, { "epoch": 0.07693206037779919, "grad_norm": 0.7609735950879455, "learning_rate": 1.5382370110916523e-05, "loss": 0.6658, "step": 2635 }, { "epoch": 0.07696125660564655, "grad_norm": 0.7277690427870641, "learning_rate": 1.5388207822533568e-05, "loss": 0.7242, "step": 2636 }, { "epoch": 0.07699045283349391, "grad_norm": 0.9205429809031846, "learning_rate": 1.5394045534150613e-05, "loss": 0.8349, "step": 2637 }, { "epoch": 0.07701964906134127, "grad_norm": 0.8425469623096623, "learning_rate": 1.5399883245767658e-05, "loss": 0.6651, "step": 2638 }, { "epoch": 0.07704884528918864, "grad_norm": 0.8303790206872405, "learning_rate": 1.5405720957384707e-05, "loss": 0.7684, "step": 2639 }, { "epoch": 0.077078041517036, "grad_norm": 0.9421563409530352, "learning_rate": 1.541155866900175e-05, "loss": 0.8204, "step": 2640 }, { "epoch": 0.07710723774488336, "grad_norm": 0.7195459423108052, "learning_rate": 1.54173963806188e-05, "loss": 0.6282, "step": 2641 }, { "epoch": 0.07713643397273072, "grad_norm": 0.7868656913058721, "learning_rate": 1.5423234092235845e-05, "loss": 0.7876, "step": 2642 }, { "epoch": 0.07716563020057808, "grad_norm": 0.7267341252728745, "learning_rate": 1.542907180385289e-05, "loss": 0.6521, "step": 2643 }, { "epoch": 0.07719482642842544, "grad_norm": 0.7645215476824618, "learning_rate": 1.5434909515469938e-05, "loss": 0.7317, "step": 2644 }, { "epoch": 0.0772240226562728, "grad_norm": 1.0205211190558316, "learning_rate": 1.5440747227086983e-05, "loss": 0.7632, "step": 2645 }, { "epoch": 0.07725321888412018, "grad_norm": 0.8134786543219588, "learning_rate": 1.544658493870403e-05, "loss": 0.7197, "step": 2646 }, { "epoch": 0.07728241511196754, "grad_norm": 0.7824545556505311, "learning_rate": 1.5452422650321077e-05, "loss": 0.6974, "step": 2647 }, { "epoch": 0.0773116113398149, "grad_norm": 0.9665530419953584, "learning_rate": 1.545826036193812e-05, "loss": 0.735, "step": 2648 }, { "epoch": 0.07734080756766226, "grad_norm": 0.833332363641691, "learning_rate": 1.5464098073555167e-05, "loss": 0.8372, "step": 2649 }, { "epoch": 0.07737000379550962, "grad_norm": 0.7970501895058317, "learning_rate": 1.5469935785172215e-05, "loss": 0.7164, "step": 2650 }, { "epoch": 0.07739920002335698, "grad_norm": 0.7959434908285652, "learning_rate": 1.547577349678926e-05, "loss": 0.796, "step": 2651 }, { "epoch": 0.07742839625120435, "grad_norm": 0.7594847801028307, "learning_rate": 1.5481611208406305e-05, "loss": 0.7061, "step": 2652 }, { "epoch": 0.0774575924790517, "grad_norm": 0.7315266367331691, "learning_rate": 1.548744892002335e-05, "loss": 0.6302, "step": 2653 }, { "epoch": 0.07748678870689907, "grad_norm": 0.8473127195697506, "learning_rate": 1.5493286631640398e-05, "loss": 0.7666, "step": 2654 }, { "epoch": 0.07751598493474643, "grad_norm": 0.7972220118902364, "learning_rate": 1.5499124343257443e-05, "loss": 0.638, "step": 2655 }, { "epoch": 0.07754518116259379, "grad_norm": 0.8442280244654181, "learning_rate": 1.550496205487449e-05, "loss": 0.7228, "step": 2656 }, { "epoch": 0.07757437739044115, "grad_norm": 0.6825872589014036, "learning_rate": 1.5510799766491537e-05, "loss": 0.6393, "step": 2657 }, { "epoch": 0.07760357361828851, "grad_norm": 0.7542916729668425, "learning_rate": 1.551663747810858e-05, "loss": 0.7093, "step": 2658 }, { "epoch": 0.07763276984613587, "grad_norm": 0.827154245146506, "learning_rate": 1.552247518972563e-05, "loss": 0.7665, "step": 2659 }, { "epoch": 0.07766196607398324, "grad_norm": 0.7892033228340769, "learning_rate": 1.5528312901342675e-05, "loss": 0.6301, "step": 2660 }, { "epoch": 0.07769116230183061, "grad_norm": 0.7559321222829862, "learning_rate": 1.5534150612959723e-05, "loss": 0.6701, "step": 2661 }, { "epoch": 0.07772035852967797, "grad_norm": 0.9044689294364558, "learning_rate": 1.553998832457677e-05, "loss": 0.9073, "step": 2662 }, { "epoch": 0.07774955475752533, "grad_norm": 0.7207726482932177, "learning_rate": 1.5545826036193813e-05, "loss": 0.6118, "step": 2663 }, { "epoch": 0.0777787509853727, "grad_norm": 0.7628581687721426, "learning_rate": 1.5551663747810858e-05, "loss": 0.7031, "step": 2664 }, { "epoch": 0.07780794721322006, "grad_norm": 1.0961716932750158, "learning_rate": 1.5557501459427907e-05, "loss": 0.7297, "step": 2665 }, { "epoch": 0.07783714344106742, "grad_norm": 0.8040969156907016, "learning_rate": 1.556333917104495e-05, "loss": 0.6176, "step": 2666 }, { "epoch": 0.07786633966891478, "grad_norm": 0.7898033838087474, "learning_rate": 1.5569176882661997e-05, "loss": 0.7705, "step": 2667 }, { "epoch": 0.07789553589676214, "grad_norm": 0.8253458568242464, "learning_rate": 1.557501459427904e-05, "loss": 0.7071, "step": 2668 }, { "epoch": 0.0779247321246095, "grad_norm": 0.7698050542627907, "learning_rate": 1.558085230589609e-05, "loss": 0.7318, "step": 2669 }, { "epoch": 0.07795392835245686, "grad_norm": 0.9122879288822476, "learning_rate": 1.5586690017513135e-05, "loss": 0.7317, "step": 2670 }, { "epoch": 0.07798312458030422, "grad_norm": 0.7595454751331446, "learning_rate": 1.5592527729130183e-05, "loss": 0.6747, "step": 2671 }, { "epoch": 0.07801232080815158, "grad_norm": 0.7504227982645878, "learning_rate": 1.559836544074723e-05, "loss": 0.6788, "step": 2672 }, { "epoch": 0.07804151703599894, "grad_norm": 0.782618254388295, "learning_rate": 1.5604203152364273e-05, "loss": 0.61, "step": 2673 }, { "epoch": 0.0780707132638463, "grad_norm": 0.7550596624325914, "learning_rate": 1.5610040863981322e-05, "loss": 0.7031, "step": 2674 }, { "epoch": 0.07809990949169367, "grad_norm": 0.7957471059209422, "learning_rate": 1.5615878575598367e-05, "loss": 0.7893, "step": 2675 }, { "epoch": 0.07812910571954104, "grad_norm": 0.7472570062861317, "learning_rate": 1.5621716287215415e-05, "loss": 0.6933, "step": 2676 }, { "epoch": 0.0781583019473884, "grad_norm": 0.9040028209701332, "learning_rate": 1.562755399883246e-05, "loss": 0.76, "step": 2677 }, { "epoch": 0.07818749817523576, "grad_norm": 0.7109238880539446, "learning_rate": 1.5633391710449505e-05, "loss": 0.6614, "step": 2678 }, { "epoch": 0.07821669440308313, "grad_norm": 0.7765097569890373, "learning_rate": 1.563922942206655e-05, "loss": 0.5665, "step": 2679 }, { "epoch": 0.07824589063093049, "grad_norm": 0.8490341585305838, "learning_rate": 1.56450671336836e-05, "loss": 0.7557, "step": 2680 }, { "epoch": 0.07827508685877785, "grad_norm": 0.7714246006600386, "learning_rate": 1.5650904845300643e-05, "loss": 0.7643, "step": 2681 }, { "epoch": 0.07830428308662521, "grad_norm": 0.7242842382295103, "learning_rate": 1.5656742556917692e-05, "loss": 0.6528, "step": 2682 }, { "epoch": 0.07833347931447257, "grad_norm": 0.7072714497973109, "learning_rate": 1.5662580268534733e-05, "loss": 0.6895, "step": 2683 }, { "epoch": 0.07836267554231993, "grad_norm": 0.8314018903549917, "learning_rate": 1.5668417980151782e-05, "loss": 0.7875, "step": 2684 }, { "epoch": 0.07839187177016729, "grad_norm": 0.7548517120213113, "learning_rate": 1.5674255691768827e-05, "loss": 0.6965, "step": 2685 }, { "epoch": 0.07842106799801465, "grad_norm": 0.8083416806427351, "learning_rate": 1.5680093403385875e-05, "loss": 0.7877, "step": 2686 }, { "epoch": 0.07845026422586202, "grad_norm": 0.8477137927107584, "learning_rate": 1.568593111500292e-05, "loss": 0.8382, "step": 2687 }, { "epoch": 0.07847946045370938, "grad_norm": 0.7672517378215933, "learning_rate": 1.5691768826619965e-05, "loss": 0.6326, "step": 2688 }, { "epoch": 0.07850865668155674, "grad_norm": 0.7821993133652899, "learning_rate": 1.569760653823701e-05, "loss": 0.7465, "step": 2689 }, { "epoch": 0.0785378529094041, "grad_norm": 0.8131070806363617, "learning_rate": 1.570344424985406e-05, "loss": 0.7934, "step": 2690 }, { "epoch": 0.07856704913725147, "grad_norm": 0.6877130697453838, "learning_rate": 1.5709281961471107e-05, "loss": 0.5874, "step": 2691 }, { "epoch": 0.07859624536509884, "grad_norm": 0.7454103123845365, "learning_rate": 1.5715119673088152e-05, "loss": 0.7068, "step": 2692 }, { "epoch": 0.0786254415929462, "grad_norm": 0.7770670467747374, "learning_rate": 1.5720957384705197e-05, "loss": 0.7927, "step": 2693 }, { "epoch": 0.07865463782079356, "grad_norm": 0.767325810749074, "learning_rate": 1.5726795096322242e-05, "loss": 0.7644, "step": 2694 }, { "epoch": 0.07868383404864092, "grad_norm": 0.7684302609945054, "learning_rate": 1.573263280793929e-05, "loss": 0.6963, "step": 2695 }, { "epoch": 0.07871303027648828, "grad_norm": 0.8220692845301869, "learning_rate": 1.5738470519556335e-05, "loss": 0.739, "step": 2696 }, { "epoch": 0.07874222650433564, "grad_norm": 0.7553967195322436, "learning_rate": 1.5744308231173383e-05, "loss": 0.657, "step": 2697 }, { "epoch": 0.078771422732183, "grad_norm": 0.7324256664986636, "learning_rate": 1.575014594279043e-05, "loss": 0.7143, "step": 2698 }, { "epoch": 0.07880061896003036, "grad_norm": 0.7137402882792291, "learning_rate": 1.5755983654407473e-05, "loss": 0.6744, "step": 2699 }, { "epoch": 0.07882981518787772, "grad_norm": 0.7579266609983469, "learning_rate": 1.576182136602452e-05, "loss": 0.6514, "step": 2700 }, { "epoch": 0.07885901141572509, "grad_norm": 0.808698525490525, "learning_rate": 1.5767659077641567e-05, "loss": 0.6738, "step": 2701 }, { "epoch": 0.07888820764357245, "grad_norm": 0.7645434909874078, "learning_rate": 1.5773496789258612e-05, "loss": 0.7355, "step": 2702 }, { "epoch": 0.07891740387141981, "grad_norm": 0.7812499296718199, "learning_rate": 1.5779334500875657e-05, "loss": 0.7028, "step": 2703 }, { "epoch": 0.07894660009926717, "grad_norm": 0.8717496388173844, "learning_rate": 1.5785172212492702e-05, "loss": 0.741, "step": 2704 }, { "epoch": 0.07897579632711453, "grad_norm": 0.7466717742612023, "learning_rate": 1.579100992410975e-05, "loss": 0.6454, "step": 2705 }, { "epoch": 0.07900499255496189, "grad_norm": 0.8205438627447166, "learning_rate": 1.57968476357268e-05, "loss": 0.6679, "step": 2706 }, { "epoch": 0.07903418878280927, "grad_norm": 0.6850741828004488, "learning_rate": 1.5802685347343844e-05, "loss": 0.606, "step": 2707 }, { "epoch": 0.07906338501065663, "grad_norm": 0.7430137616847718, "learning_rate": 1.580852305896089e-05, "loss": 0.6608, "step": 2708 }, { "epoch": 0.07909258123850399, "grad_norm": 0.709765106940524, "learning_rate": 1.5814360770577933e-05, "loss": 0.6444, "step": 2709 }, { "epoch": 0.07912177746635135, "grad_norm": 0.6781263932803945, "learning_rate": 1.5820198482194982e-05, "loss": 0.6111, "step": 2710 }, { "epoch": 0.07915097369419871, "grad_norm": 0.7173930822670831, "learning_rate": 1.5826036193812027e-05, "loss": 0.6512, "step": 2711 }, { "epoch": 0.07918016992204607, "grad_norm": 0.7463946259651927, "learning_rate": 1.5831873905429075e-05, "loss": 0.6891, "step": 2712 }, { "epoch": 0.07920936614989343, "grad_norm": 0.7276920489222178, "learning_rate": 1.583771161704612e-05, "loss": 0.7019, "step": 2713 }, { "epoch": 0.0792385623777408, "grad_norm": 0.8424543535029542, "learning_rate": 1.5843549328663165e-05, "loss": 0.8117, "step": 2714 }, { "epoch": 0.07926775860558816, "grad_norm": 0.7306389552781235, "learning_rate": 1.584938704028021e-05, "loss": 0.6895, "step": 2715 }, { "epoch": 0.07929695483343552, "grad_norm": 0.7717268076019649, "learning_rate": 1.585522475189726e-05, "loss": 0.7174, "step": 2716 }, { "epoch": 0.07932615106128288, "grad_norm": 0.7448764445799334, "learning_rate": 1.5861062463514304e-05, "loss": 0.7255, "step": 2717 }, { "epoch": 0.07935534728913024, "grad_norm": 0.8130168997591783, "learning_rate": 1.586690017513135e-05, "loss": 0.6606, "step": 2718 }, { "epoch": 0.0793845435169776, "grad_norm": 0.910978219183505, "learning_rate": 1.5872737886748394e-05, "loss": 0.7531, "step": 2719 }, { "epoch": 0.07941373974482496, "grad_norm": 0.8903404121541763, "learning_rate": 1.5878575598365442e-05, "loss": 0.7958, "step": 2720 }, { "epoch": 0.07944293597267232, "grad_norm": 0.7803448426697795, "learning_rate": 1.5884413309982487e-05, "loss": 0.6884, "step": 2721 }, { "epoch": 0.0794721322005197, "grad_norm": 0.7561683304658575, "learning_rate": 1.5890251021599535e-05, "loss": 0.7385, "step": 2722 }, { "epoch": 0.07950132842836706, "grad_norm": 0.8016104008979492, "learning_rate": 1.589608873321658e-05, "loss": 0.7793, "step": 2723 }, { "epoch": 0.07953052465621442, "grad_norm": 0.7390748915080669, "learning_rate": 1.5901926444833625e-05, "loss": 0.676, "step": 2724 }, { "epoch": 0.07955972088406178, "grad_norm": 0.8109061853108099, "learning_rate": 1.5907764156450674e-05, "loss": 0.7723, "step": 2725 }, { "epoch": 0.07958891711190914, "grad_norm": 0.8515669821277345, "learning_rate": 1.591360186806772e-05, "loss": 0.6869, "step": 2726 }, { "epoch": 0.0796181133397565, "grad_norm": 0.7392835824984318, "learning_rate": 1.5919439579684767e-05, "loss": 0.7258, "step": 2727 }, { "epoch": 0.07964730956760387, "grad_norm": 0.9344989562642153, "learning_rate": 1.5925277291301812e-05, "loss": 0.8709, "step": 2728 }, { "epoch": 0.07967650579545123, "grad_norm": 0.8866286844057645, "learning_rate": 1.5931115002918857e-05, "loss": 0.7899, "step": 2729 }, { "epoch": 0.07970570202329859, "grad_norm": 0.8798801926303615, "learning_rate": 1.5936952714535902e-05, "loss": 0.775, "step": 2730 }, { "epoch": 0.07973489825114595, "grad_norm": 0.6904675890844565, "learning_rate": 1.594279042615295e-05, "loss": 0.5718, "step": 2731 }, { "epoch": 0.07976409447899331, "grad_norm": 0.6712993460501848, "learning_rate": 1.5948628137769995e-05, "loss": 0.5592, "step": 2732 }, { "epoch": 0.07979329070684067, "grad_norm": 0.7417628328207186, "learning_rate": 1.595446584938704e-05, "loss": 0.666, "step": 2733 }, { "epoch": 0.07982248693468803, "grad_norm": 0.7900373839525885, "learning_rate": 1.5960303561004085e-05, "loss": 0.7063, "step": 2734 }, { "epoch": 0.0798516831625354, "grad_norm": 0.7444008971762978, "learning_rate": 1.5966141272621134e-05, "loss": 0.6885, "step": 2735 }, { "epoch": 0.07988087939038276, "grad_norm": 0.7198280356063614, "learning_rate": 1.597197898423818e-05, "loss": 0.6885, "step": 2736 }, { "epoch": 0.07991007561823013, "grad_norm": 0.89191835931351, "learning_rate": 1.5977816695855227e-05, "loss": 0.7596, "step": 2737 }, { "epoch": 0.07993927184607749, "grad_norm": 0.81747191438496, "learning_rate": 1.5983654407472272e-05, "loss": 0.7292, "step": 2738 }, { "epoch": 0.07996846807392485, "grad_norm": 0.9606444340673708, "learning_rate": 1.5989492119089317e-05, "loss": 0.6924, "step": 2739 }, { "epoch": 0.07999766430177221, "grad_norm": 0.784398611543293, "learning_rate": 1.5995329830706365e-05, "loss": 0.7646, "step": 2740 }, { "epoch": 0.08002686052961958, "grad_norm": 0.8421045723454953, "learning_rate": 1.600116754232341e-05, "loss": 0.6888, "step": 2741 }, { "epoch": 0.08005605675746694, "grad_norm": 0.8074491266460725, "learning_rate": 1.600700525394046e-05, "loss": 0.804, "step": 2742 }, { "epoch": 0.0800852529853143, "grad_norm": 0.8162626002887556, "learning_rate": 1.6012842965557504e-05, "loss": 0.7194, "step": 2743 }, { "epoch": 0.08011444921316166, "grad_norm": 0.7828146149756187, "learning_rate": 1.601868067717455e-05, "loss": 0.7216, "step": 2744 }, { "epoch": 0.08014364544100902, "grad_norm": 0.8722077826986571, "learning_rate": 1.6024518388791594e-05, "loss": 0.7432, "step": 2745 }, { "epoch": 0.08017284166885638, "grad_norm": 0.8324045292224631, "learning_rate": 1.6030356100408642e-05, "loss": 0.7553, "step": 2746 }, { "epoch": 0.08020203789670374, "grad_norm": 0.741052962491158, "learning_rate": 1.6036193812025687e-05, "loss": 0.6948, "step": 2747 }, { "epoch": 0.0802312341245511, "grad_norm": 0.7610046137508271, "learning_rate": 1.6042031523642735e-05, "loss": 0.713, "step": 2748 }, { "epoch": 0.08026043035239847, "grad_norm": 0.7362942688430073, "learning_rate": 1.604786923525978e-05, "loss": 0.6703, "step": 2749 }, { "epoch": 0.08028962658024583, "grad_norm": 0.768532385725354, "learning_rate": 1.6053706946876825e-05, "loss": 0.7351, "step": 2750 }, { "epoch": 0.08031882280809319, "grad_norm": 0.7898798791372672, "learning_rate": 1.605954465849387e-05, "loss": 0.7437, "step": 2751 }, { "epoch": 0.08034801903594056, "grad_norm": 0.7409100831019912, "learning_rate": 1.606538237011092e-05, "loss": 0.7106, "step": 2752 }, { "epoch": 0.08037721526378792, "grad_norm": 0.7196589439422019, "learning_rate": 1.6071220081727964e-05, "loss": 0.6405, "step": 2753 }, { "epoch": 0.08040641149163529, "grad_norm": 0.7692541567670403, "learning_rate": 1.607705779334501e-05, "loss": 0.7066, "step": 2754 }, { "epoch": 0.08043560771948265, "grad_norm": 0.7182378939387594, "learning_rate": 1.6082895504962057e-05, "loss": 0.6507, "step": 2755 }, { "epoch": 0.08046480394733001, "grad_norm": 0.7703691831823393, "learning_rate": 1.6088733216579102e-05, "loss": 0.7332, "step": 2756 }, { "epoch": 0.08049400017517737, "grad_norm": 0.7788478195720683, "learning_rate": 1.609457092819615e-05, "loss": 0.696, "step": 2757 }, { "epoch": 0.08052319640302473, "grad_norm": 0.8136255304398666, "learning_rate": 1.6100408639813195e-05, "loss": 0.7005, "step": 2758 }, { "epoch": 0.08055239263087209, "grad_norm": 0.7361707336942657, "learning_rate": 1.610624635143024e-05, "loss": 0.6256, "step": 2759 }, { "epoch": 0.08058158885871945, "grad_norm": 0.7720068183141364, "learning_rate": 1.6112084063047285e-05, "loss": 0.7806, "step": 2760 }, { "epoch": 0.08061078508656681, "grad_norm": 0.7814557371535167, "learning_rate": 1.6117921774664334e-05, "loss": 0.6738, "step": 2761 }, { "epoch": 0.08063998131441417, "grad_norm": 0.7393600592359605, "learning_rate": 1.612375948628138e-05, "loss": 0.6724, "step": 2762 }, { "epoch": 0.08066917754226154, "grad_norm": 0.9898050532966892, "learning_rate": 1.6129597197898427e-05, "loss": 0.7341, "step": 2763 }, { "epoch": 0.0806983737701089, "grad_norm": 0.7761149982316312, "learning_rate": 1.6135434909515472e-05, "loss": 0.7085, "step": 2764 }, { "epoch": 0.08072756999795626, "grad_norm": 0.7403923620325048, "learning_rate": 1.6141272621132517e-05, "loss": 0.6702, "step": 2765 }, { "epoch": 0.08075676622580362, "grad_norm": 0.7763068370464435, "learning_rate": 1.6147110332749562e-05, "loss": 0.7845, "step": 2766 }, { "epoch": 0.080785962453651, "grad_norm": 0.6665913609950794, "learning_rate": 1.615294804436661e-05, "loss": 0.5779, "step": 2767 }, { "epoch": 0.08081515868149836, "grad_norm": 0.7394073521091341, "learning_rate": 1.6158785755983655e-05, "loss": 0.7116, "step": 2768 }, { "epoch": 0.08084435490934572, "grad_norm": 0.8035034179171472, "learning_rate": 1.61646234676007e-05, "loss": 0.6824, "step": 2769 }, { "epoch": 0.08087355113719308, "grad_norm": 0.7707460836871493, "learning_rate": 1.617046117921775e-05, "loss": 0.6599, "step": 2770 }, { "epoch": 0.08090274736504044, "grad_norm": 0.789924789404472, "learning_rate": 1.6176298890834794e-05, "loss": 0.7208, "step": 2771 }, { "epoch": 0.0809319435928878, "grad_norm": 0.8020971345483491, "learning_rate": 1.6182136602451842e-05, "loss": 0.7052, "step": 2772 }, { "epoch": 0.08096113982073516, "grad_norm": 0.7329311998430715, "learning_rate": 1.6187974314068887e-05, "loss": 0.6978, "step": 2773 }, { "epoch": 0.08099033604858252, "grad_norm": 0.8663028716806868, "learning_rate": 1.6193812025685932e-05, "loss": 0.8229, "step": 2774 }, { "epoch": 0.08101953227642988, "grad_norm": 0.779427909312745, "learning_rate": 1.6199649737302977e-05, "loss": 0.6978, "step": 2775 }, { "epoch": 0.08104872850427725, "grad_norm": 0.8031976216511283, "learning_rate": 1.6205487448920025e-05, "loss": 0.7671, "step": 2776 }, { "epoch": 0.0810779247321246, "grad_norm": 0.7882166118125831, "learning_rate": 1.621132516053707e-05, "loss": 0.7592, "step": 2777 }, { "epoch": 0.08110712095997197, "grad_norm": 0.9185588723661527, "learning_rate": 1.621716287215412e-05, "loss": 0.7695, "step": 2778 }, { "epoch": 0.08113631718781933, "grad_norm": 0.7762623491434962, "learning_rate": 1.6223000583771164e-05, "loss": 0.7202, "step": 2779 }, { "epoch": 0.08116551341566669, "grad_norm": 0.8854264523316988, "learning_rate": 1.622883829538821e-05, "loss": 0.7085, "step": 2780 }, { "epoch": 0.08119470964351405, "grad_norm": 0.7351002311716803, "learning_rate": 1.6234676007005254e-05, "loss": 0.7047, "step": 2781 }, { "epoch": 0.08122390587136143, "grad_norm": 0.8318957296333948, "learning_rate": 1.6240513718622302e-05, "loss": 0.6986, "step": 2782 }, { "epoch": 0.08125310209920879, "grad_norm": 0.9199412828124579, "learning_rate": 1.6246351430239347e-05, "loss": 0.7711, "step": 2783 }, { "epoch": 0.08128229832705615, "grad_norm": 0.7229195113335299, "learning_rate": 1.6252189141856392e-05, "loss": 0.6682, "step": 2784 }, { "epoch": 0.08131149455490351, "grad_norm": 0.7916880829545513, "learning_rate": 1.6258026853473437e-05, "loss": 0.6885, "step": 2785 }, { "epoch": 0.08134069078275087, "grad_norm": 0.754471478933065, "learning_rate": 1.6263864565090486e-05, "loss": 0.6131, "step": 2786 }, { "epoch": 0.08136988701059823, "grad_norm": 0.8516386725663756, "learning_rate": 1.6269702276707534e-05, "loss": 0.7445, "step": 2787 }, { "epoch": 0.0813990832384456, "grad_norm": 0.7868938060373339, "learning_rate": 1.627553998832458e-05, "loss": 0.7122, "step": 2788 }, { "epoch": 0.08142827946629295, "grad_norm": 0.769496219115148, "learning_rate": 1.6281377699941624e-05, "loss": 0.7829, "step": 2789 }, { "epoch": 0.08145747569414032, "grad_norm": 1.0103262371057475, "learning_rate": 1.628721541155867e-05, "loss": 0.7684, "step": 2790 }, { "epoch": 0.08148667192198768, "grad_norm": 0.7406591111832413, "learning_rate": 1.6293053123175717e-05, "loss": 0.6915, "step": 2791 }, { "epoch": 0.08151586814983504, "grad_norm": 0.7974864531576682, "learning_rate": 1.6298890834792762e-05, "loss": 0.7553, "step": 2792 }, { "epoch": 0.0815450643776824, "grad_norm": 0.7932992408452777, "learning_rate": 1.630472854640981e-05, "loss": 0.6876, "step": 2793 }, { "epoch": 0.08157426060552976, "grad_norm": 0.8763929136223507, "learning_rate": 1.6310566258026856e-05, "loss": 0.6774, "step": 2794 }, { "epoch": 0.08160345683337712, "grad_norm": 1.1619885576451001, "learning_rate": 1.63164039696439e-05, "loss": 0.6571, "step": 2795 }, { "epoch": 0.08163265306122448, "grad_norm": 0.7987784213152587, "learning_rate": 1.6322241681260946e-05, "loss": 0.7265, "step": 2796 }, { "epoch": 0.08166184928907186, "grad_norm": 0.7720996473522005, "learning_rate": 1.6328079392877994e-05, "loss": 0.6387, "step": 2797 }, { "epoch": 0.08169104551691922, "grad_norm": 0.9382845889533268, "learning_rate": 1.633391710449504e-05, "loss": 0.7846, "step": 2798 }, { "epoch": 0.08172024174476658, "grad_norm": 0.7374331004349634, "learning_rate": 1.6339754816112087e-05, "loss": 0.6627, "step": 2799 }, { "epoch": 0.08174943797261394, "grad_norm": 0.7213005476208801, "learning_rate": 1.634559252772913e-05, "loss": 0.715, "step": 2800 }, { "epoch": 0.0817786342004613, "grad_norm": 0.7564659375408664, "learning_rate": 1.6351430239346177e-05, "loss": 0.7362, "step": 2801 }, { "epoch": 0.08180783042830866, "grad_norm": 0.7236012722799452, "learning_rate": 1.6357267950963226e-05, "loss": 0.6906, "step": 2802 }, { "epoch": 0.08183702665615603, "grad_norm": 0.7519249548887911, "learning_rate": 1.636310566258027e-05, "loss": 0.615, "step": 2803 }, { "epoch": 0.08186622288400339, "grad_norm": 0.7860731337084365, "learning_rate": 1.6368943374197316e-05, "loss": 0.6951, "step": 2804 }, { "epoch": 0.08189541911185075, "grad_norm": 0.9130007782214681, "learning_rate": 1.637478108581436e-05, "loss": 0.6736, "step": 2805 }, { "epoch": 0.08192461533969811, "grad_norm": 0.9016953106408258, "learning_rate": 1.638061879743141e-05, "loss": 0.7999, "step": 2806 }, { "epoch": 0.08195381156754547, "grad_norm": 0.760209614580512, "learning_rate": 1.6386456509048454e-05, "loss": 0.7381, "step": 2807 }, { "epoch": 0.08198300779539283, "grad_norm": 0.7739426398270114, "learning_rate": 1.6392294220665502e-05, "loss": 0.6118, "step": 2808 }, { "epoch": 0.08201220402324019, "grad_norm": 0.7512945331989748, "learning_rate": 1.6398131932282547e-05, "loss": 0.7291, "step": 2809 }, { "epoch": 0.08204140025108755, "grad_norm": 1.2633604059919021, "learning_rate": 1.6403969643899592e-05, "loss": 0.6644, "step": 2810 }, { "epoch": 0.08207059647893492, "grad_norm": 0.7639662488605221, "learning_rate": 1.6409807355516637e-05, "loss": 0.6878, "step": 2811 }, { "epoch": 0.08209979270678229, "grad_norm": 0.8592344694099076, "learning_rate": 1.6415645067133686e-05, "loss": 0.7411, "step": 2812 }, { "epoch": 0.08212898893462965, "grad_norm": 0.7847625602871833, "learning_rate": 1.642148277875073e-05, "loss": 0.6569, "step": 2813 }, { "epoch": 0.08215818516247701, "grad_norm": 0.7844003465764617, "learning_rate": 1.642732049036778e-05, "loss": 0.7086, "step": 2814 }, { "epoch": 0.08218738139032437, "grad_norm": 0.7661523119639797, "learning_rate": 1.6433158201984824e-05, "loss": 0.7483, "step": 2815 }, { "epoch": 0.08221657761817174, "grad_norm": 1.2545265762321, "learning_rate": 1.643899591360187e-05, "loss": 0.5961, "step": 2816 }, { "epoch": 0.0822457738460191, "grad_norm": 0.7224771891483207, "learning_rate": 1.6444833625218914e-05, "loss": 0.663, "step": 2817 }, { "epoch": 0.08227497007386646, "grad_norm": 0.8834425185592264, "learning_rate": 1.6450671336835962e-05, "loss": 0.7453, "step": 2818 }, { "epoch": 0.08230416630171382, "grad_norm": 0.8266514792789601, "learning_rate": 1.6456509048453007e-05, "loss": 0.8159, "step": 2819 }, { "epoch": 0.08233336252956118, "grad_norm": 0.7413785476162525, "learning_rate": 1.6462346760070052e-05, "loss": 0.6421, "step": 2820 }, { "epoch": 0.08236255875740854, "grad_norm": 0.6944969736441845, "learning_rate": 1.64681844716871e-05, "loss": 0.5945, "step": 2821 }, { "epoch": 0.0823917549852559, "grad_norm": 0.7379496192336689, "learning_rate": 1.6474022183304146e-05, "loss": 0.615, "step": 2822 }, { "epoch": 0.08242095121310326, "grad_norm": 0.751124539025192, "learning_rate": 1.6479859894921194e-05, "loss": 0.7276, "step": 2823 }, { "epoch": 0.08245014744095062, "grad_norm": 0.7560131713784347, "learning_rate": 1.648569760653824e-05, "loss": 0.7509, "step": 2824 }, { "epoch": 0.08247934366879799, "grad_norm": 0.7328119493612766, "learning_rate": 1.6491535318155284e-05, "loss": 0.6214, "step": 2825 }, { "epoch": 0.08250853989664535, "grad_norm": 0.9154862339217509, "learning_rate": 1.649737302977233e-05, "loss": 0.7036, "step": 2826 }, { "epoch": 0.08253773612449272, "grad_norm": 0.875871258901832, "learning_rate": 1.6503210741389377e-05, "loss": 0.7818, "step": 2827 }, { "epoch": 0.08256693235234008, "grad_norm": 0.8570174416390591, "learning_rate": 1.6509048453006422e-05, "loss": 0.7331, "step": 2828 }, { "epoch": 0.08259612858018744, "grad_norm": 0.7562922439732138, "learning_rate": 1.651488616462347e-05, "loss": 0.7159, "step": 2829 }, { "epoch": 0.0826253248080348, "grad_norm": 0.6859266137971913, "learning_rate": 1.6520723876240516e-05, "loss": 0.6513, "step": 2830 }, { "epoch": 0.08265452103588217, "grad_norm": 0.8435902363924767, "learning_rate": 1.652656158785756e-05, "loss": 0.7145, "step": 2831 }, { "epoch": 0.08268371726372953, "grad_norm": 0.711153994300109, "learning_rate": 1.6532399299474606e-05, "loss": 0.6176, "step": 2832 }, { "epoch": 0.08271291349157689, "grad_norm": 0.7553551139371957, "learning_rate": 1.6538237011091654e-05, "loss": 0.6902, "step": 2833 }, { "epoch": 0.08274210971942425, "grad_norm": 0.7348675846646328, "learning_rate": 1.65440747227087e-05, "loss": 0.7064, "step": 2834 }, { "epoch": 0.08277130594727161, "grad_norm": 0.8437405410572015, "learning_rate": 1.6549912434325744e-05, "loss": 0.7776, "step": 2835 }, { "epoch": 0.08280050217511897, "grad_norm": 0.8193610040793873, "learning_rate": 1.6555750145942792e-05, "loss": 0.6755, "step": 2836 }, { "epoch": 0.08282969840296633, "grad_norm": 0.8457100358938945, "learning_rate": 1.6561587857559837e-05, "loss": 0.7837, "step": 2837 }, { "epoch": 0.0828588946308137, "grad_norm": 0.7574613829847707, "learning_rate": 1.6567425569176886e-05, "loss": 0.6546, "step": 2838 }, { "epoch": 0.08288809085866106, "grad_norm": 0.7080902256594379, "learning_rate": 1.657326328079393e-05, "loss": 0.6131, "step": 2839 }, { "epoch": 0.08291728708650842, "grad_norm": 0.7395568038719479, "learning_rate": 1.6579100992410976e-05, "loss": 0.6146, "step": 2840 }, { "epoch": 0.08294648331435578, "grad_norm": 0.74986681415718, "learning_rate": 1.658493870402802e-05, "loss": 0.6376, "step": 2841 }, { "epoch": 0.08297567954220315, "grad_norm": 0.7829788352065535, "learning_rate": 1.659077641564507e-05, "loss": 0.7681, "step": 2842 }, { "epoch": 0.08300487577005052, "grad_norm": 0.7235407255086715, "learning_rate": 1.6596614127262114e-05, "loss": 0.6351, "step": 2843 }, { "epoch": 0.08303407199789788, "grad_norm": 0.7640619750906595, "learning_rate": 1.6602451838879162e-05, "loss": 0.6957, "step": 2844 }, { "epoch": 0.08306326822574524, "grad_norm": 0.76026388018747, "learning_rate": 1.6608289550496207e-05, "loss": 0.6825, "step": 2845 }, { "epoch": 0.0830924644535926, "grad_norm": 0.700153755440368, "learning_rate": 1.6614127262113252e-05, "loss": 0.6353, "step": 2846 }, { "epoch": 0.08312166068143996, "grad_norm": 0.8117690015950877, "learning_rate": 1.6619964973730297e-05, "loss": 0.7158, "step": 2847 }, { "epoch": 0.08315085690928732, "grad_norm": 0.7863557519113493, "learning_rate": 1.6625802685347346e-05, "loss": 0.7076, "step": 2848 }, { "epoch": 0.08318005313713468, "grad_norm": 0.7962403361657165, "learning_rate": 1.663164039696439e-05, "loss": 0.8168, "step": 2849 }, { "epoch": 0.08320924936498204, "grad_norm": 0.8272852791931465, "learning_rate": 1.6637478108581436e-05, "loss": 0.7365, "step": 2850 }, { "epoch": 0.0832384455928294, "grad_norm": 0.7067341743207595, "learning_rate": 1.6643315820198484e-05, "loss": 0.6309, "step": 2851 }, { "epoch": 0.08326764182067677, "grad_norm": 0.689371888555044, "learning_rate": 1.664915353181553e-05, "loss": 0.6225, "step": 2852 }, { "epoch": 0.08329683804852413, "grad_norm": 1.5246755719906773, "learning_rate": 1.6654991243432577e-05, "loss": 0.7446, "step": 2853 }, { "epoch": 0.08332603427637149, "grad_norm": 0.8220244820779804, "learning_rate": 1.6660828955049622e-05, "loss": 0.8136, "step": 2854 }, { "epoch": 0.08335523050421885, "grad_norm": 0.8039833844738402, "learning_rate": 1.6666666666666667e-05, "loss": 0.7046, "step": 2855 }, { "epoch": 0.08338442673206621, "grad_norm": 0.8350712990998741, "learning_rate": 1.6672504378283712e-05, "loss": 0.7471, "step": 2856 }, { "epoch": 0.08341362295991359, "grad_norm": 0.8346109217479087, "learning_rate": 1.667834208990076e-05, "loss": 0.8081, "step": 2857 }, { "epoch": 0.08344281918776095, "grad_norm": 0.680911989969724, "learning_rate": 1.6684179801517806e-05, "loss": 0.6098, "step": 2858 }, { "epoch": 0.08347201541560831, "grad_norm": 0.7361395323525256, "learning_rate": 1.6690017513134854e-05, "loss": 0.6553, "step": 2859 }, { "epoch": 0.08350121164345567, "grad_norm": 0.835398538140608, "learning_rate": 1.66958552247519e-05, "loss": 0.7474, "step": 2860 }, { "epoch": 0.08353040787130303, "grad_norm": 1.0456792257139556, "learning_rate": 1.6701692936368944e-05, "loss": 0.6489, "step": 2861 }, { "epoch": 0.08355960409915039, "grad_norm": 0.7198939964293051, "learning_rate": 1.670753064798599e-05, "loss": 0.658, "step": 2862 }, { "epoch": 0.08358880032699775, "grad_norm": 1.358599610854948, "learning_rate": 1.6713368359603038e-05, "loss": 0.6048, "step": 2863 }, { "epoch": 0.08361799655484511, "grad_norm": 0.7145998147211093, "learning_rate": 1.6719206071220083e-05, "loss": 0.6466, "step": 2864 }, { "epoch": 0.08364719278269248, "grad_norm": 0.8263767028310511, "learning_rate": 1.672504378283713e-05, "loss": 0.6949, "step": 2865 }, { "epoch": 0.08367638901053984, "grad_norm": 0.7414729379674478, "learning_rate": 1.6730881494454176e-05, "loss": 0.6613, "step": 2866 }, { "epoch": 0.0837055852383872, "grad_norm": 0.7522289695965436, "learning_rate": 1.673671920607122e-05, "loss": 0.6929, "step": 2867 }, { "epoch": 0.08373478146623456, "grad_norm": 0.7881766157173216, "learning_rate": 1.674255691768827e-05, "loss": 0.7295, "step": 2868 }, { "epoch": 0.08376397769408192, "grad_norm": 0.7625930650087881, "learning_rate": 1.6748394629305314e-05, "loss": 0.6596, "step": 2869 }, { "epoch": 0.08379317392192928, "grad_norm": 1.0045993643579003, "learning_rate": 1.675423234092236e-05, "loss": 0.7269, "step": 2870 }, { "epoch": 0.08382237014977664, "grad_norm": 0.7590539250751172, "learning_rate": 1.6760070052539404e-05, "loss": 0.7106, "step": 2871 }, { "epoch": 0.08385156637762402, "grad_norm": 0.7816216655811801, "learning_rate": 1.6765907764156453e-05, "loss": 0.6764, "step": 2872 }, { "epoch": 0.08388076260547138, "grad_norm": 0.7754075810278193, "learning_rate": 1.6771745475773498e-05, "loss": 0.6693, "step": 2873 }, { "epoch": 0.08390995883331874, "grad_norm": 0.6921686592352492, "learning_rate": 1.6777583187390546e-05, "loss": 0.6432, "step": 2874 }, { "epoch": 0.0839391550611661, "grad_norm": 0.7418804945713849, "learning_rate": 1.678342089900759e-05, "loss": 0.6962, "step": 2875 }, { "epoch": 0.08396835128901346, "grad_norm": 0.759303375217204, "learning_rate": 1.6789258610624636e-05, "loss": 0.6962, "step": 2876 }, { "epoch": 0.08399754751686082, "grad_norm": 0.772694190024776, "learning_rate": 1.679509632224168e-05, "loss": 0.6744, "step": 2877 }, { "epoch": 0.08402674374470818, "grad_norm": 0.7996123788668374, "learning_rate": 1.680093403385873e-05, "loss": 0.6542, "step": 2878 }, { "epoch": 0.08405593997255555, "grad_norm": 0.6993753095863044, "learning_rate": 1.6806771745475774e-05, "loss": 0.6273, "step": 2879 }, { "epoch": 0.08408513620040291, "grad_norm": 0.7848736136883012, "learning_rate": 1.6812609457092823e-05, "loss": 0.6859, "step": 2880 }, { "epoch": 0.08411433242825027, "grad_norm": 1.3486155226194458, "learning_rate": 1.6818447168709868e-05, "loss": 0.7691, "step": 2881 }, { "epoch": 0.08414352865609763, "grad_norm": 0.7280110744454654, "learning_rate": 1.6824284880326913e-05, "loss": 0.6654, "step": 2882 }, { "epoch": 0.08417272488394499, "grad_norm": 0.9849085640592979, "learning_rate": 1.683012259194396e-05, "loss": 0.7142, "step": 2883 }, { "epoch": 0.08420192111179235, "grad_norm": 0.7493626523522116, "learning_rate": 1.6835960303561006e-05, "loss": 0.5892, "step": 2884 }, { "epoch": 0.08423111733963971, "grad_norm": 0.7965434015578877, "learning_rate": 1.684179801517805e-05, "loss": 0.7495, "step": 2885 }, { "epoch": 0.08426031356748707, "grad_norm": 0.8352458623702506, "learning_rate": 1.6847635726795096e-05, "loss": 0.6671, "step": 2886 }, { "epoch": 0.08428950979533445, "grad_norm": 0.7100415223115148, "learning_rate": 1.6853473438412144e-05, "loss": 0.685, "step": 2887 }, { "epoch": 0.08431870602318181, "grad_norm": 0.9451239354081769, "learning_rate": 1.685931115002919e-05, "loss": 0.8295, "step": 2888 }, { "epoch": 0.08434790225102917, "grad_norm": 0.7565548601831149, "learning_rate": 1.6865148861646238e-05, "loss": 0.7159, "step": 2889 }, { "epoch": 0.08437709847887653, "grad_norm": 0.8191452266674853, "learning_rate": 1.6870986573263283e-05, "loss": 0.7776, "step": 2890 }, { "epoch": 0.0844062947067239, "grad_norm": 0.7445458572364068, "learning_rate": 1.6876824284880328e-05, "loss": 0.7395, "step": 2891 }, { "epoch": 0.08443549093457126, "grad_norm": 0.7318730159064567, "learning_rate": 1.6882661996497373e-05, "loss": 0.6626, "step": 2892 }, { "epoch": 0.08446468716241862, "grad_norm": 0.7483566924436758, "learning_rate": 1.688849970811442e-05, "loss": 0.6692, "step": 2893 }, { "epoch": 0.08449388339026598, "grad_norm": 0.8761577419818879, "learning_rate": 1.6894337419731466e-05, "loss": 0.6882, "step": 2894 }, { "epoch": 0.08452307961811334, "grad_norm": 0.6921123557018821, "learning_rate": 1.6900175131348514e-05, "loss": 0.5782, "step": 2895 }, { "epoch": 0.0845522758459607, "grad_norm": 0.7716495705490208, "learning_rate": 1.690601284296556e-05, "loss": 0.6822, "step": 2896 }, { "epoch": 0.08458147207380806, "grad_norm": 1.0195143081957447, "learning_rate": 1.6911850554582604e-05, "loss": 0.7003, "step": 2897 }, { "epoch": 0.08461066830165542, "grad_norm": 0.8135444024287931, "learning_rate": 1.6917688266199653e-05, "loss": 0.8127, "step": 2898 }, { "epoch": 0.08463986452950278, "grad_norm": 0.6962797392731577, "learning_rate": 1.6923525977816698e-05, "loss": 0.6369, "step": 2899 }, { "epoch": 0.08466906075735015, "grad_norm": 0.8703450138547532, "learning_rate": 1.6929363689433743e-05, "loss": 0.7473, "step": 2900 }, { "epoch": 0.0846982569851975, "grad_norm": 0.7428989284176375, "learning_rate": 1.6935201401050788e-05, "loss": 0.7012, "step": 2901 }, { "epoch": 0.08472745321304488, "grad_norm": 0.7206668324654062, "learning_rate": 1.6941039112667836e-05, "loss": 0.6831, "step": 2902 }, { "epoch": 0.08475664944089224, "grad_norm": 0.8916867788609383, "learning_rate": 1.694687682428488e-05, "loss": 0.7678, "step": 2903 }, { "epoch": 0.0847858456687396, "grad_norm": 0.7082944500517974, "learning_rate": 1.695271453590193e-05, "loss": 0.6603, "step": 2904 }, { "epoch": 0.08481504189658697, "grad_norm": 0.7637749428989405, "learning_rate": 1.6958552247518974e-05, "loss": 0.7347, "step": 2905 }, { "epoch": 0.08484423812443433, "grad_norm": 0.8190275353314155, "learning_rate": 1.696438995913602e-05, "loss": 0.7505, "step": 2906 }, { "epoch": 0.08487343435228169, "grad_norm": 0.8835741185661246, "learning_rate": 1.6970227670753064e-05, "loss": 0.7857, "step": 2907 }, { "epoch": 0.08490263058012905, "grad_norm": 0.8414860792944557, "learning_rate": 1.6976065382370113e-05, "loss": 0.7797, "step": 2908 }, { "epoch": 0.08493182680797641, "grad_norm": 0.7917523781099387, "learning_rate": 1.6981903093987158e-05, "loss": 0.6689, "step": 2909 }, { "epoch": 0.08496102303582377, "grad_norm": 0.7842940441669807, "learning_rate": 1.6987740805604206e-05, "loss": 0.7151, "step": 2910 }, { "epoch": 0.08499021926367113, "grad_norm": 0.7543673930875685, "learning_rate": 1.699357851722125e-05, "loss": 0.6485, "step": 2911 }, { "epoch": 0.0850194154915185, "grad_norm": 0.7205151243804067, "learning_rate": 1.6999416228838296e-05, "loss": 0.6858, "step": 2912 }, { "epoch": 0.08504861171936585, "grad_norm": 0.7432461837036944, "learning_rate": 1.700525394045534e-05, "loss": 0.7356, "step": 2913 }, { "epoch": 0.08507780794721322, "grad_norm": 0.795671459905526, "learning_rate": 1.701109165207239e-05, "loss": 0.7286, "step": 2914 }, { "epoch": 0.08510700417506058, "grad_norm": 0.7024998600817209, "learning_rate": 1.7016929363689438e-05, "loss": 0.6304, "step": 2915 }, { "epoch": 0.08513620040290794, "grad_norm": 0.7532318334363131, "learning_rate": 1.7022767075306483e-05, "loss": 0.577, "step": 2916 }, { "epoch": 0.0851653966307553, "grad_norm": 0.880831921436906, "learning_rate": 1.7028604786923528e-05, "loss": 0.6488, "step": 2917 }, { "epoch": 0.08519459285860267, "grad_norm": 0.7003945259350246, "learning_rate": 1.7034442498540573e-05, "loss": 0.6089, "step": 2918 }, { "epoch": 0.08522378908645004, "grad_norm": 0.7179362541558972, "learning_rate": 1.704028021015762e-05, "loss": 0.67, "step": 2919 }, { "epoch": 0.0852529853142974, "grad_norm": 0.7677569290712001, "learning_rate": 1.7046117921774666e-05, "loss": 0.6494, "step": 2920 }, { "epoch": 0.08528218154214476, "grad_norm": 1.4117264660979891, "learning_rate": 1.705195563339171e-05, "loss": 0.7206, "step": 2921 }, { "epoch": 0.08531137776999212, "grad_norm": 0.7664622189129217, "learning_rate": 1.7057793345008756e-05, "loss": 0.634, "step": 2922 }, { "epoch": 0.08534057399783948, "grad_norm": 0.7195820201833018, "learning_rate": 1.7063631056625804e-05, "loss": 0.6786, "step": 2923 }, { "epoch": 0.08536977022568684, "grad_norm": 0.7082605955904078, "learning_rate": 1.706946876824285e-05, "loss": 0.5971, "step": 2924 }, { "epoch": 0.0853989664535342, "grad_norm": 0.9006218688304408, "learning_rate": 1.7075306479859898e-05, "loss": 0.6807, "step": 2925 }, { "epoch": 0.08542816268138156, "grad_norm": 0.7045959339705795, "learning_rate": 1.7081144191476943e-05, "loss": 0.6936, "step": 2926 }, { "epoch": 0.08545735890922893, "grad_norm": 0.7036157810994342, "learning_rate": 1.7086981903093988e-05, "loss": 0.6052, "step": 2927 }, { "epoch": 0.08548655513707629, "grad_norm": 0.8450480571227373, "learning_rate": 1.7092819614711033e-05, "loss": 0.7883, "step": 2928 }, { "epoch": 0.08551575136492365, "grad_norm": 0.8493978166617964, "learning_rate": 1.709865732632808e-05, "loss": 0.7885, "step": 2929 }, { "epoch": 0.08554494759277101, "grad_norm": 0.7340881898827738, "learning_rate": 1.710449503794513e-05, "loss": 0.686, "step": 2930 }, { "epoch": 0.08557414382061837, "grad_norm": 0.8118301215955493, "learning_rate": 1.7110332749562174e-05, "loss": 0.7614, "step": 2931 }, { "epoch": 0.08560334004846573, "grad_norm": 0.8136501771531954, "learning_rate": 1.711617046117922e-05, "loss": 0.6681, "step": 2932 }, { "epoch": 0.0856325362763131, "grad_norm": 0.8494671659754114, "learning_rate": 1.7122008172796264e-05, "loss": 0.7559, "step": 2933 }, { "epoch": 0.08566173250416047, "grad_norm": 0.7671536937245983, "learning_rate": 1.7127845884413313e-05, "loss": 0.6897, "step": 2934 }, { "epoch": 0.08569092873200783, "grad_norm": 0.7191217303087586, "learning_rate": 1.7133683596030358e-05, "loss": 0.6408, "step": 2935 }, { "epoch": 0.08572012495985519, "grad_norm": 0.6989718940127877, "learning_rate": 1.7139521307647403e-05, "loss": 0.6347, "step": 2936 }, { "epoch": 0.08574932118770255, "grad_norm": 0.7136400717743141, "learning_rate": 1.7145359019264448e-05, "loss": 0.5928, "step": 2937 }, { "epoch": 0.08577851741554991, "grad_norm": 0.7983667159265831, "learning_rate": 1.7151196730881496e-05, "loss": 0.7481, "step": 2938 }, { "epoch": 0.08580771364339727, "grad_norm": 0.7646016791672835, "learning_rate": 1.715703444249854e-05, "loss": 0.6909, "step": 2939 }, { "epoch": 0.08583690987124463, "grad_norm": 1.0648528480600994, "learning_rate": 1.716287215411559e-05, "loss": 0.6091, "step": 2940 }, { "epoch": 0.085866106099092, "grad_norm": 0.7146676145815176, "learning_rate": 1.7168709865732635e-05, "loss": 0.64, "step": 2941 }, { "epoch": 0.08589530232693936, "grad_norm": 0.7347864494134803, "learning_rate": 1.717454757734968e-05, "loss": 0.7449, "step": 2942 }, { "epoch": 0.08592449855478672, "grad_norm": 0.7709262938353991, "learning_rate": 1.7180385288966724e-05, "loss": 0.6853, "step": 2943 }, { "epoch": 0.08595369478263408, "grad_norm": 0.720373287527461, "learning_rate": 1.7186223000583773e-05, "loss": 0.6558, "step": 2944 }, { "epoch": 0.08598289101048144, "grad_norm": 0.7004054453739289, "learning_rate": 1.7192060712200818e-05, "loss": 0.6325, "step": 2945 }, { "epoch": 0.0860120872383288, "grad_norm": 1.031766075480485, "learning_rate": 1.7197898423817866e-05, "loss": 0.7229, "step": 2946 }, { "epoch": 0.08604128346617616, "grad_norm": 0.8302632329064721, "learning_rate": 1.720373613543491e-05, "loss": 0.7312, "step": 2947 }, { "epoch": 0.08607047969402354, "grad_norm": 0.7329407969815901, "learning_rate": 1.7209573847051956e-05, "loss": 0.7177, "step": 2948 }, { "epoch": 0.0860996759218709, "grad_norm": 0.8037420928719883, "learning_rate": 1.7215411558669005e-05, "loss": 0.7636, "step": 2949 }, { "epoch": 0.08612887214971826, "grad_norm": 0.7526689293898977, "learning_rate": 1.722124927028605e-05, "loss": 0.6888, "step": 2950 }, { "epoch": 0.08615806837756562, "grad_norm": 0.7297555498766194, "learning_rate": 1.7227086981903095e-05, "loss": 0.6422, "step": 2951 }, { "epoch": 0.08618726460541298, "grad_norm": 0.6910947463948077, "learning_rate": 1.723292469352014e-05, "loss": 0.6199, "step": 2952 }, { "epoch": 0.08621646083326034, "grad_norm": 0.7367723185709091, "learning_rate": 1.7238762405137188e-05, "loss": 0.6832, "step": 2953 }, { "epoch": 0.0862456570611077, "grad_norm": 0.7218498659607517, "learning_rate": 1.7244600116754233e-05, "loss": 0.6048, "step": 2954 }, { "epoch": 0.08627485328895507, "grad_norm": 0.8168126136803139, "learning_rate": 1.725043782837128e-05, "loss": 0.7631, "step": 2955 }, { "epoch": 0.08630404951680243, "grad_norm": 0.9005070839185428, "learning_rate": 1.7256275539988326e-05, "loss": 0.7882, "step": 2956 }, { "epoch": 0.08633324574464979, "grad_norm": 0.8130587550190526, "learning_rate": 1.726211325160537e-05, "loss": 0.755, "step": 2957 }, { "epoch": 0.08636244197249715, "grad_norm": 0.8979620562184167, "learning_rate": 1.7267950963222416e-05, "loss": 0.6532, "step": 2958 }, { "epoch": 0.08639163820034451, "grad_norm": 0.7733136079976709, "learning_rate": 1.7273788674839465e-05, "loss": 0.7953, "step": 2959 }, { "epoch": 0.08642083442819187, "grad_norm": 0.8733674383077358, "learning_rate": 1.727962638645651e-05, "loss": 0.763, "step": 2960 }, { "epoch": 0.08645003065603923, "grad_norm": 0.7378317641427027, "learning_rate": 1.7285464098073558e-05, "loss": 0.6889, "step": 2961 }, { "epoch": 0.0864792268838866, "grad_norm": 0.7492318776086774, "learning_rate": 1.7291301809690603e-05, "loss": 0.7671, "step": 2962 }, { "epoch": 0.08650842311173397, "grad_norm": 0.7716819064439886, "learning_rate": 1.7297139521307648e-05, "loss": 0.7289, "step": 2963 }, { "epoch": 0.08653761933958133, "grad_norm": 0.7495506203145957, "learning_rate": 1.7302977232924696e-05, "loss": 0.6586, "step": 2964 }, { "epoch": 0.08656681556742869, "grad_norm": 0.7032758842586887, "learning_rate": 1.730881494454174e-05, "loss": 0.6356, "step": 2965 }, { "epoch": 0.08659601179527605, "grad_norm": 0.723126864315504, "learning_rate": 1.731465265615879e-05, "loss": 0.6783, "step": 2966 }, { "epoch": 0.08662520802312342, "grad_norm": 0.8174250361546033, "learning_rate": 1.732049036777583e-05, "loss": 0.6255, "step": 2967 }, { "epoch": 0.08665440425097078, "grad_norm": 0.7402938180815043, "learning_rate": 1.732632807939288e-05, "loss": 0.7043, "step": 2968 }, { "epoch": 0.08668360047881814, "grad_norm": 0.734419729539774, "learning_rate": 1.7332165791009925e-05, "loss": 0.6738, "step": 2969 }, { "epoch": 0.0867127967066655, "grad_norm": 0.9285675263668641, "learning_rate": 1.7338003502626973e-05, "loss": 0.6728, "step": 2970 }, { "epoch": 0.08674199293451286, "grad_norm": 0.6827532518719238, "learning_rate": 1.7343841214244018e-05, "loss": 0.6791, "step": 2971 }, { "epoch": 0.08677118916236022, "grad_norm": 0.7267047270666502, "learning_rate": 1.7349678925861063e-05, "loss": 0.5801, "step": 2972 }, { "epoch": 0.08680038539020758, "grad_norm": 0.9605317700001912, "learning_rate": 1.7355516637478108e-05, "loss": 0.7623, "step": 2973 }, { "epoch": 0.08682958161805494, "grad_norm": 0.7528172739450584, "learning_rate": 1.7361354349095156e-05, "loss": 0.7043, "step": 2974 }, { "epoch": 0.0868587778459023, "grad_norm": 0.7543014800267231, "learning_rate": 1.73671920607122e-05, "loss": 0.741, "step": 2975 }, { "epoch": 0.08688797407374967, "grad_norm": 0.7537180196164615, "learning_rate": 1.737302977232925e-05, "loss": 0.7049, "step": 2976 }, { "epoch": 0.08691717030159703, "grad_norm": 0.7598611205531837, "learning_rate": 1.7378867483946295e-05, "loss": 0.6811, "step": 2977 }, { "epoch": 0.0869463665294444, "grad_norm": 0.7745182442416616, "learning_rate": 1.738470519556334e-05, "loss": 0.7289, "step": 2978 }, { "epoch": 0.08697556275729176, "grad_norm": 0.7722413187122171, "learning_rate": 1.7390542907180388e-05, "loss": 0.711, "step": 2979 }, { "epoch": 0.08700475898513912, "grad_norm": 0.7072024512084316, "learning_rate": 1.7396380618797433e-05, "loss": 0.6991, "step": 2980 }, { "epoch": 0.08703395521298649, "grad_norm": 0.7310420241669376, "learning_rate": 1.740221833041448e-05, "loss": 0.6808, "step": 2981 }, { "epoch": 0.08706315144083385, "grad_norm": 0.7154663237127529, "learning_rate": 1.7408056042031526e-05, "loss": 0.6295, "step": 2982 }, { "epoch": 0.08709234766868121, "grad_norm": 0.7232453098302897, "learning_rate": 1.741389375364857e-05, "loss": 0.6786, "step": 2983 }, { "epoch": 0.08712154389652857, "grad_norm": 0.786792129830902, "learning_rate": 1.7419731465265616e-05, "loss": 0.723, "step": 2984 }, { "epoch": 0.08715074012437593, "grad_norm": 0.7506041833517904, "learning_rate": 1.7425569176882665e-05, "loss": 0.7564, "step": 2985 }, { "epoch": 0.08717993635222329, "grad_norm": 0.7614972342871252, "learning_rate": 1.743140688849971e-05, "loss": 0.5874, "step": 2986 }, { "epoch": 0.08720913258007065, "grad_norm": 0.7842809616836218, "learning_rate": 1.7437244600116755e-05, "loss": 0.7428, "step": 2987 }, { "epoch": 0.08723832880791801, "grad_norm": 0.7917164496257529, "learning_rate": 1.74430823117338e-05, "loss": 0.6268, "step": 2988 }, { "epoch": 0.08726752503576538, "grad_norm": 0.7972640174571475, "learning_rate": 1.7448920023350848e-05, "loss": 0.6482, "step": 2989 }, { "epoch": 0.08729672126361274, "grad_norm": 0.6978665417825395, "learning_rate": 1.7454757734967893e-05, "loss": 0.6526, "step": 2990 }, { "epoch": 0.0873259174914601, "grad_norm": 0.7614429426538805, "learning_rate": 1.746059544658494e-05, "loss": 0.6817, "step": 2991 }, { "epoch": 0.08735511371930746, "grad_norm": 0.8375624437892795, "learning_rate": 1.7466433158201986e-05, "loss": 0.7838, "step": 2992 }, { "epoch": 0.08738430994715483, "grad_norm": 0.7410313876192236, "learning_rate": 1.747227086981903e-05, "loss": 0.6947, "step": 2993 }, { "epoch": 0.0874135061750022, "grad_norm": 0.7189583234943894, "learning_rate": 1.747810858143608e-05, "loss": 0.6466, "step": 2994 }, { "epoch": 0.08744270240284956, "grad_norm": 0.8103986623377787, "learning_rate": 1.7483946293053125e-05, "loss": 0.7237, "step": 2995 }, { "epoch": 0.08747189863069692, "grad_norm": 0.731041674920709, "learning_rate": 1.7489784004670173e-05, "loss": 0.6496, "step": 2996 }, { "epoch": 0.08750109485854428, "grad_norm": 0.7410817457983733, "learning_rate": 1.7495621716287218e-05, "loss": 0.6519, "step": 2997 }, { "epoch": 0.08753029108639164, "grad_norm": 0.789776874824872, "learning_rate": 1.7501459427904263e-05, "loss": 0.7562, "step": 2998 }, { "epoch": 0.087559487314239, "grad_norm": 1.12979467279506, "learning_rate": 1.7507297139521308e-05, "loss": 0.8883, "step": 2999 }, { "epoch": 0.08758868354208636, "grad_norm": 0.8315945378148814, "learning_rate": 1.7513134851138356e-05, "loss": 0.7353, "step": 3000 }, { "epoch": 0.08761787976993372, "grad_norm": 0.8761348485495951, "learning_rate": 1.75189725627554e-05, "loss": 0.7646, "step": 3001 }, { "epoch": 0.08764707599778108, "grad_norm": 0.7747912815699171, "learning_rate": 1.7524810274372446e-05, "loss": 0.6804, "step": 3002 }, { "epoch": 0.08767627222562845, "grad_norm": 0.7457993181202928, "learning_rate": 1.753064798598949e-05, "loss": 0.6953, "step": 3003 }, { "epoch": 0.08770546845347581, "grad_norm": 0.8830328980726024, "learning_rate": 1.753648569760654e-05, "loss": 0.7779, "step": 3004 }, { "epoch": 0.08773466468132317, "grad_norm": 0.8265136768866741, "learning_rate": 1.7542323409223585e-05, "loss": 0.7668, "step": 3005 }, { "epoch": 0.08776386090917053, "grad_norm": 0.6988301217181515, "learning_rate": 1.7548161120840633e-05, "loss": 0.6216, "step": 3006 }, { "epoch": 0.08779305713701789, "grad_norm": 0.7498472503851051, "learning_rate": 1.7553998832457678e-05, "loss": 0.7384, "step": 3007 }, { "epoch": 0.08782225336486527, "grad_norm": 0.8828303862915238, "learning_rate": 1.7559836544074723e-05, "loss": 0.7327, "step": 3008 }, { "epoch": 0.08785144959271263, "grad_norm": 0.7707502750703941, "learning_rate": 1.7565674255691768e-05, "loss": 0.7106, "step": 3009 }, { "epoch": 0.08788064582055999, "grad_norm": 0.7784703317413286, "learning_rate": 1.7571511967308816e-05, "loss": 0.6571, "step": 3010 }, { "epoch": 0.08790984204840735, "grad_norm": 0.724129022081574, "learning_rate": 1.7577349678925865e-05, "loss": 0.6739, "step": 3011 }, { "epoch": 0.08793903827625471, "grad_norm": 0.776343796759987, "learning_rate": 1.758318739054291e-05, "loss": 0.7864, "step": 3012 }, { "epoch": 0.08796823450410207, "grad_norm": 0.7535432716603365, "learning_rate": 1.7589025102159955e-05, "loss": 0.7206, "step": 3013 }, { "epoch": 0.08799743073194943, "grad_norm": 0.8118108588666408, "learning_rate": 1.7594862813777e-05, "loss": 0.721, "step": 3014 }, { "epoch": 0.0880266269597968, "grad_norm": 0.8077295663790072, "learning_rate": 1.7600700525394048e-05, "loss": 0.7745, "step": 3015 }, { "epoch": 0.08805582318764416, "grad_norm": 0.7540227390386632, "learning_rate": 1.7606538237011093e-05, "loss": 0.6511, "step": 3016 }, { "epoch": 0.08808501941549152, "grad_norm": 0.7399837856980395, "learning_rate": 1.7612375948628138e-05, "loss": 0.7105, "step": 3017 }, { "epoch": 0.08811421564333888, "grad_norm": 1.08299452841372, "learning_rate": 1.7618213660245183e-05, "loss": 0.7132, "step": 3018 }, { "epoch": 0.08814341187118624, "grad_norm": 0.7551057763658034, "learning_rate": 1.762405137186223e-05, "loss": 0.6961, "step": 3019 }, { "epoch": 0.0881726080990336, "grad_norm": 0.7299586733560791, "learning_rate": 1.7629889083479277e-05, "loss": 0.7089, "step": 3020 }, { "epoch": 0.08820180432688096, "grad_norm": 0.7892327167429609, "learning_rate": 1.7635726795096325e-05, "loss": 0.7765, "step": 3021 }, { "epoch": 0.08823100055472832, "grad_norm": 0.7614643551978363, "learning_rate": 1.764156450671337e-05, "loss": 0.685, "step": 3022 }, { "epoch": 0.0882601967825757, "grad_norm": 0.7138356484492965, "learning_rate": 1.7647402218330415e-05, "loss": 0.67, "step": 3023 }, { "epoch": 0.08828939301042306, "grad_norm": 0.7563930059278898, "learning_rate": 1.765323992994746e-05, "loss": 0.7031, "step": 3024 }, { "epoch": 0.08831858923827042, "grad_norm": 0.7999363865438944, "learning_rate": 1.7659077641564508e-05, "loss": 0.6163, "step": 3025 }, { "epoch": 0.08834778546611778, "grad_norm": 0.7294953793338218, "learning_rate": 1.7664915353181557e-05, "loss": 0.6316, "step": 3026 }, { "epoch": 0.08837698169396514, "grad_norm": 0.7109859316513248, "learning_rate": 1.76707530647986e-05, "loss": 0.6413, "step": 3027 }, { "epoch": 0.0884061779218125, "grad_norm": 0.8156347201316131, "learning_rate": 1.7676590776415647e-05, "loss": 0.7875, "step": 3028 }, { "epoch": 0.08843537414965986, "grad_norm": 0.8536025656296276, "learning_rate": 1.768242848803269e-05, "loss": 0.7963, "step": 3029 }, { "epoch": 0.08846457037750723, "grad_norm": 0.7843457794120576, "learning_rate": 1.768826619964974e-05, "loss": 0.7565, "step": 3030 }, { "epoch": 0.08849376660535459, "grad_norm": 0.7404120615312235, "learning_rate": 1.7694103911266785e-05, "loss": 0.7013, "step": 3031 }, { "epoch": 0.08852296283320195, "grad_norm": 0.7744137380184901, "learning_rate": 1.7699941622883833e-05, "loss": 0.7174, "step": 3032 }, { "epoch": 0.08855215906104931, "grad_norm": 0.7656811831935718, "learning_rate": 1.7705779334500878e-05, "loss": 0.7204, "step": 3033 }, { "epoch": 0.08858135528889667, "grad_norm": 0.7617972334598044, "learning_rate": 1.7711617046117923e-05, "loss": 0.6238, "step": 3034 }, { "epoch": 0.08861055151674403, "grad_norm": 0.7774021613801056, "learning_rate": 1.7717454757734968e-05, "loss": 0.7344, "step": 3035 }, { "epoch": 0.0886397477445914, "grad_norm": 0.868428092461646, "learning_rate": 1.7723292469352017e-05, "loss": 0.7266, "step": 3036 }, { "epoch": 0.08866894397243875, "grad_norm": 0.6963984782912271, "learning_rate": 1.772913018096906e-05, "loss": 0.6549, "step": 3037 }, { "epoch": 0.08869814020028613, "grad_norm": 0.7103208232744798, "learning_rate": 1.7734967892586107e-05, "loss": 0.6858, "step": 3038 }, { "epoch": 0.08872733642813349, "grad_norm": 0.7253786622873218, "learning_rate": 1.774080560420315e-05, "loss": 0.693, "step": 3039 }, { "epoch": 0.08875653265598085, "grad_norm": 0.7670409860026897, "learning_rate": 1.77466433158202e-05, "loss": 0.7332, "step": 3040 }, { "epoch": 0.08878572888382821, "grad_norm": 0.8229319999416073, "learning_rate": 1.7752481027437245e-05, "loss": 0.8396, "step": 3041 }, { "epoch": 0.08881492511167557, "grad_norm": 0.7591174192461551, "learning_rate": 1.7758318739054293e-05, "loss": 0.6934, "step": 3042 }, { "epoch": 0.08884412133952294, "grad_norm": 0.7424411019617202, "learning_rate": 1.7764156450671338e-05, "loss": 0.693, "step": 3043 }, { "epoch": 0.0888733175673703, "grad_norm": 0.821231612054371, "learning_rate": 1.7769994162288383e-05, "loss": 0.7349, "step": 3044 }, { "epoch": 0.08890251379521766, "grad_norm": 0.7414613459330022, "learning_rate": 1.777583187390543e-05, "loss": 0.7461, "step": 3045 }, { "epoch": 0.08893171002306502, "grad_norm": 0.7505165984196637, "learning_rate": 1.7781669585522477e-05, "loss": 0.7357, "step": 3046 }, { "epoch": 0.08896090625091238, "grad_norm": 0.6812941904767794, "learning_rate": 1.7787507297139525e-05, "loss": 0.6313, "step": 3047 }, { "epoch": 0.08899010247875974, "grad_norm": 0.8401020933925432, "learning_rate": 1.779334500875657e-05, "loss": 0.7039, "step": 3048 }, { "epoch": 0.0890192987066071, "grad_norm": 0.8186706367273526, "learning_rate": 1.7799182720373615e-05, "loss": 0.7128, "step": 3049 }, { "epoch": 0.08904849493445446, "grad_norm": 0.7925222192896242, "learning_rate": 1.780502043199066e-05, "loss": 0.7765, "step": 3050 }, { "epoch": 0.08907769116230183, "grad_norm": 0.732456288166859, "learning_rate": 1.781085814360771e-05, "loss": 0.7308, "step": 3051 }, { "epoch": 0.08910688739014919, "grad_norm": 0.8628012322520703, "learning_rate": 1.7816695855224753e-05, "loss": 0.7328, "step": 3052 }, { "epoch": 0.08913608361799656, "grad_norm": 0.7654217023715867, "learning_rate": 1.78225335668418e-05, "loss": 0.6617, "step": 3053 }, { "epoch": 0.08916527984584392, "grad_norm": 0.7482079999094858, "learning_rate": 1.7828371278458843e-05, "loss": 0.6801, "step": 3054 }, { "epoch": 0.08919447607369128, "grad_norm": 0.7935044297451701, "learning_rate": 1.783420899007589e-05, "loss": 0.6939, "step": 3055 }, { "epoch": 0.08922367230153865, "grad_norm": 1.0160337054157342, "learning_rate": 1.7840046701692937e-05, "loss": 0.5988, "step": 3056 }, { "epoch": 0.089252868529386, "grad_norm": 0.7108870479497027, "learning_rate": 1.7845884413309985e-05, "loss": 0.6747, "step": 3057 }, { "epoch": 0.08928206475723337, "grad_norm": 0.7113665256650298, "learning_rate": 1.785172212492703e-05, "loss": 0.6618, "step": 3058 }, { "epoch": 0.08931126098508073, "grad_norm": 0.880785759931273, "learning_rate": 1.7857559836544075e-05, "loss": 0.7797, "step": 3059 }, { "epoch": 0.08934045721292809, "grad_norm": 0.8599921793376899, "learning_rate": 1.7863397548161123e-05, "loss": 0.734, "step": 3060 }, { "epoch": 0.08936965344077545, "grad_norm": 0.7769916984691718, "learning_rate": 1.786923525977817e-05, "loss": 0.6974, "step": 3061 }, { "epoch": 0.08939884966862281, "grad_norm": 0.8065004063269754, "learning_rate": 1.7875072971395217e-05, "loss": 0.744, "step": 3062 }, { "epoch": 0.08942804589647017, "grad_norm": 0.6810495128265629, "learning_rate": 1.7880910683012262e-05, "loss": 0.6317, "step": 3063 }, { "epoch": 0.08945724212431753, "grad_norm": 0.7112864389987292, "learning_rate": 1.7886748394629307e-05, "loss": 0.6519, "step": 3064 }, { "epoch": 0.0894864383521649, "grad_norm": 0.9452916618342265, "learning_rate": 1.7892586106246352e-05, "loss": 0.717, "step": 3065 }, { "epoch": 0.08951563458001226, "grad_norm": 0.7753617297163562, "learning_rate": 1.78984238178634e-05, "loss": 0.6334, "step": 3066 }, { "epoch": 0.08954483080785962, "grad_norm": 0.7533668338333643, "learning_rate": 1.7904261529480445e-05, "loss": 0.5818, "step": 3067 }, { "epoch": 0.089574027035707, "grad_norm": 0.7463141579427129, "learning_rate": 1.791009924109749e-05, "loss": 0.7012, "step": 3068 }, { "epoch": 0.08960322326355435, "grad_norm": 0.7362882270253901, "learning_rate": 1.7915936952714535e-05, "loss": 0.7059, "step": 3069 }, { "epoch": 0.08963241949140172, "grad_norm": 0.8481962234285046, "learning_rate": 1.7921774664331583e-05, "loss": 0.7323, "step": 3070 }, { "epoch": 0.08966161571924908, "grad_norm": 0.8073725737573535, "learning_rate": 1.792761237594863e-05, "loss": 0.7378, "step": 3071 }, { "epoch": 0.08969081194709644, "grad_norm": 0.8202521435752461, "learning_rate": 1.7933450087565677e-05, "loss": 0.7156, "step": 3072 }, { "epoch": 0.0897200081749438, "grad_norm": 0.687747048265165, "learning_rate": 1.7939287799182722e-05, "loss": 0.6547, "step": 3073 }, { "epoch": 0.08974920440279116, "grad_norm": 0.7414041267881092, "learning_rate": 1.7945125510799767e-05, "loss": 0.6763, "step": 3074 }, { "epoch": 0.08977840063063852, "grad_norm": 0.7606419831501768, "learning_rate": 1.7950963222416815e-05, "loss": 0.6609, "step": 3075 }, { "epoch": 0.08980759685848588, "grad_norm": 0.7671770783723963, "learning_rate": 1.795680093403386e-05, "loss": 0.6717, "step": 3076 }, { "epoch": 0.08983679308633324, "grad_norm": 0.7738687385549241, "learning_rate": 1.796263864565091e-05, "loss": 0.7779, "step": 3077 }, { "epoch": 0.0898659893141806, "grad_norm": 0.7748177644627938, "learning_rate": 1.7968476357267953e-05, "loss": 0.7581, "step": 3078 }, { "epoch": 0.08989518554202797, "grad_norm": 1.0014380472609226, "learning_rate": 1.7974314068885e-05, "loss": 0.8055, "step": 3079 }, { "epoch": 0.08992438176987533, "grad_norm": 0.7812510446433549, "learning_rate": 1.7980151780502043e-05, "loss": 0.6984, "step": 3080 }, { "epoch": 0.08995357799772269, "grad_norm": 0.7669612664798088, "learning_rate": 1.7985989492119092e-05, "loss": 0.7082, "step": 3081 }, { "epoch": 0.08998277422557005, "grad_norm": 0.8160447388508597, "learning_rate": 1.7991827203736137e-05, "loss": 0.7848, "step": 3082 }, { "epoch": 0.09001197045341743, "grad_norm": 0.874009400446355, "learning_rate": 1.7997664915353185e-05, "loss": 0.7727, "step": 3083 }, { "epoch": 0.09004116668126479, "grad_norm": 0.859899412636347, "learning_rate": 1.8003502626970227e-05, "loss": 0.7462, "step": 3084 }, { "epoch": 0.09007036290911215, "grad_norm": 0.7850353350675744, "learning_rate": 1.8009340338587275e-05, "loss": 0.7618, "step": 3085 }, { "epoch": 0.09009955913695951, "grad_norm": 0.7375302830987228, "learning_rate": 1.801517805020432e-05, "loss": 0.6454, "step": 3086 }, { "epoch": 0.09012875536480687, "grad_norm": 0.7332900994771703, "learning_rate": 1.802101576182137e-05, "loss": 0.5782, "step": 3087 }, { "epoch": 0.09015795159265423, "grad_norm": 0.8589468778023546, "learning_rate": 1.8026853473438413e-05, "loss": 0.6985, "step": 3088 }, { "epoch": 0.09018714782050159, "grad_norm": 0.8053776619348592, "learning_rate": 1.803269118505546e-05, "loss": 0.7346, "step": 3089 }, { "epoch": 0.09021634404834895, "grad_norm": 0.7887939328412846, "learning_rate": 1.8038528896672507e-05, "loss": 0.7112, "step": 3090 }, { "epoch": 0.09024554027619631, "grad_norm": 0.9562702947665032, "learning_rate": 1.8044366608289552e-05, "loss": 0.7188, "step": 3091 }, { "epoch": 0.09027473650404368, "grad_norm": 0.6933057047689755, "learning_rate": 1.80502043199066e-05, "loss": 0.6297, "step": 3092 }, { "epoch": 0.09030393273189104, "grad_norm": 0.7758081861855787, "learning_rate": 1.8056042031523645e-05, "loss": 0.7019, "step": 3093 }, { "epoch": 0.0903331289597384, "grad_norm": 0.7739655257299827, "learning_rate": 1.806187974314069e-05, "loss": 0.6957, "step": 3094 }, { "epoch": 0.09036232518758576, "grad_norm": 0.7467452962306391, "learning_rate": 1.8067717454757735e-05, "loss": 0.6853, "step": 3095 }, { "epoch": 0.09039152141543312, "grad_norm": 0.7424768515177245, "learning_rate": 1.8073555166374784e-05, "loss": 0.6222, "step": 3096 }, { "epoch": 0.09042071764328048, "grad_norm": 0.8104908596054927, "learning_rate": 1.807939287799183e-05, "loss": 0.7513, "step": 3097 }, { "epoch": 0.09044991387112786, "grad_norm": 0.74879452636532, "learning_rate": 1.8085230589608877e-05, "loss": 0.7241, "step": 3098 }, { "epoch": 0.09047911009897522, "grad_norm": 0.7393552086250389, "learning_rate": 1.8091068301225922e-05, "loss": 0.7257, "step": 3099 }, { "epoch": 0.09050830632682258, "grad_norm": 0.7766426194886324, "learning_rate": 1.8096906012842967e-05, "loss": 0.7286, "step": 3100 }, { "epoch": 0.09053750255466994, "grad_norm": 0.8832358271382011, "learning_rate": 1.8102743724460012e-05, "loss": 0.7725, "step": 3101 }, { "epoch": 0.0905666987825173, "grad_norm": 0.7862225125581002, "learning_rate": 1.810858143607706e-05, "loss": 0.6363, "step": 3102 }, { "epoch": 0.09059589501036466, "grad_norm": 0.7664698600844186, "learning_rate": 1.8114419147694105e-05, "loss": 0.7053, "step": 3103 }, { "epoch": 0.09062509123821202, "grad_norm": 0.7517495249214644, "learning_rate": 1.812025685931115e-05, "loss": 0.6436, "step": 3104 }, { "epoch": 0.09065428746605939, "grad_norm": 0.7308815392652815, "learning_rate": 1.8126094570928195e-05, "loss": 0.6873, "step": 3105 }, { "epoch": 0.09068348369390675, "grad_norm": 0.8068959553123456, "learning_rate": 1.8131932282545244e-05, "loss": 0.6912, "step": 3106 }, { "epoch": 0.09071267992175411, "grad_norm": 0.7205290239556232, "learning_rate": 1.8137769994162292e-05, "loss": 0.6983, "step": 3107 }, { "epoch": 0.09074187614960147, "grad_norm": 0.8636729373443562, "learning_rate": 1.8143607705779337e-05, "loss": 0.7045, "step": 3108 }, { "epoch": 0.09077107237744883, "grad_norm": 0.7092131295617476, "learning_rate": 1.8149445417396382e-05, "loss": 0.6172, "step": 3109 }, { "epoch": 0.09080026860529619, "grad_norm": 0.7036869632257146, "learning_rate": 1.8155283129013427e-05, "loss": 0.5956, "step": 3110 }, { "epoch": 0.09082946483314355, "grad_norm": 0.7998779966234653, "learning_rate": 1.8161120840630475e-05, "loss": 0.6649, "step": 3111 }, { "epoch": 0.09085866106099091, "grad_norm": 0.8295600098543903, "learning_rate": 1.816695855224752e-05, "loss": 0.7017, "step": 3112 }, { "epoch": 0.09088785728883828, "grad_norm": 0.8137952697941508, "learning_rate": 1.817279626386457e-05, "loss": 0.758, "step": 3113 }, { "epoch": 0.09091705351668565, "grad_norm": 0.9017203232612957, "learning_rate": 1.8178633975481614e-05, "loss": 0.7407, "step": 3114 }, { "epoch": 0.09094624974453301, "grad_norm": 0.9887043269008586, "learning_rate": 1.818447168709866e-05, "loss": 0.7052, "step": 3115 }, { "epoch": 0.09097544597238037, "grad_norm": 0.817458449006819, "learning_rate": 1.8190309398715704e-05, "loss": 0.7664, "step": 3116 }, { "epoch": 0.09100464220022773, "grad_norm": 0.7369389615227652, "learning_rate": 1.8196147110332752e-05, "loss": 0.7304, "step": 3117 }, { "epoch": 0.0910338384280751, "grad_norm": 0.7385794365200331, "learning_rate": 1.8201984821949797e-05, "loss": 0.7277, "step": 3118 }, { "epoch": 0.09106303465592246, "grad_norm": 0.9024710054966175, "learning_rate": 1.8207822533566842e-05, "loss": 0.7491, "step": 3119 }, { "epoch": 0.09109223088376982, "grad_norm": 0.7867077856000044, "learning_rate": 1.8213660245183887e-05, "loss": 0.6853, "step": 3120 }, { "epoch": 0.09112142711161718, "grad_norm": 0.8124310325511763, "learning_rate": 1.8219497956800935e-05, "loss": 0.798, "step": 3121 }, { "epoch": 0.09115062333946454, "grad_norm": 0.8547447085602441, "learning_rate": 1.8225335668417984e-05, "loss": 0.7405, "step": 3122 }, { "epoch": 0.0911798195673119, "grad_norm": 0.7542736356570972, "learning_rate": 1.823117338003503e-05, "loss": 0.6871, "step": 3123 }, { "epoch": 0.09120901579515926, "grad_norm": 0.792569203409157, "learning_rate": 1.8237011091652074e-05, "loss": 0.6863, "step": 3124 }, { "epoch": 0.09123821202300662, "grad_norm": 0.7429461077544732, "learning_rate": 1.824284880326912e-05, "loss": 0.7572, "step": 3125 }, { "epoch": 0.09126740825085398, "grad_norm": 0.6817702229256957, "learning_rate": 1.8248686514886167e-05, "loss": 0.6317, "step": 3126 }, { "epoch": 0.09129660447870135, "grad_norm": 0.7252397198045327, "learning_rate": 1.8254524226503212e-05, "loss": 0.6755, "step": 3127 }, { "epoch": 0.09132580070654871, "grad_norm": 0.7276578249184906, "learning_rate": 1.826036193812026e-05, "loss": 0.6861, "step": 3128 }, { "epoch": 0.09135499693439608, "grad_norm": 0.7136201359144752, "learning_rate": 1.8266199649737305e-05, "loss": 0.6479, "step": 3129 }, { "epoch": 0.09138419316224344, "grad_norm": 0.8232784381318524, "learning_rate": 1.827203736135435e-05, "loss": 0.7348, "step": 3130 }, { "epoch": 0.0914133893900908, "grad_norm": 0.7790239263915771, "learning_rate": 1.8277875072971395e-05, "loss": 0.7247, "step": 3131 }, { "epoch": 0.09144258561793817, "grad_norm": 0.7574482711351298, "learning_rate": 1.8283712784588444e-05, "loss": 0.717, "step": 3132 }, { "epoch": 0.09147178184578553, "grad_norm": 0.7940376423070992, "learning_rate": 1.828955049620549e-05, "loss": 0.6887, "step": 3133 }, { "epoch": 0.09150097807363289, "grad_norm": 0.8181984115091498, "learning_rate": 1.8295388207822534e-05, "loss": 0.785, "step": 3134 }, { "epoch": 0.09153017430148025, "grad_norm": 0.7355659844567142, "learning_rate": 1.830122591943958e-05, "loss": 0.669, "step": 3135 }, { "epoch": 0.09155937052932761, "grad_norm": 0.7772193481468764, "learning_rate": 1.8307063631056627e-05, "loss": 0.7161, "step": 3136 }, { "epoch": 0.09158856675717497, "grad_norm": 0.8064109612579262, "learning_rate": 1.8312901342673672e-05, "loss": 0.7434, "step": 3137 }, { "epoch": 0.09161776298502233, "grad_norm": 0.7470943327502222, "learning_rate": 1.831873905429072e-05, "loss": 0.7612, "step": 3138 }, { "epoch": 0.0916469592128697, "grad_norm": 0.7584035020463907, "learning_rate": 1.8324576765907765e-05, "loss": 0.6532, "step": 3139 }, { "epoch": 0.09167615544071706, "grad_norm": 0.7434089035685623, "learning_rate": 1.833041447752481e-05, "loss": 0.673, "step": 3140 }, { "epoch": 0.09170535166856442, "grad_norm": 0.78344241191964, "learning_rate": 1.833625218914186e-05, "loss": 0.7337, "step": 3141 }, { "epoch": 0.09173454789641178, "grad_norm": 0.7050022137008979, "learning_rate": 1.8342089900758904e-05, "loss": 0.653, "step": 3142 }, { "epoch": 0.09176374412425914, "grad_norm": 0.7028395903142595, "learning_rate": 1.8347927612375952e-05, "loss": 0.661, "step": 3143 }, { "epoch": 0.09179294035210651, "grad_norm": 0.9924159224055483, "learning_rate": 1.8353765323992997e-05, "loss": 0.7955, "step": 3144 }, { "epoch": 0.09182213657995388, "grad_norm": 0.7554716271710005, "learning_rate": 1.8359603035610042e-05, "loss": 0.6791, "step": 3145 }, { "epoch": 0.09185133280780124, "grad_norm": 0.7372608649523789, "learning_rate": 1.8365440747227087e-05, "loss": 0.7069, "step": 3146 }, { "epoch": 0.0918805290356486, "grad_norm": 0.7886906918770777, "learning_rate": 1.8371278458844135e-05, "loss": 0.7898, "step": 3147 }, { "epoch": 0.09190972526349596, "grad_norm": 0.7416656932787288, "learning_rate": 1.837711617046118e-05, "loss": 0.7174, "step": 3148 }, { "epoch": 0.09193892149134332, "grad_norm": 0.8066720702055743, "learning_rate": 1.838295388207823e-05, "loss": 0.7318, "step": 3149 }, { "epoch": 0.09196811771919068, "grad_norm": 0.7650276951114624, "learning_rate": 1.8388791593695274e-05, "loss": 0.6984, "step": 3150 }, { "epoch": 0.09199731394703804, "grad_norm": 0.8117459115248995, "learning_rate": 1.839462930531232e-05, "loss": 0.809, "step": 3151 }, { "epoch": 0.0920265101748854, "grad_norm": 0.7414084500907817, "learning_rate": 1.8400467016929364e-05, "loss": 0.6495, "step": 3152 }, { "epoch": 0.09205570640273276, "grad_norm": 0.7313541035450769, "learning_rate": 1.8406304728546412e-05, "loss": 0.6843, "step": 3153 }, { "epoch": 0.09208490263058013, "grad_norm": 0.7093335140444997, "learning_rate": 1.8412142440163457e-05, "loss": 0.6559, "step": 3154 }, { "epoch": 0.09211409885842749, "grad_norm": 0.7331436546863532, "learning_rate": 1.8417980151780502e-05, "loss": 0.6316, "step": 3155 }, { "epoch": 0.09214329508627485, "grad_norm": 0.7540222101140236, "learning_rate": 1.842381786339755e-05, "loss": 0.6519, "step": 3156 }, { "epoch": 0.09217249131412221, "grad_norm": 0.9152501899384583, "learning_rate": 1.8429655575014595e-05, "loss": 0.7704, "step": 3157 }, { "epoch": 0.09220168754196957, "grad_norm": 0.8343617524469071, "learning_rate": 1.8435493286631644e-05, "loss": 0.7592, "step": 3158 }, { "epoch": 0.09223088376981695, "grad_norm": 0.7845849590351075, "learning_rate": 1.844133099824869e-05, "loss": 0.7498, "step": 3159 }, { "epoch": 0.09226007999766431, "grad_norm": 0.7136650831342737, "learning_rate": 1.8447168709865734e-05, "loss": 0.6755, "step": 3160 }, { "epoch": 0.09228927622551167, "grad_norm": 0.7296898298539407, "learning_rate": 1.845300642148278e-05, "loss": 0.5418, "step": 3161 }, { "epoch": 0.09231847245335903, "grad_norm": 0.7657164434684113, "learning_rate": 1.8458844133099827e-05, "loss": 0.7338, "step": 3162 }, { "epoch": 0.09234766868120639, "grad_norm": 1.0687305338379158, "learning_rate": 1.8464681844716872e-05, "loss": 0.8575, "step": 3163 }, { "epoch": 0.09237686490905375, "grad_norm": 0.6614770889795487, "learning_rate": 1.847051955633392e-05, "loss": 0.612, "step": 3164 }, { "epoch": 0.09240606113690111, "grad_norm": 0.7504505693603959, "learning_rate": 1.8476357267950966e-05, "loss": 0.6905, "step": 3165 }, { "epoch": 0.09243525736474847, "grad_norm": 0.7952915036689837, "learning_rate": 1.848219497956801e-05, "loss": 0.7955, "step": 3166 }, { "epoch": 0.09246445359259584, "grad_norm": 0.7647863897354878, "learning_rate": 1.8488032691185055e-05, "loss": 0.7101, "step": 3167 }, { "epoch": 0.0924936498204432, "grad_norm": 0.723176078077032, "learning_rate": 1.8493870402802104e-05, "loss": 0.6259, "step": 3168 }, { "epoch": 0.09252284604829056, "grad_norm": 1.1405806952332478, "learning_rate": 1.849970811441915e-05, "loss": 0.7585, "step": 3169 }, { "epoch": 0.09255204227613792, "grad_norm": 0.7369729145774794, "learning_rate": 1.8505545826036194e-05, "loss": 0.6658, "step": 3170 }, { "epoch": 0.09258123850398528, "grad_norm": 0.705073012612882, "learning_rate": 1.8511383537653242e-05, "loss": 0.6064, "step": 3171 }, { "epoch": 0.09261043473183264, "grad_norm": 0.7771614099032266, "learning_rate": 1.8517221249270287e-05, "loss": 0.7259, "step": 3172 }, { "epoch": 0.09263963095968, "grad_norm": 0.723875748373884, "learning_rate": 1.8523058960887336e-05, "loss": 0.7016, "step": 3173 }, { "epoch": 0.09266882718752738, "grad_norm": 0.7256128243820349, "learning_rate": 1.852889667250438e-05, "loss": 0.6927, "step": 3174 }, { "epoch": 0.09269802341537474, "grad_norm": 1.5008910063916432, "learning_rate": 1.8534734384121426e-05, "loss": 0.7524, "step": 3175 }, { "epoch": 0.0927272196432221, "grad_norm": 0.707669711606965, "learning_rate": 1.854057209573847e-05, "loss": 0.6565, "step": 3176 }, { "epoch": 0.09275641587106946, "grad_norm": 0.7445235030265347, "learning_rate": 1.854640980735552e-05, "loss": 0.6108, "step": 3177 }, { "epoch": 0.09278561209891682, "grad_norm": 1.517626821432437, "learning_rate": 1.8552247518972564e-05, "loss": 0.765, "step": 3178 }, { "epoch": 0.09281480832676418, "grad_norm": 0.7825799076666687, "learning_rate": 1.8558085230589612e-05, "loss": 0.7219, "step": 3179 }, { "epoch": 0.09284400455461154, "grad_norm": 0.7796990950660514, "learning_rate": 1.8563922942206657e-05, "loss": 0.7067, "step": 3180 }, { "epoch": 0.0928732007824589, "grad_norm": 0.8912321585857919, "learning_rate": 1.8569760653823702e-05, "loss": 0.7816, "step": 3181 }, { "epoch": 0.09290239701030627, "grad_norm": 0.8134078396357208, "learning_rate": 1.8575598365440747e-05, "loss": 0.6985, "step": 3182 }, { "epoch": 0.09293159323815363, "grad_norm": 0.7345379805460394, "learning_rate": 1.8581436077057796e-05, "loss": 0.6337, "step": 3183 }, { "epoch": 0.09296078946600099, "grad_norm": 0.7479876076517383, "learning_rate": 1.858727378867484e-05, "loss": 0.7217, "step": 3184 }, { "epoch": 0.09298998569384835, "grad_norm": 0.7335650012119631, "learning_rate": 1.8593111500291886e-05, "loss": 0.6012, "step": 3185 }, { "epoch": 0.09301918192169571, "grad_norm": 0.7772357131300366, "learning_rate": 1.8598949211908934e-05, "loss": 0.7564, "step": 3186 }, { "epoch": 0.09304837814954307, "grad_norm": 1.0027841820826753, "learning_rate": 1.860478692352598e-05, "loss": 0.766, "step": 3187 }, { "epoch": 0.09307757437739043, "grad_norm": 0.7613707952020615, "learning_rate": 1.8610624635143027e-05, "loss": 0.7122, "step": 3188 }, { "epoch": 0.09310677060523781, "grad_norm": 0.7408622201852597, "learning_rate": 1.8616462346760072e-05, "loss": 0.7507, "step": 3189 }, { "epoch": 0.09313596683308517, "grad_norm": 0.7460396081076074, "learning_rate": 1.8622300058377117e-05, "loss": 0.7057, "step": 3190 }, { "epoch": 0.09316516306093253, "grad_norm": 0.7704786063566247, "learning_rate": 1.8628137769994162e-05, "loss": 0.721, "step": 3191 }, { "epoch": 0.0931943592887799, "grad_norm": 0.8270705774657751, "learning_rate": 1.863397548161121e-05, "loss": 0.7722, "step": 3192 }, { "epoch": 0.09322355551662725, "grad_norm": 0.7445254960661776, "learning_rate": 1.8639813193228256e-05, "loss": 0.7357, "step": 3193 }, { "epoch": 0.09325275174447462, "grad_norm": 0.7939815815892832, "learning_rate": 1.8645650904845304e-05, "loss": 0.7823, "step": 3194 }, { "epoch": 0.09328194797232198, "grad_norm": 0.8292156884727494, "learning_rate": 1.865148861646235e-05, "loss": 0.6877, "step": 3195 }, { "epoch": 0.09331114420016934, "grad_norm": 0.7462255281516047, "learning_rate": 1.8657326328079394e-05, "loss": 0.6773, "step": 3196 }, { "epoch": 0.0933403404280167, "grad_norm": 0.7557274810210745, "learning_rate": 1.866316403969644e-05, "loss": 0.6616, "step": 3197 }, { "epoch": 0.09336953665586406, "grad_norm": 0.7449068112624561, "learning_rate": 1.8669001751313487e-05, "loss": 0.7272, "step": 3198 }, { "epoch": 0.09339873288371142, "grad_norm": 0.7294740117861941, "learning_rate": 1.8674839462930532e-05, "loss": 0.6526, "step": 3199 }, { "epoch": 0.09342792911155878, "grad_norm": 0.7973072578509629, "learning_rate": 1.868067717454758e-05, "loss": 0.6893, "step": 3200 }, { "epoch": 0.09345712533940614, "grad_norm": 0.7321245048514068, "learning_rate": 1.8686514886164622e-05, "loss": 0.683, "step": 3201 }, { "epoch": 0.0934863215672535, "grad_norm": 0.7053195857334271, "learning_rate": 1.869235259778167e-05, "loss": 0.6762, "step": 3202 }, { "epoch": 0.09351551779510087, "grad_norm": 0.7657479657874854, "learning_rate": 1.869819030939872e-05, "loss": 0.7058, "step": 3203 }, { "epoch": 0.09354471402294824, "grad_norm": 0.804477396352945, "learning_rate": 1.8704028021015764e-05, "loss": 0.7326, "step": 3204 }, { "epoch": 0.0935739102507956, "grad_norm": 0.7712517732012569, "learning_rate": 1.870986573263281e-05, "loss": 0.6594, "step": 3205 }, { "epoch": 0.09360310647864296, "grad_norm": 0.746970051039766, "learning_rate": 1.8715703444249854e-05, "loss": 0.6553, "step": 3206 }, { "epoch": 0.09363230270649033, "grad_norm": 0.771701322056158, "learning_rate": 1.8721541155866902e-05, "loss": 0.7117, "step": 3207 }, { "epoch": 0.09366149893433769, "grad_norm": 0.7205199854535779, "learning_rate": 1.8727378867483947e-05, "loss": 0.6608, "step": 3208 }, { "epoch": 0.09369069516218505, "grad_norm": 0.7581415462671126, "learning_rate": 1.8733216579100996e-05, "loss": 0.74, "step": 3209 }, { "epoch": 0.09371989139003241, "grad_norm": 0.779349269281321, "learning_rate": 1.873905429071804e-05, "loss": 0.689, "step": 3210 }, { "epoch": 0.09374908761787977, "grad_norm": 0.7592641949573585, "learning_rate": 1.8744892002335086e-05, "loss": 0.6991, "step": 3211 }, { "epoch": 0.09377828384572713, "grad_norm": 0.7271205022623856, "learning_rate": 1.875072971395213e-05, "loss": 0.6813, "step": 3212 }, { "epoch": 0.09380748007357449, "grad_norm": 0.698064529755883, "learning_rate": 1.875656742556918e-05, "loss": 0.6323, "step": 3213 }, { "epoch": 0.09383667630142185, "grad_norm": 0.6801444651220607, "learning_rate": 1.8762405137186224e-05, "loss": 0.6055, "step": 3214 }, { "epoch": 0.09386587252926921, "grad_norm": 0.705395799404547, "learning_rate": 1.8768242848803272e-05, "loss": 0.6905, "step": 3215 }, { "epoch": 0.09389506875711658, "grad_norm": 0.7673603516338218, "learning_rate": 1.8774080560420317e-05, "loss": 0.6371, "step": 3216 }, { "epoch": 0.09392426498496394, "grad_norm": 0.7310161565986435, "learning_rate": 1.8779918272037362e-05, "loss": 0.6982, "step": 3217 }, { "epoch": 0.0939534612128113, "grad_norm": 0.7829989246296272, "learning_rate": 1.878575598365441e-05, "loss": 0.7781, "step": 3218 }, { "epoch": 0.09398265744065867, "grad_norm": 0.6982187828807619, "learning_rate": 1.8791593695271456e-05, "loss": 0.7289, "step": 3219 }, { "epoch": 0.09401185366850603, "grad_norm": 0.7626588745666427, "learning_rate": 1.87974314068885e-05, "loss": 0.7626, "step": 3220 }, { "epoch": 0.0940410498963534, "grad_norm": 0.7287274808619036, "learning_rate": 1.8803269118505546e-05, "loss": 0.6896, "step": 3221 }, { "epoch": 0.09407024612420076, "grad_norm": 0.7854508848661474, "learning_rate": 1.8809106830122594e-05, "loss": 0.6099, "step": 3222 }, { "epoch": 0.09409944235204812, "grad_norm": 0.8865851037308784, "learning_rate": 1.881494454173964e-05, "loss": 0.7887, "step": 3223 }, { "epoch": 0.09412863857989548, "grad_norm": 0.741986848523743, "learning_rate": 1.8820782253356687e-05, "loss": 0.6396, "step": 3224 }, { "epoch": 0.09415783480774284, "grad_norm": 0.751924766380109, "learning_rate": 1.8826619964973732e-05, "loss": 0.6915, "step": 3225 }, { "epoch": 0.0941870310355902, "grad_norm": 0.7328325266341436, "learning_rate": 1.8832457676590777e-05, "loss": 0.6434, "step": 3226 }, { "epoch": 0.09421622726343756, "grad_norm": 0.9400942407991484, "learning_rate": 1.8838295388207822e-05, "loss": 0.7737, "step": 3227 }, { "epoch": 0.09424542349128492, "grad_norm": 0.7265089517862993, "learning_rate": 1.884413309982487e-05, "loss": 0.6172, "step": 3228 }, { "epoch": 0.09427461971913229, "grad_norm": 0.7498285266769755, "learning_rate": 1.8849970811441916e-05, "loss": 0.75, "step": 3229 }, { "epoch": 0.09430381594697965, "grad_norm": 0.8059673834062021, "learning_rate": 1.8855808523058964e-05, "loss": 0.7641, "step": 3230 }, { "epoch": 0.09433301217482701, "grad_norm": 0.8225381837653863, "learning_rate": 1.886164623467601e-05, "loss": 0.7472, "step": 3231 }, { "epoch": 0.09436220840267437, "grad_norm": 0.7962750384123737, "learning_rate": 1.8867483946293054e-05, "loss": 0.7897, "step": 3232 }, { "epoch": 0.09439140463052173, "grad_norm": 0.7915283221637579, "learning_rate": 1.88733216579101e-05, "loss": 0.7581, "step": 3233 }, { "epoch": 0.0944206008583691, "grad_norm": 0.8626561023669915, "learning_rate": 1.8879159369527147e-05, "loss": 0.6696, "step": 3234 }, { "epoch": 0.09444979708621647, "grad_norm": 0.8149048455196719, "learning_rate": 1.8884997081144192e-05, "loss": 0.7005, "step": 3235 }, { "epoch": 0.09447899331406383, "grad_norm": 0.773365152143276, "learning_rate": 1.8890834792761237e-05, "loss": 0.7131, "step": 3236 }, { "epoch": 0.09450818954191119, "grad_norm": 0.7674493950866289, "learning_rate": 1.8896672504378286e-05, "loss": 0.6937, "step": 3237 }, { "epoch": 0.09453738576975855, "grad_norm": 0.7015110968621064, "learning_rate": 1.890251021599533e-05, "loss": 0.6931, "step": 3238 }, { "epoch": 0.09456658199760591, "grad_norm": 0.7620725918833418, "learning_rate": 1.890834792761238e-05, "loss": 0.6878, "step": 3239 }, { "epoch": 0.09459577822545327, "grad_norm": 0.8162068582993806, "learning_rate": 1.8914185639229424e-05, "loss": 0.7203, "step": 3240 }, { "epoch": 0.09462497445330063, "grad_norm": 0.7379104483282355, "learning_rate": 1.892002335084647e-05, "loss": 0.6463, "step": 3241 }, { "epoch": 0.094654170681148, "grad_norm": 0.688416242230604, "learning_rate": 1.8925861062463514e-05, "loss": 0.6602, "step": 3242 }, { "epoch": 0.09468336690899536, "grad_norm": 0.7245371758621005, "learning_rate": 1.8931698774080563e-05, "loss": 0.6669, "step": 3243 }, { "epoch": 0.09471256313684272, "grad_norm": 0.7976929815360397, "learning_rate": 1.8937536485697607e-05, "loss": 0.7495, "step": 3244 }, { "epoch": 0.09474175936469008, "grad_norm": 0.6986021529878529, "learning_rate": 1.8943374197314656e-05, "loss": 0.6637, "step": 3245 }, { "epoch": 0.09477095559253744, "grad_norm": 0.8029519094728914, "learning_rate": 1.89492119089317e-05, "loss": 0.7243, "step": 3246 }, { "epoch": 0.0948001518203848, "grad_norm": 0.7265083204032382, "learning_rate": 1.8955049620548746e-05, "loss": 0.6656, "step": 3247 }, { "epoch": 0.09482934804823216, "grad_norm": 0.7075327180898683, "learning_rate": 1.896088733216579e-05, "loss": 0.6416, "step": 3248 }, { "epoch": 0.09485854427607954, "grad_norm": 0.6992609224116689, "learning_rate": 1.896672504378284e-05, "loss": 0.6035, "step": 3249 }, { "epoch": 0.0948877405039269, "grad_norm": 0.7061855551797338, "learning_rate": 1.8972562755399888e-05, "loss": 0.6446, "step": 3250 }, { "epoch": 0.09491693673177426, "grad_norm": 0.8203747904803221, "learning_rate": 1.897840046701693e-05, "loss": 0.7744, "step": 3251 }, { "epoch": 0.09494613295962162, "grad_norm": 0.7021785491674118, "learning_rate": 1.8984238178633978e-05, "loss": 0.6326, "step": 3252 }, { "epoch": 0.09497532918746898, "grad_norm": 0.7461333917795849, "learning_rate": 1.8990075890251023e-05, "loss": 0.7335, "step": 3253 }, { "epoch": 0.09500452541531634, "grad_norm": 0.7056929900822625, "learning_rate": 1.899591360186807e-05, "loss": 0.656, "step": 3254 }, { "epoch": 0.0950337216431637, "grad_norm": 0.7861113328969644, "learning_rate": 1.9001751313485116e-05, "loss": 0.7035, "step": 3255 }, { "epoch": 0.09506291787101107, "grad_norm": 0.8040289238428242, "learning_rate": 1.900758902510216e-05, "loss": 0.8461, "step": 3256 }, { "epoch": 0.09509211409885843, "grad_norm": 0.7501844775733646, "learning_rate": 1.9013426736719206e-05, "loss": 0.6669, "step": 3257 }, { "epoch": 0.09512131032670579, "grad_norm": 1.000617908349667, "learning_rate": 1.9019264448336254e-05, "loss": 0.7619, "step": 3258 }, { "epoch": 0.09515050655455315, "grad_norm": 0.7552978427897739, "learning_rate": 1.90251021599533e-05, "loss": 0.7299, "step": 3259 }, { "epoch": 0.09517970278240051, "grad_norm": 0.6987335276998992, "learning_rate": 1.9030939871570348e-05, "loss": 0.6232, "step": 3260 }, { "epoch": 0.09520889901024787, "grad_norm": 0.8325220127724561, "learning_rate": 1.9036777583187393e-05, "loss": 0.7251, "step": 3261 }, { "epoch": 0.09523809523809523, "grad_norm": 0.738485512614121, "learning_rate": 1.9042615294804438e-05, "loss": 0.693, "step": 3262 }, { "epoch": 0.0952672914659426, "grad_norm": 0.7322633533316244, "learning_rate": 1.9048453006421483e-05, "loss": 0.6817, "step": 3263 }, { "epoch": 0.09529648769378997, "grad_norm": 0.7316588584657616, "learning_rate": 1.905429071803853e-05, "loss": 0.6626, "step": 3264 }, { "epoch": 0.09532568392163733, "grad_norm": 0.7617234982648322, "learning_rate": 1.9060128429655576e-05, "loss": 0.7267, "step": 3265 }, { "epoch": 0.09535488014948469, "grad_norm": 0.8123359319611284, "learning_rate": 1.9065966141272624e-05, "loss": 0.7189, "step": 3266 }, { "epoch": 0.09538407637733205, "grad_norm": 0.8219420614891094, "learning_rate": 1.907180385288967e-05, "loss": 0.6869, "step": 3267 }, { "epoch": 0.09541327260517941, "grad_norm": 0.8035924133542507, "learning_rate": 1.9077641564506714e-05, "loss": 0.6678, "step": 3268 }, { "epoch": 0.09544246883302678, "grad_norm": 0.7300072898850093, "learning_rate": 1.9083479276123763e-05, "loss": 0.6872, "step": 3269 }, { "epoch": 0.09547166506087414, "grad_norm": 0.7491384765042565, "learning_rate": 1.9089316987740808e-05, "loss": 0.6101, "step": 3270 }, { "epoch": 0.0955008612887215, "grad_norm": 0.7318599878633084, "learning_rate": 1.9095154699357853e-05, "loss": 0.7008, "step": 3271 }, { "epoch": 0.09553005751656886, "grad_norm": 0.7949085689866703, "learning_rate": 1.9100992410974898e-05, "loss": 0.7559, "step": 3272 }, { "epoch": 0.09555925374441622, "grad_norm": 0.7580562566031585, "learning_rate": 1.9106830122591946e-05, "loss": 0.6848, "step": 3273 }, { "epoch": 0.09558844997226358, "grad_norm": 0.8076239644478671, "learning_rate": 1.911266783420899e-05, "loss": 0.6403, "step": 3274 }, { "epoch": 0.09561764620011094, "grad_norm": 0.7270467828864586, "learning_rate": 1.911850554582604e-05, "loss": 0.6861, "step": 3275 }, { "epoch": 0.0956468424279583, "grad_norm": 0.6959567038624374, "learning_rate": 1.9124343257443084e-05, "loss": 0.6303, "step": 3276 }, { "epoch": 0.09567603865580566, "grad_norm": 0.78891849384512, "learning_rate": 1.913018096906013e-05, "loss": 0.8095, "step": 3277 }, { "epoch": 0.09570523488365303, "grad_norm": 0.7685234764918173, "learning_rate": 1.9136018680677174e-05, "loss": 0.7203, "step": 3278 }, { "epoch": 0.0957344311115004, "grad_norm": 0.7509552814043775, "learning_rate": 1.9141856392294223e-05, "loss": 0.6567, "step": 3279 }, { "epoch": 0.09576362733934776, "grad_norm": 0.6819158417521111, "learning_rate": 1.9147694103911268e-05, "loss": 0.5997, "step": 3280 }, { "epoch": 0.09579282356719512, "grad_norm": 0.7065676436553079, "learning_rate": 1.9153531815528316e-05, "loss": 0.626, "step": 3281 }, { "epoch": 0.09582201979504248, "grad_norm": 0.7577102033084545, "learning_rate": 1.915936952714536e-05, "loss": 0.6679, "step": 3282 }, { "epoch": 0.09585121602288985, "grad_norm": 0.7524398832021763, "learning_rate": 1.9165207238762406e-05, "loss": 0.7149, "step": 3283 }, { "epoch": 0.09588041225073721, "grad_norm": 0.7377143197473728, "learning_rate": 1.9171044950379454e-05, "loss": 0.7119, "step": 3284 }, { "epoch": 0.09590960847858457, "grad_norm": 1.305951075389667, "learning_rate": 1.91768826619965e-05, "loss": 0.6511, "step": 3285 }, { "epoch": 0.09593880470643193, "grad_norm": 0.7273104237466482, "learning_rate": 1.9182720373613544e-05, "loss": 0.6963, "step": 3286 }, { "epoch": 0.09596800093427929, "grad_norm": 0.7347014655357403, "learning_rate": 1.918855808523059e-05, "loss": 0.7234, "step": 3287 }, { "epoch": 0.09599719716212665, "grad_norm": 0.8192518502879085, "learning_rate": 1.9194395796847638e-05, "loss": 0.7172, "step": 3288 }, { "epoch": 0.09602639338997401, "grad_norm": 0.7297586376323797, "learning_rate": 1.9200233508464683e-05, "loss": 0.6193, "step": 3289 }, { "epoch": 0.09605558961782137, "grad_norm": 0.9023065141133608, "learning_rate": 1.920607122008173e-05, "loss": 0.7749, "step": 3290 }, { "epoch": 0.09608478584566874, "grad_norm": 0.7979456127010763, "learning_rate": 1.9211908931698776e-05, "loss": 0.7641, "step": 3291 }, { "epoch": 0.0961139820735161, "grad_norm": 0.6200014789740811, "learning_rate": 1.921774664331582e-05, "loss": 0.5052, "step": 3292 }, { "epoch": 0.09614317830136346, "grad_norm": 0.7110371629377069, "learning_rate": 1.9223584354932866e-05, "loss": 0.7026, "step": 3293 }, { "epoch": 0.09617237452921083, "grad_norm": 0.6930417284170243, "learning_rate": 1.9229422066549914e-05, "loss": 0.6001, "step": 3294 }, { "epoch": 0.0962015707570582, "grad_norm": 0.6600758414541459, "learning_rate": 1.923525977816696e-05, "loss": 0.5696, "step": 3295 }, { "epoch": 0.09623076698490556, "grad_norm": 0.663137905682488, "learning_rate": 1.9241097489784008e-05, "loss": 0.6333, "step": 3296 }, { "epoch": 0.09625996321275292, "grad_norm": 0.7506416916769899, "learning_rate": 1.9246935201401053e-05, "loss": 0.7254, "step": 3297 }, { "epoch": 0.09628915944060028, "grad_norm": 0.7611152023476687, "learning_rate": 1.9252772913018098e-05, "loss": 0.6612, "step": 3298 }, { "epoch": 0.09631835566844764, "grad_norm": 0.9113428046718469, "learning_rate": 1.9258610624635146e-05, "loss": 0.7031, "step": 3299 }, { "epoch": 0.096347551896295, "grad_norm": 0.6909022920075201, "learning_rate": 1.926444833625219e-05, "loss": 0.6315, "step": 3300 }, { "epoch": 0.09637674812414236, "grad_norm": 0.78208275526558, "learning_rate": 1.9270286047869236e-05, "loss": 0.6277, "step": 3301 }, { "epoch": 0.09640594435198972, "grad_norm": 0.7998550145399997, "learning_rate": 1.927612375948628e-05, "loss": 0.7998, "step": 3302 }, { "epoch": 0.09643514057983708, "grad_norm": 0.7684092809476151, "learning_rate": 1.928196147110333e-05, "loss": 0.7281, "step": 3303 }, { "epoch": 0.09646433680768444, "grad_norm": 0.7717963192051132, "learning_rate": 1.9287799182720374e-05, "loss": 0.7475, "step": 3304 }, { "epoch": 0.0964935330355318, "grad_norm": 0.7203599939289953, "learning_rate": 1.9293636894337423e-05, "loss": 0.6899, "step": 3305 }, { "epoch": 0.09652272926337917, "grad_norm": 0.881927855214154, "learning_rate": 1.9299474605954468e-05, "loss": 0.812, "step": 3306 }, { "epoch": 0.09655192549122653, "grad_norm": 1.1324680273134378, "learning_rate": 1.9305312317571513e-05, "loss": 0.738, "step": 3307 }, { "epoch": 0.09658112171907389, "grad_norm": 0.7230022012262328, "learning_rate": 1.9311150029188558e-05, "loss": 0.6217, "step": 3308 }, { "epoch": 0.09661031794692125, "grad_norm": 0.7657829482265659, "learning_rate": 1.9316987740805606e-05, "loss": 0.71, "step": 3309 }, { "epoch": 0.09663951417476863, "grad_norm": 0.7035108120522315, "learning_rate": 1.932282545242265e-05, "loss": 0.6499, "step": 3310 }, { "epoch": 0.09666871040261599, "grad_norm": 0.7506015840468953, "learning_rate": 1.93286631640397e-05, "loss": 0.7358, "step": 3311 }, { "epoch": 0.09669790663046335, "grad_norm": 0.7601493935770391, "learning_rate": 1.9334500875656744e-05, "loss": 0.669, "step": 3312 }, { "epoch": 0.09672710285831071, "grad_norm": 0.7019389062906526, "learning_rate": 1.934033858727379e-05, "loss": 0.5954, "step": 3313 }, { "epoch": 0.09675629908615807, "grad_norm": 0.8051876329176859, "learning_rate": 1.9346176298890838e-05, "loss": 0.7737, "step": 3314 }, { "epoch": 0.09678549531400543, "grad_norm": 0.7393258770981997, "learning_rate": 1.9352014010507883e-05, "loss": 0.7002, "step": 3315 }, { "epoch": 0.0968146915418528, "grad_norm": 0.7831875849782705, "learning_rate": 1.935785172212493e-05, "loss": 0.7043, "step": 3316 }, { "epoch": 0.09684388776970015, "grad_norm": 0.8096488007918567, "learning_rate": 1.9363689433741976e-05, "loss": 0.7689, "step": 3317 }, { "epoch": 0.09687308399754752, "grad_norm": 0.7238782169368684, "learning_rate": 1.936952714535902e-05, "loss": 0.6784, "step": 3318 }, { "epoch": 0.09690228022539488, "grad_norm": 0.7691526680487732, "learning_rate": 1.9375364856976066e-05, "loss": 0.7265, "step": 3319 }, { "epoch": 0.09693147645324224, "grad_norm": 0.7688331568443415, "learning_rate": 1.9381202568593115e-05, "loss": 0.7317, "step": 3320 }, { "epoch": 0.0969606726810896, "grad_norm": 0.7797790012792266, "learning_rate": 1.938704028021016e-05, "loss": 0.7048, "step": 3321 }, { "epoch": 0.09698986890893696, "grad_norm": 0.7387552855877141, "learning_rate": 1.9392877991827205e-05, "loss": 0.6914, "step": 3322 }, { "epoch": 0.09701906513678432, "grad_norm": 0.7818475996403705, "learning_rate": 1.939871570344425e-05, "loss": 0.7074, "step": 3323 }, { "epoch": 0.09704826136463168, "grad_norm": 0.7605678256559568, "learning_rate": 1.9404553415061298e-05, "loss": 0.7327, "step": 3324 }, { "epoch": 0.09707745759247906, "grad_norm": 0.7355403991749604, "learning_rate": 1.9410391126678343e-05, "loss": 0.717, "step": 3325 }, { "epoch": 0.09710665382032642, "grad_norm": 0.8100333293598582, "learning_rate": 1.941622883829539e-05, "loss": 0.7415, "step": 3326 }, { "epoch": 0.09713585004817378, "grad_norm": 0.8487028201279012, "learning_rate": 1.9422066549912436e-05, "loss": 0.7206, "step": 3327 }, { "epoch": 0.09716504627602114, "grad_norm": 0.8186032039731196, "learning_rate": 1.942790426152948e-05, "loss": 0.7157, "step": 3328 }, { "epoch": 0.0971942425038685, "grad_norm": 1.0081598260002562, "learning_rate": 1.9433741973146526e-05, "loss": 0.7412, "step": 3329 }, { "epoch": 0.09722343873171586, "grad_norm": 0.8416947957616205, "learning_rate": 1.9439579684763575e-05, "loss": 0.6622, "step": 3330 }, { "epoch": 0.09725263495956323, "grad_norm": 0.7898126739292071, "learning_rate": 1.9445417396380623e-05, "loss": 0.6451, "step": 3331 }, { "epoch": 0.09728183118741059, "grad_norm": 0.658449687075858, "learning_rate": 1.9451255107997668e-05, "loss": 0.572, "step": 3332 }, { "epoch": 0.09731102741525795, "grad_norm": 0.7117687712724462, "learning_rate": 1.9457092819614713e-05, "loss": 0.6782, "step": 3333 }, { "epoch": 0.09734022364310531, "grad_norm": 0.6981637604765246, "learning_rate": 1.9462930531231758e-05, "loss": 0.6786, "step": 3334 }, { "epoch": 0.09736941987095267, "grad_norm": 0.7971245586516985, "learning_rate": 1.9468768242848806e-05, "loss": 0.7289, "step": 3335 }, { "epoch": 0.09739861609880003, "grad_norm": 0.7291297849584, "learning_rate": 1.947460595446585e-05, "loss": 0.5203, "step": 3336 }, { "epoch": 0.09742781232664739, "grad_norm": 0.8699303506571527, "learning_rate": 1.9480443666082896e-05, "loss": 0.6639, "step": 3337 }, { "epoch": 0.09745700855449475, "grad_norm": 0.7494642172758408, "learning_rate": 1.948628137769994e-05, "loss": 0.6454, "step": 3338 }, { "epoch": 0.09748620478234211, "grad_norm": 0.6969508710822272, "learning_rate": 1.949211908931699e-05, "loss": 0.6389, "step": 3339 }, { "epoch": 0.09751540101018949, "grad_norm": 0.7383694530647575, "learning_rate": 1.9497956800934035e-05, "loss": 0.6985, "step": 3340 }, { "epoch": 0.09754459723803685, "grad_norm": 0.817343142992012, "learning_rate": 1.9503794512551083e-05, "loss": 0.7283, "step": 3341 }, { "epoch": 0.09757379346588421, "grad_norm": 0.7764709180820157, "learning_rate": 1.9509632224168128e-05, "loss": 0.7295, "step": 3342 }, { "epoch": 0.09760298969373157, "grad_norm": 0.7997838044839127, "learning_rate": 1.9515469935785173e-05, "loss": 0.7851, "step": 3343 }, { "epoch": 0.09763218592157893, "grad_norm": 0.7385910760198152, "learning_rate": 1.9521307647402218e-05, "loss": 0.7103, "step": 3344 }, { "epoch": 0.0976613821494263, "grad_norm": 0.7024296030163177, "learning_rate": 1.9527145359019266e-05, "loss": 0.6367, "step": 3345 }, { "epoch": 0.09769057837727366, "grad_norm": 0.7624198742581341, "learning_rate": 1.9532983070636315e-05, "loss": 0.7247, "step": 3346 }, { "epoch": 0.09771977460512102, "grad_norm": 0.7613125911205935, "learning_rate": 1.953882078225336e-05, "loss": 0.6158, "step": 3347 }, { "epoch": 0.09774897083296838, "grad_norm": 0.7067817904554995, "learning_rate": 1.9544658493870405e-05, "loss": 0.6441, "step": 3348 }, { "epoch": 0.09777816706081574, "grad_norm": 0.7684457470431519, "learning_rate": 1.955049620548745e-05, "loss": 0.6915, "step": 3349 }, { "epoch": 0.0978073632886631, "grad_norm": 0.7485224568755049, "learning_rate": 1.9556333917104498e-05, "loss": 0.7108, "step": 3350 }, { "epoch": 0.09783655951651046, "grad_norm": 0.7691304251035089, "learning_rate": 1.9562171628721543e-05, "loss": 0.7403, "step": 3351 }, { "epoch": 0.09786575574435782, "grad_norm": 0.7315358597797218, "learning_rate": 1.9568009340338588e-05, "loss": 0.6967, "step": 3352 }, { "epoch": 0.09789495197220519, "grad_norm": 0.8261536630850324, "learning_rate": 1.9573847051955633e-05, "loss": 0.6716, "step": 3353 }, { "epoch": 0.09792414820005255, "grad_norm": 0.7479457170162616, "learning_rate": 1.957968476357268e-05, "loss": 0.7223, "step": 3354 }, { "epoch": 0.09795334442789992, "grad_norm": 0.8166719009150765, "learning_rate": 1.9585522475189726e-05, "loss": 0.7774, "step": 3355 }, { "epoch": 0.09798254065574728, "grad_norm": 0.6976657816406011, "learning_rate": 1.9591360186806775e-05, "loss": 0.6892, "step": 3356 }, { "epoch": 0.09801173688359464, "grad_norm": 0.7204906487490269, "learning_rate": 1.959719789842382e-05, "loss": 0.7098, "step": 3357 }, { "epoch": 0.098040933111442, "grad_norm": 0.8657555512011352, "learning_rate": 1.9603035610040865e-05, "loss": 0.7724, "step": 3358 }, { "epoch": 0.09807012933928937, "grad_norm": 0.9365696676322895, "learning_rate": 1.960887332165791e-05, "loss": 0.7275, "step": 3359 }, { "epoch": 0.09809932556713673, "grad_norm": 0.7125919278874708, "learning_rate": 1.9614711033274958e-05, "loss": 0.66, "step": 3360 }, { "epoch": 0.09812852179498409, "grad_norm": 0.8263321273234897, "learning_rate": 1.9620548744892003e-05, "loss": 0.7572, "step": 3361 }, { "epoch": 0.09815771802283145, "grad_norm": 0.6951115176412912, "learning_rate": 1.962638645650905e-05, "loss": 0.6275, "step": 3362 }, { "epoch": 0.09818691425067881, "grad_norm": 0.7265624854247429, "learning_rate": 1.9632224168126096e-05, "loss": 0.677, "step": 3363 }, { "epoch": 0.09821611047852617, "grad_norm": 0.8487688215547132, "learning_rate": 1.963806187974314e-05, "loss": 0.7331, "step": 3364 }, { "epoch": 0.09824530670637353, "grad_norm": 0.7498923000227387, "learning_rate": 1.964389959136019e-05, "loss": 0.7336, "step": 3365 }, { "epoch": 0.0982745029342209, "grad_norm": 0.7352613963600816, "learning_rate": 1.9649737302977235e-05, "loss": 0.6731, "step": 3366 }, { "epoch": 0.09830369916206826, "grad_norm": 0.7797135315437207, "learning_rate": 1.9655575014594283e-05, "loss": 0.7757, "step": 3367 }, { "epoch": 0.09833289538991562, "grad_norm": 0.7193870431334071, "learning_rate": 1.9661412726211325e-05, "loss": 0.6929, "step": 3368 }, { "epoch": 0.09836209161776298, "grad_norm": 0.7218940965859755, "learning_rate": 1.9667250437828373e-05, "loss": 0.6919, "step": 3369 }, { "epoch": 0.09839128784561035, "grad_norm": 1.963748527598429, "learning_rate": 1.9673088149445418e-05, "loss": 0.7395, "step": 3370 }, { "epoch": 0.09842048407345771, "grad_norm": 0.6844453923167657, "learning_rate": 1.9678925861062466e-05, "loss": 0.6275, "step": 3371 }, { "epoch": 0.09844968030130508, "grad_norm": 0.7779438618961269, "learning_rate": 1.968476357267951e-05, "loss": 0.7495, "step": 3372 }, { "epoch": 0.09847887652915244, "grad_norm": 0.8517049574919193, "learning_rate": 1.9690601284296556e-05, "loss": 0.6811, "step": 3373 }, { "epoch": 0.0985080727569998, "grad_norm": 0.7792171441029189, "learning_rate": 1.96964389959136e-05, "loss": 0.7924, "step": 3374 }, { "epoch": 0.09853726898484716, "grad_norm": 0.6965567977903808, "learning_rate": 1.970227670753065e-05, "loss": 0.6382, "step": 3375 }, { "epoch": 0.09856646521269452, "grad_norm": 0.722824468169462, "learning_rate": 1.9708114419147695e-05, "loss": 0.6705, "step": 3376 }, { "epoch": 0.09859566144054188, "grad_norm": 0.7346770795515729, "learning_rate": 1.9713952130764743e-05, "loss": 0.7022, "step": 3377 }, { "epoch": 0.09862485766838924, "grad_norm": 0.7847156387762815, "learning_rate": 1.9719789842381788e-05, "loss": 0.7504, "step": 3378 }, { "epoch": 0.0986540538962366, "grad_norm": 0.6743248492607716, "learning_rate": 1.9725627553998833e-05, "loss": 0.6603, "step": 3379 }, { "epoch": 0.09868325012408397, "grad_norm": 0.7488853042931826, "learning_rate": 1.973146526561588e-05, "loss": 0.6422, "step": 3380 }, { "epoch": 0.09871244635193133, "grad_norm": 0.7023988261285413, "learning_rate": 1.9737302977232926e-05, "loss": 0.664, "step": 3381 }, { "epoch": 0.09874164257977869, "grad_norm": 0.7439460064662737, "learning_rate": 1.9743140688849975e-05, "loss": 0.7231, "step": 3382 }, { "epoch": 0.09877083880762605, "grad_norm": 0.8555455374555044, "learning_rate": 1.974897840046702e-05, "loss": 0.7266, "step": 3383 }, { "epoch": 0.09880003503547341, "grad_norm": 0.7297633766276216, "learning_rate": 1.9754816112084065e-05, "loss": 0.6789, "step": 3384 }, { "epoch": 0.09882923126332079, "grad_norm": 0.7921044927009053, "learning_rate": 1.976065382370111e-05, "loss": 0.7202, "step": 3385 }, { "epoch": 0.09885842749116815, "grad_norm": 0.7314913854145199, "learning_rate": 1.9766491535318158e-05, "loss": 0.6809, "step": 3386 }, { "epoch": 0.09888762371901551, "grad_norm": 0.759413034960657, "learning_rate": 1.9772329246935203e-05, "loss": 0.6442, "step": 3387 }, { "epoch": 0.09891681994686287, "grad_norm": 0.8056310924045125, "learning_rate": 1.9778166958552248e-05, "loss": 0.6996, "step": 3388 }, { "epoch": 0.09894601617471023, "grad_norm": 0.8255044277876613, "learning_rate": 1.9784004670169293e-05, "loss": 0.7336, "step": 3389 }, { "epoch": 0.09897521240255759, "grad_norm": 0.7219756649354971, "learning_rate": 1.978984238178634e-05, "loss": 0.6429, "step": 3390 }, { "epoch": 0.09900440863040495, "grad_norm": 0.771056532518628, "learning_rate": 1.9795680093403386e-05, "loss": 0.7939, "step": 3391 }, { "epoch": 0.09903360485825231, "grad_norm": 1.0050988285617917, "learning_rate": 1.9801517805020435e-05, "loss": 0.7139, "step": 3392 }, { "epoch": 0.09906280108609967, "grad_norm": 0.7476980880233087, "learning_rate": 1.980735551663748e-05, "loss": 0.7509, "step": 3393 }, { "epoch": 0.09909199731394704, "grad_norm": 0.7364286882322953, "learning_rate": 1.9813193228254525e-05, "loss": 0.7261, "step": 3394 }, { "epoch": 0.0991211935417944, "grad_norm": 0.6921703159270608, "learning_rate": 1.9819030939871573e-05, "loss": 0.6288, "step": 3395 }, { "epoch": 0.09915038976964176, "grad_norm": 0.8354197121789249, "learning_rate": 1.9824868651488618e-05, "loss": 0.7519, "step": 3396 }, { "epoch": 0.09917958599748912, "grad_norm": 0.7377649888981506, "learning_rate": 1.9830706363105667e-05, "loss": 0.7451, "step": 3397 }, { "epoch": 0.09920878222533648, "grad_norm": 0.7308797101655444, "learning_rate": 1.983654407472271e-05, "loss": 0.7155, "step": 3398 }, { "epoch": 0.09923797845318384, "grad_norm": 0.7382031162653232, "learning_rate": 1.9842381786339757e-05, "loss": 0.5497, "step": 3399 }, { "epoch": 0.09926717468103122, "grad_norm": 0.7110908886599115, "learning_rate": 1.98482194979568e-05, "loss": 0.6805, "step": 3400 }, { "epoch": 0.09929637090887858, "grad_norm": 0.8867622510658624, "learning_rate": 1.985405720957385e-05, "loss": 0.6961, "step": 3401 }, { "epoch": 0.09932556713672594, "grad_norm": 0.7194093142828407, "learning_rate": 1.9859894921190895e-05, "loss": 0.6868, "step": 3402 }, { "epoch": 0.0993547633645733, "grad_norm": 0.7271159168414477, "learning_rate": 1.986573263280794e-05, "loss": 0.6952, "step": 3403 }, { "epoch": 0.09938395959242066, "grad_norm": 0.8032920225663644, "learning_rate": 1.9871570344424985e-05, "loss": 0.7445, "step": 3404 }, { "epoch": 0.09941315582026802, "grad_norm": 0.701118096961551, "learning_rate": 1.9877408056042033e-05, "loss": 0.6112, "step": 3405 }, { "epoch": 0.09944235204811538, "grad_norm": 0.7551882668540986, "learning_rate": 1.9883245767659078e-05, "loss": 0.6645, "step": 3406 }, { "epoch": 0.09947154827596275, "grad_norm": 0.7429154266548387, "learning_rate": 1.9889083479276127e-05, "loss": 0.7258, "step": 3407 }, { "epoch": 0.0995007445038101, "grad_norm": 0.8025978406142762, "learning_rate": 1.989492119089317e-05, "loss": 0.7096, "step": 3408 }, { "epoch": 0.09952994073165747, "grad_norm": 0.7023462851770421, "learning_rate": 1.9900758902510217e-05, "loss": 0.674, "step": 3409 }, { "epoch": 0.09955913695950483, "grad_norm": 0.7796750147172965, "learning_rate": 1.9906596614127265e-05, "loss": 0.7308, "step": 3410 }, { "epoch": 0.09958833318735219, "grad_norm": 0.7638645836056035, "learning_rate": 1.991243432574431e-05, "loss": 0.6557, "step": 3411 }, { "epoch": 0.09961752941519955, "grad_norm": 0.7394372344114641, "learning_rate": 1.9918272037361358e-05, "loss": 0.6992, "step": 3412 }, { "epoch": 0.09964672564304691, "grad_norm": 0.7240805251034079, "learning_rate": 1.9924109748978403e-05, "loss": 0.6699, "step": 3413 }, { "epoch": 0.09967592187089427, "grad_norm": 0.9390334407121375, "learning_rate": 1.9929947460595448e-05, "loss": 0.7465, "step": 3414 }, { "epoch": 0.09970511809874165, "grad_norm": 0.7311829395690665, "learning_rate": 1.9935785172212493e-05, "loss": 0.6211, "step": 3415 }, { "epoch": 0.09973431432658901, "grad_norm": 0.7249640586847177, "learning_rate": 1.994162288382954e-05, "loss": 0.6835, "step": 3416 }, { "epoch": 0.09976351055443637, "grad_norm": 0.7361205732520786, "learning_rate": 1.9947460595446587e-05, "loss": 0.706, "step": 3417 }, { "epoch": 0.09979270678228373, "grad_norm": 0.7244448195560974, "learning_rate": 1.995329830706363e-05, "loss": 0.6502, "step": 3418 }, { "epoch": 0.0998219030101311, "grad_norm": 0.7601528973032118, "learning_rate": 1.9959136018680677e-05, "loss": 0.7222, "step": 3419 }, { "epoch": 0.09985109923797846, "grad_norm": 0.7506747781723666, "learning_rate": 1.9964973730297725e-05, "loss": 0.7314, "step": 3420 }, { "epoch": 0.09988029546582582, "grad_norm": 0.7623222607144419, "learning_rate": 1.997081144191477e-05, "loss": 0.6689, "step": 3421 }, { "epoch": 0.09990949169367318, "grad_norm": 1.2454368401718166, "learning_rate": 1.9976649153531818e-05, "loss": 0.6729, "step": 3422 }, { "epoch": 0.09993868792152054, "grad_norm": 0.7887358113952857, "learning_rate": 1.9982486865148863e-05, "loss": 0.7662, "step": 3423 }, { "epoch": 0.0999678841493679, "grad_norm": 0.7822815642328474, "learning_rate": 1.9988324576765908e-05, "loss": 0.7288, "step": 3424 }, { "epoch": 0.09999708037721526, "grad_norm": 0.7888459884740259, "learning_rate": 1.9994162288382953e-05, "loss": 0.6136, "step": 3425 }, { "epoch": 0.10002627660506262, "grad_norm": 0.7287350120845226, "learning_rate": 2e-05, "loss": 0.7016, "step": 3426 }, { "epoch": 0.10005547283290998, "grad_norm": 0.6823551850270316, "learning_rate": 1.9999351175993514e-05, "loss": 0.5974, "step": 3427 }, { "epoch": 0.10008466906075734, "grad_norm": 0.7093498443516972, "learning_rate": 1.9998702351987026e-05, "loss": 0.6375, "step": 3428 }, { "epoch": 0.1001138652886047, "grad_norm": 1.0468269340245429, "learning_rate": 1.9998053527980538e-05, "loss": 0.6926, "step": 3429 }, { "epoch": 0.10014306151645208, "grad_norm": 0.8774564589915572, "learning_rate": 1.999740470397405e-05, "loss": 0.7508, "step": 3430 }, { "epoch": 0.10017225774429944, "grad_norm": 0.6985034920023137, "learning_rate": 1.999675587996756e-05, "loss": 0.5549, "step": 3431 }, { "epoch": 0.1002014539721468, "grad_norm": 0.7636131100215118, "learning_rate": 1.999610705596107e-05, "loss": 0.6902, "step": 3432 }, { "epoch": 0.10023065019999416, "grad_norm": 0.9079736848998357, "learning_rate": 1.9995458231954582e-05, "loss": 0.7802, "step": 3433 }, { "epoch": 0.10025984642784153, "grad_norm": 0.7838794431673315, "learning_rate": 1.9994809407948094e-05, "loss": 0.7362, "step": 3434 }, { "epoch": 0.10028904265568889, "grad_norm": 0.6999462859968101, "learning_rate": 1.9994160583941606e-05, "loss": 0.6488, "step": 3435 }, { "epoch": 0.10031823888353625, "grad_norm": 0.7161717998810814, "learning_rate": 1.9993511759935122e-05, "loss": 0.7318, "step": 3436 }, { "epoch": 0.10034743511138361, "grad_norm": 0.730908508221618, "learning_rate": 1.999286293592863e-05, "loss": 0.6954, "step": 3437 }, { "epoch": 0.10037663133923097, "grad_norm": 0.7403420021532089, "learning_rate": 1.9992214111922143e-05, "loss": 0.7086, "step": 3438 }, { "epoch": 0.10040582756707833, "grad_norm": 0.7334227454031929, "learning_rate": 1.9991565287915655e-05, "loss": 0.6936, "step": 3439 }, { "epoch": 0.10043502379492569, "grad_norm": 0.734855368848857, "learning_rate": 1.9990916463909167e-05, "loss": 0.6527, "step": 3440 }, { "epoch": 0.10046422002277305, "grad_norm": 0.7052661899970316, "learning_rate": 1.999026763990268e-05, "loss": 0.6197, "step": 3441 }, { "epoch": 0.10049341625062042, "grad_norm": 0.7261273703605099, "learning_rate": 1.998961881589619e-05, "loss": 0.6711, "step": 3442 }, { "epoch": 0.10052261247846778, "grad_norm": 0.7605336905747391, "learning_rate": 1.9988969991889703e-05, "loss": 0.7428, "step": 3443 }, { "epoch": 0.10055180870631514, "grad_norm": 0.8281848288150745, "learning_rate": 1.9988321167883215e-05, "loss": 0.7921, "step": 3444 }, { "epoch": 0.10058100493416251, "grad_norm": 0.7149669901242761, "learning_rate": 1.9987672343876723e-05, "loss": 0.6467, "step": 3445 }, { "epoch": 0.10061020116200987, "grad_norm": 0.7772085379625472, "learning_rate": 1.9987023519870235e-05, "loss": 0.7216, "step": 3446 }, { "epoch": 0.10063939738985724, "grad_norm": 0.7236371402209842, "learning_rate": 1.9986374695863747e-05, "loss": 0.6648, "step": 3447 }, { "epoch": 0.1006685936177046, "grad_norm": 0.8235584480494169, "learning_rate": 1.998572587185726e-05, "loss": 0.7793, "step": 3448 }, { "epoch": 0.10069778984555196, "grad_norm": 0.7709437284295012, "learning_rate": 1.998507704785077e-05, "loss": 0.6949, "step": 3449 }, { "epoch": 0.10072698607339932, "grad_norm": 0.6865057403077226, "learning_rate": 1.9984428223844284e-05, "loss": 0.6032, "step": 3450 }, { "epoch": 0.10075618230124668, "grad_norm": 0.6953449941184644, "learning_rate": 1.9983779399837796e-05, "loss": 0.6374, "step": 3451 }, { "epoch": 0.10078537852909404, "grad_norm": 0.7550208406048445, "learning_rate": 1.9983130575831308e-05, "loss": 0.701, "step": 3452 }, { "epoch": 0.1008145747569414, "grad_norm": 0.7484762491068419, "learning_rate": 1.998248175182482e-05, "loss": 0.756, "step": 3453 }, { "epoch": 0.10084377098478876, "grad_norm": 0.6985745254001537, "learning_rate": 1.9981832927818328e-05, "loss": 0.6313, "step": 3454 }, { "epoch": 0.10087296721263612, "grad_norm": 0.7553300562024535, "learning_rate": 1.9981184103811844e-05, "loss": 0.709, "step": 3455 }, { "epoch": 0.10090216344048349, "grad_norm": 0.9349299930189938, "learning_rate": 1.9980535279805356e-05, "loss": 0.7158, "step": 3456 }, { "epoch": 0.10093135966833085, "grad_norm": 0.831064683612193, "learning_rate": 1.9979886455798868e-05, "loss": 0.7572, "step": 3457 }, { "epoch": 0.10096055589617821, "grad_norm": 0.7499747215419279, "learning_rate": 1.997923763179238e-05, "loss": 0.7059, "step": 3458 }, { "epoch": 0.10098975212402557, "grad_norm": 0.7493426286087751, "learning_rate": 1.9978588807785892e-05, "loss": 0.6397, "step": 3459 }, { "epoch": 0.10101894835187294, "grad_norm": 0.7768668523016848, "learning_rate": 1.99779399837794e-05, "loss": 0.6689, "step": 3460 }, { "epoch": 0.1010481445797203, "grad_norm": 0.7351398312413138, "learning_rate": 1.9977291159772912e-05, "loss": 0.6986, "step": 3461 }, { "epoch": 0.10107734080756767, "grad_norm": 0.8954024223718644, "learning_rate": 1.9976642335766424e-05, "loss": 0.7708, "step": 3462 }, { "epoch": 0.10110653703541503, "grad_norm": 0.7703057950987411, "learning_rate": 1.9975993511759936e-05, "loss": 0.7069, "step": 3463 }, { "epoch": 0.10113573326326239, "grad_norm": 0.7119601503688691, "learning_rate": 1.997534468775345e-05, "loss": 0.6608, "step": 3464 }, { "epoch": 0.10116492949110975, "grad_norm": 0.7716354295079411, "learning_rate": 1.997469586374696e-05, "loss": 0.7591, "step": 3465 }, { "epoch": 0.10119412571895711, "grad_norm": 0.7112418257919727, "learning_rate": 1.9974047039740473e-05, "loss": 0.6529, "step": 3466 }, { "epoch": 0.10122332194680447, "grad_norm": 0.7890624816004217, "learning_rate": 1.9973398215733985e-05, "loss": 0.6825, "step": 3467 }, { "epoch": 0.10125251817465183, "grad_norm": 0.8962501411592578, "learning_rate": 1.9972749391727497e-05, "loss": 0.7474, "step": 3468 }, { "epoch": 0.1012817144024992, "grad_norm": 0.7970416943460223, "learning_rate": 1.9972100567721005e-05, "loss": 0.7348, "step": 3469 }, { "epoch": 0.10131091063034656, "grad_norm": 0.7726768523951082, "learning_rate": 1.9971451743714517e-05, "loss": 0.7055, "step": 3470 }, { "epoch": 0.10134010685819392, "grad_norm": 0.7523583481130478, "learning_rate": 1.997080291970803e-05, "loss": 0.6694, "step": 3471 }, { "epoch": 0.10136930308604128, "grad_norm": 0.6668225599163612, "learning_rate": 1.997015409570154e-05, "loss": 0.5728, "step": 3472 }, { "epoch": 0.10139849931388864, "grad_norm": 0.7779017997225546, "learning_rate": 1.9969505271695053e-05, "loss": 0.8141, "step": 3473 }, { "epoch": 0.101427695541736, "grad_norm": 0.7467272168401484, "learning_rate": 1.996885644768857e-05, "loss": 0.697, "step": 3474 }, { "epoch": 0.10145689176958338, "grad_norm": 0.758680091386631, "learning_rate": 1.9968207623682077e-05, "loss": 0.7059, "step": 3475 }, { "epoch": 0.10148608799743074, "grad_norm": 0.8351999952013853, "learning_rate": 1.996755879967559e-05, "loss": 0.6635, "step": 3476 }, { "epoch": 0.1015152842252781, "grad_norm": 0.7579293320658161, "learning_rate": 1.99669099756691e-05, "loss": 0.693, "step": 3477 }, { "epoch": 0.10154448045312546, "grad_norm": 0.755400460437788, "learning_rate": 1.9966261151662613e-05, "loss": 0.7644, "step": 3478 }, { "epoch": 0.10157367668097282, "grad_norm": 0.767454467196144, "learning_rate": 1.9965612327656126e-05, "loss": 0.7611, "step": 3479 }, { "epoch": 0.10160287290882018, "grad_norm": 0.7756211467966211, "learning_rate": 1.9964963503649638e-05, "loss": 0.6951, "step": 3480 }, { "epoch": 0.10163206913666754, "grad_norm": 0.7716853590409065, "learning_rate": 1.996431467964315e-05, "loss": 0.6279, "step": 3481 }, { "epoch": 0.1016612653645149, "grad_norm": 0.7242223072563322, "learning_rate": 1.996366585563666e-05, "loss": 0.7002, "step": 3482 }, { "epoch": 0.10169046159236227, "grad_norm": 0.7991053946089945, "learning_rate": 1.996301703163017e-05, "loss": 0.6953, "step": 3483 }, { "epoch": 0.10171965782020963, "grad_norm": 0.7725653942889623, "learning_rate": 1.9962368207623682e-05, "loss": 0.6947, "step": 3484 }, { "epoch": 0.10174885404805699, "grad_norm": 0.7524180451729495, "learning_rate": 1.9961719383617194e-05, "loss": 0.7226, "step": 3485 }, { "epoch": 0.10177805027590435, "grad_norm": 0.7723326645849724, "learning_rate": 1.9961070559610706e-05, "loss": 0.7558, "step": 3486 }, { "epoch": 0.10180724650375171, "grad_norm": 0.7257251395548976, "learning_rate": 1.996042173560422e-05, "loss": 0.6645, "step": 3487 }, { "epoch": 0.10183644273159907, "grad_norm": 0.7679829690923063, "learning_rate": 1.995977291159773e-05, "loss": 0.6452, "step": 3488 }, { "epoch": 0.10186563895944643, "grad_norm": 0.7605244658965755, "learning_rate": 1.9959124087591242e-05, "loss": 0.7028, "step": 3489 }, { "epoch": 0.10189483518729381, "grad_norm": 0.7439880138820356, "learning_rate": 1.9958475263584754e-05, "loss": 0.7141, "step": 3490 }, { "epoch": 0.10192403141514117, "grad_norm": 0.7196934968468904, "learning_rate": 1.9957826439578266e-05, "loss": 0.683, "step": 3491 }, { "epoch": 0.10195322764298853, "grad_norm": 0.6800348149788255, "learning_rate": 1.9957177615571775e-05, "loss": 0.6009, "step": 3492 }, { "epoch": 0.10198242387083589, "grad_norm": 0.696907748703755, "learning_rate": 1.995652879156529e-05, "loss": 0.6671, "step": 3493 }, { "epoch": 0.10201162009868325, "grad_norm": 0.8113167212590674, "learning_rate": 1.9955879967558803e-05, "loss": 0.7345, "step": 3494 }, { "epoch": 0.10204081632653061, "grad_norm": 0.7358893511960234, "learning_rate": 1.9955231143552315e-05, "loss": 0.6627, "step": 3495 }, { "epoch": 0.10207001255437798, "grad_norm": 0.7421848986874148, "learning_rate": 1.9954582319545827e-05, "loss": 0.639, "step": 3496 }, { "epoch": 0.10209920878222534, "grad_norm": 0.8879067063890991, "learning_rate": 1.995393349553934e-05, "loss": 0.7059, "step": 3497 }, { "epoch": 0.1021284050100727, "grad_norm": 0.7308395004862147, "learning_rate": 1.9953284671532847e-05, "loss": 0.6897, "step": 3498 }, { "epoch": 0.10215760123792006, "grad_norm": 0.8925691502071115, "learning_rate": 1.995263584752636e-05, "loss": 0.6649, "step": 3499 }, { "epoch": 0.10218679746576742, "grad_norm": 0.7609248359247066, "learning_rate": 1.995198702351987e-05, "loss": 0.6799, "step": 3500 }, { "epoch": 0.10221599369361478, "grad_norm": 0.7620780269838846, "learning_rate": 1.9951338199513383e-05, "loss": 0.6814, "step": 3501 }, { "epoch": 0.10224518992146214, "grad_norm": 0.6729465732802576, "learning_rate": 1.9950689375506895e-05, "loss": 0.6131, "step": 3502 }, { "epoch": 0.1022743861493095, "grad_norm": 0.7733350966964725, "learning_rate": 1.9950040551500407e-05, "loss": 0.7532, "step": 3503 }, { "epoch": 0.10230358237715687, "grad_norm": 0.8153480496158876, "learning_rate": 1.994939172749392e-05, "loss": 0.7032, "step": 3504 }, { "epoch": 0.10233277860500424, "grad_norm": 0.7109213335032061, "learning_rate": 1.994874290348743e-05, "loss": 0.6142, "step": 3505 }, { "epoch": 0.1023619748328516, "grad_norm": 0.7140380375303385, "learning_rate": 1.9948094079480943e-05, "loss": 0.6963, "step": 3506 }, { "epoch": 0.10239117106069896, "grad_norm": 0.7122898633619664, "learning_rate": 1.9947445255474452e-05, "loss": 0.657, "step": 3507 }, { "epoch": 0.10242036728854632, "grad_norm": 0.7242970177940808, "learning_rate": 1.9946796431467964e-05, "loss": 0.6534, "step": 3508 }, { "epoch": 0.10244956351639369, "grad_norm": 1.194983043253194, "learning_rate": 1.9946147607461476e-05, "loss": 0.732, "step": 3509 }, { "epoch": 0.10247875974424105, "grad_norm": 0.7065860456242463, "learning_rate": 1.9945498783454988e-05, "loss": 0.6271, "step": 3510 }, { "epoch": 0.10250795597208841, "grad_norm": 0.721778899968129, "learning_rate": 1.9944849959448504e-05, "loss": 0.6513, "step": 3511 }, { "epoch": 0.10253715219993577, "grad_norm": 0.6923036428970837, "learning_rate": 1.9944201135442016e-05, "loss": 0.674, "step": 3512 }, { "epoch": 0.10256634842778313, "grad_norm": 0.7402384476861731, "learning_rate": 1.9943552311435524e-05, "loss": 0.6856, "step": 3513 }, { "epoch": 0.10259554465563049, "grad_norm": 0.7200007446199509, "learning_rate": 1.9942903487429036e-05, "loss": 0.6627, "step": 3514 }, { "epoch": 0.10262474088347785, "grad_norm": 0.7215337312202983, "learning_rate": 1.994225466342255e-05, "loss": 0.6788, "step": 3515 }, { "epoch": 0.10265393711132521, "grad_norm": 0.7177293928891071, "learning_rate": 1.994160583941606e-05, "loss": 0.6414, "step": 3516 }, { "epoch": 0.10268313333917257, "grad_norm": 0.6317381235915605, "learning_rate": 1.9940957015409572e-05, "loss": 0.5708, "step": 3517 }, { "epoch": 0.10271232956701994, "grad_norm": 0.8044190276078096, "learning_rate": 1.9940308191403084e-05, "loss": 0.7314, "step": 3518 }, { "epoch": 0.1027415257948673, "grad_norm": 0.7548325811182393, "learning_rate": 1.9939659367396596e-05, "loss": 0.733, "step": 3519 }, { "epoch": 0.10277072202271466, "grad_norm": 0.718903698938721, "learning_rate": 1.993901054339011e-05, "loss": 0.6718, "step": 3520 }, { "epoch": 0.10279991825056203, "grad_norm": 0.7068858324698194, "learning_rate": 1.9938361719383617e-05, "loss": 0.5955, "step": 3521 }, { "epoch": 0.1028291144784094, "grad_norm": 0.7037843007601342, "learning_rate": 1.993771289537713e-05, "loss": 0.6683, "step": 3522 }, { "epoch": 0.10285831070625676, "grad_norm": 0.74441562366324, "learning_rate": 1.993706407137064e-05, "loss": 0.6818, "step": 3523 }, { "epoch": 0.10288750693410412, "grad_norm": 0.7495173838822975, "learning_rate": 1.9936415247364153e-05, "loss": 0.6237, "step": 3524 }, { "epoch": 0.10291670316195148, "grad_norm": 0.7463631281577829, "learning_rate": 1.9935766423357665e-05, "loss": 0.6686, "step": 3525 }, { "epoch": 0.10294589938979884, "grad_norm": 0.7728050905781088, "learning_rate": 1.9935117599351177e-05, "loss": 0.7663, "step": 3526 }, { "epoch": 0.1029750956176462, "grad_norm": 0.6665257307261292, "learning_rate": 1.993446877534469e-05, "loss": 0.5789, "step": 3527 }, { "epoch": 0.10300429184549356, "grad_norm": 0.7935942615845057, "learning_rate": 1.99338199513382e-05, "loss": 0.7318, "step": 3528 }, { "epoch": 0.10303348807334092, "grad_norm": 0.7098661369313222, "learning_rate": 1.9933171127331713e-05, "loss": 0.6784, "step": 3529 }, { "epoch": 0.10306268430118828, "grad_norm": 0.7607877735890348, "learning_rate": 1.9932522303325225e-05, "loss": 0.7461, "step": 3530 }, { "epoch": 0.10309188052903565, "grad_norm": 0.8194678639890485, "learning_rate": 1.9931873479318737e-05, "loss": 0.6531, "step": 3531 }, { "epoch": 0.103121076756883, "grad_norm": 0.6872075399893454, "learning_rate": 1.993122465531225e-05, "loss": 0.663, "step": 3532 }, { "epoch": 0.10315027298473037, "grad_norm": 0.7868259352652054, "learning_rate": 1.993057583130576e-05, "loss": 0.7736, "step": 3533 }, { "epoch": 0.10317946921257773, "grad_norm": 0.7323837636299344, "learning_rate": 1.9929927007299273e-05, "loss": 0.6951, "step": 3534 }, { "epoch": 0.10320866544042509, "grad_norm": 0.7200565642405683, "learning_rate": 1.9929278183292785e-05, "loss": 0.7009, "step": 3535 }, { "epoch": 0.10323786166827247, "grad_norm": 0.70553188151481, "learning_rate": 1.9928629359286294e-05, "loss": 0.6855, "step": 3536 }, { "epoch": 0.10326705789611983, "grad_norm": 0.6755145132697374, "learning_rate": 1.9927980535279806e-05, "loss": 0.6002, "step": 3537 }, { "epoch": 0.10329625412396719, "grad_norm": 0.7115247456847671, "learning_rate": 1.9927331711273318e-05, "loss": 0.6422, "step": 3538 }, { "epoch": 0.10332545035181455, "grad_norm": 0.7575572415075628, "learning_rate": 1.992668288726683e-05, "loss": 0.7064, "step": 3539 }, { "epoch": 0.10335464657966191, "grad_norm": 0.8423101610005274, "learning_rate": 1.9926034063260342e-05, "loss": 0.6512, "step": 3540 }, { "epoch": 0.10338384280750927, "grad_norm": 0.803232778891394, "learning_rate": 1.9925385239253854e-05, "loss": 0.7546, "step": 3541 }, { "epoch": 0.10341303903535663, "grad_norm": 0.7226320662713556, "learning_rate": 1.9924736415247366e-05, "loss": 0.6792, "step": 3542 }, { "epoch": 0.103442235263204, "grad_norm": 0.6923940356162612, "learning_rate": 1.992408759124088e-05, "loss": 0.6394, "step": 3543 }, { "epoch": 0.10347143149105135, "grad_norm": 0.7927523024631187, "learning_rate": 1.992343876723439e-05, "loss": 0.7211, "step": 3544 }, { "epoch": 0.10350062771889872, "grad_norm": 0.7414205578034179, "learning_rate": 1.99227899432279e-05, "loss": 0.7269, "step": 3545 }, { "epoch": 0.10352982394674608, "grad_norm": 0.764068538331511, "learning_rate": 1.992214111922141e-05, "loss": 0.717, "step": 3546 }, { "epoch": 0.10355902017459344, "grad_norm": 0.731348764086581, "learning_rate": 1.9921492295214923e-05, "loss": 0.7192, "step": 3547 }, { "epoch": 0.1035882164024408, "grad_norm": 0.7670358796333587, "learning_rate": 1.9920843471208435e-05, "loss": 0.7559, "step": 3548 }, { "epoch": 0.10361741263028816, "grad_norm": 0.9336630910839786, "learning_rate": 1.992019464720195e-05, "loss": 0.751, "step": 3549 }, { "epoch": 0.10364660885813552, "grad_norm": 0.6217507998309015, "learning_rate": 1.991954582319546e-05, "loss": 0.5338, "step": 3550 }, { "epoch": 0.1036758050859829, "grad_norm": 0.8010936704723305, "learning_rate": 1.991889699918897e-05, "loss": 0.682, "step": 3551 }, { "epoch": 0.10370500131383026, "grad_norm": 0.8191213579576665, "learning_rate": 1.9918248175182483e-05, "loss": 0.7737, "step": 3552 }, { "epoch": 0.10373419754167762, "grad_norm": 0.7829393243797605, "learning_rate": 1.9917599351175995e-05, "loss": 0.7745, "step": 3553 }, { "epoch": 0.10376339376952498, "grad_norm": 0.715800153081431, "learning_rate": 1.9916950527169507e-05, "loss": 0.6283, "step": 3554 }, { "epoch": 0.10379258999737234, "grad_norm": 1.0121831498456875, "learning_rate": 1.991630170316302e-05, "loss": 0.7297, "step": 3555 }, { "epoch": 0.1038217862252197, "grad_norm": 0.7194802004768635, "learning_rate": 1.991565287915653e-05, "loss": 0.6631, "step": 3556 }, { "epoch": 0.10385098245306706, "grad_norm": 0.6945136454145391, "learning_rate": 1.9915004055150043e-05, "loss": 0.6515, "step": 3557 }, { "epoch": 0.10388017868091443, "grad_norm": 0.8117877579945229, "learning_rate": 1.9914355231143555e-05, "loss": 0.7216, "step": 3558 }, { "epoch": 0.10390937490876179, "grad_norm": 0.7218181899020609, "learning_rate": 1.9913706407137064e-05, "loss": 0.7019, "step": 3559 }, { "epoch": 0.10393857113660915, "grad_norm": 0.7142894784090441, "learning_rate": 1.9913057583130576e-05, "loss": 0.6974, "step": 3560 }, { "epoch": 0.10396776736445651, "grad_norm": 0.7836228907778982, "learning_rate": 1.9912408759124088e-05, "loss": 0.7072, "step": 3561 }, { "epoch": 0.10399696359230387, "grad_norm": 0.7349408365452669, "learning_rate": 1.99117599351176e-05, "loss": 0.6872, "step": 3562 }, { "epoch": 0.10402615982015123, "grad_norm": 0.7387424230744271, "learning_rate": 1.9911111111111112e-05, "loss": 0.7152, "step": 3563 }, { "epoch": 0.10405535604799859, "grad_norm": 0.9095042054154633, "learning_rate": 1.9910462287104624e-05, "loss": 0.702, "step": 3564 }, { "epoch": 0.10408455227584595, "grad_norm": 0.7365016630825468, "learning_rate": 1.9909813463098136e-05, "loss": 0.7046, "step": 3565 }, { "epoch": 0.10411374850369333, "grad_norm": 0.7704924821706706, "learning_rate": 1.9909164639091648e-05, "loss": 0.714, "step": 3566 }, { "epoch": 0.10414294473154069, "grad_norm": 0.6880840202721549, "learning_rate": 1.990851581508516e-05, "loss": 0.6087, "step": 3567 }, { "epoch": 0.10417214095938805, "grad_norm": 0.6873140395236625, "learning_rate": 1.9907866991078672e-05, "loss": 0.6064, "step": 3568 }, { "epoch": 0.10420133718723541, "grad_norm": 0.827487626692842, "learning_rate": 1.9907218167072184e-05, "loss": 0.6508, "step": 3569 }, { "epoch": 0.10423053341508277, "grad_norm": 0.7346719898172746, "learning_rate": 1.9906569343065696e-05, "loss": 0.6586, "step": 3570 }, { "epoch": 0.10425972964293014, "grad_norm": 0.7094867843413069, "learning_rate": 1.990592051905921e-05, "loss": 0.6116, "step": 3571 }, { "epoch": 0.1042889258707775, "grad_norm": 0.7283643020820973, "learning_rate": 1.990527169505272e-05, "loss": 0.6151, "step": 3572 }, { "epoch": 0.10431812209862486, "grad_norm": 0.6943310026932574, "learning_rate": 1.9904622871046232e-05, "loss": 0.6426, "step": 3573 }, { "epoch": 0.10434731832647222, "grad_norm": 0.6853990536312654, "learning_rate": 1.990397404703974e-05, "loss": 0.6942, "step": 3574 }, { "epoch": 0.10437651455431958, "grad_norm": 0.6766711442565101, "learning_rate": 1.9903325223033253e-05, "loss": 0.6218, "step": 3575 }, { "epoch": 0.10440571078216694, "grad_norm": 0.850103902558641, "learning_rate": 1.9902676399026765e-05, "loss": 0.6651, "step": 3576 }, { "epoch": 0.1044349070100143, "grad_norm": 0.7362862643410258, "learning_rate": 1.9902027575020277e-05, "loss": 0.6841, "step": 3577 }, { "epoch": 0.10446410323786166, "grad_norm": 0.7257623795577061, "learning_rate": 1.990137875101379e-05, "loss": 0.6697, "step": 3578 }, { "epoch": 0.10449329946570902, "grad_norm": 0.6822259991088169, "learning_rate": 1.99007299270073e-05, "loss": 0.6102, "step": 3579 }, { "epoch": 0.10452249569355639, "grad_norm": 0.7394681599064771, "learning_rate": 1.9900081103000813e-05, "loss": 0.6275, "step": 3580 }, { "epoch": 0.10455169192140376, "grad_norm": 0.6796879482771371, "learning_rate": 1.9899432278994325e-05, "loss": 0.65, "step": 3581 }, { "epoch": 0.10458088814925112, "grad_norm": 0.7207934526858876, "learning_rate": 1.9898783454987837e-05, "loss": 0.6245, "step": 3582 }, { "epoch": 0.10461008437709848, "grad_norm": 0.7546110499588403, "learning_rate": 1.9898134630981346e-05, "loss": 0.7169, "step": 3583 }, { "epoch": 0.10463928060494584, "grad_norm": 0.8473105088037611, "learning_rate": 1.9897485806974858e-05, "loss": 0.7534, "step": 3584 }, { "epoch": 0.1046684768327932, "grad_norm": 0.7126233836842079, "learning_rate": 1.989683698296837e-05, "loss": 0.6471, "step": 3585 }, { "epoch": 0.10469767306064057, "grad_norm": 0.7120888249837455, "learning_rate": 1.9896188158961882e-05, "loss": 0.6745, "step": 3586 }, { "epoch": 0.10472686928848793, "grad_norm": 0.7115939556607481, "learning_rate": 1.9895539334955397e-05, "loss": 0.6514, "step": 3587 }, { "epoch": 0.10475606551633529, "grad_norm": 0.847308524535381, "learning_rate": 1.9894890510948906e-05, "loss": 0.7099, "step": 3588 }, { "epoch": 0.10478526174418265, "grad_norm": 0.8879169862814055, "learning_rate": 1.9894241686942418e-05, "loss": 0.7282, "step": 3589 }, { "epoch": 0.10481445797203001, "grad_norm": 0.7500018814216367, "learning_rate": 1.989359286293593e-05, "loss": 0.704, "step": 3590 }, { "epoch": 0.10484365419987737, "grad_norm": 0.6525350132068856, "learning_rate": 1.9892944038929442e-05, "loss": 0.5859, "step": 3591 }, { "epoch": 0.10487285042772473, "grad_norm": 0.6958667648186547, "learning_rate": 1.9892295214922954e-05, "loss": 0.6131, "step": 3592 }, { "epoch": 0.1049020466555721, "grad_norm": 0.7422383526207718, "learning_rate": 1.9891646390916466e-05, "loss": 0.7299, "step": 3593 }, { "epoch": 0.10493124288341946, "grad_norm": 0.7848860781669471, "learning_rate": 1.9890997566909978e-05, "loss": 0.7389, "step": 3594 }, { "epoch": 0.10496043911126682, "grad_norm": 0.8762911823144832, "learning_rate": 1.989034874290349e-05, "loss": 0.7221, "step": 3595 }, { "epoch": 0.10498963533911419, "grad_norm": 0.6848795995100767, "learning_rate": 1.9889699918897002e-05, "loss": 0.6222, "step": 3596 }, { "epoch": 0.10501883156696155, "grad_norm": 0.7678233995669338, "learning_rate": 1.988905109489051e-05, "loss": 0.7498, "step": 3597 }, { "epoch": 0.10504802779480892, "grad_norm": 0.7822045840962384, "learning_rate": 1.9888402270884023e-05, "loss": 0.812, "step": 3598 }, { "epoch": 0.10507722402265628, "grad_norm": 0.7105296890567367, "learning_rate": 1.9887753446877535e-05, "loss": 0.6417, "step": 3599 }, { "epoch": 0.10510642025050364, "grad_norm": 0.6790922829774413, "learning_rate": 1.9887104622871047e-05, "loss": 0.6569, "step": 3600 }, { "epoch": 0.105135616478351, "grad_norm": 0.6960233692083102, "learning_rate": 1.988645579886456e-05, "loss": 0.6042, "step": 3601 }, { "epoch": 0.10516481270619836, "grad_norm": 0.7863057235015224, "learning_rate": 1.988580697485807e-05, "loss": 0.6978, "step": 3602 }, { "epoch": 0.10519400893404572, "grad_norm": 0.6881452879724791, "learning_rate": 1.9885158150851583e-05, "loss": 0.5946, "step": 3603 }, { "epoch": 0.10522320516189308, "grad_norm": 0.7389903595605917, "learning_rate": 1.9884509326845095e-05, "loss": 0.6971, "step": 3604 }, { "epoch": 0.10525240138974044, "grad_norm": 0.7112414383515432, "learning_rate": 1.9883860502838607e-05, "loss": 0.606, "step": 3605 }, { "epoch": 0.1052815976175878, "grad_norm": 0.7543129157948766, "learning_rate": 1.988321167883212e-05, "loss": 0.7, "step": 3606 }, { "epoch": 0.10531079384543517, "grad_norm": 0.6674263187626062, "learning_rate": 1.988256285482563e-05, "loss": 0.6656, "step": 3607 }, { "epoch": 0.10533999007328253, "grad_norm": 0.7473265371637609, "learning_rate": 1.9881914030819143e-05, "loss": 0.7183, "step": 3608 }, { "epoch": 0.10536918630112989, "grad_norm": 0.8096144726024777, "learning_rate": 1.9881265206812655e-05, "loss": 0.8092, "step": 3609 }, { "epoch": 0.10539838252897725, "grad_norm": 0.7394038299561944, "learning_rate": 1.9880616382806167e-05, "loss": 0.7288, "step": 3610 }, { "epoch": 0.10542757875682462, "grad_norm": 0.7446929518112559, "learning_rate": 1.987996755879968e-05, "loss": 0.6376, "step": 3611 }, { "epoch": 0.10545677498467199, "grad_norm": 0.7291607205159477, "learning_rate": 1.9879318734793188e-05, "loss": 0.6699, "step": 3612 }, { "epoch": 0.10548597121251935, "grad_norm": 0.7518703863982485, "learning_rate": 1.98786699107867e-05, "loss": 0.745, "step": 3613 }, { "epoch": 0.10551516744036671, "grad_norm": 0.7206791345918041, "learning_rate": 1.9878021086780212e-05, "loss": 0.7008, "step": 3614 }, { "epoch": 0.10554436366821407, "grad_norm": 0.7413931936096875, "learning_rate": 1.9877372262773724e-05, "loss": 0.7121, "step": 3615 }, { "epoch": 0.10557355989606143, "grad_norm": 0.696165614807569, "learning_rate": 1.9876723438767236e-05, "loss": 0.6341, "step": 3616 }, { "epoch": 0.10560275612390879, "grad_norm": 0.7497109202237193, "learning_rate": 1.9876074614760748e-05, "loss": 0.7359, "step": 3617 }, { "epoch": 0.10563195235175615, "grad_norm": 0.6933108190714841, "learning_rate": 1.987542579075426e-05, "loss": 0.5149, "step": 3618 }, { "epoch": 0.10566114857960351, "grad_norm": 0.7984366894314938, "learning_rate": 1.9874776966747772e-05, "loss": 0.7411, "step": 3619 }, { "epoch": 0.10569034480745088, "grad_norm": 0.8861176175284371, "learning_rate": 1.9874128142741284e-05, "loss": 0.6363, "step": 3620 }, { "epoch": 0.10571954103529824, "grad_norm": 0.7542812855653357, "learning_rate": 1.9873479318734793e-05, "loss": 0.7385, "step": 3621 }, { "epoch": 0.1057487372631456, "grad_norm": 0.8012956640723339, "learning_rate": 1.9872830494728305e-05, "loss": 0.7052, "step": 3622 }, { "epoch": 0.10577793349099296, "grad_norm": 0.7149598025795454, "learning_rate": 1.9872181670721817e-05, "loss": 0.7115, "step": 3623 }, { "epoch": 0.10580712971884032, "grad_norm": 0.7703094637676791, "learning_rate": 1.987153284671533e-05, "loss": 0.7519, "step": 3624 }, { "epoch": 0.10583632594668768, "grad_norm": 1.101141581557159, "learning_rate": 1.9870884022708844e-05, "loss": 0.6148, "step": 3625 }, { "epoch": 0.10586552217453506, "grad_norm": 0.661894838834986, "learning_rate": 1.9870235198702353e-05, "loss": 0.6023, "step": 3626 }, { "epoch": 0.10589471840238242, "grad_norm": 0.7381688270853767, "learning_rate": 1.9869586374695865e-05, "loss": 0.6746, "step": 3627 }, { "epoch": 0.10592391463022978, "grad_norm": 0.8021325387770376, "learning_rate": 1.9868937550689377e-05, "loss": 0.7933, "step": 3628 }, { "epoch": 0.10595311085807714, "grad_norm": 0.7499390552426515, "learning_rate": 1.986828872668289e-05, "loss": 0.7265, "step": 3629 }, { "epoch": 0.1059823070859245, "grad_norm": 0.6763965247468389, "learning_rate": 1.98676399026764e-05, "loss": 0.5754, "step": 3630 }, { "epoch": 0.10601150331377186, "grad_norm": 0.7137543816512008, "learning_rate": 1.9866991078669913e-05, "loss": 0.7115, "step": 3631 }, { "epoch": 0.10604069954161922, "grad_norm": 0.7725664082844078, "learning_rate": 1.9866342254663425e-05, "loss": 0.7698, "step": 3632 }, { "epoch": 0.10606989576946659, "grad_norm": 0.6835364445683657, "learning_rate": 1.9865693430656937e-05, "loss": 0.5669, "step": 3633 }, { "epoch": 0.10609909199731395, "grad_norm": 1.0041256896255448, "learning_rate": 1.986504460665045e-05, "loss": 0.7036, "step": 3634 }, { "epoch": 0.10612828822516131, "grad_norm": 0.706920446381124, "learning_rate": 1.9864395782643958e-05, "loss": 0.6601, "step": 3635 }, { "epoch": 0.10615748445300867, "grad_norm": 0.722173840953068, "learning_rate": 1.986374695863747e-05, "loss": 0.6909, "step": 3636 }, { "epoch": 0.10618668068085603, "grad_norm": 1.0349789228497515, "learning_rate": 1.9863098134630982e-05, "loss": 0.7442, "step": 3637 }, { "epoch": 0.10621587690870339, "grad_norm": 0.6729020593380172, "learning_rate": 1.9862449310624494e-05, "loss": 0.6074, "step": 3638 }, { "epoch": 0.10624507313655075, "grad_norm": 0.7126387456597593, "learning_rate": 1.9861800486618006e-05, "loss": 0.6362, "step": 3639 }, { "epoch": 0.10627426936439811, "grad_norm": 0.6894131634406117, "learning_rate": 1.9861151662611518e-05, "loss": 0.6609, "step": 3640 }, { "epoch": 0.10630346559224549, "grad_norm": 0.7471629925482629, "learning_rate": 1.986050283860503e-05, "loss": 0.6703, "step": 3641 }, { "epoch": 0.10633266182009285, "grad_norm": 0.7426589126184056, "learning_rate": 1.9859854014598542e-05, "loss": 0.6993, "step": 3642 }, { "epoch": 0.10636185804794021, "grad_norm": 0.7392763502926383, "learning_rate": 1.9859205190592054e-05, "loss": 0.6837, "step": 3643 }, { "epoch": 0.10639105427578757, "grad_norm": 0.7297752650918164, "learning_rate": 1.9858556366585566e-05, "loss": 0.6355, "step": 3644 }, { "epoch": 0.10642025050363493, "grad_norm": 0.7887298909179588, "learning_rate": 1.9857907542579078e-05, "loss": 0.7477, "step": 3645 }, { "epoch": 0.1064494467314823, "grad_norm": 0.7568200812972158, "learning_rate": 1.985725871857259e-05, "loss": 0.7131, "step": 3646 }, { "epoch": 0.10647864295932966, "grad_norm": 0.7242618910029781, "learning_rate": 1.9856609894566102e-05, "loss": 0.5975, "step": 3647 }, { "epoch": 0.10650783918717702, "grad_norm": 0.7086990475445569, "learning_rate": 1.9855961070559614e-05, "loss": 0.6232, "step": 3648 }, { "epoch": 0.10653703541502438, "grad_norm": 0.7721492315634488, "learning_rate": 1.9855312246553126e-05, "loss": 0.7408, "step": 3649 }, { "epoch": 0.10656623164287174, "grad_norm": 0.8139569484137834, "learning_rate": 1.9854663422546635e-05, "loss": 0.7605, "step": 3650 }, { "epoch": 0.1065954278707191, "grad_norm": 0.7630332085603976, "learning_rate": 1.9854014598540147e-05, "loss": 0.7298, "step": 3651 }, { "epoch": 0.10662462409856646, "grad_norm": 0.9382503606601866, "learning_rate": 1.985336577453366e-05, "loss": 0.7597, "step": 3652 }, { "epoch": 0.10665382032641382, "grad_norm": 0.7339012383056701, "learning_rate": 1.985271695052717e-05, "loss": 0.7327, "step": 3653 }, { "epoch": 0.10668301655426118, "grad_norm": 0.7205245998440172, "learning_rate": 1.9852068126520683e-05, "loss": 0.5951, "step": 3654 }, { "epoch": 0.10671221278210855, "grad_norm": 0.8083142131807252, "learning_rate": 1.9851419302514195e-05, "loss": 0.7595, "step": 3655 }, { "epoch": 0.10674140900995592, "grad_norm": 0.7224611564834745, "learning_rate": 1.9850770478507707e-05, "loss": 0.6477, "step": 3656 }, { "epoch": 0.10677060523780328, "grad_norm": 0.7020093681812649, "learning_rate": 1.985012165450122e-05, "loss": 0.6351, "step": 3657 }, { "epoch": 0.10679980146565064, "grad_norm": 0.7585749739378514, "learning_rate": 1.984947283049473e-05, "loss": 0.7395, "step": 3658 }, { "epoch": 0.106828997693498, "grad_norm": 0.7157719727144478, "learning_rate": 1.984882400648824e-05, "loss": 0.6809, "step": 3659 }, { "epoch": 0.10685819392134537, "grad_norm": 0.6649558933165243, "learning_rate": 1.984817518248175e-05, "loss": 0.6328, "step": 3660 }, { "epoch": 0.10688739014919273, "grad_norm": 0.8095777931869927, "learning_rate": 1.9847526358475264e-05, "loss": 0.7022, "step": 3661 }, { "epoch": 0.10691658637704009, "grad_norm": 0.7120205759833358, "learning_rate": 1.984687753446878e-05, "loss": 0.6785, "step": 3662 }, { "epoch": 0.10694578260488745, "grad_norm": 0.7206546311188499, "learning_rate": 1.984622871046229e-05, "loss": 0.6594, "step": 3663 }, { "epoch": 0.10697497883273481, "grad_norm": 0.7344360286400647, "learning_rate": 1.98455798864558e-05, "loss": 0.6298, "step": 3664 }, { "epoch": 0.10700417506058217, "grad_norm": 0.7570944187921538, "learning_rate": 1.9844931062449312e-05, "loss": 0.6719, "step": 3665 }, { "epoch": 0.10703337128842953, "grad_norm": 0.6936619758752424, "learning_rate": 1.9844282238442824e-05, "loss": 0.6441, "step": 3666 }, { "epoch": 0.1070625675162769, "grad_norm": 0.7023731297496371, "learning_rate": 1.9843633414436336e-05, "loss": 0.5767, "step": 3667 }, { "epoch": 0.10709176374412425, "grad_norm": 0.7113455726575446, "learning_rate": 1.9842984590429848e-05, "loss": 0.6454, "step": 3668 }, { "epoch": 0.10712095997197162, "grad_norm": 0.8249056350737751, "learning_rate": 1.984233576642336e-05, "loss": 0.8082, "step": 3669 }, { "epoch": 0.10715015619981898, "grad_norm": 0.9372287307274768, "learning_rate": 1.9841686942416872e-05, "loss": 0.7211, "step": 3670 }, { "epoch": 0.10717935242766635, "grad_norm": 0.842296646347593, "learning_rate": 1.9841038118410384e-05, "loss": 0.6925, "step": 3671 }, { "epoch": 0.10720854865551371, "grad_norm": 0.7034550399990593, "learning_rate": 1.9840389294403896e-05, "loss": 0.6536, "step": 3672 }, { "epoch": 0.10723774488336107, "grad_norm": 0.6681656097008438, "learning_rate": 1.9839740470397405e-05, "loss": 0.565, "step": 3673 }, { "epoch": 0.10726694111120844, "grad_norm": 0.7115683944989176, "learning_rate": 1.9839091646390917e-05, "loss": 0.6795, "step": 3674 }, { "epoch": 0.1072961373390558, "grad_norm": 0.672865726948339, "learning_rate": 1.983844282238443e-05, "loss": 0.5997, "step": 3675 }, { "epoch": 0.10732533356690316, "grad_norm": 0.7317077431160647, "learning_rate": 1.983779399837794e-05, "loss": 0.6865, "step": 3676 }, { "epoch": 0.10735452979475052, "grad_norm": 0.773682183451828, "learning_rate": 1.9837145174371453e-05, "loss": 0.6869, "step": 3677 }, { "epoch": 0.10738372602259788, "grad_norm": 0.7663104334939527, "learning_rate": 1.9836496350364965e-05, "loss": 0.7094, "step": 3678 }, { "epoch": 0.10741292225044524, "grad_norm": 0.6593857570289421, "learning_rate": 1.9835847526358477e-05, "loss": 0.6306, "step": 3679 }, { "epoch": 0.1074421184782926, "grad_norm": 0.770406204969435, "learning_rate": 1.983519870235199e-05, "loss": 0.7534, "step": 3680 }, { "epoch": 0.10747131470613996, "grad_norm": 0.670961743528233, "learning_rate": 1.98345498783455e-05, "loss": 0.6292, "step": 3681 }, { "epoch": 0.10750051093398733, "grad_norm": 0.7807131656303016, "learning_rate": 1.9833901054339013e-05, "loss": 0.7754, "step": 3682 }, { "epoch": 0.10752970716183469, "grad_norm": 0.6770816233341025, "learning_rate": 1.9833252230332525e-05, "loss": 0.5829, "step": 3683 }, { "epoch": 0.10755890338968205, "grad_norm": 0.7317367452287722, "learning_rate": 1.9832603406326037e-05, "loss": 0.71, "step": 3684 }, { "epoch": 0.10758809961752941, "grad_norm": 0.6525399741493362, "learning_rate": 1.983195458231955e-05, "loss": 0.5619, "step": 3685 }, { "epoch": 0.10761729584537678, "grad_norm": 0.7343560415782744, "learning_rate": 1.983130575831306e-05, "loss": 0.6348, "step": 3686 }, { "epoch": 0.10764649207322415, "grad_norm": 0.6854001856054278, "learning_rate": 1.9830656934306573e-05, "loss": 0.5556, "step": 3687 }, { "epoch": 0.1076756883010715, "grad_norm": 0.7173728754696042, "learning_rate": 1.983000811030008e-05, "loss": 0.6142, "step": 3688 }, { "epoch": 0.10770488452891887, "grad_norm": 0.7790625801447076, "learning_rate": 1.9829359286293594e-05, "loss": 0.7369, "step": 3689 }, { "epoch": 0.10773408075676623, "grad_norm": 0.8832596234512641, "learning_rate": 1.9828710462287106e-05, "loss": 0.8275, "step": 3690 }, { "epoch": 0.10776327698461359, "grad_norm": 0.964305386200952, "learning_rate": 1.9828061638280618e-05, "loss": 0.7614, "step": 3691 }, { "epoch": 0.10779247321246095, "grad_norm": 0.6799452983514771, "learning_rate": 1.982741281427413e-05, "loss": 0.5668, "step": 3692 }, { "epoch": 0.10782166944030831, "grad_norm": 0.7548582326186304, "learning_rate": 1.9826763990267642e-05, "loss": 0.7183, "step": 3693 }, { "epoch": 0.10785086566815567, "grad_norm": 0.7148564767437773, "learning_rate": 1.9826115166261154e-05, "loss": 0.6439, "step": 3694 }, { "epoch": 0.10788006189600303, "grad_norm": 0.6721039097395795, "learning_rate": 1.9825466342254666e-05, "loss": 0.5955, "step": 3695 }, { "epoch": 0.1079092581238504, "grad_norm": 0.8082090430738222, "learning_rate": 1.9824817518248174e-05, "loss": 0.8048, "step": 3696 }, { "epoch": 0.10793845435169776, "grad_norm": 0.7015233548014327, "learning_rate": 1.9824168694241686e-05, "loss": 0.6238, "step": 3697 }, { "epoch": 0.10796765057954512, "grad_norm": 0.7067600302648204, "learning_rate": 1.98235198702352e-05, "loss": 0.6615, "step": 3698 }, { "epoch": 0.10799684680739248, "grad_norm": 0.6892856150537867, "learning_rate": 1.982287104622871e-05, "loss": 0.6293, "step": 3699 }, { "epoch": 0.10802604303523984, "grad_norm": 0.6738447089197059, "learning_rate": 1.9822222222222226e-05, "loss": 0.5697, "step": 3700 }, { "epoch": 0.10805523926308722, "grad_norm": 0.7365394222573439, "learning_rate": 1.9821573398215738e-05, "loss": 0.7158, "step": 3701 }, { "epoch": 0.10808443549093458, "grad_norm": 0.7172883079826775, "learning_rate": 1.9820924574209247e-05, "loss": 0.6426, "step": 3702 }, { "epoch": 0.10811363171878194, "grad_norm": 0.7370825116461361, "learning_rate": 1.982027575020276e-05, "loss": 0.7259, "step": 3703 }, { "epoch": 0.1081428279466293, "grad_norm": 0.7270769703035438, "learning_rate": 1.981962692619627e-05, "loss": 0.6526, "step": 3704 }, { "epoch": 0.10817202417447666, "grad_norm": 0.6777116624298238, "learning_rate": 1.9818978102189783e-05, "loss": 0.6424, "step": 3705 }, { "epoch": 0.10820122040232402, "grad_norm": 0.7074212129311295, "learning_rate": 1.9818329278183295e-05, "loss": 0.6594, "step": 3706 }, { "epoch": 0.10823041663017138, "grad_norm": 0.7348613371446525, "learning_rate": 1.9817680454176807e-05, "loss": 0.6903, "step": 3707 }, { "epoch": 0.10825961285801874, "grad_norm": 0.6655140156877207, "learning_rate": 1.981703163017032e-05, "loss": 0.5606, "step": 3708 }, { "epoch": 0.1082888090858661, "grad_norm": 0.7794474074726585, "learning_rate": 1.981638280616383e-05, "loss": 0.7206, "step": 3709 }, { "epoch": 0.10831800531371347, "grad_norm": 0.6902894519206818, "learning_rate": 1.9815733982157343e-05, "loss": 0.5977, "step": 3710 }, { "epoch": 0.10834720154156083, "grad_norm": 0.6719979822497449, "learning_rate": 1.981508515815085e-05, "loss": 0.6151, "step": 3711 }, { "epoch": 0.10837639776940819, "grad_norm": 0.7524500431809559, "learning_rate": 1.9814436334144363e-05, "loss": 0.7146, "step": 3712 }, { "epoch": 0.10840559399725555, "grad_norm": 0.7324790185034689, "learning_rate": 1.9813787510137876e-05, "loss": 0.736, "step": 3713 }, { "epoch": 0.10843479022510291, "grad_norm": 0.889388330199259, "learning_rate": 1.9813138686131388e-05, "loss": 0.6785, "step": 3714 }, { "epoch": 0.10846398645295027, "grad_norm": 0.8634341884931467, "learning_rate": 1.98124898621249e-05, "loss": 0.7345, "step": 3715 }, { "epoch": 0.10849318268079763, "grad_norm": 0.7625117648929065, "learning_rate": 1.981184103811841e-05, "loss": 0.6416, "step": 3716 }, { "epoch": 0.10852237890864501, "grad_norm": 0.7445562748848625, "learning_rate": 1.9811192214111924e-05, "loss": 0.6326, "step": 3717 }, { "epoch": 0.10855157513649237, "grad_norm": 0.7441036657458643, "learning_rate": 1.9810543390105436e-05, "loss": 0.6036, "step": 3718 }, { "epoch": 0.10858077136433973, "grad_norm": 0.7606408777980265, "learning_rate": 1.9809894566098948e-05, "loss": 0.7311, "step": 3719 }, { "epoch": 0.10860996759218709, "grad_norm": 0.7583009484038137, "learning_rate": 1.980924574209246e-05, "loss": 0.7509, "step": 3720 }, { "epoch": 0.10863916382003445, "grad_norm": 0.758346542445605, "learning_rate": 1.9808596918085972e-05, "loss": 0.7148, "step": 3721 }, { "epoch": 0.10866836004788182, "grad_norm": 0.7988224909311359, "learning_rate": 1.9807948094079484e-05, "loss": 0.7229, "step": 3722 }, { "epoch": 0.10869755627572918, "grad_norm": 0.6891651807571936, "learning_rate": 1.9807299270072996e-05, "loss": 0.6051, "step": 3723 }, { "epoch": 0.10872675250357654, "grad_norm": 0.6870327088384192, "learning_rate": 1.9806650446066508e-05, "loss": 0.6757, "step": 3724 }, { "epoch": 0.1087559487314239, "grad_norm": 0.7550638032637724, "learning_rate": 1.980600162206002e-05, "loss": 0.7032, "step": 3725 }, { "epoch": 0.10878514495927126, "grad_norm": 0.7226681309193481, "learning_rate": 1.980535279805353e-05, "loss": 0.6816, "step": 3726 }, { "epoch": 0.10881434118711862, "grad_norm": 0.7749131669438223, "learning_rate": 1.980470397404704e-05, "loss": 0.6933, "step": 3727 }, { "epoch": 0.10884353741496598, "grad_norm": 0.77738114430152, "learning_rate": 1.9804055150040553e-05, "loss": 0.7501, "step": 3728 }, { "epoch": 0.10887273364281334, "grad_norm": 0.7458950571271364, "learning_rate": 1.9803406326034065e-05, "loss": 0.6762, "step": 3729 }, { "epoch": 0.1089019298706607, "grad_norm": 0.762815705834172, "learning_rate": 1.9802757502027577e-05, "loss": 0.7175, "step": 3730 }, { "epoch": 0.10893112609850807, "grad_norm": 0.7764909448259428, "learning_rate": 1.980210867802109e-05, "loss": 0.7582, "step": 3731 }, { "epoch": 0.10896032232635544, "grad_norm": 0.6883727939986953, "learning_rate": 1.98014598540146e-05, "loss": 0.6382, "step": 3732 }, { "epoch": 0.1089895185542028, "grad_norm": 0.6795842951479627, "learning_rate": 1.9800811030008113e-05, "loss": 0.6308, "step": 3733 }, { "epoch": 0.10901871478205016, "grad_norm": 0.9792759278576636, "learning_rate": 1.980016220600162e-05, "loss": 0.714, "step": 3734 }, { "epoch": 0.10904791100989752, "grad_norm": 0.8114908033083186, "learning_rate": 1.9799513381995133e-05, "loss": 0.7932, "step": 3735 }, { "epoch": 0.10907710723774489, "grad_norm": 0.7329936370556319, "learning_rate": 1.9798864557988645e-05, "loss": 0.6887, "step": 3736 }, { "epoch": 0.10910630346559225, "grad_norm": 0.796942090163429, "learning_rate": 1.9798215733982157e-05, "loss": 0.7451, "step": 3737 }, { "epoch": 0.10913549969343961, "grad_norm": 0.8484060444479377, "learning_rate": 1.9797566909975673e-05, "loss": 0.7453, "step": 3738 }, { "epoch": 0.10916469592128697, "grad_norm": 0.7402059173553613, "learning_rate": 1.9796918085969185e-05, "loss": 0.6713, "step": 3739 }, { "epoch": 0.10919389214913433, "grad_norm": 0.6690682603931142, "learning_rate": 1.9796269261962693e-05, "loss": 0.5824, "step": 3740 }, { "epoch": 0.10922308837698169, "grad_norm": 0.7652679544879358, "learning_rate": 1.9795620437956205e-05, "loss": 0.7359, "step": 3741 }, { "epoch": 0.10925228460482905, "grad_norm": 0.6381145645906363, "learning_rate": 1.9794971613949718e-05, "loss": 0.5604, "step": 3742 }, { "epoch": 0.10928148083267641, "grad_norm": 0.7466489747326733, "learning_rate": 1.979432278994323e-05, "loss": 0.638, "step": 3743 }, { "epoch": 0.10931067706052378, "grad_norm": 0.7551371843412045, "learning_rate": 1.979367396593674e-05, "loss": 0.6747, "step": 3744 }, { "epoch": 0.10933987328837114, "grad_norm": 0.8419801245061197, "learning_rate": 1.9793025141930254e-05, "loss": 0.839, "step": 3745 }, { "epoch": 0.1093690695162185, "grad_norm": 0.8150924411063962, "learning_rate": 1.9792376317923766e-05, "loss": 0.7607, "step": 3746 }, { "epoch": 0.10939826574406587, "grad_norm": 0.7266574866242649, "learning_rate": 1.9791727493917278e-05, "loss": 0.6963, "step": 3747 }, { "epoch": 0.10942746197191323, "grad_norm": 0.8354648318034001, "learning_rate": 1.979107866991079e-05, "loss": 0.8041, "step": 3748 }, { "epoch": 0.1094566581997606, "grad_norm": 0.8444485691156898, "learning_rate": 1.97904298459043e-05, "loss": 0.7515, "step": 3749 }, { "epoch": 0.10948585442760796, "grad_norm": 0.7188984433531065, "learning_rate": 1.978978102189781e-05, "loss": 0.714, "step": 3750 }, { "epoch": 0.10951505065545532, "grad_norm": 0.7588463939191973, "learning_rate": 1.9789132197891322e-05, "loss": 0.7471, "step": 3751 }, { "epoch": 0.10954424688330268, "grad_norm": 0.8083285907574066, "learning_rate": 1.9788483373884834e-05, "loss": 0.7382, "step": 3752 }, { "epoch": 0.10957344311115004, "grad_norm": 0.8706846218469717, "learning_rate": 1.9787834549878346e-05, "loss": 0.7616, "step": 3753 }, { "epoch": 0.1096026393389974, "grad_norm": 0.7529515967834761, "learning_rate": 1.978718572587186e-05, "loss": 0.6587, "step": 3754 }, { "epoch": 0.10963183556684476, "grad_norm": 0.7220139126838169, "learning_rate": 1.978653690186537e-05, "loss": 0.6815, "step": 3755 }, { "epoch": 0.10966103179469212, "grad_norm": 0.7807018784801074, "learning_rate": 1.9785888077858883e-05, "loss": 0.6883, "step": 3756 }, { "epoch": 0.10969022802253948, "grad_norm": 0.6590189642402484, "learning_rate": 1.9785239253852395e-05, "loss": 0.6024, "step": 3757 }, { "epoch": 0.10971942425038685, "grad_norm": 0.7692718358726726, "learning_rate": 1.9784590429845907e-05, "loss": 0.7458, "step": 3758 }, { "epoch": 0.10974862047823421, "grad_norm": 0.7600978674456941, "learning_rate": 1.978394160583942e-05, "loss": 0.6392, "step": 3759 }, { "epoch": 0.10977781670608157, "grad_norm": 0.8814930252436478, "learning_rate": 1.978329278183293e-05, "loss": 0.6278, "step": 3760 }, { "epoch": 0.10980701293392893, "grad_norm": 0.8749238798517571, "learning_rate": 1.9782643957826443e-05, "loss": 0.819, "step": 3761 }, { "epoch": 0.1098362091617763, "grad_norm": 0.7811640346141332, "learning_rate": 1.9781995133819955e-05, "loss": 0.6974, "step": 3762 }, { "epoch": 0.10986540538962367, "grad_norm": 0.7332115886953556, "learning_rate": 1.9781346309813467e-05, "loss": 0.7088, "step": 3763 }, { "epoch": 0.10989460161747103, "grad_norm": 0.7243408885581742, "learning_rate": 1.9780697485806975e-05, "loss": 0.635, "step": 3764 }, { "epoch": 0.10992379784531839, "grad_norm": 0.7371624529905644, "learning_rate": 1.9780048661800487e-05, "loss": 0.677, "step": 3765 }, { "epoch": 0.10995299407316575, "grad_norm": 0.7314450572542467, "learning_rate": 1.9779399837794e-05, "loss": 0.6783, "step": 3766 }, { "epoch": 0.10998219030101311, "grad_norm": 0.6925506086901775, "learning_rate": 1.977875101378751e-05, "loss": 0.6177, "step": 3767 }, { "epoch": 0.11001138652886047, "grad_norm": 0.6670196615394742, "learning_rate": 1.9778102189781023e-05, "loss": 0.596, "step": 3768 }, { "epoch": 0.11004058275670783, "grad_norm": 0.754739129708766, "learning_rate": 1.9777453365774535e-05, "loss": 0.704, "step": 3769 }, { "epoch": 0.1100697789845552, "grad_norm": 0.7566585061208779, "learning_rate": 1.9776804541768048e-05, "loss": 0.7715, "step": 3770 }, { "epoch": 0.11009897521240256, "grad_norm": 0.7286147616913837, "learning_rate": 1.977615571776156e-05, "loss": 0.6415, "step": 3771 }, { "epoch": 0.11012817144024992, "grad_norm": 0.7328378044885493, "learning_rate": 1.9775506893755068e-05, "loss": 0.6508, "step": 3772 }, { "epoch": 0.11015736766809728, "grad_norm": 0.770491574306812, "learning_rate": 1.977485806974858e-05, "loss": 0.7611, "step": 3773 }, { "epoch": 0.11018656389594464, "grad_norm": 0.6917573636240308, "learning_rate": 1.9774209245742092e-05, "loss": 0.5812, "step": 3774 }, { "epoch": 0.110215760123792, "grad_norm": 0.6719137777039101, "learning_rate": 1.9773560421735604e-05, "loss": 0.6178, "step": 3775 }, { "epoch": 0.11024495635163936, "grad_norm": 0.7599609702538304, "learning_rate": 1.977291159772912e-05, "loss": 0.7201, "step": 3776 }, { "epoch": 0.11027415257948674, "grad_norm": 0.801028325744521, "learning_rate": 1.9772262773722632e-05, "loss": 0.7391, "step": 3777 }, { "epoch": 0.1103033488073341, "grad_norm": 0.8124797134885755, "learning_rate": 1.977161394971614e-05, "loss": 0.6334, "step": 3778 }, { "epoch": 0.11033254503518146, "grad_norm": 0.7022213280118295, "learning_rate": 1.9770965125709652e-05, "loss": 0.665, "step": 3779 }, { "epoch": 0.11036174126302882, "grad_norm": 0.6867662385445041, "learning_rate": 1.9770316301703164e-05, "loss": 0.5934, "step": 3780 }, { "epoch": 0.11039093749087618, "grad_norm": 0.837653879685252, "learning_rate": 1.9769667477696676e-05, "loss": 0.7509, "step": 3781 }, { "epoch": 0.11042013371872354, "grad_norm": 0.7979914009244271, "learning_rate": 1.976901865369019e-05, "loss": 0.7469, "step": 3782 }, { "epoch": 0.1104493299465709, "grad_norm": 0.7934282689717768, "learning_rate": 1.97683698296837e-05, "loss": 0.7682, "step": 3783 }, { "epoch": 0.11047852617441827, "grad_norm": 0.6876150212118542, "learning_rate": 1.9767721005677213e-05, "loss": 0.5986, "step": 3784 }, { "epoch": 0.11050772240226563, "grad_norm": 0.8699871218527839, "learning_rate": 1.9767072181670725e-05, "loss": 0.7654, "step": 3785 }, { "epoch": 0.11053691863011299, "grad_norm": 0.6954369183590773, "learning_rate": 1.9766423357664237e-05, "loss": 0.6279, "step": 3786 }, { "epoch": 0.11056611485796035, "grad_norm": 0.6751994313037145, "learning_rate": 1.9765774533657745e-05, "loss": 0.6025, "step": 3787 }, { "epoch": 0.11059531108580771, "grad_norm": 0.7467726008957319, "learning_rate": 1.9765125709651257e-05, "loss": 0.6876, "step": 3788 }, { "epoch": 0.11062450731365507, "grad_norm": 0.7420041556618708, "learning_rate": 1.976447688564477e-05, "loss": 0.6443, "step": 3789 }, { "epoch": 0.11065370354150243, "grad_norm": 0.8201537152613027, "learning_rate": 1.976382806163828e-05, "loss": 0.8167, "step": 3790 }, { "epoch": 0.1106828997693498, "grad_norm": 0.6974625942780772, "learning_rate": 1.9763179237631793e-05, "loss": 0.6485, "step": 3791 }, { "epoch": 0.11071209599719717, "grad_norm": 0.731027694573482, "learning_rate": 1.9762530413625305e-05, "loss": 0.6922, "step": 3792 }, { "epoch": 0.11074129222504453, "grad_norm": 0.7778098915916655, "learning_rate": 1.9761881589618817e-05, "loss": 0.7591, "step": 3793 }, { "epoch": 0.11077048845289189, "grad_norm": 0.7251892057988732, "learning_rate": 1.976123276561233e-05, "loss": 0.7019, "step": 3794 }, { "epoch": 0.11079968468073925, "grad_norm": 0.850450280494477, "learning_rate": 1.976058394160584e-05, "loss": 0.7694, "step": 3795 }, { "epoch": 0.11082888090858661, "grad_norm": 0.820241377580697, "learning_rate": 1.9759935117599353e-05, "loss": 0.6932, "step": 3796 }, { "epoch": 0.11085807713643397, "grad_norm": 0.7317824725145996, "learning_rate": 1.9759286293592865e-05, "loss": 0.7115, "step": 3797 }, { "epoch": 0.11088727336428134, "grad_norm": 0.8043414120224311, "learning_rate": 1.9758637469586377e-05, "loss": 0.6515, "step": 3798 }, { "epoch": 0.1109164695921287, "grad_norm": 0.7874586128669533, "learning_rate": 1.975798864557989e-05, "loss": 0.7603, "step": 3799 }, { "epoch": 0.11094566581997606, "grad_norm": 0.7199339495548174, "learning_rate": 1.97573398215734e-05, "loss": 0.6322, "step": 3800 }, { "epoch": 0.11097486204782342, "grad_norm": 0.7072453987901998, "learning_rate": 1.9756690997566914e-05, "loss": 0.6317, "step": 3801 }, { "epoch": 0.11100405827567078, "grad_norm": 0.6983752543606558, "learning_rate": 1.9756042173560422e-05, "loss": 0.6234, "step": 3802 }, { "epoch": 0.11103325450351814, "grad_norm": 0.6763935024928093, "learning_rate": 1.9755393349553934e-05, "loss": 0.5658, "step": 3803 }, { "epoch": 0.1110624507313655, "grad_norm": 0.7898435471723176, "learning_rate": 1.9754744525547446e-05, "loss": 0.7532, "step": 3804 }, { "epoch": 0.11109164695921286, "grad_norm": 0.8144043470895547, "learning_rate": 1.9754095701540958e-05, "loss": 0.6981, "step": 3805 }, { "epoch": 0.11112084318706023, "grad_norm": 0.7284806989358965, "learning_rate": 1.975344687753447e-05, "loss": 0.6494, "step": 3806 }, { "epoch": 0.1111500394149076, "grad_norm": 0.8758332917927382, "learning_rate": 1.9752798053527982e-05, "loss": 0.6771, "step": 3807 }, { "epoch": 0.11117923564275496, "grad_norm": 0.7163888477285192, "learning_rate": 1.9752149229521494e-05, "loss": 0.6387, "step": 3808 }, { "epoch": 0.11120843187060232, "grad_norm": 0.7698441307517956, "learning_rate": 1.9751500405515006e-05, "loss": 0.7769, "step": 3809 }, { "epoch": 0.11123762809844968, "grad_norm": 0.7782370339701002, "learning_rate": 1.9750851581508515e-05, "loss": 0.7471, "step": 3810 }, { "epoch": 0.11126682432629705, "grad_norm": 0.7398889769602063, "learning_rate": 1.9750202757502027e-05, "loss": 0.6882, "step": 3811 }, { "epoch": 0.1112960205541444, "grad_norm": 0.924738901460522, "learning_rate": 1.974955393349554e-05, "loss": 0.7464, "step": 3812 }, { "epoch": 0.11132521678199177, "grad_norm": 0.736064823496469, "learning_rate": 1.9748905109489055e-05, "loss": 0.6625, "step": 3813 }, { "epoch": 0.11135441300983913, "grad_norm": 0.734101146143381, "learning_rate": 1.9748256285482567e-05, "loss": 0.6148, "step": 3814 }, { "epoch": 0.11138360923768649, "grad_norm": 0.7963101244236309, "learning_rate": 1.974760746147608e-05, "loss": 0.6782, "step": 3815 }, { "epoch": 0.11141280546553385, "grad_norm": 0.7143034251267381, "learning_rate": 1.9746958637469587e-05, "loss": 0.6542, "step": 3816 }, { "epoch": 0.11144200169338121, "grad_norm": 0.7041497804954256, "learning_rate": 1.97463098134631e-05, "loss": 0.6635, "step": 3817 }, { "epoch": 0.11147119792122857, "grad_norm": 0.7990234323682815, "learning_rate": 1.974566098945661e-05, "loss": 0.695, "step": 3818 }, { "epoch": 0.11150039414907593, "grad_norm": 0.702094004222217, "learning_rate": 1.9745012165450123e-05, "loss": 0.6403, "step": 3819 }, { "epoch": 0.1115295903769233, "grad_norm": 0.7934301281447178, "learning_rate": 1.9744363341443635e-05, "loss": 0.756, "step": 3820 }, { "epoch": 0.11155878660477066, "grad_norm": 0.7298281976333266, "learning_rate": 1.9743714517437147e-05, "loss": 0.6767, "step": 3821 }, { "epoch": 0.11158798283261803, "grad_norm": 0.6838757917596638, "learning_rate": 1.974306569343066e-05, "loss": 0.6237, "step": 3822 }, { "epoch": 0.1116171790604654, "grad_norm": 0.7561171585296036, "learning_rate": 1.974241686942417e-05, "loss": 0.693, "step": 3823 }, { "epoch": 0.11164637528831275, "grad_norm": 0.7450994623743682, "learning_rate": 1.9741768045417683e-05, "loss": 0.5901, "step": 3824 }, { "epoch": 0.11167557151616012, "grad_norm": 0.7882892175281289, "learning_rate": 1.9741119221411192e-05, "loss": 0.7164, "step": 3825 }, { "epoch": 0.11170476774400748, "grad_norm": 0.7318294962662719, "learning_rate": 1.9740470397404704e-05, "loss": 0.7013, "step": 3826 }, { "epoch": 0.11173396397185484, "grad_norm": 0.6894187780936234, "learning_rate": 1.9739821573398216e-05, "loss": 0.6137, "step": 3827 }, { "epoch": 0.1117631601997022, "grad_norm": 0.7674226676644454, "learning_rate": 1.9739172749391728e-05, "loss": 0.6943, "step": 3828 }, { "epoch": 0.11179235642754956, "grad_norm": 0.6557791999160173, "learning_rate": 1.973852392538524e-05, "loss": 0.5632, "step": 3829 }, { "epoch": 0.11182155265539692, "grad_norm": 0.7675568031842256, "learning_rate": 1.9737875101378752e-05, "loss": 0.669, "step": 3830 }, { "epoch": 0.11185074888324428, "grad_norm": 0.8047227291309559, "learning_rate": 1.9737226277372264e-05, "loss": 0.7509, "step": 3831 }, { "epoch": 0.11187994511109164, "grad_norm": 0.7262095191546443, "learning_rate": 1.9736577453365776e-05, "loss": 0.6404, "step": 3832 }, { "epoch": 0.111909141338939, "grad_norm": 0.7708838167819267, "learning_rate": 1.9735928629359288e-05, "loss": 0.6125, "step": 3833 }, { "epoch": 0.11193833756678637, "grad_norm": 0.6963957234353405, "learning_rate": 1.97352798053528e-05, "loss": 0.6738, "step": 3834 }, { "epoch": 0.11196753379463373, "grad_norm": 0.6712861066998179, "learning_rate": 1.9734630981346312e-05, "loss": 0.5917, "step": 3835 }, { "epoch": 0.11199673002248109, "grad_norm": 0.6720762113741566, "learning_rate": 1.9733982157339824e-05, "loss": 0.632, "step": 3836 }, { "epoch": 0.11202592625032846, "grad_norm": 0.7131664167564946, "learning_rate": 1.9733333333333336e-05, "loss": 0.5825, "step": 3837 }, { "epoch": 0.11205512247817583, "grad_norm": 0.7569976292296582, "learning_rate": 1.973268450932685e-05, "loss": 0.6971, "step": 3838 }, { "epoch": 0.11208431870602319, "grad_norm": 0.7390192229726327, "learning_rate": 1.9732035685320357e-05, "loss": 0.6719, "step": 3839 }, { "epoch": 0.11211351493387055, "grad_norm": 0.7347093219964013, "learning_rate": 1.973138686131387e-05, "loss": 0.6863, "step": 3840 }, { "epoch": 0.11214271116171791, "grad_norm": 0.8109455224947932, "learning_rate": 1.973073803730738e-05, "loss": 0.7385, "step": 3841 }, { "epoch": 0.11217190738956527, "grad_norm": 0.7710456343935279, "learning_rate": 1.9730089213300893e-05, "loss": 0.7337, "step": 3842 }, { "epoch": 0.11220110361741263, "grad_norm": 0.8348457008356142, "learning_rate": 1.9729440389294405e-05, "loss": 0.7171, "step": 3843 }, { "epoch": 0.11223029984525999, "grad_norm": 1.3624773486247657, "learning_rate": 1.9728791565287917e-05, "loss": 0.5833, "step": 3844 }, { "epoch": 0.11225949607310735, "grad_norm": 0.7385424657102709, "learning_rate": 1.972814274128143e-05, "loss": 0.6773, "step": 3845 }, { "epoch": 0.11228869230095471, "grad_norm": 0.7151662910293468, "learning_rate": 1.972749391727494e-05, "loss": 0.6261, "step": 3846 }, { "epoch": 0.11231788852880208, "grad_norm": 0.6851658558610872, "learning_rate": 1.9726845093268453e-05, "loss": 0.5892, "step": 3847 }, { "epoch": 0.11234708475664944, "grad_norm": 0.6712477959547491, "learning_rate": 1.9726196269261962e-05, "loss": 0.6654, "step": 3848 }, { "epoch": 0.1123762809844968, "grad_norm": 0.7526810735921567, "learning_rate": 1.9725547445255474e-05, "loss": 0.6775, "step": 3849 }, { "epoch": 0.11240547721234416, "grad_norm": 0.7414976338705426, "learning_rate": 1.9724898621248986e-05, "loss": 0.7002, "step": 3850 }, { "epoch": 0.11243467344019152, "grad_norm": 0.7693796126813487, "learning_rate": 1.97242497972425e-05, "loss": 0.6985, "step": 3851 }, { "epoch": 0.1124638696680389, "grad_norm": 0.732130721367409, "learning_rate": 1.9723600973236013e-05, "loss": 0.6963, "step": 3852 }, { "epoch": 0.11249306589588626, "grad_norm": 0.7088698470109671, "learning_rate": 1.9722952149229525e-05, "loss": 0.6615, "step": 3853 }, { "epoch": 0.11252226212373362, "grad_norm": 0.6803829897474928, "learning_rate": 1.9722303325223034e-05, "loss": 0.6515, "step": 3854 }, { "epoch": 0.11255145835158098, "grad_norm": 0.7623219568993838, "learning_rate": 1.9721654501216546e-05, "loss": 0.6742, "step": 3855 }, { "epoch": 0.11258065457942834, "grad_norm": 0.7885029544554248, "learning_rate": 1.9721005677210058e-05, "loss": 0.7036, "step": 3856 }, { "epoch": 0.1126098508072757, "grad_norm": 0.7314370757267428, "learning_rate": 1.972035685320357e-05, "loss": 0.6839, "step": 3857 }, { "epoch": 0.11263904703512306, "grad_norm": 0.7257336944771191, "learning_rate": 1.9719708029197082e-05, "loss": 0.6824, "step": 3858 }, { "epoch": 0.11266824326297042, "grad_norm": 0.6912817841985647, "learning_rate": 1.9719059205190594e-05, "loss": 0.636, "step": 3859 }, { "epoch": 0.11269743949081779, "grad_norm": 0.8873723654958684, "learning_rate": 1.9718410381184106e-05, "loss": 0.8101, "step": 3860 }, { "epoch": 0.11272663571866515, "grad_norm": 0.7131105934619016, "learning_rate": 1.9717761557177618e-05, "loss": 0.7025, "step": 3861 }, { "epoch": 0.11275583194651251, "grad_norm": 0.9803199998057753, "learning_rate": 1.971711273317113e-05, "loss": 0.7427, "step": 3862 }, { "epoch": 0.11278502817435987, "grad_norm": 0.6614456537971929, "learning_rate": 1.971646390916464e-05, "loss": 0.6257, "step": 3863 }, { "epoch": 0.11281422440220723, "grad_norm": 0.6464147019268037, "learning_rate": 1.971581508515815e-05, "loss": 0.5417, "step": 3864 }, { "epoch": 0.11284342063005459, "grad_norm": 0.7543018628093671, "learning_rate": 1.9715166261151663e-05, "loss": 0.6592, "step": 3865 }, { "epoch": 0.11287261685790195, "grad_norm": 0.7471340982322026, "learning_rate": 1.9714517437145175e-05, "loss": 0.7155, "step": 3866 }, { "epoch": 0.11290181308574933, "grad_norm": 0.7090738147592053, "learning_rate": 1.9713868613138687e-05, "loss": 0.6399, "step": 3867 }, { "epoch": 0.11293100931359669, "grad_norm": 0.8199806098660796, "learning_rate": 1.97132197891322e-05, "loss": 0.8566, "step": 3868 }, { "epoch": 0.11296020554144405, "grad_norm": 0.7558822116413102, "learning_rate": 1.971257096512571e-05, "loss": 0.6765, "step": 3869 }, { "epoch": 0.11298940176929141, "grad_norm": 0.7616743348609594, "learning_rate": 1.9711922141119223e-05, "loss": 0.6947, "step": 3870 }, { "epoch": 0.11301859799713877, "grad_norm": 0.6220057950570064, "learning_rate": 1.9711273317112735e-05, "loss": 0.5291, "step": 3871 }, { "epoch": 0.11304779422498613, "grad_norm": 0.7488961019493572, "learning_rate": 1.9710624493106247e-05, "loss": 0.7061, "step": 3872 }, { "epoch": 0.1130769904528335, "grad_norm": 0.7437049179962345, "learning_rate": 1.970997566909976e-05, "loss": 0.6722, "step": 3873 }, { "epoch": 0.11310618668068086, "grad_norm": 0.7351503891823484, "learning_rate": 1.970932684509327e-05, "loss": 0.7042, "step": 3874 }, { "epoch": 0.11313538290852822, "grad_norm": 0.6886041205747413, "learning_rate": 1.9708678021086783e-05, "loss": 0.6701, "step": 3875 }, { "epoch": 0.11316457913637558, "grad_norm": 0.8636070315939057, "learning_rate": 1.9708029197080295e-05, "loss": 0.716, "step": 3876 }, { "epoch": 0.11319377536422294, "grad_norm": 0.7837210858294705, "learning_rate": 1.9707380373073804e-05, "loss": 0.7046, "step": 3877 }, { "epoch": 0.1132229715920703, "grad_norm": 0.6866630924780008, "learning_rate": 1.9706731549067316e-05, "loss": 0.5724, "step": 3878 }, { "epoch": 0.11325216781991766, "grad_norm": 0.695417659750813, "learning_rate": 1.9706082725060828e-05, "loss": 0.6716, "step": 3879 }, { "epoch": 0.11328136404776502, "grad_norm": 0.7193742407302436, "learning_rate": 1.970543390105434e-05, "loss": 0.6867, "step": 3880 }, { "epoch": 0.11331056027561238, "grad_norm": 0.6856904383336951, "learning_rate": 1.9704785077047852e-05, "loss": 0.6721, "step": 3881 }, { "epoch": 0.11333975650345976, "grad_norm": 0.7706765926935039, "learning_rate": 1.9704136253041364e-05, "loss": 0.7085, "step": 3882 }, { "epoch": 0.11336895273130712, "grad_norm": 0.6679013806930674, "learning_rate": 1.9703487429034876e-05, "loss": 0.598, "step": 3883 }, { "epoch": 0.11339814895915448, "grad_norm": 0.6970687498992355, "learning_rate": 1.9702838605028388e-05, "loss": 0.6667, "step": 3884 }, { "epoch": 0.11342734518700184, "grad_norm": 0.7098917972710445, "learning_rate": 1.97021897810219e-05, "loss": 0.6053, "step": 3885 }, { "epoch": 0.1134565414148492, "grad_norm": 0.7237951311121708, "learning_rate": 1.970154095701541e-05, "loss": 0.7292, "step": 3886 }, { "epoch": 0.11348573764269657, "grad_norm": 0.7952807788490279, "learning_rate": 1.970089213300892e-05, "loss": 0.7151, "step": 3887 }, { "epoch": 0.11351493387054393, "grad_norm": 0.6956233599991081, "learning_rate": 1.9700243309002433e-05, "loss": 0.64, "step": 3888 }, { "epoch": 0.11354413009839129, "grad_norm": 0.775819257108459, "learning_rate": 1.9699594484995948e-05, "loss": 0.7166, "step": 3889 }, { "epoch": 0.11357332632623865, "grad_norm": 0.7373840261675039, "learning_rate": 1.969894566098946e-05, "loss": 0.7496, "step": 3890 }, { "epoch": 0.11360252255408601, "grad_norm": 0.7797905103468188, "learning_rate": 1.9698296836982972e-05, "loss": 0.7481, "step": 3891 }, { "epoch": 0.11363171878193337, "grad_norm": 0.7063338237024187, "learning_rate": 1.969764801297648e-05, "loss": 0.6619, "step": 3892 }, { "epoch": 0.11366091500978073, "grad_norm": 0.7042176704304535, "learning_rate": 1.9696999188969993e-05, "loss": 0.6633, "step": 3893 }, { "epoch": 0.1136901112376281, "grad_norm": 0.673581342606427, "learning_rate": 1.9696350364963505e-05, "loss": 0.6104, "step": 3894 }, { "epoch": 0.11371930746547546, "grad_norm": 0.7117216366060167, "learning_rate": 1.9695701540957017e-05, "loss": 0.7055, "step": 3895 }, { "epoch": 0.11374850369332282, "grad_norm": 0.7479146461607822, "learning_rate": 1.969505271695053e-05, "loss": 0.6111, "step": 3896 }, { "epoch": 0.11377769992117019, "grad_norm": 0.8285776902044902, "learning_rate": 1.969440389294404e-05, "loss": 0.7441, "step": 3897 }, { "epoch": 0.11380689614901755, "grad_norm": 0.8679345324732326, "learning_rate": 1.9693755068937553e-05, "loss": 0.6953, "step": 3898 }, { "epoch": 0.11383609237686491, "grad_norm": 0.7614362298730032, "learning_rate": 1.9693106244931065e-05, "loss": 0.7453, "step": 3899 }, { "epoch": 0.11386528860471228, "grad_norm": 0.7202584831308602, "learning_rate": 1.9692457420924577e-05, "loss": 0.66, "step": 3900 }, { "epoch": 0.11389448483255964, "grad_norm": 0.8235751444520223, "learning_rate": 1.9691808596918086e-05, "loss": 0.8148, "step": 3901 }, { "epoch": 0.113923681060407, "grad_norm": 0.6526171144883347, "learning_rate": 1.9691159772911598e-05, "loss": 0.6334, "step": 3902 }, { "epoch": 0.11395287728825436, "grad_norm": 0.7395722957604847, "learning_rate": 1.969051094890511e-05, "loss": 0.6708, "step": 3903 }, { "epoch": 0.11398207351610172, "grad_norm": 0.6796623115977577, "learning_rate": 1.9689862124898622e-05, "loss": 0.6453, "step": 3904 }, { "epoch": 0.11401126974394908, "grad_norm": 0.7143535770064589, "learning_rate": 1.9689213300892134e-05, "loss": 0.6481, "step": 3905 }, { "epoch": 0.11404046597179644, "grad_norm": 0.905853268991506, "learning_rate": 1.9688564476885646e-05, "loss": 0.6782, "step": 3906 }, { "epoch": 0.1140696621996438, "grad_norm": 0.7733231564262898, "learning_rate": 1.9687915652879158e-05, "loss": 0.723, "step": 3907 }, { "epoch": 0.11409885842749116, "grad_norm": 0.8119476781820328, "learning_rate": 1.968726682887267e-05, "loss": 0.7987, "step": 3908 }, { "epoch": 0.11412805465533853, "grad_norm": 0.7532142899571252, "learning_rate": 1.9686618004866182e-05, "loss": 0.6728, "step": 3909 }, { "epoch": 0.11415725088318589, "grad_norm": 0.7191003133269962, "learning_rate": 1.9685969180859694e-05, "loss": 0.7246, "step": 3910 }, { "epoch": 0.11418644711103325, "grad_norm": 0.76738953061962, "learning_rate": 1.9685320356853206e-05, "loss": 0.7346, "step": 3911 }, { "epoch": 0.11421564333888061, "grad_norm": 0.8935610134561829, "learning_rate": 1.9684671532846718e-05, "loss": 0.7489, "step": 3912 }, { "epoch": 0.11424483956672798, "grad_norm": 0.7536555391609737, "learning_rate": 1.968402270884023e-05, "loss": 0.6883, "step": 3913 }, { "epoch": 0.11427403579457535, "grad_norm": 0.7119215922984065, "learning_rate": 1.9683373884833742e-05, "loss": 0.5976, "step": 3914 }, { "epoch": 0.11430323202242271, "grad_norm": 0.7280003244013471, "learning_rate": 1.968272506082725e-05, "loss": 0.683, "step": 3915 }, { "epoch": 0.11433242825027007, "grad_norm": 1.2884049448203307, "learning_rate": 1.9682076236820763e-05, "loss": 0.7418, "step": 3916 }, { "epoch": 0.11436162447811743, "grad_norm": 0.7450248884766232, "learning_rate": 1.9681427412814275e-05, "loss": 0.7364, "step": 3917 }, { "epoch": 0.11439082070596479, "grad_norm": 0.7497718794475764, "learning_rate": 1.9680778588807787e-05, "loss": 0.7221, "step": 3918 }, { "epoch": 0.11442001693381215, "grad_norm": 0.6963180711134291, "learning_rate": 1.96801297648013e-05, "loss": 0.6789, "step": 3919 }, { "epoch": 0.11444921316165951, "grad_norm": 0.782872937913653, "learning_rate": 1.967948094079481e-05, "loss": 0.7601, "step": 3920 }, { "epoch": 0.11447840938950687, "grad_norm": 0.8554186739462151, "learning_rate": 1.9678832116788323e-05, "loss": 0.802, "step": 3921 }, { "epoch": 0.11450760561735424, "grad_norm": 0.7068400253581631, "learning_rate": 1.9678183292781835e-05, "loss": 0.6605, "step": 3922 }, { "epoch": 0.1145368018452016, "grad_norm": 0.7521840967450512, "learning_rate": 1.9677534468775347e-05, "loss": 0.7513, "step": 3923 }, { "epoch": 0.11456599807304896, "grad_norm": 0.7378833267454418, "learning_rate": 1.9676885644768856e-05, "loss": 0.6912, "step": 3924 }, { "epoch": 0.11459519430089632, "grad_norm": 0.6299505316413608, "learning_rate": 1.9676236820762368e-05, "loss": 0.5731, "step": 3925 }, { "epoch": 0.11462439052874368, "grad_norm": 0.7582051060957332, "learning_rate": 1.967558799675588e-05, "loss": 0.7087, "step": 3926 }, { "epoch": 0.11465358675659104, "grad_norm": 0.8100239007683463, "learning_rate": 1.9674939172749395e-05, "loss": 0.7356, "step": 3927 }, { "epoch": 0.11468278298443842, "grad_norm": 0.6892319780617908, "learning_rate": 1.9674290348742907e-05, "loss": 0.662, "step": 3928 }, { "epoch": 0.11471197921228578, "grad_norm": 0.7352580909186928, "learning_rate": 1.967364152473642e-05, "loss": 0.7369, "step": 3929 }, { "epoch": 0.11474117544013314, "grad_norm": 0.7689480250713268, "learning_rate": 1.9672992700729928e-05, "loss": 0.7368, "step": 3930 }, { "epoch": 0.1147703716679805, "grad_norm": 0.814647685255829, "learning_rate": 1.967234387672344e-05, "loss": 0.704, "step": 3931 }, { "epoch": 0.11479956789582786, "grad_norm": 0.8005737264656008, "learning_rate": 1.9671695052716952e-05, "loss": 0.6326, "step": 3932 }, { "epoch": 0.11482876412367522, "grad_norm": 0.721478728811585, "learning_rate": 1.9671046228710464e-05, "loss": 0.7215, "step": 3933 }, { "epoch": 0.11485796035152258, "grad_norm": 0.7562165620001072, "learning_rate": 1.9670397404703976e-05, "loss": 0.7228, "step": 3934 }, { "epoch": 0.11488715657936995, "grad_norm": 0.7859693736413411, "learning_rate": 1.9669748580697488e-05, "loss": 0.8142, "step": 3935 }, { "epoch": 0.1149163528072173, "grad_norm": 0.7207988910621372, "learning_rate": 1.9669099756691e-05, "loss": 0.6719, "step": 3936 }, { "epoch": 0.11494554903506467, "grad_norm": 0.7221819034018351, "learning_rate": 1.9668450932684512e-05, "loss": 0.7052, "step": 3937 }, { "epoch": 0.11497474526291203, "grad_norm": 0.7704680591444174, "learning_rate": 1.9667802108678024e-05, "loss": 0.6741, "step": 3938 }, { "epoch": 0.11500394149075939, "grad_norm": 0.7320407797342807, "learning_rate": 1.9667153284671533e-05, "loss": 0.6351, "step": 3939 }, { "epoch": 0.11503313771860675, "grad_norm": 0.7298780272593726, "learning_rate": 1.9666504460665045e-05, "loss": 0.6905, "step": 3940 }, { "epoch": 0.11506233394645411, "grad_norm": 0.686408730676666, "learning_rate": 1.9665855636658557e-05, "loss": 0.6667, "step": 3941 }, { "epoch": 0.11509153017430147, "grad_norm": 0.7375161454861402, "learning_rate": 1.966520681265207e-05, "loss": 0.6803, "step": 3942 }, { "epoch": 0.11512072640214885, "grad_norm": 0.6681506230565003, "learning_rate": 1.966455798864558e-05, "loss": 0.6126, "step": 3943 }, { "epoch": 0.11514992262999621, "grad_norm": 0.7971170258464518, "learning_rate": 1.9663909164639093e-05, "loss": 0.7069, "step": 3944 }, { "epoch": 0.11517911885784357, "grad_norm": 0.782168629452924, "learning_rate": 1.9663260340632605e-05, "loss": 0.706, "step": 3945 }, { "epoch": 0.11520831508569093, "grad_norm": 0.6949820999728007, "learning_rate": 1.9662611516626117e-05, "loss": 0.5856, "step": 3946 }, { "epoch": 0.1152375113135383, "grad_norm": 0.752841196639202, "learning_rate": 1.966196269261963e-05, "loss": 0.6711, "step": 3947 }, { "epoch": 0.11526670754138565, "grad_norm": 0.7942475326503252, "learning_rate": 1.966131386861314e-05, "loss": 0.7422, "step": 3948 }, { "epoch": 0.11529590376923302, "grad_norm": 0.7746122261001835, "learning_rate": 1.9660665044606653e-05, "loss": 0.7692, "step": 3949 }, { "epoch": 0.11532509999708038, "grad_norm": 0.7200929344109748, "learning_rate": 1.9660016220600165e-05, "loss": 0.6722, "step": 3950 }, { "epoch": 0.11535429622492774, "grad_norm": 0.8084509178706017, "learning_rate": 1.9659367396593677e-05, "loss": 0.8371, "step": 3951 }, { "epoch": 0.1153834924527751, "grad_norm": 0.8252096517515027, "learning_rate": 1.965871857258719e-05, "loss": 0.5694, "step": 3952 }, { "epoch": 0.11541268868062246, "grad_norm": 0.7570921461106613, "learning_rate": 1.9658069748580698e-05, "loss": 0.698, "step": 3953 }, { "epoch": 0.11544188490846982, "grad_norm": 0.7020202697188718, "learning_rate": 1.965742092457421e-05, "loss": 0.6255, "step": 3954 }, { "epoch": 0.11547108113631718, "grad_norm": 0.7288567090747297, "learning_rate": 1.9656772100567722e-05, "loss": 0.6987, "step": 3955 }, { "epoch": 0.11550027736416454, "grad_norm": 0.6695270567905183, "learning_rate": 1.9656123276561234e-05, "loss": 0.58, "step": 3956 }, { "epoch": 0.1155294735920119, "grad_norm": 0.7476605938127371, "learning_rate": 1.9655474452554746e-05, "loss": 0.6583, "step": 3957 }, { "epoch": 0.11555866981985928, "grad_norm": 0.7581897739360741, "learning_rate": 1.9654825628548258e-05, "loss": 0.6096, "step": 3958 }, { "epoch": 0.11558786604770664, "grad_norm": 0.6519926163141422, "learning_rate": 1.965417680454177e-05, "loss": 0.6176, "step": 3959 }, { "epoch": 0.115617062275554, "grad_norm": 0.7337503367352793, "learning_rate": 1.9653527980535282e-05, "loss": 0.6595, "step": 3960 }, { "epoch": 0.11564625850340136, "grad_norm": 0.7967988663633273, "learning_rate": 1.9652879156528794e-05, "loss": 0.7651, "step": 3961 }, { "epoch": 0.11567545473124873, "grad_norm": 0.7893281610220402, "learning_rate": 1.9652230332522303e-05, "loss": 0.7813, "step": 3962 }, { "epoch": 0.11570465095909609, "grad_norm": 0.7481833010509812, "learning_rate": 1.9651581508515815e-05, "loss": 0.8054, "step": 3963 }, { "epoch": 0.11573384718694345, "grad_norm": 0.8130394129782957, "learning_rate": 1.965093268450933e-05, "loss": 0.7074, "step": 3964 }, { "epoch": 0.11576304341479081, "grad_norm": 0.7047371230929211, "learning_rate": 1.9650283860502842e-05, "loss": 0.6758, "step": 3965 }, { "epoch": 0.11579223964263817, "grad_norm": 0.7801361025267266, "learning_rate": 1.9649635036496354e-05, "loss": 0.7863, "step": 3966 }, { "epoch": 0.11582143587048553, "grad_norm": 0.7645707386992319, "learning_rate": 1.9648986212489866e-05, "loss": 0.7288, "step": 3967 }, { "epoch": 0.11585063209833289, "grad_norm": 0.7031386947726094, "learning_rate": 1.9648337388483375e-05, "loss": 0.6606, "step": 3968 }, { "epoch": 0.11587982832618025, "grad_norm": 0.6969160736730085, "learning_rate": 1.9647688564476887e-05, "loss": 0.681, "step": 3969 }, { "epoch": 0.11590902455402761, "grad_norm": 0.7587734021426732, "learning_rate": 1.96470397404704e-05, "loss": 0.7991, "step": 3970 }, { "epoch": 0.11593822078187498, "grad_norm": 0.7482429860871455, "learning_rate": 1.964639091646391e-05, "loss": 0.7621, "step": 3971 }, { "epoch": 0.11596741700972234, "grad_norm": 0.736134619250232, "learning_rate": 1.9645742092457423e-05, "loss": 0.6934, "step": 3972 }, { "epoch": 0.11599661323756971, "grad_norm": 0.7532019872693241, "learning_rate": 1.9645093268450935e-05, "loss": 0.6588, "step": 3973 }, { "epoch": 0.11602580946541707, "grad_norm": 0.8208334329864649, "learning_rate": 1.9644444444444447e-05, "loss": 0.737, "step": 3974 }, { "epoch": 0.11605500569326443, "grad_norm": 0.6931908899503072, "learning_rate": 1.964379562043796e-05, "loss": 0.625, "step": 3975 }, { "epoch": 0.1160842019211118, "grad_norm": 0.8111841452457221, "learning_rate": 1.964314679643147e-05, "loss": 0.7735, "step": 3976 }, { "epoch": 0.11611339814895916, "grad_norm": 0.7448768014007753, "learning_rate": 1.964249797242498e-05, "loss": 0.7431, "step": 3977 }, { "epoch": 0.11614259437680652, "grad_norm": 0.6850381370110504, "learning_rate": 1.964184914841849e-05, "loss": 0.6303, "step": 3978 }, { "epoch": 0.11617179060465388, "grad_norm": 0.7780909549146808, "learning_rate": 1.9641200324412004e-05, "loss": 0.7833, "step": 3979 }, { "epoch": 0.11620098683250124, "grad_norm": 0.7886740645496594, "learning_rate": 1.9640551500405516e-05, "loss": 0.7682, "step": 3980 }, { "epoch": 0.1162301830603486, "grad_norm": 1.1978122878098563, "learning_rate": 1.9639902676399028e-05, "loss": 0.6747, "step": 3981 }, { "epoch": 0.11625937928819596, "grad_norm": 0.7753730837189219, "learning_rate": 1.963925385239254e-05, "loss": 0.767, "step": 3982 }, { "epoch": 0.11628857551604332, "grad_norm": 0.7356740338719375, "learning_rate": 1.9638605028386052e-05, "loss": 0.7191, "step": 3983 }, { "epoch": 0.11631777174389069, "grad_norm": 0.7689920655714373, "learning_rate": 1.9637956204379564e-05, "loss": 0.6767, "step": 3984 }, { "epoch": 0.11634696797173805, "grad_norm": 0.7774604028374743, "learning_rate": 1.9637307380373076e-05, "loss": 0.6651, "step": 3985 }, { "epoch": 0.11637616419958541, "grad_norm": 0.7405521509753951, "learning_rate": 1.9636658556366588e-05, "loss": 0.6821, "step": 3986 }, { "epoch": 0.11640536042743277, "grad_norm": 0.6597928901335461, "learning_rate": 1.96360097323601e-05, "loss": 0.5829, "step": 3987 }, { "epoch": 0.11643455665528014, "grad_norm": 0.689452397262487, "learning_rate": 1.9635360908353612e-05, "loss": 0.6482, "step": 3988 }, { "epoch": 0.1164637528831275, "grad_norm": 0.739644651257954, "learning_rate": 1.9634712084347124e-05, "loss": 0.6962, "step": 3989 }, { "epoch": 0.11649294911097487, "grad_norm": 0.7166206775795815, "learning_rate": 1.9634063260340636e-05, "loss": 0.641, "step": 3990 }, { "epoch": 0.11652214533882223, "grad_norm": 1.3041520907341422, "learning_rate": 1.9633414436334145e-05, "loss": 0.7266, "step": 3991 }, { "epoch": 0.11655134156666959, "grad_norm": 0.6495307216340978, "learning_rate": 1.9632765612327657e-05, "loss": 0.5686, "step": 3992 }, { "epoch": 0.11658053779451695, "grad_norm": 0.8179246039088869, "learning_rate": 1.963211678832117e-05, "loss": 0.6788, "step": 3993 }, { "epoch": 0.11660973402236431, "grad_norm": 0.7155922737661946, "learning_rate": 1.963146796431468e-05, "loss": 0.59, "step": 3994 }, { "epoch": 0.11663893025021167, "grad_norm": 0.7781751771893212, "learning_rate": 1.9630819140308193e-05, "loss": 0.6569, "step": 3995 }, { "epoch": 0.11666812647805903, "grad_norm": 0.6953519882278838, "learning_rate": 1.9630170316301705e-05, "loss": 0.5973, "step": 3996 }, { "epoch": 0.1166973227059064, "grad_norm": 0.8676356325783798, "learning_rate": 1.9629521492295217e-05, "loss": 0.7788, "step": 3997 }, { "epoch": 0.11672651893375376, "grad_norm": 0.683435550667058, "learning_rate": 1.962887266828873e-05, "loss": 0.6166, "step": 3998 }, { "epoch": 0.11675571516160112, "grad_norm": 0.7140831843070535, "learning_rate": 1.962822384428224e-05, "loss": 0.642, "step": 3999 }, { "epoch": 0.11678491138944848, "grad_norm": 0.7490384729994023, "learning_rate": 1.962757502027575e-05, "loss": 0.6879, "step": 4000 }, { "epoch": 0.11681410761729584, "grad_norm": 0.8369543451166294, "learning_rate": 1.962692619626926e-05, "loss": 0.7125, "step": 4001 }, { "epoch": 0.1168433038451432, "grad_norm": 0.7301263916664191, "learning_rate": 1.9626277372262777e-05, "loss": 0.7309, "step": 4002 }, { "epoch": 0.11687250007299058, "grad_norm": 0.6513766015626835, "learning_rate": 1.962562854825629e-05, "loss": 0.5477, "step": 4003 }, { "epoch": 0.11690169630083794, "grad_norm": 0.6895455898531866, "learning_rate": 1.96249797242498e-05, "loss": 0.6127, "step": 4004 }, { "epoch": 0.1169308925286853, "grad_norm": 0.7304628226536047, "learning_rate": 1.9624330900243313e-05, "loss": 0.6467, "step": 4005 }, { "epoch": 0.11696008875653266, "grad_norm": 1.0855305502241122, "learning_rate": 1.962368207623682e-05, "loss": 0.7862, "step": 4006 }, { "epoch": 0.11698928498438002, "grad_norm": 0.7296650105781008, "learning_rate": 1.9623033252230334e-05, "loss": 0.6842, "step": 4007 }, { "epoch": 0.11701848121222738, "grad_norm": 0.7405757847220027, "learning_rate": 1.9622384428223846e-05, "loss": 0.7223, "step": 4008 }, { "epoch": 0.11704767744007474, "grad_norm": 0.7338326942264854, "learning_rate": 1.9621735604217358e-05, "loss": 0.6703, "step": 4009 }, { "epoch": 0.1170768736679221, "grad_norm": 0.706734816468459, "learning_rate": 1.962108678021087e-05, "loss": 0.6322, "step": 4010 }, { "epoch": 0.11710606989576947, "grad_norm": 0.7230312998758615, "learning_rate": 1.962043795620438e-05, "loss": 0.6392, "step": 4011 }, { "epoch": 0.11713526612361683, "grad_norm": 1.5586613135608338, "learning_rate": 1.9619789132197894e-05, "loss": 0.6516, "step": 4012 }, { "epoch": 0.11716446235146419, "grad_norm": 0.7552043110270774, "learning_rate": 1.9619140308191406e-05, "loss": 0.7168, "step": 4013 }, { "epoch": 0.11719365857931155, "grad_norm": 0.8027930854969929, "learning_rate": 1.9618491484184918e-05, "loss": 0.7414, "step": 4014 }, { "epoch": 0.11722285480715891, "grad_norm": 0.6893137305654948, "learning_rate": 1.9617842660178426e-05, "loss": 0.6714, "step": 4015 }, { "epoch": 0.11725205103500627, "grad_norm": 0.7590455123301124, "learning_rate": 1.961719383617194e-05, "loss": 0.7323, "step": 4016 }, { "epoch": 0.11728124726285363, "grad_norm": 0.8189928224773441, "learning_rate": 1.961654501216545e-05, "loss": 0.6797, "step": 4017 }, { "epoch": 0.11731044349070101, "grad_norm": 0.7320871568344898, "learning_rate": 1.9615896188158962e-05, "loss": 0.6135, "step": 4018 }, { "epoch": 0.11733963971854837, "grad_norm": 0.6915901274164996, "learning_rate": 1.9615247364152475e-05, "loss": 0.6652, "step": 4019 }, { "epoch": 0.11736883594639573, "grad_norm": 0.706146521835804, "learning_rate": 1.9614598540145987e-05, "loss": 0.6997, "step": 4020 }, { "epoch": 0.11739803217424309, "grad_norm": 0.7580655160391541, "learning_rate": 1.96139497161395e-05, "loss": 0.7475, "step": 4021 }, { "epoch": 0.11742722840209045, "grad_norm": 0.6961048247588648, "learning_rate": 1.961330089213301e-05, "loss": 0.6557, "step": 4022 }, { "epoch": 0.11745642462993781, "grad_norm": 0.7838187228273162, "learning_rate": 1.9612652068126523e-05, "loss": 0.7406, "step": 4023 }, { "epoch": 0.11748562085778518, "grad_norm": 0.7424670970289141, "learning_rate": 1.9612003244120035e-05, "loss": 0.6722, "step": 4024 }, { "epoch": 0.11751481708563254, "grad_norm": 0.7300489036332969, "learning_rate": 1.9611354420113547e-05, "loss": 0.7006, "step": 4025 }, { "epoch": 0.1175440133134799, "grad_norm": 0.7014651174021151, "learning_rate": 1.961070559610706e-05, "loss": 0.6512, "step": 4026 }, { "epoch": 0.11757320954132726, "grad_norm": 1.2997756580082065, "learning_rate": 1.961005677210057e-05, "loss": 0.757, "step": 4027 }, { "epoch": 0.11760240576917462, "grad_norm": 0.7390678933212473, "learning_rate": 1.9609407948094083e-05, "loss": 0.6928, "step": 4028 }, { "epoch": 0.11763160199702198, "grad_norm": 0.7369411006451384, "learning_rate": 1.960875912408759e-05, "loss": 0.6743, "step": 4029 }, { "epoch": 0.11766079822486934, "grad_norm": 0.726466605632131, "learning_rate": 1.9608110300081103e-05, "loss": 0.6264, "step": 4030 }, { "epoch": 0.1176899944527167, "grad_norm": 0.7285431274887022, "learning_rate": 1.9607461476074615e-05, "loss": 0.6713, "step": 4031 }, { "epoch": 0.11771919068056406, "grad_norm": 0.7197597959969753, "learning_rate": 1.9606812652068127e-05, "loss": 0.6917, "step": 4032 }, { "epoch": 0.11774838690841144, "grad_norm": 0.6977309642402058, "learning_rate": 1.960616382806164e-05, "loss": 0.6265, "step": 4033 }, { "epoch": 0.1177775831362588, "grad_norm": 0.7256048874030285, "learning_rate": 1.960551500405515e-05, "loss": 0.7322, "step": 4034 }, { "epoch": 0.11780677936410616, "grad_norm": 0.747530259824895, "learning_rate": 1.9604866180048664e-05, "loss": 0.725, "step": 4035 }, { "epoch": 0.11783597559195352, "grad_norm": 0.9143527447615284, "learning_rate": 1.9604217356042176e-05, "loss": 0.6955, "step": 4036 }, { "epoch": 0.11786517181980088, "grad_norm": 0.70250637822175, "learning_rate": 1.9603568532035688e-05, "loss": 0.6413, "step": 4037 }, { "epoch": 0.11789436804764825, "grad_norm": 0.852740435219945, "learning_rate": 1.9602919708029196e-05, "loss": 0.8273, "step": 4038 }, { "epoch": 0.11792356427549561, "grad_norm": 0.7016322882422058, "learning_rate": 1.9602270884022708e-05, "loss": 0.6669, "step": 4039 }, { "epoch": 0.11795276050334297, "grad_norm": 0.7466243767629307, "learning_rate": 1.9601622060016224e-05, "loss": 0.7939, "step": 4040 }, { "epoch": 0.11798195673119033, "grad_norm": 0.6784854227702365, "learning_rate": 1.9600973236009736e-05, "loss": 0.6618, "step": 4041 }, { "epoch": 0.11801115295903769, "grad_norm": 0.7831736267498811, "learning_rate": 1.9600324412003248e-05, "loss": 0.6869, "step": 4042 }, { "epoch": 0.11804034918688505, "grad_norm": 0.7717627127184542, "learning_rate": 1.959967558799676e-05, "loss": 0.7261, "step": 4043 }, { "epoch": 0.11806954541473241, "grad_norm": 0.7249058112189936, "learning_rate": 1.959902676399027e-05, "loss": 0.634, "step": 4044 }, { "epoch": 0.11809874164257977, "grad_norm": 0.9231756839032488, "learning_rate": 1.959837793998378e-05, "loss": 0.7058, "step": 4045 }, { "epoch": 0.11812793787042714, "grad_norm": 0.6769003399352312, "learning_rate": 1.9597729115977292e-05, "loss": 0.6334, "step": 4046 }, { "epoch": 0.1181571340982745, "grad_norm": 0.6680820532455383, "learning_rate": 1.9597080291970805e-05, "loss": 0.6498, "step": 4047 }, { "epoch": 0.11818633032612187, "grad_norm": 0.7650679921687507, "learning_rate": 1.9596431467964317e-05, "loss": 0.697, "step": 4048 }, { "epoch": 0.11821552655396923, "grad_norm": 0.7649698116920256, "learning_rate": 1.959578264395783e-05, "loss": 0.7435, "step": 4049 }, { "epoch": 0.1182447227818166, "grad_norm": 0.7317002893182907, "learning_rate": 1.959513381995134e-05, "loss": 0.6838, "step": 4050 }, { "epoch": 0.11827391900966396, "grad_norm": 0.7253452276123127, "learning_rate": 1.9594484995944853e-05, "loss": 0.6318, "step": 4051 }, { "epoch": 0.11830311523751132, "grad_norm": 0.7651847525776458, "learning_rate": 1.9593836171938365e-05, "loss": 0.7106, "step": 4052 }, { "epoch": 0.11833231146535868, "grad_norm": 0.691083158395994, "learning_rate": 1.9593187347931873e-05, "loss": 0.6423, "step": 4053 }, { "epoch": 0.11836150769320604, "grad_norm": 0.751495123217704, "learning_rate": 1.9592538523925385e-05, "loss": 0.7101, "step": 4054 }, { "epoch": 0.1183907039210534, "grad_norm": 0.7143475348465939, "learning_rate": 1.9591889699918897e-05, "loss": 0.6605, "step": 4055 }, { "epoch": 0.11841990014890076, "grad_norm": 0.7281957195551774, "learning_rate": 1.959124087591241e-05, "loss": 0.6947, "step": 4056 }, { "epoch": 0.11844909637674812, "grad_norm": 0.7088234928254618, "learning_rate": 1.959059205190592e-05, "loss": 0.6853, "step": 4057 }, { "epoch": 0.11847829260459548, "grad_norm": 0.8680594365763395, "learning_rate": 1.9589943227899433e-05, "loss": 0.7673, "step": 4058 }, { "epoch": 0.11850748883244284, "grad_norm": 0.7153329924701205, "learning_rate": 1.9589294403892945e-05, "loss": 0.7336, "step": 4059 }, { "epoch": 0.1185366850602902, "grad_norm": 0.695823174940224, "learning_rate": 1.9588645579886457e-05, "loss": 0.6754, "step": 4060 }, { "epoch": 0.11856588128813757, "grad_norm": 0.7460031324070281, "learning_rate": 1.958799675587997e-05, "loss": 0.7367, "step": 4061 }, { "epoch": 0.11859507751598493, "grad_norm": 0.7314774636563123, "learning_rate": 1.958734793187348e-05, "loss": 0.7198, "step": 4062 }, { "epoch": 0.1186242737438323, "grad_norm": 0.6847883204147031, "learning_rate": 1.9586699107866994e-05, "loss": 0.584, "step": 4063 }, { "epoch": 0.11865346997167966, "grad_norm": 0.7121201394342198, "learning_rate": 1.9586050283860506e-05, "loss": 0.6591, "step": 4064 }, { "epoch": 0.11868266619952703, "grad_norm": 0.8164047218874215, "learning_rate": 1.9585401459854018e-05, "loss": 0.7139, "step": 4065 }, { "epoch": 0.11871186242737439, "grad_norm": 0.7631646073009065, "learning_rate": 1.958475263584753e-05, "loss": 0.7487, "step": 4066 }, { "epoch": 0.11874105865522175, "grad_norm": 0.6981610032221733, "learning_rate": 1.9584103811841038e-05, "loss": 0.6504, "step": 4067 }, { "epoch": 0.11877025488306911, "grad_norm": 0.7969782611236552, "learning_rate": 1.958345498783455e-05, "loss": 0.7165, "step": 4068 }, { "epoch": 0.11879945111091647, "grad_norm": 0.7434964896746816, "learning_rate": 1.9582806163828062e-05, "loss": 0.6573, "step": 4069 }, { "epoch": 0.11882864733876383, "grad_norm": 0.7199263356807951, "learning_rate": 1.9582157339821574e-05, "loss": 0.6478, "step": 4070 }, { "epoch": 0.1188578435666112, "grad_norm": 0.7332099069051676, "learning_rate": 1.9581508515815086e-05, "loss": 0.6737, "step": 4071 }, { "epoch": 0.11888703979445855, "grad_norm": 0.7470831530106076, "learning_rate": 1.95808596918086e-05, "loss": 0.677, "step": 4072 }, { "epoch": 0.11891623602230592, "grad_norm": 0.6595534733631743, "learning_rate": 1.958021086780211e-05, "loss": 0.6123, "step": 4073 }, { "epoch": 0.11894543225015328, "grad_norm": 0.8357907466745566, "learning_rate": 1.9579562043795622e-05, "loss": 0.7931, "step": 4074 }, { "epoch": 0.11897462847800064, "grad_norm": 0.7610700977541668, "learning_rate": 1.9578913219789134e-05, "loss": 0.75, "step": 4075 }, { "epoch": 0.119003824705848, "grad_norm": 0.7439950362330552, "learning_rate": 1.9578264395782643e-05, "loss": 0.7273, "step": 4076 }, { "epoch": 0.11903302093369536, "grad_norm": 0.7286425590373012, "learning_rate": 1.9577615571776155e-05, "loss": 0.653, "step": 4077 }, { "epoch": 0.11906221716154274, "grad_norm": 0.6897305183323951, "learning_rate": 1.957696674776967e-05, "loss": 0.648, "step": 4078 }, { "epoch": 0.1190914133893901, "grad_norm": 0.7000142602477932, "learning_rate": 1.9576317923763183e-05, "loss": 0.6425, "step": 4079 }, { "epoch": 0.11912060961723746, "grad_norm": 0.7670322206325668, "learning_rate": 1.9575669099756695e-05, "loss": 0.7335, "step": 4080 }, { "epoch": 0.11914980584508482, "grad_norm": 0.7419815233471857, "learning_rate": 1.9575020275750207e-05, "loss": 0.6827, "step": 4081 }, { "epoch": 0.11917900207293218, "grad_norm": 1.184196697345976, "learning_rate": 1.9574371451743715e-05, "loss": 0.707, "step": 4082 }, { "epoch": 0.11920819830077954, "grad_norm": 0.7620993603495282, "learning_rate": 1.9573722627737227e-05, "loss": 0.7114, "step": 4083 }, { "epoch": 0.1192373945286269, "grad_norm": 0.7799865735274571, "learning_rate": 1.957307380373074e-05, "loss": 0.7256, "step": 4084 }, { "epoch": 0.11926659075647426, "grad_norm": 0.7428293376731993, "learning_rate": 1.957242497972425e-05, "loss": 0.725, "step": 4085 }, { "epoch": 0.11929578698432163, "grad_norm": 0.7196545917230934, "learning_rate": 1.9571776155717763e-05, "loss": 0.6611, "step": 4086 }, { "epoch": 0.11932498321216899, "grad_norm": 0.6695947147725494, "learning_rate": 1.9571127331711275e-05, "loss": 0.595, "step": 4087 }, { "epoch": 0.11935417944001635, "grad_norm": 1.0900616435130455, "learning_rate": 1.9570478507704787e-05, "loss": 0.6832, "step": 4088 }, { "epoch": 0.11938337566786371, "grad_norm": 1.059606955875946, "learning_rate": 1.95698296836983e-05, "loss": 0.6249, "step": 4089 }, { "epoch": 0.11941257189571107, "grad_norm": 0.7249573665103041, "learning_rate": 1.9569180859691808e-05, "loss": 0.7115, "step": 4090 }, { "epoch": 0.11944176812355843, "grad_norm": 0.6962130006581255, "learning_rate": 1.956853203568532e-05, "loss": 0.6837, "step": 4091 }, { "epoch": 0.11947096435140579, "grad_norm": 0.7183277038287138, "learning_rate": 1.9567883211678832e-05, "loss": 0.6863, "step": 4092 }, { "epoch": 0.11950016057925317, "grad_norm": 0.8497498042940053, "learning_rate": 1.9567234387672344e-05, "loss": 0.7477, "step": 4093 }, { "epoch": 0.11952935680710053, "grad_norm": 0.7180179539563473, "learning_rate": 1.9566585563665856e-05, "loss": 0.6791, "step": 4094 }, { "epoch": 0.11955855303494789, "grad_norm": 0.6767047712823488, "learning_rate": 1.9565936739659368e-05, "loss": 0.6164, "step": 4095 }, { "epoch": 0.11958774926279525, "grad_norm": 0.6918997427190435, "learning_rate": 1.956528791565288e-05, "loss": 0.6165, "step": 4096 }, { "epoch": 0.11961694549064261, "grad_norm": 0.7183039118392414, "learning_rate": 1.9564639091646392e-05, "loss": 0.719, "step": 4097 }, { "epoch": 0.11964614171848997, "grad_norm": 0.737354679544131, "learning_rate": 1.9563990267639904e-05, "loss": 0.6832, "step": 4098 }, { "epoch": 0.11967533794633733, "grad_norm": 0.7624944515091571, "learning_rate": 1.9563341443633416e-05, "loss": 0.7137, "step": 4099 }, { "epoch": 0.1197045341741847, "grad_norm": 0.6760940373776506, "learning_rate": 1.956269261962693e-05, "loss": 0.6223, "step": 4100 }, { "epoch": 0.11973373040203206, "grad_norm": 0.6570626243160436, "learning_rate": 1.956204379562044e-05, "loss": 0.6121, "step": 4101 }, { "epoch": 0.11976292662987942, "grad_norm": 0.8050632632396831, "learning_rate": 1.9561394971613952e-05, "loss": 0.7204, "step": 4102 }, { "epoch": 0.11979212285772678, "grad_norm": 0.7732445033900732, "learning_rate": 1.9560746147607464e-05, "loss": 0.8128, "step": 4103 }, { "epoch": 0.11982131908557414, "grad_norm": 0.7820910650693231, "learning_rate": 1.9560097323600977e-05, "loss": 0.7514, "step": 4104 }, { "epoch": 0.1198505153134215, "grad_norm": 0.7538640556496007, "learning_rate": 1.9559448499594485e-05, "loss": 0.7621, "step": 4105 }, { "epoch": 0.11987971154126886, "grad_norm": 0.7145865102007485, "learning_rate": 1.9558799675587997e-05, "loss": 0.5907, "step": 4106 }, { "epoch": 0.11990890776911622, "grad_norm": 0.8019281833878901, "learning_rate": 1.955815085158151e-05, "loss": 0.7195, "step": 4107 }, { "epoch": 0.11993810399696359, "grad_norm": 0.6869702092320863, "learning_rate": 1.955750202757502e-05, "loss": 0.6688, "step": 4108 }, { "epoch": 0.11996730022481096, "grad_norm": 0.7550210575850685, "learning_rate": 1.9556853203568533e-05, "loss": 0.6999, "step": 4109 }, { "epoch": 0.11999649645265832, "grad_norm": 0.8557342204038596, "learning_rate": 1.9556204379562045e-05, "loss": 0.7779, "step": 4110 }, { "epoch": 0.12002569268050568, "grad_norm": 0.8419371493212887, "learning_rate": 1.9555555555555557e-05, "loss": 0.7083, "step": 4111 }, { "epoch": 0.12005488890835304, "grad_norm": 0.7641795421878175, "learning_rate": 1.955490673154907e-05, "loss": 0.7403, "step": 4112 }, { "epoch": 0.1200840851362004, "grad_norm": 0.695603916391158, "learning_rate": 1.955425790754258e-05, "loss": 0.6555, "step": 4113 }, { "epoch": 0.12011328136404777, "grad_norm": 0.771259488566583, "learning_rate": 1.955360908353609e-05, "loss": 0.687, "step": 4114 }, { "epoch": 0.12014247759189513, "grad_norm": 0.8170968104048308, "learning_rate": 1.9552960259529605e-05, "loss": 0.7244, "step": 4115 }, { "epoch": 0.12017167381974249, "grad_norm": 0.6862153098562589, "learning_rate": 1.9552311435523117e-05, "loss": 0.5657, "step": 4116 }, { "epoch": 0.12020087004758985, "grad_norm": 0.8107529013970984, "learning_rate": 1.955166261151663e-05, "loss": 0.7096, "step": 4117 }, { "epoch": 0.12023006627543721, "grad_norm": 0.7792610076613653, "learning_rate": 1.955101378751014e-05, "loss": 0.7202, "step": 4118 }, { "epoch": 0.12025926250328457, "grad_norm": 0.7662566369616224, "learning_rate": 1.9550364963503654e-05, "loss": 0.7525, "step": 4119 }, { "epoch": 0.12028845873113193, "grad_norm": 0.792092360198235, "learning_rate": 1.9549716139497162e-05, "loss": 0.7021, "step": 4120 }, { "epoch": 0.1203176549589793, "grad_norm": 0.7298735417318534, "learning_rate": 1.9549067315490674e-05, "loss": 0.6615, "step": 4121 }, { "epoch": 0.12034685118682666, "grad_norm": 0.677846928784321, "learning_rate": 1.9548418491484186e-05, "loss": 0.6418, "step": 4122 }, { "epoch": 0.12037604741467402, "grad_norm": 0.7256545519888574, "learning_rate": 1.9547769667477698e-05, "loss": 0.6427, "step": 4123 }, { "epoch": 0.12040524364252139, "grad_norm": 0.7107025045615163, "learning_rate": 1.954712084347121e-05, "loss": 0.7353, "step": 4124 }, { "epoch": 0.12043443987036875, "grad_norm": 0.7580793425868981, "learning_rate": 1.9546472019464722e-05, "loss": 0.7389, "step": 4125 }, { "epoch": 0.12046363609821611, "grad_norm": 0.7252783418559936, "learning_rate": 1.9545823195458234e-05, "loss": 0.6577, "step": 4126 }, { "epoch": 0.12049283232606348, "grad_norm": 0.7452633215481338, "learning_rate": 1.9545174371451746e-05, "loss": 0.7008, "step": 4127 }, { "epoch": 0.12052202855391084, "grad_norm": 0.7269572150199178, "learning_rate": 1.9544525547445255e-05, "loss": 0.6632, "step": 4128 }, { "epoch": 0.1205512247817582, "grad_norm": 0.6872838295394105, "learning_rate": 1.9543876723438767e-05, "loss": 0.6439, "step": 4129 }, { "epoch": 0.12058042100960556, "grad_norm": 0.737960568610869, "learning_rate": 1.954322789943228e-05, "loss": 0.662, "step": 4130 }, { "epoch": 0.12060961723745292, "grad_norm": 0.7265073140718293, "learning_rate": 1.954257907542579e-05, "loss": 0.708, "step": 4131 }, { "epoch": 0.12063881346530028, "grad_norm": 0.7868767286929499, "learning_rate": 1.9541930251419303e-05, "loss": 0.6984, "step": 4132 }, { "epoch": 0.12066800969314764, "grad_norm": 0.771452013781904, "learning_rate": 1.9541281427412815e-05, "loss": 0.6684, "step": 4133 }, { "epoch": 0.120697205920995, "grad_norm": 0.6691622532499063, "learning_rate": 1.9540632603406327e-05, "loss": 0.6039, "step": 4134 }, { "epoch": 0.12072640214884237, "grad_norm": 0.7417018963713776, "learning_rate": 1.953998377939984e-05, "loss": 0.7123, "step": 4135 }, { "epoch": 0.12075559837668973, "grad_norm": 0.6907836428697173, "learning_rate": 1.953933495539335e-05, "loss": 0.6513, "step": 4136 }, { "epoch": 0.12078479460453709, "grad_norm": 0.7447834938945491, "learning_rate": 1.9538686131386863e-05, "loss": 0.6874, "step": 4137 }, { "epoch": 0.12081399083238445, "grad_norm": 0.698317979164526, "learning_rate": 1.9538037307380375e-05, "loss": 0.5953, "step": 4138 }, { "epoch": 0.12084318706023182, "grad_norm": 0.7454424350381136, "learning_rate": 1.9537388483373887e-05, "loss": 0.6799, "step": 4139 }, { "epoch": 0.12087238328807919, "grad_norm": 0.7819741145013924, "learning_rate": 1.95367396593674e-05, "loss": 0.7929, "step": 4140 }, { "epoch": 0.12090157951592655, "grad_norm": 0.8077077191225013, "learning_rate": 1.953609083536091e-05, "loss": 0.7246, "step": 4141 }, { "epoch": 0.12093077574377391, "grad_norm": 0.8177409060788434, "learning_rate": 1.9535442011354423e-05, "loss": 0.6379, "step": 4142 }, { "epoch": 0.12095997197162127, "grad_norm": 0.7269013118143429, "learning_rate": 1.9534793187347932e-05, "loss": 0.6895, "step": 4143 }, { "epoch": 0.12098916819946863, "grad_norm": 0.6731783169142393, "learning_rate": 1.9534144363341444e-05, "loss": 0.5978, "step": 4144 }, { "epoch": 0.12101836442731599, "grad_norm": 0.8786734630595933, "learning_rate": 1.9533495539334956e-05, "loss": 0.654, "step": 4145 }, { "epoch": 0.12104756065516335, "grad_norm": 0.7687402125530383, "learning_rate": 1.9532846715328468e-05, "loss": 0.7096, "step": 4146 }, { "epoch": 0.12107675688301071, "grad_norm": 0.6849066726984449, "learning_rate": 1.953219789132198e-05, "loss": 0.6978, "step": 4147 }, { "epoch": 0.12110595311085808, "grad_norm": 0.7063247679101983, "learning_rate": 1.9531549067315492e-05, "loss": 0.6426, "step": 4148 }, { "epoch": 0.12113514933870544, "grad_norm": 0.6997072731106757, "learning_rate": 1.9530900243309004e-05, "loss": 0.7252, "step": 4149 }, { "epoch": 0.1211643455665528, "grad_norm": 0.7271697067734576, "learning_rate": 1.9530251419302516e-05, "loss": 0.6319, "step": 4150 }, { "epoch": 0.12119354179440016, "grad_norm": 0.652154787162999, "learning_rate": 1.9529602595296028e-05, "loss": 0.5666, "step": 4151 }, { "epoch": 0.12122273802224752, "grad_norm": 0.776920232338249, "learning_rate": 1.9528953771289537e-05, "loss": 0.7066, "step": 4152 }, { "epoch": 0.12125193425009488, "grad_norm": 0.6953836062655153, "learning_rate": 1.9528304947283052e-05, "loss": 0.677, "step": 4153 }, { "epoch": 0.12128113047794226, "grad_norm": 0.6792350506550603, "learning_rate": 1.9527656123276564e-05, "loss": 0.639, "step": 4154 }, { "epoch": 0.12131032670578962, "grad_norm": 0.6633599017619118, "learning_rate": 1.9527007299270076e-05, "loss": 0.625, "step": 4155 }, { "epoch": 0.12133952293363698, "grad_norm": 0.8116214634708518, "learning_rate": 1.952635847526359e-05, "loss": 0.7785, "step": 4156 }, { "epoch": 0.12136871916148434, "grad_norm": 0.7820297199941086, "learning_rate": 1.95257096512571e-05, "loss": 0.7723, "step": 4157 }, { "epoch": 0.1213979153893317, "grad_norm": 1.0808998846470215, "learning_rate": 1.952506082725061e-05, "loss": 0.8158, "step": 4158 }, { "epoch": 0.12142711161717906, "grad_norm": 0.7610166610131394, "learning_rate": 1.952441200324412e-05, "loss": 0.7469, "step": 4159 }, { "epoch": 0.12145630784502642, "grad_norm": 0.742155145622137, "learning_rate": 1.9523763179237633e-05, "loss": 0.7433, "step": 4160 }, { "epoch": 0.12148550407287378, "grad_norm": 0.7538200328541977, "learning_rate": 1.9523114355231145e-05, "loss": 0.773, "step": 4161 }, { "epoch": 0.12151470030072115, "grad_norm": 0.7074586916177026, "learning_rate": 1.9522465531224657e-05, "loss": 0.6495, "step": 4162 }, { "epoch": 0.1215438965285685, "grad_norm": 0.7618140356813589, "learning_rate": 1.952181670721817e-05, "loss": 0.6745, "step": 4163 }, { "epoch": 0.12157309275641587, "grad_norm": 1.0027461145083942, "learning_rate": 1.952116788321168e-05, "loss": 0.8301, "step": 4164 }, { "epoch": 0.12160228898426323, "grad_norm": 0.7416714519626579, "learning_rate": 1.9520519059205193e-05, "loss": 0.658, "step": 4165 }, { "epoch": 0.12163148521211059, "grad_norm": 0.9469704602004106, "learning_rate": 1.9519870235198702e-05, "loss": 0.687, "step": 4166 }, { "epoch": 0.12166068143995795, "grad_norm": 0.7774410632726024, "learning_rate": 1.9519221411192214e-05, "loss": 0.7489, "step": 4167 }, { "epoch": 0.12168987766780531, "grad_norm": 0.7676884024394289, "learning_rate": 1.9518572587185726e-05, "loss": 0.6419, "step": 4168 }, { "epoch": 0.12171907389565269, "grad_norm": 0.7381856443193142, "learning_rate": 1.9517923763179238e-05, "loss": 0.6337, "step": 4169 }, { "epoch": 0.12174827012350005, "grad_norm": 0.8127655609884915, "learning_rate": 1.951727493917275e-05, "loss": 0.7747, "step": 4170 }, { "epoch": 0.12177746635134741, "grad_norm": 0.6853724352605176, "learning_rate": 1.9516626115166262e-05, "loss": 0.663, "step": 4171 }, { "epoch": 0.12180666257919477, "grad_norm": 0.8541422943021669, "learning_rate": 1.9515977291159774e-05, "loss": 0.7163, "step": 4172 }, { "epoch": 0.12183585880704213, "grad_norm": 0.7302443644860807, "learning_rate": 1.9515328467153286e-05, "loss": 0.6895, "step": 4173 }, { "epoch": 0.1218650550348895, "grad_norm": 0.7593972883230322, "learning_rate": 1.9514679643146798e-05, "loss": 0.694, "step": 4174 }, { "epoch": 0.12189425126273686, "grad_norm": 0.8743172480696356, "learning_rate": 1.951403081914031e-05, "loss": 0.7297, "step": 4175 }, { "epoch": 0.12192344749058422, "grad_norm": 1.167684494644504, "learning_rate": 1.9513381995133822e-05, "loss": 0.767, "step": 4176 }, { "epoch": 0.12195264371843158, "grad_norm": 0.6828082476025481, "learning_rate": 1.9512733171127334e-05, "loss": 0.6261, "step": 4177 }, { "epoch": 0.12198183994627894, "grad_norm": 0.7085278855756969, "learning_rate": 1.9512084347120846e-05, "loss": 0.669, "step": 4178 }, { "epoch": 0.1220110361741263, "grad_norm": 0.71899707783538, "learning_rate": 1.9511435523114358e-05, "loss": 0.6725, "step": 4179 }, { "epoch": 0.12204023240197366, "grad_norm": 0.7225494834726349, "learning_rate": 1.951078669910787e-05, "loss": 0.6152, "step": 4180 }, { "epoch": 0.12206942862982102, "grad_norm": 0.700413197677039, "learning_rate": 1.951013787510138e-05, "loss": 0.6437, "step": 4181 }, { "epoch": 0.12209862485766838, "grad_norm": 0.9848688658289037, "learning_rate": 1.950948905109489e-05, "loss": 0.6837, "step": 4182 }, { "epoch": 0.12212782108551574, "grad_norm": 0.721811814482695, "learning_rate": 1.9508840227088403e-05, "loss": 0.6402, "step": 4183 }, { "epoch": 0.12215701731336312, "grad_norm": 0.6935628177546098, "learning_rate": 1.9508191403081915e-05, "loss": 0.648, "step": 4184 }, { "epoch": 0.12218621354121048, "grad_norm": 0.670637726762892, "learning_rate": 1.9507542579075427e-05, "loss": 0.6776, "step": 4185 }, { "epoch": 0.12221540976905784, "grad_norm": 0.7044789384256113, "learning_rate": 1.950689375506894e-05, "loss": 0.6323, "step": 4186 }, { "epoch": 0.1222446059969052, "grad_norm": 0.7424596691253659, "learning_rate": 1.950624493106245e-05, "loss": 0.7427, "step": 4187 }, { "epoch": 0.12227380222475256, "grad_norm": 0.7548029056492945, "learning_rate": 1.9505596107055963e-05, "loss": 0.7902, "step": 4188 }, { "epoch": 0.12230299845259993, "grad_norm": 0.6845612883658179, "learning_rate": 1.9504947283049475e-05, "loss": 0.5767, "step": 4189 }, { "epoch": 0.12233219468044729, "grad_norm": 0.6726377003870818, "learning_rate": 1.9504298459042984e-05, "loss": 0.6183, "step": 4190 }, { "epoch": 0.12236139090829465, "grad_norm": 0.7357587847072035, "learning_rate": 1.95036496350365e-05, "loss": 0.7562, "step": 4191 }, { "epoch": 0.12239058713614201, "grad_norm": 0.7138081706590117, "learning_rate": 1.950300081103001e-05, "loss": 0.6784, "step": 4192 }, { "epoch": 0.12241978336398937, "grad_norm": 0.7776847314681455, "learning_rate": 1.9502351987023523e-05, "loss": 0.7667, "step": 4193 }, { "epoch": 0.12244897959183673, "grad_norm": 0.7405720255047779, "learning_rate": 1.9501703163017035e-05, "loss": 0.681, "step": 4194 }, { "epoch": 0.1224781758196841, "grad_norm": 0.7487787068893736, "learning_rate": 1.9501054339010544e-05, "loss": 0.6929, "step": 4195 }, { "epoch": 0.12250737204753145, "grad_norm": 0.8644935701785954, "learning_rate": 1.9500405515004056e-05, "loss": 0.7465, "step": 4196 }, { "epoch": 0.12253656827537882, "grad_norm": 0.9825074886438271, "learning_rate": 1.9499756690997568e-05, "loss": 0.6626, "step": 4197 }, { "epoch": 0.12256576450322618, "grad_norm": 0.7716414572956979, "learning_rate": 1.949910786699108e-05, "loss": 0.7205, "step": 4198 }, { "epoch": 0.12259496073107355, "grad_norm": 0.7270296179311553, "learning_rate": 1.9498459042984592e-05, "loss": 0.716, "step": 4199 }, { "epoch": 0.12262415695892091, "grad_norm": 0.6298080603309562, "learning_rate": 1.9497810218978104e-05, "loss": 0.5505, "step": 4200 }, { "epoch": 0.12265335318676827, "grad_norm": 0.7344989675365348, "learning_rate": 1.9497161394971616e-05, "loss": 0.6809, "step": 4201 }, { "epoch": 0.12268254941461564, "grad_norm": 1.3409867311573287, "learning_rate": 1.9496512570965128e-05, "loss": 0.6582, "step": 4202 }, { "epoch": 0.122711745642463, "grad_norm": 0.6707598872560803, "learning_rate": 1.949586374695864e-05, "loss": 0.6082, "step": 4203 }, { "epoch": 0.12274094187031036, "grad_norm": 0.730196789382556, "learning_rate": 1.949521492295215e-05, "loss": 0.6437, "step": 4204 }, { "epoch": 0.12277013809815772, "grad_norm": 0.8338608063863285, "learning_rate": 1.949456609894566e-05, "loss": 0.75, "step": 4205 }, { "epoch": 0.12279933432600508, "grad_norm": 0.7875458801443928, "learning_rate": 1.9493917274939173e-05, "loss": 0.6989, "step": 4206 }, { "epoch": 0.12282853055385244, "grad_norm": 10.545497156293306, "learning_rate": 1.9493268450932685e-05, "loss": 0.8179, "step": 4207 }, { "epoch": 0.1228577267816998, "grad_norm": 0.7198938701532142, "learning_rate": 1.9492619626926197e-05, "loss": 0.6681, "step": 4208 }, { "epoch": 0.12288692300954716, "grad_norm": 0.7452035188821027, "learning_rate": 1.949197080291971e-05, "loss": 0.7112, "step": 4209 }, { "epoch": 0.12291611923739452, "grad_norm": 0.7304875604044294, "learning_rate": 1.949132197891322e-05, "loss": 0.6568, "step": 4210 }, { "epoch": 0.12294531546524189, "grad_norm": 0.6862531471291056, "learning_rate": 1.9490673154906733e-05, "loss": 0.6318, "step": 4211 }, { "epoch": 0.12297451169308925, "grad_norm": 0.7157692217577517, "learning_rate": 1.9490024330900245e-05, "loss": 0.6744, "step": 4212 }, { "epoch": 0.12300370792093661, "grad_norm": 0.7189885231599232, "learning_rate": 1.9489375506893757e-05, "loss": 0.6628, "step": 4213 }, { "epoch": 0.12303290414878398, "grad_norm": 0.6558862206320705, "learning_rate": 1.948872668288727e-05, "loss": 0.65, "step": 4214 }, { "epoch": 0.12306210037663134, "grad_norm": 0.8254888508278876, "learning_rate": 1.948807785888078e-05, "loss": 0.7641, "step": 4215 }, { "epoch": 0.1230912966044787, "grad_norm": 0.6772765312164261, "learning_rate": 1.9487429034874293e-05, "loss": 0.6774, "step": 4216 }, { "epoch": 0.12312049283232607, "grad_norm": 0.7253308632758119, "learning_rate": 1.9486780210867805e-05, "loss": 0.6938, "step": 4217 }, { "epoch": 0.12314968906017343, "grad_norm": 0.8068324057584195, "learning_rate": 1.9486131386861317e-05, "loss": 0.7735, "step": 4218 }, { "epoch": 0.12317888528802079, "grad_norm": 0.7491446680560376, "learning_rate": 1.9485482562854826e-05, "loss": 0.7147, "step": 4219 }, { "epoch": 0.12320808151586815, "grad_norm": 0.7252800147883361, "learning_rate": 1.9484833738848338e-05, "loss": 0.6624, "step": 4220 }, { "epoch": 0.12323727774371551, "grad_norm": 0.7243988504894208, "learning_rate": 1.948418491484185e-05, "loss": 0.646, "step": 4221 }, { "epoch": 0.12326647397156287, "grad_norm": 0.8081933417938857, "learning_rate": 1.9483536090835362e-05, "loss": 0.7098, "step": 4222 }, { "epoch": 0.12329567019941023, "grad_norm": 0.6793147035405328, "learning_rate": 1.9482887266828874e-05, "loss": 0.584, "step": 4223 }, { "epoch": 0.1233248664272576, "grad_norm": 0.8310498075878905, "learning_rate": 1.9482238442822386e-05, "loss": 0.6762, "step": 4224 }, { "epoch": 0.12335406265510496, "grad_norm": 0.73265400021457, "learning_rate": 1.9481589618815898e-05, "loss": 0.7173, "step": 4225 }, { "epoch": 0.12338325888295232, "grad_norm": 0.8003377485341588, "learning_rate": 1.948094079480941e-05, "loss": 0.6573, "step": 4226 }, { "epoch": 0.12341245511079968, "grad_norm": 0.7035882532861368, "learning_rate": 1.9480291970802922e-05, "loss": 0.6909, "step": 4227 }, { "epoch": 0.12344165133864704, "grad_norm": 0.6752121768881038, "learning_rate": 1.947964314679643e-05, "loss": 0.6426, "step": 4228 }, { "epoch": 0.12347084756649442, "grad_norm": 0.6854504977797121, "learning_rate": 1.9478994322789946e-05, "loss": 0.6294, "step": 4229 }, { "epoch": 0.12350004379434178, "grad_norm": 0.7339531610795681, "learning_rate": 1.9478345498783458e-05, "loss": 0.6933, "step": 4230 }, { "epoch": 0.12352924002218914, "grad_norm": 0.674013517670468, "learning_rate": 1.947769667477697e-05, "loss": 0.5851, "step": 4231 }, { "epoch": 0.1235584362500365, "grad_norm": 0.6953740182960311, "learning_rate": 1.9477047850770482e-05, "loss": 0.5971, "step": 4232 }, { "epoch": 0.12358763247788386, "grad_norm": 0.7867616436893827, "learning_rate": 1.947639902676399e-05, "loss": 0.7493, "step": 4233 }, { "epoch": 0.12361682870573122, "grad_norm": 0.7209965287874669, "learning_rate": 1.9475750202757503e-05, "loss": 0.7111, "step": 4234 }, { "epoch": 0.12364602493357858, "grad_norm": 0.720166328215632, "learning_rate": 1.9475101378751015e-05, "loss": 0.6548, "step": 4235 }, { "epoch": 0.12367522116142594, "grad_norm": 0.7076370880755146, "learning_rate": 1.9474452554744527e-05, "loss": 0.6788, "step": 4236 }, { "epoch": 0.1237044173892733, "grad_norm": 0.7071807857517018, "learning_rate": 1.947380373073804e-05, "loss": 0.5982, "step": 4237 }, { "epoch": 0.12373361361712067, "grad_norm": 0.7527023906605326, "learning_rate": 1.947315490673155e-05, "loss": 0.7557, "step": 4238 }, { "epoch": 0.12376280984496803, "grad_norm": 0.6818240125745788, "learning_rate": 1.9472506082725063e-05, "loss": 0.6841, "step": 4239 }, { "epoch": 0.12379200607281539, "grad_norm": 0.7376094160296168, "learning_rate": 1.9471857258718575e-05, "loss": 0.7324, "step": 4240 }, { "epoch": 0.12382120230066275, "grad_norm": 0.6637146850520965, "learning_rate": 1.9471208434712087e-05, "loss": 0.609, "step": 4241 }, { "epoch": 0.12385039852851011, "grad_norm": 0.7478712368047713, "learning_rate": 1.9470559610705596e-05, "loss": 0.7712, "step": 4242 }, { "epoch": 0.12387959475635747, "grad_norm": 0.7012416294346628, "learning_rate": 1.9469910786699108e-05, "loss": 0.6873, "step": 4243 }, { "epoch": 0.12390879098420485, "grad_norm": 0.7684644909235655, "learning_rate": 1.946926196269262e-05, "loss": 0.7403, "step": 4244 }, { "epoch": 0.12393798721205221, "grad_norm": 0.7441569841006686, "learning_rate": 1.946861313868613e-05, "loss": 0.6757, "step": 4245 }, { "epoch": 0.12396718343989957, "grad_norm": 0.7428516523466169, "learning_rate": 1.9467964314679644e-05, "loss": 0.681, "step": 4246 }, { "epoch": 0.12399637966774693, "grad_norm": 0.6666826907854466, "learning_rate": 1.9467315490673156e-05, "loss": 0.6642, "step": 4247 }, { "epoch": 0.12402557589559429, "grad_norm": 0.6849012404124091, "learning_rate": 1.9466666666666668e-05, "loss": 0.6044, "step": 4248 }, { "epoch": 0.12405477212344165, "grad_norm": 0.7445900847391126, "learning_rate": 1.946601784266018e-05, "loss": 0.6573, "step": 4249 }, { "epoch": 0.12408396835128901, "grad_norm": 0.7800860791970293, "learning_rate": 1.9465369018653692e-05, "loss": 0.5954, "step": 4250 }, { "epoch": 0.12411316457913638, "grad_norm": 0.693401556618405, "learning_rate": 1.9464720194647204e-05, "loss": 0.65, "step": 4251 }, { "epoch": 0.12414236080698374, "grad_norm": 0.7028405441881416, "learning_rate": 1.9464071370640716e-05, "loss": 0.689, "step": 4252 }, { "epoch": 0.1241715570348311, "grad_norm": 0.7830278702648612, "learning_rate": 1.9463422546634228e-05, "loss": 0.7311, "step": 4253 }, { "epoch": 0.12420075326267846, "grad_norm": 0.7682608291087666, "learning_rate": 1.946277372262774e-05, "loss": 0.6905, "step": 4254 }, { "epoch": 0.12422994949052582, "grad_norm": 0.7142005610993674, "learning_rate": 1.9462124898621252e-05, "loss": 0.6998, "step": 4255 }, { "epoch": 0.12425914571837318, "grad_norm": 0.7790492735385044, "learning_rate": 1.9461476074614764e-05, "loss": 0.7129, "step": 4256 }, { "epoch": 0.12428834194622054, "grad_norm": 0.7615731707777968, "learning_rate": 1.9460827250608273e-05, "loss": 0.6553, "step": 4257 }, { "epoch": 0.1243175381740679, "grad_norm": 0.6866627137712107, "learning_rate": 1.9460178426601785e-05, "loss": 0.6123, "step": 4258 }, { "epoch": 0.12434673440191528, "grad_norm": 0.7481241428685383, "learning_rate": 1.9459529602595297e-05, "loss": 0.7686, "step": 4259 }, { "epoch": 0.12437593062976264, "grad_norm": 0.8159428863946213, "learning_rate": 1.945888077858881e-05, "loss": 0.8136, "step": 4260 }, { "epoch": 0.12440512685761, "grad_norm": 0.6919710066319055, "learning_rate": 1.945823195458232e-05, "loss": 0.6419, "step": 4261 }, { "epoch": 0.12443432308545736, "grad_norm": 0.801788564907721, "learning_rate": 1.9457583130575833e-05, "loss": 0.6532, "step": 4262 }, { "epoch": 0.12446351931330472, "grad_norm": 0.7107692210416345, "learning_rate": 1.9456934306569345e-05, "loss": 0.6747, "step": 4263 }, { "epoch": 0.12449271554115209, "grad_norm": 0.7411678925280244, "learning_rate": 1.9456285482562857e-05, "loss": 0.6832, "step": 4264 }, { "epoch": 0.12452191176899945, "grad_norm": 0.7386241492206568, "learning_rate": 1.945563665855637e-05, "loss": 0.7512, "step": 4265 }, { "epoch": 0.12455110799684681, "grad_norm": 0.7168828394471615, "learning_rate": 1.945498783454988e-05, "loss": 0.5802, "step": 4266 }, { "epoch": 0.12458030422469417, "grad_norm": 0.6518917016168893, "learning_rate": 1.9454339010543393e-05, "loss": 0.5585, "step": 4267 }, { "epoch": 0.12460950045254153, "grad_norm": 0.6410618990848109, "learning_rate": 1.9453690186536905e-05, "loss": 0.5687, "step": 4268 }, { "epoch": 0.12463869668038889, "grad_norm": 0.7604823372794612, "learning_rate": 1.9453041362530417e-05, "loss": 0.6853, "step": 4269 }, { "epoch": 0.12466789290823625, "grad_norm": 0.6786874777991725, "learning_rate": 1.945239253852393e-05, "loss": 0.6173, "step": 4270 }, { "epoch": 0.12469708913608361, "grad_norm": 0.7611580706635005, "learning_rate": 1.9451743714517438e-05, "loss": 0.7418, "step": 4271 }, { "epoch": 0.12472628536393097, "grad_norm": 0.9429735873461009, "learning_rate": 1.945109489051095e-05, "loss": 0.6507, "step": 4272 }, { "epoch": 0.12475548159177834, "grad_norm": 0.6734964666073666, "learning_rate": 1.945044606650446e-05, "loss": 0.6039, "step": 4273 }, { "epoch": 0.12478467781962571, "grad_norm": 0.7409712210582797, "learning_rate": 1.9449797242497974e-05, "loss": 0.671, "step": 4274 }, { "epoch": 0.12481387404747307, "grad_norm": 0.727892788022951, "learning_rate": 1.9449148418491486e-05, "loss": 0.6823, "step": 4275 }, { "epoch": 0.12484307027532043, "grad_norm": 0.8106416189149072, "learning_rate": 1.9448499594484998e-05, "loss": 0.844, "step": 4276 }, { "epoch": 0.1248722665031678, "grad_norm": 0.7232840505533623, "learning_rate": 1.944785077047851e-05, "loss": 0.7471, "step": 4277 }, { "epoch": 0.12490146273101516, "grad_norm": 0.8805282902124247, "learning_rate": 1.9447201946472022e-05, "loss": 0.6867, "step": 4278 }, { "epoch": 0.12493065895886252, "grad_norm": 0.6846427466099307, "learning_rate": 1.9446553122465534e-05, "loss": 0.6494, "step": 4279 }, { "epoch": 0.12495985518670988, "grad_norm": 0.7586106648174653, "learning_rate": 1.9445904298459042e-05, "loss": 0.7163, "step": 4280 }, { "epoch": 0.12498905141455724, "grad_norm": 0.6907483338629524, "learning_rate": 1.9445255474452554e-05, "loss": 0.623, "step": 4281 }, { "epoch": 0.1250182476424046, "grad_norm": 0.8140913300742889, "learning_rate": 1.9444606650446067e-05, "loss": 0.7466, "step": 4282 }, { "epoch": 0.12504744387025196, "grad_norm": 0.697341112685968, "learning_rate": 1.944395782643958e-05, "loss": 0.6813, "step": 4283 }, { "epoch": 0.12507664009809932, "grad_norm": 0.7222194430596307, "learning_rate": 1.944330900243309e-05, "loss": 0.6957, "step": 4284 }, { "epoch": 0.12510583632594668, "grad_norm": 0.7066588838955911, "learning_rate": 1.9442660178426606e-05, "loss": 0.7037, "step": 4285 }, { "epoch": 0.12513503255379405, "grad_norm": 0.6670623453751717, "learning_rate": 1.9442011354420115e-05, "loss": 0.6139, "step": 4286 }, { "epoch": 0.1251642287816414, "grad_norm": 0.7088925313088678, "learning_rate": 1.9441362530413627e-05, "loss": 0.6431, "step": 4287 }, { "epoch": 0.12519342500948877, "grad_norm": 0.7033633138916575, "learning_rate": 1.944071370640714e-05, "loss": 0.6391, "step": 4288 }, { "epoch": 0.12522262123733613, "grad_norm": 0.7298915518325667, "learning_rate": 1.944006488240065e-05, "loss": 0.7217, "step": 4289 }, { "epoch": 0.1252518174651835, "grad_norm": 0.7597958769924104, "learning_rate": 1.9439416058394163e-05, "loss": 0.7494, "step": 4290 }, { "epoch": 0.12528101369303085, "grad_norm": 0.7092181911154168, "learning_rate": 1.9438767234387675e-05, "loss": 0.6414, "step": 4291 }, { "epoch": 0.1253102099208782, "grad_norm": 0.8129283081512008, "learning_rate": 1.9438118410381187e-05, "loss": 0.6925, "step": 4292 }, { "epoch": 0.12533940614872557, "grad_norm": 0.7536916765427707, "learning_rate": 1.94374695863747e-05, "loss": 0.7975, "step": 4293 }, { "epoch": 0.12536860237657294, "grad_norm": 0.7312465140327419, "learning_rate": 1.943682076236821e-05, "loss": 0.6576, "step": 4294 }, { "epoch": 0.1253977986044203, "grad_norm": 0.7178847295596876, "learning_rate": 1.943617193836172e-05, "loss": 0.5826, "step": 4295 }, { "epoch": 0.12542699483226766, "grad_norm": 0.7839112563408017, "learning_rate": 1.943552311435523e-05, "loss": 0.774, "step": 4296 }, { "epoch": 0.12545619106011505, "grad_norm": 0.7010067004846062, "learning_rate": 1.9434874290348744e-05, "loss": 0.6461, "step": 4297 }, { "epoch": 0.1254853872879624, "grad_norm": 0.7643835156478499, "learning_rate": 1.9434225466342256e-05, "loss": 0.6972, "step": 4298 }, { "epoch": 0.12551458351580977, "grad_norm": 0.6794126456411631, "learning_rate": 1.9433576642335768e-05, "loss": 0.6307, "step": 4299 }, { "epoch": 0.12554377974365713, "grad_norm": 0.7577547519891705, "learning_rate": 1.943292781832928e-05, "loss": 0.7616, "step": 4300 }, { "epoch": 0.1255729759715045, "grad_norm": 0.6418447381340636, "learning_rate": 1.943227899432279e-05, "loss": 0.5948, "step": 4301 }, { "epoch": 0.12560217219935185, "grad_norm": 0.711383760127586, "learning_rate": 1.9431630170316304e-05, "loss": 0.6999, "step": 4302 }, { "epoch": 0.1256313684271992, "grad_norm": 0.6555902162127493, "learning_rate": 1.9430981346309816e-05, "loss": 0.665, "step": 4303 }, { "epoch": 0.12566056465504657, "grad_norm": 0.7554305555800299, "learning_rate": 1.9430332522303328e-05, "loss": 0.675, "step": 4304 }, { "epoch": 0.12568976088289394, "grad_norm": 0.7808372919575127, "learning_rate": 1.942968369829684e-05, "loss": 0.757, "step": 4305 }, { "epoch": 0.1257189571107413, "grad_norm": 0.816580860834685, "learning_rate": 1.9429034874290352e-05, "loss": 0.8312, "step": 4306 }, { "epoch": 0.12574815333858866, "grad_norm": 0.6971540593402448, "learning_rate": 1.9428386050283864e-05, "loss": 0.6774, "step": 4307 }, { "epoch": 0.12577734956643602, "grad_norm": 0.811976199396213, "learning_rate": 1.9427737226277376e-05, "loss": 0.7001, "step": 4308 }, { "epoch": 0.12580654579428338, "grad_norm": 0.730584167750526, "learning_rate": 1.9427088402270884e-05, "loss": 0.6527, "step": 4309 }, { "epoch": 0.12583574202213074, "grad_norm": 0.7115845565828895, "learning_rate": 1.9426439578264397e-05, "loss": 0.7002, "step": 4310 }, { "epoch": 0.1258649382499781, "grad_norm": 0.9685883223774797, "learning_rate": 1.942579075425791e-05, "loss": 0.6129, "step": 4311 }, { "epoch": 0.12589413447782546, "grad_norm": 0.6744202582636682, "learning_rate": 1.942514193025142e-05, "loss": 0.6081, "step": 4312 }, { "epoch": 0.12592333070567283, "grad_norm": 0.7520151947632827, "learning_rate": 1.9424493106244933e-05, "loss": 0.6921, "step": 4313 }, { "epoch": 0.1259525269335202, "grad_norm": 0.6954843978232109, "learning_rate": 1.9423844282238445e-05, "loss": 0.6761, "step": 4314 }, { "epoch": 0.12598172316136755, "grad_norm": 0.767772337393321, "learning_rate": 1.9423195458231957e-05, "loss": 0.7305, "step": 4315 }, { "epoch": 0.1260109193892149, "grad_norm": 0.74297770337535, "learning_rate": 1.942254663422547e-05, "loss": 0.6502, "step": 4316 }, { "epoch": 0.12604011561706227, "grad_norm": 0.6663881998706764, "learning_rate": 1.942189781021898e-05, "loss": 0.6003, "step": 4317 }, { "epoch": 0.12606931184490963, "grad_norm": 0.6934037331311254, "learning_rate": 1.942124898621249e-05, "loss": 0.6936, "step": 4318 }, { "epoch": 0.126098508072757, "grad_norm": 0.7083529771738997, "learning_rate": 1.9420600162206e-05, "loss": 0.6691, "step": 4319 }, { "epoch": 0.12612770430060435, "grad_norm": 0.7810183960201379, "learning_rate": 1.9419951338199513e-05, "loss": 0.77, "step": 4320 }, { "epoch": 0.12615690052845172, "grad_norm": 0.6535106139863299, "learning_rate": 1.9419302514193025e-05, "loss": 0.5921, "step": 4321 }, { "epoch": 0.12618609675629908, "grad_norm": 0.7664485639886418, "learning_rate": 1.9418653690186537e-05, "loss": 0.6788, "step": 4322 }, { "epoch": 0.12621529298414644, "grad_norm": 0.7585534976477601, "learning_rate": 1.9418004866180053e-05, "loss": 0.678, "step": 4323 }, { "epoch": 0.1262444892119938, "grad_norm": 0.7185972781796723, "learning_rate": 1.941735604217356e-05, "loss": 0.5936, "step": 4324 }, { "epoch": 0.12627368543984116, "grad_norm": 0.6752805708232863, "learning_rate": 1.9416707218167074e-05, "loss": 0.6181, "step": 4325 }, { "epoch": 0.12630288166768852, "grad_norm": 0.722076135518708, "learning_rate": 1.9416058394160586e-05, "loss": 0.7192, "step": 4326 }, { "epoch": 0.1263320778955359, "grad_norm": 0.6604514332052892, "learning_rate": 1.9415409570154098e-05, "loss": 0.6021, "step": 4327 }, { "epoch": 0.12636127412338327, "grad_norm": 0.7897578749711529, "learning_rate": 1.941476074614761e-05, "loss": 0.7487, "step": 4328 }, { "epoch": 0.12639047035123063, "grad_norm": 0.8107216461258523, "learning_rate": 1.941411192214112e-05, "loss": 0.71, "step": 4329 }, { "epoch": 0.126419666579078, "grad_norm": 0.7691250289002239, "learning_rate": 1.9413463098134634e-05, "loss": 0.7123, "step": 4330 }, { "epoch": 0.12644886280692536, "grad_norm": 0.6836009614234451, "learning_rate": 1.9412814274128146e-05, "loss": 0.6157, "step": 4331 }, { "epoch": 0.12647805903477272, "grad_norm": 0.7051994457116142, "learning_rate": 1.9412165450121658e-05, "loss": 0.6996, "step": 4332 }, { "epoch": 0.12650725526262008, "grad_norm": 0.7263979035418869, "learning_rate": 1.9411516626115166e-05, "loss": 0.6689, "step": 4333 }, { "epoch": 0.12653645149046744, "grad_norm": 0.6948470759974843, "learning_rate": 1.941086780210868e-05, "loss": 0.6134, "step": 4334 }, { "epoch": 0.1265656477183148, "grad_norm": 0.7211018975344108, "learning_rate": 1.941021897810219e-05, "loss": 0.682, "step": 4335 }, { "epoch": 0.12659484394616216, "grad_norm": 0.6834533336172249, "learning_rate": 1.9409570154095702e-05, "loss": 0.5861, "step": 4336 }, { "epoch": 0.12662404017400952, "grad_norm": 0.7913696399773219, "learning_rate": 1.9408921330089214e-05, "loss": 0.7028, "step": 4337 }, { "epoch": 0.12665323640185688, "grad_norm": 0.7321451285910804, "learning_rate": 1.9408272506082726e-05, "loss": 0.6927, "step": 4338 }, { "epoch": 0.12668243262970424, "grad_norm": 0.6749678889193154, "learning_rate": 1.940762368207624e-05, "loss": 0.5818, "step": 4339 }, { "epoch": 0.1267116288575516, "grad_norm": 0.6740259261625207, "learning_rate": 1.940697485806975e-05, "loss": 0.6691, "step": 4340 }, { "epoch": 0.12674082508539897, "grad_norm": 0.873611989734037, "learning_rate": 1.940632603406326e-05, "loss": 0.7172, "step": 4341 }, { "epoch": 0.12677002131324633, "grad_norm": 0.8042008899355633, "learning_rate": 1.9405677210056775e-05, "loss": 0.7464, "step": 4342 }, { "epoch": 0.1267992175410937, "grad_norm": 0.781465323517259, "learning_rate": 1.9405028386050287e-05, "loss": 0.6816, "step": 4343 }, { "epoch": 0.12682841376894105, "grad_norm": 0.729410794013637, "learning_rate": 1.94043795620438e-05, "loss": 0.6588, "step": 4344 }, { "epoch": 0.1268576099967884, "grad_norm": 0.6920212629537231, "learning_rate": 1.940373073803731e-05, "loss": 0.5798, "step": 4345 }, { "epoch": 0.12688680622463577, "grad_norm": 0.7259371523188874, "learning_rate": 1.9403081914030823e-05, "loss": 0.6837, "step": 4346 }, { "epoch": 0.12691600245248313, "grad_norm": 0.7396138912190928, "learning_rate": 1.940243309002433e-05, "loss": 0.6892, "step": 4347 }, { "epoch": 0.1269451986803305, "grad_norm": 0.8540923956045566, "learning_rate": 1.9401784266017843e-05, "loss": 0.7154, "step": 4348 }, { "epoch": 0.12697439490817786, "grad_norm": 0.6900891194568769, "learning_rate": 1.9401135442011355e-05, "loss": 0.6003, "step": 4349 }, { "epoch": 0.12700359113602522, "grad_norm": 0.67844266844704, "learning_rate": 1.9400486618004867e-05, "loss": 0.6494, "step": 4350 }, { "epoch": 0.12703278736387258, "grad_norm": 0.7365961189633703, "learning_rate": 1.939983779399838e-05, "loss": 0.7334, "step": 4351 }, { "epoch": 0.12706198359171994, "grad_norm": 0.7722299635748003, "learning_rate": 1.939918896999189e-05, "loss": 0.6394, "step": 4352 }, { "epoch": 0.1270911798195673, "grad_norm": 0.7734363962407848, "learning_rate": 1.9398540145985404e-05, "loss": 0.8445, "step": 4353 }, { "epoch": 0.12712037604741466, "grad_norm": 0.7431593117404852, "learning_rate": 1.9397891321978916e-05, "loss": 0.6684, "step": 4354 }, { "epoch": 0.12714957227526202, "grad_norm": 0.7333288188106594, "learning_rate": 1.9397242497972428e-05, "loss": 0.6418, "step": 4355 }, { "epoch": 0.12717876850310939, "grad_norm": 0.7653006865067152, "learning_rate": 1.9396593673965936e-05, "loss": 0.7051, "step": 4356 }, { "epoch": 0.12720796473095677, "grad_norm": 0.732274023850332, "learning_rate": 1.9395944849959448e-05, "loss": 0.6907, "step": 4357 }, { "epoch": 0.12723716095880414, "grad_norm": 1.2067193718612983, "learning_rate": 1.939529602595296e-05, "loss": 0.6965, "step": 4358 }, { "epoch": 0.1272663571866515, "grad_norm": 0.695134376068797, "learning_rate": 1.9394647201946472e-05, "loss": 0.6656, "step": 4359 }, { "epoch": 0.12729555341449886, "grad_norm": 0.7565217352081993, "learning_rate": 1.9393998377939984e-05, "loss": 0.6945, "step": 4360 }, { "epoch": 0.12732474964234622, "grad_norm": 0.7365799824434208, "learning_rate": 1.93933495539335e-05, "loss": 0.723, "step": 4361 }, { "epoch": 0.12735394587019358, "grad_norm": 0.9135104757995512, "learning_rate": 1.939270072992701e-05, "loss": 0.6572, "step": 4362 }, { "epoch": 0.12738314209804094, "grad_norm": 0.7236628346109866, "learning_rate": 1.939205190592052e-05, "loss": 0.6614, "step": 4363 }, { "epoch": 0.1274123383258883, "grad_norm": 0.8744418949858205, "learning_rate": 1.9391403081914032e-05, "loss": 0.6039, "step": 4364 }, { "epoch": 0.12744153455373566, "grad_norm": 0.659129258117948, "learning_rate": 1.9390754257907544e-05, "loss": 0.5789, "step": 4365 }, { "epoch": 0.12747073078158302, "grad_norm": 0.7622074906700549, "learning_rate": 1.9390105433901056e-05, "loss": 0.7648, "step": 4366 }, { "epoch": 0.12749992700943039, "grad_norm": 0.7071281736680293, "learning_rate": 1.938945660989457e-05, "loss": 0.6962, "step": 4367 }, { "epoch": 0.12752912323727775, "grad_norm": 0.7102665369798881, "learning_rate": 1.938880778588808e-05, "loss": 0.6905, "step": 4368 }, { "epoch": 0.1275583194651251, "grad_norm": 0.7940648677554152, "learning_rate": 1.9388158961881593e-05, "loss": 0.7428, "step": 4369 }, { "epoch": 0.12758751569297247, "grad_norm": 0.6751987565302171, "learning_rate": 1.9387510137875105e-05, "loss": 0.6095, "step": 4370 }, { "epoch": 0.12761671192081983, "grad_norm": 0.8692570432265453, "learning_rate": 1.9386861313868613e-05, "loss": 0.7373, "step": 4371 }, { "epoch": 0.1276459081486672, "grad_norm": 0.7722054136590425, "learning_rate": 1.9386212489862125e-05, "loss": 0.7666, "step": 4372 }, { "epoch": 0.12767510437651455, "grad_norm": 0.7162527377979366, "learning_rate": 1.9385563665855637e-05, "loss": 0.701, "step": 4373 }, { "epoch": 0.12770430060436191, "grad_norm": 0.6657105982366613, "learning_rate": 1.938491484184915e-05, "loss": 0.6098, "step": 4374 }, { "epoch": 0.12773349683220928, "grad_norm": 0.7312496282432887, "learning_rate": 1.938426601784266e-05, "loss": 0.676, "step": 4375 }, { "epoch": 0.12776269306005664, "grad_norm": 0.6791112607092341, "learning_rate": 1.9383617193836173e-05, "loss": 0.6349, "step": 4376 }, { "epoch": 0.127791889287904, "grad_norm": 0.6834226071486291, "learning_rate": 1.9382968369829685e-05, "loss": 0.5904, "step": 4377 }, { "epoch": 0.12782108551575136, "grad_norm": 0.6910227898793646, "learning_rate": 1.9382319545823197e-05, "loss": 0.5936, "step": 4378 }, { "epoch": 0.12785028174359872, "grad_norm": 0.6655174197199688, "learning_rate": 1.9381670721816706e-05, "loss": 0.6002, "step": 4379 }, { "epoch": 0.12787947797144608, "grad_norm": 0.7217012228505574, "learning_rate": 1.938102189781022e-05, "loss": 0.6876, "step": 4380 }, { "epoch": 0.12790867419929344, "grad_norm": 0.6934671711856949, "learning_rate": 1.9380373073803733e-05, "loss": 0.5748, "step": 4381 }, { "epoch": 0.1279378704271408, "grad_norm": 0.8112060903136896, "learning_rate": 1.9379724249797246e-05, "loss": 0.5936, "step": 4382 }, { "epoch": 0.12796706665498817, "grad_norm": 0.7498715528584723, "learning_rate": 1.9379075425790758e-05, "loss": 0.6732, "step": 4383 }, { "epoch": 0.12799626288283553, "grad_norm": 0.7039446435846014, "learning_rate": 1.937842660178427e-05, "loss": 0.6383, "step": 4384 }, { "epoch": 0.1280254591106829, "grad_norm": 0.7456527273084291, "learning_rate": 1.9377777777777778e-05, "loss": 0.7404, "step": 4385 }, { "epoch": 0.12805465533853025, "grad_norm": 0.7338037878085117, "learning_rate": 1.937712895377129e-05, "loss": 0.722, "step": 4386 }, { "epoch": 0.12808385156637764, "grad_norm": 0.9269547486292269, "learning_rate": 1.9376480129764802e-05, "loss": 0.6218, "step": 4387 }, { "epoch": 0.128113047794225, "grad_norm": 0.6537298028703706, "learning_rate": 1.9375831305758314e-05, "loss": 0.5926, "step": 4388 }, { "epoch": 0.12814224402207236, "grad_norm": 0.7168174910871261, "learning_rate": 1.9375182481751826e-05, "loss": 0.6504, "step": 4389 }, { "epoch": 0.12817144024991972, "grad_norm": 0.7133210124844028, "learning_rate": 1.937453365774534e-05, "loss": 0.6423, "step": 4390 }, { "epoch": 0.12820063647776708, "grad_norm": 0.6817883947186895, "learning_rate": 1.937388483373885e-05, "loss": 0.6792, "step": 4391 }, { "epoch": 0.12822983270561444, "grad_norm": 0.7168655822019855, "learning_rate": 1.9373236009732362e-05, "loss": 0.6512, "step": 4392 }, { "epoch": 0.1282590289334618, "grad_norm": 0.7684822935090785, "learning_rate": 1.9372587185725874e-05, "loss": 0.713, "step": 4393 }, { "epoch": 0.12828822516130917, "grad_norm": 0.7928397608661911, "learning_rate": 1.9371938361719383e-05, "loss": 0.6117, "step": 4394 }, { "epoch": 0.12831742138915653, "grad_norm": 0.8239084833691349, "learning_rate": 1.9371289537712895e-05, "loss": 0.6625, "step": 4395 }, { "epoch": 0.1283466176170039, "grad_norm": 0.9718438938478573, "learning_rate": 1.9370640713706407e-05, "loss": 0.7697, "step": 4396 }, { "epoch": 0.12837581384485125, "grad_norm": 0.7292602835580335, "learning_rate": 1.936999188969992e-05, "loss": 0.6941, "step": 4397 }, { "epoch": 0.1284050100726986, "grad_norm": 0.7057564231743613, "learning_rate": 1.936934306569343e-05, "loss": 0.6327, "step": 4398 }, { "epoch": 0.12843420630054597, "grad_norm": 0.7385391003183316, "learning_rate": 1.9368694241686947e-05, "loss": 0.6876, "step": 4399 }, { "epoch": 0.12846340252839333, "grad_norm": 0.6891385006510087, "learning_rate": 1.9368045417680455e-05, "loss": 0.6014, "step": 4400 }, { "epoch": 0.1284925987562407, "grad_norm": 0.6984611004185425, "learning_rate": 1.9367396593673967e-05, "loss": 0.6784, "step": 4401 }, { "epoch": 0.12852179498408806, "grad_norm": 0.8538220195438507, "learning_rate": 1.936674776966748e-05, "loss": 0.7129, "step": 4402 }, { "epoch": 0.12855099121193542, "grad_norm": 0.8384739833544357, "learning_rate": 1.936609894566099e-05, "loss": 0.7181, "step": 4403 }, { "epoch": 0.12858018743978278, "grad_norm": 0.7121351824176636, "learning_rate": 1.9365450121654503e-05, "loss": 0.6466, "step": 4404 }, { "epoch": 0.12860938366763014, "grad_norm": 0.7373101586971673, "learning_rate": 1.9364801297648015e-05, "loss": 0.7202, "step": 4405 }, { "epoch": 0.1286385798954775, "grad_norm": 0.7266386265953038, "learning_rate": 1.9364152473641527e-05, "loss": 0.7304, "step": 4406 }, { "epoch": 0.12866777612332486, "grad_norm": 0.7696158287641176, "learning_rate": 1.936350364963504e-05, "loss": 0.7944, "step": 4407 }, { "epoch": 0.12869697235117222, "grad_norm": 0.6560538292360276, "learning_rate": 1.936285482562855e-05, "loss": 0.6058, "step": 4408 }, { "epoch": 0.12872616857901958, "grad_norm": 0.7744524768977077, "learning_rate": 1.936220600162206e-05, "loss": 0.7436, "step": 4409 }, { "epoch": 0.12875536480686695, "grad_norm": 0.6942400129515611, "learning_rate": 1.9361557177615572e-05, "loss": 0.5964, "step": 4410 }, { "epoch": 0.1287845610347143, "grad_norm": 0.6861399460939474, "learning_rate": 1.9360908353609084e-05, "loss": 0.6436, "step": 4411 }, { "epoch": 0.12881375726256167, "grad_norm": 0.6577411333218925, "learning_rate": 1.9360259529602596e-05, "loss": 0.6339, "step": 4412 }, { "epoch": 0.12884295349040903, "grad_norm": 0.7144915629836333, "learning_rate": 1.9359610705596108e-05, "loss": 0.6896, "step": 4413 }, { "epoch": 0.1288721497182564, "grad_norm": 0.7258666225580499, "learning_rate": 1.935896188158962e-05, "loss": 0.663, "step": 4414 }, { "epoch": 0.12890134594610375, "grad_norm": 0.6142242463164199, "learning_rate": 1.9358313057583132e-05, "loss": 0.5545, "step": 4415 }, { "epoch": 0.1289305421739511, "grad_norm": 0.7259163238219937, "learning_rate": 1.9357664233576644e-05, "loss": 0.6998, "step": 4416 }, { "epoch": 0.12895973840179847, "grad_norm": 0.7420730310813027, "learning_rate": 1.9357015409570153e-05, "loss": 0.7186, "step": 4417 }, { "epoch": 0.12898893462964586, "grad_norm": 0.7537945928167444, "learning_rate": 1.935636658556367e-05, "loss": 0.6748, "step": 4418 }, { "epoch": 0.12901813085749322, "grad_norm": 0.726819161013207, "learning_rate": 1.935571776155718e-05, "loss": 0.7004, "step": 4419 }, { "epoch": 0.12904732708534059, "grad_norm": 0.7309589301764673, "learning_rate": 1.9355068937550692e-05, "loss": 0.5954, "step": 4420 }, { "epoch": 0.12907652331318795, "grad_norm": 0.7487446019427267, "learning_rate": 1.9354420113544204e-05, "loss": 0.688, "step": 4421 }, { "epoch": 0.1291057195410353, "grad_norm": 0.9178482601912415, "learning_rate": 1.9353771289537716e-05, "loss": 0.7059, "step": 4422 }, { "epoch": 0.12913491576888267, "grad_norm": 0.7663448323878912, "learning_rate": 1.9353122465531225e-05, "loss": 0.6292, "step": 4423 }, { "epoch": 0.12916411199673003, "grad_norm": 0.7246515795381688, "learning_rate": 1.9352473641524737e-05, "loss": 0.6772, "step": 4424 }, { "epoch": 0.1291933082245774, "grad_norm": 0.853916650867773, "learning_rate": 1.935182481751825e-05, "loss": 0.6828, "step": 4425 }, { "epoch": 0.12922250445242475, "grad_norm": 0.8238896968146485, "learning_rate": 1.935117599351176e-05, "loss": 0.7152, "step": 4426 }, { "epoch": 0.1292517006802721, "grad_norm": 0.7895900555883063, "learning_rate": 1.9350527169505273e-05, "loss": 0.7176, "step": 4427 }, { "epoch": 0.12928089690811947, "grad_norm": 0.6892993819114762, "learning_rate": 1.9349878345498785e-05, "loss": 0.6524, "step": 4428 }, { "epoch": 0.12931009313596684, "grad_norm": 0.7153572747775492, "learning_rate": 1.9349229521492297e-05, "loss": 0.6486, "step": 4429 }, { "epoch": 0.1293392893638142, "grad_norm": 0.7956548614630106, "learning_rate": 1.934858069748581e-05, "loss": 0.6706, "step": 4430 }, { "epoch": 0.12936848559166156, "grad_norm": 0.7447997438400776, "learning_rate": 1.934793187347932e-05, "loss": 0.7585, "step": 4431 }, { "epoch": 0.12939768181950892, "grad_norm": 0.661823197041392, "learning_rate": 1.934728304947283e-05, "loss": 0.6425, "step": 4432 }, { "epoch": 0.12942687804735628, "grad_norm": 0.6945500314933487, "learning_rate": 1.9346634225466342e-05, "loss": 0.6523, "step": 4433 }, { "epoch": 0.12945607427520364, "grad_norm": 0.6721422210873913, "learning_rate": 1.9345985401459854e-05, "loss": 0.6226, "step": 4434 }, { "epoch": 0.129485270503051, "grad_norm": 0.7894694235311839, "learning_rate": 1.9345336577453366e-05, "loss": 0.7405, "step": 4435 }, { "epoch": 0.12951446673089836, "grad_norm": 0.7343710955044742, "learning_rate": 1.934468775344688e-05, "loss": 0.6971, "step": 4436 }, { "epoch": 0.12954366295874573, "grad_norm": 0.6951642531652721, "learning_rate": 1.9344038929440393e-05, "loss": 0.6555, "step": 4437 }, { "epoch": 0.1295728591865931, "grad_norm": 0.6943329016475667, "learning_rate": 1.9343390105433902e-05, "loss": 0.7045, "step": 4438 }, { "epoch": 0.12960205541444045, "grad_norm": 0.6697999025422916, "learning_rate": 1.9342741281427414e-05, "loss": 0.6196, "step": 4439 }, { "epoch": 0.1296312516422878, "grad_norm": 0.6634340078657128, "learning_rate": 1.9342092457420926e-05, "loss": 0.6365, "step": 4440 }, { "epoch": 0.12966044787013517, "grad_norm": 0.7238983225997608, "learning_rate": 1.9341443633414438e-05, "loss": 0.6657, "step": 4441 }, { "epoch": 0.12968964409798253, "grad_norm": 0.6979790535119079, "learning_rate": 1.934079480940795e-05, "loss": 0.7212, "step": 4442 }, { "epoch": 0.1297188403258299, "grad_norm": 0.7745051158889943, "learning_rate": 1.9340145985401462e-05, "loss": 0.8026, "step": 4443 }, { "epoch": 0.12974803655367725, "grad_norm": 0.7499238249011505, "learning_rate": 1.9339497161394974e-05, "loss": 0.627, "step": 4444 }, { "epoch": 0.12977723278152462, "grad_norm": 0.6949117142320741, "learning_rate": 1.9338848337388486e-05, "loss": 0.6292, "step": 4445 }, { "epoch": 0.12980642900937198, "grad_norm": 0.8162148082154168, "learning_rate": 1.9338199513382e-05, "loss": 0.7602, "step": 4446 }, { "epoch": 0.12983562523721934, "grad_norm": 0.6824519764230922, "learning_rate": 1.9337550689375507e-05, "loss": 0.631, "step": 4447 }, { "epoch": 0.12986482146506673, "grad_norm": 0.7194339243526899, "learning_rate": 1.933690186536902e-05, "loss": 0.6272, "step": 4448 }, { "epoch": 0.1298940176929141, "grad_norm": 0.6735953655961956, "learning_rate": 1.933625304136253e-05, "loss": 0.6454, "step": 4449 }, { "epoch": 0.12992321392076145, "grad_norm": 0.6427606717953862, "learning_rate": 1.9335604217356043e-05, "loss": 0.5687, "step": 4450 }, { "epoch": 0.1299524101486088, "grad_norm": 0.7574731916379962, "learning_rate": 1.9334955393349555e-05, "loss": 0.7339, "step": 4451 }, { "epoch": 0.12998160637645617, "grad_norm": 0.7298469055783393, "learning_rate": 1.9334306569343067e-05, "loss": 0.6524, "step": 4452 }, { "epoch": 0.13001080260430353, "grad_norm": 0.7122525892043718, "learning_rate": 1.933365774533658e-05, "loss": 0.623, "step": 4453 }, { "epoch": 0.1300399988321509, "grad_norm": 0.7138702790091759, "learning_rate": 1.933300892133009e-05, "loss": 0.6592, "step": 4454 }, { "epoch": 0.13006919505999825, "grad_norm": 0.7148330935885477, "learning_rate": 1.9332360097323603e-05, "loss": 0.6227, "step": 4455 }, { "epoch": 0.13009839128784562, "grad_norm": 0.661895133745747, "learning_rate": 1.9331711273317115e-05, "loss": 0.6255, "step": 4456 }, { "epoch": 0.13012758751569298, "grad_norm": 0.6496791027606493, "learning_rate": 1.9331062449310627e-05, "loss": 0.5603, "step": 4457 }, { "epoch": 0.13015678374354034, "grad_norm": 0.7818909999275319, "learning_rate": 1.933041362530414e-05, "loss": 0.7774, "step": 4458 }, { "epoch": 0.1301859799713877, "grad_norm": 0.7337130025848112, "learning_rate": 1.932976480129765e-05, "loss": 0.6461, "step": 4459 }, { "epoch": 0.13021517619923506, "grad_norm": 0.7011667854884391, "learning_rate": 1.9329115977291163e-05, "loss": 0.6731, "step": 4460 }, { "epoch": 0.13024437242708242, "grad_norm": 0.6819987814634367, "learning_rate": 1.9328467153284672e-05, "loss": 0.6048, "step": 4461 }, { "epoch": 0.13027356865492978, "grad_norm": 0.7202288635941575, "learning_rate": 1.9327818329278184e-05, "loss": 0.6707, "step": 4462 }, { "epoch": 0.13030276488277714, "grad_norm": 0.8104028412553794, "learning_rate": 1.9327169505271696e-05, "loss": 0.7025, "step": 4463 }, { "epoch": 0.1303319611106245, "grad_norm": 0.7483513268580322, "learning_rate": 1.9326520681265208e-05, "loss": 0.6921, "step": 4464 }, { "epoch": 0.13036115733847187, "grad_norm": 0.6877012975480569, "learning_rate": 1.932587185725872e-05, "loss": 0.6846, "step": 4465 }, { "epoch": 0.13039035356631923, "grad_norm": 0.7213475678375775, "learning_rate": 1.9325223033252232e-05, "loss": 0.7209, "step": 4466 }, { "epoch": 0.1304195497941666, "grad_norm": 0.725795259743544, "learning_rate": 1.9324574209245744e-05, "loss": 0.7215, "step": 4467 }, { "epoch": 0.13044874602201395, "grad_norm": 0.773386369552963, "learning_rate": 1.9323925385239256e-05, "loss": 0.7312, "step": 4468 }, { "epoch": 0.1304779422498613, "grad_norm": 0.8347893510121025, "learning_rate": 1.9323276561232768e-05, "loss": 0.6652, "step": 4469 }, { "epoch": 0.13050713847770867, "grad_norm": 0.6864081772408964, "learning_rate": 1.9322627737226277e-05, "loss": 0.6495, "step": 4470 }, { "epoch": 0.13053633470555603, "grad_norm": 0.6861619383266494, "learning_rate": 1.932197891321979e-05, "loss": 0.6792, "step": 4471 }, { "epoch": 0.1305655309334034, "grad_norm": 0.6874955974654833, "learning_rate": 1.93213300892133e-05, "loss": 0.6615, "step": 4472 }, { "epoch": 0.13059472716125076, "grad_norm": 0.7735925678297261, "learning_rate": 1.9320681265206813e-05, "loss": 0.705, "step": 4473 }, { "epoch": 0.13062392338909812, "grad_norm": 0.6487050603489913, "learning_rate": 1.932003244120033e-05, "loss": 0.6019, "step": 4474 }, { "epoch": 0.13065311961694548, "grad_norm": 0.6702198298323413, "learning_rate": 1.931938361719384e-05, "loss": 0.6315, "step": 4475 }, { "epoch": 0.13068231584479284, "grad_norm": 0.7302845319632245, "learning_rate": 1.931873479318735e-05, "loss": 0.6467, "step": 4476 }, { "epoch": 0.1307115120726402, "grad_norm": 0.6769984687758165, "learning_rate": 1.931808596918086e-05, "loss": 0.6105, "step": 4477 }, { "epoch": 0.1307407083004876, "grad_norm": 0.7402677480501947, "learning_rate": 1.9317437145174373e-05, "loss": 0.7198, "step": 4478 }, { "epoch": 0.13076990452833495, "grad_norm": 0.7329769368917877, "learning_rate": 1.9316788321167885e-05, "loss": 0.6328, "step": 4479 }, { "epoch": 0.1307991007561823, "grad_norm": 0.833401488447484, "learning_rate": 1.9316139497161397e-05, "loss": 0.7755, "step": 4480 }, { "epoch": 0.13082829698402967, "grad_norm": 0.7076936513051039, "learning_rate": 1.931549067315491e-05, "loss": 0.618, "step": 4481 }, { "epoch": 0.13085749321187704, "grad_norm": 0.7477453622471114, "learning_rate": 1.931484184914842e-05, "loss": 0.7941, "step": 4482 }, { "epoch": 0.1308866894397244, "grad_norm": 0.726103950888271, "learning_rate": 1.9314193025141933e-05, "loss": 0.6564, "step": 4483 }, { "epoch": 0.13091588566757176, "grad_norm": 0.74308012030455, "learning_rate": 1.9313544201135442e-05, "loss": 0.6848, "step": 4484 }, { "epoch": 0.13094508189541912, "grad_norm": 0.7579728582626554, "learning_rate": 1.9312895377128954e-05, "loss": 0.7826, "step": 4485 }, { "epoch": 0.13097427812326648, "grad_norm": 0.7009855796179593, "learning_rate": 1.9312246553122466e-05, "loss": 0.6867, "step": 4486 }, { "epoch": 0.13100347435111384, "grad_norm": 0.7724994068969382, "learning_rate": 1.9311597729115978e-05, "loss": 0.6678, "step": 4487 }, { "epoch": 0.1310326705789612, "grad_norm": 0.7913608465985379, "learning_rate": 1.931094890510949e-05, "loss": 0.7211, "step": 4488 }, { "epoch": 0.13106186680680856, "grad_norm": 0.7232210265032074, "learning_rate": 1.9310300081103002e-05, "loss": 0.6905, "step": 4489 }, { "epoch": 0.13109106303465592, "grad_norm": 0.7428510648354637, "learning_rate": 1.9309651257096514e-05, "loss": 0.7012, "step": 4490 }, { "epoch": 0.13112025926250329, "grad_norm": 0.7073337761183893, "learning_rate": 1.9309002433090026e-05, "loss": 0.6851, "step": 4491 }, { "epoch": 0.13114945549035065, "grad_norm": 0.6923437996019783, "learning_rate": 1.9308353609083538e-05, "loss": 0.6803, "step": 4492 }, { "epoch": 0.131178651718198, "grad_norm": 0.7272297057315887, "learning_rate": 1.930770478507705e-05, "loss": 0.6654, "step": 4493 }, { "epoch": 0.13120784794604537, "grad_norm": 0.7176007839687975, "learning_rate": 1.9307055961070562e-05, "loss": 0.6851, "step": 4494 }, { "epoch": 0.13123704417389273, "grad_norm": 0.7353594886206782, "learning_rate": 1.9306407137064074e-05, "loss": 0.6201, "step": 4495 }, { "epoch": 0.1312662404017401, "grad_norm": 1.0152473189008022, "learning_rate": 1.9305758313057586e-05, "loss": 0.877, "step": 4496 }, { "epoch": 0.13129543662958745, "grad_norm": 0.6493305829013248, "learning_rate": 1.9305109489051098e-05, "loss": 0.6081, "step": 4497 }, { "epoch": 0.13132463285743481, "grad_norm": 0.715606878069431, "learning_rate": 1.930446066504461e-05, "loss": 0.7431, "step": 4498 }, { "epoch": 0.13135382908528218, "grad_norm": 0.6675993417646653, "learning_rate": 1.930381184103812e-05, "loss": 0.672, "step": 4499 }, { "epoch": 0.13138302531312954, "grad_norm": 0.7096027889346952, "learning_rate": 1.930316301703163e-05, "loss": 0.6545, "step": 4500 }, { "epoch": 0.1314122215409769, "grad_norm": 0.70949071064077, "learning_rate": 1.9302514193025143e-05, "loss": 0.6791, "step": 4501 }, { "epoch": 0.13144141776882426, "grad_norm": 0.7836539226464508, "learning_rate": 1.9301865369018655e-05, "loss": 0.5819, "step": 4502 }, { "epoch": 0.13147061399667162, "grad_norm": 0.6852650313171486, "learning_rate": 1.9301216545012167e-05, "loss": 0.6142, "step": 4503 }, { "epoch": 0.13149981022451898, "grad_norm": 0.7936355112744311, "learning_rate": 1.930056772100568e-05, "loss": 0.7911, "step": 4504 }, { "epoch": 0.13152900645236634, "grad_norm": 0.6919235781405921, "learning_rate": 1.929991889699919e-05, "loss": 0.6255, "step": 4505 }, { "epoch": 0.1315582026802137, "grad_norm": 0.6495157991403132, "learning_rate": 1.9299270072992703e-05, "loss": 0.5916, "step": 4506 }, { "epoch": 0.13158739890806107, "grad_norm": 0.6753673159313986, "learning_rate": 1.9298621248986215e-05, "loss": 0.6937, "step": 4507 }, { "epoch": 0.13161659513590845, "grad_norm": 0.676396443848269, "learning_rate": 1.9297972424979724e-05, "loss": 0.5593, "step": 4508 }, { "epoch": 0.13164579136375582, "grad_norm": 0.7427792358598858, "learning_rate": 1.9297323600973236e-05, "loss": 0.7304, "step": 4509 }, { "epoch": 0.13167498759160318, "grad_norm": 0.7067806442179176, "learning_rate": 1.9296674776966748e-05, "loss": 0.6401, "step": 4510 }, { "epoch": 0.13170418381945054, "grad_norm": 0.8263680350628749, "learning_rate": 1.929602595296026e-05, "loss": 0.7597, "step": 4511 }, { "epoch": 0.1317333800472979, "grad_norm": 0.6698546338783559, "learning_rate": 1.9295377128953775e-05, "loss": 0.6155, "step": 4512 }, { "epoch": 0.13176257627514526, "grad_norm": 0.7296233082952389, "learning_rate": 1.9294728304947287e-05, "loss": 0.6904, "step": 4513 }, { "epoch": 0.13179177250299262, "grad_norm": 0.7689536940690942, "learning_rate": 1.9294079480940796e-05, "loss": 0.7334, "step": 4514 }, { "epoch": 0.13182096873083998, "grad_norm": 0.8219458667091575, "learning_rate": 1.9293430656934308e-05, "loss": 0.7225, "step": 4515 }, { "epoch": 0.13185016495868734, "grad_norm": 0.6755536504044684, "learning_rate": 1.929278183292782e-05, "loss": 0.6337, "step": 4516 }, { "epoch": 0.1318793611865347, "grad_norm": 0.7295944301523247, "learning_rate": 1.9292133008921332e-05, "loss": 0.5895, "step": 4517 }, { "epoch": 0.13190855741438207, "grad_norm": 0.8417553193695955, "learning_rate": 1.9291484184914844e-05, "loss": 0.6996, "step": 4518 }, { "epoch": 0.13193775364222943, "grad_norm": 0.7165394325539507, "learning_rate": 1.9290835360908356e-05, "loss": 0.6739, "step": 4519 }, { "epoch": 0.1319669498700768, "grad_norm": 0.6646747315333451, "learning_rate": 1.9290186536901868e-05, "loss": 0.6304, "step": 4520 }, { "epoch": 0.13199614609792415, "grad_norm": 0.7673633367163611, "learning_rate": 1.928953771289538e-05, "loss": 0.6848, "step": 4521 }, { "epoch": 0.1320253423257715, "grad_norm": 0.7146772631550358, "learning_rate": 1.928888888888889e-05, "loss": 0.6784, "step": 4522 }, { "epoch": 0.13205453855361887, "grad_norm": 0.7221862284184803, "learning_rate": 1.92882400648824e-05, "loss": 0.6149, "step": 4523 }, { "epoch": 0.13208373478146623, "grad_norm": 0.7609518411294113, "learning_rate": 1.9287591240875913e-05, "loss": 0.7329, "step": 4524 }, { "epoch": 0.1321129310093136, "grad_norm": 0.678846117164845, "learning_rate": 1.9286942416869425e-05, "loss": 0.6568, "step": 4525 }, { "epoch": 0.13214212723716096, "grad_norm": 0.7079368956088614, "learning_rate": 1.9286293592862937e-05, "loss": 0.647, "step": 4526 }, { "epoch": 0.13217132346500832, "grad_norm": 0.6916372242045804, "learning_rate": 1.928564476885645e-05, "loss": 0.6375, "step": 4527 }, { "epoch": 0.13220051969285568, "grad_norm": 0.7368122581130271, "learning_rate": 1.928499594484996e-05, "loss": 0.7083, "step": 4528 }, { "epoch": 0.13222971592070304, "grad_norm": 0.7208334026794567, "learning_rate": 1.9284347120843473e-05, "loss": 0.7022, "step": 4529 }, { "epoch": 0.1322589121485504, "grad_norm": 0.8569345402959908, "learning_rate": 1.9283698296836985e-05, "loss": 0.8154, "step": 4530 }, { "epoch": 0.13228810837639776, "grad_norm": 0.7865316620266322, "learning_rate": 1.9283049472830497e-05, "loss": 0.6974, "step": 4531 }, { "epoch": 0.13231730460424512, "grad_norm": 0.708012742864166, "learning_rate": 1.928240064882401e-05, "loss": 0.6771, "step": 4532 }, { "epoch": 0.13234650083209248, "grad_norm": 0.7943676501639286, "learning_rate": 1.928175182481752e-05, "loss": 0.7469, "step": 4533 }, { "epoch": 0.13237569705993985, "grad_norm": 0.6820330449236699, "learning_rate": 1.9281103000811033e-05, "loss": 0.6252, "step": 4534 }, { "epoch": 0.1324048932877872, "grad_norm": 0.7474648165232403, "learning_rate": 1.9280454176804545e-05, "loss": 0.7308, "step": 4535 }, { "epoch": 0.13243408951563457, "grad_norm": 0.7622438482387132, "learning_rate": 1.9279805352798057e-05, "loss": 0.7629, "step": 4536 }, { "epoch": 0.13246328574348193, "grad_norm": 0.6981400305670376, "learning_rate": 1.9279156528791566e-05, "loss": 0.6606, "step": 4537 }, { "epoch": 0.13249248197132932, "grad_norm": 0.8768240938815336, "learning_rate": 1.9278507704785078e-05, "loss": 0.7336, "step": 4538 }, { "epoch": 0.13252167819917668, "grad_norm": 0.6828343432530969, "learning_rate": 1.927785888077859e-05, "loss": 0.6321, "step": 4539 }, { "epoch": 0.13255087442702404, "grad_norm": 0.8767200442013693, "learning_rate": 1.9277210056772102e-05, "loss": 0.6442, "step": 4540 }, { "epoch": 0.1325800706548714, "grad_norm": 0.7600374533976398, "learning_rate": 1.9276561232765614e-05, "loss": 0.7424, "step": 4541 }, { "epoch": 0.13260926688271876, "grad_norm": 0.7550978779771689, "learning_rate": 1.9275912408759126e-05, "loss": 0.6592, "step": 4542 }, { "epoch": 0.13263846311056612, "grad_norm": 0.7083057916235684, "learning_rate": 1.9275263584752638e-05, "loss": 0.6568, "step": 4543 }, { "epoch": 0.13266765933841349, "grad_norm": 0.6637585429652654, "learning_rate": 1.927461476074615e-05, "loss": 0.633, "step": 4544 }, { "epoch": 0.13269685556626085, "grad_norm": 0.6681351142772552, "learning_rate": 1.9273965936739662e-05, "loss": 0.6062, "step": 4545 }, { "epoch": 0.1327260517941082, "grad_norm": 0.6417697746334307, "learning_rate": 1.927331711273317e-05, "loss": 0.6507, "step": 4546 }, { "epoch": 0.13275524802195557, "grad_norm": 0.719360269157815, "learning_rate": 1.9272668288726683e-05, "loss": 0.687, "step": 4547 }, { "epoch": 0.13278444424980293, "grad_norm": 0.7519755910839647, "learning_rate": 1.9272019464720195e-05, "loss": 0.6864, "step": 4548 }, { "epoch": 0.1328136404776503, "grad_norm": 0.7094774146058871, "learning_rate": 1.9271370640713707e-05, "loss": 0.6694, "step": 4549 }, { "epoch": 0.13284283670549765, "grad_norm": 0.7458035569293436, "learning_rate": 1.9270721816707222e-05, "loss": 0.661, "step": 4550 }, { "epoch": 0.132872032933345, "grad_norm": 0.6526348280504293, "learning_rate": 1.9270072992700734e-05, "loss": 0.5808, "step": 4551 }, { "epoch": 0.13290122916119237, "grad_norm": 0.6998865977311626, "learning_rate": 1.9269424168694243e-05, "loss": 0.6513, "step": 4552 }, { "epoch": 0.13293042538903974, "grad_norm": 0.6704192443369927, "learning_rate": 1.9268775344687755e-05, "loss": 0.6122, "step": 4553 }, { "epoch": 0.1329596216168871, "grad_norm": 0.8377486309571223, "learning_rate": 1.9268126520681267e-05, "loss": 0.7761, "step": 4554 }, { "epoch": 0.13298881784473446, "grad_norm": 0.7238706380988772, "learning_rate": 1.926747769667478e-05, "loss": 0.6058, "step": 4555 }, { "epoch": 0.13301801407258182, "grad_norm": 0.6926942359077097, "learning_rate": 1.926682887266829e-05, "loss": 0.6376, "step": 4556 }, { "epoch": 0.13304721030042918, "grad_norm": 0.7369533079369074, "learning_rate": 1.9266180048661803e-05, "loss": 0.6602, "step": 4557 }, { "epoch": 0.13307640652827654, "grad_norm": 0.7122514222510992, "learning_rate": 1.9265531224655315e-05, "loss": 0.6161, "step": 4558 }, { "epoch": 0.1331056027561239, "grad_norm": 0.7098729980775399, "learning_rate": 1.9264882400648827e-05, "loss": 0.671, "step": 4559 }, { "epoch": 0.13313479898397126, "grad_norm": 0.6545120665843962, "learning_rate": 1.9264233576642336e-05, "loss": 0.5737, "step": 4560 }, { "epoch": 0.13316399521181863, "grad_norm": 0.6680730139052855, "learning_rate": 1.9263584752635848e-05, "loss": 0.6038, "step": 4561 }, { "epoch": 0.133193191439666, "grad_norm": 0.6971054009209097, "learning_rate": 1.926293592862936e-05, "loss": 0.6002, "step": 4562 }, { "epoch": 0.13322238766751335, "grad_norm": 0.7313515577353613, "learning_rate": 1.926228710462287e-05, "loss": 0.666, "step": 4563 }, { "epoch": 0.1332515838953607, "grad_norm": 0.7094453739559957, "learning_rate": 1.9261638280616384e-05, "loss": 0.6629, "step": 4564 }, { "epoch": 0.13328078012320807, "grad_norm": 0.76698357548342, "learning_rate": 1.9260989456609896e-05, "loss": 0.6942, "step": 4565 }, { "epoch": 0.13330997635105543, "grad_norm": 0.8613949890189712, "learning_rate": 1.9260340632603408e-05, "loss": 0.7467, "step": 4566 }, { "epoch": 0.1333391725789028, "grad_norm": 0.7756308062993804, "learning_rate": 1.925969180859692e-05, "loss": 0.7962, "step": 4567 }, { "epoch": 0.13336836880675018, "grad_norm": 0.7068547191630055, "learning_rate": 1.9259042984590432e-05, "loss": 0.6604, "step": 4568 }, { "epoch": 0.13339756503459754, "grad_norm": 0.7270254788558529, "learning_rate": 1.9258394160583944e-05, "loss": 0.6938, "step": 4569 }, { "epoch": 0.1334267612624449, "grad_norm": 0.741367576529615, "learning_rate": 1.9257745336577456e-05, "loss": 0.6957, "step": 4570 }, { "epoch": 0.13345595749029227, "grad_norm": 0.7275771963022154, "learning_rate": 1.9257096512570968e-05, "loss": 0.6844, "step": 4571 }, { "epoch": 0.13348515371813963, "grad_norm": 0.7313435850728659, "learning_rate": 1.925644768856448e-05, "loss": 0.7308, "step": 4572 }, { "epoch": 0.133514349945987, "grad_norm": 0.6900883454038531, "learning_rate": 1.9255798864557992e-05, "loss": 0.6771, "step": 4573 }, { "epoch": 0.13354354617383435, "grad_norm": 0.6788051291756628, "learning_rate": 1.9255150040551504e-05, "loss": 0.6653, "step": 4574 }, { "epoch": 0.1335727424016817, "grad_norm": 0.6542857673694319, "learning_rate": 1.9254501216545013e-05, "loss": 0.6119, "step": 4575 }, { "epoch": 0.13360193862952907, "grad_norm": 0.6580517809472349, "learning_rate": 1.9253852392538525e-05, "loss": 0.6013, "step": 4576 }, { "epoch": 0.13363113485737643, "grad_norm": 0.6878721067658945, "learning_rate": 1.9253203568532037e-05, "loss": 0.6648, "step": 4577 }, { "epoch": 0.1336603310852238, "grad_norm": 0.7273333256594887, "learning_rate": 1.925255474452555e-05, "loss": 0.6415, "step": 4578 }, { "epoch": 0.13368952731307115, "grad_norm": 0.7459128923395045, "learning_rate": 1.925190592051906e-05, "loss": 0.7627, "step": 4579 }, { "epoch": 0.13371872354091852, "grad_norm": 0.7237312582435911, "learning_rate": 1.9251257096512573e-05, "loss": 0.6851, "step": 4580 }, { "epoch": 0.13374791976876588, "grad_norm": 0.7428648410643115, "learning_rate": 1.9250608272506085e-05, "loss": 0.7324, "step": 4581 }, { "epoch": 0.13377711599661324, "grad_norm": 0.7233452046140861, "learning_rate": 1.9249959448499597e-05, "loss": 0.7386, "step": 4582 }, { "epoch": 0.1338063122244606, "grad_norm": 0.6451712946333478, "learning_rate": 1.924931062449311e-05, "loss": 0.5949, "step": 4583 }, { "epoch": 0.13383550845230796, "grad_norm": 0.7047855171281204, "learning_rate": 1.9248661800486617e-05, "loss": 0.6991, "step": 4584 }, { "epoch": 0.13386470468015532, "grad_norm": 0.7823505661358672, "learning_rate": 1.924801297648013e-05, "loss": 0.7101, "step": 4585 }, { "epoch": 0.13389390090800268, "grad_norm": 0.834613237660312, "learning_rate": 1.924736415247364e-05, "loss": 0.6054, "step": 4586 }, { "epoch": 0.13392309713585004, "grad_norm": 0.8031989230266445, "learning_rate": 1.9246715328467157e-05, "loss": 0.7139, "step": 4587 }, { "epoch": 0.1339522933636974, "grad_norm": 0.6537808371686347, "learning_rate": 1.924606650446067e-05, "loss": 0.5645, "step": 4588 }, { "epoch": 0.13398148959154477, "grad_norm": 0.80515022189135, "learning_rate": 1.9245417680454178e-05, "loss": 0.6823, "step": 4589 }, { "epoch": 0.13401068581939213, "grad_norm": 1.3556850288057523, "learning_rate": 1.924476885644769e-05, "loss": 0.6836, "step": 4590 }, { "epoch": 0.1340398820472395, "grad_norm": 0.7108688519671971, "learning_rate": 1.92441200324412e-05, "loss": 0.6743, "step": 4591 }, { "epoch": 0.13406907827508685, "grad_norm": 0.9359674111436216, "learning_rate": 1.9243471208434714e-05, "loss": 0.6136, "step": 4592 }, { "epoch": 0.1340982745029342, "grad_norm": 0.7055228489621662, "learning_rate": 1.9242822384428226e-05, "loss": 0.7287, "step": 4593 }, { "epoch": 0.13412747073078157, "grad_norm": 0.7509419951993217, "learning_rate": 1.9242173560421738e-05, "loss": 0.7034, "step": 4594 }, { "epoch": 0.13415666695862893, "grad_norm": 0.7565133594521883, "learning_rate": 1.924152473641525e-05, "loss": 0.6954, "step": 4595 }, { "epoch": 0.1341858631864763, "grad_norm": 0.6989860319981731, "learning_rate": 1.9240875912408762e-05, "loss": 0.6687, "step": 4596 }, { "epoch": 0.13421505941432366, "grad_norm": 0.75382234546901, "learning_rate": 1.9240227088402274e-05, "loss": 0.6815, "step": 4597 }, { "epoch": 0.13424425564217105, "grad_norm": 0.7329864472634872, "learning_rate": 1.9239578264395782e-05, "loss": 0.6626, "step": 4598 }, { "epoch": 0.1342734518700184, "grad_norm": 0.7816784136433146, "learning_rate": 1.9238929440389294e-05, "loss": 0.7688, "step": 4599 }, { "epoch": 0.13430264809786577, "grad_norm": 0.6993207181314601, "learning_rate": 1.9238280616382806e-05, "loss": 0.5953, "step": 4600 }, { "epoch": 0.13433184432571313, "grad_norm": 0.7037093697767574, "learning_rate": 1.923763179237632e-05, "loss": 0.6979, "step": 4601 }, { "epoch": 0.1343610405535605, "grad_norm": 0.7035866088283973, "learning_rate": 1.923698296836983e-05, "loss": 0.6563, "step": 4602 }, { "epoch": 0.13439023678140785, "grad_norm": 0.7264166503697735, "learning_rate": 1.9236334144363343e-05, "loss": 0.704, "step": 4603 }, { "epoch": 0.1344194330092552, "grad_norm": 0.6818858485085668, "learning_rate": 1.9235685320356855e-05, "loss": 0.6746, "step": 4604 }, { "epoch": 0.13444862923710257, "grad_norm": 0.7023729121878629, "learning_rate": 1.9235036496350367e-05, "loss": 0.6957, "step": 4605 }, { "epoch": 0.13447782546494993, "grad_norm": 0.6974596981496738, "learning_rate": 1.923438767234388e-05, "loss": 0.6367, "step": 4606 }, { "epoch": 0.1345070216927973, "grad_norm": 0.6943295512604878, "learning_rate": 1.923373884833739e-05, "loss": 0.6201, "step": 4607 }, { "epoch": 0.13453621792064466, "grad_norm": 0.7506829067995882, "learning_rate": 1.9233090024330903e-05, "loss": 0.6924, "step": 4608 }, { "epoch": 0.13456541414849202, "grad_norm": 0.6513773001868386, "learning_rate": 1.9232441200324415e-05, "loss": 0.5886, "step": 4609 }, { "epoch": 0.13459461037633938, "grad_norm": 0.6545662884787699, "learning_rate": 1.9231792376317927e-05, "loss": 0.5694, "step": 4610 }, { "epoch": 0.13462380660418674, "grad_norm": 0.6743236312837928, "learning_rate": 1.923114355231144e-05, "loss": 0.6179, "step": 4611 }, { "epoch": 0.1346530028320341, "grad_norm": 0.6981388378636002, "learning_rate": 1.923049472830495e-05, "loss": 0.6243, "step": 4612 }, { "epoch": 0.13468219905988146, "grad_norm": 0.7396119355865338, "learning_rate": 1.922984590429846e-05, "loss": 0.7187, "step": 4613 }, { "epoch": 0.13471139528772882, "grad_norm": 0.7573317527475366, "learning_rate": 1.922919708029197e-05, "loss": 0.6871, "step": 4614 }, { "epoch": 0.13474059151557619, "grad_norm": 0.7978197464559199, "learning_rate": 1.9228548256285483e-05, "loss": 0.6959, "step": 4615 }, { "epoch": 0.13476978774342355, "grad_norm": 0.7170450236343417, "learning_rate": 1.9227899432278996e-05, "loss": 0.6713, "step": 4616 }, { "epoch": 0.1347989839712709, "grad_norm": 0.7379955313268666, "learning_rate": 1.9227250608272508e-05, "loss": 0.6045, "step": 4617 }, { "epoch": 0.13482818019911827, "grad_norm": 0.6961838297829847, "learning_rate": 1.922660178426602e-05, "loss": 0.6222, "step": 4618 }, { "epoch": 0.13485737642696563, "grad_norm": 0.7471351102866316, "learning_rate": 1.922595296025953e-05, "loss": 0.716, "step": 4619 }, { "epoch": 0.134886572654813, "grad_norm": 0.8586679705822183, "learning_rate": 1.9225304136253044e-05, "loss": 0.7933, "step": 4620 }, { "epoch": 0.13491576888266035, "grad_norm": 0.8065955946777371, "learning_rate": 1.9224655312246556e-05, "loss": 0.7035, "step": 4621 }, { "epoch": 0.13494496511050771, "grad_norm": 0.6857007217207154, "learning_rate": 1.9224006488240064e-05, "loss": 0.6278, "step": 4622 }, { "epoch": 0.13497416133835508, "grad_norm": 0.7075359120326462, "learning_rate": 1.9223357664233576e-05, "loss": 0.6983, "step": 4623 }, { "epoch": 0.13500335756620244, "grad_norm": 0.8231787589538461, "learning_rate": 1.922270884022709e-05, "loss": 0.7508, "step": 4624 }, { "epoch": 0.1350325537940498, "grad_norm": 0.6559594159923887, "learning_rate": 1.9222060016220604e-05, "loss": 0.6227, "step": 4625 }, { "epoch": 0.13506175002189716, "grad_norm": 0.7410214756737568, "learning_rate": 1.9221411192214116e-05, "loss": 0.6726, "step": 4626 }, { "epoch": 0.13509094624974452, "grad_norm": 0.7501538292701866, "learning_rate": 1.9220762368207624e-05, "loss": 0.6324, "step": 4627 }, { "epoch": 0.13512014247759188, "grad_norm": 0.6320678956409489, "learning_rate": 1.9220113544201136e-05, "loss": 0.5269, "step": 4628 }, { "epoch": 0.13514933870543927, "grad_norm": 0.7038933541019508, "learning_rate": 1.921946472019465e-05, "loss": 0.646, "step": 4629 }, { "epoch": 0.13517853493328663, "grad_norm": 0.7102834495862975, "learning_rate": 1.921881589618816e-05, "loss": 0.6773, "step": 4630 }, { "epoch": 0.135207731161134, "grad_norm": 0.7633412494640236, "learning_rate": 1.9218167072181673e-05, "loss": 0.634, "step": 4631 }, { "epoch": 0.13523692738898135, "grad_norm": 0.7149777452848751, "learning_rate": 1.9217518248175185e-05, "loss": 0.6947, "step": 4632 }, { "epoch": 0.13526612361682872, "grad_norm": 0.6896437668034172, "learning_rate": 1.9216869424168697e-05, "loss": 0.6599, "step": 4633 }, { "epoch": 0.13529531984467608, "grad_norm": 0.9418189915420142, "learning_rate": 1.921622060016221e-05, "loss": 0.7014, "step": 4634 }, { "epoch": 0.13532451607252344, "grad_norm": 0.6916871222659768, "learning_rate": 1.921557177615572e-05, "loss": 0.6445, "step": 4635 }, { "epoch": 0.1353537123003708, "grad_norm": 0.7192677281758338, "learning_rate": 1.921492295214923e-05, "loss": 0.6675, "step": 4636 }, { "epoch": 0.13538290852821816, "grad_norm": 0.9276037869342817, "learning_rate": 1.921427412814274e-05, "loss": 0.7583, "step": 4637 }, { "epoch": 0.13541210475606552, "grad_norm": 0.6832844782085783, "learning_rate": 1.9213625304136253e-05, "loss": 0.6095, "step": 4638 }, { "epoch": 0.13544130098391288, "grad_norm": 0.7339929414232443, "learning_rate": 1.9212976480129765e-05, "loss": 0.6988, "step": 4639 }, { "epoch": 0.13547049721176024, "grad_norm": 0.7063193999435685, "learning_rate": 1.9212327656123277e-05, "loss": 0.734, "step": 4640 }, { "epoch": 0.1354996934396076, "grad_norm": 0.9141875806549177, "learning_rate": 1.921167883211679e-05, "loss": 0.7339, "step": 4641 }, { "epoch": 0.13552888966745497, "grad_norm": 0.7921541088667102, "learning_rate": 1.92110300081103e-05, "loss": 0.7689, "step": 4642 }, { "epoch": 0.13555808589530233, "grad_norm": 0.7001899770539456, "learning_rate": 1.9210381184103813e-05, "loss": 0.7033, "step": 4643 }, { "epoch": 0.1355872821231497, "grad_norm": 0.7348888770801654, "learning_rate": 1.9209732360097325e-05, "loss": 0.7056, "step": 4644 }, { "epoch": 0.13561647835099705, "grad_norm": 0.6855704761407674, "learning_rate": 1.9209083536090838e-05, "loss": 0.7099, "step": 4645 }, { "epoch": 0.1356456745788444, "grad_norm": 0.7128933529496138, "learning_rate": 1.920843471208435e-05, "loss": 0.6964, "step": 4646 }, { "epoch": 0.13567487080669177, "grad_norm": 0.8277117514163679, "learning_rate": 1.920778588807786e-05, "loss": 0.8747, "step": 4647 }, { "epoch": 0.13570406703453913, "grad_norm": 0.7140933044432026, "learning_rate": 1.9207137064071374e-05, "loss": 0.6864, "step": 4648 }, { "epoch": 0.1357332632623865, "grad_norm": 0.7432357466218912, "learning_rate": 1.9206488240064886e-05, "loss": 0.6933, "step": 4649 }, { "epoch": 0.13576245949023386, "grad_norm": 0.7577747400899713, "learning_rate": 1.9205839416058398e-05, "loss": 0.7113, "step": 4650 }, { "epoch": 0.13579165571808122, "grad_norm": 0.7300487979760816, "learning_rate": 1.9205190592051906e-05, "loss": 0.6903, "step": 4651 }, { "epoch": 0.13582085194592858, "grad_norm": 0.6958045771560621, "learning_rate": 1.920454176804542e-05, "loss": 0.669, "step": 4652 }, { "epoch": 0.13585004817377594, "grad_norm": 0.8272014274629818, "learning_rate": 1.920389294403893e-05, "loss": 0.706, "step": 4653 }, { "epoch": 0.1358792444016233, "grad_norm": 0.6889792842225047, "learning_rate": 1.9203244120032442e-05, "loss": 0.6549, "step": 4654 }, { "epoch": 0.13590844062947066, "grad_norm": 0.7066055315072993, "learning_rate": 1.9202595296025954e-05, "loss": 0.6565, "step": 4655 }, { "epoch": 0.13593763685731802, "grad_norm": 0.7617964797842367, "learning_rate": 1.9201946472019466e-05, "loss": 0.6726, "step": 4656 }, { "epoch": 0.13596683308516538, "grad_norm": 0.8563185768657549, "learning_rate": 1.920129764801298e-05, "loss": 0.6526, "step": 4657 }, { "epoch": 0.13599602931301275, "grad_norm": 0.6994518918940685, "learning_rate": 1.920064882400649e-05, "loss": 0.6254, "step": 4658 }, { "epoch": 0.13602522554086013, "grad_norm": 0.8316999190710362, "learning_rate": 1.9200000000000003e-05, "loss": 0.7989, "step": 4659 }, { "epoch": 0.1360544217687075, "grad_norm": 0.7259406081479823, "learning_rate": 1.919935117599351e-05, "loss": 0.6697, "step": 4660 }, { "epoch": 0.13608361799655486, "grad_norm": 0.6819368407722733, "learning_rate": 1.9198702351987023e-05, "loss": 0.6756, "step": 4661 }, { "epoch": 0.13611281422440222, "grad_norm": 0.6954075994306443, "learning_rate": 1.9198053527980535e-05, "loss": 0.6817, "step": 4662 }, { "epoch": 0.13614201045224958, "grad_norm": 0.7092378703377148, "learning_rate": 1.919740470397405e-05, "loss": 0.7303, "step": 4663 }, { "epoch": 0.13617120668009694, "grad_norm": 0.7322835501171288, "learning_rate": 1.9196755879967563e-05, "loss": 0.716, "step": 4664 }, { "epoch": 0.1362004029079443, "grad_norm": 0.9321820434135836, "learning_rate": 1.919610705596107e-05, "loss": 0.7404, "step": 4665 }, { "epoch": 0.13622959913579166, "grad_norm": 0.6226576859650214, "learning_rate": 1.9195458231954583e-05, "loss": 0.6016, "step": 4666 }, { "epoch": 0.13625879536363902, "grad_norm": 0.6360340482786699, "learning_rate": 1.9194809407948095e-05, "loss": 0.6109, "step": 4667 }, { "epoch": 0.13628799159148638, "grad_norm": 0.7247008275463233, "learning_rate": 1.9194160583941607e-05, "loss": 0.6792, "step": 4668 }, { "epoch": 0.13631718781933375, "grad_norm": 0.6346637239617663, "learning_rate": 1.919351175993512e-05, "loss": 0.5466, "step": 4669 }, { "epoch": 0.1363463840471811, "grad_norm": 0.7913844605556144, "learning_rate": 1.919286293592863e-05, "loss": 0.7804, "step": 4670 }, { "epoch": 0.13637558027502847, "grad_norm": 0.688324491658558, "learning_rate": 1.9192214111922143e-05, "loss": 0.6165, "step": 4671 }, { "epoch": 0.13640477650287583, "grad_norm": 0.7776542660553195, "learning_rate": 1.9191565287915655e-05, "loss": 0.7585, "step": 4672 }, { "epoch": 0.1364339727307232, "grad_norm": 0.764202058377708, "learning_rate": 1.9190916463909168e-05, "loss": 0.7197, "step": 4673 }, { "epoch": 0.13646316895857055, "grad_norm": 0.6632041092159584, "learning_rate": 1.9190267639902676e-05, "loss": 0.6007, "step": 4674 }, { "epoch": 0.1364923651864179, "grad_norm": 0.7737008004400221, "learning_rate": 1.9189618815896188e-05, "loss": 0.7569, "step": 4675 }, { "epoch": 0.13652156141426527, "grad_norm": 0.6923681227311438, "learning_rate": 1.91889699918897e-05, "loss": 0.6553, "step": 4676 }, { "epoch": 0.13655075764211264, "grad_norm": 0.6673514860512296, "learning_rate": 1.9188321167883212e-05, "loss": 0.5355, "step": 4677 }, { "epoch": 0.13657995386996, "grad_norm": 0.6913271724722218, "learning_rate": 1.9187672343876724e-05, "loss": 0.6114, "step": 4678 }, { "epoch": 0.13660915009780736, "grad_norm": 0.7642610238214975, "learning_rate": 1.9187023519870236e-05, "loss": 0.7773, "step": 4679 }, { "epoch": 0.13663834632565472, "grad_norm": 0.6534585535135506, "learning_rate": 1.918637469586375e-05, "loss": 0.5936, "step": 4680 }, { "epoch": 0.13666754255350208, "grad_norm": 0.764595555217119, "learning_rate": 1.918572587185726e-05, "loss": 0.7238, "step": 4681 }, { "epoch": 0.13669673878134944, "grad_norm": 0.8537418241516596, "learning_rate": 1.9185077047850772e-05, "loss": 0.7599, "step": 4682 }, { "epoch": 0.1367259350091968, "grad_norm": 0.7283728578713076, "learning_rate": 1.9184428223844284e-05, "loss": 0.6753, "step": 4683 }, { "epoch": 0.13675513123704416, "grad_norm": 0.711838372250359, "learning_rate": 1.9183779399837796e-05, "loss": 0.6156, "step": 4684 }, { "epoch": 0.13678432746489153, "grad_norm": 0.657981966276518, "learning_rate": 1.918313057583131e-05, "loss": 0.5997, "step": 4685 }, { "epoch": 0.1368135236927389, "grad_norm": 0.7249649615314605, "learning_rate": 1.918248175182482e-05, "loss": 0.6326, "step": 4686 }, { "epoch": 0.13684271992058625, "grad_norm": 0.7368652896692827, "learning_rate": 1.9181832927818333e-05, "loss": 0.7263, "step": 4687 }, { "epoch": 0.1368719161484336, "grad_norm": 0.7467599976123502, "learning_rate": 1.9181184103811845e-05, "loss": 0.7062, "step": 4688 }, { "epoch": 0.136901112376281, "grad_norm": 0.6865760503239812, "learning_rate": 1.9180535279805353e-05, "loss": 0.5589, "step": 4689 }, { "epoch": 0.13693030860412836, "grad_norm": 0.7376833686671285, "learning_rate": 1.9179886455798865e-05, "loss": 0.7448, "step": 4690 }, { "epoch": 0.13695950483197572, "grad_norm": 0.7065564915949831, "learning_rate": 1.9179237631792377e-05, "loss": 0.6138, "step": 4691 }, { "epoch": 0.13698870105982308, "grad_norm": 0.8802748464807919, "learning_rate": 1.917858880778589e-05, "loss": 0.7326, "step": 4692 }, { "epoch": 0.13701789728767044, "grad_norm": 0.6801496993285182, "learning_rate": 1.91779399837794e-05, "loss": 0.6588, "step": 4693 }, { "epoch": 0.1370470935155178, "grad_norm": 0.7048518547358127, "learning_rate": 1.9177291159772913e-05, "loss": 0.664, "step": 4694 }, { "epoch": 0.13707628974336517, "grad_norm": 0.7915869276380295, "learning_rate": 1.9176642335766425e-05, "loss": 0.7145, "step": 4695 }, { "epoch": 0.13710548597121253, "grad_norm": 0.7815115356177348, "learning_rate": 1.9175993511759937e-05, "loss": 0.7815, "step": 4696 }, { "epoch": 0.1371346821990599, "grad_norm": 0.6857306878744561, "learning_rate": 1.917534468775345e-05, "loss": 0.6845, "step": 4697 }, { "epoch": 0.13716387842690725, "grad_norm": 0.6966536104900466, "learning_rate": 1.9174695863746958e-05, "loss": 0.6627, "step": 4698 }, { "epoch": 0.1371930746547546, "grad_norm": 0.6679461165285592, "learning_rate": 1.917404703974047e-05, "loss": 0.6678, "step": 4699 }, { "epoch": 0.13722227088260197, "grad_norm": 0.7838537233360897, "learning_rate": 1.9173398215733982e-05, "loss": 0.7306, "step": 4700 }, { "epoch": 0.13725146711044933, "grad_norm": 0.6945752902862999, "learning_rate": 1.9172749391727497e-05, "loss": 0.6576, "step": 4701 }, { "epoch": 0.1372806633382967, "grad_norm": 0.6733410558672478, "learning_rate": 1.917210056772101e-05, "loss": 0.6699, "step": 4702 }, { "epoch": 0.13730985956614405, "grad_norm": 0.7861871430989803, "learning_rate": 1.9171451743714518e-05, "loss": 0.7164, "step": 4703 }, { "epoch": 0.13733905579399142, "grad_norm": 0.6871163166906568, "learning_rate": 1.917080291970803e-05, "loss": 0.6468, "step": 4704 }, { "epoch": 0.13736825202183878, "grad_norm": 0.7205247396573161, "learning_rate": 1.9170154095701542e-05, "loss": 0.6836, "step": 4705 }, { "epoch": 0.13739744824968614, "grad_norm": 0.7973862761732755, "learning_rate": 1.9169505271695054e-05, "loss": 0.7003, "step": 4706 }, { "epoch": 0.1374266444775335, "grad_norm": 0.7716743460915366, "learning_rate": 1.9168856447688566e-05, "loss": 0.7266, "step": 4707 }, { "epoch": 0.13745584070538086, "grad_norm": 0.7532856326765865, "learning_rate": 1.9168207623682078e-05, "loss": 0.709, "step": 4708 }, { "epoch": 0.13748503693322822, "grad_norm": 0.6717801465181046, "learning_rate": 1.916755879967559e-05, "loss": 0.6745, "step": 4709 }, { "epoch": 0.13751423316107558, "grad_norm": 0.7805148204224023, "learning_rate": 1.9166909975669102e-05, "loss": 0.7427, "step": 4710 }, { "epoch": 0.13754342938892294, "grad_norm": 0.6977734550884153, "learning_rate": 1.9166261151662614e-05, "loss": 0.7216, "step": 4711 }, { "epoch": 0.1375726256167703, "grad_norm": 0.655729993106413, "learning_rate": 1.9165612327656123e-05, "loss": 0.5962, "step": 4712 }, { "epoch": 0.13760182184461767, "grad_norm": 0.8030526904811767, "learning_rate": 1.9164963503649635e-05, "loss": 0.7377, "step": 4713 }, { "epoch": 0.13763101807246503, "grad_norm": 0.7256017799095242, "learning_rate": 1.9164314679643147e-05, "loss": 0.686, "step": 4714 }, { "epoch": 0.1376602143003124, "grad_norm": 0.7318500375774509, "learning_rate": 1.916366585563666e-05, "loss": 0.71, "step": 4715 }, { "epoch": 0.13768941052815975, "grad_norm": 0.7200752524753525, "learning_rate": 1.916301703163017e-05, "loss": 0.6737, "step": 4716 }, { "epoch": 0.1377186067560071, "grad_norm": 0.7245897273170552, "learning_rate": 1.9162368207623683e-05, "loss": 0.7179, "step": 4717 }, { "epoch": 0.13774780298385447, "grad_norm": 0.7398377579741752, "learning_rate": 1.9161719383617195e-05, "loss": 0.7384, "step": 4718 }, { "epoch": 0.13777699921170186, "grad_norm": 0.7143614985802769, "learning_rate": 1.9161070559610707e-05, "loss": 0.6147, "step": 4719 }, { "epoch": 0.13780619543954922, "grad_norm": 0.7384261154604439, "learning_rate": 1.916042173560422e-05, "loss": 0.7483, "step": 4720 }, { "epoch": 0.13783539166739658, "grad_norm": 0.7400072780415431, "learning_rate": 1.915977291159773e-05, "loss": 0.7241, "step": 4721 }, { "epoch": 0.13786458789524395, "grad_norm": 0.7009291364590817, "learning_rate": 1.9159124087591243e-05, "loss": 0.6173, "step": 4722 }, { "epoch": 0.1378937841230913, "grad_norm": 0.7450098138198327, "learning_rate": 1.9158475263584755e-05, "loss": 0.7174, "step": 4723 }, { "epoch": 0.13792298035093867, "grad_norm": 0.7395392380213462, "learning_rate": 1.9157826439578267e-05, "loss": 0.6987, "step": 4724 }, { "epoch": 0.13795217657878603, "grad_norm": 0.7704378063892287, "learning_rate": 1.915717761557178e-05, "loss": 0.7308, "step": 4725 }, { "epoch": 0.1379813728066334, "grad_norm": 0.8873301646611933, "learning_rate": 1.915652879156529e-05, "loss": 0.7373, "step": 4726 }, { "epoch": 0.13801056903448075, "grad_norm": 0.7089966557469125, "learning_rate": 1.91558799675588e-05, "loss": 0.6839, "step": 4727 }, { "epoch": 0.1380397652623281, "grad_norm": 0.6701069421648009, "learning_rate": 1.9155231143552312e-05, "loss": 0.5794, "step": 4728 }, { "epoch": 0.13806896149017547, "grad_norm": 0.6397137924080223, "learning_rate": 1.9154582319545824e-05, "loss": 0.5492, "step": 4729 }, { "epoch": 0.13809815771802283, "grad_norm": 0.6316249496678441, "learning_rate": 1.9153933495539336e-05, "loss": 0.5798, "step": 4730 }, { "epoch": 0.1381273539458702, "grad_norm": 0.6660616919111709, "learning_rate": 1.9153284671532848e-05, "loss": 0.6053, "step": 4731 }, { "epoch": 0.13815655017371756, "grad_norm": 0.7076921365979243, "learning_rate": 1.915263584752636e-05, "loss": 0.7018, "step": 4732 }, { "epoch": 0.13818574640156492, "grad_norm": 0.6209673226416957, "learning_rate": 1.9151987023519872e-05, "loss": 0.5696, "step": 4733 }, { "epoch": 0.13821494262941228, "grad_norm": 0.6829507415129593, "learning_rate": 1.9151338199513384e-05, "loss": 0.6221, "step": 4734 }, { "epoch": 0.13824413885725964, "grad_norm": 0.7350762209569079, "learning_rate": 1.9150689375506893e-05, "loss": 0.695, "step": 4735 }, { "epoch": 0.138273335085107, "grad_norm": 0.6759491667537572, "learning_rate": 1.9150040551500405e-05, "loss": 0.6155, "step": 4736 }, { "epoch": 0.13830253131295436, "grad_norm": 0.7131305399609615, "learning_rate": 1.9149391727493917e-05, "loss": 0.668, "step": 4737 }, { "epoch": 0.13833172754080172, "grad_norm": 0.690344950114633, "learning_rate": 1.9148742903487432e-05, "loss": 0.6597, "step": 4738 }, { "epoch": 0.13836092376864909, "grad_norm": 0.7459613945261911, "learning_rate": 1.9148094079480944e-05, "loss": 0.7407, "step": 4739 }, { "epoch": 0.13839011999649645, "grad_norm": 0.7715915322559633, "learning_rate": 1.9147445255474456e-05, "loss": 0.6749, "step": 4740 }, { "epoch": 0.1384193162243438, "grad_norm": 0.7170754403420494, "learning_rate": 1.9146796431467965e-05, "loss": 0.6663, "step": 4741 }, { "epoch": 0.13844851245219117, "grad_norm": 0.6546272003265773, "learning_rate": 1.9146147607461477e-05, "loss": 0.5561, "step": 4742 }, { "epoch": 0.13847770868003853, "grad_norm": 0.7507085764931865, "learning_rate": 1.914549878345499e-05, "loss": 0.6906, "step": 4743 }, { "epoch": 0.1385069049078859, "grad_norm": 0.7567991470232945, "learning_rate": 1.91448499594485e-05, "loss": 0.6832, "step": 4744 }, { "epoch": 0.13853610113573325, "grad_norm": 0.6694281679090949, "learning_rate": 1.9144201135442013e-05, "loss": 0.5951, "step": 4745 }, { "epoch": 0.13856529736358061, "grad_norm": 0.7604973706654087, "learning_rate": 1.9143552311435525e-05, "loss": 0.7138, "step": 4746 }, { "epoch": 0.13859449359142798, "grad_norm": 0.732229794423129, "learning_rate": 1.9142903487429037e-05, "loss": 0.7497, "step": 4747 }, { "epoch": 0.13862368981927534, "grad_norm": 0.6589013770713185, "learning_rate": 1.914225466342255e-05, "loss": 0.6013, "step": 4748 }, { "epoch": 0.13865288604712273, "grad_norm": 0.7011266372252473, "learning_rate": 1.914160583941606e-05, "loss": 0.6393, "step": 4749 }, { "epoch": 0.1386820822749701, "grad_norm": 0.7681372777950075, "learning_rate": 1.914095701540957e-05, "loss": 0.7623, "step": 4750 }, { "epoch": 0.13871127850281745, "grad_norm": 0.7355687756236, "learning_rate": 1.9140308191403082e-05, "loss": 0.7542, "step": 4751 }, { "epoch": 0.1387404747306648, "grad_norm": 0.6665871217368794, "learning_rate": 1.9139659367396594e-05, "loss": 0.554, "step": 4752 }, { "epoch": 0.13876967095851217, "grad_norm": 0.6633107836532097, "learning_rate": 1.9139010543390106e-05, "loss": 0.6284, "step": 4753 }, { "epoch": 0.13879886718635953, "grad_norm": 0.6743796925773459, "learning_rate": 1.9138361719383618e-05, "loss": 0.6314, "step": 4754 }, { "epoch": 0.1388280634142069, "grad_norm": 0.7015340139341301, "learning_rate": 1.913771289537713e-05, "loss": 0.6787, "step": 4755 }, { "epoch": 0.13885725964205425, "grad_norm": 0.6342655198536497, "learning_rate": 1.9137064071370642e-05, "loss": 0.5739, "step": 4756 }, { "epoch": 0.13888645586990161, "grad_norm": 0.8039986485166513, "learning_rate": 1.9136415247364154e-05, "loss": 0.7135, "step": 4757 }, { "epoch": 0.13891565209774898, "grad_norm": 0.8057038072703223, "learning_rate": 1.9135766423357666e-05, "loss": 0.8316, "step": 4758 }, { "epoch": 0.13894484832559634, "grad_norm": 0.6704932538308648, "learning_rate": 1.9135117599351178e-05, "loss": 0.6443, "step": 4759 }, { "epoch": 0.1389740445534437, "grad_norm": 0.7013556292043135, "learning_rate": 1.913446877534469e-05, "loss": 0.624, "step": 4760 }, { "epoch": 0.13900324078129106, "grad_norm": 0.6818546367969307, "learning_rate": 1.9133819951338202e-05, "loss": 0.6062, "step": 4761 }, { "epoch": 0.13903243700913842, "grad_norm": 0.6932702532937108, "learning_rate": 1.9133171127331714e-05, "loss": 0.671, "step": 4762 }, { "epoch": 0.13906163323698578, "grad_norm": 0.6980634975693619, "learning_rate": 1.9132522303325226e-05, "loss": 0.6038, "step": 4763 }, { "epoch": 0.13909082946483314, "grad_norm": 0.6712862746052375, "learning_rate": 1.9131873479318738e-05, "loss": 0.6332, "step": 4764 }, { "epoch": 0.1391200256926805, "grad_norm": 0.6894336715078646, "learning_rate": 1.9131224655312247e-05, "loss": 0.6121, "step": 4765 }, { "epoch": 0.13914922192052787, "grad_norm": 0.7333944768807861, "learning_rate": 1.913057583130576e-05, "loss": 0.6924, "step": 4766 }, { "epoch": 0.13917841814837523, "grad_norm": 0.699639909561076, "learning_rate": 1.912992700729927e-05, "loss": 0.6675, "step": 4767 }, { "epoch": 0.1392076143762226, "grad_norm": 0.6589379903252225, "learning_rate": 1.9129278183292783e-05, "loss": 0.5765, "step": 4768 }, { "epoch": 0.13923681060406995, "grad_norm": 0.773174038939647, "learning_rate": 1.9128629359286295e-05, "loss": 0.695, "step": 4769 }, { "epoch": 0.1392660068319173, "grad_norm": 0.7001814796081578, "learning_rate": 1.9127980535279807e-05, "loss": 0.7588, "step": 4770 }, { "epoch": 0.13929520305976467, "grad_norm": 0.7176258234613779, "learning_rate": 1.912733171127332e-05, "loss": 0.6875, "step": 4771 }, { "epoch": 0.13932439928761203, "grad_norm": 0.7376340599954598, "learning_rate": 1.912668288726683e-05, "loss": 0.6867, "step": 4772 }, { "epoch": 0.1393535955154594, "grad_norm": 0.7044163426085156, "learning_rate": 1.912603406326034e-05, "loss": 0.6618, "step": 4773 }, { "epoch": 0.13938279174330676, "grad_norm": 0.7166814097136801, "learning_rate": 1.9125385239253852e-05, "loss": 0.6738, "step": 4774 }, { "epoch": 0.13941198797115412, "grad_norm": 0.741397046197868, "learning_rate": 1.9124736415247364e-05, "loss": 0.7193, "step": 4775 }, { "epoch": 0.13944118419900148, "grad_norm": 0.6663229279897844, "learning_rate": 1.912408759124088e-05, "loss": 0.6608, "step": 4776 }, { "epoch": 0.13947038042684884, "grad_norm": 0.7356197811686602, "learning_rate": 1.912343876723439e-05, "loss": 0.7394, "step": 4777 }, { "epoch": 0.1394995766546962, "grad_norm": 0.7035327024719141, "learning_rate": 1.9122789943227903e-05, "loss": 0.6545, "step": 4778 }, { "epoch": 0.1395287728825436, "grad_norm": 0.6671244571957664, "learning_rate": 1.9122141119221412e-05, "loss": 0.6059, "step": 4779 }, { "epoch": 0.13955796911039095, "grad_norm": 0.7623263327841266, "learning_rate": 1.9121492295214924e-05, "loss": 0.7248, "step": 4780 }, { "epoch": 0.1395871653382383, "grad_norm": 0.7398536999879676, "learning_rate": 1.9120843471208436e-05, "loss": 0.675, "step": 4781 }, { "epoch": 0.13961636156608567, "grad_norm": 0.8233701499889231, "learning_rate": 1.9120194647201948e-05, "loss": 0.7881, "step": 4782 }, { "epoch": 0.13964555779393303, "grad_norm": 0.8033517836939102, "learning_rate": 1.911954582319546e-05, "loss": 0.6845, "step": 4783 }, { "epoch": 0.1396747540217804, "grad_norm": 0.662446324041135, "learning_rate": 1.9118896999188972e-05, "loss": 0.603, "step": 4784 }, { "epoch": 0.13970395024962776, "grad_norm": 0.6908015925177765, "learning_rate": 1.9118248175182484e-05, "loss": 0.6443, "step": 4785 }, { "epoch": 0.13973314647747512, "grad_norm": 0.6165417568669463, "learning_rate": 1.9117599351175996e-05, "loss": 0.5247, "step": 4786 }, { "epoch": 0.13976234270532248, "grad_norm": 0.733643665322235, "learning_rate": 1.9116950527169508e-05, "loss": 0.6529, "step": 4787 }, { "epoch": 0.13979153893316984, "grad_norm": 0.7694022619684158, "learning_rate": 1.9116301703163017e-05, "loss": 0.7467, "step": 4788 }, { "epoch": 0.1398207351610172, "grad_norm": 0.7331914015383381, "learning_rate": 1.911565287915653e-05, "loss": 0.7188, "step": 4789 }, { "epoch": 0.13984993138886456, "grad_norm": 0.7783020998822108, "learning_rate": 1.911500405515004e-05, "loss": 0.6318, "step": 4790 }, { "epoch": 0.13987912761671192, "grad_norm": 0.7349308064279287, "learning_rate": 1.9114355231143553e-05, "loss": 0.728, "step": 4791 }, { "epoch": 0.13990832384455928, "grad_norm": 0.7325162942578471, "learning_rate": 1.9113706407137065e-05, "loss": 0.6698, "step": 4792 }, { "epoch": 0.13993752007240665, "grad_norm": 0.7068694155709788, "learning_rate": 1.9113057583130577e-05, "loss": 0.6529, "step": 4793 }, { "epoch": 0.139966716300254, "grad_norm": 0.697318845627316, "learning_rate": 1.911240875912409e-05, "loss": 0.6469, "step": 4794 }, { "epoch": 0.13999591252810137, "grad_norm": 0.7311225689913995, "learning_rate": 1.91117599351176e-05, "loss": 0.6942, "step": 4795 }, { "epoch": 0.14002510875594873, "grad_norm": 0.7136448006471701, "learning_rate": 1.9111111111111113e-05, "loss": 0.7003, "step": 4796 }, { "epoch": 0.1400543049837961, "grad_norm": 0.8838485283123929, "learning_rate": 1.9110462287104625e-05, "loss": 0.6301, "step": 4797 }, { "epoch": 0.14008350121164345, "grad_norm": 0.8317629226675569, "learning_rate": 1.9109813463098137e-05, "loss": 0.7438, "step": 4798 }, { "epoch": 0.1401126974394908, "grad_norm": 0.7937912821087917, "learning_rate": 1.910916463909165e-05, "loss": 0.7084, "step": 4799 }, { "epoch": 0.14014189366733817, "grad_norm": 0.6676957335633402, "learning_rate": 1.910851581508516e-05, "loss": 0.6629, "step": 4800 }, { "epoch": 0.14017108989518554, "grad_norm": 0.7631664674746294, "learning_rate": 1.9107866991078673e-05, "loss": 0.7572, "step": 4801 }, { "epoch": 0.1402002861230329, "grad_norm": 0.6906654787980602, "learning_rate": 1.9107218167072185e-05, "loss": 0.6377, "step": 4802 }, { "epoch": 0.14022948235088026, "grad_norm": 0.6918529211831109, "learning_rate": 1.9106569343065694e-05, "loss": 0.6538, "step": 4803 }, { "epoch": 0.14025867857872762, "grad_norm": 0.6684295034527694, "learning_rate": 1.9105920519059206e-05, "loss": 0.5995, "step": 4804 }, { "epoch": 0.14028787480657498, "grad_norm": 0.6347730173917091, "learning_rate": 1.9105271695052718e-05, "loss": 0.6078, "step": 4805 }, { "epoch": 0.14031707103442234, "grad_norm": 0.6885309610685194, "learning_rate": 1.910462287104623e-05, "loss": 0.6591, "step": 4806 }, { "epoch": 0.1403462672622697, "grad_norm": 0.6658145251618769, "learning_rate": 1.9103974047039742e-05, "loss": 0.6549, "step": 4807 }, { "epoch": 0.14037546349011706, "grad_norm": 0.7813412706706734, "learning_rate": 1.9103325223033254e-05, "loss": 0.6233, "step": 4808 }, { "epoch": 0.14040465971796443, "grad_norm": 0.7565526022030477, "learning_rate": 1.9102676399026766e-05, "loss": 0.6775, "step": 4809 }, { "epoch": 0.14043385594581181, "grad_norm": 0.7798208401523317, "learning_rate": 1.9102027575020278e-05, "loss": 0.7993, "step": 4810 }, { "epoch": 0.14046305217365918, "grad_norm": 0.7382502994033205, "learning_rate": 1.9101378751013787e-05, "loss": 0.6924, "step": 4811 }, { "epoch": 0.14049224840150654, "grad_norm": 0.7510206565077785, "learning_rate": 1.91007299270073e-05, "loss": 0.6325, "step": 4812 }, { "epoch": 0.1405214446293539, "grad_norm": 0.7221960100249775, "learning_rate": 1.910008110300081e-05, "loss": 0.6939, "step": 4813 }, { "epoch": 0.14055064085720126, "grad_norm": 0.6453910731426565, "learning_rate": 1.9099432278994326e-05, "loss": 0.5798, "step": 4814 }, { "epoch": 0.14057983708504862, "grad_norm": 0.7189485734741202, "learning_rate": 1.9098783454987838e-05, "loss": 0.6906, "step": 4815 }, { "epoch": 0.14060903331289598, "grad_norm": 0.7212784282083177, "learning_rate": 1.909813463098135e-05, "loss": 0.6745, "step": 4816 }, { "epoch": 0.14063822954074334, "grad_norm": 0.7527852823667843, "learning_rate": 1.909748580697486e-05, "loss": 0.728, "step": 4817 }, { "epoch": 0.1406674257685907, "grad_norm": 0.6720781278599381, "learning_rate": 1.909683698296837e-05, "loss": 0.6731, "step": 4818 }, { "epoch": 0.14069662199643806, "grad_norm": 0.9080421097959576, "learning_rate": 1.9096188158961883e-05, "loss": 0.7113, "step": 4819 }, { "epoch": 0.14072581822428543, "grad_norm": 0.7907034859454448, "learning_rate": 1.9095539334955395e-05, "loss": 0.7955, "step": 4820 }, { "epoch": 0.1407550144521328, "grad_norm": 0.7919842648983612, "learning_rate": 1.9094890510948907e-05, "loss": 0.7985, "step": 4821 }, { "epoch": 0.14078421067998015, "grad_norm": 0.678710941795192, "learning_rate": 1.909424168694242e-05, "loss": 0.6156, "step": 4822 }, { "epoch": 0.1408134069078275, "grad_norm": 0.7730250124862563, "learning_rate": 1.909359286293593e-05, "loss": 0.6232, "step": 4823 }, { "epoch": 0.14084260313567487, "grad_norm": 0.7568043643455266, "learning_rate": 1.9092944038929443e-05, "loss": 0.6622, "step": 4824 }, { "epoch": 0.14087179936352223, "grad_norm": 0.9861533827728564, "learning_rate": 1.9092295214922955e-05, "loss": 0.6534, "step": 4825 }, { "epoch": 0.1409009955913696, "grad_norm": 0.6699977326380965, "learning_rate": 1.9091646390916464e-05, "loss": 0.6224, "step": 4826 }, { "epoch": 0.14093019181921695, "grad_norm": 0.6869543341646581, "learning_rate": 1.9090997566909976e-05, "loss": 0.5941, "step": 4827 }, { "epoch": 0.14095938804706432, "grad_norm": 0.8209751996274224, "learning_rate": 1.9090348742903488e-05, "loss": 0.6476, "step": 4828 }, { "epoch": 0.14098858427491168, "grad_norm": 0.756760911496473, "learning_rate": 1.9089699918897e-05, "loss": 0.6767, "step": 4829 }, { "epoch": 0.14101778050275904, "grad_norm": 0.7272404840972015, "learning_rate": 1.9089051094890512e-05, "loss": 0.6786, "step": 4830 }, { "epoch": 0.1410469767306064, "grad_norm": 0.7080857066655217, "learning_rate": 1.9088402270884024e-05, "loss": 0.6555, "step": 4831 }, { "epoch": 0.14107617295845376, "grad_norm": 0.7266445180026827, "learning_rate": 1.9087753446877536e-05, "loss": 0.6853, "step": 4832 }, { "epoch": 0.14110536918630112, "grad_norm": 0.7335782271480533, "learning_rate": 1.9087104622871048e-05, "loss": 0.7077, "step": 4833 }, { "epoch": 0.14113456541414848, "grad_norm": 0.6015639925864565, "learning_rate": 1.908645579886456e-05, "loss": 0.5448, "step": 4834 }, { "epoch": 0.14116376164199584, "grad_norm": 0.7258575197611998, "learning_rate": 1.9085806974858072e-05, "loss": 0.7106, "step": 4835 }, { "epoch": 0.1411929578698432, "grad_norm": 0.762895241990381, "learning_rate": 1.9085158150851584e-05, "loss": 0.7475, "step": 4836 }, { "epoch": 0.14122215409769057, "grad_norm": 0.7985120458493968, "learning_rate": 1.9084509326845096e-05, "loss": 0.7201, "step": 4837 }, { "epoch": 0.14125135032553793, "grad_norm": 0.743057347532542, "learning_rate": 1.9083860502838608e-05, "loss": 0.7411, "step": 4838 }, { "epoch": 0.1412805465533853, "grad_norm": 0.6729603813601431, "learning_rate": 1.908321167883212e-05, "loss": 0.5395, "step": 4839 }, { "epoch": 0.14130974278123268, "grad_norm": 0.7609103716133806, "learning_rate": 1.908256285482563e-05, "loss": 0.68, "step": 4840 }, { "epoch": 0.14133893900908004, "grad_norm": 0.7421177224779836, "learning_rate": 1.908191403081914e-05, "loss": 0.6711, "step": 4841 }, { "epoch": 0.1413681352369274, "grad_norm": 0.7776859932022707, "learning_rate": 1.9081265206812653e-05, "loss": 0.7191, "step": 4842 }, { "epoch": 0.14139733146477476, "grad_norm": 0.7240964736486535, "learning_rate": 1.9080616382806165e-05, "loss": 0.6359, "step": 4843 }, { "epoch": 0.14142652769262212, "grad_norm": 0.6939224525918511, "learning_rate": 1.9079967558799677e-05, "loss": 0.6191, "step": 4844 }, { "epoch": 0.14145572392046948, "grad_norm": 0.7981148845838287, "learning_rate": 1.907931873479319e-05, "loss": 0.7252, "step": 4845 }, { "epoch": 0.14148492014831685, "grad_norm": 0.7177801288140891, "learning_rate": 1.90786699107867e-05, "loss": 0.6794, "step": 4846 }, { "epoch": 0.1415141163761642, "grad_norm": 0.8231803138312234, "learning_rate": 1.9078021086780213e-05, "loss": 0.8017, "step": 4847 }, { "epoch": 0.14154331260401157, "grad_norm": 0.8012436718472743, "learning_rate": 1.9077372262773725e-05, "loss": 0.6867, "step": 4848 }, { "epoch": 0.14157250883185893, "grad_norm": 0.7504786812625012, "learning_rate": 1.9076723438767233e-05, "loss": 0.743, "step": 4849 }, { "epoch": 0.1416017050597063, "grad_norm": 0.8006169548834144, "learning_rate": 1.9076074614760746e-05, "loss": 0.7628, "step": 4850 }, { "epoch": 0.14163090128755365, "grad_norm": 0.7354027463061027, "learning_rate": 1.9075425790754258e-05, "loss": 0.7023, "step": 4851 }, { "epoch": 0.141660097515401, "grad_norm": 0.7593770755211053, "learning_rate": 1.9074776966747773e-05, "loss": 0.7443, "step": 4852 }, { "epoch": 0.14168929374324837, "grad_norm": 0.6812398147097892, "learning_rate": 1.9074128142741285e-05, "loss": 0.6569, "step": 4853 }, { "epoch": 0.14171848997109573, "grad_norm": 0.7253806123733064, "learning_rate": 1.9073479318734797e-05, "loss": 0.6585, "step": 4854 }, { "epoch": 0.1417476861989431, "grad_norm": 0.6814070448796545, "learning_rate": 1.9072830494728306e-05, "loss": 0.6306, "step": 4855 }, { "epoch": 0.14177688242679046, "grad_norm": 0.6862194956349253, "learning_rate": 1.9072181670721818e-05, "loss": 0.6694, "step": 4856 }, { "epoch": 0.14180607865463782, "grad_norm": 0.7335410636417719, "learning_rate": 1.907153284671533e-05, "loss": 0.6564, "step": 4857 }, { "epoch": 0.14183527488248518, "grad_norm": 0.7523334302941851, "learning_rate": 1.9070884022708842e-05, "loss": 0.6531, "step": 4858 }, { "epoch": 0.14186447111033254, "grad_norm": 0.7151625567194515, "learning_rate": 1.9070235198702354e-05, "loss": 0.6675, "step": 4859 }, { "epoch": 0.1418936673381799, "grad_norm": 0.6946784184914155, "learning_rate": 1.9069586374695866e-05, "loss": 0.671, "step": 4860 }, { "epoch": 0.14192286356602726, "grad_norm": 0.6859118869425065, "learning_rate": 1.9068937550689378e-05, "loss": 0.6113, "step": 4861 }, { "epoch": 0.14195205979387462, "grad_norm": 0.7973065594478795, "learning_rate": 1.906828872668289e-05, "loss": 0.7824, "step": 4862 }, { "epoch": 0.14198125602172199, "grad_norm": 0.7406587218039985, "learning_rate": 1.9067639902676402e-05, "loss": 0.6787, "step": 4863 }, { "epoch": 0.14201045224956935, "grad_norm": 0.6713274526838122, "learning_rate": 1.906699107866991e-05, "loss": 0.6296, "step": 4864 }, { "epoch": 0.1420396484774167, "grad_norm": 0.7444466929293116, "learning_rate": 1.9066342254663423e-05, "loss": 0.6681, "step": 4865 }, { "epoch": 0.14206884470526407, "grad_norm": 0.6879980398664104, "learning_rate": 1.9065693430656935e-05, "loss": 0.6524, "step": 4866 }, { "epoch": 0.14209804093311143, "grad_norm": 0.9650104174441733, "learning_rate": 1.9065044606650447e-05, "loss": 0.6795, "step": 4867 }, { "epoch": 0.1421272371609588, "grad_norm": 0.733014714497094, "learning_rate": 1.906439578264396e-05, "loss": 0.6934, "step": 4868 }, { "epoch": 0.14215643338880615, "grad_norm": 0.7434429342856391, "learning_rate": 1.906374695863747e-05, "loss": 0.7077, "step": 4869 }, { "epoch": 0.14218562961665354, "grad_norm": 0.829340671619325, "learning_rate": 1.9063098134630983e-05, "loss": 0.7672, "step": 4870 }, { "epoch": 0.1422148258445009, "grad_norm": 0.6866385584840523, "learning_rate": 1.9062449310624495e-05, "loss": 0.657, "step": 4871 }, { "epoch": 0.14224402207234826, "grad_norm": 0.6398081566489651, "learning_rate": 1.9061800486618007e-05, "loss": 0.6169, "step": 4872 }, { "epoch": 0.14227321830019563, "grad_norm": 0.692011460490322, "learning_rate": 1.906115166261152e-05, "loss": 0.6459, "step": 4873 }, { "epoch": 0.142302414528043, "grad_norm": 0.8881240873223923, "learning_rate": 1.906050283860503e-05, "loss": 0.6527, "step": 4874 }, { "epoch": 0.14233161075589035, "grad_norm": 0.6664396389799769, "learning_rate": 1.9059854014598543e-05, "loss": 0.6178, "step": 4875 }, { "epoch": 0.1423608069837377, "grad_norm": 0.7684513996472012, "learning_rate": 1.9059205190592055e-05, "loss": 0.7488, "step": 4876 }, { "epoch": 0.14239000321158507, "grad_norm": 0.7498664660523174, "learning_rate": 1.9058556366585567e-05, "loss": 0.7122, "step": 4877 }, { "epoch": 0.14241919943943243, "grad_norm": 0.7336034164213916, "learning_rate": 1.9057907542579075e-05, "loss": 0.6732, "step": 4878 }, { "epoch": 0.1424483956672798, "grad_norm": 0.7457652884330759, "learning_rate": 1.9057258718572588e-05, "loss": 0.6843, "step": 4879 }, { "epoch": 0.14247759189512715, "grad_norm": 0.7233560356081893, "learning_rate": 1.90566098945661e-05, "loss": 0.6805, "step": 4880 }, { "epoch": 0.14250678812297451, "grad_norm": 0.7144928756866842, "learning_rate": 1.905596107055961e-05, "loss": 0.6454, "step": 4881 }, { "epoch": 0.14253598435082188, "grad_norm": 0.680737762348683, "learning_rate": 1.9055312246553124e-05, "loss": 0.6354, "step": 4882 }, { "epoch": 0.14256518057866924, "grad_norm": 0.7714683211567231, "learning_rate": 1.9054663422546636e-05, "loss": 0.7636, "step": 4883 }, { "epoch": 0.1425943768065166, "grad_norm": 0.6807453862570095, "learning_rate": 1.9054014598540148e-05, "loss": 0.7015, "step": 4884 }, { "epoch": 0.14262357303436396, "grad_norm": 0.7182374000513724, "learning_rate": 1.905336577453366e-05, "loss": 0.6675, "step": 4885 }, { "epoch": 0.14265276926221132, "grad_norm": 0.6870560221682297, "learning_rate": 1.9052716950527172e-05, "loss": 0.7401, "step": 4886 }, { "epoch": 0.14268196549005868, "grad_norm": 0.734368004692498, "learning_rate": 1.905206812652068e-05, "loss": 0.6857, "step": 4887 }, { "epoch": 0.14271116171790604, "grad_norm": 0.8849671560454617, "learning_rate": 1.9051419302514192e-05, "loss": 0.6576, "step": 4888 }, { "epoch": 0.1427403579457534, "grad_norm": 1.2446084560907777, "learning_rate": 1.9050770478507708e-05, "loss": 0.7124, "step": 4889 }, { "epoch": 0.14276955417360077, "grad_norm": 0.69281291855719, "learning_rate": 1.905012165450122e-05, "loss": 0.6298, "step": 4890 }, { "epoch": 0.14279875040144813, "grad_norm": 0.7690356390236641, "learning_rate": 1.9049472830494732e-05, "loss": 0.7487, "step": 4891 }, { "epoch": 0.1428279466292955, "grad_norm": 0.7962333644765679, "learning_rate": 1.9048824006488244e-05, "loss": 0.7094, "step": 4892 }, { "epoch": 0.14285714285714285, "grad_norm": 0.742637242923166, "learning_rate": 1.9048175182481753e-05, "loss": 0.752, "step": 4893 }, { "epoch": 0.1428863390849902, "grad_norm": 0.9737819566751119, "learning_rate": 1.9047526358475265e-05, "loss": 0.6674, "step": 4894 }, { "epoch": 0.14291553531283757, "grad_norm": 0.6600568144102912, "learning_rate": 1.9046877534468777e-05, "loss": 0.6109, "step": 4895 }, { "epoch": 0.14294473154068493, "grad_norm": 0.7036606745544502, "learning_rate": 1.904622871046229e-05, "loss": 0.6052, "step": 4896 }, { "epoch": 0.1429739277685323, "grad_norm": 0.6470261455168296, "learning_rate": 1.90455798864558e-05, "loss": 0.5524, "step": 4897 }, { "epoch": 0.14300312399637966, "grad_norm": 0.7380256587807202, "learning_rate": 1.9044931062449313e-05, "loss": 0.7133, "step": 4898 }, { "epoch": 0.14303232022422702, "grad_norm": 0.6926154220728145, "learning_rate": 1.9044282238442825e-05, "loss": 0.6995, "step": 4899 }, { "epoch": 0.1430615164520744, "grad_norm": 0.7790326531985896, "learning_rate": 1.9043633414436337e-05, "loss": 0.6785, "step": 4900 }, { "epoch": 0.14309071267992177, "grad_norm": 0.7637264039090145, "learning_rate": 1.904298459042985e-05, "loss": 0.6821, "step": 4901 }, { "epoch": 0.14311990890776913, "grad_norm": 0.6183065178681566, "learning_rate": 1.9042335766423357e-05, "loss": 0.5444, "step": 4902 }, { "epoch": 0.1431491051356165, "grad_norm": 0.801295238141271, "learning_rate": 1.904168694241687e-05, "loss": 0.7479, "step": 4903 }, { "epoch": 0.14317830136346385, "grad_norm": 0.7955572775448613, "learning_rate": 1.904103811841038e-05, "loss": 0.7043, "step": 4904 }, { "epoch": 0.1432074975913112, "grad_norm": 0.6656846931942155, "learning_rate": 1.9040389294403893e-05, "loss": 0.6343, "step": 4905 }, { "epoch": 0.14323669381915857, "grad_norm": 0.6563129637786645, "learning_rate": 1.9039740470397405e-05, "loss": 0.5792, "step": 4906 }, { "epoch": 0.14326589004700593, "grad_norm": 0.6688950690529581, "learning_rate": 1.9039091646390918e-05, "loss": 0.5675, "step": 4907 }, { "epoch": 0.1432950862748533, "grad_norm": 0.8505573209929487, "learning_rate": 1.903844282238443e-05, "loss": 0.6604, "step": 4908 }, { "epoch": 0.14332428250270066, "grad_norm": 0.6783188966607071, "learning_rate": 1.903779399837794e-05, "loss": 0.6477, "step": 4909 }, { "epoch": 0.14335347873054802, "grad_norm": 0.6900486588368294, "learning_rate": 1.9037145174371454e-05, "loss": 0.6537, "step": 4910 }, { "epoch": 0.14338267495839538, "grad_norm": 0.7128698647279591, "learning_rate": 1.9036496350364966e-05, "loss": 0.6782, "step": 4911 }, { "epoch": 0.14341187118624274, "grad_norm": 0.8068694972326127, "learning_rate": 1.9035847526358478e-05, "loss": 0.6561, "step": 4912 }, { "epoch": 0.1434410674140901, "grad_norm": 0.6624884633896465, "learning_rate": 1.903519870235199e-05, "loss": 0.6362, "step": 4913 }, { "epoch": 0.14347026364193746, "grad_norm": 0.8478139115589501, "learning_rate": 1.90345498783455e-05, "loss": 0.7917, "step": 4914 }, { "epoch": 0.14349945986978482, "grad_norm": 0.8239692856070429, "learning_rate": 1.9033901054339014e-05, "loss": 0.6051, "step": 4915 }, { "epoch": 0.14352865609763218, "grad_norm": 0.7898226460677664, "learning_rate": 1.9033252230332522e-05, "loss": 0.7479, "step": 4916 }, { "epoch": 0.14355785232547955, "grad_norm": 0.6848691077518462, "learning_rate": 1.9032603406326034e-05, "loss": 0.6566, "step": 4917 }, { "epoch": 0.1435870485533269, "grad_norm": 0.7272169146845879, "learning_rate": 1.9031954582319546e-05, "loss": 0.7254, "step": 4918 }, { "epoch": 0.14361624478117427, "grad_norm": 0.7455154397788691, "learning_rate": 1.903130575831306e-05, "loss": 0.7623, "step": 4919 }, { "epoch": 0.14364544100902163, "grad_norm": 0.6441809832567971, "learning_rate": 1.903065693430657e-05, "loss": 0.5445, "step": 4920 }, { "epoch": 0.143674637236869, "grad_norm": 0.7013647373627506, "learning_rate": 1.9030008110300082e-05, "loss": 0.657, "step": 4921 }, { "epoch": 0.14370383346471635, "grad_norm": 0.7894139806102459, "learning_rate": 1.9029359286293595e-05, "loss": 0.6972, "step": 4922 }, { "epoch": 0.1437330296925637, "grad_norm": 0.7081989107894623, "learning_rate": 1.9028710462287107e-05, "loss": 0.6925, "step": 4923 }, { "epoch": 0.14376222592041107, "grad_norm": 0.7016996667798846, "learning_rate": 1.902806163828062e-05, "loss": 0.6363, "step": 4924 }, { "epoch": 0.14379142214825844, "grad_norm": 0.6990772199367887, "learning_rate": 1.9027412814274127e-05, "loss": 0.688, "step": 4925 }, { "epoch": 0.1438206183761058, "grad_norm": 1.038915923703503, "learning_rate": 1.902676399026764e-05, "loss": 0.683, "step": 4926 }, { "epoch": 0.14384981460395316, "grad_norm": 0.7046905150022447, "learning_rate": 1.9026115166261155e-05, "loss": 0.6773, "step": 4927 }, { "epoch": 0.14387901083180052, "grad_norm": 0.721495257038456, "learning_rate": 1.9025466342254667e-05, "loss": 0.7043, "step": 4928 }, { "epoch": 0.14390820705964788, "grad_norm": 0.8204972606397354, "learning_rate": 1.902481751824818e-05, "loss": 0.6398, "step": 4929 }, { "epoch": 0.14393740328749527, "grad_norm": 0.6633922346693049, "learning_rate": 1.902416869424169e-05, "loss": 0.6004, "step": 4930 }, { "epoch": 0.14396659951534263, "grad_norm": 0.6826168204782731, "learning_rate": 1.90235198702352e-05, "loss": 0.641, "step": 4931 }, { "epoch": 0.14399579574319, "grad_norm": 0.7067481354416311, "learning_rate": 1.902287104622871e-05, "loss": 0.6549, "step": 4932 }, { "epoch": 0.14402499197103735, "grad_norm": 0.6708962145176988, "learning_rate": 1.9022222222222223e-05, "loss": 0.6293, "step": 4933 }, { "epoch": 0.14405418819888471, "grad_norm": 0.6786955845615992, "learning_rate": 1.9021573398215735e-05, "loss": 0.5878, "step": 4934 }, { "epoch": 0.14408338442673208, "grad_norm": 0.7169497440183982, "learning_rate": 1.9020924574209247e-05, "loss": 0.6833, "step": 4935 }, { "epoch": 0.14411258065457944, "grad_norm": 0.7259987358822061, "learning_rate": 1.902027575020276e-05, "loss": 0.697, "step": 4936 }, { "epoch": 0.1441417768824268, "grad_norm": 0.834638614278555, "learning_rate": 1.901962692619627e-05, "loss": 0.7991, "step": 4937 }, { "epoch": 0.14417097311027416, "grad_norm": 0.7100308066085246, "learning_rate": 1.9018978102189784e-05, "loss": 0.7201, "step": 4938 }, { "epoch": 0.14420016933812152, "grad_norm": 1.2908109803128427, "learning_rate": 1.9018329278183296e-05, "loss": 0.7191, "step": 4939 }, { "epoch": 0.14422936556596888, "grad_norm": 0.6562941480863835, "learning_rate": 1.9017680454176804e-05, "loss": 0.6196, "step": 4940 }, { "epoch": 0.14425856179381624, "grad_norm": 0.8468279900954814, "learning_rate": 1.9017031630170316e-05, "loss": 0.6819, "step": 4941 }, { "epoch": 0.1442877580216636, "grad_norm": 0.7356903468701556, "learning_rate": 1.9016382806163828e-05, "loss": 0.6652, "step": 4942 }, { "epoch": 0.14431695424951096, "grad_norm": 0.8170234298073006, "learning_rate": 1.901573398215734e-05, "loss": 0.6447, "step": 4943 }, { "epoch": 0.14434615047735833, "grad_norm": 0.7301165219814616, "learning_rate": 1.9015085158150852e-05, "loss": 0.693, "step": 4944 }, { "epoch": 0.1443753467052057, "grad_norm": 0.6870518603889829, "learning_rate": 1.9014436334144364e-05, "loss": 0.6666, "step": 4945 }, { "epoch": 0.14440454293305305, "grad_norm": 0.7785567852018537, "learning_rate": 1.9013787510137876e-05, "loss": 0.636, "step": 4946 }, { "epoch": 0.1444337391609004, "grad_norm": 0.8632426388214417, "learning_rate": 1.901313868613139e-05, "loss": 0.7062, "step": 4947 }, { "epoch": 0.14446293538874777, "grad_norm": 0.8141821980807078, "learning_rate": 1.90124898621249e-05, "loss": 0.7513, "step": 4948 }, { "epoch": 0.14449213161659513, "grad_norm": 0.8078862311008267, "learning_rate": 1.9011841038118412e-05, "loss": 0.7541, "step": 4949 }, { "epoch": 0.1445213278444425, "grad_norm": 0.7958998502149968, "learning_rate": 1.9011192214111925e-05, "loss": 0.7695, "step": 4950 }, { "epoch": 0.14455052407228985, "grad_norm": 1.5767273197240972, "learning_rate": 1.9010543390105437e-05, "loss": 0.9296, "step": 4951 }, { "epoch": 0.14457972030013722, "grad_norm": 0.6791215969741078, "learning_rate": 1.900989456609895e-05, "loss": 0.6631, "step": 4952 }, { "epoch": 0.14460891652798458, "grad_norm": 0.6638787403701932, "learning_rate": 1.900924574209246e-05, "loss": 0.6026, "step": 4953 }, { "epoch": 0.14463811275583194, "grad_norm": 0.7286769431280904, "learning_rate": 1.900859691808597e-05, "loss": 0.7152, "step": 4954 }, { "epoch": 0.1446673089836793, "grad_norm": 0.6802749458935449, "learning_rate": 1.900794809407948e-05, "loss": 0.6565, "step": 4955 }, { "epoch": 0.14469650521152666, "grad_norm": 0.7715655226309186, "learning_rate": 1.9007299270072993e-05, "loss": 0.7012, "step": 4956 }, { "epoch": 0.14472570143937402, "grad_norm": 0.6625401370652131, "learning_rate": 1.9006650446066505e-05, "loss": 0.5869, "step": 4957 }, { "epoch": 0.14475489766722138, "grad_norm": 0.955840166526514, "learning_rate": 1.9006001622060017e-05, "loss": 0.6918, "step": 4958 }, { "epoch": 0.14478409389506874, "grad_norm": 0.6786715452634537, "learning_rate": 1.900535279805353e-05, "loss": 0.6397, "step": 4959 }, { "epoch": 0.14481329012291613, "grad_norm": 0.7893352250746475, "learning_rate": 1.900470397404704e-05, "loss": 0.7313, "step": 4960 }, { "epoch": 0.1448424863507635, "grad_norm": 0.7190049451537702, "learning_rate": 1.9004055150040553e-05, "loss": 0.6919, "step": 4961 }, { "epoch": 0.14487168257861086, "grad_norm": 0.7145216386655728, "learning_rate": 1.9003406326034065e-05, "loss": 0.717, "step": 4962 }, { "epoch": 0.14490087880645822, "grad_norm": 0.6852859327522394, "learning_rate": 1.9002757502027574e-05, "loss": 0.6212, "step": 4963 }, { "epoch": 0.14493007503430558, "grad_norm": 0.7756240544770279, "learning_rate": 1.9002108678021086e-05, "loss": 0.7078, "step": 4964 }, { "epoch": 0.14495927126215294, "grad_norm": 0.6786390163196572, "learning_rate": 1.90014598540146e-05, "loss": 0.6501, "step": 4965 }, { "epoch": 0.1449884674900003, "grad_norm": 1.3302586197367503, "learning_rate": 1.9000811030008114e-05, "loss": 0.7432, "step": 4966 }, { "epoch": 0.14501766371784766, "grad_norm": 0.7615987942204706, "learning_rate": 1.9000162206001626e-05, "loss": 0.6933, "step": 4967 }, { "epoch": 0.14504685994569502, "grad_norm": 0.7424963780378435, "learning_rate": 1.8999513381995138e-05, "loss": 0.7271, "step": 4968 }, { "epoch": 0.14507605617354238, "grad_norm": 0.7108697555560637, "learning_rate": 1.8998864557988646e-05, "loss": 0.6638, "step": 4969 }, { "epoch": 0.14510525240138974, "grad_norm": 0.7146319067206118, "learning_rate": 1.8998215733982158e-05, "loss": 0.6984, "step": 4970 }, { "epoch": 0.1451344486292371, "grad_norm": 0.7004367630568944, "learning_rate": 1.899756690997567e-05, "loss": 0.6129, "step": 4971 }, { "epoch": 0.14516364485708447, "grad_norm": 0.6657340030576047, "learning_rate": 1.8996918085969182e-05, "loss": 0.625, "step": 4972 }, { "epoch": 0.14519284108493183, "grad_norm": 0.7269639879996194, "learning_rate": 1.8996269261962694e-05, "loss": 0.6808, "step": 4973 }, { "epoch": 0.1452220373127792, "grad_norm": 0.7118512540276888, "learning_rate": 1.8995620437956206e-05, "loss": 0.6107, "step": 4974 }, { "epoch": 0.14525123354062655, "grad_norm": 0.5981871808002805, "learning_rate": 1.899497161394972e-05, "loss": 0.5367, "step": 4975 }, { "epoch": 0.1452804297684739, "grad_norm": 0.7153009142642633, "learning_rate": 1.899432278994323e-05, "loss": 0.6749, "step": 4976 }, { "epoch": 0.14530962599632127, "grad_norm": 0.7152182612380785, "learning_rate": 1.8993673965936742e-05, "loss": 0.7078, "step": 4977 }, { "epoch": 0.14533882222416863, "grad_norm": 0.7422064994215546, "learning_rate": 1.899302514193025e-05, "loss": 0.7399, "step": 4978 }, { "epoch": 0.145368018452016, "grad_norm": 0.7681510382875631, "learning_rate": 1.8992376317923763e-05, "loss": 0.825, "step": 4979 }, { "epoch": 0.14539721467986336, "grad_norm": 0.666398212387658, "learning_rate": 1.8991727493917275e-05, "loss": 0.5719, "step": 4980 }, { "epoch": 0.14542641090771072, "grad_norm": 0.731494054367201, "learning_rate": 1.8991078669910787e-05, "loss": 0.7053, "step": 4981 }, { "epoch": 0.14545560713555808, "grad_norm": 0.734378571209761, "learning_rate": 1.89904298459043e-05, "loss": 0.72, "step": 4982 }, { "epoch": 0.14548480336340544, "grad_norm": 0.6752559026239472, "learning_rate": 1.898978102189781e-05, "loss": 0.6473, "step": 4983 }, { "epoch": 0.1455139995912528, "grad_norm": 0.6727691208794212, "learning_rate": 1.8989132197891323e-05, "loss": 0.6577, "step": 4984 }, { "epoch": 0.14554319581910016, "grad_norm": 0.7320801332451775, "learning_rate": 1.8988483373884835e-05, "loss": 0.6974, "step": 4985 }, { "epoch": 0.14557239204694752, "grad_norm": 0.7259766880216044, "learning_rate": 1.8987834549878347e-05, "loss": 0.7192, "step": 4986 }, { "epoch": 0.14560158827479489, "grad_norm": 0.7344575588525756, "learning_rate": 1.898718572587186e-05, "loss": 0.6816, "step": 4987 }, { "epoch": 0.14563078450264225, "grad_norm": 0.6681545659689356, "learning_rate": 1.898653690186537e-05, "loss": 0.6654, "step": 4988 }, { "epoch": 0.1456599807304896, "grad_norm": 0.696737517389053, "learning_rate": 1.8985888077858883e-05, "loss": 0.7342, "step": 4989 }, { "epoch": 0.145689176958337, "grad_norm": 0.7205270958771263, "learning_rate": 1.8985239253852395e-05, "loss": 0.7124, "step": 4990 }, { "epoch": 0.14571837318618436, "grad_norm": 0.7057829345877477, "learning_rate": 1.8984590429845907e-05, "loss": 0.6691, "step": 4991 }, { "epoch": 0.14574756941403172, "grad_norm": 0.7929589895119858, "learning_rate": 1.8983941605839416e-05, "loss": 0.7585, "step": 4992 }, { "epoch": 0.14577676564187908, "grad_norm": 0.9506170575746951, "learning_rate": 1.8983292781832928e-05, "loss": 0.7469, "step": 4993 }, { "epoch": 0.14580596186972644, "grad_norm": 0.7482139282285031, "learning_rate": 1.898264395782644e-05, "loss": 0.6938, "step": 4994 }, { "epoch": 0.1458351580975738, "grad_norm": 0.7240857461514807, "learning_rate": 1.8981995133819952e-05, "loss": 0.746, "step": 4995 }, { "epoch": 0.14586435432542116, "grad_norm": 0.688236863026524, "learning_rate": 1.8981346309813464e-05, "loss": 0.5556, "step": 4996 }, { "epoch": 0.14589355055326853, "grad_norm": 0.7120878970802206, "learning_rate": 1.8980697485806976e-05, "loss": 0.6662, "step": 4997 }, { "epoch": 0.1459227467811159, "grad_norm": 0.7540261772869121, "learning_rate": 1.8980048661800488e-05, "loss": 0.7923, "step": 4998 }, { "epoch": 0.14595194300896325, "grad_norm": 0.7162911944993344, "learning_rate": 1.8979399837794e-05, "loss": 0.7121, "step": 4999 }, { "epoch": 0.1459811392368106, "grad_norm": 0.7043922020938064, "learning_rate": 1.8978751013787512e-05, "loss": 0.6547, "step": 5000 }, { "epoch": 0.14601033546465797, "grad_norm": 0.7404894679049638, "learning_rate": 1.897810218978102e-05, "loss": 0.6181, "step": 5001 }, { "epoch": 0.14603953169250533, "grad_norm": 0.7474268042070582, "learning_rate": 1.8977453365774533e-05, "loss": 0.6744, "step": 5002 }, { "epoch": 0.1460687279203527, "grad_norm": 0.6577959806284985, "learning_rate": 1.897680454176805e-05, "loss": 0.5488, "step": 5003 }, { "epoch": 0.14609792414820005, "grad_norm": 0.6862866912689803, "learning_rate": 1.897615571776156e-05, "loss": 0.6733, "step": 5004 }, { "epoch": 0.14612712037604741, "grad_norm": 0.7652890406789346, "learning_rate": 1.8975506893755072e-05, "loss": 0.749, "step": 5005 }, { "epoch": 0.14615631660389478, "grad_norm": 0.6659759474506338, "learning_rate": 1.8974858069748584e-05, "loss": 0.6326, "step": 5006 }, { "epoch": 0.14618551283174214, "grad_norm": 0.6642505055358612, "learning_rate": 1.8974209245742093e-05, "loss": 0.5849, "step": 5007 }, { "epoch": 0.1462147090595895, "grad_norm": 0.7336888968415389, "learning_rate": 1.8973560421735605e-05, "loss": 0.7401, "step": 5008 }, { "epoch": 0.14624390528743686, "grad_norm": 0.7392344511908446, "learning_rate": 1.8972911597729117e-05, "loss": 0.7029, "step": 5009 }, { "epoch": 0.14627310151528422, "grad_norm": 0.7251208966512436, "learning_rate": 1.897226277372263e-05, "loss": 0.7252, "step": 5010 }, { "epoch": 0.14630229774313158, "grad_norm": 0.7395423907207868, "learning_rate": 1.897161394971614e-05, "loss": 0.6691, "step": 5011 }, { "epoch": 0.14633149397097894, "grad_norm": 0.7387080724981706, "learning_rate": 1.8970965125709653e-05, "loss": 0.6748, "step": 5012 }, { "epoch": 0.1463606901988263, "grad_norm": 0.7606043411028838, "learning_rate": 1.8970316301703165e-05, "loss": 0.7391, "step": 5013 }, { "epoch": 0.14638988642667367, "grad_norm": 0.7028784684167456, "learning_rate": 1.8969667477696677e-05, "loss": 0.6733, "step": 5014 }, { "epoch": 0.14641908265452103, "grad_norm": 0.8410541485323972, "learning_rate": 1.896901865369019e-05, "loss": 0.8357, "step": 5015 }, { "epoch": 0.1464482788823684, "grad_norm": 0.7135329166308304, "learning_rate": 1.8968369829683698e-05, "loss": 0.6933, "step": 5016 }, { "epoch": 0.14647747511021575, "grad_norm": 0.6942443570739261, "learning_rate": 1.896772100567721e-05, "loss": 0.6925, "step": 5017 }, { "epoch": 0.1465066713380631, "grad_norm": 0.6712390031013385, "learning_rate": 1.8967072181670722e-05, "loss": 0.6543, "step": 5018 }, { "epoch": 0.14653586756591047, "grad_norm": 0.7247856700477427, "learning_rate": 1.8966423357664234e-05, "loss": 0.8001, "step": 5019 }, { "epoch": 0.14656506379375783, "grad_norm": 0.843102483916859, "learning_rate": 1.8965774533657746e-05, "loss": 0.7526, "step": 5020 }, { "epoch": 0.14659426002160522, "grad_norm": 0.7999813439071854, "learning_rate": 1.8965125709651258e-05, "loss": 0.6658, "step": 5021 }, { "epoch": 0.14662345624945258, "grad_norm": 0.7270769795191798, "learning_rate": 1.896447688564477e-05, "loss": 0.6497, "step": 5022 }, { "epoch": 0.14665265247729994, "grad_norm": 0.6655474463570277, "learning_rate": 1.8963828061638282e-05, "loss": 0.6485, "step": 5023 }, { "epoch": 0.1466818487051473, "grad_norm": 0.720658868644657, "learning_rate": 1.8963179237631794e-05, "loss": 0.6831, "step": 5024 }, { "epoch": 0.14671104493299467, "grad_norm": 0.6862938707191352, "learning_rate": 1.8962530413625306e-05, "loss": 0.68, "step": 5025 }, { "epoch": 0.14674024116084203, "grad_norm": 0.7407189917116838, "learning_rate": 1.8961881589618818e-05, "loss": 0.719, "step": 5026 }, { "epoch": 0.1467694373886894, "grad_norm": 0.7683075341663197, "learning_rate": 1.896123276561233e-05, "loss": 0.7436, "step": 5027 }, { "epoch": 0.14679863361653675, "grad_norm": 0.7378680181959688, "learning_rate": 1.8960583941605842e-05, "loss": 0.6698, "step": 5028 }, { "epoch": 0.1468278298443841, "grad_norm": 0.8748463704265602, "learning_rate": 1.8959935117599354e-05, "loss": 0.7695, "step": 5029 }, { "epoch": 0.14685702607223147, "grad_norm": 0.7088508411457521, "learning_rate": 1.8959286293592863e-05, "loss": 0.6362, "step": 5030 }, { "epoch": 0.14688622230007883, "grad_norm": 0.7300700795086091, "learning_rate": 1.8958637469586375e-05, "loss": 0.6494, "step": 5031 }, { "epoch": 0.1469154185279262, "grad_norm": 0.7201455853374, "learning_rate": 1.8957988645579887e-05, "loss": 0.6867, "step": 5032 }, { "epoch": 0.14694461475577356, "grad_norm": 0.7388489672103853, "learning_rate": 1.89573398215734e-05, "loss": 0.7861, "step": 5033 }, { "epoch": 0.14697381098362092, "grad_norm": 0.6564179653227485, "learning_rate": 1.895669099756691e-05, "loss": 0.5979, "step": 5034 }, { "epoch": 0.14700300721146828, "grad_norm": 0.784503354707212, "learning_rate": 1.8956042173560423e-05, "loss": 0.6646, "step": 5035 }, { "epoch": 0.14703220343931564, "grad_norm": 0.7212831220449776, "learning_rate": 1.8955393349553935e-05, "loss": 0.6672, "step": 5036 }, { "epoch": 0.147061399667163, "grad_norm": 0.7021876627324146, "learning_rate": 1.8954744525547447e-05, "loss": 0.6718, "step": 5037 }, { "epoch": 0.14709059589501036, "grad_norm": 0.655582237777214, "learning_rate": 1.895409570154096e-05, "loss": 0.5921, "step": 5038 }, { "epoch": 0.14711979212285772, "grad_norm": 0.7052046365654113, "learning_rate": 1.8953446877534468e-05, "loss": 0.6669, "step": 5039 }, { "epoch": 0.14714898835070508, "grad_norm": 0.6983248634278653, "learning_rate": 1.8952798053527983e-05, "loss": 0.6453, "step": 5040 }, { "epoch": 0.14717818457855245, "grad_norm": 0.7125182058405514, "learning_rate": 1.8952149229521495e-05, "loss": 0.6451, "step": 5041 }, { "epoch": 0.1472073808063998, "grad_norm": 0.6999191517493323, "learning_rate": 1.8951500405515007e-05, "loss": 0.6442, "step": 5042 }, { "epoch": 0.14723657703424717, "grad_norm": 0.7100671090316267, "learning_rate": 1.895085158150852e-05, "loss": 0.6999, "step": 5043 }, { "epoch": 0.14726577326209453, "grad_norm": 0.8004998146446298, "learning_rate": 1.895020275750203e-05, "loss": 0.6886, "step": 5044 }, { "epoch": 0.1472949694899419, "grad_norm": 0.7749835328464905, "learning_rate": 1.894955393349554e-05, "loss": 0.8135, "step": 5045 }, { "epoch": 0.14732416571778925, "grad_norm": 0.7020048263969185, "learning_rate": 1.8948905109489052e-05, "loss": 0.6768, "step": 5046 }, { "epoch": 0.1473533619456366, "grad_norm": 0.743528063568035, "learning_rate": 1.8948256285482564e-05, "loss": 0.7327, "step": 5047 }, { "epoch": 0.14738255817348397, "grad_norm": 1.0535349928944349, "learning_rate": 1.8947607461476076e-05, "loss": 0.6351, "step": 5048 }, { "epoch": 0.14741175440133134, "grad_norm": 0.6768103945519243, "learning_rate": 1.8946958637469588e-05, "loss": 0.6551, "step": 5049 }, { "epoch": 0.1474409506291787, "grad_norm": 0.8937921570923297, "learning_rate": 1.89463098134631e-05, "loss": 0.6327, "step": 5050 }, { "epoch": 0.14747014685702609, "grad_norm": 0.6976802507314396, "learning_rate": 1.8945660989456612e-05, "loss": 0.6539, "step": 5051 }, { "epoch": 0.14749934308487345, "grad_norm": 0.7518417408624573, "learning_rate": 1.8945012165450124e-05, "loss": 0.7583, "step": 5052 }, { "epoch": 0.1475285393127208, "grad_norm": 0.6794984138244659, "learning_rate": 1.8944363341443636e-05, "loss": 0.6394, "step": 5053 }, { "epoch": 0.14755773554056817, "grad_norm": 0.691380070610075, "learning_rate": 1.8943714517437145e-05, "loss": 0.6405, "step": 5054 }, { "epoch": 0.14758693176841553, "grad_norm": 0.7943273019833199, "learning_rate": 1.8943065693430657e-05, "loss": 0.7462, "step": 5055 }, { "epoch": 0.1476161279962629, "grad_norm": 0.706572578174514, "learning_rate": 1.894241686942417e-05, "loss": 0.6437, "step": 5056 }, { "epoch": 0.14764532422411025, "grad_norm": 0.647153097854874, "learning_rate": 1.894176804541768e-05, "loss": 0.6108, "step": 5057 }, { "epoch": 0.1476745204519576, "grad_norm": 0.6342101098620728, "learning_rate": 1.8941119221411193e-05, "loss": 0.6067, "step": 5058 }, { "epoch": 0.14770371667980497, "grad_norm": 0.6754765263749144, "learning_rate": 1.8940470397404705e-05, "loss": 0.6647, "step": 5059 }, { "epoch": 0.14773291290765234, "grad_norm": 0.7144689434466396, "learning_rate": 1.8939821573398217e-05, "loss": 0.6419, "step": 5060 }, { "epoch": 0.1477621091354997, "grad_norm": 0.6595979146318411, "learning_rate": 1.893917274939173e-05, "loss": 0.5964, "step": 5061 }, { "epoch": 0.14779130536334706, "grad_norm": 0.6586939056912717, "learning_rate": 1.893852392538524e-05, "loss": 0.5918, "step": 5062 }, { "epoch": 0.14782050159119442, "grad_norm": 0.7475256839745216, "learning_rate": 1.8937875101378753e-05, "loss": 0.752, "step": 5063 }, { "epoch": 0.14784969781904178, "grad_norm": 0.7406848718343362, "learning_rate": 1.8937226277372265e-05, "loss": 0.6784, "step": 5064 }, { "epoch": 0.14787889404688914, "grad_norm": 0.7420533326670277, "learning_rate": 1.8936577453365777e-05, "loss": 0.6993, "step": 5065 }, { "epoch": 0.1479080902747365, "grad_norm": 0.7004144908222164, "learning_rate": 1.893592862935929e-05, "loss": 0.6901, "step": 5066 }, { "epoch": 0.14793728650258386, "grad_norm": 0.7131380685697474, "learning_rate": 1.89352798053528e-05, "loss": 0.6351, "step": 5067 }, { "epoch": 0.14796648273043123, "grad_norm": 0.6942258884693274, "learning_rate": 1.893463098134631e-05, "loss": 0.6927, "step": 5068 }, { "epoch": 0.1479956789582786, "grad_norm": 0.7437740559877709, "learning_rate": 1.8933982157339822e-05, "loss": 0.7384, "step": 5069 }, { "epoch": 0.14802487518612595, "grad_norm": 0.6661451162311758, "learning_rate": 1.8933333333333334e-05, "loss": 0.62, "step": 5070 }, { "epoch": 0.1480540714139733, "grad_norm": 0.8364992663155699, "learning_rate": 1.8932684509326846e-05, "loss": 0.7108, "step": 5071 }, { "epoch": 0.14808326764182067, "grad_norm": 0.701858232570656, "learning_rate": 1.8932035685320358e-05, "loss": 0.632, "step": 5072 }, { "epoch": 0.14811246386966803, "grad_norm": 0.719828127977122, "learning_rate": 1.893138686131387e-05, "loss": 0.6562, "step": 5073 }, { "epoch": 0.1481416600975154, "grad_norm": 0.6946308295348333, "learning_rate": 1.8930738037307382e-05, "loss": 0.6489, "step": 5074 }, { "epoch": 0.14817085632536275, "grad_norm": 0.66334813642319, "learning_rate": 1.8930089213300894e-05, "loss": 0.6299, "step": 5075 }, { "epoch": 0.14820005255321012, "grad_norm": 0.7161224949209407, "learning_rate": 1.8929440389294406e-05, "loss": 0.7038, "step": 5076 }, { "epoch": 0.14822924878105748, "grad_norm": 0.6366072849466309, "learning_rate": 1.8928791565287915e-05, "loss": 0.6123, "step": 5077 }, { "epoch": 0.14825844500890484, "grad_norm": 0.6754796566899985, "learning_rate": 1.892814274128143e-05, "loss": 0.6387, "step": 5078 }, { "epoch": 0.1482876412367522, "grad_norm": 0.7433210807667537, "learning_rate": 1.8927493917274942e-05, "loss": 0.635, "step": 5079 }, { "epoch": 0.14831683746459956, "grad_norm": 0.6396002073079338, "learning_rate": 1.8926845093268454e-05, "loss": 0.5761, "step": 5080 }, { "epoch": 0.14834603369244695, "grad_norm": 0.7591135696793269, "learning_rate": 1.8926196269261966e-05, "loss": 0.7091, "step": 5081 }, { "epoch": 0.1483752299202943, "grad_norm": 0.7670286303315614, "learning_rate": 1.8925547445255478e-05, "loss": 0.6568, "step": 5082 }, { "epoch": 0.14840442614814167, "grad_norm": 0.784144611560037, "learning_rate": 1.8924898621248987e-05, "loss": 0.6175, "step": 5083 }, { "epoch": 0.14843362237598903, "grad_norm": 0.7386312488781884, "learning_rate": 1.89242497972425e-05, "loss": 0.6989, "step": 5084 }, { "epoch": 0.1484628186038364, "grad_norm": 0.7602854595832712, "learning_rate": 1.892360097323601e-05, "loss": 0.7401, "step": 5085 }, { "epoch": 0.14849201483168376, "grad_norm": 0.7085176745090092, "learning_rate": 1.8922952149229523e-05, "loss": 0.6958, "step": 5086 }, { "epoch": 0.14852121105953112, "grad_norm": 0.762816876382203, "learning_rate": 1.8922303325223035e-05, "loss": 0.6511, "step": 5087 }, { "epoch": 0.14855040728737848, "grad_norm": 0.6857147677785507, "learning_rate": 1.8921654501216547e-05, "loss": 0.6383, "step": 5088 }, { "epoch": 0.14857960351522584, "grad_norm": 0.7286177989670592, "learning_rate": 1.892100567721006e-05, "loss": 0.685, "step": 5089 }, { "epoch": 0.1486087997430732, "grad_norm": 0.757720055542336, "learning_rate": 1.892035685320357e-05, "loss": 0.7548, "step": 5090 }, { "epoch": 0.14863799597092056, "grad_norm": 0.687436480692838, "learning_rate": 1.8919708029197083e-05, "loss": 0.6271, "step": 5091 }, { "epoch": 0.14866719219876792, "grad_norm": 0.661789556726717, "learning_rate": 1.8919059205190592e-05, "loss": 0.5676, "step": 5092 }, { "epoch": 0.14869638842661528, "grad_norm": 0.7049543765795736, "learning_rate": 1.8918410381184104e-05, "loss": 0.704, "step": 5093 }, { "epoch": 0.14872558465446264, "grad_norm": 0.7198549093952709, "learning_rate": 1.8917761557177616e-05, "loss": 0.6117, "step": 5094 }, { "epoch": 0.14875478088231, "grad_norm": 0.6806035753991142, "learning_rate": 1.8917112733171128e-05, "loss": 0.6257, "step": 5095 }, { "epoch": 0.14878397711015737, "grad_norm": 0.6690869239941213, "learning_rate": 1.891646390916464e-05, "loss": 0.6281, "step": 5096 }, { "epoch": 0.14881317333800473, "grad_norm": 0.7185698852754554, "learning_rate": 1.8915815085158152e-05, "loss": 0.6606, "step": 5097 }, { "epoch": 0.1488423695658521, "grad_norm": 0.7480247029692142, "learning_rate": 1.8915166261151664e-05, "loss": 0.7185, "step": 5098 }, { "epoch": 0.14887156579369945, "grad_norm": 0.6801477679108009, "learning_rate": 1.8914517437145176e-05, "loss": 0.6546, "step": 5099 }, { "epoch": 0.1489007620215468, "grad_norm": 0.6941435721349585, "learning_rate": 1.8913868613138688e-05, "loss": 0.6428, "step": 5100 }, { "epoch": 0.14892995824939417, "grad_norm": 0.7086042524155677, "learning_rate": 1.89132197891322e-05, "loss": 0.6541, "step": 5101 }, { "epoch": 0.14895915447724153, "grad_norm": 0.6826500893329605, "learning_rate": 1.8912570965125712e-05, "loss": 0.6247, "step": 5102 }, { "epoch": 0.1489883507050889, "grad_norm": 0.6570111638299225, "learning_rate": 1.8911922141119224e-05, "loss": 0.6908, "step": 5103 }, { "epoch": 0.14901754693293626, "grad_norm": 0.7107517593378921, "learning_rate": 1.8911273317112736e-05, "loss": 0.6625, "step": 5104 }, { "epoch": 0.14904674316078362, "grad_norm": 0.6592095899271155, "learning_rate": 1.8910624493106248e-05, "loss": 0.6073, "step": 5105 }, { "epoch": 0.14907593938863098, "grad_norm": 0.66299973553961, "learning_rate": 1.8909975669099757e-05, "loss": 0.6135, "step": 5106 }, { "epoch": 0.14910513561647834, "grad_norm": 0.6958962871427019, "learning_rate": 1.890932684509327e-05, "loss": 0.6576, "step": 5107 }, { "epoch": 0.1491343318443257, "grad_norm": 0.7219125394046929, "learning_rate": 1.890867802108678e-05, "loss": 0.7031, "step": 5108 }, { "epoch": 0.14916352807217306, "grad_norm": 0.778444528219562, "learning_rate": 1.8908029197080293e-05, "loss": 0.6383, "step": 5109 }, { "epoch": 0.14919272430002042, "grad_norm": 1.0060076408298593, "learning_rate": 1.8907380373073805e-05, "loss": 0.8126, "step": 5110 }, { "epoch": 0.1492219205278678, "grad_norm": 0.8173409251134, "learning_rate": 1.8906731549067317e-05, "loss": 0.718, "step": 5111 }, { "epoch": 0.14925111675571517, "grad_norm": 0.7330701265396664, "learning_rate": 1.890608272506083e-05, "loss": 0.7235, "step": 5112 }, { "epoch": 0.14928031298356254, "grad_norm": 0.6363623829405551, "learning_rate": 1.890543390105434e-05, "loss": 0.53, "step": 5113 }, { "epoch": 0.1493095092114099, "grad_norm": 0.8156912804391084, "learning_rate": 1.8904785077047853e-05, "loss": 0.7747, "step": 5114 }, { "epoch": 0.14933870543925726, "grad_norm": 0.6772590551805231, "learning_rate": 1.890413625304136e-05, "loss": 0.6124, "step": 5115 }, { "epoch": 0.14936790166710462, "grad_norm": 0.649095999618417, "learning_rate": 1.8903487429034877e-05, "loss": 0.5647, "step": 5116 }, { "epoch": 0.14939709789495198, "grad_norm": 0.8060537880037487, "learning_rate": 1.890283860502839e-05, "loss": 0.7645, "step": 5117 }, { "epoch": 0.14942629412279934, "grad_norm": 0.739323459109243, "learning_rate": 1.89021897810219e-05, "loss": 0.6642, "step": 5118 }, { "epoch": 0.1494554903506467, "grad_norm": 0.7105448714699135, "learning_rate": 1.8901540957015413e-05, "loss": 0.6303, "step": 5119 }, { "epoch": 0.14948468657849406, "grad_norm": 0.6907849640946159, "learning_rate": 1.8900892133008925e-05, "loss": 0.6076, "step": 5120 }, { "epoch": 0.14951388280634142, "grad_norm": 0.6706117557462482, "learning_rate": 1.8900243309002434e-05, "loss": 0.6523, "step": 5121 }, { "epoch": 0.14954307903418879, "grad_norm": 0.75902029872242, "learning_rate": 1.8899594484995946e-05, "loss": 0.7272, "step": 5122 }, { "epoch": 0.14957227526203615, "grad_norm": 0.7351119284058515, "learning_rate": 1.8898945660989458e-05, "loss": 0.6504, "step": 5123 }, { "epoch": 0.1496014714898835, "grad_norm": 0.7767097080099953, "learning_rate": 1.889829683698297e-05, "loss": 0.671, "step": 5124 }, { "epoch": 0.14963066771773087, "grad_norm": 0.6923331559888081, "learning_rate": 1.8897648012976482e-05, "loss": 0.5603, "step": 5125 }, { "epoch": 0.14965986394557823, "grad_norm": 0.7991176746693861, "learning_rate": 1.8896999188969994e-05, "loss": 0.7632, "step": 5126 }, { "epoch": 0.1496890601734256, "grad_norm": 0.8298839770596914, "learning_rate": 1.8896350364963506e-05, "loss": 0.6384, "step": 5127 }, { "epoch": 0.14971825640127295, "grad_norm": 0.7038935920859797, "learning_rate": 1.8895701540957018e-05, "loss": 0.6914, "step": 5128 }, { "epoch": 0.14974745262912031, "grad_norm": 0.874025127872592, "learning_rate": 1.8895052716950527e-05, "loss": 0.6522, "step": 5129 }, { "epoch": 0.14977664885696768, "grad_norm": 0.7473700310201851, "learning_rate": 1.889440389294404e-05, "loss": 0.5977, "step": 5130 }, { "epoch": 0.14980584508481504, "grad_norm": 0.7355500357250417, "learning_rate": 1.889375506893755e-05, "loss": 0.7461, "step": 5131 }, { "epoch": 0.1498350413126624, "grad_norm": 0.7578728986102781, "learning_rate": 1.8893106244931063e-05, "loss": 0.6696, "step": 5132 }, { "epoch": 0.14986423754050976, "grad_norm": 0.7230307605377442, "learning_rate": 1.8892457420924575e-05, "loss": 0.6449, "step": 5133 }, { "epoch": 0.14989343376835712, "grad_norm": 0.7157874736898154, "learning_rate": 1.8891808596918087e-05, "loss": 0.6985, "step": 5134 }, { "epoch": 0.14992262999620448, "grad_norm": 0.7422086934339476, "learning_rate": 1.88911597729116e-05, "loss": 0.6432, "step": 5135 }, { "epoch": 0.14995182622405184, "grad_norm": 0.7005534254883727, "learning_rate": 1.889051094890511e-05, "loss": 0.6262, "step": 5136 }, { "epoch": 0.1499810224518992, "grad_norm": 0.6963582280021731, "learning_rate": 1.8889862124898623e-05, "loss": 0.6429, "step": 5137 }, { "epoch": 0.15001021867974657, "grad_norm": 0.7013417919183529, "learning_rate": 1.8889213300892135e-05, "loss": 0.6466, "step": 5138 }, { "epoch": 0.15003941490759393, "grad_norm": 0.74406798996621, "learning_rate": 1.8888564476885647e-05, "loss": 0.6704, "step": 5139 }, { "epoch": 0.1500686111354413, "grad_norm": 0.6593377796095009, "learning_rate": 1.888791565287916e-05, "loss": 0.6066, "step": 5140 }, { "epoch": 0.15009780736328868, "grad_norm": 0.7352156595448415, "learning_rate": 1.888726682887267e-05, "loss": 0.6313, "step": 5141 }, { "epoch": 0.15012700359113604, "grad_norm": 0.7138974347391142, "learning_rate": 1.8886618004866183e-05, "loss": 0.6422, "step": 5142 }, { "epoch": 0.1501561998189834, "grad_norm": 0.8076781478107805, "learning_rate": 1.8885969180859695e-05, "loss": 0.6261, "step": 5143 }, { "epoch": 0.15018539604683076, "grad_norm": 0.7546745065980991, "learning_rate": 1.8885320356853204e-05, "loss": 0.6124, "step": 5144 }, { "epoch": 0.15021459227467812, "grad_norm": 0.7218259851872065, "learning_rate": 1.8884671532846716e-05, "loss": 0.6682, "step": 5145 }, { "epoch": 0.15024378850252548, "grad_norm": 0.7400652056684238, "learning_rate": 1.8884022708840228e-05, "loss": 0.6856, "step": 5146 }, { "epoch": 0.15027298473037284, "grad_norm": 0.8552162319619162, "learning_rate": 1.888337388483374e-05, "loss": 0.7402, "step": 5147 }, { "epoch": 0.1503021809582202, "grad_norm": 0.7016747662516654, "learning_rate": 1.888272506082725e-05, "loss": 0.6172, "step": 5148 }, { "epoch": 0.15033137718606757, "grad_norm": 0.6848142046972693, "learning_rate": 1.8882076236820764e-05, "loss": 0.636, "step": 5149 }, { "epoch": 0.15036057341391493, "grad_norm": 0.7329737359820571, "learning_rate": 1.8881427412814276e-05, "loss": 0.6489, "step": 5150 }, { "epoch": 0.1503897696417623, "grad_norm": 0.6366710922781654, "learning_rate": 1.8880778588807788e-05, "loss": 0.6075, "step": 5151 }, { "epoch": 0.15041896586960965, "grad_norm": 0.7161348680679436, "learning_rate": 1.88801297648013e-05, "loss": 0.7024, "step": 5152 }, { "epoch": 0.150448162097457, "grad_norm": 0.7826273909382644, "learning_rate": 1.887948094079481e-05, "loss": 0.7529, "step": 5153 }, { "epoch": 0.15047735832530437, "grad_norm": 0.7203440141823458, "learning_rate": 1.8878832116788324e-05, "loss": 0.6986, "step": 5154 }, { "epoch": 0.15050655455315173, "grad_norm": 0.8336301474659135, "learning_rate": 1.8878183292781836e-05, "loss": 0.7185, "step": 5155 }, { "epoch": 0.1505357507809991, "grad_norm": 0.6992087763636994, "learning_rate": 1.8877534468775348e-05, "loss": 0.6868, "step": 5156 }, { "epoch": 0.15056494700884646, "grad_norm": 0.6631336626310005, "learning_rate": 1.887688564476886e-05, "loss": 0.6257, "step": 5157 }, { "epoch": 0.15059414323669382, "grad_norm": 0.7673032348484476, "learning_rate": 1.8876236820762372e-05, "loss": 0.6826, "step": 5158 }, { "epoch": 0.15062333946454118, "grad_norm": 0.7125104532501817, "learning_rate": 1.887558799675588e-05, "loss": 0.6677, "step": 5159 }, { "epoch": 0.15065253569238854, "grad_norm": 0.6818859107760209, "learning_rate": 1.8874939172749393e-05, "loss": 0.6396, "step": 5160 }, { "epoch": 0.1506817319202359, "grad_norm": 0.7310110091286327, "learning_rate": 1.8874290348742905e-05, "loss": 0.7176, "step": 5161 }, { "epoch": 0.15071092814808326, "grad_norm": 0.6852225111454229, "learning_rate": 1.8873641524736417e-05, "loss": 0.6619, "step": 5162 }, { "epoch": 0.15074012437593062, "grad_norm": 0.7059202696401499, "learning_rate": 1.887299270072993e-05, "loss": 0.668, "step": 5163 }, { "epoch": 0.15076932060377798, "grad_norm": 0.8464958230522771, "learning_rate": 1.887234387672344e-05, "loss": 0.7709, "step": 5164 }, { "epoch": 0.15079851683162535, "grad_norm": 0.7299018805857066, "learning_rate": 1.8871695052716953e-05, "loss": 0.7192, "step": 5165 }, { "epoch": 0.1508277130594727, "grad_norm": 0.6828818094142228, "learning_rate": 1.8871046228710465e-05, "loss": 0.6278, "step": 5166 }, { "epoch": 0.15085690928732007, "grad_norm": 0.7145701355438735, "learning_rate": 1.8870397404703973e-05, "loss": 0.6531, "step": 5167 }, { "epoch": 0.15088610551516743, "grad_norm": 0.6221629001519964, "learning_rate": 1.8869748580697485e-05, "loss": 0.5581, "step": 5168 }, { "epoch": 0.1509153017430148, "grad_norm": 0.7161932648119367, "learning_rate": 1.8869099756690997e-05, "loss": 0.6424, "step": 5169 }, { "epoch": 0.15094449797086215, "grad_norm": 0.7774155627116917, "learning_rate": 1.886845093268451e-05, "loss": 0.698, "step": 5170 }, { "epoch": 0.15097369419870954, "grad_norm": 0.7203111122327208, "learning_rate": 1.886780210867802e-05, "loss": 0.6372, "step": 5171 }, { "epoch": 0.1510028904265569, "grad_norm": 0.7636830986771903, "learning_rate": 1.8867153284671534e-05, "loss": 0.755, "step": 5172 }, { "epoch": 0.15103208665440426, "grad_norm": 0.611576903624663, "learning_rate": 1.8866504460665046e-05, "loss": 0.5306, "step": 5173 }, { "epoch": 0.15106128288225162, "grad_norm": 0.7812350785765013, "learning_rate": 1.8865855636658558e-05, "loss": 0.7456, "step": 5174 }, { "epoch": 0.15109047911009899, "grad_norm": 0.8146453002517391, "learning_rate": 1.886520681265207e-05, "loss": 0.7026, "step": 5175 }, { "epoch": 0.15111967533794635, "grad_norm": 0.7459650108253317, "learning_rate": 1.886455798864558e-05, "loss": 0.7167, "step": 5176 }, { "epoch": 0.1511488715657937, "grad_norm": 0.6419306362523485, "learning_rate": 1.8863909164639094e-05, "loss": 0.5921, "step": 5177 }, { "epoch": 0.15117806779364107, "grad_norm": 0.7539257887136962, "learning_rate": 1.8863260340632606e-05, "loss": 0.6968, "step": 5178 }, { "epoch": 0.15120726402148843, "grad_norm": 0.7513270379080242, "learning_rate": 1.8862611516626118e-05, "loss": 0.7467, "step": 5179 }, { "epoch": 0.1512364602493358, "grad_norm": 0.6436063939848886, "learning_rate": 1.886196269261963e-05, "loss": 0.6011, "step": 5180 }, { "epoch": 0.15126565647718315, "grad_norm": 0.7119534229142145, "learning_rate": 1.8861313868613142e-05, "loss": 0.6689, "step": 5181 }, { "epoch": 0.1512948527050305, "grad_norm": 0.7264287340405982, "learning_rate": 1.886066504460665e-05, "loss": 0.658, "step": 5182 }, { "epoch": 0.15132404893287787, "grad_norm": 0.8225360519662989, "learning_rate": 1.8860016220600162e-05, "loss": 0.6556, "step": 5183 }, { "epoch": 0.15135324516072524, "grad_norm": 0.727175474018974, "learning_rate": 1.8859367396593674e-05, "loss": 0.7173, "step": 5184 }, { "epoch": 0.1513824413885726, "grad_norm": 0.6390861163716816, "learning_rate": 1.8858718572587187e-05, "loss": 0.5753, "step": 5185 }, { "epoch": 0.15141163761641996, "grad_norm": 0.7242884081241602, "learning_rate": 1.88580697485807e-05, "loss": 0.7275, "step": 5186 }, { "epoch": 0.15144083384426732, "grad_norm": 0.7319682803592816, "learning_rate": 1.885742092457421e-05, "loss": 0.672, "step": 5187 }, { "epoch": 0.15147003007211468, "grad_norm": 0.6963786567244042, "learning_rate": 1.8856772100567723e-05, "loss": 0.713, "step": 5188 }, { "epoch": 0.15149922629996204, "grad_norm": 0.830760460360589, "learning_rate": 1.8856123276561235e-05, "loss": 0.7741, "step": 5189 }, { "epoch": 0.1515284225278094, "grad_norm": 0.7190020960668748, "learning_rate": 1.8855474452554747e-05, "loss": 0.7068, "step": 5190 }, { "epoch": 0.15155761875565676, "grad_norm": 0.7077217497920916, "learning_rate": 1.885482562854826e-05, "loss": 0.6252, "step": 5191 }, { "epoch": 0.15158681498350413, "grad_norm": 1.7926671847585804, "learning_rate": 1.885417680454177e-05, "loss": 0.6647, "step": 5192 }, { "epoch": 0.1516160112113515, "grad_norm": 0.7261529772301182, "learning_rate": 1.8853527980535283e-05, "loss": 0.7235, "step": 5193 }, { "epoch": 0.15164520743919885, "grad_norm": 0.7632138895408849, "learning_rate": 1.8852879156528795e-05, "loss": 0.6822, "step": 5194 }, { "epoch": 0.1516744036670462, "grad_norm": 0.7304908039651774, "learning_rate": 1.8852230332522307e-05, "loss": 0.6729, "step": 5195 }, { "epoch": 0.15170359989489357, "grad_norm": 0.9186292727863753, "learning_rate": 1.885158150851582e-05, "loss": 0.7607, "step": 5196 }, { "epoch": 0.15173279612274093, "grad_norm": 0.741356253369731, "learning_rate": 1.8850932684509327e-05, "loss": 0.6568, "step": 5197 }, { "epoch": 0.1517619923505883, "grad_norm": 0.7003642675663898, "learning_rate": 1.885028386050284e-05, "loss": 0.7191, "step": 5198 }, { "epoch": 0.15179118857843565, "grad_norm": 0.6489063356317796, "learning_rate": 1.884963503649635e-05, "loss": 0.5798, "step": 5199 }, { "epoch": 0.15182038480628302, "grad_norm": 0.7405157535896928, "learning_rate": 1.8848986212489864e-05, "loss": 0.7074, "step": 5200 }, { "epoch": 0.1518495810341304, "grad_norm": 0.8482657406536281, "learning_rate": 1.8848337388483376e-05, "loss": 0.6361, "step": 5201 }, { "epoch": 0.15187877726197777, "grad_norm": 0.7372636664853119, "learning_rate": 1.8847688564476888e-05, "loss": 0.6818, "step": 5202 }, { "epoch": 0.15190797348982513, "grad_norm": 0.6482961767693688, "learning_rate": 1.88470397404704e-05, "loss": 0.6017, "step": 5203 }, { "epoch": 0.1519371697176725, "grad_norm": 0.7039718844169954, "learning_rate": 1.884639091646391e-05, "loss": 0.6779, "step": 5204 }, { "epoch": 0.15196636594551985, "grad_norm": 0.7433873329939292, "learning_rate": 1.884574209245742e-05, "loss": 0.717, "step": 5205 }, { "epoch": 0.1519955621733672, "grad_norm": 0.6922169228344218, "learning_rate": 1.8845093268450932e-05, "loss": 0.6268, "step": 5206 }, { "epoch": 0.15202475840121457, "grad_norm": 0.6770531572679287, "learning_rate": 1.8844444444444444e-05, "loss": 0.621, "step": 5207 }, { "epoch": 0.15205395462906193, "grad_norm": 0.7479871875052242, "learning_rate": 1.8843795620437956e-05, "loss": 0.7705, "step": 5208 }, { "epoch": 0.1520831508569093, "grad_norm": 0.6772091857328242, "learning_rate": 1.884314679643147e-05, "loss": 0.6428, "step": 5209 }, { "epoch": 0.15211234708475666, "grad_norm": 0.6917598272831839, "learning_rate": 1.8842497972424984e-05, "loss": 0.6948, "step": 5210 }, { "epoch": 0.15214154331260402, "grad_norm": 0.7310040115204735, "learning_rate": 1.8841849148418492e-05, "loss": 0.7453, "step": 5211 }, { "epoch": 0.15217073954045138, "grad_norm": 0.6882438999458111, "learning_rate": 1.8841200324412004e-05, "loss": 0.6928, "step": 5212 }, { "epoch": 0.15219993576829874, "grad_norm": 0.6709172041113114, "learning_rate": 1.8840551500405517e-05, "loss": 0.6506, "step": 5213 }, { "epoch": 0.1522291319961461, "grad_norm": 0.7287020762942749, "learning_rate": 1.883990267639903e-05, "loss": 0.675, "step": 5214 }, { "epoch": 0.15225832822399346, "grad_norm": 0.645376258063916, "learning_rate": 1.883925385239254e-05, "loss": 0.5679, "step": 5215 }, { "epoch": 0.15228752445184082, "grad_norm": 0.6562756639551415, "learning_rate": 1.8838605028386053e-05, "loss": 0.6211, "step": 5216 }, { "epoch": 0.15231672067968818, "grad_norm": 0.7106654452116967, "learning_rate": 1.8837956204379565e-05, "loss": 0.6616, "step": 5217 }, { "epoch": 0.15234591690753554, "grad_norm": 0.7013480446013061, "learning_rate": 1.8837307380373077e-05, "loss": 0.6846, "step": 5218 }, { "epoch": 0.1523751131353829, "grad_norm": 0.6851003562056565, "learning_rate": 1.883665855636659e-05, "loss": 0.6286, "step": 5219 }, { "epoch": 0.15240430936323027, "grad_norm": 0.7409932450170411, "learning_rate": 1.8836009732360097e-05, "loss": 0.6846, "step": 5220 }, { "epoch": 0.15243350559107763, "grad_norm": 0.6670505537888466, "learning_rate": 1.883536090835361e-05, "loss": 0.6341, "step": 5221 }, { "epoch": 0.152462701818925, "grad_norm": 0.6741209151535578, "learning_rate": 1.883471208434712e-05, "loss": 0.656, "step": 5222 }, { "epoch": 0.15249189804677235, "grad_norm": 0.7366272098450669, "learning_rate": 1.8834063260340633e-05, "loss": 0.7473, "step": 5223 }, { "epoch": 0.1525210942746197, "grad_norm": 0.7361196874463979, "learning_rate": 1.8833414436334145e-05, "loss": 0.6848, "step": 5224 }, { "epoch": 0.15255029050246707, "grad_norm": 0.6461985922429286, "learning_rate": 1.8832765612327657e-05, "loss": 0.588, "step": 5225 }, { "epoch": 0.15257948673031443, "grad_norm": 0.682215086957738, "learning_rate": 1.883211678832117e-05, "loss": 0.6236, "step": 5226 }, { "epoch": 0.1526086829581618, "grad_norm": 0.6696444198573184, "learning_rate": 1.883146796431468e-05, "loss": 0.6314, "step": 5227 }, { "epoch": 0.15263787918600916, "grad_norm": 0.724292872529758, "learning_rate": 1.8830819140308194e-05, "loss": 0.7046, "step": 5228 }, { "epoch": 0.15266707541385652, "grad_norm": 0.708455232017202, "learning_rate": 1.8830170316301706e-05, "loss": 0.6534, "step": 5229 }, { "epoch": 0.15269627164170388, "grad_norm": 0.667150279662475, "learning_rate": 1.8829521492295218e-05, "loss": 0.6645, "step": 5230 }, { "epoch": 0.15272546786955124, "grad_norm": 0.6675325085466671, "learning_rate": 1.882887266828873e-05, "loss": 0.6136, "step": 5231 }, { "epoch": 0.15275466409739863, "grad_norm": 0.70594120962712, "learning_rate": 1.882822384428224e-05, "loss": 0.672, "step": 5232 }, { "epoch": 0.152783860325246, "grad_norm": 0.6660992565144258, "learning_rate": 1.8827575020275754e-05, "loss": 0.6026, "step": 5233 }, { "epoch": 0.15281305655309335, "grad_norm": 0.695171818641851, "learning_rate": 1.8826926196269262e-05, "loss": 0.6612, "step": 5234 }, { "epoch": 0.1528422527809407, "grad_norm": 0.661829220817637, "learning_rate": 1.8826277372262774e-05, "loss": 0.5782, "step": 5235 }, { "epoch": 0.15287144900878807, "grad_norm": 0.7108467548779087, "learning_rate": 1.8825628548256286e-05, "loss": 0.705, "step": 5236 }, { "epoch": 0.15290064523663544, "grad_norm": 0.9794682869692547, "learning_rate": 1.88249797242498e-05, "loss": 0.7462, "step": 5237 }, { "epoch": 0.1529298414644828, "grad_norm": 0.7023293201582684, "learning_rate": 1.882433090024331e-05, "loss": 0.6325, "step": 5238 }, { "epoch": 0.15295903769233016, "grad_norm": 0.7420809993472837, "learning_rate": 1.8823682076236822e-05, "loss": 0.6965, "step": 5239 }, { "epoch": 0.15298823392017752, "grad_norm": 0.6635778549329011, "learning_rate": 1.8823033252230334e-05, "loss": 0.5898, "step": 5240 }, { "epoch": 0.15301743014802488, "grad_norm": 0.7453193782631887, "learning_rate": 1.8822384428223846e-05, "loss": 0.7615, "step": 5241 }, { "epoch": 0.15304662637587224, "grad_norm": 0.6656162021166272, "learning_rate": 1.882173560421736e-05, "loss": 0.627, "step": 5242 }, { "epoch": 0.1530758226037196, "grad_norm": 0.6805305565517457, "learning_rate": 1.8821086780210867e-05, "loss": 0.6307, "step": 5243 }, { "epoch": 0.15310501883156696, "grad_norm": 0.6211921859146169, "learning_rate": 1.882043795620438e-05, "loss": 0.5812, "step": 5244 }, { "epoch": 0.15313421505941432, "grad_norm": 0.864075214200047, "learning_rate": 1.881978913219789e-05, "loss": 0.8029, "step": 5245 }, { "epoch": 0.15316341128726169, "grad_norm": 0.6809566794568737, "learning_rate": 1.8819140308191403e-05, "loss": 0.692, "step": 5246 }, { "epoch": 0.15319260751510905, "grad_norm": 0.6976159368020072, "learning_rate": 1.8818491484184915e-05, "loss": 0.6527, "step": 5247 }, { "epoch": 0.1532218037429564, "grad_norm": 0.7600971793427505, "learning_rate": 1.881784266017843e-05, "loss": 0.7703, "step": 5248 }, { "epoch": 0.15325099997080377, "grad_norm": 0.7679421645256297, "learning_rate": 1.881719383617194e-05, "loss": 0.6444, "step": 5249 }, { "epoch": 0.15328019619865113, "grad_norm": 0.6598942210298334, "learning_rate": 1.881654501216545e-05, "loss": 0.6504, "step": 5250 }, { "epoch": 0.1533093924264985, "grad_norm": 0.6208426277732138, "learning_rate": 1.8815896188158963e-05, "loss": 0.5654, "step": 5251 }, { "epoch": 0.15333858865434585, "grad_norm": 0.8196097723844161, "learning_rate": 1.8815247364152475e-05, "loss": 0.86, "step": 5252 }, { "epoch": 0.15336778488219321, "grad_norm": 0.7306600770829283, "learning_rate": 1.8814598540145987e-05, "loss": 0.6709, "step": 5253 }, { "epoch": 0.15339698111004058, "grad_norm": 0.7330968729332387, "learning_rate": 1.88139497161395e-05, "loss": 0.7225, "step": 5254 }, { "epoch": 0.15342617733788794, "grad_norm": 0.7250972883487428, "learning_rate": 1.881330089213301e-05, "loss": 0.7154, "step": 5255 }, { "epoch": 0.1534553735657353, "grad_norm": 0.6863646783349491, "learning_rate": 1.8812652068126524e-05, "loss": 0.6362, "step": 5256 }, { "epoch": 0.15348456979358266, "grad_norm": 0.650343253645391, "learning_rate": 1.8812003244120036e-05, "loss": 0.5681, "step": 5257 }, { "epoch": 0.15351376602143002, "grad_norm": 0.7229478561821161, "learning_rate": 1.8811354420113544e-05, "loss": 0.5867, "step": 5258 }, { "epoch": 0.15354296224927738, "grad_norm": 0.7038177112315472, "learning_rate": 1.8810705596107056e-05, "loss": 0.6584, "step": 5259 }, { "epoch": 0.15357215847712474, "grad_norm": 0.7287047635494286, "learning_rate": 1.8810056772100568e-05, "loss": 0.6869, "step": 5260 }, { "epoch": 0.1536013547049721, "grad_norm": 0.7908308671509945, "learning_rate": 1.880940794809408e-05, "loss": 0.7746, "step": 5261 }, { "epoch": 0.1536305509328195, "grad_norm": 0.7533368685316356, "learning_rate": 1.8808759124087592e-05, "loss": 0.7216, "step": 5262 }, { "epoch": 0.15365974716066685, "grad_norm": 0.7928729157323301, "learning_rate": 1.8808110300081104e-05, "loss": 0.7472, "step": 5263 }, { "epoch": 0.15368894338851422, "grad_norm": 0.7784881735976734, "learning_rate": 1.8807461476074616e-05, "loss": 0.747, "step": 5264 }, { "epoch": 0.15371813961636158, "grad_norm": 0.7939521432165451, "learning_rate": 1.880681265206813e-05, "loss": 0.8153, "step": 5265 }, { "epoch": 0.15374733584420894, "grad_norm": 0.7168689943071933, "learning_rate": 1.880616382806164e-05, "loss": 0.6753, "step": 5266 }, { "epoch": 0.1537765320720563, "grad_norm": 0.6610154170012889, "learning_rate": 1.8805515004055152e-05, "loss": 0.578, "step": 5267 }, { "epoch": 0.15380572829990366, "grad_norm": 0.7972426591028761, "learning_rate": 1.8804866180048664e-05, "loss": 0.7929, "step": 5268 }, { "epoch": 0.15383492452775102, "grad_norm": 0.7021831785979716, "learning_rate": 1.8804217356042176e-05, "loss": 0.6573, "step": 5269 }, { "epoch": 0.15386412075559838, "grad_norm": 0.6279174491297138, "learning_rate": 1.880356853203569e-05, "loss": 0.5639, "step": 5270 }, { "epoch": 0.15389331698344574, "grad_norm": 0.6817325097580922, "learning_rate": 1.88029197080292e-05, "loss": 0.6494, "step": 5271 }, { "epoch": 0.1539225132112931, "grad_norm": 1.3129794458619037, "learning_rate": 1.880227088402271e-05, "loss": 0.6694, "step": 5272 }, { "epoch": 0.15395170943914047, "grad_norm": 0.676578471383673, "learning_rate": 1.880162206001622e-05, "loss": 0.6383, "step": 5273 }, { "epoch": 0.15398090566698783, "grad_norm": 0.6399335759036431, "learning_rate": 1.8800973236009733e-05, "loss": 0.5829, "step": 5274 }, { "epoch": 0.1540101018948352, "grad_norm": 0.6577264228544677, "learning_rate": 1.8800324412003245e-05, "loss": 0.6004, "step": 5275 }, { "epoch": 0.15403929812268255, "grad_norm": 0.7273945743101051, "learning_rate": 1.8799675587996757e-05, "loss": 0.7341, "step": 5276 }, { "epoch": 0.1540684943505299, "grad_norm": 0.6469849623754559, "learning_rate": 1.879902676399027e-05, "loss": 0.5957, "step": 5277 }, { "epoch": 0.15409769057837727, "grad_norm": 0.7360526930079598, "learning_rate": 1.879837793998378e-05, "loss": 0.598, "step": 5278 }, { "epoch": 0.15412688680622463, "grad_norm": 0.7880208207954541, "learning_rate": 1.8797729115977293e-05, "loss": 0.6903, "step": 5279 }, { "epoch": 0.154156083034072, "grad_norm": 0.9181857056345463, "learning_rate": 1.8797080291970805e-05, "loss": 0.6939, "step": 5280 }, { "epoch": 0.15418527926191936, "grad_norm": 0.7858643132106305, "learning_rate": 1.8796431467964314e-05, "loss": 0.762, "step": 5281 }, { "epoch": 0.15421447548976672, "grad_norm": 0.7193278093954658, "learning_rate": 1.8795782643957826e-05, "loss": 0.7086, "step": 5282 }, { "epoch": 0.15424367171761408, "grad_norm": 0.7250566814568037, "learning_rate": 1.8795133819951338e-05, "loss": 0.6545, "step": 5283 }, { "epoch": 0.15427286794546144, "grad_norm": 0.6687054537410407, "learning_rate": 1.879448499594485e-05, "loss": 0.6344, "step": 5284 }, { "epoch": 0.1543020641733088, "grad_norm": 0.7424776861892795, "learning_rate": 1.8793836171938362e-05, "loss": 0.6963, "step": 5285 }, { "epoch": 0.15433126040115616, "grad_norm": 0.7579610969438164, "learning_rate": 1.8793187347931878e-05, "loss": 0.7017, "step": 5286 }, { "epoch": 0.15436045662900352, "grad_norm": 0.7329704992641244, "learning_rate": 1.8792538523925386e-05, "loss": 0.6966, "step": 5287 }, { "epoch": 0.15438965285685088, "grad_norm": 0.7522053765195168, "learning_rate": 1.8791889699918898e-05, "loss": 0.6528, "step": 5288 }, { "epoch": 0.15441884908469825, "grad_norm": 0.7088619440467904, "learning_rate": 1.879124087591241e-05, "loss": 0.6656, "step": 5289 }, { "epoch": 0.1544480453125456, "grad_norm": 0.6698036314786514, "learning_rate": 1.8790592051905922e-05, "loss": 0.6209, "step": 5290 }, { "epoch": 0.15447724154039297, "grad_norm": 0.6738499108865378, "learning_rate": 1.8789943227899434e-05, "loss": 0.6345, "step": 5291 }, { "epoch": 0.15450643776824036, "grad_norm": 0.7410341882272642, "learning_rate": 1.8789294403892946e-05, "loss": 0.7241, "step": 5292 }, { "epoch": 0.15453563399608772, "grad_norm": 0.6430584844995548, "learning_rate": 1.878864557988646e-05, "loss": 0.607, "step": 5293 }, { "epoch": 0.15456483022393508, "grad_norm": 0.730958568270563, "learning_rate": 1.878799675587997e-05, "loss": 0.7286, "step": 5294 }, { "epoch": 0.15459402645178244, "grad_norm": 0.6737094833631924, "learning_rate": 1.8787347931873482e-05, "loss": 0.6086, "step": 5295 }, { "epoch": 0.1546232226796298, "grad_norm": 0.6666018836704306, "learning_rate": 1.878669910786699e-05, "loss": 0.6183, "step": 5296 }, { "epoch": 0.15465241890747716, "grad_norm": 0.6918973441674234, "learning_rate": 1.8786050283860503e-05, "loss": 0.5981, "step": 5297 }, { "epoch": 0.15468161513532452, "grad_norm": 0.6176218273607011, "learning_rate": 1.8785401459854015e-05, "loss": 0.5785, "step": 5298 }, { "epoch": 0.15471081136317189, "grad_norm": 0.6826828060619716, "learning_rate": 1.8784752635847527e-05, "loss": 0.6921, "step": 5299 }, { "epoch": 0.15474000759101925, "grad_norm": 0.7590287493334485, "learning_rate": 1.878410381184104e-05, "loss": 0.7104, "step": 5300 }, { "epoch": 0.1547692038188666, "grad_norm": 0.7948890775120286, "learning_rate": 1.878345498783455e-05, "loss": 0.7533, "step": 5301 }, { "epoch": 0.15479840004671397, "grad_norm": 0.6265190722454642, "learning_rate": 1.8782806163828063e-05, "loss": 0.5801, "step": 5302 }, { "epoch": 0.15482759627456133, "grad_norm": 0.7008276417537499, "learning_rate": 1.8782157339821575e-05, "loss": 0.7457, "step": 5303 }, { "epoch": 0.1548567925024087, "grad_norm": 0.8929214167599574, "learning_rate": 1.8781508515815087e-05, "loss": 0.6531, "step": 5304 }, { "epoch": 0.15488598873025605, "grad_norm": 0.6847236135658843, "learning_rate": 1.87808596918086e-05, "loss": 0.6607, "step": 5305 }, { "epoch": 0.1549151849581034, "grad_norm": 0.6242435265543037, "learning_rate": 1.878021086780211e-05, "loss": 0.5452, "step": 5306 }, { "epoch": 0.15494438118595077, "grad_norm": 0.7499344464779136, "learning_rate": 1.8779562043795623e-05, "loss": 0.695, "step": 5307 }, { "epoch": 0.15497357741379814, "grad_norm": 0.7293421799070215, "learning_rate": 1.8778913219789135e-05, "loss": 0.6723, "step": 5308 }, { "epoch": 0.1550027736416455, "grad_norm": 0.7337633079862607, "learning_rate": 1.8778264395782647e-05, "loss": 0.7175, "step": 5309 }, { "epoch": 0.15503196986949286, "grad_norm": 0.6805839444291858, "learning_rate": 1.8777615571776156e-05, "loss": 0.6563, "step": 5310 }, { "epoch": 0.15506116609734022, "grad_norm": 0.8957208268694714, "learning_rate": 1.8776966747769668e-05, "loss": 0.7762, "step": 5311 }, { "epoch": 0.15509036232518758, "grad_norm": 0.7317627196601022, "learning_rate": 1.877631792376318e-05, "loss": 0.6738, "step": 5312 }, { "epoch": 0.15511955855303494, "grad_norm": 0.671183209988102, "learning_rate": 1.8775669099756692e-05, "loss": 0.6303, "step": 5313 }, { "epoch": 0.1551487547808823, "grad_norm": 0.6871615831074752, "learning_rate": 1.8775020275750204e-05, "loss": 0.5927, "step": 5314 }, { "epoch": 0.15517795100872966, "grad_norm": 0.6502508698054226, "learning_rate": 1.8774371451743716e-05, "loss": 0.6035, "step": 5315 }, { "epoch": 0.15520714723657703, "grad_norm": 0.6899031000725149, "learning_rate": 1.8773722627737228e-05, "loss": 0.6521, "step": 5316 }, { "epoch": 0.1552363434644244, "grad_norm": 0.8643689975043346, "learning_rate": 1.877307380373074e-05, "loss": 0.7518, "step": 5317 }, { "epoch": 0.15526553969227175, "grad_norm": 0.6786924406119126, "learning_rate": 1.8772424979724252e-05, "loss": 0.6834, "step": 5318 }, { "epoch": 0.1552947359201191, "grad_norm": 0.7465423958451617, "learning_rate": 1.877177615571776e-05, "loss": 0.6385, "step": 5319 }, { "epoch": 0.15532393214796647, "grad_norm": 0.6510549386899048, "learning_rate": 1.8771127331711273e-05, "loss": 0.571, "step": 5320 }, { "epoch": 0.15535312837581383, "grad_norm": 0.6480009495093009, "learning_rate": 1.8770478507704785e-05, "loss": 0.6004, "step": 5321 }, { "epoch": 0.15538232460366122, "grad_norm": 0.6938072970380581, "learning_rate": 1.8769829683698297e-05, "loss": 0.658, "step": 5322 }, { "epoch": 0.15541152083150858, "grad_norm": 0.7246968738480953, "learning_rate": 1.876918085969181e-05, "loss": 0.6538, "step": 5323 }, { "epoch": 0.15544071705935594, "grad_norm": 0.6923352114384074, "learning_rate": 1.8768532035685324e-05, "loss": 0.6505, "step": 5324 }, { "epoch": 0.1554699132872033, "grad_norm": 0.6713255550632654, "learning_rate": 1.8767883211678833e-05, "loss": 0.6661, "step": 5325 }, { "epoch": 0.15549910951505067, "grad_norm": 0.8152853783100361, "learning_rate": 1.8767234387672345e-05, "loss": 0.7441, "step": 5326 }, { "epoch": 0.15552830574289803, "grad_norm": 0.7887171150613861, "learning_rate": 1.8766585563665857e-05, "loss": 0.8301, "step": 5327 }, { "epoch": 0.1555575019707454, "grad_norm": 0.7341868625270865, "learning_rate": 1.876593673965937e-05, "loss": 0.6898, "step": 5328 }, { "epoch": 0.15558669819859275, "grad_norm": 0.743380693344587, "learning_rate": 1.876528791565288e-05, "loss": 0.6892, "step": 5329 }, { "epoch": 0.1556158944264401, "grad_norm": 0.710766911325418, "learning_rate": 1.8764639091646393e-05, "loss": 0.6705, "step": 5330 }, { "epoch": 0.15564509065428747, "grad_norm": 0.7531404579204326, "learning_rate": 1.8763990267639905e-05, "loss": 0.7758, "step": 5331 }, { "epoch": 0.15567428688213483, "grad_norm": 0.7085038741035089, "learning_rate": 1.8763341443633417e-05, "loss": 0.7272, "step": 5332 }, { "epoch": 0.1557034831099822, "grad_norm": 0.7457117029797568, "learning_rate": 1.876269261962693e-05, "loss": 0.7022, "step": 5333 }, { "epoch": 0.15573267933782955, "grad_norm": 0.6943326648532112, "learning_rate": 1.8762043795620438e-05, "loss": 0.6917, "step": 5334 }, { "epoch": 0.15576187556567692, "grad_norm": 0.7581601803926299, "learning_rate": 1.876139497161395e-05, "loss": 0.71, "step": 5335 }, { "epoch": 0.15579107179352428, "grad_norm": 0.794612993295526, "learning_rate": 1.8760746147607462e-05, "loss": 0.7098, "step": 5336 }, { "epoch": 0.15582026802137164, "grad_norm": 0.6351935642869163, "learning_rate": 1.8760097323600974e-05, "loss": 0.6335, "step": 5337 }, { "epoch": 0.155849464249219, "grad_norm": 0.6932468221594319, "learning_rate": 1.8759448499594486e-05, "loss": 0.7056, "step": 5338 }, { "epoch": 0.15587866047706636, "grad_norm": 0.6191698889816388, "learning_rate": 1.8758799675587998e-05, "loss": 0.5826, "step": 5339 }, { "epoch": 0.15590785670491372, "grad_norm": 0.689661716919197, "learning_rate": 1.875815085158151e-05, "loss": 0.579, "step": 5340 }, { "epoch": 0.15593705293276108, "grad_norm": 0.779245839863587, "learning_rate": 1.8757502027575022e-05, "loss": 0.6617, "step": 5341 }, { "epoch": 0.15596624916060844, "grad_norm": 0.7068198636659574, "learning_rate": 1.8756853203568534e-05, "loss": 0.699, "step": 5342 }, { "epoch": 0.1559954453884558, "grad_norm": 0.8092326202806108, "learning_rate": 1.8756204379562046e-05, "loss": 0.7392, "step": 5343 }, { "epoch": 0.15602464161630317, "grad_norm": 0.742135045726725, "learning_rate": 1.8755555555555558e-05, "loss": 0.7172, "step": 5344 }, { "epoch": 0.15605383784415053, "grad_norm": 0.6938267954345319, "learning_rate": 1.875490673154907e-05, "loss": 0.6442, "step": 5345 }, { "epoch": 0.1560830340719979, "grad_norm": 0.72683029257473, "learning_rate": 1.8754257907542582e-05, "loss": 0.6796, "step": 5346 }, { "epoch": 0.15611223029984525, "grad_norm": 0.763778349491718, "learning_rate": 1.8753609083536094e-05, "loss": 0.737, "step": 5347 }, { "epoch": 0.1561414265276926, "grad_norm": 0.6658983622364992, "learning_rate": 1.8752960259529603e-05, "loss": 0.5728, "step": 5348 }, { "epoch": 0.15617062275553997, "grad_norm": 0.7405320593686097, "learning_rate": 1.8752311435523115e-05, "loss": 0.5815, "step": 5349 }, { "epoch": 0.15619981898338733, "grad_norm": 0.7033755754337391, "learning_rate": 1.8751662611516627e-05, "loss": 0.6781, "step": 5350 }, { "epoch": 0.1562290152112347, "grad_norm": 0.6982818733380098, "learning_rate": 1.875101378751014e-05, "loss": 0.6671, "step": 5351 }, { "epoch": 0.15625821143908208, "grad_norm": 0.8324835271195745, "learning_rate": 1.875036496350365e-05, "loss": 0.7168, "step": 5352 }, { "epoch": 0.15628740766692945, "grad_norm": 0.6895595152097099, "learning_rate": 1.8749716139497163e-05, "loss": 0.6498, "step": 5353 }, { "epoch": 0.1563166038947768, "grad_norm": 0.6397876383162321, "learning_rate": 1.8749067315490675e-05, "loss": 0.5817, "step": 5354 }, { "epoch": 0.15634580012262417, "grad_norm": 0.679421179904403, "learning_rate": 1.8748418491484187e-05, "loss": 0.6115, "step": 5355 }, { "epoch": 0.15637499635047153, "grad_norm": 0.7387352207192405, "learning_rate": 1.87477696674777e-05, "loss": 0.6689, "step": 5356 }, { "epoch": 0.1564041925783189, "grad_norm": 0.6811691783081625, "learning_rate": 1.8747120843471208e-05, "loss": 0.6175, "step": 5357 }, { "epoch": 0.15643338880616625, "grad_norm": 0.758701062794084, "learning_rate": 1.874647201946472e-05, "loss": 0.7455, "step": 5358 }, { "epoch": 0.1564625850340136, "grad_norm": 0.691373445245175, "learning_rate": 1.8745823195458232e-05, "loss": 0.6481, "step": 5359 }, { "epoch": 0.15649178126186097, "grad_norm": 0.7080010023501515, "learning_rate": 1.8745174371451744e-05, "loss": 0.6656, "step": 5360 }, { "epoch": 0.15652097748970834, "grad_norm": 0.7192822362996664, "learning_rate": 1.874452554744526e-05, "loss": 0.684, "step": 5361 }, { "epoch": 0.1565501737175557, "grad_norm": 0.6808474243411019, "learning_rate": 1.874387672343877e-05, "loss": 0.6445, "step": 5362 }, { "epoch": 0.15657936994540306, "grad_norm": 0.7632040608237604, "learning_rate": 1.874322789943228e-05, "loss": 0.6853, "step": 5363 }, { "epoch": 0.15660856617325042, "grad_norm": 0.7110951781300704, "learning_rate": 1.8742579075425792e-05, "loss": 0.676, "step": 5364 }, { "epoch": 0.15663776240109778, "grad_norm": 0.6427332503956162, "learning_rate": 1.8741930251419304e-05, "loss": 0.5742, "step": 5365 }, { "epoch": 0.15666695862894514, "grad_norm": 0.6855862832923, "learning_rate": 1.8741281427412816e-05, "loss": 0.5891, "step": 5366 }, { "epoch": 0.1566961548567925, "grad_norm": 0.6875986521509101, "learning_rate": 1.8740632603406328e-05, "loss": 0.7098, "step": 5367 }, { "epoch": 0.15672535108463986, "grad_norm": 0.6116500307282159, "learning_rate": 1.873998377939984e-05, "loss": 0.5421, "step": 5368 }, { "epoch": 0.15675454731248722, "grad_norm": 0.7659762680696329, "learning_rate": 1.8739334955393352e-05, "loss": 0.8259, "step": 5369 }, { "epoch": 0.15678374354033459, "grad_norm": 0.7227351030144493, "learning_rate": 1.8738686131386864e-05, "loss": 0.6775, "step": 5370 }, { "epoch": 0.15681293976818195, "grad_norm": 0.6993615862717063, "learning_rate": 1.8738037307380376e-05, "loss": 0.6306, "step": 5371 }, { "epoch": 0.1568421359960293, "grad_norm": 0.6359197441650876, "learning_rate": 1.8737388483373885e-05, "loss": 0.5375, "step": 5372 }, { "epoch": 0.15687133222387667, "grad_norm": 0.6876903878487391, "learning_rate": 1.8736739659367397e-05, "loss": 0.6921, "step": 5373 }, { "epoch": 0.15690052845172403, "grad_norm": 0.6815772821627747, "learning_rate": 1.873609083536091e-05, "loss": 0.6291, "step": 5374 }, { "epoch": 0.1569297246795714, "grad_norm": 0.7485000622542856, "learning_rate": 1.873544201135442e-05, "loss": 0.7353, "step": 5375 }, { "epoch": 0.15695892090741875, "grad_norm": 0.6758735130722143, "learning_rate": 1.8734793187347933e-05, "loss": 0.5658, "step": 5376 }, { "epoch": 0.15698811713526611, "grad_norm": 0.679111859384469, "learning_rate": 1.8734144363341445e-05, "loss": 0.669, "step": 5377 }, { "epoch": 0.15701731336311348, "grad_norm": 0.7167247945740938, "learning_rate": 1.8733495539334957e-05, "loss": 0.6493, "step": 5378 }, { "epoch": 0.15704650959096084, "grad_norm": 0.6820251171800155, "learning_rate": 1.873284671532847e-05, "loss": 0.6397, "step": 5379 }, { "epoch": 0.1570757058188082, "grad_norm": 0.7557436697564548, "learning_rate": 1.873219789132198e-05, "loss": 0.7088, "step": 5380 }, { "epoch": 0.15710490204665556, "grad_norm": 0.6696263153276898, "learning_rate": 1.8731549067315493e-05, "loss": 0.6269, "step": 5381 }, { "epoch": 0.15713409827450295, "grad_norm": 0.7163205834247884, "learning_rate": 1.8730900243309005e-05, "loss": 0.6769, "step": 5382 }, { "epoch": 0.1571632945023503, "grad_norm": 0.7861238814212597, "learning_rate": 1.8730251419302517e-05, "loss": 0.6975, "step": 5383 }, { "epoch": 0.15719249073019767, "grad_norm": 0.7248862616684428, "learning_rate": 1.872960259529603e-05, "loss": 0.644, "step": 5384 }, { "epoch": 0.15722168695804503, "grad_norm": 0.6556509494043109, "learning_rate": 1.872895377128954e-05, "loss": 0.5898, "step": 5385 }, { "epoch": 0.1572508831858924, "grad_norm": 0.7241914477623765, "learning_rate": 1.872830494728305e-05, "loss": 0.6666, "step": 5386 }, { "epoch": 0.15728007941373975, "grad_norm": 0.687441760233005, "learning_rate": 1.8727656123276562e-05, "loss": 0.6546, "step": 5387 }, { "epoch": 0.15730927564158712, "grad_norm": 0.7036967082346306, "learning_rate": 1.8727007299270074e-05, "loss": 0.6799, "step": 5388 }, { "epoch": 0.15733847186943448, "grad_norm": 0.7128079632550604, "learning_rate": 1.8726358475263586e-05, "loss": 0.68, "step": 5389 }, { "epoch": 0.15736766809728184, "grad_norm": 0.7164521680603938, "learning_rate": 1.8725709651257098e-05, "loss": 0.6667, "step": 5390 }, { "epoch": 0.1573968643251292, "grad_norm": 0.679549700600863, "learning_rate": 1.872506082725061e-05, "loss": 0.6249, "step": 5391 }, { "epoch": 0.15742606055297656, "grad_norm": 0.73212082745476, "learning_rate": 1.8724412003244122e-05, "loss": 0.6451, "step": 5392 }, { "epoch": 0.15745525678082392, "grad_norm": 0.7683201212407595, "learning_rate": 1.8723763179237634e-05, "loss": 0.6923, "step": 5393 }, { "epoch": 0.15748445300867128, "grad_norm": 0.7135126353474953, "learning_rate": 1.8723114355231146e-05, "loss": 0.7048, "step": 5394 }, { "epoch": 0.15751364923651864, "grad_norm": 0.6530210780584175, "learning_rate": 1.8722465531224655e-05, "loss": 0.5324, "step": 5395 }, { "epoch": 0.157542845464366, "grad_norm": 0.7917229201621894, "learning_rate": 1.8721816707218167e-05, "loss": 0.7004, "step": 5396 }, { "epoch": 0.15757204169221337, "grad_norm": 0.6787999845727942, "learning_rate": 1.872116788321168e-05, "loss": 0.6774, "step": 5397 }, { "epoch": 0.15760123792006073, "grad_norm": 0.6987053883776515, "learning_rate": 1.872051905920519e-05, "loss": 0.6456, "step": 5398 }, { "epoch": 0.1576304341479081, "grad_norm": 0.7483629478728231, "learning_rate": 1.8719870235198706e-05, "loss": 0.7141, "step": 5399 }, { "epoch": 0.15765963037575545, "grad_norm": 0.6957398784469776, "learning_rate": 1.8719221411192218e-05, "loss": 0.6676, "step": 5400 }, { "epoch": 0.1576888266036028, "grad_norm": 0.7644785439704859, "learning_rate": 1.8718572587185727e-05, "loss": 0.7817, "step": 5401 }, { "epoch": 0.15771802283145017, "grad_norm": 0.6741734826971916, "learning_rate": 1.871792376317924e-05, "loss": 0.629, "step": 5402 }, { "epoch": 0.15774721905929753, "grad_norm": 0.6787817234104121, "learning_rate": 1.871727493917275e-05, "loss": 0.625, "step": 5403 }, { "epoch": 0.1577764152871449, "grad_norm": 0.6977025230750928, "learning_rate": 1.8716626115166263e-05, "loss": 0.5886, "step": 5404 }, { "epoch": 0.15780561151499226, "grad_norm": 0.6860735660538636, "learning_rate": 1.8715977291159775e-05, "loss": 0.6304, "step": 5405 }, { "epoch": 0.15783480774283962, "grad_norm": 0.7914109223020735, "learning_rate": 1.8715328467153287e-05, "loss": 0.7546, "step": 5406 }, { "epoch": 0.15786400397068698, "grad_norm": 0.9497123490365318, "learning_rate": 1.87146796431468e-05, "loss": 0.745, "step": 5407 }, { "epoch": 0.15789320019853434, "grad_norm": 0.7227312042570624, "learning_rate": 1.871403081914031e-05, "loss": 0.7001, "step": 5408 }, { "epoch": 0.1579223964263817, "grad_norm": 0.764443388426537, "learning_rate": 1.8713381995133823e-05, "loss": 0.7467, "step": 5409 }, { "epoch": 0.15795159265422906, "grad_norm": 0.621630165406753, "learning_rate": 1.871273317112733e-05, "loss": 0.5379, "step": 5410 }, { "epoch": 0.15798078888207642, "grad_norm": 0.6755355027290033, "learning_rate": 1.8712084347120844e-05, "loss": 0.6499, "step": 5411 }, { "epoch": 0.15800998510992378, "grad_norm": 0.6872084469028223, "learning_rate": 1.8711435523114356e-05, "loss": 0.6468, "step": 5412 }, { "epoch": 0.15803918133777117, "grad_norm": 0.8014650083703913, "learning_rate": 1.8710786699107868e-05, "loss": 0.6892, "step": 5413 }, { "epoch": 0.15806837756561853, "grad_norm": 0.7180416908253582, "learning_rate": 1.871013787510138e-05, "loss": 0.6132, "step": 5414 }, { "epoch": 0.1580975737934659, "grad_norm": 0.7294943936096873, "learning_rate": 1.8709489051094892e-05, "loss": 0.7054, "step": 5415 }, { "epoch": 0.15812677002131326, "grad_norm": 0.7192157420433977, "learning_rate": 1.8708840227088404e-05, "loss": 0.6739, "step": 5416 }, { "epoch": 0.15815596624916062, "grad_norm": 0.704846884351212, "learning_rate": 1.8708191403081916e-05, "loss": 0.629, "step": 5417 }, { "epoch": 0.15818516247700798, "grad_norm": 0.6640493261570308, "learning_rate": 1.8707542579075428e-05, "loss": 0.6486, "step": 5418 }, { "epoch": 0.15821435870485534, "grad_norm": 0.7784073081361103, "learning_rate": 1.870689375506894e-05, "loss": 0.7282, "step": 5419 }, { "epoch": 0.1582435549327027, "grad_norm": 0.681893257237743, "learning_rate": 1.8706244931062452e-05, "loss": 0.655, "step": 5420 }, { "epoch": 0.15827275116055006, "grad_norm": 0.8474020868233211, "learning_rate": 1.8705596107055964e-05, "loss": 0.6641, "step": 5421 }, { "epoch": 0.15830194738839742, "grad_norm": 0.7632890673178406, "learning_rate": 1.8704947283049476e-05, "loss": 0.7264, "step": 5422 }, { "epoch": 0.15833114361624478, "grad_norm": 0.7263554795784225, "learning_rate": 1.8704298459042988e-05, "loss": 0.7508, "step": 5423 }, { "epoch": 0.15836033984409215, "grad_norm": 0.7909414019758161, "learning_rate": 1.8703649635036497e-05, "loss": 0.7273, "step": 5424 }, { "epoch": 0.1583895360719395, "grad_norm": 0.6799492048613032, "learning_rate": 1.870300081103001e-05, "loss": 0.6542, "step": 5425 }, { "epoch": 0.15841873229978687, "grad_norm": 0.6931877874744585, "learning_rate": 1.870235198702352e-05, "loss": 0.6394, "step": 5426 }, { "epoch": 0.15844792852763423, "grad_norm": 0.6773410340160582, "learning_rate": 1.8701703163017033e-05, "loss": 0.6771, "step": 5427 }, { "epoch": 0.1584771247554816, "grad_norm": 0.6461504429156257, "learning_rate": 1.8701054339010545e-05, "loss": 0.6, "step": 5428 }, { "epoch": 0.15850632098332895, "grad_norm": 0.7116815714193953, "learning_rate": 1.8700405515004057e-05, "loss": 0.6302, "step": 5429 }, { "epoch": 0.1585355172111763, "grad_norm": 0.6644979150548398, "learning_rate": 1.869975669099757e-05, "loss": 0.617, "step": 5430 }, { "epoch": 0.15856471343902367, "grad_norm": 0.6543761222124336, "learning_rate": 1.869910786699108e-05, "loss": 0.6246, "step": 5431 }, { "epoch": 0.15859390966687104, "grad_norm": 0.7238180462086704, "learning_rate": 1.8698459042984593e-05, "loss": 0.6617, "step": 5432 }, { "epoch": 0.1586231058947184, "grad_norm": 0.7830965969912045, "learning_rate": 1.86978102189781e-05, "loss": 0.7612, "step": 5433 }, { "epoch": 0.15865230212256576, "grad_norm": 0.7312450631245647, "learning_rate": 1.8697161394971614e-05, "loss": 0.6031, "step": 5434 }, { "epoch": 0.15868149835041312, "grad_norm": 0.6953705511304316, "learning_rate": 1.8696512570965126e-05, "loss": 0.7098, "step": 5435 }, { "epoch": 0.15871069457826048, "grad_norm": 0.7070840918024186, "learning_rate": 1.8695863746958638e-05, "loss": 0.6105, "step": 5436 }, { "epoch": 0.15873989080610784, "grad_norm": 0.699724727838455, "learning_rate": 1.8695214922952153e-05, "loss": 0.7033, "step": 5437 }, { "epoch": 0.1587690870339552, "grad_norm": 0.7835205878803968, "learning_rate": 1.8694566098945665e-05, "loss": 0.6462, "step": 5438 }, { "epoch": 0.15879828326180256, "grad_norm": 0.7370286169590795, "learning_rate": 1.8693917274939174e-05, "loss": 0.7277, "step": 5439 }, { "epoch": 0.15882747948964993, "grad_norm": 0.7022761288400131, "learning_rate": 1.8693268450932686e-05, "loss": 0.6696, "step": 5440 }, { "epoch": 0.1588566757174973, "grad_norm": 0.7035850313814429, "learning_rate": 1.8692619626926198e-05, "loss": 0.6573, "step": 5441 }, { "epoch": 0.15888587194534465, "grad_norm": 0.6668151955648007, "learning_rate": 1.869197080291971e-05, "loss": 0.5745, "step": 5442 }, { "epoch": 0.15891506817319204, "grad_norm": 0.683371045361136, "learning_rate": 1.8691321978913222e-05, "loss": 0.639, "step": 5443 }, { "epoch": 0.1589442644010394, "grad_norm": 0.7038732849523829, "learning_rate": 1.8690673154906734e-05, "loss": 0.6685, "step": 5444 }, { "epoch": 0.15897346062888676, "grad_norm": 0.7049238293568516, "learning_rate": 1.8690024330900246e-05, "loss": 0.6879, "step": 5445 }, { "epoch": 0.15900265685673412, "grad_norm": 0.8765953708434644, "learning_rate": 1.8689375506893758e-05, "loss": 0.6262, "step": 5446 }, { "epoch": 0.15903185308458148, "grad_norm": 0.8030588837121737, "learning_rate": 1.868872668288727e-05, "loss": 0.6423, "step": 5447 }, { "epoch": 0.15906104931242884, "grad_norm": 0.7533670284781786, "learning_rate": 1.868807785888078e-05, "loss": 0.7908, "step": 5448 }, { "epoch": 0.1590902455402762, "grad_norm": 0.7620912792385475, "learning_rate": 1.868742903487429e-05, "loss": 0.7577, "step": 5449 }, { "epoch": 0.15911944176812357, "grad_norm": 0.6899686897516899, "learning_rate": 1.8686780210867803e-05, "loss": 0.621, "step": 5450 }, { "epoch": 0.15914863799597093, "grad_norm": 0.6937154729403738, "learning_rate": 1.8686131386861315e-05, "loss": 0.6408, "step": 5451 }, { "epoch": 0.1591778342238183, "grad_norm": 0.7114244300671251, "learning_rate": 1.8685482562854827e-05, "loss": 0.6696, "step": 5452 }, { "epoch": 0.15920703045166565, "grad_norm": 0.6489624074490855, "learning_rate": 1.868483373884834e-05, "loss": 0.5609, "step": 5453 }, { "epoch": 0.159236226679513, "grad_norm": 0.665182968533825, "learning_rate": 1.868418491484185e-05, "loss": 0.6472, "step": 5454 }, { "epoch": 0.15926542290736037, "grad_norm": 0.7602588198225617, "learning_rate": 1.8683536090835363e-05, "loss": 0.7272, "step": 5455 }, { "epoch": 0.15929461913520773, "grad_norm": 0.6866037762158799, "learning_rate": 1.8682887266828875e-05, "loss": 0.6198, "step": 5456 }, { "epoch": 0.1593238153630551, "grad_norm": 0.7383954560277342, "learning_rate": 1.8682238442822387e-05, "loss": 0.7067, "step": 5457 }, { "epoch": 0.15935301159090245, "grad_norm": 0.679889486588786, "learning_rate": 1.86815896188159e-05, "loss": 0.6631, "step": 5458 }, { "epoch": 0.15938220781874982, "grad_norm": 0.6739319613947937, "learning_rate": 1.868094079480941e-05, "loss": 0.6574, "step": 5459 }, { "epoch": 0.15941140404659718, "grad_norm": 0.7849273441353698, "learning_rate": 1.8680291970802923e-05, "loss": 0.8025, "step": 5460 }, { "epoch": 0.15944060027444454, "grad_norm": 0.7203660480505109, "learning_rate": 1.8679643146796435e-05, "loss": 0.7337, "step": 5461 }, { "epoch": 0.1594697965022919, "grad_norm": 0.7056026826877678, "learning_rate": 1.8678994322789944e-05, "loss": 0.6806, "step": 5462 }, { "epoch": 0.15949899273013926, "grad_norm": 0.6547548113794442, "learning_rate": 1.8678345498783456e-05, "loss": 0.6269, "step": 5463 }, { "epoch": 0.15952818895798662, "grad_norm": 0.72509176641678, "learning_rate": 1.8677696674776968e-05, "loss": 0.7091, "step": 5464 }, { "epoch": 0.15955738518583398, "grad_norm": 0.822319592435362, "learning_rate": 1.867704785077048e-05, "loss": 0.7223, "step": 5465 }, { "epoch": 0.15958658141368134, "grad_norm": 0.6700062901210384, "learning_rate": 1.867639902676399e-05, "loss": 0.6294, "step": 5466 }, { "epoch": 0.1596157776415287, "grad_norm": 0.6870909673440153, "learning_rate": 1.8675750202757504e-05, "loss": 0.6843, "step": 5467 }, { "epoch": 0.15964497386937607, "grad_norm": 0.705438764911367, "learning_rate": 1.8675101378751016e-05, "loss": 0.6502, "step": 5468 }, { "epoch": 0.15967417009722343, "grad_norm": 0.7852365569142351, "learning_rate": 1.8674452554744528e-05, "loss": 0.7856, "step": 5469 }, { "epoch": 0.1597033663250708, "grad_norm": 0.6947115256585427, "learning_rate": 1.867380373073804e-05, "loss": 0.6854, "step": 5470 }, { "epoch": 0.15973256255291815, "grad_norm": 0.7943676561494464, "learning_rate": 1.867315490673155e-05, "loss": 0.8034, "step": 5471 }, { "epoch": 0.1597617587807655, "grad_norm": 0.7275937956601404, "learning_rate": 1.867250608272506e-05, "loss": 0.677, "step": 5472 }, { "epoch": 0.1597909550086129, "grad_norm": 0.6434272909537662, "learning_rate": 1.8671857258718572e-05, "loss": 0.6217, "step": 5473 }, { "epoch": 0.15982015123646026, "grad_norm": 0.6776963745300035, "learning_rate": 1.8671208434712084e-05, "loss": 0.6026, "step": 5474 }, { "epoch": 0.15984934746430762, "grad_norm": 0.7340043305338626, "learning_rate": 1.86705596107056e-05, "loss": 0.7524, "step": 5475 }, { "epoch": 0.15987854369215498, "grad_norm": 0.6940021706950835, "learning_rate": 1.8669910786699112e-05, "loss": 0.6779, "step": 5476 }, { "epoch": 0.15990773992000235, "grad_norm": 0.7281182975386277, "learning_rate": 1.866926196269262e-05, "loss": 0.7184, "step": 5477 }, { "epoch": 0.1599369361478497, "grad_norm": 0.8937166023148908, "learning_rate": 1.8668613138686133e-05, "loss": 0.673, "step": 5478 }, { "epoch": 0.15996613237569707, "grad_norm": 0.7324363170439058, "learning_rate": 1.8667964314679645e-05, "loss": 0.7017, "step": 5479 }, { "epoch": 0.15999532860354443, "grad_norm": 0.6717521112862422, "learning_rate": 1.8667315490673157e-05, "loss": 0.6451, "step": 5480 }, { "epoch": 0.1600245248313918, "grad_norm": 0.6782566121248762, "learning_rate": 1.866666666666667e-05, "loss": 0.6832, "step": 5481 }, { "epoch": 0.16005372105923915, "grad_norm": 0.7424759754879847, "learning_rate": 1.866601784266018e-05, "loss": 0.7145, "step": 5482 }, { "epoch": 0.1600829172870865, "grad_norm": 0.6862760892334624, "learning_rate": 1.8665369018653693e-05, "loss": 0.6572, "step": 5483 }, { "epoch": 0.16011211351493387, "grad_norm": 0.8142965963746179, "learning_rate": 1.8664720194647205e-05, "loss": 0.7872, "step": 5484 }, { "epoch": 0.16014130974278123, "grad_norm": 0.6646820991760419, "learning_rate": 1.8664071370640713e-05, "loss": 0.5891, "step": 5485 }, { "epoch": 0.1601705059706286, "grad_norm": 0.8254090736183631, "learning_rate": 1.8663422546634225e-05, "loss": 0.664, "step": 5486 }, { "epoch": 0.16019970219847596, "grad_norm": 0.6805803978554674, "learning_rate": 1.8662773722627737e-05, "loss": 0.5786, "step": 5487 }, { "epoch": 0.16022889842632332, "grad_norm": 0.6824360257246811, "learning_rate": 1.866212489862125e-05, "loss": 0.6444, "step": 5488 }, { "epoch": 0.16025809465417068, "grad_norm": 0.71867221297714, "learning_rate": 1.866147607461476e-05, "loss": 0.7231, "step": 5489 }, { "epoch": 0.16028729088201804, "grad_norm": 0.7182083027436846, "learning_rate": 1.8660827250608274e-05, "loss": 0.6965, "step": 5490 }, { "epoch": 0.1603164871098654, "grad_norm": 0.7744640384322908, "learning_rate": 1.8660178426601786e-05, "loss": 0.6971, "step": 5491 }, { "epoch": 0.16034568333771276, "grad_norm": 0.9609408524999212, "learning_rate": 1.8659529602595298e-05, "loss": 0.8293, "step": 5492 }, { "epoch": 0.16037487956556012, "grad_norm": 0.7458916803708324, "learning_rate": 1.865888077858881e-05, "loss": 0.6861, "step": 5493 }, { "epoch": 0.16040407579340749, "grad_norm": 0.7317482333910393, "learning_rate": 1.865823195458232e-05, "loss": 0.716, "step": 5494 }, { "epoch": 0.16043327202125485, "grad_norm": 1.1822621880801656, "learning_rate": 1.8657583130575834e-05, "loss": 0.7165, "step": 5495 }, { "epoch": 0.1604624682491022, "grad_norm": 0.6702020560484041, "learning_rate": 1.8656934306569346e-05, "loss": 0.5977, "step": 5496 }, { "epoch": 0.16049166447694957, "grad_norm": 0.7363579079440699, "learning_rate": 1.8656285482562858e-05, "loss": 0.7666, "step": 5497 }, { "epoch": 0.16052086070479693, "grad_norm": 0.708339967712511, "learning_rate": 1.865563665855637e-05, "loss": 0.6971, "step": 5498 }, { "epoch": 0.1605500569326443, "grad_norm": 0.6491187395054421, "learning_rate": 1.8654987834549882e-05, "loss": 0.5828, "step": 5499 }, { "epoch": 0.16057925316049165, "grad_norm": 0.7050666445922402, "learning_rate": 1.865433901054339e-05, "loss": 0.681, "step": 5500 }, { "epoch": 0.16060844938833901, "grad_norm": 0.6995413074081752, "learning_rate": 1.8653690186536902e-05, "loss": 0.7188, "step": 5501 }, { "epoch": 0.16063764561618638, "grad_norm": 0.7535973530367457, "learning_rate": 1.8653041362530414e-05, "loss": 0.7157, "step": 5502 }, { "epoch": 0.16066684184403376, "grad_norm": 0.7733830849930124, "learning_rate": 1.8652392538523926e-05, "loss": 0.7098, "step": 5503 }, { "epoch": 0.16069603807188113, "grad_norm": 0.6065891353887904, "learning_rate": 1.865174371451744e-05, "loss": 0.5247, "step": 5504 }, { "epoch": 0.1607252342997285, "grad_norm": 0.6651689734022662, "learning_rate": 1.865109489051095e-05, "loss": 0.5926, "step": 5505 }, { "epoch": 0.16075443052757585, "grad_norm": 0.7065101775058131, "learning_rate": 1.8650446066504463e-05, "loss": 0.6444, "step": 5506 }, { "epoch": 0.1607836267554232, "grad_norm": 0.7185611636111987, "learning_rate": 1.8649797242497975e-05, "loss": 0.679, "step": 5507 }, { "epoch": 0.16081282298327057, "grad_norm": 0.6762516684304011, "learning_rate": 1.8649148418491487e-05, "loss": 0.6243, "step": 5508 }, { "epoch": 0.16084201921111793, "grad_norm": 0.6717516821801682, "learning_rate": 1.8648499594484995e-05, "loss": 0.6579, "step": 5509 }, { "epoch": 0.1608712154389653, "grad_norm": 0.7175666640624692, "learning_rate": 1.8647850770478507e-05, "loss": 0.6666, "step": 5510 }, { "epoch": 0.16090041166681265, "grad_norm": 0.811759401999161, "learning_rate": 1.864720194647202e-05, "loss": 0.8124, "step": 5511 }, { "epoch": 0.16092960789466002, "grad_norm": 0.6356290336460139, "learning_rate": 1.8646553122465535e-05, "loss": 0.5971, "step": 5512 }, { "epoch": 0.16095880412250738, "grad_norm": 0.7643589739855069, "learning_rate": 1.8645904298459047e-05, "loss": 0.7066, "step": 5513 }, { "epoch": 0.16098800035035474, "grad_norm": 0.6961546960650117, "learning_rate": 1.864525547445256e-05, "loss": 0.6132, "step": 5514 }, { "epoch": 0.1610171965782021, "grad_norm": 0.6626092602165293, "learning_rate": 1.8644606650446067e-05, "loss": 0.6437, "step": 5515 }, { "epoch": 0.16104639280604946, "grad_norm": 0.7091088302924994, "learning_rate": 1.864395782643958e-05, "loss": 0.7348, "step": 5516 }, { "epoch": 0.16107558903389682, "grad_norm": 0.7112635085859745, "learning_rate": 1.864330900243309e-05, "loss": 0.5943, "step": 5517 }, { "epoch": 0.16110478526174418, "grad_norm": 0.7058760080402795, "learning_rate": 1.8642660178426603e-05, "loss": 0.6919, "step": 5518 }, { "epoch": 0.16113398148959154, "grad_norm": 0.6471985075858654, "learning_rate": 1.8642011354420116e-05, "loss": 0.6563, "step": 5519 }, { "epoch": 0.1611631777174389, "grad_norm": 0.7064098899783976, "learning_rate": 1.8641362530413628e-05, "loss": 0.6934, "step": 5520 }, { "epoch": 0.16119237394528627, "grad_norm": 0.7143334112694236, "learning_rate": 1.864071370640714e-05, "loss": 0.7108, "step": 5521 }, { "epoch": 0.16122157017313363, "grad_norm": 1.090860464599442, "learning_rate": 1.864006488240065e-05, "loss": 0.8016, "step": 5522 }, { "epoch": 0.161250766400981, "grad_norm": 0.7203176127179763, "learning_rate": 1.863941605839416e-05, "loss": 0.7201, "step": 5523 }, { "epoch": 0.16127996262882835, "grad_norm": 0.7345823304920164, "learning_rate": 1.8638767234387672e-05, "loss": 0.7205, "step": 5524 }, { "epoch": 0.1613091588566757, "grad_norm": 0.7324681397240302, "learning_rate": 1.8638118410381184e-05, "loss": 0.7172, "step": 5525 }, { "epoch": 0.16133835508452307, "grad_norm": 0.8994130537660773, "learning_rate": 1.8637469586374696e-05, "loss": 0.6701, "step": 5526 }, { "epoch": 0.16136755131237043, "grad_norm": 0.688384217546113, "learning_rate": 1.863682076236821e-05, "loss": 0.6368, "step": 5527 }, { "epoch": 0.1613967475402178, "grad_norm": 0.6618884396771882, "learning_rate": 1.863617193836172e-05, "loss": 0.6285, "step": 5528 }, { "epoch": 0.16142594376806516, "grad_norm": 0.695108919427909, "learning_rate": 1.8635523114355232e-05, "loss": 0.6719, "step": 5529 }, { "epoch": 0.16145513999591252, "grad_norm": 0.7080688293893913, "learning_rate": 1.8634874290348744e-05, "loss": 0.6948, "step": 5530 }, { "epoch": 0.16148433622375988, "grad_norm": 0.7415543100149232, "learning_rate": 1.8634225466342256e-05, "loss": 0.6485, "step": 5531 }, { "epoch": 0.16151353245160724, "grad_norm": 0.6963449563534371, "learning_rate": 1.863357664233577e-05, "loss": 0.6531, "step": 5532 }, { "epoch": 0.16154272867945463, "grad_norm": 0.7064846186275163, "learning_rate": 1.863292781832928e-05, "loss": 0.6912, "step": 5533 }, { "epoch": 0.161571924907302, "grad_norm": 0.7608861830934524, "learning_rate": 1.8632278994322793e-05, "loss": 0.6962, "step": 5534 }, { "epoch": 0.16160112113514935, "grad_norm": 0.6839532177210832, "learning_rate": 1.8631630170316305e-05, "loss": 0.6109, "step": 5535 }, { "epoch": 0.1616303173629967, "grad_norm": 0.7794293846145818, "learning_rate": 1.8630981346309817e-05, "loss": 0.6488, "step": 5536 }, { "epoch": 0.16165951359084407, "grad_norm": 0.7246410038724763, "learning_rate": 1.863033252230333e-05, "loss": 0.7279, "step": 5537 }, { "epoch": 0.16168870981869143, "grad_norm": 0.6915547557272668, "learning_rate": 1.8629683698296837e-05, "loss": 0.6343, "step": 5538 }, { "epoch": 0.1617179060465388, "grad_norm": 0.6663743551071936, "learning_rate": 1.862903487429035e-05, "loss": 0.6361, "step": 5539 }, { "epoch": 0.16174710227438616, "grad_norm": 0.6992465507564826, "learning_rate": 1.862838605028386e-05, "loss": 0.6746, "step": 5540 }, { "epoch": 0.16177629850223352, "grad_norm": 0.7122557936477615, "learning_rate": 1.8627737226277373e-05, "loss": 0.702, "step": 5541 }, { "epoch": 0.16180549473008088, "grad_norm": 0.7611889746504807, "learning_rate": 1.8627088402270885e-05, "loss": 0.7123, "step": 5542 }, { "epoch": 0.16183469095792824, "grad_norm": 0.7230014969392482, "learning_rate": 1.8626439578264397e-05, "loss": 0.7205, "step": 5543 }, { "epoch": 0.1618638871857756, "grad_norm": 0.7897152800120428, "learning_rate": 1.862579075425791e-05, "loss": 0.7716, "step": 5544 }, { "epoch": 0.16189308341362296, "grad_norm": 0.669347012823472, "learning_rate": 1.862514193025142e-05, "loss": 0.6175, "step": 5545 }, { "epoch": 0.16192227964147032, "grad_norm": 0.6299678121865575, "learning_rate": 1.8624493106244933e-05, "loss": 0.5674, "step": 5546 }, { "epoch": 0.16195147586931768, "grad_norm": 0.7592162118610805, "learning_rate": 1.8623844282238442e-05, "loss": 0.7166, "step": 5547 }, { "epoch": 0.16198067209716505, "grad_norm": 0.6696186541252575, "learning_rate": 1.8623195458231954e-05, "loss": 0.6575, "step": 5548 }, { "epoch": 0.1620098683250124, "grad_norm": 0.6902299171788253, "learning_rate": 1.8622546634225466e-05, "loss": 0.6037, "step": 5549 }, { "epoch": 0.16203906455285977, "grad_norm": 0.6862252285636976, "learning_rate": 1.862189781021898e-05, "loss": 0.6323, "step": 5550 }, { "epoch": 0.16206826078070713, "grad_norm": 0.6546033252010348, "learning_rate": 1.8621248986212494e-05, "loss": 0.6083, "step": 5551 }, { "epoch": 0.1620974570085545, "grad_norm": 0.6801113953844886, "learning_rate": 1.8620600162206006e-05, "loss": 0.628, "step": 5552 }, { "epoch": 0.16212665323640185, "grad_norm": 0.7340918741808846, "learning_rate": 1.8619951338199514e-05, "loss": 0.7017, "step": 5553 }, { "epoch": 0.1621558494642492, "grad_norm": 0.671163770012785, "learning_rate": 1.8619302514193026e-05, "loss": 0.6301, "step": 5554 }, { "epoch": 0.16218504569209657, "grad_norm": 0.6155892787264546, "learning_rate": 1.861865369018654e-05, "loss": 0.6026, "step": 5555 }, { "epoch": 0.16221424191994394, "grad_norm": 0.6526027987986887, "learning_rate": 1.861800486618005e-05, "loss": 0.5769, "step": 5556 }, { "epoch": 0.1622434381477913, "grad_norm": 0.7211496599653487, "learning_rate": 1.8617356042173562e-05, "loss": 0.671, "step": 5557 }, { "epoch": 0.16227263437563866, "grad_norm": 0.6716038484741839, "learning_rate": 1.8616707218167074e-05, "loss": 0.6531, "step": 5558 }, { "epoch": 0.16230183060348602, "grad_norm": 0.6692973786320575, "learning_rate": 1.8616058394160586e-05, "loss": 0.6166, "step": 5559 }, { "epoch": 0.16233102683133338, "grad_norm": 0.6803583649291894, "learning_rate": 1.86154095701541e-05, "loss": 0.6873, "step": 5560 }, { "epoch": 0.16236022305918074, "grad_norm": 0.7041672822961084, "learning_rate": 1.8614760746147607e-05, "loss": 0.6466, "step": 5561 }, { "epoch": 0.1623894192870281, "grad_norm": 0.6944650615907362, "learning_rate": 1.861411192214112e-05, "loss": 0.6461, "step": 5562 }, { "epoch": 0.1624186155148755, "grad_norm": 0.7131698423546009, "learning_rate": 1.861346309813463e-05, "loss": 0.6577, "step": 5563 }, { "epoch": 0.16244781174272285, "grad_norm": 0.6534926400209081, "learning_rate": 1.8612814274128143e-05, "loss": 0.6035, "step": 5564 }, { "epoch": 0.16247700797057021, "grad_norm": 0.6965292454516387, "learning_rate": 1.8612165450121655e-05, "loss": 0.7143, "step": 5565 }, { "epoch": 0.16250620419841758, "grad_norm": 0.7171714468902841, "learning_rate": 1.8611516626115167e-05, "loss": 0.6984, "step": 5566 }, { "epoch": 0.16253540042626494, "grad_norm": 0.8365974834097608, "learning_rate": 1.861086780210868e-05, "loss": 0.6408, "step": 5567 }, { "epoch": 0.1625645966541123, "grad_norm": 0.7483810053953908, "learning_rate": 1.861021897810219e-05, "loss": 0.711, "step": 5568 }, { "epoch": 0.16259379288195966, "grad_norm": 0.7802970286750721, "learning_rate": 1.8609570154095703e-05, "loss": 0.7092, "step": 5569 }, { "epoch": 0.16262298910980702, "grad_norm": 0.7150846937963736, "learning_rate": 1.8608921330089215e-05, "loss": 0.6672, "step": 5570 }, { "epoch": 0.16265218533765438, "grad_norm": 0.6935963877841351, "learning_rate": 1.8608272506082727e-05, "loss": 0.6568, "step": 5571 }, { "epoch": 0.16268138156550174, "grad_norm": 0.8583910272459642, "learning_rate": 1.860762368207624e-05, "loss": 0.6531, "step": 5572 }, { "epoch": 0.1627105777933491, "grad_norm": 0.7095523481677207, "learning_rate": 1.860697485806975e-05, "loss": 0.6726, "step": 5573 }, { "epoch": 0.16273977402119646, "grad_norm": 0.7116569633706166, "learning_rate": 1.8606326034063263e-05, "loss": 0.7075, "step": 5574 }, { "epoch": 0.16276897024904383, "grad_norm": 0.6922676407617563, "learning_rate": 1.8605677210056775e-05, "loss": 0.6672, "step": 5575 }, { "epoch": 0.1627981664768912, "grad_norm": 0.8229937318559862, "learning_rate": 1.8605028386050284e-05, "loss": 0.6785, "step": 5576 }, { "epoch": 0.16282736270473855, "grad_norm": 0.7096859032846533, "learning_rate": 1.8604379562043796e-05, "loss": 0.6935, "step": 5577 }, { "epoch": 0.1628565589325859, "grad_norm": 0.7553664572710787, "learning_rate": 1.8603730738037308e-05, "loss": 0.7413, "step": 5578 }, { "epoch": 0.16288575516043327, "grad_norm": 0.7049418483456841, "learning_rate": 1.860308191403082e-05, "loss": 0.6806, "step": 5579 }, { "epoch": 0.16291495138828063, "grad_norm": 0.657205466716065, "learning_rate": 1.8602433090024332e-05, "loss": 0.5909, "step": 5580 }, { "epoch": 0.162944147616128, "grad_norm": 0.6817588804347056, "learning_rate": 1.8601784266017844e-05, "loss": 0.7091, "step": 5581 }, { "epoch": 0.16297334384397535, "grad_norm": 0.6921902462126964, "learning_rate": 1.8601135442011356e-05, "loss": 0.6773, "step": 5582 }, { "epoch": 0.16300254007182272, "grad_norm": 0.7082109238262092, "learning_rate": 1.860048661800487e-05, "loss": 0.6982, "step": 5583 }, { "epoch": 0.16303173629967008, "grad_norm": 0.7790168351220008, "learning_rate": 1.859983779399838e-05, "loss": 0.7288, "step": 5584 }, { "epoch": 0.16306093252751744, "grad_norm": 0.6929775521155812, "learning_rate": 1.859918896999189e-05, "loss": 0.647, "step": 5585 }, { "epoch": 0.1630901287553648, "grad_norm": 0.7539746908629577, "learning_rate": 1.85985401459854e-05, "loss": 0.7219, "step": 5586 }, { "epoch": 0.16311932498321216, "grad_norm": 0.62530062101217, "learning_rate": 1.8597891321978913e-05, "loss": 0.5779, "step": 5587 }, { "epoch": 0.16314852121105952, "grad_norm": 0.7313039783671677, "learning_rate": 1.859724249797243e-05, "loss": 0.6955, "step": 5588 }, { "epoch": 0.16317771743890688, "grad_norm": 0.6374102278730346, "learning_rate": 1.859659367396594e-05, "loss": 0.6143, "step": 5589 }, { "epoch": 0.16320691366675424, "grad_norm": 0.7595263221942712, "learning_rate": 1.8595944849959453e-05, "loss": 0.6901, "step": 5590 }, { "epoch": 0.1632361098946016, "grad_norm": 0.6467443537190001, "learning_rate": 1.859529602595296e-05, "loss": 0.5719, "step": 5591 }, { "epoch": 0.16326530612244897, "grad_norm": 0.7196622196023326, "learning_rate": 1.8594647201946473e-05, "loss": 0.6919, "step": 5592 }, { "epoch": 0.16329450235029636, "grad_norm": 0.6365522777233574, "learning_rate": 1.8593998377939985e-05, "loss": 0.5895, "step": 5593 }, { "epoch": 0.16332369857814372, "grad_norm": 0.662599788114862, "learning_rate": 1.8593349553933497e-05, "loss": 0.6395, "step": 5594 }, { "epoch": 0.16335289480599108, "grad_norm": 0.6964922345657617, "learning_rate": 1.859270072992701e-05, "loss": 0.6744, "step": 5595 }, { "epoch": 0.16338209103383844, "grad_norm": 0.6749674634089105, "learning_rate": 1.859205190592052e-05, "loss": 0.6032, "step": 5596 }, { "epoch": 0.1634112872616858, "grad_norm": 0.7238784125437949, "learning_rate": 1.8591403081914033e-05, "loss": 0.6345, "step": 5597 }, { "epoch": 0.16344048348953316, "grad_norm": 0.6511275770673874, "learning_rate": 1.8590754257907545e-05, "loss": 0.5561, "step": 5598 }, { "epoch": 0.16346967971738052, "grad_norm": 0.7041120002792851, "learning_rate": 1.8590105433901054e-05, "loss": 0.6537, "step": 5599 }, { "epoch": 0.16349887594522788, "grad_norm": 0.683149019199856, "learning_rate": 1.8589456609894566e-05, "loss": 0.669, "step": 5600 }, { "epoch": 0.16352807217307525, "grad_norm": 0.6706711929647319, "learning_rate": 1.8588807785888078e-05, "loss": 0.65, "step": 5601 }, { "epoch": 0.1635572684009226, "grad_norm": 0.7108101005605104, "learning_rate": 1.858815896188159e-05, "loss": 0.7293, "step": 5602 }, { "epoch": 0.16358646462876997, "grad_norm": 0.7042823476407563, "learning_rate": 1.8587510137875102e-05, "loss": 0.684, "step": 5603 }, { "epoch": 0.16361566085661733, "grad_norm": 0.7227448853427274, "learning_rate": 1.8586861313868614e-05, "loss": 0.618, "step": 5604 }, { "epoch": 0.1636448570844647, "grad_norm": 0.6782055680837676, "learning_rate": 1.8586212489862126e-05, "loss": 0.6083, "step": 5605 }, { "epoch": 0.16367405331231205, "grad_norm": 0.6920037260791543, "learning_rate": 1.8585563665855638e-05, "loss": 0.7034, "step": 5606 }, { "epoch": 0.1637032495401594, "grad_norm": 0.736994230485706, "learning_rate": 1.858491484184915e-05, "loss": 0.685, "step": 5607 }, { "epoch": 0.16373244576800677, "grad_norm": 0.6554653478182247, "learning_rate": 1.8584266017842662e-05, "loss": 0.6182, "step": 5608 }, { "epoch": 0.16376164199585413, "grad_norm": 0.7088774818949548, "learning_rate": 1.8583617193836174e-05, "loss": 0.7212, "step": 5609 }, { "epoch": 0.1637908382237015, "grad_norm": 0.6512216588622658, "learning_rate": 1.8582968369829686e-05, "loss": 0.5821, "step": 5610 }, { "epoch": 0.16382003445154886, "grad_norm": 0.8316876706216627, "learning_rate": 1.8582319545823198e-05, "loss": 0.7866, "step": 5611 }, { "epoch": 0.16384923067939622, "grad_norm": 0.78005939030673, "learning_rate": 1.858167072181671e-05, "loss": 0.647, "step": 5612 }, { "epoch": 0.16387842690724358, "grad_norm": 0.7769100432824794, "learning_rate": 1.8581021897810222e-05, "loss": 0.6829, "step": 5613 }, { "epoch": 0.16390762313509094, "grad_norm": 0.6502944940785929, "learning_rate": 1.858037307380373e-05, "loss": 0.6337, "step": 5614 }, { "epoch": 0.1639368193629383, "grad_norm": 0.7044388997150387, "learning_rate": 1.8579724249797243e-05, "loss": 0.6947, "step": 5615 }, { "epoch": 0.16396601559078566, "grad_norm": 0.7659063749146948, "learning_rate": 1.8579075425790755e-05, "loss": 0.755, "step": 5616 }, { "epoch": 0.16399521181863302, "grad_norm": 0.6784535767558121, "learning_rate": 1.8578426601784267e-05, "loss": 0.6203, "step": 5617 }, { "epoch": 0.16402440804648039, "grad_norm": 0.721858391104704, "learning_rate": 1.857777777777778e-05, "loss": 0.6784, "step": 5618 }, { "epoch": 0.16405360427432775, "grad_norm": 0.6835545683232319, "learning_rate": 1.857712895377129e-05, "loss": 0.6585, "step": 5619 }, { "epoch": 0.1640828005021751, "grad_norm": 0.7376732594334965, "learning_rate": 1.8576480129764803e-05, "loss": 0.6634, "step": 5620 }, { "epoch": 0.16411199673002247, "grad_norm": 0.7670738351651332, "learning_rate": 1.8575831305758315e-05, "loss": 0.7202, "step": 5621 }, { "epoch": 0.16414119295786983, "grad_norm": 0.7433606948591202, "learning_rate": 1.8575182481751827e-05, "loss": 0.6145, "step": 5622 }, { "epoch": 0.1641703891857172, "grad_norm": 0.6929449882948137, "learning_rate": 1.8574533657745336e-05, "loss": 0.639, "step": 5623 }, { "epoch": 0.16419958541356458, "grad_norm": 0.7543198506587777, "learning_rate": 1.8573884833738848e-05, "loss": 0.6832, "step": 5624 }, { "epoch": 0.16422878164141194, "grad_norm": 0.7164989978504511, "learning_rate": 1.857323600973236e-05, "loss": 0.6667, "step": 5625 }, { "epoch": 0.1642579778692593, "grad_norm": 0.6979422970400035, "learning_rate": 1.8572587185725875e-05, "loss": 0.7344, "step": 5626 }, { "epoch": 0.16428717409710666, "grad_norm": 0.6285096339790099, "learning_rate": 1.8571938361719387e-05, "loss": 0.6125, "step": 5627 }, { "epoch": 0.16431637032495403, "grad_norm": 0.6804230898804875, "learning_rate": 1.8571289537712896e-05, "loss": 0.6607, "step": 5628 }, { "epoch": 0.1643455665528014, "grad_norm": 0.7746705780708072, "learning_rate": 1.8570640713706408e-05, "loss": 0.7639, "step": 5629 }, { "epoch": 0.16437476278064875, "grad_norm": 0.735734105909636, "learning_rate": 1.856999188969992e-05, "loss": 0.6881, "step": 5630 }, { "epoch": 0.1644039590084961, "grad_norm": 0.6600896858620775, "learning_rate": 1.8569343065693432e-05, "loss": 0.612, "step": 5631 }, { "epoch": 0.16443315523634347, "grad_norm": 0.6599578227025783, "learning_rate": 1.8568694241686944e-05, "loss": 0.6107, "step": 5632 }, { "epoch": 0.16446235146419083, "grad_norm": 0.6849141879974592, "learning_rate": 1.8568045417680456e-05, "loss": 0.6393, "step": 5633 }, { "epoch": 0.1644915476920382, "grad_norm": 0.6496503375306283, "learning_rate": 1.8567396593673968e-05, "loss": 0.6011, "step": 5634 }, { "epoch": 0.16452074391988555, "grad_norm": 0.6842361920276434, "learning_rate": 1.856674776966748e-05, "loss": 0.683, "step": 5635 }, { "epoch": 0.16454994014773291, "grad_norm": 0.7168624780116835, "learning_rate": 1.8566098945660992e-05, "loss": 0.7117, "step": 5636 }, { "epoch": 0.16457913637558028, "grad_norm": 0.7884753660573764, "learning_rate": 1.85654501216545e-05, "loss": 0.7288, "step": 5637 }, { "epoch": 0.16460833260342764, "grad_norm": 0.7844741588046651, "learning_rate": 1.8564801297648013e-05, "loss": 0.7202, "step": 5638 }, { "epoch": 0.164637528831275, "grad_norm": 0.6578785306587986, "learning_rate": 1.8564152473641525e-05, "loss": 0.5908, "step": 5639 }, { "epoch": 0.16466672505912236, "grad_norm": 0.6649556813985164, "learning_rate": 1.8563503649635037e-05, "loss": 0.6757, "step": 5640 }, { "epoch": 0.16469592128696972, "grad_norm": 0.6969482566194385, "learning_rate": 1.856285482562855e-05, "loss": 0.678, "step": 5641 }, { "epoch": 0.16472511751481708, "grad_norm": 0.6585119482885434, "learning_rate": 1.856220600162206e-05, "loss": 0.6067, "step": 5642 }, { "epoch": 0.16475431374266444, "grad_norm": 0.7336056970603241, "learning_rate": 1.8561557177615573e-05, "loss": 0.7104, "step": 5643 }, { "epoch": 0.1647835099705118, "grad_norm": 0.7247236130193936, "learning_rate": 1.8560908353609085e-05, "loss": 0.6834, "step": 5644 }, { "epoch": 0.16481270619835917, "grad_norm": 0.6652346955023198, "learning_rate": 1.8560259529602597e-05, "loss": 0.6246, "step": 5645 }, { "epoch": 0.16484190242620653, "grad_norm": 0.6575357887129781, "learning_rate": 1.855961070559611e-05, "loss": 0.5924, "step": 5646 }, { "epoch": 0.1648710986540539, "grad_norm": 0.6694477232972668, "learning_rate": 1.855896188158962e-05, "loss": 0.6209, "step": 5647 }, { "epoch": 0.16490029488190125, "grad_norm": 0.7340605502514378, "learning_rate": 1.8558313057583133e-05, "loss": 0.558, "step": 5648 }, { "epoch": 0.1649294911097486, "grad_norm": 0.7097132806638621, "learning_rate": 1.8557664233576645e-05, "loss": 0.593, "step": 5649 }, { "epoch": 0.16495868733759597, "grad_norm": 0.6667942520639228, "learning_rate": 1.8557015409570157e-05, "loss": 0.6365, "step": 5650 }, { "epoch": 0.16498788356544333, "grad_norm": 0.7349023454244705, "learning_rate": 1.855636658556367e-05, "loss": 0.6838, "step": 5651 }, { "epoch": 0.1650170797932907, "grad_norm": 0.7004124305987118, "learning_rate": 1.8555717761557178e-05, "loss": 0.6893, "step": 5652 }, { "epoch": 0.16504627602113806, "grad_norm": 0.6619959519422254, "learning_rate": 1.855506893755069e-05, "loss": 0.5282, "step": 5653 }, { "epoch": 0.16507547224898544, "grad_norm": 0.6684546379796047, "learning_rate": 1.8554420113544202e-05, "loss": 0.6678, "step": 5654 }, { "epoch": 0.1651046684768328, "grad_norm": 0.6377460478148196, "learning_rate": 1.8553771289537714e-05, "loss": 0.5928, "step": 5655 }, { "epoch": 0.16513386470468017, "grad_norm": 0.7134348090810038, "learning_rate": 1.8553122465531226e-05, "loss": 0.6436, "step": 5656 }, { "epoch": 0.16516306093252753, "grad_norm": 0.7758526427890395, "learning_rate": 1.8552473641524738e-05, "loss": 0.7731, "step": 5657 }, { "epoch": 0.1651922571603749, "grad_norm": 0.6949065177489306, "learning_rate": 1.855182481751825e-05, "loss": 0.6195, "step": 5658 }, { "epoch": 0.16522145338822225, "grad_norm": 0.7136635807784314, "learning_rate": 1.8551175993511762e-05, "loss": 0.6956, "step": 5659 }, { "epoch": 0.1652506496160696, "grad_norm": 0.6639352657648229, "learning_rate": 1.8550527169505274e-05, "loss": 0.6035, "step": 5660 }, { "epoch": 0.16527984584391697, "grad_norm": 0.6672629709893563, "learning_rate": 1.8549878345498783e-05, "loss": 0.6371, "step": 5661 }, { "epoch": 0.16530904207176433, "grad_norm": 0.7210988387312371, "learning_rate": 1.8549229521492295e-05, "loss": 0.6442, "step": 5662 }, { "epoch": 0.1653382382996117, "grad_norm": 0.7193845707765375, "learning_rate": 1.854858069748581e-05, "loss": 0.7491, "step": 5663 }, { "epoch": 0.16536743452745906, "grad_norm": 0.9517405891917603, "learning_rate": 1.8547931873479322e-05, "loss": 0.8475, "step": 5664 }, { "epoch": 0.16539663075530642, "grad_norm": 0.7137316517022222, "learning_rate": 1.8547283049472834e-05, "loss": 0.6318, "step": 5665 }, { "epoch": 0.16542582698315378, "grad_norm": 0.6918657267047683, "learning_rate": 1.8546634225466343e-05, "loss": 0.6972, "step": 5666 }, { "epoch": 0.16545502321100114, "grad_norm": 0.7012413989674741, "learning_rate": 1.8545985401459855e-05, "loss": 0.6872, "step": 5667 }, { "epoch": 0.1654842194388485, "grad_norm": 0.663097288107696, "learning_rate": 1.8545336577453367e-05, "loss": 0.622, "step": 5668 }, { "epoch": 0.16551341566669586, "grad_norm": 0.7358709289505602, "learning_rate": 1.854468775344688e-05, "loss": 0.6922, "step": 5669 }, { "epoch": 0.16554261189454322, "grad_norm": 0.661261313712954, "learning_rate": 1.854403892944039e-05, "loss": 0.6257, "step": 5670 }, { "epoch": 0.16557180812239058, "grad_norm": 0.6730667365553856, "learning_rate": 1.8543390105433903e-05, "loss": 0.6591, "step": 5671 }, { "epoch": 0.16560100435023795, "grad_norm": 0.6792932986034462, "learning_rate": 1.8542741281427415e-05, "loss": 0.6835, "step": 5672 }, { "epoch": 0.1656302005780853, "grad_norm": 0.7127490693764302, "learning_rate": 1.8542092457420927e-05, "loss": 0.7224, "step": 5673 }, { "epoch": 0.16565939680593267, "grad_norm": 0.676428275123277, "learning_rate": 1.854144363341444e-05, "loss": 0.6251, "step": 5674 }, { "epoch": 0.16568859303378003, "grad_norm": 0.6864815657810722, "learning_rate": 1.8540794809407948e-05, "loss": 0.6451, "step": 5675 }, { "epoch": 0.1657177892616274, "grad_norm": 0.709890054801281, "learning_rate": 1.854014598540146e-05, "loss": 0.6848, "step": 5676 }, { "epoch": 0.16574698548947475, "grad_norm": 0.6683230450738795, "learning_rate": 1.8539497161394972e-05, "loss": 0.582, "step": 5677 }, { "epoch": 0.1657761817173221, "grad_norm": 0.7232147856974492, "learning_rate": 1.8538848337388484e-05, "loss": 0.6687, "step": 5678 }, { "epoch": 0.16580537794516947, "grad_norm": 0.7419323062746357, "learning_rate": 1.8538199513381996e-05, "loss": 0.6183, "step": 5679 }, { "epoch": 0.16583457417301684, "grad_norm": 0.715775503971526, "learning_rate": 1.8537550689375508e-05, "loss": 0.6851, "step": 5680 }, { "epoch": 0.1658637704008642, "grad_norm": 0.7199804958211139, "learning_rate": 1.853690186536902e-05, "loss": 0.6675, "step": 5681 }, { "epoch": 0.16589296662871156, "grad_norm": 0.8386439640060593, "learning_rate": 1.8536253041362532e-05, "loss": 0.6258, "step": 5682 }, { "epoch": 0.16592216285655892, "grad_norm": 0.7296296550709522, "learning_rate": 1.8535604217356044e-05, "loss": 0.7062, "step": 5683 }, { "epoch": 0.1659513590844063, "grad_norm": 0.7200069600830741, "learning_rate": 1.8534955393349556e-05, "loss": 0.6553, "step": 5684 }, { "epoch": 0.16598055531225367, "grad_norm": 0.7063093135710325, "learning_rate": 1.8534306569343068e-05, "loss": 0.6827, "step": 5685 }, { "epoch": 0.16600975154010103, "grad_norm": 0.6803468151332944, "learning_rate": 1.853365774533658e-05, "loss": 0.6538, "step": 5686 }, { "epoch": 0.1660389477679484, "grad_norm": 0.7536007996032844, "learning_rate": 1.8533008921330092e-05, "loss": 0.735, "step": 5687 }, { "epoch": 0.16606814399579575, "grad_norm": 0.7603357063795115, "learning_rate": 1.8532360097323604e-05, "loss": 0.6829, "step": 5688 }, { "epoch": 0.16609734022364311, "grad_norm": 0.705895521562137, "learning_rate": 1.8531711273317116e-05, "loss": 0.6416, "step": 5689 }, { "epoch": 0.16612653645149048, "grad_norm": 0.6641450239798495, "learning_rate": 1.8531062449310625e-05, "loss": 0.6342, "step": 5690 }, { "epoch": 0.16615573267933784, "grad_norm": 0.6384131961849685, "learning_rate": 1.8530413625304137e-05, "loss": 0.5853, "step": 5691 }, { "epoch": 0.1661849289071852, "grad_norm": 0.6287959474462241, "learning_rate": 1.852976480129765e-05, "loss": 0.5826, "step": 5692 }, { "epoch": 0.16621412513503256, "grad_norm": 0.7152688581033257, "learning_rate": 1.852911597729116e-05, "loss": 0.6873, "step": 5693 }, { "epoch": 0.16624332136287992, "grad_norm": 0.6803880818400114, "learning_rate": 1.8528467153284673e-05, "loss": 0.6341, "step": 5694 }, { "epoch": 0.16627251759072728, "grad_norm": 0.7296935544470018, "learning_rate": 1.8527818329278185e-05, "loss": 0.6613, "step": 5695 }, { "epoch": 0.16630171381857464, "grad_norm": 0.6989698433687397, "learning_rate": 1.8527169505271697e-05, "loss": 0.7032, "step": 5696 }, { "epoch": 0.166330910046422, "grad_norm": 0.6791173517677257, "learning_rate": 1.852652068126521e-05, "loss": 0.654, "step": 5697 }, { "epoch": 0.16636010627426936, "grad_norm": 0.6608966373669084, "learning_rate": 1.852587185725872e-05, "loss": 0.5914, "step": 5698 }, { "epoch": 0.16638930250211673, "grad_norm": 0.7045851635980556, "learning_rate": 1.852522303325223e-05, "loss": 0.679, "step": 5699 }, { "epoch": 0.1664184987299641, "grad_norm": 0.7147423908323542, "learning_rate": 1.852457420924574e-05, "loss": 0.6453, "step": 5700 }, { "epoch": 0.16644769495781145, "grad_norm": 0.7445707639261827, "learning_rate": 1.8523925385239257e-05, "loss": 0.6754, "step": 5701 }, { "epoch": 0.1664768911856588, "grad_norm": 0.6448575795287824, "learning_rate": 1.852327656123277e-05, "loss": 0.6199, "step": 5702 }, { "epoch": 0.16650608741350617, "grad_norm": 0.8604713923771758, "learning_rate": 1.852262773722628e-05, "loss": 0.6755, "step": 5703 }, { "epoch": 0.16653528364135353, "grad_norm": 0.6483541566695651, "learning_rate": 1.852197891321979e-05, "loss": 0.5999, "step": 5704 }, { "epoch": 0.1665644798692009, "grad_norm": 0.7882908526070913, "learning_rate": 1.8521330089213302e-05, "loss": 0.7847, "step": 5705 }, { "epoch": 0.16659367609704825, "grad_norm": 0.7439906923268401, "learning_rate": 1.8520681265206814e-05, "loss": 0.698, "step": 5706 }, { "epoch": 0.16662287232489562, "grad_norm": 0.6595502094481309, "learning_rate": 1.8520032441200326e-05, "loss": 0.586, "step": 5707 }, { "epoch": 0.16665206855274298, "grad_norm": 0.7489169762326304, "learning_rate": 1.8519383617193838e-05, "loss": 0.7475, "step": 5708 }, { "epoch": 0.16668126478059034, "grad_norm": 0.6949783789037842, "learning_rate": 1.851873479318735e-05, "loss": 0.6515, "step": 5709 }, { "epoch": 0.1667104610084377, "grad_norm": 0.6659150654664094, "learning_rate": 1.8518085969180862e-05, "loss": 0.5832, "step": 5710 }, { "epoch": 0.16673965723628506, "grad_norm": 0.6543702393998447, "learning_rate": 1.8517437145174374e-05, "loss": 0.5743, "step": 5711 }, { "epoch": 0.16676885346413242, "grad_norm": 0.696260567984137, "learning_rate": 1.8516788321167886e-05, "loss": 0.7115, "step": 5712 }, { "epoch": 0.16679804969197978, "grad_norm": 0.6326795665871847, "learning_rate": 1.8516139497161395e-05, "loss": 0.614, "step": 5713 }, { "epoch": 0.16682724591982717, "grad_norm": 0.7426017566583544, "learning_rate": 1.8515490673154907e-05, "loss": 0.7452, "step": 5714 }, { "epoch": 0.16685644214767453, "grad_norm": 0.6763643453961227, "learning_rate": 1.851484184914842e-05, "loss": 0.5569, "step": 5715 }, { "epoch": 0.1668856383755219, "grad_norm": 0.6839520329636642, "learning_rate": 1.851419302514193e-05, "loss": 0.6231, "step": 5716 }, { "epoch": 0.16691483460336926, "grad_norm": 0.6666287592854195, "learning_rate": 1.8513544201135443e-05, "loss": 0.6272, "step": 5717 }, { "epoch": 0.16694403083121662, "grad_norm": 0.6499042048676753, "learning_rate": 1.8512895377128955e-05, "loss": 0.6182, "step": 5718 }, { "epoch": 0.16697322705906398, "grad_norm": 0.6769586312399325, "learning_rate": 1.8512246553122467e-05, "loss": 0.6207, "step": 5719 }, { "epoch": 0.16700242328691134, "grad_norm": 0.7001195373417385, "learning_rate": 1.851159772911598e-05, "loss": 0.7159, "step": 5720 }, { "epoch": 0.1670316195147587, "grad_norm": 0.7269412908303262, "learning_rate": 1.851094890510949e-05, "loss": 0.713, "step": 5721 }, { "epoch": 0.16706081574260606, "grad_norm": 0.6475217383465764, "learning_rate": 1.8510300081103003e-05, "loss": 0.6018, "step": 5722 }, { "epoch": 0.16709001197045342, "grad_norm": 0.7871968619573515, "learning_rate": 1.8509651257096515e-05, "loss": 0.6193, "step": 5723 }, { "epoch": 0.16711920819830078, "grad_norm": 0.7172583308126259, "learning_rate": 1.8509002433090027e-05, "loss": 0.6265, "step": 5724 }, { "epoch": 0.16714840442614814, "grad_norm": 0.6231242819204632, "learning_rate": 1.850835360908354e-05, "loss": 0.5693, "step": 5725 }, { "epoch": 0.1671776006539955, "grad_norm": 0.6907504125908169, "learning_rate": 1.850770478507705e-05, "loss": 0.6997, "step": 5726 }, { "epoch": 0.16720679688184287, "grad_norm": 0.7060976513089764, "learning_rate": 1.8507055961070563e-05, "loss": 0.7043, "step": 5727 }, { "epoch": 0.16723599310969023, "grad_norm": 0.6573956386161947, "learning_rate": 1.850640713706407e-05, "loss": 0.653, "step": 5728 }, { "epoch": 0.1672651893375376, "grad_norm": 0.7013077051850342, "learning_rate": 1.8505758313057584e-05, "loss": 0.6953, "step": 5729 }, { "epoch": 0.16729438556538495, "grad_norm": 0.6938817354785255, "learning_rate": 1.8505109489051096e-05, "loss": 0.6581, "step": 5730 }, { "epoch": 0.1673235817932323, "grad_norm": 0.8082444675253048, "learning_rate": 1.8504460665044608e-05, "loss": 0.7832, "step": 5731 }, { "epoch": 0.16735277802107967, "grad_norm": 0.7384674465842691, "learning_rate": 1.850381184103812e-05, "loss": 0.7048, "step": 5732 }, { "epoch": 0.16738197424892703, "grad_norm": 0.7597346044987556, "learning_rate": 1.8503163017031632e-05, "loss": 0.7115, "step": 5733 }, { "epoch": 0.1674111704767744, "grad_norm": 0.6652530539969355, "learning_rate": 1.8502514193025144e-05, "loss": 0.6491, "step": 5734 }, { "epoch": 0.16744036670462176, "grad_norm": 0.6394673784941245, "learning_rate": 1.8501865369018656e-05, "loss": 0.5856, "step": 5735 }, { "epoch": 0.16746956293246912, "grad_norm": 1.1162551638753313, "learning_rate": 1.8501216545012168e-05, "loss": 0.7384, "step": 5736 }, { "epoch": 0.16749875916031648, "grad_norm": 0.6585855964536834, "learning_rate": 1.8500567721005676e-05, "loss": 0.6118, "step": 5737 }, { "epoch": 0.16752795538816384, "grad_norm": 0.8140359430484537, "learning_rate": 1.849991889699919e-05, "loss": 0.727, "step": 5738 }, { "epoch": 0.1675571516160112, "grad_norm": 0.7531042843298617, "learning_rate": 1.8499270072992704e-05, "loss": 0.7601, "step": 5739 }, { "epoch": 0.16758634784385856, "grad_norm": 0.7162644549715724, "learning_rate": 1.8498621248986216e-05, "loss": 0.6921, "step": 5740 }, { "epoch": 0.16761554407170592, "grad_norm": 0.6959213658108987, "learning_rate": 1.8497972424979728e-05, "loss": 0.6576, "step": 5741 }, { "epoch": 0.16764474029955329, "grad_norm": 0.6769269516763944, "learning_rate": 1.8497323600973237e-05, "loss": 0.6895, "step": 5742 }, { "epoch": 0.16767393652740065, "grad_norm": 0.5927501899342389, "learning_rate": 1.849667477696675e-05, "loss": 0.552, "step": 5743 }, { "epoch": 0.16770313275524804, "grad_norm": 0.6615359966473215, "learning_rate": 1.849602595296026e-05, "loss": 0.6104, "step": 5744 }, { "epoch": 0.1677323289830954, "grad_norm": 0.7140085814705872, "learning_rate": 1.8495377128953773e-05, "loss": 0.7021, "step": 5745 }, { "epoch": 0.16776152521094276, "grad_norm": 0.6853917388936895, "learning_rate": 1.8494728304947285e-05, "loss": 0.659, "step": 5746 }, { "epoch": 0.16779072143879012, "grad_norm": 0.7505277843517121, "learning_rate": 1.8494079480940797e-05, "loss": 0.6554, "step": 5747 }, { "epoch": 0.16781991766663748, "grad_norm": 0.7195925826624537, "learning_rate": 1.849343065693431e-05, "loss": 0.6719, "step": 5748 }, { "epoch": 0.16784911389448484, "grad_norm": 0.6714048741060288, "learning_rate": 1.849278183292782e-05, "loss": 0.5953, "step": 5749 }, { "epoch": 0.1678783101223322, "grad_norm": 0.6773717445377752, "learning_rate": 1.8492133008921333e-05, "loss": 0.6957, "step": 5750 }, { "epoch": 0.16790750635017956, "grad_norm": 0.6510579427146844, "learning_rate": 1.849148418491484e-05, "loss": 0.6169, "step": 5751 }, { "epoch": 0.16793670257802693, "grad_norm": 0.6525116524307496, "learning_rate": 1.8490835360908353e-05, "loss": 0.6018, "step": 5752 }, { "epoch": 0.1679658988058743, "grad_norm": 0.6326977192110624, "learning_rate": 1.8490186536901866e-05, "loss": 0.5452, "step": 5753 }, { "epoch": 0.16799509503372165, "grad_norm": 0.6647385050187355, "learning_rate": 1.8489537712895378e-05, "loss": 0.6026, "step": 5754 }, { "epoch": 0.168024291261569, "grad_norm": 0.8154308445718562, "learning_rate": 1.848888888888889e-05, "loss": 0.7112, "step": 5755 }, { "epoch": 0.16805348748941637, "grad_norm": 0.6770548534426768, "learning_rate": 1.84882400648824e-05, "loss": 0.6333, "step": 5756 }, { "epoch": 0.16808268371726373, "grad_norm": 0.8022032714131853, "learning_rate": 1.8487591240875914e-05, "loss": 0.7191, "step": 5757 }, { "epoch": 0.1681118799451111, "grad_norm": 0.6789909091967875, "learning_rate": 1.8486942416869426e-05, "loss": 0.6442, "step": 5758 }, { "epoch": 0.16814107617295845, "grad_norm": 0.6494104517818547, "learning_rate": 1.8486293592862938e-05, "loss": 0.6377, "step": 5759 }, { "epoch": 0.16817027240080581, "grad_norm": 0.660589449719966, "learning_rate": 1.848564476885645e-05, "loss": 0.6153, "step": 5760 }, { "epoch": 0.16819946862865318, "grad_norm": 0.8650774546709006, "learning_rate": 1.8484995944849962e-05, "loss": 0.8177, "step": 5761 }, { "epoch": 0.16822866485650054, "grad_norm": 0.7470354302770855, "learning_rate": 1.8484347120843474e-05, "loss": 0.6715, "step": 5762 }, { "epoch": 0.1682578610843479, "grad_norm": 0.6686303179530926, "learning_rate": 1.8483698296836986e-05, "loss": 0.6642, "step": 5763 }, { "epoch": 0.16828705731219526, "grad_norm": 0.7039840684960564, "learning_rate": 1.8483049472830498e-05, "loss": 0.6924, "step": 5764 }, { "epoch": 0.16831625354004262, "grad_norm": 0.6728043502203164, "learning_rate": 1.848240064882401e-05, "loss": 0.6686, "step": 5765 }, { "epoch": 0.16834544976788998, "grad_norm": 0.6939902804637422, "learning_rate": 1.848175182481752e-05, "loss": 0.6543, "step": 5766 }, { "epoch": 0.16837464599573734, "grad_norm": 0.7919139853154369, "learning_rate": 1.848110300081103e-05, "loss": 0.7495, "step": 5767 }, { "epoch": 0.1684038422235847, "grad_norm": 0.7116193976610767, "learning_rate": 1.8480454176804543e-05, "loss": 0.6005, "step": 5768 }, { "epoch": 0.16843303845143207, "grad_norm": 0.7259410872622379, "learning_rate": 1.8479805352798055e-05, "loss": 0.6912, "step": 5769 }, { "epoch": 0.16846223467927943, "grad_norm": 0.6981227019250271, "learning_rate": 1.8479156528791567e-05, "loss": 0.6736, "step": 5770 }, { "epoch": 0.1684914309071268, "grad_norm": 0.7196938239934573, "learning_rate": 1.847850770478508e-05, "loss": 0.6384, "step": 5771 }, { "epoch": 0.16852062713497415, "grad_norm": 0.6925174572571269, "learning_rate": 1.847785888077859e-05, "loss": 0.5863, "step": 5772 }, { "epoch": 0.1685498233628215, "grad_norm": 0.7875839718867154, "learning_rate": 1.8477210056772103e-05, "loss": 0.6351, "step": 5773 }, { "epoch": 0.1685790195906689, "grad_norm": 0.6340254632688841, "learning_rate": 1.847656123276561e-05, "loss": 0.5404, "step": 5774 }, { "epoch": 0.16860821581851626, "grad_norm": 0.6621338220309472, "learning_rate": 1.8475912408759123e-05, "loss": 0.611, "step": 5775 }, { "epoch": 0.16863741204636362, "grad_norm": 0.7378562223418411, "learning_rate": 1.8475263584752635e-05, "loss": 0.6572, "step": 5776 }, { "epoch": 0.16866660827421098, "grad_norm": 0.6331988388836002, "learning_rate": 1.847461476074615e-05, "loss": 0.5673, "step": 5777 }, { "epoch": 0.16869580450205834, "grad_norm": 0.6791213433456926, "learning_rate": 1.8473965936739663e-05, "loss": 0.6264, "step": 5778 }, { "epoch": 0.1687250007299057, "grad_norm": 0.7853331115029663, "learning_rate": 1.8473317112733175e-05, "loss": 0.702, "step": 5779 }, { "epoch": 0.16875419695775307, "grad_norm": 0.6637087927018113, "learning_rate": 1.8472668288726683e-05, "loss": 0.6612, "step": 5780 }, { "epoch": 0.16878339318560043, "grad_norm": 0.6466977668284964, "learning_rate": 1.8472019464720195e-05, "loss": 0.5853, "step": 5781 }, { "epoch": 0.1688125894134478, "grad_norm": 0.6832259732696199, "learning_rate": 1.8471370640713708e-05, "loss": 0.6981, "step": 5782 }, { "epoch": 0.16884178564129515, "grad_norm": 0.7305101220461518, "learning_rate": 1.847072181670722e-05, "loss": 0.6936, "step": 5783 }, { "epoch": 0.1688709818691425, "grad_norm": 0.676502397817791, "learning_rate": 1.847007299270073e-05, "loss": 0.6236, "step": 5784 }, { "epoch": 0.16890017809698987, "grad_norm": 0.7082991505877243, "learning_rate": 1.8469424168694244e-05, "loss": 0.6017, "step": 5785 }, { "epoch": 0.16892937432483723, "grad_norm": 0.6796250189973518, "learning_rate": 1.8468775344687756e-05, "loss": 0.6389, "step": 5786 }, { "epoch": 0.1689585705526846, "grad_norm": 0.7132277986767389, "learning_rate": 1.8468126520681268e-05, "loss": 0.7028, "step": 5787 }, { "epoch": 0.16898776678053196, "grad_norm": 0.6557043884687009, "learning_rate": 1.846747769667478e-05, "loss": 0.5788, "step": 5788 }, { "epoch": 0.16901696300837932, "grad_norm": 0.7496104462971327, "learning_rate": 1.846682887266829e-05, "loss": 0.694, "step": 5789 }, { "epoch": 0.16904615923622668, "grad_norm": 0.7002595580868364, "learning_rate": 1.84661800486618e-05, "loss": 0.6623, "step": 5790 }, { "epoch": 0.16907535546407404, "grad_norm": 0.6842076353822483, "learning_rate": 1.8465531224655312e-05, "loss": 0.6776, "step": 5791 }, { "epoch": 0.1691045516919214, "grad_norm": 0.6804854472299601, "learning_rate": 1.8464882400648824e-05, "loss": 0.6475, "step": 5792 }, { "epoch": 0.16913374791976876, "grad_norm": 0.6756952833250829, "learning_rate": 1.8464233576642336e-05, "loss": 0.6505, "step": 5793 }, { "epoch": 0.16916294414761612, "grad_norm": 0.6559671999349382, "learning_rate": 1.846358475263585e-05, "loss": 0.5766, "step": 5794 }, { "epoch": 0.16919214037546348, "grad_norm": 0.678289222260026, "learning_rate": 1.846293592862936e-05, "loss": 0.6475, "step": 5795 }, { "epoch": 0.16922133660331085, "grad_norm": 0.7791540364013285, "learning_rate": 1.8462287104622873e-05, "loss": 0.7252, "step": 5796 }, { "epoch": 0.1692505328311582, "grad_norm": 0.7337975299673716, "learning_rate": 1.8461638280616385e-05, "loss": 0.7487, "step": 5797 }, { "epoch": 0.16927972905900557, "grad_norm": 0.7127259918495645, "learning_rate": 1.8460989456609897e-05, "loss": 0.6997, "step": 5798 }, { "epoch": 0.16930892528685293, "grad_norm": 0.7097129626944839, "learning_rate": 1.846034063260341e-05, "loss": 0.7207, "step": 5799 }, { "epoch": 0.1693381215147003, "grad_norm": 1.2054951035479977, "learning_rate": 1.845969180859692e-05, "loss": 0.6581, "step": 5800 }, { "epoch": 0.16936731774254765, "grad_norm": 0.7906448938027543, "learning_rate": 1.8459042984590433e-05, "loss": 0.7983, "step": 5801 }, { "epoch": 0.169396513970395, "grad_norm": 0.7559878148666128, "learning_rate": 1.8458394160583945e-05, "loss": 0.7164, "step": 5802 }, { "epoch": 0.16942571019824237, "grad_norm": 0.6512345420700101, "learning_rate": 1.8457745336577457e-05, "loss": 0.6298, "step": 5803 }, { "epoch": 0.16945490642608976, "grad_norm": 0.7379050741858747, "learning_rate": 1.8457096512570965e-05, "loss": 0.7347, "step": 5804 }, { "epoch": 0.16948410265393712, "grad_norm": 0.6814409940951918, "learning_rate": 1.8456447688564477e-05, "loss": 0.6466, "step": 5805 }, { "epoch": 0.16951329888178449, "grad_norm": 0.6950981455964791, "learning_rate": 1.845579886455799e-05, "loss": 0.6662, "step": 5806 }, { "epoch": 0.16954249510963185, "grad_norm": 0.8305998284655909, "learning_rate": 1.84551500405515e-05, "loss": 0.7922, "step": 5807 }, { "epoch": 0.1695716913374792, "grad_norm": 0.7484867858341152, "learning_rate": 1.8454501216545013e-05, "loss": 0.7499, "step": 5808 }, { "epoch": 0.16960088756532657, "grad_norm": 0.7499013343064224, "learning_rate": 1.8453852392538525e-05, "loss": 0.7091, "step": 5809 }, { "epoch": 0.16963008379317393, "grad_norm": 0.647330420214833, "learning_rate": 1.8453203568532038e-05, "loss": 0.5854, "step": 5810 }, { "epoch": 0.1696592800210213, "grad_norm": 0.6953862141736028, "learning_rate": 1.845255474452555e-05, "loss": 0.6391, "step": 5811 }, { "epoch": 0.16968847624886865, "grad_norm": 0.6539598554773368, "learning_rate": 1.8451905920519058e-05, "loss": 0.6245, "step": 5812 }, { "epoch": 0.169717672476716, "grad_norm": 0.7316775481368234, "learning_rate": 1.845125709651257e-05, "loss": 0.6984, "step": 5813 }, { "epoch": 0.16974686870456338, "grad_norm": 0.7951813268015145, "learning_rate": 1.8450608272506086e-05, "loss": 0.6878, "step": 5814 }, { "epoch": 0.16977606493241074, "grad_norm": 0.6917564634586943, "learning_rate": 1.8449959448499598e-05, "loss": 0.6462, "step": 5815 }, { "epoch": 0.1698052611602581, "grad_norm": 0.6920758510683347, "learning_rate": 1.844931062449311e-05, "loss": 0.5929, "step": 5816 }, { "epoch": 0.16983445738810546, "grad_norm": 0.7134444099124597, "learning_rate": 1.844866180048662e-05, "loss": 0.6369, "step": 5817 }, { "epoch": 0.16986365361595282, "grad_norm": 1.2020122480660562, "learning_rate": 1.844801297648013e-05, "loss": 0.7425, "step": 5818 }, { "epoch": 0.16989284984380018, "grad_norm": 0.7024785455314252, "learning_rate": 1.8447364152473642e-05, "loss": 0.6695, "step": 5819 }, { "epoch": 0.16992204607164754, "grad_norm": 0.6756962622093192, "learning_rate": 1.8446715328467154e-05, "loss": 0.6666, "step": 5820 }, { "epoch": 0.1699512422994949, "grad_norm": 0.8016894366587458, "learning_rate": 1.8446066504460666e-05, "loss": 0.7746, "step": 5821 }, { "epoch": 0.16998043852734226, "grad_norm": 0.6733180628358243, "learning_rate": 1.844541768045418e-05, "loss": 0.6815, "step": 5822 }, { "epoch": 0.17000963475518963, "grad_norm": 0.6564311426484423, "learning_rate": 1.844476885644769e-05, "loss": 0.6124, "step": 5823 }, { "epoch": 0.170038830983037, "grad_norm": 0.7490756384237139, "learning_rate": 1.8444120032441202e-05, "loss": 0.7667, "step": 5824 }, { "epoch": 0.17006802721088435, "grad_norm": 0.7506421081999805, "learning_rate": 1.8443471208434715e-05, "loss": 0.6852, "step": 5825 }, { "epoch": 0.1700972234387317, "grad_norm": 0.7760807017514483, "learning_rate": 1.8442822384428227e-05, "loss": 0.7239, "step": 5826 }, { "epoch": 0.17012641966657907, "grad_norm": 0.6970162186269624, "learning_rate": 1.8442173560421735e-05, "loss": 0.615, "step": 5827 }, { "epoch": 0.17015561589442643, "grad_norm": 0.7291791157952795, "learning_rate": 1.8441524736415247e-05, "loss": 0.6654, "step": 5828 }, { "epoch": 0.1701848121222738, "grad_norm": 0.6475651580694466, "learning_rate": 1.844087591240876e-05, "loss": 0.615, "step": 5829 }, { "epoch": 0.17021400835012115, "grad_norm": 0.8733080252418579, "learning_rate": 1.844022708840227e-05, "loss": 0.6936, "step": 5830 }, { "epoch": 0.17024320457796852, "grad_norm": 0.8758750875465446, "learning_rate": 1.8439578264395783e-05, "loss": 0.7268, "step": 5831 }, { "epoch": 0.17027240080581588, "grad_norm": 0.6736638070510292, "learning_rate": 1.8438929440389295e-05, "loss": 0.6662, "step": 5832 }, { "epoch": 0.17030159703366324, "grad_norm": 0.6779037812283221, "learning_rate": 1.8438280616382807e-05, "loss": 0.6531, "step": 5833 }, { "epoch": 0.1703307932615106, "grad_norm": 0.6106090296761056, "learning_rate": 1.843763179237632e-05, "loss": 0.5366, "step": 5834 }, { "epoch": 0.170359989489358, "grad_norm": 0.6529456402177288, "learning_rate": 1.843698296836983e-05, "loss": 0.5934, "step": 5835 }, { "epoch": 0.17038918571720535, "grad_norm": 0.6381998172407856, "learning_rate": 1.8436334144363343e-05, "loss": 0.6128, "step": 5836 }, { "epoch": 0.1704183819450527, "grad_norm": 0.6455274531811153, "learning_rate": 1.8435685320356855e-05, "loss": 0.608, "step": 5837 }, { "epoch": 0.17044757817290007, "grad_norm": 0.7596608009405664, "learning_rate": 1.8435036496350367e-05, "loss": 0.6124, "step": 5838 }, { "epoch": 0.17047677440074743, "grad_norm": 0.7443916252011088, "learning_rate": 1.843438767234388e-05, "loss": 0.695, "step": 5839 }, { "epoch": 0.1705059706285948, "grad_norm": 0.6812717300911842, "learning_rate": 1.843373884833739e-05, "loss": 0.6006, "step": 5840 }, { "epoch": 0.17053516685644216, "grad_norm": 1.2883876286951554, "learning_rate": 1.8433090024330904e-05, "loss": 0.6749, "step": 5841 }, { "epoch": 0.17056436308428952, "grad_norm": 0.7142331528757059, "learning_rate": 1.8432441200324412e-05, "loss": 0.6641, "step": 5842 }, { "epoch": 0.17059355931213688, "grad_norm": 0.7133553864282495, "learning_rate": 1.8431792376317924e-05, "loss": 0.7002, "step": 5843 }, { "epoch": 0.17062275553998424, "grad_norm": 0.7214250788836554, "learning_rate": 1.8431143552311436e-05, "loss": 0.6625, "step": 5844 }, { "epoch": 0.1706519517678316, "grad_norm": 0.7041839619653988, "learning_rate": 1.8430494728304948e-05, "loss": 0.7077, "step": 5845 }, { "epoch": 0.17068114799567896, "grad_norm": 0.6775472065244501, "learning_rate": 1.842984590429846e-05, "loss": 0.6109, "step": 5846 }, { "epoch": 0.17071034422352632, "grad_norm": 0.7346695706920313, "learning_rate": 1.8429197080291972e-05, "loss": 0.7399, "step": 5847 }, { "epoch": 0.17073954045137368, "grad_norm": 0.7127465730913789, "learning_rate": 1.8428548256285484e-05, "loss": 0.6335, "step": 5848 }, { "epoch": 0.17076873667922104, "grad_norm": 0.7753561665566029, "learning_rate": 1.8427899432278996e-05, "loss": 0.7127, "step": 5849 }, { "epoch": 0.1707979329070684, "grad_norm": 0.741795794941472, "learning_rate": 1.8427250608272505e-05, "loss": 0.7504, "step": 5850 }, { "epoch": 0.17082712913491577, "grad_norm": 0.6778706190806774, "learning_rate": 1.8426601784266017e-05, "loss": 0.6333, "step": 5851 }, { "epoch": 0.17085632536276313, "grad_norm": 0.6627554817762886, "learning_rate": 1.8425952960259532e-05, "loss": 0.6638, "step": 5852 }, { "epoch": 0.1708855215906105, "grad_norm": 0.7622506068573364, "learning_rate": 1.8425304136253045e-05, "loss": 0.7157, "step": 5853 }, { "epoch": 0.17091471781845785, "grad_norm": 0.6578931296451289, "learning_rate": 1.8424655312246557e-05, "loss": 0.5614, "step": 5854 }, { "epoch": 0.1709439140463052, "grad_norm": 0.6656836431281198, "learning_rate": 1.842400648824007e-05, "loss": 0.5807, "step": 5855 }, { "epoch": 0.17097311027415257, "grad_norm": 0.6676573015911318, "learning_rate": 1.8423357664233577e-05, "loss": 0.5782, "step": 5856 }, { "epoch": 0.17100230650199993, "grad_norm": 0.6577130294929003, "learning_rate": 1.842270884022709e-05, "loss": 0.6137, "step": 5857 }, { "epoch": 0.1710315027298473, "grad_norm": 0.694831624993132, "learning_rate": 1.84220600162206e-05, "loss": 0.6604, "step": 5858 }, { "epoch": 0.17106069895769466, "grad_norm": 0.7111916279791658, "learning_rate": 1.8421411192214113e-05, "loss": 0.6888, "step": 5859 }, { "epoch": 0.17108989518554202, "grad_norm": 0.7549477970914641, "learning_rate": 1.8420762368207625e-05, "loss": 0.6806, "step": 5860 }, { "epoch": 0.17111909141338938, "grad_norm": 0.7565088433915264, "learning_rate": 1.8420113544201137e-05, "loss": 0.7702, "step": 5861 }, { "epoch": 0.17114828764123674, "grad_norm": 0.6492369768462087, "learning_rate": 1.841946472019465e-05, "loss": 0.6116, "step": 5862 }, { "epoch": 0.1711774838690841, "grad_norm": 0.7014927011454017, "learning_rate": 1.841881589618816e-05, "loss": 0.6537, "step": 5863 }, { "epoch": 0.17120668009693146, "grad_norm": 0.8830044098858942, "learning_rate": 1.8418167072181673e-05, "loss": 0.7306, "step": 5864 }, { "epoch": 0.17123587632477885, "grad_norm": 0.6876337300175456, "learning_rate": 1.8417518248175182e-05, "loss": 0.6703, "step": 5865 }, { "epoch": 0.1712650725526262, "grad_norm": 0.6396174406190579, "learning_rate": 1.8416869424168694e-05, "loss": 0.6203, "step": 5866 }, { "epoch": 0.17129426878047357, "grad_norm": 0.8875848216215413, "learning_rate": 1.8416220600162206e-05, "loss": 0.618, "step": 5867 }, { "epoch": 0.17132346500832094, "grad_norm": 0.6895918152923881, "learning_rate": 1.8415571776155718e-05, "loss": 0.618, "step": 5868 }, { "epoch": 0.1713526612361683, "grad_norm": 0.7401269161410203, "learning_rate": 1.841492295214923e-05, "loss": 0.6865, "step": 5869 }, { "epoch": 0.17138185746401566, "grad_norm": 0.7762409136854342, "learning_rate": 1.8414274128142742e-05, "loss": 0.6364, "step": 5870 }, { "epoch": 0.17141105369186302, "grad_norm": 0.7294626613473117, "learning_rate": 1.8413625304136254e-05, "loss": 0.6456, "step": 5871 }, { "epoch": 0.17144024991971038, "grad_norm": 0.6827900848100455, "learning_rate": 1.8412976480129766e-05, "loss": 0.684, "step": 5872 }, { "epoch": 0.17146944614755774, "grad_norm": 0.7291127742623712, "learning_rate": 1.8412327656123278e-05, "loss": 0.6924, "step": 5873 }, { "epoch": 0.1714986423754051, "grad_norm": 0.7197808880005393, "learning_rate": 1.841167883211679e-05, "loss": 0.6717, "step": 5874 }, { "epoch": 0.17152783860325246, "grad_norm": 0.6910386117828698, "learning_rate": 1.8411030008110302e-05, "loss": 0.6728, "step": 5875 }, { "epoch": 0.17155703483109982, "grad_norm": 0.821924057834831, "learning_rate": 1.8410381184103814e-05, "loss": 0.7351, "step": 5876 }, { "epoch": 0.1715862310589472, "grad_norm": 0.7495587116235591, "learning_rate": 1.8409732360097326e-05, "loss": 0.6974, "step": 5877 }, { "epoch": 0.17161542728679455, "grad_norm": 0.772809254247765, "learning_rate": 1.840908353609084e-05, "loss": 0.7501, "step": 5878 }, { "epoch": 0.1716446235146419, "grad_norm": 0.6623800049546807, "learning_rate": 1.8408434712084347e-05, "loss": 0.6314, "step": 5879 }, { "epoch": 0.17167381974248927, "grad_norm": 0.7142968213252275, "learning_rate": 1.840778588807786e-05, "loss": 0.7884, "step": 5880 }, { "epoch": 0.17170301597033663, "grad_norm": 0.7166257947644216, "learning_rate": 1.840713706407137e-05, "loss": 0.6709, "step": 5881 }, { "epoch": 0.171732212198184, "grad_norm": 0.6699107659409859, "learning_rate": 1.8406488240064883e-05, "loss": 0.6615, "step": 5882 }, { "epoch": 0.17176140842603135, "grad_norm": 0.6777940007590509, "learning_rate": 1.8405839416058395e-05, "loss": 0.6435, "step": 5883 }, { "epoch": 0.17179060465387871, "grad_norm": 0.7008712212041429, "learning_rate": 1.8405190592051907e-05, "loss": 0.6749, "step": 5884 }, { "epoch": 0.17181980088172608, "grad_norm": 0.6570146564284983, "learning_rate": 1.840454176804542e-05, "loss": 0.6171, "step": 5885 }, { "epoch": 0.17184899710957344, "grad_norm": 0.6426820527294138, "learning_rate": 1.840389294403893e-05, "loss": 0.6299, "step": 5886 }, { "epoch": 0.1718781933374208, "grad_norm": 0.6875241622784032, "learning_rate": 1.8403244120032443e-05, "loss": 0.6565, "step": 5887 }, { "epoch": 0.17190738956526816, "grad_norm": 0.7541601913156897, "learning_rate": 1.8402595296025952e-05, "loss": 0.6769, "step": 5888 }, { "epoch": 0.17193658579311552, "grad_norm": 0.6805568297721017, "learning_rate": 1.8401946472019464e-05, "loss": 0.6212, "step": 5889 }, { "epoch": 0.17196578202096288, "grad_norm": 0.6699983029406754, "learning_rate": 1.840129764801298e-05, "loss": 0.6073, "step": 5890 }, { "epoch": 0.17199497824881024, "grad_norm": 0.6785195472738732, "learning_rate": 1.840064882400649e-05, "loss": 0.6543, "step": 5891 }, { "epoch": 0.1720241744766576, "grad_norm": 0.6830023453827078, "learning_rate": 1.8400000000000003e-05, "loss": 0.653, "step": 5892 }, { "epoch": 0.17205337070450497, "grad_norm": 0.6819309615379915, "learning_rate": 1.8399351175993515e-05, "loss": 0.6549, "step": 5893 }, { "epoch": 0.17208256693235233, "grad_norm": 0.7010260684434965, "learning_rate": 1.8398702351987024e-05, "loss": 0.6428, "step": 5894 }, { "epoch": 0.17211176316019972, "grad_norm": 0.8357708842241568, "learning_rate": 1.8398053527980536e-05, "loss": 0.6347, "step": 5895 }, { "epoch": 0.17214095938804708, "grad_norm": 0.6808401569862199, "learning_rate": 1.8397404703974048e-05, "loss": 0.6319, "step": 5896 }, { "epoch": 0.17217015561589444, "grad_norm": 0.7003087626263167, "learning_rate": 1.839675587996756e-05, "loss": 0.7081, "step": 5897 }, { "epoch": 0.1721993518437418, "grad_norm": 0.6865104266380289, "learning_rate": 1.8396107055961072e-05, "loss": 0.6344, "step": 5898 }, { "epoch": 0.17222854807158916, "grad_norm": 0.6647583121388878, "learning_rate": 1.8395458231954584e-05, "loss": 0.5977, "step": 5899 }, { "epoch": 0.17225774429943652, "grad_norm": 0.7508417012726992, "learning_rate": 1.8394809407948096e-05, "loss": 0.7557, "step": 5900 }, { "epoch": 0.17228694052728388, "grad_norm": 0.696418947848033, "learning_rate": 1.8394160583941608e-05, "loss": 0.6186, "step": 5901 }, { "epoch": 0.17231613675513124, "grad_norm": 0.6983932320590681, "learning_rate": 1.839351175993512e-05, "loss": 0.6606, "step": 5902 }, { "epoch": 0.1723453329829786, "grad_norm": 0.6682278266409146, "learning_rate": 1.839286293592863e-05, "loss": 0.6038, "step": 5903 }, { "epoch": 0.17237452921082597, "grad_norm": 0.6692430956507438, "learning_rate": 1.839221411192214e-05, "loss": 0.644, "step": 5904 }, { "epoch": 0.17240372543867333, "grad_norm": 0.6836365126350866, "learning_rate": 1.8391565287915653e-05, "loss": 0.5991, "step": 5905 }, { "epoch": 0.1724329216665207, "grad_norm": 0.6512048963194014, "learning_rate": 1.8390916463909165e-05, "loss": 0.635, "step": 5906 }, { "epoch": 0.17246211789436805, "grad_norm": 0.7163395283695894, "learning_rate": 1.8390267639902677e-05, "loss": 0.7162, "step": 5907 }, { "epoch": 0.1724913141222154, "grad_norm": 0.6694740806328207, "learning_rate": 1.838961881589619e-05, "loss": 0.6353, "step": 5908 }, { "epoch": 0.17252051035006277, "grad_norm": 0.6613681714898506, "learning_rate": 1.83889699918897e-05, "loss": 0.6153, "step": 5909 }, { "epoch": 0.17254970657791013, "grad_norm": 0.6299341750446535, "learning_rate": 1.8388321167883213e-05, "loss": 0.5773, "step": 5910 }, { "epoch": 0.1725789028057575, "grad_norm": 0.6952008532940959, "learning_rate": 1.8387672343876725e-05, "loss": 0.6852, "step": 5911 }, { "epoch": 0.17260809903360486, "grad_norm": 0.7127975654671116, "learning_rate": 1.8387023519870237e-05, "loss": 0.6445, "step": 5912 }, { "epoch": 0.17263729526145222, "grad_norm": 0.7001817641149508, "learning_rate": 1.838637469586375e-05, "loss": 0.5907, "step": 5913 }, { "epoch": 0.17266649148929958, "grad_norm": 0.6421301542315409, "learning_rate": 1.838572587185726e-05, "loss": 0.5667, "step": 5914 }, { "epoch": 0.17269568771714694, "grad_norm": 0.6721079725253407, "learning_rate": 1.8385077047850773e-05, "loss": 0.6123, "step": 5915 }, { "epoch": 0.1727248839449943, "grad_norm": 0.723050686416467, "learning_rate": 1.8384428223844285e-05, "loss": 0.6569, "step": 5916 }, { "epoch": 0.17275408017284166, "grad_norm": 0.7567268245100272, "learning_rate": 1.8383779399837794e-05, "loss": 0.7628, "step": 5917 }, { "epoch": 0.17278327640068902, "grad_norm": 0.6988665597685835, "learning_rate": 1.8383130575831306e-05, "loss": 0.7035, "step": 5918 }, { "epoch": 0.17281247262853638, "grad_norm": 0.6714932098023031, "learning_rate": 1.8382481751824818e-05, "loss": 0.6401, "step": 5919 }, { "epoch": 0.17284166885638375, "grad_norm": 0.7090760942894233, "learning_rate": 1.838183292781833e-05, "loss": 0.652, "step": 5920 }, { "epoch": 0.1728708650842311, "grad_norm": 0.8448618460562126, "learning_rate": 1.8381184103811842e-05, "loss": 0.7086, "step": 5921 }, { "epoch": 0.17290006131207847, "grad_norm": 0.7290326922350722, "learning_rate": 1.8380535279805354e-05, "loss": 0.6744, "step": 5922 }, { "epoch": 0.17292925753992583, "grad_norm": 0.6544860955263855, "learning_rate": 1.8379886455798866e-05, "loss": 0.6198, "step": 5923 }, { "epoch": 0.1729584537677732, "grad_norm": 0.7224895002335456, "learning_rate": 1.8379237631792378e-05, "loss": 0.6324, "step": 5924 }, { "epoch": 0.17298764999562058, "grad_norm": 0.6927713439501643, "learning_rate": 1.837858880778589e-05, "loss": 0.6589, "step": 5925 }, { "epoch": 0.17301684622346794, "grad_norm": 0.7596390291473434, "learning_rate": 1.83779399837794e-05, "loss": 0.7875, "step": 5926 }, { "epoch": 0.1730460424513153, "grad_norm": 0.7430789458925369, "learning_rate": 1.837729115977291e-05, "loss": 0.6928, "step": 5927 }, { "epoch": 0.17307523867916266, "grad_norm": 0.6769980519510878, "learning_rate": 1.8376642335766426e-05, "loss": 0.6818, "step": 5928 }, { "epoch": 0.17310443490701002, "grad_norm": 0.6320634971578798, "learning_rate": 1.8375993511759938e-05, "loss": 0.6405, "step": 5929 }, { "epoch": 0.17313363113485739, "grad_norm": 0.721372202364155, "learning_rate": 1.837534468775345e-05, "loss": 0.6978, "step": 5930 }, { "epoch": 0.17316282736270475, "grad_norm": 0.6774009893989356, "learning_rate": 1.8374695863746962e-05, "loss": 0.6832, "step": 5931 }, { "epoch": 0.1731920235905521, "grad_norm": 0.674199406981109, "learning_rate": 1.837404703974047e-05, "loss": 0.6371, "step": 5932 }, { "epoch": 0.17322121981839947, "grad_norm": 0.7542687384441579, "learning_rate": 1.8373398215733983e-05, "loss": 0.7615, "step": 5933 }, { "epoch": 0.17325041604624683, "grad_norm": 0.6816036841337831, "learning_rate": 1.8372749391727495e-05, "loss": 0.6472, "step": 5934 }, { "epoch": 0.1732796122740942, "grad_norm": 1.0690069991933613, "learning_rate": 1.8372100567721007e-05, "loss": 0.6574, "step": 5935 }, { "epoch": 0.17330880850194155, "grad_norm": 0.6748108422107582, "learning_rate": 1.837145174371452e-05, "loss": 0.663, "step": 5936 }, { "epoch": 0.1733380047297889, "grad_norm": 0.6820003371545866, "learning_rate": 1.837080291970803e-05, "loss": 0.6836, "step": 5937 }, { "epoch": 0.17336720095763627, "grad_norm": 0.6646981121524868, "learning_rate": 1.8370154095701543e-05, "loss": 0.5975, "step": 5938 }, { "epoch": 0.17339639718548364, "grad_norm": 0.6910901238942028, "learning_rate": 1.8369505271695055e-05, "loss": 0.6452, "step": 5939 }, { "epoch": 0.173425593413331, "grad_norm": 0.7430810748590992, "learning_rate": 1.8368856447688567e-05, "loss": 0.6687, "step": 5940 }, { "epoch": 0.17345478964117836, "grad_norm": 0.7234817645792635, "learning_rate": 1.8368207623682076e-05, "loss": 0.723, "step": 5941 }, { "epoch": 0.17348398586902572, "grad_norm": 0.8387664747816063, "learning_rate": 1.8367558799675588e-05, "loss": 0.6873, "step": 5942 }, { "epoch": 0.17351318209687308, "grad_norm": 0.6625204966019028, "learning_rate": 1.83669099756691e-05, "loss": 0.6379, "step": 5943 }, { "epoch": 0.17354237832472044, "grad_norm": 0.6731929160934216, "learning_rate": 1.8366261151662612e-05, "loss": 0.6412, "step": 5944 }, { "epoch": 0.1735715745525678, "grad_norm": 0.7440772755081522, "learning_rate": 1.8365612327656124e-05, "loss": 0.6702, "step": 5945 }, { "epoch": 0.17360077078041516, "grad_norm": 0.6893074190616492, "learning_rate": 1.836496350364964e-05, "loss": 0.6975, "step": 5946 }, { "epoch": 0.17362996700826253, "grad_norm": 0.7012184959311049, "learning_rate": 1.8364314679643148e-05, "loss": 0.668, "step": 5947 }, { "epoch": 0.1736591632361099, "grad_norm": 0.6591462092224945, "learning_rate": 1.836366585563666e-05, "loss": 0.6225, "step": 5948 }, { "epoch": 0.17368835946395725, "grad_norm": 0.691194581613079, "learning_rate": 1.8363017031630172e-05, "loss": 0.6795, "step": 5949 }, { "epoch": 0.1737175556918046, "grad_norm": 0.7606134460236451, "learning_rate": 1.8362368207623684e-05, "loss": 0.7507, "step": 5950 }, { "epoch": 0.17374675191965197, "grad_norm": 0.8218121567319079, "learning_rate": 1.8361719383617196e-05, "loss": 0.6471, "step": 5951 }, { "epoch": 0.17377594814749933, "grad_norm": 0.6481558371866409, "learning_rate": 1.8361070559610708e-05, "loss": 0.5727, "step": 5952 }, { "epoch": 0.1738051443753467, "grad_norm": 0.771264974264783, "learning_rate": 1.836042173560422e-05, "loss": 0.8264, "step": 5953 }, { "epoch": 0.17383434060319405, "grad_norm": 0.6833997408814094, "learning_rate": 1.8359772911597732e-05, "loss": 0.6586, "step": 5954 }, { "epoch": 0.17386353683104144, "grad_norm": 0.6890244178927419, "learning_rate": 1.835912408759124e-05, "loss": 0.6232, "step": 5955 }, { "epoch": 0.1738927330588888, "grad_norm": 0.8100975161887733, "learning_rate": 1.8358475263584753e-05, "loss": 0.6968, "step": 5956 }, { "epoch": 0.17392192928673617, "grad_norm": 0.693738004623673, "learning_rate": 1.8357826439578265e-05, "loss": 0.6121, "step": 5957 }, { "epoch": 0.17395112551458353, "grad_norm": 0.795987377164519, "learning_rate": 1.8357177615571777e-05, "loss": 0.7474, "step": 5958 }, { "epoch": 0.1739803217424309, "grad_norm": 0.7014877173008259, "learning_rate": 1.835652879156529e-05, "loss": 0.654, "step": 5959 }, { "epoch": 0.17400951797027825, "grad_norm": 0.7918525090368883, "learning_rate": 1.83558799675588e-05, "loss": 0.7318, "step": 5960 }, { "epoch": 0.1740387141981256, "grad_norm": 0.7517087335705945, "learning_rate": 1.8355231143552313e-05, "loss": 0.665, "step": 5961 }, { "epoch": 0.17406791042597297, "grad_norm": 0.7101013668186512, "learning_rate": 1.8354582319545825e-05, "loss": 0.7069, "step": 5962 }, { "epoch": 0.17409710665382033, "grad_norm": 0.655824962075249, "learning_rate": 1.8353933495539337e-05, "loss": 0.6079, "step": 5963 }, { "epoch": 0.1741263028816677, "grad_norm": 0.6872114353200716, "learning_rate": 1.8353284671532846e-05, "loss": 0.6894, "step": 5964 }, { "epoch": 0.17415549910951506, "grad_norm": 0.6207848326103257, "learning_rate": 1.835263584752636e-05, "loss": 0.5466, "step": 5965 }, { "epoch": 0.17418469533736242, "grad_norm": 0.7255833680041816, "learning_rate": 1.8351987023519873e-05, "loss": 0.7448, "step": 5966 }, { "epoch": 0.17421389156520978, "grad_norm": 0.8815681730785574, "learning_rate": 1.8351338199513385e-05, "loss": 0.7099, "step": 5967 }, { "epoch": 0.17424308779305714, "grad_norm": 0.6531511373290176, "learning_rate": 1.8350689375506897e-05, "loss": 0.6021, "step": 5968 }, { "epoch": 0.1742722840209045, "grad_norm": 0.6567075464281014, "learning_rate": 1.835004055150041e-05, "loss": 0.5746, "step": 5969 }, { "epoch": 0.17430148024875186, "grad_norm": 0.6802991605664405, "learning_rate": 1.8349391727493918e-05, "loss": 0.5913, "step": 5970 }, { "epoch": 0.17433067647659922, "grad_norm": 0.778126632375884, "learning_rate": 1.834874290348743e-05, "loss": 0.7031, "step": 5971 }, { "epoch": 0.17435987270444658, "grad_norm": 0.673463354158396, "learning_rate": 1.8348094079480942e-05, "loss": 0.6447, "step": 5972 }, { "epoch": 0.17438906893229394, "grad_norm": 0.7005337608056104, "learning_rate": 1.8347445255474454e-05, "loss": 0.6388, "step": 5973 }, { "epoch": 0.1744182651601413, "grad_norm": 0.6889948606982997, "learning_rate": 1.8346796431467966e-05, "loss": 0.6981, "step": 5974 }, { "epoch": 0.17444746138798867, "grad_norm": 0.677024449533385, "learning_rate": 1.8346147607461478e-05, "loss": 0.6111, "step": 5975 }, { "epoch": 0.17447665761583603, "grad_norm": 0.7560755309813231, "learning_rate": 1.834549878345499e-05, "loss": 0.758, "step": 5976 }, { "epoch": 0.1745058538436834, "grad_norm": 0.6285961997384176, "learning_rate": 1.8344849959448502e-05, "loss": 0.5822, "step": 5977 }, { "epoch": 0.17453505007153075, "grad_norm": 0.641807162935315, "learning_rate": 1.8344201135442014e-05, "loss": 0.5249, "step": 5978 }, { "epoch": 0.1745642462993781, "grad_norm": 0.676734687040307, "learning_rate": 1.8343552311435523e-05, "loss": 0.6076, "step": 5979 }, { "epoch": 0.17459344252722547, "grad_norm": 0.6681841359017878, "learning_rate": 1.8342903487429035e-05, "loss": 0.5878, "step": 5980 }, { "epoch": 0.17462263875507283, "grad_norm": 0.7068578102721959, "learning_rate": 1.8342254663422547e-05, "loss": 0.6036, "step": 5981 }, { "epoch": 0.1746518349829202, "grad_norm": 0.669051203228254, "learning_rate": 1.834160583941606e-05, "loss": 0.6289, "step": 5982 }, { "epoch": 0.17468103121076756, "grad_norm": 0.6480319882471236, "learning_rate": 1.834095701540957e-05, "loss": 0.593, "step": 5983 }, { "epoch": 0.17471022743861492, "grad_norm": 0.6764217005773874, "learning_rate": 1.8340308191403086e-05, "loss": 0.602, "step": 5984 }, { "epoch": 0.1747394236664623, "grad_norm": 0.7473255268695647, "learning_rate": 1.8339659367396595e-05, "loss": 0.7131, "step": 5985 }, { "epoch": 0.17476861989430967, "grad_norm": 0.6870120203181778, "learning_rate": 1.8339010543390107e-05, "loss": 0.6363, "step": 5986 }, { "epoch": 0.17479781612215703, "grad_norm": 0.7290244017334433, "learning_rate": 1.833836171938362e-05, "loss": 0.688, "step": 5987 }, { "epoch": 0.1748270123500044, "grad_norm": 0.697911043167265, "learning_rate": 1.833771289537713e-05, "loss": 0.6598, "step": 5988 }, { "epoch": 0.17485620857785175, "grad_norm": 0.6974149528784839, "learning_rate": 1.8337064071370643e-05, "loss": 0.6668, "step": 5989 }, { "epoch": 0.1748854048056991, "grad_norm": 0.7787522644137798, "learning_rate": 1.8336415247364155e-05, "loss": 0.7309, "step": 5990 }, { "epoch": 0.17491460103354647, "grad_norm": 0.7371428091904196, "learning_rate": 1.8335766423357667e-05, "loss": 0.709, "step": 5991 }, { "epoch": 0.17494379726139384, "grad_norm": 0.7014928267773775, "learning_rate": 1.833511759935118e-05, "loss": 0.727, "step": 5992 }, { "epoch": 0.1749729934892412, "grad_norm": 0.6826083228838462, "learning_rate": 1.8334468775344688e-05, "loss": 0.623, "step": 5993 }, { "epoch": 0.17500218971708856, "grad_norm": 0.6927494214314961, "learning_rate": 1.83338199513382e-05, "loss": 0.6282, "step": 5994 }, { "epoch": 0.17503138594493592, "grad_norm": 0.6468327181683962, "learning_rate": 1.8333171127331712e-05, "loss": 0.6089, "step": 5995 }, { "epoch": 0.17506058217278328, "grad_norm": 0.7166996635869345, "learning_rate": 1.8332522303325224e-05, "loss": 0.6641, "step": 5996 }, { "epoch": 0.17508977840063064, "grad_norm": 0.7157029895365513, "learning_rate": 1.8331873479318736e-05, "loss": 0.6874, "step": 5997 }, { "epoch": 0.175118974628478, "grad_norm": 0.7361051945995718, "learning_rate": 1.8331224655312248e-05, "loss": 0.706, "step": 5998 }, { "epoch": 0.17514817085632536, "grad_norm": 0.6780266621102201, "learning_rate": 1.833057583130576e-05, "loss": 0.6065, "step": 5999 }, { "epoch": 0.17517736708417272, "grad_norm": 0.7336861143386927, "learning_rate": 1.8329927007299272e-05, "loss": 0.7299, "step": 6000 }, { "epoch": 0.17520656331202009, "grad_norm": 0.680861720321108, "learning_rate": 1.8329278183292784e-05, "loss": 0.5942, "step": 6001 }, { "epoch": 0.17523575953986745, "grad_norm": 0.7037923068673593, "learning_rate": 1.8328629359286293e-05, "loss": 0.6739, "step": 6002 }, { "epoch": 0.1752649557677148, "grad_norm": 0.6438596014004396, "learning_rate": 1.8327980535279808e-05, "loss": 0.574, "step": 6003 }, { "epoch": 0.17529415199556217, "grad_norm": 0.6792233167239868, "learning_rate": 1.832733171127332e-05, "loss": 0.6044, "step": 6004 }, { "epoch": 0.17532334822340953, "grad_norm": 0.8656816194875295, "learning_rate": 1.8326682887266832e-05, "loss": 0.6719, "step": 6005 }, { "epoch": 0.1753525444512569, "grad_norm": 0.6671627280198734, "learning_rate": 1.8326034063260344e-05, "loss": 0.6502, "step": 6006 }, { "epoch": 0.17538174067910425, "grad_norm": 0.6017379371829709, "learning_rate": 1.8325385239253856e-05, "loss": 0.5186, "step": 6007 }, { "epoch": 0.17541093690695161, "grad_norm": 0.6484406737229683, "learning_rate": 1.8324736415247365e-05, "loss": 0.5663, "step": 6008 }, { "epoch": 0.17544013313479898, "grad_norm": 0.8115007393016191, "learning_rate": 1.8324087591240877e-05, "loss": 0.7623, "step": 6009 }, { "epoch": 0.17546932936264634, "grad_norm": 0.7134149767952029, "learning_rate": 1.832343876723439e-05, "loss": 0.6396, "step": 6010 }, { "epoch": 0.1754985255904937, "grad_norm": 0.6918992167551257, "learning_rate": 1.83227899432279e-05, "loss": 0.6914, "step": 6011 }, { "epoch": 0.17552772181834106, "grad_norm": 0.7203736795168807, "learning_rate": 1.8322141119221413e-05, "loss": 0.6801, "step": 6012 }, { "epoch": 0.17555691804618842, "grad_norm": 0.7057511185811862, "learning_rate": 1.8321492295214925e-05, "loss": 0.6723, "step": 6013 }, { "epoch": 0.17558611427403578, "grad_norm": 0.7054548074273572, "learning_rate": 1.8320843471208437e-05, "loss": 0.6652, "step": 6014 }, { "epoch": 0.17561531050188314, "grad_norm": 0.6791340489950165, "learning_rate": 1.832019464720195e-05, "loss": 0.658, "step": 6015 }, { "epoch": 0.17564450672973053, "grad_norm": 0.6737392560030272, "learning_rate": 1.831954582319546e-05, "loss": 0.6517, "step": 6016 }, { "epoch": 0.1756737029575779, "grad_norm": 0.6493017975059746, "learning_rate": 1.831889699918897e-05, "loss": 0.6062, "step": 6017 }, { "epoch": 0.17570289918542525, "grad_norm": 0.7265677516027673, "learning_rate": 1.831824817518248e-05, "loss": 0.7361, "step": 6018 }, { "epoch": 0.17573209541327262, "grad_norm": 0.6859913288777164, "learning_rate": 1.8317599351175994e-05, "loss": 0.728, "step": 6019 }, { "epoch": 0.17576129164111998, "grad_norm": 0.6006195991834093, "learning_rate": 1.8316950527169506e-05, "loss": 0.5184, "step": 6020 }, { "epoch": 0.17579048786896734, "grad_norm": 0.7049957357069785, "learning_rate": 1.8316301703163018e-05, "loss": 0.703, "step": 6021 }, { "epoch": 0.1758196840968147, "grad_norm": 0.6755611624217711, "learning_rate": 1.831565287915653e-05, "loss": 0.6772, "step": 6022 }, { "epoch": 0.17584888032466206, "grad_norm": 0.6539622021615017, "learning_rate": 1.8315004055150042e-05, "loss": 0.6082, "step": 6023 }, { "epoch": 0.17587807655250942, "grad_norm": 0.653133087698645, "learning_rate": 1.8314355231143554e-05, "loss": 0.6076, "step": 6024 }, { "epoch": 0.17590727278035678, "grad_norm": 0.675712859245378, "learning_rate": 1.8313706407137066e-05, "loss": 0.6384, "step": 6025 }, { "epoch": 0.17593646900820414, "grad_norm": 0.7303799801627355, "learning_rate": 1.8313057583130578e-05, "loss": 0.7526, "step": 6026 }, { "epoch": 0.1759656652360515, "grad_norm": 1.2310788472349499, "learning_rate": 1.831240875912409e-05, "loss": 0.8057, "step": 6027 }, { "epoch": 0.17599486146389887, "grad_norm": 0.6866518625186803, "learning_rate": 1.8311759935117602e-05, "loss": 0.6322, "step": 6028 }, { "epoch": 0.17602405769174623, "grad_norm": 0.6596805105953529, "learning_rate": 1.8311111111111114e-05, "loss": 0.6456, "step": 6029 }, { "epoch": 0.1760532539195936, "grad_norm": 0.6288099357986309, "learning_rate": 1.8310462287104626e-05, "loss": 0.5696, "step": 6030 }, { "epoch": 0.17608245014744095, "grad_norm": 0.696775367314422, "learning_rate": 1.8309813463098135e-05, "loss": 0.6388, "step": 6031 }, { "epoch": 0.1761116463752883, "grad_norm": 0.6918830053283389, "learning_rate": 1.8309164639091647e-05, "loss": 0.6597, "step": 6032 }, { "epoch": 0.17614084260313567, "grad_norm": 0.6766189035503234, "learning_rate": 1.830851581508516e-05, "loss": 0.6663, "step": 6033 }, { "epoch": 0.17617003883098303, "grad_norm": 0.683621633948792, "learning_rate": 1.830786699107867e-05, "loss": 0.6405, "step": 6034 }, { "epoch": 0.1761992350588304, "grad_norm": 0.7516229825059647, "learning_rate": 1.8307218167072183e-05, "loss": 0.7701, "step": 6035 }, { "epoch": 0.17622843128667776, "grad_norm": 0.6614483313324521, "learning_rate": 1.8306569343065695e-05, "loss": 0.655, "step": 6036 }, { "epoch": 0.17625762751452512, "grad_norm": 0.6792278943685855, "learning_rate": 1.8305920519059207e-05, "loss": 0.66, "step": 6037 }, { "epoch": 0.17628682374237248, "grad_norm": 0.7792713507864197, "learning_rate": 1.830527169505272e-05, "loss": 0.8255, "step": 6038 }, { "epoch": 0.17631601997021984, "grad_norm": 0.6664040258391952, "learning_rate": 1.830462287104623e-05, "loss": 0.6056, "step": 6039 }, { "epoch": 0.1763452161980672, "grad_norm": 0.7021592317557066, "learning_rate": 1.830397404703974e-05, "loss": 0.6501, "step": 6040 }, { "epoch": 0.17637441242591456, "grad_norm": 0.7788759189450895, "learning_rate": 1.8303325223033255e-05, "loss": 0.7063, "step": 6041 }, { "epoch": 0.17640360865376192, "grad_norm": 0.7418637738277355, "learning_rate": 1.8302676399026767e-05, "loss": 0.7878, "step": 6042 }, { "epoch": 0.17643280488160928, "grad_norm": 0.7486008602220169, "learning_rate": 1.830202757502028e-05, "loss": 0.6506, "step": 6043 }, { "epoch": 0.17646200110945665, "grad_norm": 0.7972117650194036, "learning_rate": 1.830137875101379e-05, "loss": 0.7878, "step": 6044 }, { "epoch": 0.176491197337304, "grad_norm": 0.7300109706260745, "learning_rate": 1.8300729927007303e-05, "loss": 0.743, "step": 6045 }, { "epoch": 0.1765203935651514, "grad_norm": 1.0186684056372834, "learning_rate": 1.830008110300081e-05, "loss": 0.7103, "step": 6046 }, { "epoch": 0.17654958979299876, "grad_norm": 0.7856058967762026, "learning_rate": 1.8299432278994324e-05, "loss": 0.762, "step": 6047 }, { "epoch": 0.17657878602084612, "grad_norm": 0.7333153824812114, "learning_rate": 1.8298783454987836e-05, "loss": 0.708, "step": 6048 }, { "epoch": 0.17660798224869348, "grad_norm": 0.7183765700212589, "learning_rate": 1.8298134630981348e-05, "loss": 0.7121, "step": 6049 }, { "epoch": 0.17663717847654084, "grad_norm": 0.6558584649638964, "learning_rate": 1.829748580697486e-05, "loss": 0.6595, "step": 6050 }, { "epoch": 0.1766663747043882, "grad_norm": 0.6725406388857648, "learning_rate": 1.829683698296837e-05, "loss": 0.6605, "step": 6051 }, { "epoch": 0.17669557093223556, "grad_norm": 0.6479351000771364, "learning_rate": 1.8296188158961884e-05, "loss": 0.5881, "step": 6052 }, { "epoch": 0.17672476716008292, "grad_norm": 0.7135248583346268, "learning_rate": 1.8295539334955396e-05, "loss": 0.6648, "step": 6053 }, { "epoch": 0.17675396338793029, "grad_norm": 0.7419775625360545, "learning_rate": 1.8294890510948908e-05, "loss": 0.7475, "step": 6054 }, { "epoch": 0.17678315961577765, "grad_norm": 0.6418188579059749, "learning_rate": 1.8294241686942416e-05, "loss": 0.5819, "step": 6055 }, { "epoch": 0.176812355843625, "grad_norm": 0.6720593816245423, "learning_rate": 1.829359286293593e-05, "loss": 0.6841, "step": 6056 }, { "epoch": 0.17684155207147237, "grad_norm": 0.6628519386628309, "learning_rate": 1.829294403892944e-05, "loss": 0.6849, "step": 6057 }, { "epoch": 0.17687074829931973, "grad_norm": 0.6685227672484555, "learning_rate": 1.8292295214922952e-05, "loss": 0.6426, "step": 6058 }, { "epoch": 0.1768999445271671, "grad_norm": 0.6813526462935622, "learning_rate": 1.8291646390916465e-05, "loss": 0.6581, "step": 6059 }, { "epoch": 0.17692914075501445, "grad_norm": 0.746795485960149, "learning_rate": 1.8290997566909977e-05, "loss": 0.7231, "step": 6060 }, { "epoch": 0.1769583369828618, "grad_norm": 0.7300953237038976, "learning_rate": 1.829034874290349e-05, "loss": 0.7359, "step": 6061 }, { "epoch": 0.17698753321070917, "grad_norm": 0.6385031162417194, "learning_rate": 1.8289699918897e-05, "loss": 0.538, "step": 6062 }, { "epoch": 0.17701672943855654, "grad_norm": 0.7209897257397888, "learning_rate": 1.8289051094890513e-05, "loss": 0.7105, "step": 6063 }, { "epoch": 0.1770459256664039, "grad_norm": 0.7399401191822209, "learning_rate": 1.8288402270884025e-05, "loss": 0.7272, "step": 6064 }, { "epoch": 0.17707512189425126, "grad_norm": 0.6671141239610645, "learning_rate": 1.8287753446877537e-05, "loss": 0.6486, "step": 6065 }, { "epoch": 0.17710431812209862, "grad_norm": 0.7372095067546108, "learning_rate": 1.828710462287105e-05, "loss": 0.61, "step": 6066 }, { "epoch": 0.17713351434994598, "grad_norm": 0.6697122901160985, "learning_rate": 1.828645579886456e-05, "loss": 0.6563, "step": 6067 }, { "epoch": 0.17716271057779334, "grad_norm": 0.6808219993361234, "learning_rate": 1.8285806974858073e-05, "loss": 0.6341, "step": 6068 }, { "epoch": 0.1771919068056407, "grad_norm": 0.6855810674821898, "learning_rate": 1.828515815085158e-05, "loss": 0.6717, "step": 6069 }, { "epoch": 0.17722110303348806, "grad_norm": 0.6564624511546522, "learning_rate": 1.8284509326845093e-05, "loss": 0.597, "step": 6070 }, { "epoch": 0.17725029926133543, "grad_norm": 0.6685295213325053, "learning_rate": 1.8283860502838605e-05, "loss": 0.6847, "step": 6071 }, { "epoch": 0.1772794954891828, "grad_norm": 0.6486356314404528, "learning_rate": 1.8283211678832117e-05, "loss": 0.5897, "step": 6072 }, { "epoch": 0.17730869171703015, "grad_norm": 0.7347789648051651, "learning_rate": 1.828256285482563e-05, "loss": 0.7191, "step": 6073 }, { "epoch": 0.1773378879448775, "grad_norm": 0.7907309816545974, "learning_rate": 1.828191403081914e-05, "loss": 0.7951, "step": 6074 }, { "epoch": 0.17736708417272487, "grad_norm": 0.6237513002221368, "learning_rate": 1.8281265206812654e-05, "loss": 0.5607, "step": 6075 }, { "epoch": 0.17739628040057226, "grad_norm": 0.6830076710950727, "learning_rate": 1.8280616382806166e-05, "loss": 0.6465, "step": 6076 }, { "epoch": 0.17742547662841962, "grad_norm": 0.6495254323533536, "learning_rate": 1.8279967558799678e-05, "loss": 0.569, "step": 6077 }, { "epoch": 0.17745467285626698, "grad_norm": 0.7159602849390372, "learning_rate": 1.8279318734793186e-05, "loss": 0.7045, "step": 6078 }, { "epoch": 0.17748386908411434, "grad_norm": 0.7704469966500115, "learning_rate": 1.82786699107867e-05, "loss": 0.7108, "step": 6079 }, { "epoch": 0.1775130653119617, "grad_norm": 0.681006908242786, "learning_rate": 1.8278021086780214e-05, "loss": 0.5974, "step": 6080 }, { "epoch": 0.17754226153980907, "grad_norm": 0.6839845371328983, "learning_rate": 1.8277372262773726e-05, "loss": 0.6469, "step": 6081 }, { "epoch": 0.17757145776765643, "grad_norm": 0.6613006432723538, "learning_rate": 1.8276723438767238e-05, "loss": 0.6082, "step": 6082 }, { "epoch": 0.1776006539955038, "grad_norm": 0.7661255409422889, "learning_rate": 1.827607461476075e-05, "loss": 0.7639, "step": 6083 }, { "epoch": 0.17762985022335115, "grad_norm": 0.6963624352111355, "learning_rate": 1.827542579075426e-05, "loss": 0.6861, "step": 6084 }, { "epoch": 0.1776590464511985, "grad_norm": 0.6771468726783755, "learning_rate": 1.827477696674777e-05, "loss": 0.6232, "step": 6085 }, { "epoch": 0.17768824267904587, "grad_norm": 0.6739652226324206, "learning_rate": 1.8274128142741282e-05, "loss": 0.6264, "step": 6086 }, { "epoch": 0.17771743890689323, "grad_norm": 0.6851424636275755, "learning_rate": 1.8273479318734794e-05, "loss": 0.6477, "step": 6087 }, { "epoch": 0.1777466351347406, "grad_norm": 0.6853933980319149, "learning_rate": 1.8272830494728307e-05, "loss": 0.6402, "step": 6088 }, { "epoch": 0.17777583136258795, "grad_norm": 0.7303539113292316, "learning_rate": 1.827218167072182e-05, "loss": 0.6812, "step": 6089 }, { "epoch": 0.17780502759043532, "grad_norm": 0.6879345620331362, "learning_rate": 1.827153284671533e-05, "loss": 0.6168, "step": 6090 }, { "epoch": 0.17783422381828268, "grad_norm": 0.6551190091037594, "learning_rate": 1.8270884022708843e-05, "loss": 0.624, "step": 6091 }, { "epoch": 0.17786342004613004, "grad_norm": 0.645200824196108, "learning_rate": 1.8270235198702355e-05, "loss": 0.6085, "step": 6092 }, { "epoch": 0.1778926162739774, "grad_norm": 0.6989158087652517, "learning_rate": 1.8269586374695863e-05, "loss": 0.6837, "step": 6093 }, { "epoch": 0.17792181250182476, "grad_norm": 0.7056963478007858, "learning_rate": 1.8268937550689375e-05, "loss": 0.7142, "step": 6094 }, { "epoch": 0.17795100872967212, "grad_norm": 0.7286203001182058, "learning_rate": 1.8268288726682887e-05, "loss": 0.6816, "step": 6095 }, { "epoch": 0.17798020495751948, "grad_norm": 0.6693500377496756, "learning_rate": 1.82676399026764e-05, "loss": 0.5889, "step": 6096 }, { "epoch": 0.17800940118536684, "grad_norm": 0.7063261108581816, "learning_rate": 1.8266991078669915e-05, "loss": 0.6449, "step": 6097 }, { "epoch": 0.1780385974132142, "grad_norm": 0.7394986746733996, "learning_rate": 1.8266342254663423e-05, "loss": 0.7237, "step": 6098 }, { "epoch": 0.17806779364106157, "grad_norm": 0.6362858220749213, "learning_rate": 1.8265693430656935e-05, "loss": 0.5887, "step": 6099 }, { "epoch": 0.17809698986890893, "grad_norm": 0.6554304077985802, "learning_rate": 1.8265044606650447e-05, "loss": 0.6408, "step": 6100 }, { "epoch": 0.1781261860967563, "grad_norm": 0.7541544023830203, "learning_rate": 1.826439578264396e-05, "loss": 0.6869, "step": 6101 }, { "epoch": 0.17815538232460365, "grad_norm": 0.7157713803259496, "learning_rate": 1.826374695863747e-05, "loss": 0.6805, "step": 6102 }, { "epoch": 0.178184578552451, "grad_norm": 0.6847760855657199, "learning_rate": 1.8263098134630984e-05, "loss": 0.666, "step": 6103 }, { "epoch": 0.17821377478029837, "grad_norm": 0.6633250497450109, "learning_rate": 1.8262449310624496e-05, "loss": 0.61, "step": 6104 }, { "epoch": 0.17824297100814573, "grad_norm": 0.7641754054302925, "learning_rate": 1.8261800486618008e-05, "loss": 0.7835, "step": 6105 }, { "epoch": 0.17827216723599312, "grad_norm": 0.7068567767012476, "learning_rate": 1.826115166261152e-05, "loss": 0.6519, "step": 6106 }, { "epoch": 0.17830136346384048, "grad_norm": 0.7961345540867207, "learning_rate": 1.8260502838605028e-05, "loss": 0.6504, "step": 6107 }, { "epoch": 0.17833055969168785, "grad_norm": 0.6834714767649818, "learning_rate": 1.825985401459854e-05, "loss": 0.6905, "step": 6108 }, { "epoch": 0.1783597559195352, "grad_norm": 0.6806600897168762, "learning_rate": 1.8259205190592052e-05, "loss": 0.6117, "step": 6109 }, { "epoch": 0.17838895214738257, "grad_norm": 0.7227156027655041, "learning_rate": 1.8258556366585564e-05, "loss": 0.7406, "step": 6110 }, { "epoch": 0.17841814837522993, "grad_norm": 0.6269642269110988, "learning_rate": 1.8257907542579076e-05, "loss": 0.559, "step": 6111 }, { "epoch": 0.1784473446030773, "grad_norm": 0.699277355914746, "learning_rate": 1.825725871857259e-05, "loss": 0.6427, "step": 6112 }, { "epoch": 0.17847654083092465, "grad_norm": 0.6682257108519961, "learning_rate": 1.82566098945661e-05, "loss": 0.6209, "step": 6113 }, { "epoch": 0.178505737058772, "grad_norm": 0.7029931527846189, "learning_rate": 1.8255961070559612e-05, "loss": 0.6924, "step": 6114 }, { "epoch": 0.17853493328661937, "grad_norm": 0.6887457322862552, "learning_rate": 1.8255312246553124e-05, "loss": 0.6972, "step": 6115 }, { "epoch": 0.17856412951446674, "grad_norm": 0.7198521359520157, "learning_rate": 1.8254663422546637e-05, "loss": 0.6376, "step": 6116 }, { "epoch": 0.1785933257423141, "grad_norm": 0.6601429776803579, "learning_rate": 1.825401459854015e-05, "loss": 0.6428, "step": 6117 }, { "epoch": 0.17862252197016146, "grad_norm": 0.6683191212871779, "learning_rate": 1.825336577453366e-05, "loss": 0.6466, "step": 6118 }, { "epoch": 0.17865171819800882, "grad_norm": 0.6912353079489831, "learning_rate": 1.8252716950527173e-05, "loss": 0.7195, "step": 6119 }, { "epoch": 0.17868091442585618, "grad_norm": 0.6605411594764643, "learning_rate": 1.8252068126520685e-05, "loss": 0.6145, "step": 6120 }, { "epoch": 0.17871011065370354, "grad_norm": 0.7253457360171827, "learning_rate": 1.8251419302514197e-05, "loss": 0.7025, "step": 6121 }, { "epoch": 0.1787393068815509, "grad_norm": 0.7168698633669018, "learning_rate": 1.8250770478507705e-05, "loss": 0.6383, "step": 6122 }, { "epoch": 0.17876850310939826, "grad_norm": 0.6367703253534392, "learning_rate": 1.8250121654501217e-05, "loss": 0.5889, "step": 6123 }, { "epoch": 0.17879769933724562, "grad_norm": 0.6832794751232973, "learning_rate": 1.824947283049473e-05, "loss": 0.6985, "step": 6124 }, { "epoch": 0.17882689556509299, "grad_norm": 0.7268503453911177, "learning_rate": 1.824882400648824e-05, "loss": 0.7387, "step": 6125 }, { "epoch": 0.17885609179294035, "grad_norm": 0.6902500927142499, "learning_rate": 1.8248175182481753e-05, "loss": 0.6659, "step": 6126 }, { "epoch": 0.1788852880207877, "grad_norm": 0.7523788029117268, "learning_rate": 1.8247526358475265e-05, "loss": 0.6573, "step": 6127 }, { "epoch": 0.17891448424863507, "grad_norm": 0.7064414045721025, "learning_rate": 1.8246877534468777e-05, "loss": 0.6846, "step": 6128 }, { "epoch": 0.17894368047648243, "grad_norm": 0.8437131316198707, "learning_rate": 1.824622871046229e-05, "loss": 0.7919, "step": 6129 }, { "epoch": 0.1789728767043298, "grad_norm": 0.5622860461692294, "learning_rate": 1.8245579886455798e-05, "loss": 0.5061, "step": 6130 }, { "epoch": 0.17900207293217715, "grad_norm": 0.7398765115332171, "learning_rate": 1.824493106244931e-05, "loss": 0.7188, "step": 6131 }, { "epoch": 0.17903126916002451, "grad_norm": 0.6537692588488182, "learning_rate": 1.8244282238442822e-05, "loss": 0.6702, "step": 6132 }, { "epoch": 0.17906046538787188, "grad_norm": 0.6958130448652904, "learning_rate": 1.8243633414436334e-05, "loss": 0.6948, "step": 6133 }, { "epoch": 0.17908966161571924, "grad_norm": 0.7435930912386567, "learning_rate": 1.8242984590429846e-05, "loss": 0.6104, "step": 6134 }, { "epoch": 0.1791188578435666, "grad_norm": 0.7125532264464084, "learning_rate": 1.824233576642336e-05, "loss": 0.6607, "step": 6135 }, { "epoch": 0.179148054071414, "grad_norm": 0.6468372214109177, "learning_rate": 1.824168694241687e-05, "loss": 0.626, "step": 6136 }, { "epoch": 0.17917725029926135, "grad_norm": 0.6669619907788046, "learning_rate": 1.8241038118410382e-05, "loss": 0.658, "step": 6137 }, { "epoch": 0.1792064465271087, "grad_norm": 0.743504163268884, "learning_rate": 1.8240389294403894e-05, "loss": 0.6602, "step": 6138 }, { "epoch": 0.17923564275495607, "grad_norm": 0.6862696228694527, "learning_rate": 1.8239740470397406e-05, "loss": 0.6288, "step": 6139 }, { "epoch": 0.17926483898280343, "grad_norm": 0.695663587179364, "learning_rate": 1.823909164639092e-05, "loss": 0.7103, "step": 6140 }, { "epoch": 0.1792940352106508, "grad_norm": 0.6633370203229227, "learning_rate": 1.823844282238443e-05, "loss": 0.6083, "step": 6141 }, { "epoch": 0.17932323143849815, "grad_norm": 0.6986777802084929, "learning_rate": 1.8237793998377942e-05, "loss": 0.6612, "step": 6142 }, { "epoch": 0.17935242766634552, "grad_norm": 0.6921024379970188, "learning_rate": 1.8237145174371454e-05, "loss": 0.6811, "step": 6143 }, { "epoch": 0.17938162389419288, "grad_norm": 0.7638513748765086, "learning_rate": 1.8236496350364966e-05, "loss": 0.78, "step": 6144 }, { "epoch": 0.17941082012204024, "grad_norm": 0.6665952513723814, "learning_rate": 1.8235847526358475e-05, "loss": 0.6585, "step": 6145 }, { "epoch": 0.1794400163498876, "grad_norm": 0.6925212507881252, "learning_rate": 1.8235198702351987e-05, "loss": 0.6333, "step": 6146 }, { "epoch": 0.17946921257773496, "grad_norm": 0.6153207817258182, "learning_rate": 1.82345498783455e-05, "loss": 0.5333, "step": 6147 }, { "epoch": 0.17949840880558232, "grad_norm": 0.6804342318651032, "learning_rate": 1.823390105433901e-05, "loss": 0.5799, "step": 6148 }, { "epoch": 0.17952760503342968, "grad_norm": 0.7308515012912534, "learning_rate": 1.8233252230332523e-05, "loss": 0.6755, "step": 6149 }, { "epoch": 0.17955680126127704, "grad_norm": 0.7358961138424311, "learning_rate": 1.8232603406326035e-05, "loss": 0.7002, "step": 6150 }, { "epoch": 0.1795859974891244, "grad_norm": 0.6858388327150116, "learning_rate": 1.8231954582319547e-05, "loss": 0.6619, "step": 6151 }, { "epoch": 0.17961519371697177, "grad_norm": 0.7322883780792538, "learning_rate": 1.823130575831306e-05, "loss": 0.7592, "step": 6152 }, { "epoch": 0.17964438994481913, "grad_norm": 0.6895357571659962, "learning_rate": 1.823065693430657e-05, "loss": 0.6444, "step": 6153 }, { "epoch": 0.1796735861726665, "grad_norm": 0.7321257942326106, "learning_rate": 1.8230008110300083e-05, "loss": 0.69, "step": 6154 }, { "epoch": 0.17970278240051385, "grad_norm": 0.6989928385286043, "learning_rate": 1.8229359286293595e-05, "loss": 0.6957, "step": 6155 }, { "epoch": 0.1797319786283612, "grad_norm": 0.6403260906560166, "learning_rate": 1.8228710462287107e-05, "loss": 0.613, "step": 6156 }, { "epoch": 0.17976117485620857, "grad_norm": 0.765856314605189, "learning_rate": 1.822806163828062e-05, "loss": 0.6729, "step": 6157 }, { "epoch": 0.17979037108405593, "grad_norm": 0.642420074198605, "learning_rate": 1.822741281427413e-05, "loss": 0.6418, "step": 6158 }, { "epoch": 0.1798195673119033, "grad_norm": 0.7151396870787154, "learning_rate": 1.8226763990267644e-05, "loss": 0.7356, "step": 6159 }, { "epoch": 0.17984876353975066, "grad_norm": 0.8190288896722852, "learning_rate": 1.8226115166261152e-05, "loss": 0.6887, "step": 6160 }, { "epoch": 0.17987795976759802, "grad_norm": 0.723397978958359, "learning_rate": 1.8225466342254664e-05, "loss": 0.703, "step": 6161 }, { "epoch": 0.17990715599544538, "grad_norm": 0.7393765738848235, "learning_rate": 1.8224817518248176e-05, "loss": 0.7056, "step": 6162 }, { "epoch": 0.17993635222329274, "grad_norm": 0.7161836669354895, "learning_rate": 1.8224168694241688e-05, "loss": 0.7413, "step": 6163 }, { "epoch": 0.1799655484511401, "grad_norm": 0.7459687998870856, "learning_rate": 1.82235198702352e-05, "loss": 0.7412, "step": 6164 }, { "epoch": 0.17999474467898746, "grad_norm": 0.7649885480347893, "learning_rate": 1.8222871046228712e-05, "loss": 0.7848, "step": 6165 }, { "epoch": 0.18002394090683485, "grad_norm": 0.7034110245872915, "learning_rate": 1.8222222222222224e-05, "loss": 0.6679, "step": 6166 }, { "epoch": 0.1800531371346822, "grad_norm": 0.774935332653416, "learning_rate": 1.8221573398215736e-05, "loss": 0.7285, "step": 6167 }, { "epoch": 0.18008233336252957, "grad_norm": 0.6908206954054485, "learning_rate": 1.8220924574209245e-05, "loss": 0.6337, "step": 6168 }, { "epoch": 0.18011152959037693, "grad_norm": 0.7125318843734028, "learning_rate": 1.8220275750202757e-05, "loss": 0.7396, "step": 6169 }, { "epoch": 0.1801407258182243, "grad_norm": 0.8603820170670827, "learning_rate": 1.821962692619627e-05, "loss": 0.71, "step": 6170 }, { "epoch": 0.18016992204607166, "grad_norm": 0.6663715803611943, "learning_rate": 1.821897810218978e-05, "loss": 0.6705, "step": 6171 }, { "epoch": 0.18019911827391902, "grad_norm": 0.7688289294532794, "learning_rate": 1.8218329278183293e-05, "loss": 0.7025, "step": 6172 }, { "epoch": 0.18022831450176638, "grad_norm": 0.70758735102378, "learning_rate": 1.821768045417681e-05, "loss": 0.6467, "step": 6173 }, { "epoch": 0.18025751072961374, "grad_norm": 0.6250335569529548, "learning_rate": 1.8217031630170317e-05, "loss": 0.5166, "step": 6174 }, { "epoch": 0.1802867069574611, "grad_norm": 0.7373118444168082, "learning_rate": 1.821638280616383e-05, "loss": 0.7349, "step": 6175 }, { "epoch": 0.18031590318530846, "grad_norm": 0.6291286745275992, "learning_rate": 1.821573398215734e-05, "loss": 0.5927, "step": 6176 }, { "epoch": 0.18034509941315582, "grad_norm": 0.7030955165546471, "learning_rate": 1.8215085158150853e-05, "loss": 0.6239, "step": 6177 }, { "epoch": 0.18037429564100319, "grad_norm": 0.6714623859544117, "learning_rate": 1.8214436334144365e-05, "loss": 0.6546, "step": 6178 }, { "epoch": 0.18040349186885055, "grad_norm": 0.6337065334968391, "learning_rate": 1.8213787510137877e-05, "loss": 0.5765, "step": 6179 }, { "epoch": 0.1804326880966979, "grad_norm": 0.6511774979923717, "learning_rate": 1.821313868613139e-05, "loss": 0.5638, "step": 6180 }, { "epoch": 0.18046188432454527, "grad_norm": 0.6635651284387634, "learning_rate": 1.82124898621249e-05, "loss": 0.6309, "step": 6181 }, { "epoch": 0.18049108055239263, "grad_norm": 0.6522621633467316, "learning_rate": 1.8211841038118413e-05, "loss": 0.5924, "step": 6182 }, { "epoch": 0.18052027678024, "grad_norm": 0.646917380215522, "learning_rate": 1.8211192214111922e-05, "loss": 0.6142, "step": 6183 }, { "epoch": 0.18054947300808735, "grad_norm": 0.741978306018495, "learning_rate": 1.8210543390105434e-05, "loss": 0.5806, "step": 6184 }, { "epoch": 0.1805786692359347, "grad_norm": 0.6550427945943059, "learning_rate": 1.8209894566098946e-05, "loss": 0.5885, "step": 6185 }, { "epoch": 0.18060786546378207, "grad_norm": 0.8562444783681306, "learning_rate": 1.8209245742092458e-05, "loss": 0.7801, "step": 6186 }, { "epoch": 0.18063706169162944, "grad_norm": 0.6589961837564244, "learning_rate": 1.820859691808597e-05, "loss": 0.6431, "step": 6187 }, { "epoch": 0.1806662579194768, "grad_norm": 0.7052060351914319, "learning_rate": 1.8207948094079482e-05, "loss": 0.7028, "step": 6188 }, { "epoch": 0.18069545414732416, "grad_norm": 0.7089954441121542, "learning_rate": 1.8207299270072994e-05, "loss": 0.6749, "step": 6189 }, { "epoch": 0.18072465037517152, "grad_norm": 0.7691647547528687, "learning_rate": 1.8206650446066506e-05, "loss": 0.8166, "step": 6190 }, { "epoch": 0.18075384660301888, "grad_norm": 0.7633350800200628, "learning_rate": 1.8206001622060018e-05, "loss": 0.6836, "step": 6191 }, { "epoch": 0.18078304283086624, "grad_norm": 0.7136691478711122, "learning_rate": 1.820535279805353e-05, "loss": 0.6612, "step": 6192 }, { "epoch": 0.1808122390587136, "grad_norm": 0.6608330100657608, "learning_rate": 1.8204703974047042e-05, "loss": 0.6063, "step": 6193 }, { "epoch": 0.18084143528656096, "grad_norm": 0.6603522174787149, "learning_rate": 1.8204055150040554e-05, "loss": 0.6616, "step": 6194 }, { "epoch": 0.18087063151440833, "grad_norm": 0.7278301573034659, "learning_rate": 1.8203406326034066e-05, "loss": 0.7333, "step": 6195 }, { "epoch": 0.18089982774225571, "grad_norm": 0.6850352768966991, "learning_rate": 1.820275750202758e-05, "loss": 0.6172, "step": 6196 }, { "epoch": 0.18092902397010308, "grad_norm": 0.9077347488660363, "learning_rate": 1.820210867802109e-05, "loss": 0.7969, "step": 6197 }, { "epoch": 0.18095822019795044, "grad_norm": 0.7506422317644936, "learning_rate": 1.82014598540146e-05, "loss": 0.7056, "step": 6198 }, { "epoch": 0.1809874164257978, "grad_norm": 0.793721711430317, "learning_rate": 1.820081103000811e-05, "loss": 0.7706, "step": 6199 }, { "epoch": 0.18101661265364516, "grad_norm": 0.7412127221304268, "learning_rate": 1.8200162206001623e-05, "loss": 0.697, "step": 6200 }, { "epoch": 0.18104580888149252, "grad_norm": 0.7095728869530294, "learning_rate": 1.8199513381995135e-05, "loss": 0.6641, "step": 6201 }, { "epoch": 0.18107500510933988, "grad_norm": 0.6795832113663673, "learning_rate": 1.8198864557988647e-05, "loss": 0.6613, "step": 6202 }, { "epoch": 0.18110420133718724, "grad_norm": 0.7075125965911039, "learning_rate": 1.819821573398216e-05, "loss": 0.6555, "step": 6203 }, { "epoch": 0.1811333975650346, "grad_norm": 0.700229707075016, "learning_rate": 1.819756690997567e-05, "loss": 0.6733, "step": 6204 }, { "epoch": 0.18116259379288197, "grad_norm": 0.6743565998770894, "learning_rate": 1.8196918085969183e-05, "loss": 0.5949, "step": 6205 }, { "epoch": 0.18119179002072933, "grad_norm": 0.6976123849618447, "learning_rate": 1.8196269261962692e-05, "loss": 0.6704, "step": 6206 }, { "epoch": 0.1812209862485767, "grad_norm": 0.7029072300584039, "learning_rate": 1.8195620437956204e-05, "loss": 0.6908, "step": 6207 }, { "epoch": 0.18125018247642405, "grad_norm": 0.6130373895211939, "learning_rate": 1.8194971613949716e-05, "loss": 0.5533, "step": 6208 }, { "epoch": 0.1812793787042714, "grad_norm": 0.652080653292696, "learning_rate": 1.8194322789943228e-05, "loss": 0.5981, "step": 6209 }, { "epoch": 0.18130857493211877, "grad_norm": 0.7804597581739157, "learning_rate": 1.819367396593674e-05, "loss": 0.7719, "step": 6210 }, { "epoch": 0.18133777115996613, "grad_norm": 0.7075606528904116, "learning_rate": 1.8193025141930255e-05, "loss": 0.652, "step": 6211 }, { "epoch": 0.1813669673878135, "grad_norm": 0.6758168470883438, "learning_rate": 1.8192376317923764e-05, "loss": 0.6615, "step": 6212 }, { "epoch": 0.18139616361566085, "grad_norm": 0.6558478011050082, "learning_rate": 1.8191727493917276e-05, "loss": 0.5894, "step": 6213 }, { "epoch": 0.18142535984350822, "grad_norm": 0.7463204479272901, "learning_rate": 1.8191078669910788e-05, "loss": 0.6492, "step": 6214 }, { "epoch": 0.18145455607135558, "grad_norm": 0.7316637946903491, "learning_rate": 1.81904298459043e-05, "loss": 0.737, "step": 6215 }, { "epoch": 0.18148375229920294, "grad_norm": 0.5896869487765315, "learning_rate": 1.8189781021897812e-05, "loss": 0.5168, "step": 6216 }, { "epoch": 0.1815129485270503, "grad_norm": 0.6795605230301095, "learning_rate": 1.8189132197891324e-05, "loss": 0.6415, "step": 6217 }, { "epoch": 0.18154214475489766, "grad_norm": 0.7182506261845947, "learning_rate": 1.8188483373884836e-05, "loss": 0.7724, "step": 6218 }, { "epoch": 0.18157134098274502, "grad_norm": 0.7518359183793185, "learning_rate": 1.8187834549878348e-05, "loss": 0.656, "step": 6219 }, { "epoch": 0.18160053721059238, "grad_norm": 0.7333528458949261, "learning_rate": 1.818718572587186e-05, "loss": 0.7636, "step": 6220 }, { "epoch": 0.18162973343843974, "grad_norm": 0.6746513828331906, "learning_rate": 1.818653690186537e-05, "loss": 0.6281, "step": 6221 }, { "epoch": 0.1816589296662871, "grad_norm": 0.7345527210614703, "learning_rate": 1.818588807785888e-05, "loss": 0.7329, "step": 6222 }, { "epoch": 0.18168812589413447, "grad_norm": 0.6720063621911089, "learning_rate": 1.8185239253852393e-05, "loss": 0.5908, "step": 6223 }, { "epoch": 0.18171732212198183, "grad_norm": 0.7110353854230561, "learning_rate": 1.8184590429845905e-05, "loss": 0.6872, "step": 6224 }, { "epoch": 0.1817465183498292, "grad_norm": 0.739344972878991, "learning_rate": 1.8183941605839417e-05, "loss": 0.6736, "step": 6225 }, { "epoch": 0.18177571457767655, "grad_norm": 0.6552803288119675, "learning_rate": 1.818329278183293e-05, "loss": 0.5983, "step": 6226 }, { "epoch": 0.18180491080552394, "grad_norm": 0.7550866666435091, "learning_rate": 1.818264395782644e-05, "loss": 0.7381, "step": 6227 }, { "epoch": 0.1818341070333713, "grad_norm": 0.6665252369830357, "learning_rate": 1.8181995133819953e-05, "loss": 0.6132, "step": 6228 }, { "epoch": 0.18186330326121866, "grad_norm": 0.7441836135199266, "learning_rate": 1.8181346309813465e-05, "loss": 0.7048, "step": 6229 }, { "epoch": 0.18189249948906602, "grad_norm": 0.7052192360950489, "learning_rate": 1.8180697485806977e-05, "loss": 0.6775, "step": 6230 }, { "epoch": 0.18192169571691338, "grad_norm": 0.6435877778690551, "learning_rate": 1.818004866180049e-05, "loss": 0.5414, "step": 6231 }, { "epoch": 0.18195089194476075, "grad_norm": 0.7638572763202961, "learning_rate": 1.8179399837794e-05, "loss": 0.7276, "step": 6232 }, { "epoch": 0.1819800881726081, "grad_norm": 0.786018927324264, "learning_rate": 1.8178751013787513e-05, "loss": 0.6155, "step": 6233 }, { "epoch": 0.18200928440045547, "grad_norm": 0.755969123162924, "learning_rate": 1.8178102189781025e-05, "loss": 0.7206, "step": 6234 }, { "epoch": 0.18203848062830283, "grad_norm": 0.6379715225738606, "learning_rate": 1.8177453365774537e-05, "loss": 0.5961, "step": 6235 }, { "epoch": 0.1820676768561502, "grad_norm": 0.7534403397150006, "learning_rate": 1.8176804541768046e-05, "loss": 0.6849, "step": 6236 }, { "epoch": 0.18209687308399755, "grad_norm": 0.7126035974760178, "learning_rate": 1.8176155717761558e-05, "loss": 0.6398, "step": 6237 }, { "epoch": 0.1821260693118449, "grad_norm": 0.6498308725295378, "learning_rate": 1.817550689375507e-05, "loss": 0.6199, "step": 6238 }, { "epoch": 0.18215526553969227, "grad_norm": 0.7222295193364084, "learning_rate": 1.8174858069748582e-05, "loss": 0.6736, "step": 6239 }, { "epoch": 0.18218446176753963, "grad_norm": 0.6693154974394288, "learning_rate": 1.8174209245742094e-05, "loss": 0.6587, "step": 6240 }, { "epoch": 0.182213657995387, "grad_norm": 0.6818240104037233, "learning_rate": 1.8173560421735606e-05, "loss": 0.6217, "step": 6241 }, { "epoch": 0.18224285422323436, "grad_norm": 0.7683789272402739, "learning_rate": 1.8172911597729118e-05, "loss": 0.7366, "step": 6242 }, { "epoch": 0.18227205045108172, "grad_norm": 0.7624417335599225, "learning_rate": 1.817226277372263e-05, "loss": 0.6753, "step": 6243 }, { "epoch": 0.18230124667892908, "grad_norm": 0.6433021209941535, "learning_rate": 1.817161394971614e-05, "loss": 0.5715, "step": 6244 }, { "epoch": 0.18233044290677644, "grad_norm": 0.6546845223118875, "learning_rate": 1.817096512570965e-05, "loss": 0.5897, "step": 6245 }, { "epoch": 0.1823596391346238, "grad_norm": 0.6961751304315665, "learning_rate": 1.8170316301703163e-05, "loss": 0.6827, "step": 6246 }, { "epoch": 0.18238883536247116, "grad_norm": 0.6771172921629601, "learning_rate": 1.8169667477696675e-05, "loss": 0.6461, "step": 6247 }, { "epoch": 0.18241803159031852, "grad_norm": 0.62918042700038, "learning_rate": 1.8169018653690187e-05, "loss": 0.5859, "step": 6248 }, { "epoch": 0.18244722781816589, "grad_norm": 0.6522601746864961, "learning_rate": 1.8168369829683702e-05, "loss": 0.6113, "step": 6249 }, { "epoch": 0.18247642404601325, "grad_norm": 0.6579151333080081, "learning_rate": 1.816772100567721e-05, "loss": 0.6333, "step": 6250 }, { "epoch": 0.1825056202738606, "grad_norm": 0.6451317793947912, "learning_rate": 1.8167072181670723e-05, "loss": 0.5777, "step": 6251 }, { "epoch": 0.18253481650170797, "grad_norm": 0.707442857651366, "learning_rate": 1.8166423357664235e-05, "loss": 0.7022, "step": 6252 }, { "epoch": 0.18256401272955533, "grad_norm": 0.7385910361605716, "learning_rate": 1.8165774533657747e-05, "loss": 0.6656, "step": 6253 }, { "epoch": 0.1825932089574027, "grad_norm": 0.6958618817253497, "learning_rate": 1.816512570965126e-05, "loss": 0.711, "step": 6254 }, { "epoch": 0.18262240518525005, "grad_norm": 0.8381092284139722, "learning_rate": 1.816447688564477e-05, "loss": 0.656, "step": 6255 }, { "epoch": 0.18265160141309741, "grad_norm": 0.6688223795989435, "learning_rate": 1.8163828061638283e-05, "loss": 0.6493, "step": 6256 }, { "epoch": 0.1826807976409448, "grad_norm": 0.6897415122188468, "learning_rate": 1.8163179237631795e-05, "loss": 0.6271, "step": 6257 }, { "epoch": 0.18270999386879216, "grad_norm": 0.6724424329523419, "learning_rate": 1.8162530413625307e-05, "loss": 0.6293, "step": 6258 }, { "epoch": 0.18273919009663953, "grad_norm": 0.6826250076794448, "learning_rate": 1.8161881589618816e-05, "loss": 0.6225, "step": 6259 }, { "epoch": 0.1827683863244869, "grad_norm": 0.659512202134277, "learning_rate": 1.8161232765612328e-05, "loss": 0.6568, "step": 6260 }, { "epoch": 0.18279758255233425, "grad_norm": 0.6340817128985181, "learning_rate": 1.816058394160584e-05, "loss": 0.5552, "step": 6261 }, { "epoch": 0.1828267787801816, "grad_norm": 0.7184424494061656, "learning_rate": 1.8159935117599352e-05, "loss": 0.6945, "step": 6262 }, { "epoch": 0.18285597500802897, "grad_norm": 0.7327187082372872, "learning_rate": 1.8159286293592864e-05, "loss": 0.7519, "step": 6263 }, { "epoch": 0.18288517123587633, "grad_norm": 0.6638219653534014, "learning_rate": 1.8158637469586376e-05, "loss": 0.6807, "step": 6264 }, { "epoch": 0.1829143674637237, "grad_norm": 0.684025130101197, "learning_rate": 1.8157988645579888e-05, "loss": 0.6812, "step": 6265 }, { "epoch": 0.18294356369157105, "grad_norm": 0.7241903063247603, "learning_rate": 1.81573398215734e-05, "loss": 0.7316, "step": 6266 }, { "epoch": 0.18297275991941842, "grad_norm": 0.8032323913224328, "learning_rate": 1.8156690997566912e-05, "loss": 0.8195, "step": 6267 }, { "epoch": 0.18300195614726578, "grad_norm": 0.7293918493525636, "learning_rate": 1.8156042173560424e-05, "loss": 0.681, "step": 6268 }, { "epoch": 0.18303115237511314, "grad_norm": 0.7244923931363892, "learning_rate": 1.8155393349553936e-05, "loss": 0.6593, "step": 6269 }, { "epoch": 0.1830603486029605, "grad_norm": 0.7434984391636345, "learning_rate": 1.8154744525547448e-05, "loss": 0.6395, "step": 6270 }, { "epoch": 0.18308954483080786, "grad_norm": 0.7199991670929887, "learning_rate": 1.815409570154096e-05, "loss": 0.7042, "step": 6271 }, { "epoch": 0.18311874105865522, "grad_norm": 0.7790899996650946, "learning_rate": 1.8153446877534472e-05, "loss": 0.7235, "step": 6272 }, { "epoch": 0.18314793728650258, "grad_norm": 0.6839075172123998, "learning_rate": 1.815279805352798e-05, "loss": 0.6067, "step": 6273 }, { "epoch": 0.18317713351434994, "grad_norm": 0.7388333105757406, "learning_rate": 1.8152149229521493e-05, "loss": 0.7334, "step": 6274 }, { "epoch": 0.1832063297421973, "grad_norm": 0.7025757506474362, "learning_rate": 1.8151500405515005e-05, "loss": 0.7438, "step": 6275 }, { "epoch": 0.18323552597004467, "grad_norm": 0.7423025492452275, "learning_rate": 1.8150851581508517e-05, "loss": 0.7355, "step": 6276 }, { "epoch": 0.18326472219789203, "grad_norm": 0.6720850611257921, "learning_rate": 1.815020275750203e-05, "loss": 0.6332, "step": 6277 }, { "epoch": 0.1832939184257394, "grad_norm": 0.734935813670742, "learning_rate": 1.814955393349554e-05, "loss": 0.7044, "step": 6278 }, { "epoch": 0.18332311465358675, "grad_norm": 0.6542289071457416, "learning_rate": 1.8148905109489053e-05, "loss": 0.5853, "step": 6279 }, { "epoch": 0.1833523108814341, "grad_norm": 0.7584988019095429, "learning_rate": 1.8148256285482565e-05, "loss": 0.7201, "step": 6280 }, { "epoch": 0.18338150710928147, "grad_norm": 0.7443899669079971, "learning_rate": 1.8147607461476077e-05, "loss": 0.7551, "step": 6281 }, { "epoch": 0.18341070333712883, "grad_norm": 29.015509562830538, "learning_rate": 1.8146958637469586e-05, "loss": 1.2135, "step": 6282 }, { "epoch": 0.1834398995649762, "grad_norm": 0.7402256619282397, "learning_rate": 1.8146309813463098e-05, "loss": 0.7041, "step": 6283 }, { "epoch": 0.18346909579282356, "grad_norm": 0.7037484782778294, "learning_rate": 1.814566098945661e-05, "loss": 0.6096, "step": 6284 }, { "epoch": 0.18349829202067092, "grad_norm": 0.6581145378288944, "learning_rate": 1.814501216545012e-05, "loss": 0.5675, "step": 6285 }, { "epoch": 0.18352748824851828, "grad_norm": 0.7001291643836286, "learning_rate": 1.8144363341443637e-05, "loss": 0.6733, "step": 6286 }, { "epoch": 0.18355668447636567, "grad_norm": 0.7336775645653921, "learning_rate": 1.814371451743715e-05, "loss": 0.6694, "step": 6287 }, { "epoch": 0.18358588070421303, "grad_norm": 0.7357253895685937, "learning_rate": 1.8143065693430658e-05, "loss": 0.7463, "step": 6288 }, { "epoch": 0.1836150769320604, "grad_norm": 0.6849352364422726, "learning_rate": 1.814241686942417e-05, "loss": 0.6708, "step": 6289 }, { "epoch": 0.18364427315990775, "grad_norm": 0.6359634326020396, "learning_rate": 1.8141768045417682e-05, "loss": 0.5614, "step": 6290 }, { "epoch": 0.1836734693877551, "grad_norm": 0.7414589359658361, "learning_rate": 1.8141119221411194e-05, "loss": 0.6775, "step": 6291 }, { "epoch": 0.18370266561560247, "grad_norm": 0.6697029123452202, "learning_rate": 1.8140470397404706e-05, "loss": 0.6758, "step": 6292 }, { "epoch": 0.18373186184344983, "grad_norm": 0.6917130170510236, "learning_rate": 1.8139821573398218e-05, "loss": 0.6582, "step": 6293 }, { "epoch": 0.1837610580712972, "grad_norm": 0.728635802094349, "learning_rate": 1.813917274939173e-05, "loss": 0.7055, "step": 6294 }, { "epoch": 0.18379025429914456, "grad_norm": 0.6636210949939545, "learning_rate": 1.8138523925385242e-05, "loss": 0.6476, "step": 6295 }, { "epoch": 0.18381945052699192, "grad_norm": 0.7162290975605593, "learning_rate": 1.8137875101378754e-05, "loss": 0.631, "step": 6296 }, { "epoch": 0.18384864675483928, "grad_norm": 0.7062243887982379, "learning_rate": 1.8137226277372263e-05, "loss": 0.7211, "step": 6297 }, { "epoch": 0.18387784298268664, "grad_norm": 0.6828522385250451, "learning_rate": 1.8136577453365775e-05, "loss": 0.632, "step": 6298 }, { "epoch": 0.183907039210534, "grad_norm": 0.6936575989055871, "learning_rate": 1.8135928629359287e-05, "loss": 0.7017, "step": 6299 }, { "epoch": 0.18393623543838136, "grad_norm": 0.7149774195373594, "learning_rate": 1.81352798053528e-05, "loss": 0.7428, "step": 6300 }, { "epoch": 0.18396543166622872, "grad_norm": 0.6954279526619738, "learning_rate": 1.813463098134631e-05, "loss": 0.6766, "step": 6301 }, { "epoch": 0.18399462789407608, "grad_norm": 0.7637119891791011, "learning_rate": 1.8133982157339823e-05, "loss": 0.733, "step": 6302 }, { "epoch": 0.18402382412192345, "grad_norm": 0.7443787070397176, "learning_rate": 1.8133333333333335e-05, "loss": 0.6719, "step": 6303 }, { "epoch": 0.1840530203497708, "grad_norm": 0.6360404393832587, "learning_rate": 1.8132684509326847e-05, "loss": 0.5799, "step": 6304 }, { "epoch": 0.18408221657761817, "grad_norm": 0.5812395142133912, "learning_rate": 1.813203568532036e-05, "loss": 0.5121, "step": 6305 }, { "epoch": 0.18411141280546553, "grad_norm": 0.738806621006959, "learning_rate": 1.813138686131387e-05, "loss": 0.7187, "step": 6306 }, { "epoch": 0.1841406090333129, "grad_norm": 0.6770024673582185, "learning_rate": 1.8130738037307383e-05, "loss": 0.6877, "step": 6307 }, { "epoch": 0.18416980526116025, "grad_norm": 1.5385208034462972, "learning_rate": 1.8130089213300895e-05, "loss": 0.7169, "step": 6308 }, { "epoch": 0.1841990014890076, "grad_norm": 0.69945058443495, "learning_rate": 1.8129440389294407e-05, "loss": 0.6276, "step": 6309 }, { "epoch": 0.18422819771685497, "grad_norm": 0.7214522463455585, "learning_rate": 1.812879156528792e-05, "loss": 0.6919, "step": 6310 }, { "epoch": 0.18425739394470234, "grad_norm": 0.7039344145813793, "learning_rate": 1.8128142741281428e-05, "loss": 0.7265, "step": 6311 }, { "epoch": 0.1842865901725497, "grad_norm": 0.7244634274686438, "learning_rate": 1.812749391727494e-05, "loss": 0.6701, "step": 6312 }, { "epoch": 0.18431578640039706, "grad_norm": 0.6957818463014014, "learning_rate": 1.812684509326845e-05, "loss": 0.6504, "step": 6313 }, { "epoch": 0.18434498262824442, "grad_norm": 0.7151687766796989, "learning_rate": 1.8126196269261964e-05, "loss": 0.6539, "step": 6314 }, { "epoch": 0.18437417885609178, "grad_norm": 0.7622230850669034, "learning_rate": 1.8125547445255476e-05, "loss": 0.6127, "step": 6315 }, { "epoch": 0.18440337508393914, "grad_norm": 0.7496084570145585, "learning_rate": 1.8124898621248988e-05, "loss": 0.7188, "step": 6316 }, { "epoch": 0.18443257131178653, "grad_norm": 0.6784759360219129, "learning_rate": 1.81242497972425e-05, "loss": 0.6261, "step": 6317 }, { "epoch": 0.1844617675396339, "grad_norm": 0.7062121517883531, "learning_rate": 1.8123600973236012e-05, "loss": 0.6592, "step": 6318 }, { "epoch": 0.18449096376748125, "grad_norm": 0.6850663262267102, "learning_rate": 1.8122952149229524e-05, "loss": 0.6555, "step": 6319 }, { "epoch": 0.18452015999532861, "grad_norm": 0.7179766881625016, "learning_rate": 1.8122303325223032e-05, "loss": 0.7129, "step": 6320 }, { "epoch": 0.18454935622317598, "grad_norm": 0.7159890745219528, "learning_rate": 1.8121654501216544e-05, "loss": 0.6825, "step": 6321 }, { "epoch": 0.18457855245102334, "grad_norm": 0.660713799353955, "learning_rate": 1.8121005677210057e-05, "loss": 0.5993, "step": 6322 }, { "epoch": 0.1846077486788707, "grad_norm": 0.6852079214687393, "learning_rate": 1.812035685320357e-05, "loss": 0.6285, "step": 6323 }, { "epoch": 0.18463694490671806, "grad_norm": 0.69153352875923, "learning_rate": 1.8119708029197084e-05, "loss": 0.6723, "step": 6324 }, { "epoch": 0.18466614113456542, "grad_norm": 0.7083635898780238, "learning_rate": 1.8119059205190596e-05, "loss": 0.6942, "step": 6325 }, { "epoch": 0.18469533736241278, "grad_norm": 0.7074037078906069, "learning_rate": 1.8118410381184105e-05, "loss": 0.6382, "step": 6326 }, { "epoch": 0.18472453359026014, "grad_norm": 0.6807177696772816, "learning_rate": 1.8117761557177617e-05, "loss": 0.7117, "step": 6327 }, { "epoch": 0.1847537298181075, "grad_norm": 0.675521340839237, "learning_rate": 1.811711273317113e-05, "loss": 0.6506, "step": 6328 }, { "epoch": 0.18478292604595487, "grad_norm": 0.7765232173626264, "learning_rate": 1.811646390916464e-05, "loss": 0.78, "step": 6329 }, { "epoch": 0.18481212227380223, "grad_norm": 0.7166307147070495, "learning_rate": 1.8115815085158153e-05, "loss": 0.727, "step": 6330 }, { "epoch": 0.1848413185016496, "grad_norm": 0.6474404347175565, "learning_rate": 1.8115166261151665e-05, "loss": 0.6005, "step": 6331 }, { "epoch": 0.18487051472949695, "grad_norm": 0.6663450550981188, "learning_rate": 1.8114517437145177e-05, "loss": 0.6213, "step": 6332 }, { "epoch": 0.1848997109573443, "grad_norm": 0.716652434563271, "learning_rate": 1.811386861313869e-05, "loss": 0.65, "step": 6333 }, { "epoch": 0.18492890718519167, "grad_norm": 0.713950128224107, "learning_rate": 1.81132197891322e-05, "loss": 0.7008, "step": 6334 }, { "epoch": 0.18495810341303903, "grad_norm": 0.7116301120071097, "learning_rate": 1.811257096512571e-05, "loss": 0.7302, "step": 6335 }, { "epoch": 0.1849872996408864, "grad_norm": 0.6598604798143739, "learning_rate": 1.811192214111922e-05, "loss": 0.6315, "step": 6336 }, { "epoch": 0.18501649586873375, "grad_norm": 0.6554805695457193, "learning_rate": 1.8111273317112734e-05, "loss": 0.6296, "step": 6337 }, { "epoch": 0.18504569209658112, "grad_norm": 0.7668586232903277, "learning_rate": 1.8110624493106246e-05, "loss": 0.6522, "step": 6338 }, { "epoch": 0.18507488832442848, "grad_norm": 0.718526559290652, "learning_rate": 1.8109975669099758e-05, "loss": 0.6447, "step": 6339 }, { "epoch": 0.18510408455227584, "grad_norm": 0.6459284042571014, "learning_rate": 1.810932684509327e-05, "loss": 0.6126, "step": 6340 }, { "epoch": 0.1851332807801232, "grad_norm": 0.6686522415143805, "learning_rate": 1.810867802108678e-05, "loss": 0.6369, "step": 6341 }, { "epoch": 0.18516247700797056, "grad_norm": 0.7762205058621073, "learning_rate": 1.8108029197080294e-05, "loss": 0.7018, "step": 6342 }, { "epoch": 0.18519167323581792, "grad_norm": 0.7030073671260941, "learning_rate": 1.8107380373073806e-05, "loss": 0.7094, "step": 6343 }, { "epoch": 0.18522086946366528, "grad_norm": 0.7789317856483181, "learning_rate": 1.8106731549067318e-05, "loss": 0.7283, "step": 6344 }, { "epoch": 0.18525006569151264, "grad_norm": 0.6943666780190775, "learning_rate": 1.810608272506083e-05, "loss": 0.6288, "step": 6345 }, { "epoch": 0.18527926191936, "grad_norm": 0.7671901965553898, "learning_rate": 1.8105433901054342e-05, "loss": 0.6447, "step": 6346 }, { "epoch": 0.1853084581472074, "grad_norm": 0.6821048763651144, "learning_rate": 1.8104785077047854e-05, "loss": 0.622, "step": 6347 }, { "epoch": 0.18533765437505476, "grad_norm": 0.6608947519121695, "learning_rate": 1.8104136253041366e-05, "loss": 0.6625, "step": 6348 }, { "epoch": 0.18536685060290212, "grad_norm": 0.688092728694324, "learning_rate": 1.8103487429034874e-05, "loss": 0.6567, "step": 6349 }, { "epoch": 0.18539604683074948, "grad_norm": 0.680787356222834, "learning_rate": 1.8102838605028386e-05, "loss": 0.6567, "step": 6350 }, { "epoch": 0.18542524305859684, "grad_norm": 0.6920130265204607, "learning_rate": 1.81021897810219e-05, "loss": 0.6961, "step": 6351 }, { "epoch": 0.1854544392864442, "grad_norm": 0.7267779180057771, "learning_rate": 1.810154095701541e-05, "loss": 0.6769, "step": 6352 }, { "epoch": 0.18548363551429156, "grad_norm": 0.6680445504946594, "learning_rate": 1.8100892133008923e-05, "loss": 0.6555, "step": 6353 }, { "epoch": 0.18551283174213892, "grad_norm": 0.6741218954208621, "learning_rate": 1.8100243309002435e-05, "loss": 0.6456, "step": 6354 }, { "epoch": 0.18554202796998628, "grad_norm": 0.7469073048393212, "learning_rate": 1.8099594484995947e-05, "loss": 0.6258, "step": 6355 }, { "epoch": 0.18557122419783365, "grad_norm": 0.6547341273605178, "learning_rate": 1.809894566098946e-05, "loss": 0.64, "step": 6356 }, { "epoch": 0.185600420425681, "grad_norm": 0.6434888630564624, "learning_rate": 1.809829683698297e-05, "loss": 0.5716, "step": 6357 }, { "epoch": 0.18562961665352837, "grad_norm": 0.7143294916374547, "learning_rate": 1.809764801297648e-05, "loss": 0.6716, "step": 6358 }, { "epoch": 0.18565881288137573, "grad_norm": 0.7590339420924912, "learning_rate": 1.809699918896999e-05, "loss": 0.736, "step": 6359 }, { "epoch": 0.1856880091092231, "grad_norm": 0.68980980916362, "learning_rate": 1.8096350364963503e-05, "loss": 0.6331, "step": 6360 }, { "epoch": 0.18571720533707045, "grad_norm": 0.6755316724025339, "learning_rate": 1.8095701540957015e-05, "loss": 0.6547, "step": 6361 }, { "epoch": 0.1857464015649178, "grad_norm": 0.6954523897019713, "learning_rate": 1.809505271695053e-05, "loss": 0.679, "step": 6362 }, { "epoch": 0.18577559779276517, "grad_norm": 0.6403299731225984, "learning_rate": 1.8094403892944043e-05, "loss": 0.5633, "step": 6363 }, { "epoch": 0.18580479402061253, "grad_norm": 0.7643846682506162, "learning_rate": 1.809375506893755e-05, "loss": 0.719, "step": 6364 }, { "epoch": 0.1858339902484599, "grad_norm": 0.8894413807104363, "learning_rate": 1.8093106244931064e-05, "loss": 0.743, "step": 6365 }, { "epoch": 0.18586318647630726, "grad_norm": 0.7236472268204416, "learning_rate": 1.8092457420924576e-05, "loss": 0.7459, "step": 6366 }, { "epoch": 0.18589238270415462, "grad_norm": 0.7058791687134178, "learning_rate": 1.8091808596918088e-05, "loss": 0.7015, "step": 6367 }, { "epoch": 0.18592157893200198, "grad_norm": 0.6939438104627655, "learning_rate": 1.80911597729116e-05, "loss": 0.6869, "step": 6368 }, { "epoch": 0.18595077515984934, "grad_norm": 0.690605261095726, "learning_rate": 1.809051094890511e-05, "loss": 0.7121, "step": 6369 }, { "epoch": 0.1859799713876967, "grad_norm": 0.6321896791461534, "learning_rate": 1.8089862124898624e-05, "loss": 0.515, "step": 6370 }, { "epoch": 0.18600916761554406, "grad_norm": 0.6974680820331096, "learning_rate": 1.8089213300892136e-05, "loss": 0.6891, "step": 6371 }, { "epoch": 0.18603836384339142, "grad_norm": 0.6582960084018991, "learning_rate": 1.8088564476885648e-05, "loss": 0.6077, "step": 6372 }, { "epoch": 0.18606756007123879, "grad_norm": 0.6129299520740643, "learning_rate": 1.8087915652879156e-05, "loss": 0.5789, "step": 6373 }, { "epoch": 0.18609675629908615, "grad_norm": 0.7015769108025942, "learning_rate": 1.808726682887267e-05, "loss": 0.6808, "step": 6374 }, { "epoch": 0.1861259525269335, "grad_norm": 0.7056854412111371, "learning_rate": 1.808661800486618e-05, "loss": 0.718, "step": 6375 }, { "epoch": 0.18615514875478087, "grad_norm": 0.6777290953571364, "learning_rate": 1.8085969180859692e-05, "loss": 0.6109, "step": 6376 }, { "epoch": 0.18618434498262826, "grad_norm": 0.6822148331283269, "learning_rate": 1.8085320356853204e-05, "loss": 0.5945, "step": 6377 }, { "epoch": 0.18621354121047562, "grad_norm": 0.6971695413660547, "learning_rate": 1.8084671532846716e-05, "loss": 0.6545, "step": 6378 }, { "epoch": 0.18624273743832298, "grad_norm": 0.6824657073863423, "learning_rate": 1.808402270884023e-05, "loss": 0.6425, "step": 6379 }, { "epoch": 0.18627193366617034, "grad_norm": 0.7178002445999864, "learning_rate": 1.808337388483374e-05, "loss": 0.688, "step": 6380 }, { "epoch": 0.1863011298940177, "grad_norm": 0.7340826136014404, "learning_rate": 1.8082725060827253e-05, "loss": 0.7352, "step": 6381 }, { "epoch": 0.18633032612186506, "grad_norm": 0.7383381838138845, "learning_rate": 1.8082076236820765e-05, "loss": 0.7598, "step": 6382 }, { "epoch": 0.18635952234971243, "grad_norm": 0.7043250492081671, "learning_rate": 1.8081427412814277e-05, "loss": 0.6322, "step": 6383 }, { "epoch": 0.1863887185775598, "grad_norm": 0.7006934783246784, "learning_rate": 1.808077858880779e-05, "loss": 0.6937, "step": 6384 }, { "epoch": 0.18641791480540715, "grad_norm": 0.6766258982021152, "learning_rate": 1.80801297648013e-05, "loss": 0.6654, "step": 6385 }, { "epoch": 0.1864471110332545, "grad_norm": 0.6687497603211745, "learning_rate": 1.8079480940794813e-05, "loss": 0.6364, "step": 6386 }, { "epoch": 0.18647630726110187, "grad_norm": 0.7824034285658329, "learning_rate": 1.807883211678832e-05, "loss": 0.6475, "step": 6387 }, { "epoch": 0.18650550348894923, "grad_norm": 0.7035140760401719, "learning_rate": 1.8078183292781833e-05, "loss": 0.7024, "step": 6388 }, { "epoch": 0.1865346997167966, "grad_norm": 0.819385537639336, "learning_rate": 1.8077534468775345e-05, "loss": 0.709, "step": 6389 }, { "epoch": 0.18656389594464395, "grad_norm": 0.6621164157577499, "learning_rate": 1.8076885644768857e-05, "loss": 0.6945, "step": 6390 }, { "epoch": 0.18659309217249131, "grad_norm": 0.6443455230476128, "learning_rate": 1.807623682076237e-05, "loss": 0.6559, "step": 6391 }, { "epoch": 0.18662228840033868, "grad_norm": 0.6593348981676058, "learning_rate": 1.807558799675588e-05, "loss": 0.6192, "step": 6392 }, { "epoch": 0.18665148462818604, "grad_norm": 0.6117108500022844, "learning_rate": 1.8074939172749394e-05, "loss": 0.5317, "step": 6393 }, { "epoch": 0.1866806808560334, "grad_norm": 0.6499410443292325, "learning_rate": 1.8074290348742906e-05, "loss": 0.6364, "step": 6394 }, { "epoch": 0.18670987708388076, "grad_norm": 0.6919769809011935, "learning_rate": 1.8073641524736418e-05, "loss": 0.647, "step": 6395 }, { "epoch": 0.18673907331172812, "grad_norm": 0.6466741658405446, "learning_rate": 1.8072992700729926e-05, "loss": 0.5827, "step": 6396 }, { "epoch": 0.18676826953957548, "grad_norm": 0.6896096308575503, "learning_rate": 1.8072343876723438e-05, "loss": 0.7033, "step": 6397 }, { "epoch": 0.18679746576742284, "grad_norm": 0.6856970284177046, "learning_rate": 1.807169505271695e-05, "loss": 0.6601, "step": 6398 }, { "epoch": 0.1868266619952702, "grad_norm": 0.699769398944966, "learning_rate": 1.8071046228710462e-05, "loss": 0.7152, "step": 6399 }, { "epoch": 0.18685585822311757, "grad_norm": 0.6769935277947844, "learning_rate": 1.8070397404703978e-05, "loss": 0.6513, "step": 6400 }, { "epoch": 0.18688505445096493, "grad_norm": 0.6830248116687618, "learning_rate": 1.806974858069749e-05, "loss": 0.5871, "step": 6401 }, { "epoch": 0.1869142506788123, "grad_norm": 0.680908551062178, "learning_rate": 1.8069099756691e-05, "loss": 0.64, "step": 6402 }, { "epoch": 0.18694344690665965, "grad_norm": 0.675384016434881, "learning_rate": 1.806845093268451e-05, "loss": 0.6414, "step": 6403 }, { "epoch": 0.186972643134507, "grad_norm": 0.6721161475614481, "learning_rate": 1.8067802108678022e-05, "loss": 0.6202, "step": 6404 }, { "epoch": 0.18700183936235437, "grad_norm": 0.662000122999, "learning_rate": 1.8067153284671534e-05, "loss": 0.6748, "step": 6405 }, { "epoch": 0.18703103559020173, "grad_norm": 0.7192646581236241, "learning_rate": 1.8066504460665046e-05, "loss": 0.637, "step": 6406 }, { "epoch": 0.18706023181804912, "grad_norm": 0.6662101598465182, "learning_rate": 1.806585563665856e-05, "loss": 0.6294, "step": 6407 }, { "epoch": 0.18708942804589648, "grad_norm": 0.7978002679450885, "learning_rate": 1.806520681265207e-05, "loss": 0.7257, "step": 6408 }, { "epoch": 0.18711862427374384, "grad_norm": 0.7811164934080175, "learning_rate": 1.8064557988645583e-05, "loss": 0.712, "step": 6409 }, { "epoch": 0.1871478205015912, "grad_norm": 0.6869756639963918, "learning_rate": 1.8063909164639095e-05, "loss": 0.6395, "step": 6410 }, { "epoch": 0.18717701672943857, "grad_norm": 0.687527018264002, "learning_rate": 1.8063260340632603e-05, "loss": 0.6275, "step": 6411 }, { "epoch": 0.18720621295728593, "grad_norm": 0.8757282526859141, "learning_rate": 1.8062611516626115e-05, "loss": 0.7473, "step": 6412 }, { "epoch": 0.1872354091851333, "grad_norm": 0.7102133122978558, "learning_rate": 1.8061962692619627e-05, "loss": 0.6384, "step": 6413 }, { "epoch": 0.18726460541298065, "grad_norm": 0.725103471271054, "learning_rate": 1.806131386861314e-05, "loss": 0.6919, "step": 6414 }, { "epoch": 0.187293801640828, "grad_norm": 0.8322842963930708, "learning_rate": 1.806066504460665e-05, "loss": 0.7953, "step": 6415 }, { "epoch": 0.18732299786867537, "grad_norm": 0.7339650334108052, "learning_rate": 1.8060016220600163e-05, "loss": 0.7054, "step": 6416 }, { "epoch": 0.18735219409652273, "grad_norm": 0.7523946140641999, "learning_rate": 1.8059367396593675e-05, "loss": 0.7088, "step": 6417 }, { "epoch": 0.1873813903243701, "grad_norm": 0.7476677007471279, "learning_rate": 1.8058718572587187e-05, "loss": 0.6553, "step": 6418 }, { "epoch": 0.18741058655221746, "grad_norm": 0.721549425429817, "learning_rate": 1.80580697485807e-05, "loss": 0.7416, "step": 6419 }, { "epoch": 0.18743978278006482, "grad_norm": 0.721507053073828, "learning_rate": 1.805742092457421e-05, "loss": 0.7182, "step": 6420 }, { "epoch": 0.18746897900791218, "grad_norm": 0.6602468144179722, "learning_rate": 1.8056772100567723e-05, "loss": 0.6289, "step": 6421 }, { "epoch": 0.18749817523575954, "grad_norm": 0.754395343589888, "learning_rate": 1.8056123276561236e-05, "loss": 0.7373, "step": 6422 }, { "epoch": 0.1875273714636069, "grad_norm": 0.7461898891096915, "learning_rate": 1.8055474452554748e-05, "loss": 0.6983, "step": 6423 }, { "epoch": 0.18755656769145426, "grad_norm": 0.6951816256593, "learning_rate": 1.805482562854826e-05, "loss": 0.6685, "step": 6424 }, { "epoch": 0.18758576391930162, "grad_norm": 0.7663397577280938, "learning_rate": 1.8054176804541768e-05, "loss": 0.725, "step": 6425 }, { "epoch": 0.18761496014714898, "grad_norm": 0.6516180945119185, "learning_rate": 1.805352798053528e-05, "loss": 0.6137, "step": 6426 }, { "epoch": 0.18764415637499635, "grad_norm": 0.7725065067072308, "learning_rate": 1.8052879156528792e-05, "loss": 0.7901, "step": 6427 }, { "epoch": 0.1876733526028437, "grad_norm": 0.6876397795064226, "learning_rate": 1.8052230332522304e-05, "loss": 0.696, "step": 6428 }, { "epoch": 0.18770254883069107, "grad_norm": 0.7144572263744506, "learning_rate": 1.8051581508515816e-05, "loss": 0.7408, "step": 6429 }, { "epoch": 0.18773174505853843, "grad_norm": 0.6775135949248796, "learning_rate": 1.805093268450933e-05, "loss": 0.7004, "step": 6430 }, { "epoch": 0.1877609412863858, "grad_norm": 0.6801533204755577, "learning_rate": 1.805028386050284e-05, "loss": 0.6678, "step": 6431 }, { "epoch": 0.18779013751423315, "grad_norm": 0.6716059652168838, "learning_rate": 1.8049635036496352e-05, "loss": 0.5816, "step": 6432 }, { "epoch": 0.1878193337420805, "grad_norm": 0.6829689462031352, "learning_rate": 1.8048986212489864e-05, "loss": 0.6719, "step": 6433 }, { "epoch": 0.18784852996992787, "grad_norm": 0.6423823174070203, "learning_rate": 1.8048337388483373e-05, "loss": 0.5711, "step": 6434 }, { "epoch": 0.18787772619777524, "grad_norm": 0.6643756623600444, "learning_rate": 1.8047688564476885e-05, "loss": 0.6307, "step": 6435 }, { "epoch": 0.1879069224256226, "grad_norm": 0.6725331965202269, "learning_rate": 1.8047039740470397e-05, "loss": 0.6398, "step": 6436 }, { "epoch": 0.18793611865346996, "grad_norm": 0.7296135780926233, "learning_rate": 1.8046390916463913e-05, "loss": 0.6873, "step": 6437 }, { "epoch": 0.18796531488131735, "grad_norm": 0.654276666889487, "learning_rate": 1.8045742092457425e-05, "loss": 0.6236, "step": 6438 }, { "epoch": 0.1879945111091647, "grad_norm": 0.74743113409269, "learning_rate": 1.8045093268450937e-05, "loss": 0.7633, "step": 6439 }, { "epoch": 0.18802370733701207, "grad_norm": 0.6408687382255868, "learning_rate": 1.8044444444444445e-05, "loss": 0.5651, "step": 6440 }, { "epoch": 0.18805290356485943, "grad_norm": 0.700855076547069, "learning_rate": 1.8043795620437957e-05, "loss": 0.6699, "step": 6441 }, { "epoch": 0.1880820997927068, "grad_norm": 0.6579469450771034, "learning_rate": 1.804314679643147e-05, "loss": 0.637, "step": 6442 }, { "epoch": 0.18811129602055415, "grad_norm": 0.6828169093848168, "learning_rate": 1.804249797242498e-05, "loss": 0.6761, "step": 6443 }, { "epoch": 0.18814049224840151, "grad_norm": 0.779508140740748, "learning_rate": 1.8041849148418493e-05, "loss": 0.8268, "step": 6444 }, { "epoch": 0.18816968847624888, "grad_norm": 0.6387005603495287, "learning_rate": 1.8041200324412005e-05, "loss": 0.5987, "step": 6445 }, { "epoch": 0.18819888470409624, "grad_norm": 0.661692428076493, "learning_rate": 1.8040551500405517e-05, "loss": 0.6145, "step": 6446 }, { "epoch": 0.1882280809319436, "grad_norm": 1.0340730794712414, "learning_rate": 1.803990267639903e-05, "loss": 0.6393, "step": 6447 }, { "epoch": 0.18825727715979096, "grad_norm": 0.6904830868175595, "learning_rate": 1.803925385239254e-05, "loss": 0.5814, "step": 6448 }, { "epoch": 0.18828647338763832, "grad_norm": 0.6782106600031771, "learning_rate": 1.803860502838605e-05, "loss": 0.6658, "step": 6449 }, { "epoch": 0.18831566961548568, "grad_norm": 0.7414268406768167, "learning_rate": 1.8037956204379562e-05, "loss": 0.7495, "step": 6450 }, { "epoch": 0.18834486584333304, "grad_norm": 0.6704777393173849, "learning_rate": 1.8037307380373074e-05, "loss": 0.6546, "step": 6451 }, { "epoch": 0.1883740620711804, "grad_norm": 0.729404851479216, "learning_rate": 1.8036658556366586e-05, "loss": 0.6486, "step": 6452 }, { "epoch": 0.18840325829902776, "grad_norm": 0.7259115272268126, "learning_rate": 1.8036009732360098e-05, "loss": 0.7367, "step": 6453 }, { "epoch": 0.18843245452687513, "grad_norm": 0.6912630080126468, "learning_rate": 1.803536090835361e-05, "loss": 0.6513, "step": 6454 }, { "epoch": 0.1884616507547225, "grad_norm": 0.7100633249993706, "learning_rate": 1.8034712084347122e-05, "loss": 0.6661, "step": 6455 }, { "epoch": 0.18849084698256985, "grad_norm": 0.7504119602192945, "learning_rate": 1.8034063260340634e-05, "loss": 0.6911, "step": 6456 }, { "epoch": 0.1885200432104172, "grad_norm": 0.7047949981811449, "learning_rate": 1.8033414436334146e-05, "loss": 0.7147, "step": 6457 }, { "epoch": 0.18854923943826457, "grad_norm": 0.7442395809843904, "learning_rate": 1.803276561232766e-05, "loss": 0.6422, "step": 6458 }, { "epoch": 0.18857843566611193, "grad_norm": 0.688691207029726, "learning_rate": 1.803211678832117e-05, "loss": 0.662, "step": 6459 }, { "epoch": 0.1886076318939593, "grad_norm": 0.7091616254928849, "learning_rate": 1.8031467964314682e-05, "loss": 0.652, "step": 6460 }, { "epoch": 0.18863682812180665, "grad_norm": 0.714945238891365, "learning_rate": 1.8030819140308194e-05, "loss": 0.6984, "step": 6461 }, { "epoch": 0.18866602434965402, "grad_norm": 0.7008966906869902, "learning_rate": 1.8030170316301706e-05, "loss": 0.6412, "step": 6462 }, { "epoch": 0.18869522057750138, "grad_norm": 0.730299663519541, "learning_rate": 1.8029521492295215e-05, "loss": 0.706, "step": 6463 }, { "epoch": 0.18872441680534874, "grad_norm": 0.6939660106266616, "learning_rate": 1.8028872668288727e-05, "loss": 0.703, "step": 6464 }, { "epoch": 0.1887536130331961, "grad_norm": 0.6610335271554394, "learning_rate": 1.802822384428224e-05, "loss": 0.6382, "step": 6465 }, { "epoch": 0.18878280926104346, "grad_norm": 0.66437342568428, "learning_rate": 1.802757502027575e-05, "loss": 0.5596, "step": 6466 }, { "epoch": 0.18881200548889082, "grad_norm": 0.6648777444725243, "learning_rate": 1.8026926196269263e-05, "loss": 0.6067, "step": 6467 }, { "epoch": 0.1888412017167382, "grad_norm": 0.7094587201996503, "learning_rate": 1.8026277372262775e-05, "loss": 0.6554, "step": 6468 }, { "epoch": 0.18887039794458557, "grad_norm": 0.700772608586564, "learning_rate": 1.8025628548256287e-05, "loss": 0.7041, "step": 6469 }, { "epoch": 0.18889959417243293, "grad_norm": 0.6637967988351563, "learning_rate": 1.80249797242498e-05, "loss": 0.6384, "step": 6470 }, { "epoch": 0.1889287904002803, "grad_norm": 0.6548122607191702, "learning_rate": 1.802433090024331e-05, "loss": 0.6663, "step": 6471 }, { "epoch": 0.18895798662812766, "grad_norm": 0.656861370787021, "learning_rate": 1.802368207623682e-05, "loss": 0.6386, "step": 6472 }, { "epoch": 0.18898718285597502, "grad_norm": 0.6959625203438726, "learning_rate": 1.8023033252230332e-05, "loss": 0.6654, "step": 6473 }, { "epoch": 0.18901637908382238, "grad_norm": 0.6446803823125394, "learning_rate": 1.8022384428223844e-05, "loss": 0.5327, "step": 6474 }, { "epoch": 0.18904557531166974, "grad_norm": 0.7464074808094805, "learning_rate": 1.802173560421736e-05, "loss": 0.7132, "step": 6475 }, { "epoch": 0.1890747715395171, "grad_norm": 0.735295712618301, "learning_rate": 1.802108678021087e-05, "loss": 0.6264, "step": 6476 }, { "epoch": 0.18910396776736446, "grad_norm": 0.6897659777371442, "learning_rate": 1.8020437956204383e-05, "loss": 0.624, "step": 6477 }, { "epoch": 0.18913316399521182, "grad_norm": 0.7551802257290103, "learning_rate": 1.8019789132197892e-05, "loss": 0.7088, "step": 6478 }, { "epoch": 0.18916236022305918, "grad_norm": 0.7048110386071536, "learning_rate": 1.8019140308191404e-05, "loss": 0.6887, "step": 6479 }, { "epoch": 0.18919155645090655, "grad_norm": 0.7075361106413686, "learning_rate": 1.8018491484184916e-05, "loss": 0.6684, "step": 6480 }, { "epoch": 0.1892207526787539, "grad_norm": 0.7179827814489551, "learning_rate": 1.8017842660178428e-05, "loss": 0.6909, "step": 6481 }, { "epoch": 0.18924994890660127, "grad_norm": 0.7005938841501479, "learning_rate": 1.801719383617194e-05, "loss": 0.6665, "step": 6482 }, { "epoch": 0.18927914513444863, "grad_norm": 0.720827158184985, "learning_rate": 1.8016545012165452e-05, "loss": 0.7008, "step": 6483 }, { "epoch": 0.189308341362296, "grad_norm": 0.6994979337113412, "learning_rate": 1.8015896188158964e-05, "loss": 0.6558, "step": 6484 }, { "epoch": 0.18933753759014335, "grad_norm": 0.6677197144195676, "learning_rate": 1.8015247364152476e-05, "loss": 0.6389, "step": 6485 }, { "epoch": 0.1893667338179907, "grad_norm": 0.6525125226480012, "learning_rate": 1.801459854014599e-05, "loss": 0.5964, "step": 6486 }, { "epoch": 0.18939593004583807, "grad_norm": 0.6694783426067596, "learning_rate": 1.8013949716139497e-05, "loss": 0.6192, "step": 6487 }, { "epoch": 0.18942512627368543, "grad_norm": 0.860015558437744, "learning_rate": 1.801330089213301e-05, "loss": 0.7747, "step": 6488 }, { "epoch": 0.1894543225015328, "grad_norm": 0.6948438767087055, "learning_rate": 1.801265206812652e-05, "loss": 0.6443, "step": 6489 }, { "epoch": 0.18948351872938016, "grad_norm": 0.7307904152288364, "learning_rate": 1.8012003244120033e-05, "loss": 0.7159, "step": 6490 }, { "epoch": 0.18951271495722752, "grad_norm": 0.675396686831826, "learning_rate": 1.8011354420113545e-05, "loss": 0.7127, "step": 6491 }, { "epoch": 0.18954191118507488, "grad_norm": 0.7118840211453674, "learning_rate": 1.8010705596107057e-05, "loss": 0.6632, "step": 6492 }, { "epoch": 0.18957110741292224, "grad_norm": 0.949151954333599, "learning_rate": 1.801005677210057e-05, "loss": 0.7403, "step": 6493 }, { "epoch": 0.1896003036407696, "grad_norm": 0.6835804848544844, "learning_rate": 1.800940794809408e-05, "loss": 0.7135, "step": 6494 }, { "epoch": 0.18962949986861696, "grad_norm": 0.6327816906115219, "learning_rate": 1.8008759124087593e-05, "loss": 0.6031, "step": 6495 }, { "epoch": 0.18965869609646432, "grad_norm": 0.6777698968524034, "learning_rate": 1.8008110300081105e-05, "loss": 0.6321, "step": 6496 }, { "epoch": 0.18968789232431169, "grad_norm": 0.6251280486214319, "learning_rate": 1.8007461476074617e-05, "loss": 0.5584, "step": 6497 }, { "epoch": 0.18971708855215907, "grad_norm": 0.694566143350639, "learning_rate": 1.800681265206813e-05, "loss": 0.6854, "step": 6498 }, { "epoch": 0.18974628478000644, "grad_norm": 0.6671791263476401, "learning_rate": 1.800616382806164e-05, "loss": 0.5964, "step": 6499 }, { "epoch": 0.1897754810078538, "grad_norm": 0.6680060168818337, "learning_rate": 1.8005515004055153e-05, "loss": 0.6215, "step": 6500 }, { "epoch": 0.18980467723570116, "grad_norm": 0.7585841510019585, "learning_rate": 1.8004866180048662e-05, "loss": 0.6114, "step": 6501 }, { "epoch": 0.18983387346354852, "grad_norm": 0.6590477093466492, "learning_rate": 1.8004217356042174e-05, "loss": 0.6306, "step": 6502 }, { "epoch": 0.18986306969139588, "grad_norm": 1.2652863388601332, "learning_rate": 1.8003568532035686e-05, "loss": 0.7336, "step": 6503 }, { "epoch": 0.18989226591924324, "grad_norm": 0.6837638886930683, "learning_rate": 1.8002919708029198e-05, "loss": 0.6728, "step": 6504 }, { "epoch": 0.1899214621470906, "grad_norm": 0.7482927649291632, "learning_rate": 1.800227088402271e-05, "loss": 0.7386, "step": 6505 }, { "epoch": 0.18995065837493796, "grad_norm": 0.7146630530171048, "learning_rate": 1.8001622060016222e-05, "loss": 0.7303, "step": 6506 }, { "epoch": 0.18997985460278533, "grad_norm": 0.7038196705546148, "learning_rate": 1.8000973236009734e-05, "loss": 0.6395, "step": 6507 }, { "epoch": 0.1900090508306327, "grad_norm": 0.6449880241978202, "learning_rate": 1.8000324412003246e-05, "loss": 0.6315, "step": 6508 }, { "epoch": 0.19003824705848005, "grad_norm": 0.6299677298900515, "learning_rate": 1.7999675587996758e-05, "loss": 0.5668, "step": 6509 }, { "epoch": 0.1900674432863274, "grad_norm": 0.7512253146941779, "learning_rate": 1.7999026763990267e-05, "loss": 0.6438, "step": 6510 }, { "epoch": 0.19009663951417477, "grad_norm": 0.748701875164978, "learning_rate": 1.799837793998378e-05, "loss": 0.6614, "step": 6511 }, { "epoch": 0.19012583574202213, "grad_norm": 0.7266444826596641, "learning_rate": 1.799772911597729e-05, "loss": 0.7245, "step": 6512 }, { "epoch": 0.1901550319698695, "grad_norm": 0.7211217887852557, "learning_rate": 1.7997080291970806e-05, "loss": 0.7112, "step": 6513 }, { "epoch": 0.19018422819771685, "grad_norm": 0.7183561122047669, "learning_rate": 1.7996431467964318e-05, "loss": 0.6766, "step": 6514 }, { "epoch": 0.19021342442556421, "grad_norm": 0.6633602023339485, "learning_rate": 1.799578264395783e-05, "loss": 0.6539, "step": 6515 }, { "epoch": 0.19024262065341158, "grad_norm": 0.7206758114968794, "learning_rate": 1.799513381995134e-05, "loss": 0.6919, "step": 6516 }, { "epoch": 0.19027181688125894, "grad_norm": 0.7912222614052181, "learning_rate": 1.799448499594485e-05, "loss": 0.7723, "step": 6517 }, { "epoch": 0.1903010131091063, "grad_norm": 0.9538808295925885, "learning_rate": 1.7993836171938363e-05, "loss": 0.7249, "step": 6518 }, { "epoch": 0.19033020933695366, "grad_norm": 0.6423845226845725, "learning_rate": 1.7993187347931875e-05, "loss": 0.6116, "step": 6519 }, { "epoch": 0.19035940556480102, "grad_norm": 0.6814470261689796, "learning_rate": 1.7992538523925387e-05, "loss": 0.7011, "step": 6520 }, { "epoch": 0.19038860179264838, "grad_norm": 0.6673727850776567, "learning_rate": 1.79918896999189e-05, "loss": 0.6432, "step": 6521 }, { "epoch": 0.19041779802049574, "grad_norm": 0.6382802690643896, "learning_rate": 1.799124087591241e-05, "loss": 0.5719, "step": 6522 }, { "epoch": 0.1904469942483431, "grad_norm": 0.6647736503303572, "learning_rate": 1.7990592051905923e-05, "loss": 0.6462, "step": 6523 }, { "epoch": 0.19047619047619047, "grad_norm": 0.6679566956737603, "learning_rate": 1.7989943227899432e-05, "loss": 0.6897, "step": 6524 }, { "epoch": 0.19050538670403783, "grad_norm": 0.6418933590812382, "learning_rate": 1.7989294403892944e-05, "loss": 0.6141, "step": 6525 }, { "epoch": 0.1905345829318852, "grad_norm": 0.6952374944204202, "learning_rate": 1.7988645579886456e-05, "loss": 0.6668, "step": 6526 }, { "epoch": 0.19056377915973255, "grad_norm": 0.7665003722300294, "learning_rate": 1.7987996755879968e-05, "loss": 0.7167, "step": 6527 }, { "epoch": 0.19059297538757994, "grad_norm": 0.6735422473552494, "learning_rate": 1.798734793187348e-05, "loss": 0.6145, "step": 6528 }, { "epoch": 0.1906221716154273, "grad_norm": 0.6126868481674048, "learning_rate": 1.7986699107866992e-05, "loss": 0.5438, "step": 6529 }, { "epoch": 0.19065136784327466, "grad_norm": 0.697477336041536, "learning_rate": 1.7986050283860504e-05, "loss": 0.6667, "step": 6530 }, { "epoch": 0.19068056407112202, "grad_norm": 0.673270792516437, "learning_rate": 1.7985401459854016e-05, "loss": 0.6397, "step": 6531 }, { "epoch": 0.19070976029896938, "grad_norm": 0.6278285314346516, "learning_rate": 1.7984752635847528e-05, "loss": 0.5572, "step": 6532 }, { "epoch": 0.19073895652681674, "grad_norm": 0.7289531732143909, "learning_rate": 1.798410381184104e-05, "loss": 0.6507, "step": 6533 }, { "epoch": 0.1907681527546641, "grad_norm": 0.7483535755153758, "learning_rate": 1.7983454987834552e-05, "loss": 0.752, "step": 6534 }, { "epoch": 0.19079734898251147, "grad_norm": 0.7516516367634991, "learning_rate": 1.7982806163828064e-05, "loss": 0.7322, "step": 6535 }, { "epoch": 0.19082654521035883, "grad_norm": 0.6639814405319244, "learning_rate": 1.7982157339821576e-05, "loss": 0.6292, "step": 6536 }, { "epoch": 0.1908557414382062, "grad_norm": 0.684931279256789, "learning_rate": 1.7981508515815088e-05, "loss": 0.712, "step": 6537 }, { "epoch": 0.19088493766605355, "grad_norm": 0.676743227305645, "learning_rate": 1.79808596918086e-05, "loss": 0.6389, "step": 6538 }, { "epoch": 0.1909141338939009, "grad_norm": 0.6349468103306137, "learning_rate": 1.798021086780211e-05, "loss": 0.5713, "step": 6539 }, { "epoch": 0.19094333012174827, "grad_norm": 0.7150334604273609, "learning_rate": 1.797956204379562e-05, "loss": 0.7087, "step": 6540 }, { "epoch": 0.19097252634959563, "grad_norm": 0.6988669500578206, "learning_rate": 1.7978913219789133e-05, "loss": 0.571, "step": 6541 }, { "epoch": 0.191001722577443, "grad_norm": 0.6078177749665372, "learning_rate": 1.7978264395782645e-05, "loss": 0.5395, "step": 6542 }, { "epoch": 0.19103091880529036, "grad_norm": 0.689473170824217, "learning_rate": 1.7977615571776157e-05, "loss": 0.7528, "step": 6543 }, { "epoch": 0.19106011503313772, "grad_norm": 0.7142145864358752, "learning_rate": 1.797696674776967e-05, "loss": 0.5992, "step": 6544 }, { "epoch": 0.19108931126098508, "grad_norm": 0.6685933970401835, "learning_rate": 1.797631792376318e-05, "loss": 0.698, "step": 6545 }, { "epoch": 0.19111850748883244, "grad_norm": 0.6829368536929502, "learning_rate": 1.7975669099756693e-05, "loss": 0.6748, "step": 6546 }, { "epoch": 0.1911477037166798, "grad_norm": 0.6964874036115278, "learning_rate": 1.7975020275750205e-05, "loss": 0.6293, "step": 6547 }, { "epoch": 0.19117689994452716, "grad_norm": 0.6941956782118728, "learning_rate": 1.7974371451743714e-05, "loss": 0.6988, "step": 6548 }, { "epoch": 0.19120609617237452, "grad_norm": 0.7289433488880873, "learning_rate": 1.7973722627737226e-05, "loss": 0.69, "step": 6549 }, { "epoch": 0.19123529240022188, "grad_norm": 0.7195803131444806, "learning_rate": 1.7973073803730738e-05, "loss": 0.7468, "step": 6550 }, { "epoch": 0.19126448862806925, "grad_norm": 0.7370962979630463, "learning_rate": 1.7972424979724253e-05, "loss": 0.7115, "step": 6551 }, { "epoch": 0.1912936848559166, "grad_norm": 0.6838330008119717, "learning_rate": 1.7971776155717765e-05, "loss": 0.6344, "step": 6552 }, { "epoch": 0.19132288108376397, "grad_norm": 0.7208890305112707, "learning_rate": 1.7971127331711277e-05, "loss": 0.7062, "step": 6553 }, { "epoch": 0.19135207731161133, "grad_norm": 0.6994803297911791, "learning_rate": 1.7970478507704786e-05, "loss": 0.6573, "step": 6554 }, { "epoch": 0.1913812735394587, "grad_norm": 0.6578017264824311, "learning_rate": 1.7969829683698298e-05, "loss": 0.5715, "step": 6555 }, { "epoch": 0.19141046976730605, "grad_norm": 0.7187247503622882, "learning_rate": 1.796918085969181e-05, "loss": 0.7281, "step": 6556 }, { "epoch": 0.1914396659951534, "grad_norm": 0.656911172010835, "learning_rate": 1.7968532035685322e-05, "loss": 0.5721, "step": 6557 }, { "epoch": 0.1914688622230008, "grad_norm": 0.9130875428173736, "learning_rate": 1.7967883211678834e-05, "loss": 0.6948, "step": 6558 }, { "epoch": 0.19149805845084816, "grad_norm": 0.7021369626125081, "learning_rate": 1.7967234387672346e-05, "loss": 0.7037, "step": 6559 }, { "epoch": 0.19152725467869552, "grad_norm": 0.6445737152328406, "learning_rate": 1.7966585563665858e-05, "loss": 0.5601, "step": 6560 }, { "epoch": 0.19155645090654289, "grad_norm": 0.7451053113173611, "learning_rate": 1.796593673965937e-05, "loss": 0.6892, "step": 6561 }, { "epoch": 0.19158564713439025, "grad_norm": 0.7094497273690903, "learning_rate": 1.796528791565288e-05, "loss": 0.6503, "step": 6562 }, { "epoch": 0.1916148433622376, "grad_norm": 0.6443493712378948, "learning_rate": 1.796463909164639e-05, "loss": 0.5915, "step": 6563 }, { "epoch": 0.19164403959008497, "grad_norm": 0.6870477215887519, "learning_rate": 1.7963990267639903e-05, "loss": 0.6371, "step": 6564 }, { "epoch": 0.19167323581793233, "grad_norm": 0.6728941502170298, "learning_rate": 1.7963341443633415e-05, "loss": 0.6407, "step": 6565 }, { "epoch": 0.1917024320457797, "grad_norm": 0.660148467600132, "learning_rate": 1.7962692619626927e-05, "loss": 0.6572, "step": 6566 }, { "epoch": 0.19173162827362705, "grad_norm": 0.7217908838645439, "learning_rate": 1.796204379562044e-05, "loss": 0.6763, "step": 6567 }, { "epoch": 0.19176082450147441, "grad_norm": 0.7226606550440248, "learning_rate": 1.796139497161395e-05, "loss": 0.7292, "step": 6568 }, { "epoch": 0.19179002072932178, "grad_norm": 0.7358251989324235, "learning_rate": 1.7960746147607463e-05, "loss": 0.6864, "step": 6569 }, { "epoch": 0.19181921695716914, "grad_norm": 0.695931470665563, "learning_rate": 1.7960097323600975e-05, "loss": 0.6904, "step": 6570 }, { "epoch": 0.1918484131850165, "grad_norm": 0.7895624490205475, "learning_rate": 1.7959448499594487e-05, "loss": 0.6422, "step": 6571 }, { "epoch": 0.19187760941286386, "grad_norm": 0.6589348378064095, "learning_rate": 1.7958799675588e-05, "loss": 0.6111, "step": 6572 }, { "epoch": 0.19190680564071122, "grad_norm": 0.8609290950398049, "learning_rate": 1.795815085158151e-05, "loss": 0.6735, "step": 6573 }, { "epoch": 0.19193600186855858, "grad_norm": 0.7264543166537545, "learning_rate": 1.7957502027575023e-05, "loss": 0.6447, "step": 6574 }, { "epoch": 0.19196519809640594, "grad_norm": 0.7299079759305461, "learning_rate": 1.7956853203568535e-05, "loss": 0.6522, "step": 6575 }, { "epoch": 0.1919943943242533, "grad_norm": 0.7311868010885926, "learning_rate": 1.7956204379562047e-05, "loss": 0.709, "step": 6576 }, { "epoch": 0.19202359055210066, "grad_norm": 0.7536630675197846, "learning_rate": 1.7955555555555556e-05, "loss": 0.7423, "step": 6577 }, { "epoch": 0.19205278677994803, "grad_norm": 0.6818409598811, "learning_rate": 1.7954906731549068e-05, "loss": 0.6456, "step": 6578 }, { "epoch": 0.1920819830077954, "grad_norm": 0.6428130493721677, "learning_rate": 1.795425790754258e-05, "loss": 0.6355, "step": 6579 }, { "epoch": 0.19211117923564275, "grad_norm": 0.7422936520310185, "learning_rate": 1.7953609083536092e-05, "loss": 0.7169, "step": 6580 }, { "epoch": 0.1921403754634901, "grad_norm": 0.6950948831088392, "learning_rate": 1.7952960259529604e-05, "loss": 0.6355, "step": 6581 }, { "epoch": 0.19216957169133747, "grad_norm": 0.6996393274166843, "learning_rate": 1.7952311435523116e-05, "loss": 0.7184, "step": 6582 }, { "epoch": 0.19219876791918483, "grad_norm": 0.6903554289749326, "learning_rate": 1.7951662611516628e-05, "loss": 0.665, "step": 6583 }, { "epoch": 0.1922279641470322, "grad_norm": 0.6969263314854056, "learning_rate": 1.795101378751014e-05, "loss": 0.6663, "step": 6584 }, { "epoch": 0.19225716037487955, "grad_norm": 0.661261282068457, "learning_rate": 1.7950364963503652e-05, "loss": 0.6478, "step": 6585 }, { "epoch": 0.19228635660272692, "grad_norm": 0.7810958871727521, "learning_rate": 1.794971613949716e-05, "loss": 0.7211, "step": 6586 }, { "epoch": 0.19231555283057428, "grad_norm": 0.6224077371617587, "learning_rate": 1.7949067315490673e-05, "loss": 0.5402, "step": 6587 }, { "epoch": 0.19234474905842167, "grad_norm": 0.6475541412718184, "learning_rate": 1.7948418491484188e-05, "loss": 0.5914, "step": 6588 }, { "epoch": 0.19237394528626903, "grad_norm": 0.8494153418006, "learning_rate": 1.79477696674777e-05, "loss": 0.7113, "step": 6589 }, { "epoch": 0.1924031415141164, "grad_norm": 0.6505001533951397, "learning_rate": 1.7947120843471212e-05, "loss": 0.5881, "step": 6590 }, { "epoch": 0.19243233774196375, "grad_norm": 0.6435372762356575, "learning_rate": 1.7946472019464724e-05, "loss": 0.5763, "step": 6591 }, { "epoch": 0.1924615339698111, "grad_norm": 0.7217387658927198, "learning_rate": 1.7945823195458233e-05, "loss": 0.6684, "step": 6592 }, { "epoch": 0.19249073019765847, "grad_norm": 0.731193045109826, "learning_rate": 1.7945174371451745e-05, "loss": 0.6955, "step": 6593 }, { "epoch": 0.19251992642550583, "grad_norm": 0.6066587167110018, "learning_rate": 1.7944525547445257e-05, "loss": 0.5415, "step": 6594 }, { "epoch": 0.1925491226533532, "grad_norm": 0.6607105845602408, "learning_rate": 1.794387672343877e-05, "loss": 0.5822, "step": 6595 }, { "epoch": 0.19257831888120056, "grad_norm": 0.7165531483620312, "learning_rate": 1.794322789943228e-05, "loss": 0.7513, "step": 6596 }, { "epoch": 0.19260751510904792, "grad_norm": 0.6811168717824093, "learning_rate": 1.7942579075425793e-05, "loss": 0.6981, "step": 6597 }, { "epoch": 0.19263671133689528, "grad_norm": 0.6323302168397369, "learning_rate": 1.7941930251419305e-05, "loss": 0.5653, "step": 6598 }, { "epoch": 0.19266590756474264, "grad_norm": 0.7078868811242098, "learning_rate": 1.7941281427412817e-05, "loss": 0.7055, "step": 6599 }, { "epoch": 0.19269510379259, "grad_norm": 0.7074667768728814, "learning_rate": 1.7940632603406326e-05, "loss": 0.7126, "step": 6600 }, { "epoch": 0.19272430002043736, "grad_norm": 0.6676503044313116, "learning_rate": 1.7939983779399838e-05, "loss": 0.6281, "step": 6601 }, { "epoch": 0.19275349624828472, "grad_norm": 0.7611819093150896, "learning_rate": 1.793933495539335e-05, "loss": 0.7045, "step": 6602 }, { "epoch": 0.19278269247613208, "grad_norm": 0.6495161065311913, "learning_rate": 1.793868613138686e-05, "loss": 0.6359, "step": 6603 }, { "epoch": 0.19281188870397944, "grad_norm": 0.6819981562206077, "learning_rate": 1.7938037307380374e-05, "loss": 0.6508, "step": 6604 }, { "epoch": 0.1928410849318268, "grad_norm": 0.6828592859492266, "learning_rate": 1.7937388483373886e-05, "loss": 0.6723, "step": 6605 }, { "epoch": 0.19287028115967417, "grad_norm": 0.7527450529838261, "learning_rate": 1.7936739659367398e-05, "loss": 0.7323, "step": 6606 }, { "epoch": 0.19289947738752153, "grad_norm": 0.6523041363848179, "learning_rate": 1.793609083536091e-05, "loss": 0.6213, "step": 6607 }, { "epoch": 0.1929286736153689, "grad_norm": 0.7231110602873134, "learning_rate": 1.7935442011354422e-05, "loss": 0.7381, "step": 6608 }, { "epoch": 0.19295786984321625, "grad_norm": 0.8810998322849122, "learning_rate": 1.7934793187347934e-05, "loss": 0.7415, "step": 6609 }, { "epoch": 0.1929870660710636, "grad_norm": 0.6802875876595522, "learning_rate": 1.7934144363341446e-05, "loss": 0.6562, "step": 6610 }, { "epoch": 0.19301626229891097, "grad_norm": 0.67546292460311, "learning_rate": 1.7933495539334958e-05, "loss": 0.6199, "step": 6611 }, { "epoch": 0.19304545852675833, "grad_norm": 0.6886372841156868, "learning_rate": 1.793284671532847e-05, "loss": 0.7322, "step": 6612 }, { "epoch": 0.1930746547546057, "grad_norm": 0.6695133900760012, "learning_rate": 1.7932197891321982e-05, "loss": 0.6025, "step": 6613 }, { "epoch": 0.19310385098245306, "grad_norm": 0.7046469219291946, "learning_rate": 1.7931549067315494e-05, "loss": 0.6439, "step": 6614 }, { "epoch": 0.19313304721030042, "grad_norm": 0.6867493663987011, "learning_rate": 1.7930900243309003e-05, "loss": 0.6488, "step": 6615 }, { "epoch": 0.19316224343814778, "grad_norm": 0.8219194502709208, "learning_rate": 1.7930251419302515e-05, "loss": 0.6433, "step": 6616 }, { "epoch": 0.19319143966599514, "grad_norm": 0.6757877123483134, "learning_rate": 1.7929602595296027e-05, "loss": 0.6106, "step": 6617 }, { "epoch": 0.1932206358938425, "grad_norm": 0.6852017089283898, "learning_rate": 1.792895377128954e-05, "loss": 0.6301, "step": 6618 }, { "epoch": 0.1932498321216899, "grad_norm": 0.7375324982934902, "learning_rate": 1.792830494728305e-05, "loss": 0.6589, "step": 6619 }, { "epoch": 0.19327902834953725, "grad_norm": 0.6553443944675501, "learning_rate": 1.7927656123276563e-05, "loss": 0.6314, "step": 6620 }, { "epoch": 0.1933082245773846, "grad_norm": 0.6792512945801183, "learning_rate": 1.7927007299270075e-05, "loss": 0.6453, "step": 6621 }, { "epoch": 0.19333742080523197, "grad_norm": 0.6893815047360548, "learning_rate": 1.7926358475263587e-05, "loss": 0.6363, "step": 6622 }, { "epoch": 0.19336661703307934, "grad_norm": 0.7475602121046361, "learning_rate": 1.79257096512571e-05, "loss": 0.6964, "step": 6623 }, { "epoch": 0.1933958132609267, "grad_norm": 0.6745099416664729, "learning_rate": 1.7925060827250607e-05, "loss": 0.6412, "step": 6624 }, { "epoch": 0.19342500948877406, "grad_norm": 0.7295731973057176, "learning_rate": 1.792441200324412e-05, "loss": 0.6637, "step": 6625 }, { "epoch": 0.19345420571662142, "grad_norm": 0.7258033291505607, "learning_rate": 1.7923763179237635e-05, "loss": 0.6897, "step": 6626 }, { "epoch": 0.19348340194446878, "grad_norm": 0.6591112974171285, "learning_rate": 1.7923114355231147e-05, "loss": 0.618, "step": 6627 }, { "epoch": 0.19351259817231614, "grad_norm": 0.6565531768182935, "learning_rate": 1.792246553122466e-05, "loss": 0.6061, "step": 6628 }, { "epoch": 0.1935417944001635, "grad_norm": 0.6480608119548357, "learning_rate": 1.792181670721817e-05, "loss": 0.6349, "step": 6629 }, { "epoch": 0.19357099062801086, "grad_norm": 0.7062945389021752, "learning_rate": 1.792116788321168e-05, "loss": 0.6756, "step": 6630 }, { "epoch": 0.19360018685585823, "grad_norm": 0.6637200997905053, "learning_rate": 1.792051905920519e-05, "loss": 0.6652, "step": 6631 }, { "epoch": 0.1936293830837056, "grad_norm": 0.7357463276631924, "learning_rate": 1.7919870235198704e-05, "loss": 0.695, "step": 6632 }, { "epoch": 0.19365857931155295, "grad_norm": 0.6562309895995765, "learning_rate": 1.7919221411192216e-05, "loss": 0.6467, "step": 6633 }, { "epoch": 0.1936877755394003, "grad_norm": 0.7068984533815235, "learning_rate": 1.7918572587185728e-05, "loss": 0.698, "step": 6634 }, { "epoch": 0.19371697176724767, "grad_norm": 0.6450487352048122, "learning_rate": 1.791792376317924e-05, "loss": 0.6521, "step": 6635 }, { "epoch": 0.19374616799509503, "grad_norm": 0.6177952560254963, "learning_rate": 1.7917274939172752e-05, "loss": 0.5634, "step": 6636 }, { "epoch": 0.1937753642229424, "grad_norm": 0.6933807267483061, "learning_rate": 1.7916626115166264e-05, "loss": 0.7285, "step": 6637 }, { "epoch": 0.19380456045078975, "grad_norm": 0.6229573496814187, "learning_rate": 1.7915977291159772e-05, "loss": 0.5683, "step": 6638 }, { "epoch": 0.19383375667863711, "grad_norm": 0.7585411584601782, "learning_rate": 1.7915328467153284e-05, "loss": 0.7007, "step": 6639 }, { "epoch": 0.19386295290648448, "grad_norm": 0.7078560265179442, "learning_rate": 1.7914679643146796e-05, "loss": 0.6403, "step": 6640 }, { "epoch": 0.19389214913433184, "grad_norm": 0.7094052915363096, "learning_rate": 1.791403081914031e-05, "loss": 0.6781, "step": 6641 }, { "epoch": 0.1939213453621792, "grad_norm": 0.8209325753804286, "learning_rate": 1.791338199513382e-05, "loss": 0.6561, "step": 6642 }, { "epoch": 0.19395054159002656, "grad_norm": 0.7007647117222445, "learning_rate": 1.7912733171127333e-05, "loss": 0.6523, "step": 6643 }, { "epoch": 0.19397973781787392, "grad_norm": 0.6659397407969784, "learning_rate": 1.7912084347120845e-05, "loss": 0.5671, "step": 6644 }, { "epoch": 0.19400893404572128, "grad_norm": 0.6998628246132185, "learning_rate": 1.7911435523114357e-05, "loss": 0.6785, "step": 6645 }, { "epoch": 0.19403813027356864, "grad_norm": 0.7201532147535512, "learning_rate": 1.791078669910787e-05, "loss": 0.6859, "step": 6646 }, { "epoch": 0.194067326501416, "grad_norm": 0.6361580777249415, "learning_rate": 1.791013787510138e-05, "loss": 0.5505, "step": 6647 }, { "epoch": 0.19409652272926337, "grad_norm": 0.6897908772114941, "learning_rate": 1.7909489051094893e-05, "loss": 0.6626, "step": 6648 }, { "epoch": 0.19412571895711075, "grad_norm": 0.6739935418715616, "learning_rate": 1.7908840227088405e-05, "loss": 0.6205, "step": 6649 }, { "epoch": 0.19415491518495812, "grad_norm": 0.7324948517257676, "learning_rate": 1.7908191403081917e-05, "loss": 0.6843, "step": 6650 }, { "epoch": 0.19418411141280548, "grad_norm": 0.6880034902555354, "learning_rate": 1.790754257907543e-05, "loss": 0.5833, "step": 6651 }, { "epoch": 0.19421330764065284, "grad_norm": 0.7299486634591081, "learning_rate": 1.790689375506894e-05, "loss": 0.6313, "step": 6652 }, { "epoch": 0.1942425038685002, "grad_norm": 0.7247310939062919, "learning_rate": 1.790624493106245e-05, "loss": 0.687, "step": 6653 }, { "epoch": 0.19427170009634756, "grad_norm": 0.6410649290796572, "learning_rate": 1.790559610705596e-05, "loss": 0.5828, "step": 6654 }, { "epoch": 0.19430089632419492, "grad_norm": 0.7298978438416014, "learning_rate": 1.7904947283049473e-05, "loss": 0.6902, "step": 6655 }, { "epoch": 0.19433009255204228, "grad_norm": 0.743297346606547, "learning_rate": 1.7904298459042986e-05, "loss": 0.6703, "step": 6656 }, { "epoch": 0.19435928877988964, "grad_norm": 0.640455636611807, "learning_rate": 1.7903649635036498e-05, "loss": 0.6016, "step": 6657 }, { "epoch": 0.194388485007737, "grad_norm": 1.0407366458450868, "learning_rate": 1.790300081103001e-05, "loss": 0.5807, "step": 6658 }, { "epoch": 0.19441768123558437, "grad_norm": 0.7578289011332878, "learning_rate": 1.790235198702352e-05, "loss": 0.7247, "step": 6659 }, { "epoch": 0.19444687746343173, "grad_norm": 0.7021572084459863, "learning_rate": 1.7901703163017034e-05, "loss": 0.7195, "step": 6660 }, { "epoch": 0.1944760736912791, "grad_norm": 0.7328141474369263, "learning_rate": 1.7901054339010546e-05, "loss": 0.6586, "step": 6661 }, { "epoch": 0.19450526991912645, "grad_norm": 0.8169312810246884, "learning_rate": 1.7900405515004054e-05, "loss": 0.7242, "step": 6662 }, { "epoch": 0.1945344661469738, "grad_norm": 0.6910642087296791, "learning_rate": 1.7899756690997566e-05, "loss": 0.6918, "step": 6663 }, { "epoch": 0.19456366237482117, "grad_norm": 0.6088243740208984, "learning_rate": 1.7899107866991082e-05, "loss": 0.5739, "step": 6664 }, { "epoch": 0.19459285860266853, "grad_norm": 0.6693094240980532, "learning_rate": 1.7898459042984594e-05, "loss": 0.634, "step": 6665 }, { "epoch": 0.1946220548305159, "grad_norm": 0.7271576852850161, "learning_rate": 1.7897810218978106e-05, "loss": 0.7476, "step": 6666 }, { "epoch": 0.19465125105836326, "grad_norm": 0.7019856980228074, "learning_rate": 1.7897161394971614e-05, "loss": 0.7072, "step": 6667 }, { "epoch": 0.19468044728621062, "grad_norm": 0.7492050936958413, "learning_rate": 1.7896512570965126e-05, "loss": 0.6442, "step": 6668 }, { "epoch": 0.19470964351405798, "grad_norm": 0.64252984200422, "learning_rate": 1.789586374695864e-05, "loss": 0.6005, "step": 6669 }, { "epoch": 0.19473883974190534, "grad_norm": 0.6645515628884631, "learning_rate": 1.789521492295215e-05, "loss": 0.6418, "step": 6670 }, { "epoch": 0.1947680359697527, "grad_norm": 0.7261114754689086, "learning_rate": 1.7894566098945663e-05, "loss": 0.7111, "step": 6671 }, { "epoch": 0.19479723219760006, "grad_norm": 0.7008833069028336, "learning_rate": 1.7893917274939175e-05, "loss": 0.7106, "step": 6672 }, { "epoch": 0.19482642842544742, "grad_norm": 0.6814723604859316, "learning_rate": 1.7893268450932687e-05, "loss": 0.6635, "step": 6673 }, { "epoch": 0.19485562465329478, "grad_norm": 0.7055541211027129, "learning_rate": 1.78926196269262e-05, "loss": 0.6972, "step": 6674 }, { "epoch": 0.19488482088114215, "grad_norm": 0.6430985533242218, "learning_rate": 1.789197080291971e-05, "loss": 0.5704, "step": 6675 }, { "epoch": 0.1949140171089895, "grad_norm": 0.737431727868103, "learning_rate": 1.789132197891322e-05, "loss": 0.7406, "step": 6676 }, { "epoch": 0.19494321333683687, "grad_norm": 0.7474210165764144, "learning_rate": 1.789067315490673e-05, "loss": 0.605, "step": 6677 }, { "epoch": 0.19497240956468423, "grad_norm": 0.6454076923538862, "learning_rate": 1.7890024330900243e-05, "loss": 0.6401, "step": 6678 }, { "epoch": 0.19500160579253162, "grad_norm": 0.6955815906888547, "learning_rate": 1.7889375506893755e-05, "loss": 0.6742, "step": 6679 }, { "epoch": 0.19503080202037898, "grad_norm": 0.72233959716293, "learning_rate": 1.7888726682887267e-05, "loss": 0.7125, "step": 6680 }, { "epoch": 0.19505999824822634, "grad_norm": 0.6323217206825945, "learning_rate": 1.788807785888078e-05, "loss": 0.5809, "step": 6681 }, { "epoch": 0.1950891944760737, "grad_norm": 0.6588689274703661, "learning_rate": 1.788742903487429e-05, "loss": 0.6272, "step": 6682 }, { "epoch": 0.19511839070392106, "grad_norm": 0.6726534813716744, "learning_rate": 1.7886780210867803e-05, "loss": 0.6109, "step": 6683 }, { "epoch": 0.19514758693176842, "grad_norm": 0.653210125150112, "learning_rate": 1.7886131386861315e-05, "loss": 0.6195, "step": 6684 }, { "epoch": 0.19517678315961579, "grad_norm": 0.6626005851006498, "learning_rate": 1.7885482562854828e-05, "loss": 0.6301, "step": 6685 }, { "epoch": 0.19520597938746315, "grad_norm": 0.6802980441940738, "learning_rate": 1.788483373884834e-05, "loss": 0.6331, "step": 6686 }, { "epoch": 0.1952351756153105, "grad_norm": 0.7374603629297161, "learning_rate": 1.788418491484185e-05, "loss": 0.7004, "step": 6687 }, { "epoch": 0.19526437184315787, "grad_norm": 0.6747061665147427, "learning_rate": 1.7883536090835364e-05, "loss": 0.6035, "step": 6688 }, { "epoch": 0.19529356807100523, "grad_norm": 0.7297888143880149, "learning_rate": 1.7882887266828876e-05, "loss": 0.6941, "step": 6689 }, { "epoch": 0.1953227642988526, "grad_norm": 0.6657376484315667, "learning_rate": 1.7882238442822388e-05, "loss": 0.6879, "step": 6690 }, { "epoch": 0.19535196052669995, "grad_norm": 0.7103520538174336, "learning_rate": 1.7881589618815896e-05, "loss": 0.727, "step": 6691 }, { "epoch": 0.1953811567545473, "grad_norm": 0.6654246240181031, "learning_rate": 1.788094079480941e-05, "loss": 0.6524, "step": 6692 }, { "epoch": 0.19541035298239467, "grad_norm": 0.9135537496305185, "learning_rate": 1.788029197080292e-05, "loss": 0.8565, "step": 6693 }, { "epoch": 0.19543954921024204, "grad_norm": 0.647870153844063, "learning_rate": 1.7879643146796432e-05, "loss": 0.619, "step": 6694 }, { "epoch": 0.1954687454380894, "grad_norm": 0.6377971436781493, "learning_rate": 1.7878994322789944e-05, "loss": 0.6031, "step": 6695 }, { "epoch": 0.19549794166593676, "grad_norm": 0.6602743258452936, "learning_rate": 1.7878345498783456e-05, "loss": 0.5972, "step": 6696 }, { "epoch": 0.19552713789378412, "grad_norm": 0.6196069261864801, "learning_rate": 1.787769667477697e-05, "loss": 0.5835, "step": 6697 }, { "epoch": 0.19555633412163148, "grad_norm": 0.6555886306097108, "learning_rate": 1.787704785077048e-05, "loss": 0.5927, "step": 6698 }, { "epoch": 0.19558553034947884, "grad_norm": 0.7141495824854338, "learning_rate": 1.7876399026763993e-05, "loss": 0.6688, "step": 6699 }, { "epoch": 0.1956147265773262, "grad_norm": 0.73125930014783, "learning_rate": 1.78757502027575e-05, "loss": 0.7005, "step": 6700 }, { "epoch": 0.19564392280517356, "grad_norm": 0.6891211776732376, "learning_rate": 1.7875101378751013e-05, "loss": 0.6576, "step": 6701 }, { "epoch": 0.19567311903302093, "grad_norm": 0.7162893091179905, "learning_rate": 1.787445255474453e-05, "loss": 0.7465, "step": 6702 }, { "epoch": 0.1957023152608683, "grad_norm": 0.6652637271403995, "learning_rate": 1.787380373073804e-05, "loss": 0.6828, "step": 6703 }, { "epoch": 0.19573151148871565, "grad_norm": 0.7193062549340982, "learning_rate": 1.7873154906731553e-05, "loss": 0.6687, "step": 6704 }, { "epoch": 0.195760707716563, "grad_norm": 0.6862738128334493, "learning_rate": 1.787250608272506e-05, "loss": 0.6136, "step": 6705 }, { "epoch": 0.19578990394441037, "grad_norm": 0.6701886653310111, "learning_rate": 1.7871857258718573e-05, "loss": 0.6103, "step": 6706 }, { "epoch": 0.19581910017225773, "grad_norm": 0.6377703169704172, "learning_rate": 1.7871208434712085e-05, "loss": 0.5611, "step": 6707 }, { "epoch": 0.1958482964001051, "grad_norm": 0.7029176170597391, "learning_rate": 1.7870559610705597e-05, "loss": 0.724, "step": 6708 }, { "epoch": 0.19587749262795248, "grad_norm": 0.6739496343717049, "learning_rate": 1.786991078669911e-05, "loss": 0.6849, "step": 6709 }, { "epoch": 0.19590668885579984, "grad_norm": 0.6915941186658983, "learning_rate": 1.786926196269262e-05, "loss": 0.6789, "step": 6710 }, { "epoch": 0.1959358850836472, "grad_norm": 0.7454935858532244, "learning_rate": 1.7868613138686133e-05, "loss": 0.6725, "step": 6711 }, { "epoch": 0.19596508131149457, "grad_norm": 0.6861517156773443, "learning_rate": 1.7867964314679645e-05, "loss": 0.5783, "step": 6712 }, { "epoch": 0.19599427753934193, "grad_norm": 0.6339974961892886, "learning_rate": 1.7867315490673158e-05, "loss": 0.6106, "step": 6713 }, { "epoch": 0.1960234737671893, "grad_norm": 0.7078156280781018, "learning_rate": 1.7866666666666666e-05, "loss": 0.7055, "step": 6714 }, { "epoch": 0.19605266999503665, "grad_norm": 0.6578058400271382, "learning_rate": 1.7866017842660178e-05, "loss": 0.6401, "step": 6715 }, { "epoch": 0.196081866222884, "grad_norm": 0.6716861979067668, "learning_rate": 1.786536901865369e-05, "loss": 0.6625, "step": 6716 }, { "epoch": 0.19611106245073137, "grad_norm": 0.5963371073326058, "learning_rate": 1.7864720194647202e-05, "loss": 0.5614, "step": 6717 }, { "epoch": 0.19614025867857873, "grad_norm": 0.6487131907279649, "learning_rate": 1.7864071370640714e-05, "loss": 0.6089, "step": 6718 }, { "epoch": 0.1961694549064261, "grad_norm": 0.7803352981313144, "learning_rate": 1.7863422546634226e-05, "loss": 0.7056, "step": 6719 }, { "epoch": 0.19619865113427346, "grad_norm": 0.7344831401667989, "learning_rate": 1.7862773722627738e-05, "loss": 0.7314, "step": 6720 }, { "epoch": 0.19622784736212082, "grad_norm": 0.6959677679636006, "learning_rate": 1.786212489862125e-05, "loss": 0.671, "step": 6721 }, { "epoch": 0.19625704358996818, "grad_norm": 0.722640637042746, "learning_rate": 1.7861476074614762e-05, "loss": 0.7297, "step": 6722 }, { "epoch": 0.19628623981781554, "grad_norm": 0.6674430763985137, "learning_rate": 1.7860827250608274e-05, "loss": 0.5517, "step": 6723 }, { "epoch": 0.1963154360456629, "grad_norm": 0.6944735573116219, "learning_rate": 1.7860178426601786e-05, "loss": 0.6971, "step": 6724 }, { "epoch": 0.19634463227351026, "grad_norm": 0.8501751517621835, "learning_rate": 1.78595296025953e-05, "loss": 0.7354, "step": 6725 }, { "epoch": 0.19637382850135762, "grad_norm": 0.6540040836052373, "learning_rate": 1.785888077858881e-05, "loss": 0.6432, "step": 6726 }, { "epoch": 0.19640302472920498, "grad_norm": 0.8528878516166685, "learning_rate": 1.7858231954582322e-05, "loss": 0.6352, "step": 6727 }, { "epoch": 0.19643222095705234, "grad_norm": 0.7425900820904737, "learning_rate": 1.7857583130575835e-05, "loss": 0.6519, "step": 6728 }, { "epoch": 0.1964614171848997, "grad_norm": 0.664374233838547, "learning_rate": 1.7856934306569343e-05, "loss": 0.6761, "step": 6729 }, { "epoch": 0.19649061341274707, "grad_norm": 0.684797617958135, "learning_rate": 1.7856285482562855e-05, "loss": 0.6833, "step": 6730 }, { "epoch": 0.19651980964059443, "grad_norm": 0.65303655094157, "learning_rate": 1.7855636658556367e-05, "loss": 0.581, "step": 6731 }, { "epoch": 0.1965490058684418, "grad_norm": 0.7882673537786518, "learning_rate": 1.785498783454988e-05, "loss": 0.7238, "step": 6732 }, { "epoch": 0.19657820209628915, "grad_norm": 0.6829906236916393, "learning_rate": 1.785433901054339e-05, "loss": 0.6595, "step": 6733 }, { "epoch": 0.1966073983241365, "grad_norm": 0.6381713668074066, "learning_rate": 1.7853690186536903e-05, "loss": 0.6147, "step": 6734 }, { "epoch": 0.19663659455198387, "grad_norm": 0.6820820296270832, "learning_rate": 1.7853041362530415e-05, "loss": 0.6548, "step": 6735 }, { "epoch": 0.19666579077983123, "grad_norm": 1.1823940062418188, "learning_rate": 1.7852392538523927e-05, "loss": 0.643, "step": 6736 }, { "epoch": 0.1966949870076786, "grad_norm": 0.6697115626448692, "learning_rate": 1.785174371451744e-05, "loss": 0.6214, "step": 6737 }, { "epoch": 0.19672418323552596, "grad_norm": 0.6856354667169662, "learning_rate": 1.7851094890510948e-05, "loss": 0.6521, "step": 6738 }, { "epoch": 0.19675337946337335, "grad_norm": 0.7432718781968413, "learning_rate": 1.7850446066504463e-05, "loss": 0.7492, "step": 6739 }, { "epoch": 0.1967825756912207, "grad_norm": 0.7305537353509252, "learning_rate": 1.7849797242497975e-05, "loss": 0.7607, "step": 6740 }, { "epoch": 0.19681177191906807, "grad_norm": 0.6930775023583959, "learning_rate": 1.7849148418491487e-05, "loss": 0.6348, "step": 6741 }, { "epoch": 0.19684096814691543, "grad_norm": 0.6674162958100331, "learning_rate": 1.7848499594485e-05, "loss": 0.6229, "step": 6742 }, { "epoch": 0.1968701643747628, "grad_norm": 0.6791226247538728, "learning_rate": 1.7847850770478508e-05, "loss": 0.6866, "step": 6743 }, { "epoch": 0.19689936060261015, "grad_norm": 0.6949246575295852, "learning_rate": 1.784720194647202e-05, "loss": 0.7458, "step": 6744 }, { "epoch": 0.1969285568304575, "grad_norm": 0.6847396921817114, "learning_rate": 1.7846553122465532e-05, "loss": 0.6263, "step": 6745 }, { "epoch": 0.19695775305830487, "grad_norm": 0.6399415443790781, "learning_rate": 1.7845904298459044e-05, "loss": 0.61, "step": 6746 }, { "epoch": 0.19698694928615224, "grad_norm": 0.6969860748631006, "learning_rate": 1.7845255474452556e-05, "loss": 0.6575, "step": 6747 }, { "epoch": 0.1970161455139996, "grad_norm": 0.6894813668755632, "learning_rate": 1.7844606650446068e-05, "loss": 0.7053, "step": 6748 }, { "epoch": 0.19704534174184696, "grad_norm": 0.7083588204457678, "learning_rate": 1.784395782643958e-05, "loss": 0.6779, "step": 6749 }, { "epoch": 0.19707453796969432, "grad_norm": 0.779198876499393, "learning_rate": 1.7843309002433092e-05, "loss": 0.7198, "step": 6750 }, { "epoch": 0.19710373419754168, "grad_norm": 0.7406736990552963, "learning_rate": 1.7842660178426604e-05, "loss": 0.7358, "step": 6751 }, { "epoch": 0.19713293042538904, "grad_norm": 0.790459883624758, "learning_rate": 1.7842011354420113e-05, "loss": 0.7596, "step": 6752 }, { "epoch": 0.1971621266532364, "grad_norm": 0.6802290774194578, "learning_rate": 1.7841362530413625e-05, "loss": 0.6483, "step": 6753 }, { "epoch": 0.19719132288108376, "grad_norm": 0.7139814444997847, "learning_rate": 1.7840713706407137e-05, "loss": 0.6974, "step": 6754 }, { "epoch": 0.19722051910893112, "grad_norm": 0.717090122869085, "learning_rate": 1.784006488240065e-05, "loss": 0.7511, "step": 6755 }, { "epoch": 0.19724971533677849, "grad_norm": 0.6991911677600801, "learning_rate": 1.783941605839416e-05, "loss": 0.6968, "step": 6756 }, { "epoch": 0.19727891156462585, "grad_norm": 0.6893010452954472, "learning_rate": 1.7838767234387673e-05, "loss": 0.635, "step": 6757 }, { "epoch": 0.1973081077924732, "grad_norm": 0.7225116095423118, "learning_rate": 1.7838118410381185e-05, "loss": 0.682, "step": 6758 }, { "epoch": 0.19733730402032057, "grad_norm": 0.6731869933573648, "learning_rate": 1.7837469586374697e-05, "loss": 0.6444, "step": 6759 }, { "epoch": 0.19736650024816793, "grad_norm": 0.7038139812263638, "learning_rate": 1.783682076236821e-05, "loss": 0.6737, "step": 6760 }, { "epoch": 0.1973956964760153, "grad_norm": 0.7015839119093701, "learning_rate": 1.783617193836172e-05, "loss": 0.6605, "step": 6761 }, { "epoch": 0.19742489270386265, "grad_norm": 0.684170860108434, "learning_rate": 1.7835523114355233e-05, "loss": 0.6866, "step": 6762 }, { "epoch": 0.19745408893171001, "grad_norm": 0.6396939297614759, "learning_rate": 1.7834874290348745e-05, "loss": 0.5642, "step": 6763 }, { "epoch": 0.19748328515955738, "grad_norm": 0.6960483206761922, "learning_rate": 1.7834225466342257e-05, "loss": 0.6844, "step": 6764 }, { "epoch": 0.19751248138740474, "grad_norm": 0.7486199084556266, "learning_rate": 1.783357664233577e-05, "loss": 0.6682, "step": 6765 }, { "epoch": 0.1975416776152521, "grad_norm": 0.7384923167315539, "learning_rate": 1.783292781832928e-05, "loss": 0.7441, "step": 6766 }, { "epoch": 0.19757087384309946, "grad_norm": 0.7927723811359786, "learning_rate": 1.783227899432279e-05, "loss": 0.7324, "step": 6767 }, { "epoch": 0.19760007007094682, "grad_norm": 0.7495978723951565, "learning_rate": 1.7831630170316302e-05, "loss": 0.6254, "step": 6768 }, { "epoch": 0.1976292662987942, "grad_norm": 0.7777346931611037, "learning_rate": 1.7830981346309814e-05, "loss": 0.6935, "step": 6769 }, { "epoch": 0.19765846252664157, "grad_norm": 0.6796535221014999, "learning_rate": 1.7830332522303326e-05, "loss": 0.6288, "step": 6770 }, { "epoch": 0.19768765875448893, "grad_norm": 0.6548517738825973, "learning_rate": 1.7829683698296838e-05, "loss": 0.6768, "step": 6771 }, { "epoch": 0.1977168549823363, "grad_norm": 0.7413445867171177, "learning_rate": 1.782903487429035e-05, "loss": 0.7141, "step": 6772 }, { "epoch": 0.19774605121018365, "grad_norm": 0.7136719056996883, "learning_rate": 1.7828386050283862e-05, "loss": 0.6592, "step": 6773 }, { "epoch": 0.19777524743803102, "grad_norm": 0.7190214100325851, "learning_rate": 1.7827737226277374e-05, "loss": 0.7213, "step": 6774 }, { "epoch": 0.19780444366587838, "grad_norm": 0.6578074001560225, "learning_rate": 1.7827088402270883e-05, "loss": 0.5897, "step": 6775 }, { "epoch": 0.19783363989372574, "grad_norm": 0.7478765598658155, "learning_rate": 1.7826439578264395e-05, "loss": 0.7624, "step": 6776 }, { "epoch": 0.1978628361215731, "grad_norm": 0.6985647739257378, "learning_rate": 1.782579075425791e-05, "loss": 0.6744, "step": 6777 }, { "epoch": 0.19789203234942046, "grad_norm": 0.7165796986142727, "learning_rate": 1.7825141930251422e-05, "loss": 0.6572, "step": 6778 }, { "epoch": 0.19792122857726782, "grad_norm": 0.7312780137680112, "learning_rate": 1.7824493106244934e-05, "loss": 0.6692, "step": 6779 }, { "epoch": 0.19795042480511518, "grad_norm": 0.6430875003157589, "learning_rate": 1.7823844282238446e-05, "loss": 0.606, "step": 6780 }, { "epoch": 0.19797962103296254, "grad_norm": 0.6096061107607795, "learning_rate": 1.7823195458231955e-05, "loss": 0.5286, "step": 6781 }, { "epoch": 0.1980088172608099, "grad_norm": 0.7050103541404306, "learning_rate": 1.7822546634225467e-05, "loss": 0.6585, "step": 6782 }, { "epoch": 0.19803801348865727, "grad_norm": 0.7085729523832407, "learning_rate": 1.782189781021898e-05, "loss": 0.6882, "step": 6783 }, { "epoch": 0.19806720971650463, "grad_norm": 0.6705954295391882, "learning_rate": 1.782124898621249e-05, "loss": 0.6183, "step": 6784 }, { "epoch": 0.198096405944352, "grad_norm": 0.7404650007465395, "learning_rate": 1.7820600162206003e-05, "loss": 0.7454, "step": 6785 }, { "epoch": 0.19812560217219935, "grad_norm": 0.6811873790845667, "learning_rate": 1.7819951338199515e-05, "loss": 0.6373, "step": 6786 }, { "epoch": 0.1981547984000467, "grad_norm": 0.6765208630420096, "learning_rate": 1.7819302514193027e-05, "loss": 0.6451, "step": 6787 }, { "epoch": 0.19818399462789407, "grad_norm": 0.7651510433632522, "learning_rate": 1.781865369018654e-05, "loss": 0.776, "step": 6788 }, { "epoch": 0.19821319085574143, "grad_norm": 0.7222386339962577, "learning_rate": 1.781800486618005e-05, "loss": 0.684, "step": 6789 }, { "epoch": 0.1982423870835888, "grad_norm": 0.663444177707204, "learning_rate": 1.781735604217356e-05, "loss": 0.6368, "step": 6790 }, { "epoch": 0.19827158331143616, "grad_norm": 0.6721443893966609, "learning_rate": 1.7816707218167072e-05, "loss": 0.6143, "step": 6791 }, { "epoch": 0.19830077953928352, "grad_norm": 0.7064896822633945, "learning_rate": 1.7816058394160584e-05, "loss": 0.6554, "step": 6792 }, { "epoch": 0.19832997576713088, "grad_norm": 0.6816649561164148, "learning_rate": 1.7815409570154096e-05, "loss": 0.5916, "step": 6793 }, { "epoch": 0.19835917199497824, "grad_norm": 0.7062872061803062, "learning_rate": 1.7814760746147608e-05, "loss": 0.6936, "step": 6794 }, { "epoch": 0.1983883682228256, "grad_norm": 0.7241180651002893, "learning_rate": 1.781411192214112e-05, "loss": 0.7222, "step": 6795 }, { "epoch": 0.19841756445067296, "grad_norm": 0.7195339060872246, "learning_rate": 1.7813463098134632e-05, "loss": 0.6979, "step": 6796 }, { "epoch": 0.19844676067852032, "grad_norm": 0.6580299805759876, "learning_rate": 1.7812814274128144e-05, "loss": 0.5889, "step": 6797 }, { "epoch": 0.19847595690636768, "grad_norm": 0.6710981418227286, "learning_rate": 1.7812165450121656e-05, "loss": 0.6603, "step": 6798 }, { "epoch": 0.19850515313421507, "grad_norm": 0.6251218590043056, "learning_rate": 1.7811516626115168e-05, "loss": 0.5655, "step": 6799 }, { "epoch": 0.19853434936206243, "grad_norm": 0.7449849491108537, "learning_rate": 1.781086780210868e-05, "loss": 0.6837, "step": 6800 }, { "epoch": 0.1985635455899098, "grad_norm": 0.7281316286223779, "learning_rate": 1.7810218978102192e-05, "loss": 0.7495, "step": 6801 }, { "epoch": 0.19859274181775716, "grad_norm": 0.654341006322472, "learning_rate": 1.7809570154095704e-05, "loss": 0.5713, "step": 6802 }, { "epoch": 0.19862193804560452, "grad_norm": 0.6470898295372993, "learning_rate": 1.7808921330089216e-05, "loss": 0.5676, "step": 6803 }, { "epoch": 0.19865113427345188, "grad_norm": 0.6031796629399947, "learning_rate": 1.7808272506082728e-05, "loss": 0.5571, "step": 6804 }, { "epoch": 0.19868033050129924, "grad_norm": 0.9202221825718973, "learning_rate": 1.7807623682076237e-05, "loss": 0.683, "step": 6805 }, { "epoch": 0.1987095267291466, "grad_norm": 0.7009269596743515, "learning_rate": 1.780697485806975e-05, "loss": 0.7058, "step": 6806 }, { "epoch": 0.19873872295699396, "grad_norm": 0.6898773526992441, "learning_rate": 1.780632603406326e-05, "loss": 0.6367, "step": 6807 }, { "epoch": 0.19876791918484132, "grad_norm": 0.6703619863380816, "learning_rate": 1.7805677210056773e-05, "loss": 0.6751, "step": 6808 }, { "epoch": 0.19879711541268869, "grad_norm": 0.6614526891382131, "learning_rate": 1.7805028386050285e-05, "loss": 0.6371, "step": 6809 }, { "epoch": 0.19882631164053605, "grad_norm": 0.6999281383735559, "learning_rate": 1.7804379562043797e-05, "loss": 0.6818, "step": 6810 }, { "epoch": 0.1988555078683834, "grad_norm": 0.678052919457369, "learning_rate": 1.780373073803731e-05, "loss": 0.6353, "step": 6811 }, { "epoch": 0.19888470409623077, "grad_norm": 0.6947220517903927, "learning_rate": 1.780308191403082e-05, "loss": 0.6478, "step": 6812 }, { "epoch": 0.19891390032407813, "grad_norm": 0.7799651532166341, "learning_rate": 1.780243309002433e-05, "loss": 0.7496, "step": 6813 }, { "epoch": 0.1989430965519255, "grad_norm": 0.6935819148364453, "learning_rate": 1.7801784266017842e-05, "loss": 0.6515, "step": 6814 }, { "epoch": 0.19897229277977285, "grad_norm": 0.687131471694409, "learning_rate": 1.7801135442011357e-05, "loss": 0.6047, "step": 6815 }, { "epoch": 0.1990014890076202, "grad_norm": 0.6868264308009826, "learning_rate": 1.780048661800487e-05, "loss": 0.6306, "step": 6816 }, { "epoch": 0.19903068523546757, "grad_norm": 0.737379285228564, "learning_rate": 1.779983779399838e-05, "loss": 0.7517, "step": 6817 }, { "epoch": 0.19905988146331494, "grad_norm": 0.693201382740082, "learning_rate": 1.7799188969991893e-05, "loss": 0.6919, "step": 6818 }, { "epoch": 0.1990890776911623, "grad_norm": 0.7079913995024824, "learning_rate": 1.7798540145985402e-05, "loss": 0.7174, "step": 6819 }, { "epoch": 0.19911827391900966, "grad_norm": 0.7035323643563767, "learning_rate": 1.7797891321978914e-05, "loss": 0.681, "step": 6820 }, { "epoch": 0.19914747014685702, "grad_norm": 0.6524754586339547, "learning_rate": 1.7797242497972426e-05, "loss": 0.5915, "step": 6821 }, { "epoch": 0.19917666637470438, "grad_norm": 0.6967310001509245, "learning_rate": 1.7796593673965938e-05, "loss": 0.6135, "step": 6822 }, { "epoch": 0.19920586260255174, "grad_norm": 0.6451618128417352, "learning_rate": 1.779594484995945e-05, "loss": 0.6369, "step": 6823 }, { "epoch": 0.1992350588303991, "grad_norm": 0.6443824552400709, "learning_rate": 1.7795296025952962e-05, "loss": 0.6176, "step": 6824 }, { "epoch": 0.19926425505824646, "grad_norm": 0.7543571329978253, "learning_rate": 1.7794647201946474e-05, "loss": 0.6777, "step": 6825 }, { "epoch": 0.19929345128609383, "grad_norm": 0.705248991020028, "learning_rate": 1.7793998377939986e-05, "loss": 0.6653, "step": 6826 }, { "epoch": 0.1993226475139412, "grad_norm": 0.6727392742855982, "learning_rate": 1.7793349553933498e-05, "loss": 0.6261, "step": 6827 }, { "epoch": 0.19935184374178855, "grad_norm": 0.6910276313761514, "learning_rate": 1.7792700729927007e-05, "loss": 0.6482, "step": 6828 }, { "epoch": 0.1993810399696359, "grad_norm": 0.698155694635263, "learning_rate": 1.779205190592052e-05, "loss": 0.6868, "step": 6829 }, { "epoch": 0.1994102361974833, "grad_norm": 0.6380133435861034, "learning_rate": 1.779140308191403e-05, "loss": 0.632, "step": 6830 }, { "epoch": 0.19943943242533066, "grad_norm": 0.7658996692086909, "learning_rate": 1.7790754257907543e-05, "loss": 0.7937, "step": 6831 }, { "epoch": 0.19946862865317802, "grad_norm": 0.6875856533134594, "learning_rate": 1.7790105433901055e-05, "loss": 0.5982, "step": 6832 }, { "epoch": 0.19949782488102538, "grad_norm": 0.7344379751802432, "learning_rate": 1.7789456609894567e-05, "loss": 0.7417, "step": 6833 }, { "epoch": 0.19952702110887274, "grad_norm": 0.7066702446802438, "learning_rate": 1.778880778588808e-05, "loss": 0.6625, "step": 6834 }, { "epoch": 0.1995562173367201, "grad_norm": 0.6404837960717212, "learning_rate": 1.778815896188159e-05, "loss": 0.5727, "step": 6835 }, { "epoch": 0.19958541356456747, "grad_norm": 0.7190216262444198, "learning_rate": 1.7787510137875103e-05, "loss": 0.6525, "step": 6836 }, { "epoch": 0.19961460979241483, "grad_norm": 0.7292189590639084, "learning_rate": 1.7786861313868615e-05, "loss": 0.6887, "step": 6837 }, { "epoch": 0.1996438060202622, "grad_norm": 0.7079726950367993, "learning_rate": 1.7786212489862127e-05, "loss": 0.6577, "step": 6838 }, { "epoch": 0.19967300224810955, "grad_norm": 0.63069130769254, "learning_rate": 1.778556366585564e-05, "loss": 0.5936, "step": 6839 }, { "epoch": 0.1997021984759569, "grad_norm": 0.6894700732456743, "learning_rate": 1.778491484184915e-05, "loss": 0.6139, "step": 6840 }, { "epoch": 0.19973139470380427, "grad_norm": 0.7124195816564657, "learning_rate": 1.7784266017842663e-05, "loss": 0.6784, "step": 6841 }, { "epoch": 0.19976059093165163, "grad_norm": 0.6703094611394177, "learning_rate": 1.7783617193836175e-05, "loss": 0.6112, "step": 6842 }, { "epoch": 0.199789787159499, "grad_norm": 0.6718427022771761, "learning_rate": 1.7782968369829684e-05, "loss": 0.6374, "step": 6843 }, { "epoch": 0.19981898338734636, "grad_norm": 0.6803671380977604, "learning_rate": 1.7782319545823196e-05, "loss": 0.6051, "step": 6844 }, { "epoch": 0.19984817961519372, "grad_norm": 0.6944110824304516, "learning_rate": 1.7781670721816708e-05, "loss": 0.6635, "step": 6845 }, { "epoch": 0.19987737584304108, "grad_norm": 0.6907144722996446, "learning_rate": 1.778102189781022e-05, "loss": 0.644, "step": 6846 }, { "epoch": 0.19990657207088844, "grad_norm": 0.651136229881445, "learning_rate": 1.7780373073803732e-05, "loss": 0.5822, "step": 6847 }, { "epoch": 0.1999357682987358, "grad_norm": 0.6634170617610431, "learning_rate": 1.7779724249797244e-05, "loss": 0.6212, "step": 6848 }, { "epoch": 0.19996496452658316, "grad_norm": 0.6720362938626072, "learning_rate": 1.7779075425790756e-05, "loss": 0.576, "step": 6849 }, { "epoch": 0.19999416075443052, "grad_norm": 0.6965401346682426, "learning_rate": 1.7778426601784268e-05, "loss": 0.632, "step": 6850 }, { "epoch": 0.20002335698227788, "grad_norm": 0.6629216483379121, "learning_rate": 1.7777777777777777e-05, "loss": 0.5866, "step": 6851 }, { "epoch": 0.20005255321012524, "grad_norm": 0.6565022424300901, "learning_rate": 1.777712895377129e-05, "loss": 0.5707, "step": 6852 }, { "epoch": 0.2000817494379726, "grad_norm": 0.7688205069775763, "learning_rate": 1.7776480129764804e-05, "loss": 0.7348, "step": 6853 }, { "epoch": 0.20011094566581997, "grad_norm": 0.7902667553958621, "learning_rate": 1.7775831305758316e-05, "loss": 0.7217, "step": 6854 }, { "epoch": 0.20014014189366733, "grad_norm": 0.7911872880327823, "learning_rate": 1.7775182481751828e-05, "loss": 0.7731, "step": 6855 }, { "epoch": 0.2001693381215147, "grad_norm": 0.720887985877199, "learning_rate": 1.777453365774534e-05, "loss": 0.7244, "step": 6856 }, { "epoch": 0.20019853434936205, "grad_norm": 0.6718694243950261, "learning_rate": 1.777388483373885e-05, "loss": 0.6121, "step": 6857 }, { "epoch": 0.2002277305772094, "grad_norm": 0.6774165627958698, "learning_rate": 1.777323600973236e-05, "loss": 0.6603, "step": 6858 }, { "epoch": 0.20025692680505677, "grad_norm": 0.6923051418106337, "learning_rate": 1.7772587185725873e-05, "loss": 0.662, "step": 6859 }, { "epoch": 0.20028612303290416, "grad_norm": 0.70670468277527, "learning_rate": 1.7771938361719385e-05, "loss": 0.6129, "step": 6860 }, { "epoch": 0.20031531926075152, "grad_norm": 0.6973563576595304, "learning_rate": 1.7771289537712897e-05, "loss": 0.6776, "step": 6861 }, { "epoch": 0.20034451548859888, "grad_norm": 0.7508493914442658, "learning_rate": 1.777064071370641e-05, "loss": 0.6853, "step": 6862 }, { "epoch": 0.20037371171644625, "grad_norm": 0.634246809587527, "learning_rate": 1.776999188969992e-05, "loss": 0.6245, "step": 6863 }, { "epoch": 0.2004029079442936, "grad_norm": 0.6784450987109434, "learning_rate": 1.7769343065693433e-05, "loss": 0.6993, "step": 6864 }, { "epoch": 0.20043210417214097, "grad_norm": 0.6338175707161108, "learning_rate": 1.7768694241686945e-05, "loss": 0.5776, "step": 6865 }, { "epoch": 0.20046130039998833, "grad_norm": 0.6307294613357481, "learning_rate": 1.7768045417680454e-05, "loss": 0.5996, "step": 6866 }, { "epoch": 0.2004904966278357, "grad_norm": 0.7041828384405905, "learning_rate": 1.7767396593673966e-05, "loss": 0.7194, "step": 6867 }, { "epoch": 0.20051969285568305, "grad_norm": 0.6625803864221369, "learning_rate": 1.7766747769667478e-05, "loss": 0.6511, "step": 6868 }, { "epoch": 0.2005488890835304, "grad_norm": 0.7108189357555154, "learning_rate": 1.776609894566099e-05, "loss": 0.6927, "step": 6869 }, { "epoch": 0.20057808531137777, "grad_norm": 0.6489157242759871, "learning_rate": 1.7765450121654502e-05, "loss": 0.6412, "step": 6870 }, { "epoch": 0.20060728153922514, "grad_norm": 0.6518485122428042, "learning_rate": 1.7764801297648017e-05, "loss": 0.5898, "step": 6871 }, { "epoch": 0.2006364777670725, "grad_norm": 0.6795050597079908, "learning_rate": 1.7764152473641526e-05, "loss": 0.6602, "step": 6872 }, { "epoch": 0.20066567399491986, "grad_norm": 0.8468877245191877, "learning_rate": 1.7763503649635038e-05, "loss": 0.6105, "step": 6873 }, { "epoch": 0.20069487022276722, "grad_norm": 0.6587394759095289, "learning_rate": 1.776285482562855e-05, "loss": 0.6114, "step": 6874 }, { "epoch": 0.20072406645061458, "grad_norm": 0.7023786015494209, "learning_rate": 1.7762206001622062e-05, "loss": 0.6858, "step": 6875 }, { "epoch": 0.20075326267846194, "grad_norm": 0.6802136304526963, "learning_rate": 1.7761557177615574e-05, "loss": 0.654, "step": 6876 }, { "epoch": 0.2007824589063093, "grad_norm": 0.6707228940281706, "learning_rate": 1.7760908353609086e-05, "loss": 0.6965, "step": 6877 }, { "epoch": 0.20081165513415666, "grad_norm": 0.6976909037438648, "learning_rate": 1.7760259529602598e-05, "loss": 0.6689, "step": 6878 }, { "epoch": 0.20084085136200402, "grad_norm": 0.7466285892811381, "learning_rate": 1.775961070559611e-05, "loss": 0.755, "step": 6879 }, { "epoch": 0.20087004758985139, "grad_norm": 0.6646285039527858, "learning_rate": 1.7758961881589622e-05, "loss": 0.6143, "step": 6880 }, { "epoch": 0.20089924381769875, "grad_norm": 0.6586560516721703, "learning_rate": 1.775831305758313e-05, "loss": 0.5916, "step": 6881 }, { "epoch": 0.2009284400455461, "grad_norm": 0.6764135118783275, "learning_rate": 1.7757664233576643e-05, "loss": 0.6767, "step": 6882 }, { "epoch": 0.20095763627339347, "grad_norm": 0.6559883039155695, "learning_rate": 1.7757015409570155e-05, "loss": 0.6419, "step": 6883 }, { "epoch": 0.20098683250124083, "grad_norm": 0.6361489445202131, "learning_rate": 1.7756366585563667e-05, "loss": 0.6117, "step": 6884 }, { "epoch": 0.2010160287290882, "grad_norm": 0.9796346003006797, "learning_rate": 1.775571776155718e-05, "loss": 0.7422, "step": 6885 }, { "epoch": 0.20104522495693555, "grad_norm": 0.733026567757311, "learning_rate": 1.775506893755069e-05, "loss": 0.7112, "step": 6886 }, { "epoch": 0.20107442118478291, "grad_norm": 0.6310897382407784, "learning_rate": 1.7754420113544203e-05, "loss": 0.595, "step": 6887 }, { "epoch": 0.20110361741263028, "grad_norm": 0.6190788359452147, "learning_rate": 1.7753771289537715e-05, "loss": 0.5792, "step": 6888 }, { "epoch": 0.20113281364047764, "grad_norm": 0.6824200174368186, "learning_rate": 1.7753122465531223e-05, "loss": 0.6443, "step": 6889 }, { "epoch": 0.20116200986832503, "grad_norm": 0.668254982434424, "learning_rate": 1.775247364152474e-05, "loss": 0.6695, "step": 6890 }, { "epoch": 0.2011912060961724, "grad_norm": 0.6718759913964436, "learning_rate": 1.775182481751825e-05, "loss": 0.611, "step": 6891 }, { "epoch": 0.20122040232401975, "grad_norm": 0.66948298906811, "learning_rate": 1.7751175993511763e-05, "loss": 0.6738, "step": 6892 }, { "epoch": 0.2012495985518671, "grad_norm": 0.6439109236166324, "learning_rate": 1.7750527169505275e-05, "loss": 0.6258, "step": 6893 }, { "epoch": 0.20127879477971447, "grad_norm": 0.7243356378385454, "learning_rate": 1.7749878345498787e-05, "loss": 0.7229, "step": 6894 }, { "epoch": 0.20130799100756183, "grad_norm": 0.6715752412558995, "learning_rate": 1.7749229521492296e-05, "loss": 0.6453, "step": 6895 }, { "epoch": 0.2013371872354092, "grad_norm": 0.6449474519100645, "learning_rate": 1.7748580697485808e-05, "loss": 0.5922, "step": 6896 }, { "epoch": 0.20136638346325655, "grad_norm": 0.7041668522552029, "learning_rate": 1.774793187347932e-05, "loss": 0.7231, "step": 6897 }, { "epoch": 0.20139557969110392, "grad_norm": 0.6505191407612492, "learning_rate": 1.7747283049472832e-05, "loss": 0.5875, "step": 6898 }, { "epoch": 0.20142477591895128, "grad_norm": 0.6628734858344226, "learning_rate": 1.7746634225466344e-05, "loss": 0.6283, "step": 6899 }, { "epoch": 0.20145397214679864, "grad_norm": 0.6099568292082811, "learning_rate": 1.7745985401459856e-05, "loss": 0.5731, "step": 6900 }, { "epoch": 0.201483168374646, "grad_norm": 0.7062982971251748, "learning_rate": 1.7745336577453368e-05, "loss": 0.6825, "step": 6901 }, { "epoch": 0.20151236460249336, "grad_norm": 0.671934185215661, "learning_rate": 1.774468775344688e-05, "loss": 0.616, "step": 6902 }, { "epoch": 0.20154156083034072, "grad_norm": 0.6820207737759536, "learning_rate": 1.7744038929440392e-05, "loss": 0.683, "step": 6903 }, { "epoch": 0.20157075705818808, "grad_norm": 0.7306542419639815, "learning_rate": 1.77433901054339e-05, "loss": 0.6997, "step": 6904 }, { "epoch": 0.20159995328603544, "grad_norm": 0.641174818394485, "learning_rate": 1.7742741281427413e-05, "loss": 0.5816, "step": 6905 }, { "epoch": 0.2016291495138828, "grad_norm": 0.6361820478387844, "learning_rate": 1.7742092457420925e-05, "loss": 0.6085, "step": 6906 }, { "epoch": 0.20165834574173017, "grad_norm": 0.6423715853259999, "learning_rate": 1.7741443633414437e-05, "loss": 0.6026, "step": 6907 }, { "epoch": 0.20168754196957753, "grad_norm": 0.8686930235355893, "learning_rate": 1.774079480940795e-05, "loss": 0.7419, "step": 6908 }, { "epoch": 0.2017167381974249, "grad_norm": 0.7063920576229678, "learning_rate": 1.7740145985401464e-05, "loss": 0.7098, "step": 6909 }, { "epoch": 0.20174593442527225, "grad_norm": 0.7392910882619689, "learning_rate": 1.7739497161394973e-05, "loss": 0.7013, "step": 6910 }, { "epoch": 0.2017751306531196, "grad_norm": 0.702299905307751, "learning_rate": 1.7738848337388485e-05, "loss": 0.6776, "step": 6911 }, { "epoch": 0.20180432688096697, "grad_norm": 0.720271994017841, "learning_rate": 1.7738199513381997e-05, "loss": 0.7449, "step": 6912 }, { "epoch": 0.20183352310881433, "grad_norm": 0.6834131996984868, "learning_rate": 1.773755068937551e-05, "loss": 0.6986, "step": 6913 }, { "epoch": 0.2018627193366617, "grad_norm": 0.6937047300905848, "learning_rate": 1.773690186536902e-05, "loss": 0.649, "step": 6914 }, { "epoch": 0.20189191556450906, "grad_norm": 0.7114887641977099, "learning_rate": 1.7736253041362533e-05, "loss": 0.7334, "step": 6915 }, { "epoch": 0.20192111179235642, "grad_norm": 0.7248579325737248, "learning_rate": 1.7735604217356045e-05, "loss": 0.7123, "step": 6916 }, { "epoch": 0.20195030802020378, "grad_norm": 0.7092540245463824, "learning_rate": 1.7734955393349557e-05, "loss": 0.6789, "step": 6917 }, { "epoch": 0.20197950424805114, "grad_norm": 0.6762944214808005, "learning_rate": 1.7734306569343065e-05, "loss": 0.6725, "step": 6918 }, { "epoch": 0.2020087004758985, "grad_norm": 0.7609039587263114, "learning_rate": 1.7733657745336578e-05, "loss": 0.6688, "step": 6919 }, { "epoch": 0.2020378967037459, "grad_norm": 0.6448329556155079, "learning_rate": 1.773300892133009e-05, "loss": 0.5787, "step": 6920 }, { "epoch": 0.20206709293159325, "grad_norm": 0.6675565691916407, "learning_rate": 1.77323600973236e-05, "loss": 0.601, "step": 6921 }, { "epoch": 0.2020962891594406, "grad_norm": 0.7152828811942955, "learning_rate": 1.7731711273317114e-05, "loss": 0.6746, "step": 6922 }, { "epoch": 0.20212548538728797, "grad_norm": 0.6234359451059556, "learning_rate": 1.7731062449310626e-05, "loss": 0.5819, "step": 6923 }, { "epoch": 0.20215468161513533, "grad_norm": 0.655200589974953, "learning_rate": 1.7730413625304138e-05, "loss": 0.6185, "step": 6924 }, { "epoch": 0.2021838778429827, "grad_norm": 0.6914462610024427, "learning_rate": 1.772976480129765e-05, "loss": 0.6278, "step": 6925 }, { "epoch": 0.20221307407083006, "grad_norm": 0.729260520778545, "learning_rate": 1.7729115977291162e-05, "loss": 0.6697, "step": 6926 }, { "epoch": 0.20224227029867742, "grad_norm": 1.0152755934154414, "learning_rate": 1.772846715328467e-05, "loss": 0.758, "step": 6927 }, { "epoch": 0.20227146652652478, "grad_norm": 0.6287508099052718, "learning_rate": 1.7727818329278186e-05, "loss": 0.5944, "step": 6928 }, { "epoch": 0.20230066275437214, "grad_norm": 0.7196579412940918, "learning_rate": 1.7727169505271698e-05, "loss": 0.7189, "step": 6929 }, { "epoch": 0.2023298589822195, "grad_norm": 0.7405561357701462, "learning_rate": 1.772652068126521e-05, "loss": 0.7263, "step": 6930 }, { "epoch": 0.20235905521006686, "grad_norm": 0.6656713954761391, "learning_rate": 1.7725871857258722e-05, "loss": 0.64, "step": 6931 }, { "epoch": 0.20238825143791422, "grad_norm": 0.6136720410771387, "learning_rate": 1.7725223033252234e-05, "loss": 0.5882, "step": 6932 }, { "epoch": 0.20241744766576159, "grad_norm": 0.6397997364840667, "learning_rate": 1.7724574209245742e-05, "loss": 0.6319, "step": 6933 }, { "epoch": 0.20244664389360895, "grad_norm": 0.6389775531772892, "learning_rate": 1.7723925385239255e-05, "loss": 0.6062, "step": 6934 }, { "epoch": 0.2024758401214563, "grad_norm": 0.8075684263280971, "learning_rate": 1.7723276561232767e-05, "loss": 0.8711, "step": 6935 }, { "epoch": 0.20250503634930367, "grad_norm": 0.6279299334953399, "learning_rate": 1.772262773722628e-05, "loss": 0.6012, "step": 6936 }, { "epoch": 0.20253423257715103, "grad_norm": 0.6985241981203675, "learning_rate": 1.772197891321979e-05, "loss": 0.5754, "step": 6937 }, { "epoch": 0.2025634288049984, "grad_norm": 0.7195579050559955, "learning_rate": 1.7721330089213303e-05, "loss": 0.6314, "step": 6938 }, { "epoch": 0.20259262503284575, "grad_norm": 0.7082994886723375, "learning_rate": 1.7720681265206815e-05, "loss": 0.6622, "step": 6939 }, { "epoch": 0.2026218212606931, "grad_norm": 0.687528231415589, "learning_rate": 1.7720032441200327e-05, "loss": 0.6547, "step": 6940 }, { "epoch": 0.20265101748854047, "grad_norm": 0.7175275438463569, "learning_rate": 1.771938361719384e-05, "loss": 0.6865, "step": 6941 }, { "epoch": 0.20268021371638784, "grad_norm": 0.6987771261768073, "learning_rate": 1.7718734793187347e-05, "loss": 0.6444, "step": 6942 }, { "epoch": 0.2027094099442352, "grad_norm": 0.6751108715197648, "learning_rate": 1.771808596918086e-05, "loss": 0.6603, "step": 6943 }, { "epoch": 0.20273860617208256, "grad_norm": 0.6945454765570079, "learning_rate": 1.771743714517437e-05, "loss": 0.6611, "step": 6944 }, { "epoch": 0.20276780239992992, "grad_norm": 0.7081993708138344, "learning_rate": 1.7716788321167883e-05, "loss": 0.6864, "step": 6945 }, { "epoch": 0.20279699862777728, "grad_norm": 0.6758279080376349, "learning_rate": 1.7716139497161395e-05, "loss": 0.6176, "step": 6946 }, { "epoch": 0.20282619485562464, "grad_norm": 0.6473439193442244, "learning_rate": 1.771549067315491e-05, "loss": 0.6131, "step": 6947 }, { "epoch": 0.202855391083472, "grad_norm": 0.7817517823489858, "learning_rate": 1.771484184914842e-05, "loss": 0.6929, "step": 6948 }, { "epoch": 0.20288458731131936, "grad_norm": 0.640798824826375, "learning_rate": 1.771419302514193e-05, "loss": 0.6392, "step": 6949 }, { "epoch": 0.20291378353916675, "grad_norm": 0.6834415006719962, "learning_rate": 1.7713544201135444e-05, "loss": 0.6405, "step": 6950 }, { "epoch": 0.20294297976701411, "grad_norm": 0.625242683518624, "learning_rate": 1.7712895377128956e-05, "loss": 0.5385, "step": 6951 }, { "epoch": 0.20297217599486148, "grad_norm": 0.6783862964907129, "learning_rate": 1.7712246553122468e-05, "loss": 0.5681, "step": 6952 }, { "epoch": 0.20300137222270884, "grad_norm": 0.6459654561613787, "learning_rate": 1.771159772911598e-05, "loss": 0.5563, "step": 6953 }, { "epoch": 0.2030305684505562, "grad_norm": 0.734922327300457, "learning_rate": 1.771094890510949e-05, "loss": 0.6194, "step": 6954 }, { "epoch": 0.20305976467840356, "grad_norm": 0.7807815788256717, "learning_rate": 1.7710300081103004e-05, "loss": 0.7644, "step": 6955 }, { "epoch": 0.20308896090625092, "grad_norm": 0.7225020480106196, "learning_rate": 1.7709651257096512e-05, "loss": 0.7229, "step": 6956 }, { "epoch": 0.20311815713409828, "grad_norm": 0.6588500524647549, "learning_rate": 1.7709002433090024e-05, "loss": 0.6708, "step": 6957 }, { "epoch": 0.20314735336194564, "grad_norm": 0.6324370173418189, "learning_rate": 1.7708353609083536e-05, "loss": 0.5837, "step": 6958 }, { "epoch": 0.203176549589793, "grad_norm": 0.6528650237755591, "learning_rate": 1.770770478507705e-05, "loss": 0.5907, "step": 6959 }, { "epoch": 0.20320574581764037, "grad_norm": 0.7283974198218135, "learning_rate": 1.770705596107056e-05, "loss": 0.6757, "step": 6960 }, { "epoch": 0.20323494204548773, "grad_norm": 0.7823080731791293, "learning_rate": 1.7706407137064072e-05, "loss": 0.7212, "step": 6961 }, { "epoch": 0.2032641382733351, "grad_norm": 0.7155815365021322, "learning_rate": 1.7705758313057585e-05, "loss": 0.6665, "step": 6962 }, { "epoch": 0.20329333450118245, "grad_norm": 0.6567087215481152, "learning_rate": 1.7705109489051097e-05, "loss": 0.6367, "step": 6963 }, { "epoch": 0.2033225307290298, "grad_norm": 0.7096163060266795, "learning_rate": 1.770446066504461e-05, "loss": 0.6907, "step": 6964 }, { "epoch": 0.20335172695687717, "grad_norm": 0.6916316309729371, "learning_rate": 1.7703811841038117e-05, "loss": 0.6711, "step": 6965 }, { "epoch": 0.20338092318472453, "grad_norm": 0.6766340640572568, "learning_rate": 1.7703163017031633e-05, "loss": 0.6261, "step": 6966 }, { "epoch": 0.2034101194125719, "grad_norm": 0.7034788589818078, "learning_rate": 1.7702514193025145e-05, "loss": 0.6477, "step": 6967 }, { "epoch": 0.20343931564041925, "grad_norm": 0.699635267152394, "learning_rate": 1.7701865369018657e-05, "loss": 0.6762, "step": 6968 }, { "epoch": 0.20346851186826662, "grad_norm": 0.6955160592542711, "learning_rate": 1.770121654501217e-05, "loss": 0.6724, "step": 6969 }, { "epoch": 0.20349770809611398, "grad_norm": 0.6374817103376685, "learning_rate": 1.770056772100568e-05, "loss": 0.5652, "step": 6970 }, { "epoch": 0.20352690432396134, "grad_norm": 0.7563877649888371, "learning_rate": 1.769991889699919e-05, "loss": 0.792, "step": 6971 }, { "epoch": 0.2035561005518087, "grad_norm": 0.6812710817160577, "learning_rate": 1.76992700729927e-05, "loss": 0.6701, "step": 6972 }, { "epoch": 0.20358529677965606, "grad_norm": 0.648306508835782, "learning_rate": 1.7698621248986213e-05, "loss": 0.666, "step": 6973 }, { "epoch": 0.20361449300750342, "grad_norm": 0.6138772191761144, "learning_rate": 1.7697972424979725e-05, "loss": 0.5717, "step": 6974 }, { "epoch": 0.20364368923535078, "grad_norm": 0.6527195907169446, "learning_rate": 1.7697323600973237e-05, "loss": 0.5888, "step": 6975 }, { "epoch": 0.20367288546319814, "grad_norm": 0.6349121372101915, "learning_rate": 1.769667477696675e-05, "loss": 0.5708, "step": 6976 }, { "epoch": 0.2037020816910455, "grad_norm": 0.6231734167375784, "learning_rate": 1.769602595296026e-05, "loss": 0.6018, "step": 6977 }, { "epoch": 0.20373127791889287, "grad_norm": 0.678816958591077, "learning_rate": 1.7695377128953774e-05, "loss": 0.7115, "step": 6978 }, { "epoch": 0.20376047414674023, "grad_norm": 0.6915739724828752, "learning_rate": 1.7694728304947286e-05, "loss": 0.6491, "step": 6979 }, { "epoch": 0.20378967037458762, "grad_norm": 0.636800652824723, "learning_rate": 1.7694079480940794e-05, "loss": 0.5678, "step": 6980 }, { "epoch": 0.20381886660243498, "grad_norm": 0.7422221091353274, "learning_rate": 1.7693430656934306e-05, "loss": 0.7904, "step": 6981 }, { "epoch": 0.20384806283028234, "grad_norm": 0.6525068728307125, "learning_rate": 1.7692781832927818e-05, "loss": 0.625, "step": 6982 }, { "epoch": 0.2038772590581297, "grad_norm": 0.619075003135004, "learning_rate": 1.769213300892133e-05, "loss": 0.5636, "step": 6983 }, { "epoch": 0.20390645528597706, "grad_norm": 0.6714387412293708, "learning_rate": 1.7691484184914842e-05, "loss": 0.6256, "step": 6984 }, { "epoch": 0.20393565151382442, "grad_norm": 0.677827554284063, "learning_rate": 1.7690835360908358e-05, "loss": 0.6127, "step": 6985 }, { "epoch": 0.20396484774167178, "grad_norm": 0.7269199892964996, "learning_rate": 1.7690186536901866e-05, "loss": 0.6814, "step": 6986 }, { "epoch": 0.20399404396951915, "grad_norm": 0.6938345850387575, "learning_rate": 1.768953771289538e-05, "loss": 0.6736, "step": 6987 }, { "epoch": 0.2040232401973665, "grad_norm": 0.7098758443532548, "learning_rate": 1.768888888888889e-05, "loss": 0.6779, "step": 6988 }, { "epoch": 0.20405243642521387, "grad_norm": 0.7327035736689068, "learning_rate": 1.7688240064882402e-05, "loss": 0.6743, "step": 6989 }, { "epoch": 0.20408163265306123, "grad_norm": 0.6402562564174125, "learning_rate": 1.7687591240875914e-05, "loss": 0.6, "step": 6990 }, { "epoch": 0.2041108288809086, "grad_norm": 0.7218243052672102, "learning_rate": 1.7686942416869427e-05, "loss": 0.6606, "step": 6991 }, { "epoch": 0.20414002510875595, "grad_norm": 0.6991632291845482, "learning_rate": 1.768629359286294e-05, "loss": 0.6834, "step": 6992 }, { "epoch": 0.2041692213366033, "grad_norm": 0.7318427371568491, "learning_rate": 1.768564476885645e-05, "loss": 0.5949, "step": 6993 }, { "epoch": 0.20419841756445067, "grad_norm": 0.8388060934289204, "learning_rate": 1.768499594484996e-05, "loss": 0.7319, "step": 6994 }, { "epoch": 0.20422761379229804, "grad_norm": 0.7082679705538923, "learning_rate": 1.768434712084347e-05, "loss": 0.6301, "step": 6995 }, { "epoch": 0.2042568100201454, "grad_norm": 0.6668495242417166, "learning_rate": 1.7683698296836983e-05, "loss": 0.6597, "step": 6996 }, { "epoch": 0.20428600624799276, "grad_norm": 0.6529075641996922, "learning_rate": 1.7683049472830495e-05, "loss": 0.6549, "step": 6997 }, { "epoch": 0.20431520247584012, "grad_norm": 0.6709811513645011, "learning_rate": 1.7682400648824007e-05, "loss": 0.6482, "step": 6998 }, { "epoch": 0.20434439870368748, "grad_norm": 0.721453579805524, "learning_rate": 1.768175182481752e-05, "loss": 0.715, "step": 6999 }, { "epoch": 0.20437359493153484, "grad_norm": 0.6984600737130567, "learning_rate": 1.768110300081103e-05, "loss": 0.6814, "step": 7000 }, { "epoch": 0.2044027911593822, "grad_norm": 0.6445426145526677, "learning_rate": 1.7680454176804543e-05, "loss": 0.6352, "step": 7001 }, { "epoch": 0.20443198738722956, "grad_norm": 0.6889143296399023, "learning_rate": 1.7679805352798055e-05, "loss": 0.6359, "step": 7002 }, { "epoch": 0.20446118361507692, "grad_norm": 0.6935007240571179, "learning_rate": 1.7679156528791564e-05, "loss": 0.7213, "step": 7003 }, { "epoch": 0.20449037984292429, "grad_norm": 0.6725244096373031, "learning_rate": 1.767850770478508e-05, "loss": 0.6044, "step": 7004 }, { "epoch": 0.20451957607077165, "grad_norm": 0.7486457320956535, "learning_rate": 1.767785888077859e-05, "loss": 0.7548, "step": 7005 }, { "epoch": 0.204548772298619, "grad_norm": 0.7102873276319748, "learning_rate": 1.7677210056772104e-05, "loss": 0.7075, "step": 7006 }, { "epoch": 0.20457796852646637, "grad_norm": 0.681914504798524, "learning_rate": 1.7676561232765616e-05, "loss": 0.6646, "step": 7007 }, { "epoch": 0.20460716475431373, "grad_norm": 1.0397110910885143, "learning_rate": 1.7675912408759128e-05, "loss": 0.6602, "step": 7008 }, { "epoch": 0.2046363609821611, "grad_norm": 0.6689464311750281, "learning_rate": 1.7675263584752636e-05, "loss": 0.605, "step": 7009 }, { "epoch": 0.20466555721000848, "grad_norm": 0.638755099050044, "learning_rate": 1.7674614760746148e-05, "loss": 0.622, "step": 7010 }, { "epoch": 0.20469475343785584, "grad_norm": 0.6487772301828556, "learning_rate": 1.767396593673966e-05, "loss": 0.6009, "step": 7011 }, { "epoch": 0.2047239496657032, "grad_norm": 0.7320333075073177, "learning_rate": 1.7673317112733172e-05, "loss": 0.7789, "step": 7012 }, { "epoch": 0.20475314589355056, "grad_norm": 0.7007886460623667, "learning_rate": 1.7672668288726684e-05, "loss": 0.6403, "step": 7013 }, { "epoch": 0.20478234212139793, "grad_norm": 0.6795993927920823, "learning_rate": 1.7672019464720196e-05, "loss": 0.6748, "step": 7014 }, { "epoch": 0.2048115383492453, "grad_norm": 0.6582616363926498, "learning_rate": 1.767137064071371e-05, "loss": 0.6603, "step": 7015 }, { "epoch": 0.20484073457709265, "grad_norm": 0.7284004240890322, "learning_rate": 1.767072181670722e-05, "loss": 0.6827, "step": 7016 }, { "epoch": 0.20486993080494, "grad_norm": 0.7654351704636716, "learning_rate": 1.7670072992700732e-05, "loss": 0.7823, "step": 7017 }, { "epoch": 0.20489912703278737, "grad_norm": 0.7010777085812551, "learning_rate": 1.766942416869424e-05, "loss": 0.7131, "step": 7018 }, { "epoch": 0.20492832326063473, "grad_norm": 0.701558034355891, "learning_rate": 1.7668775344687753e-05, "loss": 0.7042, "step": 7019 }, { "epoch": 0.2049575194884821, "grad_norm": 0.9048743704006685, "learning_rate": 1.7668126520681265e-05, "loss": 0.7706, "step": 7020 }, { "epoch": 0.20498671571632945, "grad_norm": 0.6537279994915856, "learning_rate": 1.7667477696674777e-05, "loss": 0.6121, "step": 7021 }, { "epoch": 0.20501591194417682, "grad_norm": 0.7118046167830455, "learning_rate": 1.7666828872668293e-05, "loss": 0.6815, "step": 7022 }, { "epoch": 0.20504510817202418, "grad_norm": 0.69628791558189, "learning_rate": 1.76661800486618e-05, "loss": 0.6724, "step": 7023 }, { "epoch": 0.20507430439987154, "grad_norm": 0.6882526108915726, "learning_rate": 1.7665531224655313e-05, "loss": 0.6699, "step": 7024 }, { "epoch": 0.2051035006277189, "grad_norm": 0.6997912632295201, "learning_rate": 1.7664882400648825e-05, "loss": 0.6626, "step": 7025 }, { "epoch": 0.20513269685556626, "grad_norm": 0.6389341993969443, "learning_rate": 1.7664233576642337e-05, "loss": 0.5451, "step": 7026 }, { "epoch": 0.20516189308341362, "grad_norm": 0.6374973562513895, "learning_rate": 1.766358475263585e-05, "loss": 0.6227, "step": 7027 }, { "epoch": 0.20519108931126098, "grad_norm": 0.7328848582641413, "learning_rate": 1.766293592862936e-05, "loss": 0.7176, "step": 7028 }, { "epoch": 0.20522028553910834, "grad_norm": 0.6760303123502657, "learning_rate": 1.7662287104622873e-05, "loss": 0.6595, "step": 7029 }, { "epoch": 0.2052494817669557, "grad_norm": 0.6912456842068074, "learning_rate": 1.7661638280616385e-05, "loss": 0.6536, "step": 7030 }, { "epoch": 0.20527867799480307, "grad_norm": 0.8511327714816239, "learning_rate": 1.7660989456609897e-05, "loss": 0.6672, "step": 7031 }, { "epoch": 0.20530787422265043, "grad_norm": 0.6767272892369594, "learning_rate": 1.7660340632603406e-05, "loss": 0.6248, "step": 7032 }, { "epoch": 0.2053370704504978, "grad_norm": 0.6689574591929627, "learning_rate": 1.7659691808596918e-05, "loss": 0.6637, "step": 7033 }, { "epoch": 0.20536626667834515, "grad_norm": 0.6548167793023507, "learning_rate": 1.765904298459043e-05, "loss": 0.5883, "step": 7034 }, { "epoch": 0.2053954629061925, "grad_norm": 0.6375868005893204, "learning_rate": 1.7658394160583942e-05, "loss": 0.4972, "step": 7035 }, { "epoch": 0.20542465913403987, "grad_norm": 0.6808010947461812, "learning_rate": 1.7657745336577454e-05, "loss": 0.6676, "step": 7036 }, { "epoch": 0.20545385536188723, "grad_norm": 0.685645555836638, "learning_rate": 1.7657096512570966e-05, "loss": 0.6873, "step": 7037 }, { "epoch": 0.2054830515897346, "grad_norm": 0.8817435115113272, "learning_rate": 1.7656447688564478e-05, "loss": 0.6434, "step": 7038 }, { "epoch": 0.20551224781758196, "grad_norm": 0.6767425713707093, "learning_rate": 1.765579886455799e-05, "loss": 0.6573, "step": 7039 }, { "epoch": 0.20554144404542932, "grad_norm": 0.675420790408181, "learning_rate": 1.7655150040551502e-05, "loss": 0.6336, "step": 7040 }, { "epoch": 0.2055706402732767, "grad_norm": 0.6445585585799964, "learning_rate": 1.7654501216545014e-05, "loss": 0.5726, "step": 7041 }, { "epoch": 0.20559983650112407, "grad_norm": 0.641476524218427, "learning_rate": 1.7653852392538526e-05, "loss": 0.5651, "step": 7042 }, { "epoch": 0.20562903272897143, "grad_norm": 0.6489842960854438, "learning_rate": 1.765320356853204e-05, "loss": 0.6671, "step": 7043 }, { "epoch": 0.2056582289568188, "grad_norm": 0.7385484869453517, "learning_rate": 1.765255474452555e-05, "loss": 0.6121, "step": 7044 }, { "epoch": 0.20568742518466615, "grad_norm": 0.6618170446950508, "learning_rate": 1.7651905920519062e-05, "loss": 0.6881, "step": 7045 }, { "epoch": 0.2057166214125135, "grad_norm": 0.7617365607376091, "learning_rate": 1.7651257096512574e-05, "loss": 0.7154, "step": 7046 }, { "epoch": 0.20574581764036087, "grad_norm": 0.6848348521423809, "learning_rate": 1.7650608272506083e-05, "loss": 0.687, "step": 7047 }, { "epoch": 0.20577501386820823, "grad_norm": 0.6808029357516916, "learning_rate": 1.7649959448499595e-05, "loss": 0.6744, "step": 7048 }, { "epoch": 0.2058042100960556, "grad_norm": 0.7151114411484653, "learning_rate": 1.7649310624493107e-05, "loss": 0.6691, "step": 7049 }, { "epoch": 0.20583340632390296, "grad_norm": 0.6643061341274924, "learning_rate": 1.764866180048662e-05, "loss": 0.6197, "step": 7050 }, { "epoch": 0.20586260255175032, "grad_norm": 0.7632232723170309, "learning_rate": 1.764801297648013e-05, "loss": 0.7848, "step": 7051 }, { "epoch": 0.20589179877959768, "grad_norm": 0.7281934883076754, "learning_rate": 1.7647364152473643e-05, "loss": 0.6521, "step": 7052 }, { "epoch": 0.20592099500744504, "grad_norm": 0.7051270529106057, "learning_rate": 1.7646715328467155e-05, "loss": 0.6616, "step": 7053 }, { "epoch": 0.2059501912352924, "grad_norm": 0.613514270030318, "learning_rate": 1.7646066504460667e-05, "loss": 0.5519, "step": 7054 }, { "epoch": 0.20597938746313976, "grad_norm": 0.6943326409030937, "learning_rate": 1.764541768045418e-05, "loss": 0.621, "step": 7055 }, { "epoch": 0.20600858369098712, "grad_norm": 0.6662151509740188, "learning_rate": 1.7644768856447688e-05, "loss": 0.6689, "step": 7056 }, { "epoch": 0.20603777991883448, "grad_norm": 0.6888427920226519, "learning_rate": 1.76441200324412e-05, "loss": 0.7212, "step": 7057 }, { "epoch": 0.20606697614668185, "grad_norm": 0.7897866433632247, "learning_rate": 1.7643471208434712e-05, "loss": 0.768, "step": 7058 }, { "epoch": 0.2060961723745292, "grad_norm": 0.6741272617977826, "learning_rate": 1.7642822384428224e-05, "loss": 0.6831, "step": 7059 }, { "epoch": 0.20612536860237657, "grad_norm": 0.709395762309363, "learning_rate": 1.764217356042174e-05, "loss": 0.633, "step": 7060 }, { "epoch": 0.20615456483022393, "grad_norm": 0.6581256030596786, "learning_rate": 1.7641524736415248e-05, "loss": 0.547, "step": 7061 }, { "epoch": 0.2061837610580713, "grad_norm": 0.726631908874659, "learning_rate": 1.764087591240876e-05, "loss": 0.6664, "step": 7062 }, { "epoch": 0.20621295728591865, "grad_norm": 0.6321243249058759, "learning_rate": 1.7640227088402272e-05, "loss": 0.6169, "step": 7063 }, { "epoch": 0.206242153513766, "grad_norm": 0.688636573461949, "learning_rate": 1.7639578264395784e-05, "loss": 0.6511, "step": 7064 }, { "epoch": 0.20627134974161337, "grad_norm": 0.7284436125759827, "learning_rate": 1.7638929440389296e-05, "loss": 0.6847, "step": 7065 }, { "epoch": 0.20630054596946074, "grad_norm": 0.663328380012122, "learning_rate": 1.7638280616382808e-05, "loss": 0.6447, "step": 7066 }, { "epoch": 0.2063297421973081, "grad_norm": 0.6545491266951681, "learning_rate": 1.763763179237632e-05, "loss": 0.6059, "step": 7067 }, { "epoch": 0.20635893842515546, "grad_norm": 0.7272695272172458, "learning_rate": 1.7636982968369832e-05, "loss": 0.6659, "step": 7068 }, { "epoch": 0.20638813465300282, "grad_norm": 0.7075071092810681, "learning_rate": 1.7636334144363344e-05, "loss": 0.6706, "step": 7069 }, { "epoch": 0.20641733088085018, "grad_norm": 0.6906895698332293, "learning_rate": 1.7635685320356853e-05, "loss": 0.6584, "step": 7070 }, { "epoch": 0.20644652710869757, "grad_norm": 0.7556763707222891, "learning_rate": 1.7635036496350365e-05, "loss": 0.7446, "step": 7071 }, { "epoch": 0.20647572333654493, "grad_norm": 0.7384240264783254, "learning_rate": 1.7634387672343877e-05, "loss": 0.7607, "step": 7072 }, { "epoch": 0.2065049195643923, "grad_norm": 0.6481174259319192, "learning_rate": 1.763373884833739e-05, "loss": 0.6028, "step": 7073 }, { "epoch": 0.20653411579223965, "grad_norm": 0.7308261323577585, "learning_rate": 1.76330900243309e-05, "loss": 0.6705, "step": 7074 }, { "epoch": 0.20656331202008701, "grad_norm": 0.6937926844371186, "learning_rate": 1.7632441200324413e-05, "loss": 0.7012, "step": 7075 }, { "epoch": 0.20659250824793438, "grad_norm": 0.681426357489843, "learning_rate": 1.7631792376317925e-05, "loss": 0.6634, "step": 7076 }, { "epoch": 0.20662170447578174, "grad_norm": 0.7354279867127395, "learning_rate": 1.7631143552311437e-05, "loss": 0.6611, "step": 7077 }, { "epoch": 0.2066509007036291, "grad_norm": 0.6730940470308269, "learning_rate": 1.763049472830495e-05, "loss": 0.6366, "step": 7078 }, { "epoch": 0.20668009693147646, "grad_norm": 0.6533410222661528, "learning_rate": 1.762984590429846e-05, "loss": 0.6232, "step": 7079 }, { "epoch": 0.20670929315932382, "grad_norm": 0.6925294875207243, "learning_rate": 1.7629197080291973e-05, "loss": 0.6239, "step": 7080 }, { "epoch": 0.20673848938717118, "grad_norm": 0.7110031557271206, "learning_rate": 1.7628548256285485e-05, "loss": 0.6887, "step": 7081 }, { "epoch": 0.20676768561501854, "grad_norm": 0.8906389983237708, "learning_rate": 1.7627899432278997e-05, "loss": 0.7268, "step": 7082 }, { "epoch": 0.2067968818428659, "grad_norm": 0.7486867584756244, "learning_rate": 1.762725060827251e-05, "loss": 0.6323, "step": 7083 }, { "epoch": 0.20682607807071327, "grad_norm": 0.6966498298807609, "learning_rate": 1.762660178426602e-05, "loss": 0.7035, "step": 7084 }, { "epoch": 0.20685527429856063, "grad_norm": 0.6918090134088677, "learning_rate": 1.762595296025953e-05, "loss": 0.6755, "step": 7085 }, { "epoch": 0.206884470526408, "grad_norm": 0.6904744297432698, "learning_rate": 1.7625304136253042e-05, "loss": 0.6945, "step": 7086 }, { "epoch": 0.20691366675425535, "grad_norm": 0.9797332991907817, "learning_rate": 1.7624655312246554e-05, "loss": 0.6783, "step": 7087 }, { "epoch": 0.2069428629821027, "grad_norm": 0.6485112440775475, "learning_rate": 1.7624006488240066e-05, "loss": 0.6025, "step": 7088 }, { "epoch": 0.20697205920995007, "grad_norm": 0.6968536424589181, "learning_rate": 1.7623357664233578e-05, "loss": 0.6383, "step": 7089 }, { "epoch": 0.20700125543779743, "grad_norm": 0.6928449575040776, "learning_rate": 1.762270884022709e-05, "loss": 0.6464, "step": 7090 }, { "epoch": 0.2070304516656448, "grad_norm": 0.7323428870627129, "learning_rate": 1.7622060016220602e-05, "loss": 0.7365, "step": 7091 }, { "epoch": 0.20705964789349215, "grad_norm": 0.699462742847816, "learning_rate": 1.7621411192214114e-05, "loss": 0.7055, "step": 7092 }, { "epoch": 0.20708884412133952, "grad_norm": 0.6652694030142877, "learning_rate": 1.7620762368207626e-05, "loss": 0.6138, "step": 7093 }, { "epoch": 0.20711804034918688, "grad_norm": 0.6716341280828964, "learning_rate": 1.7620113544201135e-05, "loss": 0.6343, "step": 7094 }, { "epoch": 0.20714723657703424, "grad_norm": 0.7455870702879739, "learning_rate": 1.7619464720194647e-05, "loss": 0.7004, "step": 7095 }, { "epoch": 0.2071764328048816, "grad_norm": 0.6838002730659157, "learning_rate": 1.761881589618816e-05, "loss": 0.7104, "step": 7096 }, { "epoch": 0.20720562903272896, "grad_norm": 0.7124024784028653, "learning_rate": 1.761816707218167e-05, "loss": 0.7163, "step": 7097 }, { "epoch": 0.20723482526057632, "grad_norm": 0.6751080824658781, "learning_rate": 1.7617518248175186e-05, "loss": 0.6578, "step": 7098 }, { "epoch": 0.20726402148842368, "grad_norm": 0.6929958270170716, "learning_rate": 1.7616869424168695e-05, "loss": 0.6265, "step": 7099 }, { "epoch": 0.20729321771627104, "grad_norm": 1.074591828578093, "learning_rate": 1.7616220600162207e-05, "loss": 0.6541, "step": 7100 }, { "epoch": 0.20732241394411843, "grad_norm": 0.7512672823828067, "learning_rate": 1.761557177615572e-05, "loss": 0.7739, "step": 7101 }, { "epoch": 0.2073516101719658, "grad_norm": 0.7283290589118445, "learning_rate": 1.761492295214923e-05, "loss": 0.6897, "step": 7102 }, { "epoch": 0.20738080639981316, "grad_norm": 0.7245998931747046, "learning_rate": 1.7614274128142743e-05, "loss": 0.6361, "step": 7103 }, { "epoch": 0.20741000262766052, "grad_norm": 0.6424927414363647, "learning_rate": 1.7613625304136255e-05, "loss": 0.6102, "step": 7104 }, { "epoch": 0.20743919885550788, "grad_norm": 0.7246631059373176, "learning_rate": 1.7612976480129767e-05, "loss": 0.7242, "step": 7105 }, { "epoch": 0.20746839508335524, "grad_norm": 0.6815669331234709, "learning_rate": 1.761232765612328e-05, "loss": 0.6472, "step": 7106 }, { "epoch": 0.2074975913112026, "grad_norm": 0.7058273841408879, "learning_rate": 1.761167883211679e-05, "loss": 0.7057, "step": 7107 }, { "epoch": 0.20752678753904996, "grad_norm": 0.6724496721383908, "learning_rate": 1.76110300081103e-05, "loss": 0.5788, "step": 7108 }, { "epoch": 0.20755598376689732, "grad_norm": 0.7074015729803946, "learning_rate": 1.7610381184103812e-05, "loss": 0.7253, "step": 7109 }, { "epoch": 0.20758517999474468, "grad_norm": 0.6635707135043035, "learning_rate": 1.7609732360097324e-05, "loss": 0.6402, "step": 7110 }, { "epoch": 0.20761437622259205, "grad_norm": 0.6907653322203319, "learning_rate": 1.7609083536090836e-05, "loss": 0.6833, "step": 7111 }, { "epoch": 0.2076435724504394, "grad_norm": 0.6626445698905069, "learning_rate": 1.7608434712084348e-05, "loss": 0.628, "step": 7112 }, { "epoch": 0.20767276867828677, "grad_norm": 0.6478141691169125, "learning_rate": 1.760778588807786e-05, "loss": 0.6099, "step": 7113 }, { "epoch": 0.20770196490613413, "grad_norm": 0.6460948492542635, "learning_rate": 1.7607137064071372e-05, "loss": 0.6437, "step": 7114 }, { "epoch": 0.2077311611339815, "grad_norm": 0.6534739297444375, "learning_rate": 1.7606488240064884e-05, "loss": 0.6365, "step": 7115 }, { "epoch": 0.20776035736182885, "grad_norm": 0.6737037088991379, "learning_rate": 1.7605839416058396e-05, "loss": 0.6603, "step": 7116 }, { "epoch": 0.2077895535896762, "grad_norm": 0.6377284116226858, "learning_rate": 1.7605190592051908e-05, "loss": 0.5864, "step": 7117 }, { "epoch": 0.20781874981752357, "grad_norm": 0.655566555925418, "learning_rate": 1.760454176804542e-05, "loss": 0.6805, "step": 7118 }, { "epoch": 0.20784794604537093, "grad_norm": 0.692077314310468, "learning_rate": 1.7603892944038932e-05, "loss": 0.654, "step": 7119 }, { "epoch": 0.2078771422732183, "grad_norm": 0.7209807343493531, "learning_rate": 1.7603244120032444e-05, "loss": 0.6606, "step": 7120 }, { "epoch": 0.20790633850106566, "grad_norm": 0.8179892279287482, "learning_rate": 1.7602595296025956e-05, "loss": 0.6962, "step": 7121 }, { "epoch": 0.20793553472891302, "grad_norm": 0.6078479353706375, "learning_rate": 1.7601946472019468e-05, "loss": 0.5677, "step": 7122 }, { "epoch": 0.20796473095676038, "grad_norm": 0.6905939258207694, "learning_rate": 1.7601297648012977e-05, "loss": 0.7199, "step": 7123 }, { "epoch": 0.20799392718460774, "grad_norm": 0.7323013882005591, "learning_rate": 1.760064882400649e-05, "loss": 0.7016, "step": 7124 }, { "epoch": 0.2080231234124551, "grad_norm": 0.646096379663394, "learning_rate": 1.76e-05, "loss": 0.6484, "step": 7125 }, { "epoch": 0.20805231964030246, "grad_norm": 0.7828366661907551, "learning_rate": 1.7599351175993513e-05, "loss": 0.8518, "step": 7126 }, { "epoch": 0.20808151586814982, "grad_norm": 0.7394797394585108, "learning_rate": 1.7598702351987025e-05, "loss": 0.7299, "step": 7127 }, { "epoch": 0.20811071209599719, "grad_norm": 0.6472417689428026, "learning_rate": 1.7598053527980537e-05, "loss": 0.6106, "step": 7128 }, { "epoch": 0.20813990832384455, "grad_norm": 0.7127833692181066, "learning_rate": 1.759740470397405e-05, "loss": 0.6846, "step": 7129 }, { "epoch": 0.2081691045516919, "grad_norm": 0.7463265627024058, "learning_rate": 1.759675587996756e-05, "loss": 0.7053, "step": 7130 }, { "epoch": 0.2081983007795393, "grad_norm": 0.7059860817511725, "learning_rate": 1.7596107055961073e-05, "loss": 0.6982, "step": 7131 }, { "epoch": 0.20822749700738666, "grad_norm": 0.6599999060473282, "learning_rate": 1.7595458231954582e-05, "loss": 0.661, "step": 7132 }, { "epoch": 0.20825669323523402, "grad_norm": 0.6467324026383235, "learning_rate": 1.7594809407948094e-05, "loss": 0.615, "step": 7133 }, { "epoch": 0.20828588946308138, "grad_norm": 0.6508837796565912, "learning_rate": 1.7594160583941606e-05, "loss": 0.6041, "step": 7134 }, { "epoch": 0.20831508569092874, "grad_norm": 0.6868301693347392, "learning_rate": 1.7593511759935118e-05, "loss": 0.6742, "step": 7135 }, { "epoch": 0.2083442819187761, "grad_norm": 0.7892276947165245, "learning_rate": 1.7592862935928633e-05, "loss": 0.6893, "step": 7136 }, { "epoch": 0.20837347814662346, "grad_norm": 0.6710720012924956, "learning_rate": 1.7592214111922142e-05, "loss": 0.6369, "step": 7137 }, { "epoch": 0.20840267437447083, "grad_norm": 0.6653251870496967, "learning_rate": 1.7591565287915654e-05, "loss": 0.6369, "step": 7138 }, { "epoch": 0.2084318706023182, "grad_norm": 0.6729962439755538, "learning_rate": 1.7590916463909166e-05, "loss": 0.5934, "step": 7139 }, { "epoch": 0.20846106683016555, "grad_norm": 0.742578409905894, "learning_rate": 1.7590267639902678e-05, "loss": 0.6138, "step": 7140 }, { "epoch": 0.2084902630580129, "grad_norm": 0.7769847365264588, "learning_rate": 1.758961881589619e-05, "loss": 0.7242, "step": 7141 }, { "epoch": 0.20851945928586027, "grad_norm": 0.7350717965108069, "learning_rate": 1.7588969991889702e-05, "loss": 0.6597, "step": 7142 }, { "epoch": 0.20854865551370763, "grad_norm": 0.7182290585536053, "learning_rate": 1.7588321167883214e-05, "loss": 0.6992, "step": 7143 }, { "epoch": 0.208577851741555, "grad_norm": 0.7270557707353931, "learning_rate": 1.7587672343876726e-05, "loss": 0.6902, "step": 7144 }, { "epoch": 0.20860704796940235, "grad_norm": 0.7053471681793195, "learning_rate": 1.7587023519870238e-05, "loss": 0.6144, "step": 7145 }, { "epoch": 0.20863624419724972, "grad_norm": 0.6409048186598442, "learning_rate": 1.7586374695863747e-05, "loss": 0.637, "step": 7146 }, { "epoch": 0.20866544042509708, "grad_norm": 0.7332796316701613, "learning_rate": 1.758572587185726e-05, "loss": 0.6581, "step": 7147 }, { "epoch": 0.20869463665294444, "grad_norm": 0.6412051322008677, "learning_rate": 1.758507704785077e-05, "loss": 0.5719, "step": 7148 }, { "epoch": 0.2087238328807918, "grad_norm": 0.6773593178057816, "learning_rate": 1.7584428223844283e-05, "loss": 0.6104, "step": 7149 }, { "epoch": 0.20875302910863916, "grad_norm": 0.7682584045515076, "learning_rate": 1.7583779399837795e-05, "loss": 0.7007, "step": 7150 }, { "epoch": 0.20878222533648652, "grad_norm": 0.7136549467284763, "learning_rate": 1.7583130575831307e-05, "loss": 0.7156, "step": 7151 }, { "epoch": 0.20881142156433388, "grad_norm": 0.6950258814423379, "learning_rate": 1.758248175182482e-05, "loss": 0.6572, "step": 7152 }, { "epoch": 0.20884061779218124, "grad_norm": 0.709725580520412, "learning_rate": 1.758183292781833e-05, "loss": 0.6155, "step": 7153 }, { "epoch": 0.2088698140200286, "grad_norm": 0.8140462864352688, "learning_rate": 1.7581184103811843e-05, "loss": 0.7068, "step": 7154 }, { "epoch": 0.20889901024787597, "grad_norm": 0.7006085207721884, "learning_rate": 1.7580535279805355e-05, "loss": 0.6393, "step": 7155 }, { "epoch": 0.20892820647572333, "grad_norm": 0.6147264955182206, "learning_rate": 1.7579886455798867e-05, "loss": 0.5748, "step": 7156 }, { "epoch": 0.2089574027035707, "grad_norm": 0.6818529536065219, "learning_rate": 1.757923763179238e-05, "loss": 0.662, "step": 7157 }, { "epoch": 0.20898659893141805, "grad_norm": 0.6489032272150069, "learning_rate": 1.757858880778589e-05, "loss": 0.6043, "step": 7158 }, { "epoch": 0.2090157951592654, "grad_norm": 0.6870362055568499, "learning_rate": 1.7577939983779403e-05, "loss": 0.6318, "step": 7159 }, { "epoch": 0.20904499138711277, "grad_norm": 0.673653610661257, "learning_rate": 1.7577291159772915e-05, "loss": 0.6259, "step": 7160 }, { "epoch": 0.20907418761496016, "grad_norm": 0.7013494100806937, "learning_rate": 1.7576642335766424e-05, "loss": 0.7247, "step": 7161 }, { "epoch": 0.20910338384280752, "grad_norm": 0.7722602711935509, "learning_rate": 1.7575993511759936e-05, "loss": 0.6993, "step": 7162 }, { "epoch": 0.20913258007065488, "grad_norm": 0.7515298042510962, "learning_rate": 1.7575344687753448e-05, "loss": 0.6585, "step": 7163 }, { "epoch": 0.20916177629850224, "grad_norm": 0.6691872710420805, "learning_rate": 1.757469586374696e-05, "loss": 0.5989, "step": 7164 }, { "epoch": 0.2091909725263496, "grad_norm": 0.6458317253890695, "learning_rate": 1.7574047039740472e-05, "loss": 0.5943, "step": 7165 }, { "epoch": 0.20922016875419697, "grad_norm": 0.6588180409252613, "learning_rate": 1.7573398215733984e-05, "loss": 0.667, "step": 7166 }, { "epoch": 0.20924936498204433, "grad_norm": 0.6368952920221417, "learning_rate": 1.7572749391727496e-05, "loss": 0.5676, "step": 7167 }, { "epoch": 0.2092785612098917, "grad_norm": 0.7207077003990505, "learning_rate": 1.7572100567721008e-05, "loss": 0.6449, "step": 7168 }, { "epoch": 0.20930775743773905, "grad_norm": 0.6431649700851961, "learning_rate": 1.7571451743714517e-05, "loss": 0.6361, "step": 7169 }, { "epoch": 0.2093369536655864, "grad_norm": 0.6910641488497602, "learning_rate": 1.757080291970803e-05, "loss": 0.6569, "step": 7170 }, { "epoch": 0.20936614989343377, "grad_norm": 0.7608765866134538, "learning_rate": 1.757015409570154e-05, "loss": 0.7285, "step": 7171 }, { "epoch": 0.20939534612128113, "grad_norm": 0.6501116384283891, "learning_rate": 1.7569505271695053e-05, "loss": 0.6549, "step": 7172 }, { "epoch": 0.2094245423491285, "grad_norm": 0.7465361172141849, "learning_rate": 1.7568856447688568e-05, "loss": 0.7667, "step": 7173 }, { "epoch": 0.20945373857697586, "grad_norm": 0.7365392803942755, "learning_rate": 1.756820762368208e-05, "loss": 0.7625, "step": 7174 }, { "epoch": 0.20948293480482322, "grad_norm": 0.6596897313652746, "learning_rate": 1.756755879967559e-05, "loss": 0.637, "step": 7175 }, { "epoch": 0.20951213103267058, "grad_norm": 0.6220836951726365, "learning_rate": 1.75669099756691e-05, "loss": 0.6076, "step": 7176 }, { "epoch": 0.20954132726051794, "grad_norm": 0.6760283668746203, "learning_rate": 1.7566261151662613e-05, "loss": 0.6428, "step": 7177 }, { "epoch": 0.2095705234883653, "grad_norm": 0.6585796166672996, "learning_rate": 1.7565612327656125e-05, "loss": 0.6076, "step": 7178 }, { "epoch": 0.20959971971621266, "grad_norm": 0.6588620497112229, "learning_rate": 1.7564963503649637e-05, "loss": 0.6074, "step": 7179 }, { "epoch": 0.20962891594406002, "grad_norm": 0.6235225935924874, "learning_rate": 1.756431467964315e-05, "loss": 0.5822, "step": 7180 }, { "epoch": 0.20965811217190738, "grad_norm": 0.6689412225345535, "learning_rate": 1.756366585563666e-05, "loss": 0.6178, "step": 7181 }, { "epoch": 0.20968730839975475, "grad_norm": 0.681972537533384, "learning_rate": 1.7563017031630173e-05, "loss": 0.652, "step": 7182 }, { "epoch": 0.2097165046276021, "grad_norm": 0.6368135537150983, "learning_rate": 1.7562368207623685e-05, "loss": 0.6048, "step": 7183 }, { "epoch": 0.20974570085544947, "grad_norm": 0.6943095315102576, "learning_rate": 1.7561719383617194e-05, "loss": 0.6948, "step": 7184 }, { "epoch": 0.20977489708329683, "grad_norm": 0.6453493434633252, "learning_rate": 1.7561070559610706e-05, "loss": 0.5771, "step": 7185 }, { "epoch": 0.2098040933111442, "grad_norm": 0.6454706433243134, "learning_rate": 1.7560421735604218e-05, "loss": 0.6585, "step": 7186 }, { "epoch": 0.20983328953899155, "grad_norm": 0.6622688362076617, "learning_rate": 1.755977291159773e-05, "loss": 0.6487, "step": 7187 }, { "epoch": 0.2098624857668389, "grad_norm": 0.6423002335528087, "learning_rate": 1.755912408759124e-05, "loss": 0.6054, "step": 7188 }, { "epoch": 0.20989168199468627, "grad_norm": 0.7097271891494366, "learning_rate": 1.7558475263584754e-05, "loss": 0.7365, "step": 7189 }, { "epoch": 0.20992087822253364, "grad_norm": 0.7183616171523429, "learning_rate": 1.7557826439578266e-05, "loss": 0.6391, "step": 7190 }, { "epoch": 0.20995007445038102, "grad_norm": 0.6625004075300849, "learning_rate": 1.7557177615571778e-05, "loss": 0.6506, "step": 7191 }, { "epoch": 0.20997927067822839, "grad_norm": 0.6668171073118486, "learning_rate": 1.755652879156529e-05, "loss": 0.6684, "step": 7192 }, { "epoch": 0.21000846690607575, "grad_norm": 0.6894495870692262, "learning_rate": 1.7555879967558802e-05, "loss": 0.6299, "step": 7193 }, { "epoch": 0.2100376631339231, "grad_norm": 0.7217480235774071, "learning_rate": 1.7555231143552314e-05, "loss": 0.6438, "step": 7194 }, { "epoch": 0.21006685936177047, "grad_norm": 0.7428793998578541, "learning_rate": 1.7554582319545826e-05, "loss": 0.7321, "step": 7195 }, { "epoch": 0.21009605558961783, "grad_norm": 0.722269071784743, "learning_rate": 1.7553933495539338e-05, "loss": 0.6675, "step": 7196 }, { "epoch": 0.2101252518174652, "grad_norm": 0.7587234485040789, "learning_rate": 1.755328467153285e-05, "loss": 0.6937, "step": 7197 }, { "epoch": 0.21015444804531255, "grad_norm": 0.6475305361541764, "learning_rate": 1.7552635847526362e-05, "loss": 0.5983, "step": 7198 }, { "epoch": 0.21018364427315991, "grad_norm": 0.6052555734252009, "learning_rate": 1.755198702351987e-05, "loss": 0.5439, "step": 7199 }, { "epoch": 0.21021284050100728, "grad_norm": 0.6639377034461272, "learning_rate": 1.7551338199513383e-05, "loss": 0.5824, "step": 7200 }, { "epoch": 0.21024203672885464, "grad_norm": 0.6919385522542011, "learning_rate": 1.7550689375506895e-05, "loss": 0.6245, "step": 7201 }, { "epoch": 0.210271232956702, "grad_norm": 0.8108437868056235, "learning_rate": 1.7550040551500407e-05, "loss": 0.7299, "step": 7202 }, { "epoch": 0.21030042918454936, "grad_norm": 0.750513892469674, "learning_rate": 1.754939172749392e-05, "loss": 0.7242, "step": 7203 }, { "epoch": 0.21032962541239672, "grad_norm": 0.6944246462969306, "learning_rate": 1.754874290348743e-05, "loss": 0.6283, "step": 7204 }, { "epoch": 0.21035882164024408, "grad_norm": 0.7449514961204333, "learning_rate": 1.7548094079480943e-05, "loss": 0.6899, "step": 7205 }, { "epoch": 0.21038801786809144, "grad_norm": 0.7531237546214193, "learning_rate": 1.7547445255474455e-05, "loss": 0.745, "step": 7206 }, { "epoch": 0.2104172140959388, "grad_norm": 0.6977939403839597, "learning_rate": 1.7546796431467963e-05, "loss": 0.6585, "step": 7207 }, { "epoch": 0.21044641032378616, "grad_norm": 0.7325543419569532, "learning_rate": 1.7546147607461475e-05, "loss": 0.6665, "step": 7208 }, { "epoch": 0.21047560655163353, "grad_norm": 0.7393361719077471, "learning_rate": 1.7545498783454987e-05, "loss": 0.6506, "step": 7209 }, { "epoch": 0.2105048027794809, "grad_norm": 0.6859291568902706, "learning_rate": 1.75448499594485e-05, "loss": 0.6443, "step": 7210 }, { "epoch": 0.21053399900732825, "grad_norm": 0.7071820927146344, "learning_rate": 1.7544201135442015e-05, "loss": 0.6317, "step": 7211 }, { "epoch": 0.2105631952351756, "grad_norm": 0.6565872361216478, "learning_rate": 1.7543552311435527e-05, "loss": 0.631, "step": 7212 }, { "epoch": 0.21059239146302297, "grad_norm": 0.6492247478675799, "learning_rate": 1.7542903487429036e-05, "loss": 0.6274, "step": 7213 }, { "epoch": 0.21062158769087033, "grad_norm": 0.6785485909526033, "learning_rate": 1.7542254663422548e-05, "loss": 0.628, "step": 7214 }, { "epoch": 0.2106507839187177, "grad_norm": 0.6960706420691647, "learning_rate": 1.754160583941606e-05, "loss": 0.6796, "step": 7215 }, { "epoch": 0.21067998014656505, "grad_norm": 0.7387127124502021, "learning_rate": 1.754095701540957e-05, "loss": 0.6897, "step": 7216 }, { "epoch": 0.21070917637441242, "grad_norm": 0.6762828288306905, "learning_rate": 1.7540308191403084e-05, "loss": 0.6565, "step": 7217 }, { "epoch": 0.21073837260225978, "grad_norm": 0.6708954595537571, "learning_rate": 1.7539659367396596e-05, "loss": 0.6536, "step": 7218 }, { "epoch": 0.21076756883010714, "grad_norm": 0.7686661535522159, "learning_rate": 1.7539010543390108e-05, "loss": 0.7301, "step": 7219 }, { "epoch": 0.2107967650579545, "grad_norm": 0.6689933907801443, "learning_rate": 1.753836171938362e-05, "loss": 0.5899, "step": 7220 }, { "epoch": 0.21082596128580186, "grad_norm": 0.693498272107364, "learning_rate": 1.7537712895377132e-05, "loss": 0.6546, "step": 7221 }, { "epoch": 0.21085515751364925, "grad_norm": 0.6589911210229306, "learning_rate": 1.753706407137064e-05, "loss": 0.5567, "step": 7222 }, { "epoch": 0.2108843537414966, "grad_norm": 0.6721257527420843, "learning_rate": 1.7536415247364152e-05, "loss": 0.6392, "step": 7223 }, { "epoch": 0.21091354996934397, "grad_norm": 0.6461243123635391, "learning_rate": 1.7535766423357664e-05, "loss": 0.6261, "step": 7224 }, { "epoch": 0.21094274619719133, "grad_norm": 0.6826947022063637, "learning_rate": 1.7535117599351177e-05, "loss": 0.5929, "step": 7225 }, { "epoch": 0.2109719424250387, "grad_norm": 0.6887037370409312, "learning_rate": 1.753446877534469e-05, "loss": 0.6373, "step": 7226 }, { "epoch": 0.21100113865288606, "grad_norm": 0.6712933222922838, "learning_rate": 1.75338199513382e-05, "loss": 0.6129, "step": 7227 }, { "epoch": 0.21103033488073342, "grad_norm": 0.6498057822744625, "learning_rate": 1.7533171127331713e-05, "loss": 0.5438, "step": 7228 }, { "epoch": 0.21105953110858078, "grad_norm": 0.7019650095827746, "learning_rate": 1.7532522303325225e-05, "loss": 0.6799, "step": 7229 }, { "epoch": 0.21108872733642814, "grad_norm": 0.6444031288211085, "learning_rate": 1.7531873479318737e-05, "loss": 0.6032, "step": 7230 }, { "epoch": 0.2111179235642755, "grad_norm": 0.6676634219761833, "learning_rate": 1.753122465531225e-05, "loss": 0.6541, "step": 7231 }, { "epoch": 0.21114711979212286, "grad_norm": 0.6537070193525695, "learning_rate": 1.753057583130576e-05, "loss": 0.5859, "step": 7232 }, { "epoch": 0.21117631601997022, "grad_norm": 0.7128305512261423, "learning_rate": 1.7529927007299273e-05, "loss": 0.6346, "step": 7233 }, { "epoch": 0.21120551224781758, "grad_norm": 0.6775219976590533, "learning_rate": 1.7529278183292785e-05, "loss": 0.6453, "step": 7234 }, { "epoch": 0.21123470847566495, "grad_norm": 0.6267455069176566, "learning_rate": 1.7528629359286297e-05, "loss": 0.5537, "step": 7235 }, { "epoch": 0.2112639047035123, "grad_norm": 0.6612896073003306, "learning_rate": 1.752798053527981e-05, "loss": 0.5877, "step": 7236 }, { "epoch": 0.21129310093135967, "grad_norm": 0.7720686486583732, "learning_rate": 1.7527331711273317e-05, "loss": 0.6851, "step": 7237 }, { "epoch": 0.21132229715920703, "grad_norm": 0.8963818489678012, "learning_rate": 1.752668288726683e-05, "loss": 0.6272, "step": 7238 }, { "epoch": 0.2113514933870544, "grad_norm": 0.7362866874642794, "learning_rate": 1.752603406326034e-05, "loss": 0.7148, "step": 7239 }, { "epoch": 0.21138068961490175, "grad_norm": 0.6939807159487503, "learning_rate": 1.7525385239253854e-05, "loss": 0.7018, "step": 7240 }, { "epoch": 0.2114098858427491, "grad_norm": 0.6762385908594716, "learning_rate": 1.7524736415247366e-05, "loss": 0.6615, "step": 7241 }, { "epoch": 0.21143908207059647, "grad_norm": 0.6393960550529392, "learning_rate": 1.7524087591240878e-05, "loss": 0.6237, "step": 7242 }, { "epoch": 0.21146827829844383, "grad_norm": 0.6345525668266228, "learning_rate": 1.752343876723439e-05, "loss": 0.598, "step": 7243 }, { "epoch": 0.2114974745262912, "grad_norm": 0.8308182536547001, "learning_rate": 1.75227899432279e-05, "loss": 0.6691, "step": 7244 }, { "epoch": 0.21152667075413856, "grad_norm": 0.6998764144376305, "learning_rate": 1.752214111922141e-05, "loss": 0.6626, "step": 7245 }, { "epoch": 0.21155586698198592, "grad_norm": 0.697174424815296, "learning_rate": 1.7521492295214922e-05, "loss": 0.6478, "step": 7246 }, { "epoch": 0.21158506320983328, "grad_norm": 0.6310493456679063, "learning_rate": 1.7520843471208434e-05, "loss": 0.5176, "step": 7247 }, { "epoch": 0.21161425943768064, "grad_norm": 0.7880701546036171, "learning_rate": 1.7520194647201946e-05, "loss": 0.8032, "step": 7248 }, { "epoch": 0.211643455665528, "grad_norm": 0.7254326955323969, "learning_rate": 1.7519545823195462e-05, "loss": 0.6903, "step": 7249 }, { "epoch": 0.21167265189337536, "grad_norm": 0.6721228275547236, "learning_rate": 1.7518896999188974e-05, "loss": 0.6611, "step": 7250 }, { "epoch": 0.21170184812122272, "grad_norm": 0.6907157289055166, "learning_rate": 1.7518248175182482e-05, "loss": 0.6694, "step": 7251 }, { "epoch": 0.2117310443490701, "grad_norm": 0.6985713436002319, "learning_rate": 1.7517599351175994e-05, "loss": 0.733, "step": 7252 }, { "epoch": 0.21176024057691747, "grad_norm": 0.7136366170602017, "learning_rate": 1.7516950527169506e-05, "loss": 0.6735, "step": 7253 }, { "epoch": 0.21178943680476484, "grad_norm": 0.6967368465943361, "learning_rate": 1.751630170316302e-05, "loss": 0.5912, "step": 7254 }, { "epoch": 0.2118186330326122, "grad_norm": 0.7312933043912181, "learning_rate": 1.751565287915653e-05, "loss": 0.7413, "step": 7255 }, { "epoch": 0.21184782926045956, "grad_norm": 0.6675753183535668, "learning_rate": 1.7515004055150043e-05, "loss": 0.6175, "step": 7256 }, { "epoch": 0.21187702548830692, "grad_norm": 0.782139968490342, "learning_rate": 1.7514355231143555e-05, "loss": 0.7494, "step": 7257 }, { "epoch": 0.21190622171615428, "grad_norm": 0.7981186803943723, "learning_rate": 1.7513706407137067e-05, "loss": 0.696, "step": 7258 }, { "epoch": 0.21193541794400164, "grad_norm": 0.6890373634169048, "learning_rate": 1.751305758313058e-05, "loss": 0.6758, "step": 7259 }, { "epoch": 0.211964614171849, "grad_norm": 0.659071076677775, "learning_rate": 1.7512408759124087e-05, "loss": 0.6169, "step": 7260 }, { "epoch": 0.21199381039969636, "grad_norm": 0.6764329489041753, "learning_rate": 1.75117599351176e-05, "loss": 0.6731, "step": 7261 }, { "epoch": 0.21202300662754373, "grad_norm": 0.6604484936764038, "learning_rate": 1.751111111111111e-05, "loss": 0.6662, "step": 7262 }, { "epoch": 0.2120522028553911, "grad_norm": 0.6630264751066518, "learning_rate": 1.7510462287104623e-05, "loss": 0.6337, "step": 7263 }, { "epoch": 0.21208139908323845, "grad_norm": 0.6748387011702017, "learning_rate": 1.7509813463098135e-05, "loss": 0.6566, "step": 7264 }, { "epoch": 0.2121105953110858, "grad_norm": 0.6347914930050509, "learning_rate": 1.7509164639091647e-05, "loss": 0.6257, "step": 7265 }, { "epoch": 0.21213979153893317, "grad_norm": 0.7004120478520963, "learning_rate": 1.750851581508516e-05, "loss": 0.6896, "step": 7266 }, { "epoch": 0.21216898776678053, "grad_norm": 0.6679084039998385, "learning_rate": 1.750786699107867e-05, "loss": 0.6692, "step": 7267 }, { "epoch": 0.2121981839946279, "grad_norm": 0.6908773141988648, "learning_rate": 1.7507218167072184e-05, "loss": 0.6593, "step": 7268 }, { "epoch": 0.21222738022247525, "grad_norm": 0.6466817261050327, "learning_rate": 1.7506569343065696e-05, "loss": 0.5976, "step": 7269 }, { "epoch": 0.21225657645032261, "grad_norm": 0.6247778402582495, "learning_rate": 1.7505920519059208e-05, "loss": 0.5452, "step": 7270 }, { "epoch": 0.21228577267816998, "grad_norm": 0.6253534779413605, "learning_rate": 1.750527169505272e-05, "loss": 0.5886, "step": 7271 }, { "epoch": 0.21231496890601734, "grad_norm": 0.6680908395155623, "learning_rate": 1.750462287104623e-05, "loss": 0.5924, "step": 7272 }, { "epoch": 0.2123441651338647, "grad_norm": 0.6617362162706718, "learning_rate": 1.7503974047039744e-05, "loss": 0.6359, "step": 7273 }, { "epoch": 0.21237336136171206, "grad_norm": 0.7194097448493129, "learning_rate": 1.7503325223033256e-05, "loss": 0.729, "step": 7274 }, { "epoch": 0.21240255758955942, "grad_norm": 0.6801869571368971, "learning_rate": 1.7502676399026764e-05, "loss": 0.6531, "step": 7275 }, { "epoch": 0.21243175381740678, "grad_norm": 0.6568329946271272, "learning_rate": 1.7502027575020276e-05, "loss": 0.6419, "step": 7276 }, { "epoch": 0.21246095004525414, "grad_norm": 0.6842617543934294, "learning_rate": 1.750137875101379e-05, "loss": 0.6673, "step": 7277 }, { "epoch": 0.2124901462731015, "grad_norm": 0.6547804810695242, "learning_rate": 1.75007299270073e-05, "loss": 0.6527, "step": 7278 }, { "epoch": 0.21251934250094887, "grad_norm": 0.7415842496292756, "learning_rate": 1.7500081103000812e-05, "loss": 0.8055, "step": 7279 }, { "epoch": 0.21254853872879623, "grad_norm": 0.6882105251635144, "learning_rate": 1.7499432278994324e-05, "loss": 0.6588, "step": 7280 }, { "epoch": 0.2125777349566436, "grad_norm": 0.6379469040961824, "learning_rate": 1.7498783454987836e-05, "loss": 0.5932, "step": 7281 }, { "epoch": 0.21260693118449098, "grad_norm": 0.9999519908991613, "learning_rate": 1.749813463098135e-05, "loss": 0.7508, "step": 7282 }, { "epoch": 0.21263612741233834, "grad_norm": 0.8326627250007715, "learning_rate": 1.7497485806974857e-05, "loss": 0.6507, "step": 7283 }, { "epoch": 0.2126653236401857, "grad_norm": 0.7035473616491374, "learning_rate": 1.749683698296837e-05, "loss": 0.6709, "step": 7284 }, { "epoch": 0.21269451986803306, "grad_norm": 0.7737511440279614, "learning_rate": 1.749618815896188e-05, "loss": 0.7707, "step": 7285 }, { "epoch": 0.21272371609588042, "grad_norm": 0.7297748682400876, "learning_rate": 1.7495539334955393e-05, "loss": 0.6899, "step": 7286 }, { "epoch": 0.21275291232372778, "grad_norm": 0.669258780657724, "learning_rate": 1.749489051094891e-05, "loss": 0.6097, "step": 7287 }, { "epoch": 0.21278210855157514, "grad_norm": 0.6738771179340893, "learning_rate": 1.749424168694242e-05, "loss": 0.6568, "step": 7288 }, { "epoch": 0.2128113047794225, "grad_norm": 0.7040105905806002, "learning_rate": 1.749359286293593e-05, "loss": 0.7306, "step": 7289 }, { "epoch": 0.21284050100726987, "grad_norm": 0.6687972414563756, "learning_rate": 1.749294403892944e-05, "loss": 0.5568, "step": 7290 }, { "epoch": 0.21286969723511723, "grad_norm": 0.6636311757256128, "learning_rate": 1.7492295214922953e-05, "loss": 0.6719, "step": 7291 }, { "epoch": 0.2128988934629646, "grad_norm": 0.678965132738045, "learning_rate": 1.7491646390916465e-05, "loss": 0.7098, "step": 7292 }, { "epoch": 0.21292808969081195, "grad_norm": 0.7622623269950807, "learning_rate": 1.7490997566909977e-05, "loss": 0.7035, "step": 7293 }, { "epoch": 0.2129572859186593, "grad_norm": 0.7177666462428173, "learning_rate": 1.749034874290349e-05, "loss": 0.6466, "step": 7294 }, { "epoch": 0.21298648214650667, "grad_norm": 0.7269624048939098, "learning_rate": 1.7489699918897e-05, "loss": 0.6995, "step": 7295 }, { "epoch": 0.21301567837435403, "grad_norm": 0.6826568724226744, "learning_rate": 1.7489051094890514e-05, "loss": 0.6789, "step": 7296 }, { "epoch": 0.2130448746022014, "grad_norm": 0.692030117561733, "learning_rate": 1.7488402270884026e-05, "loss": 0.6588, "step": 7297 }, { "epoch": 0.21307407083004876, "grad_norm": 0.7065322242990664, "learning_rate": 1.7487753446877534e-05, "loss": 0.6832, "step": 7298 }, { "epoch": 0.21310326705789612, "grad_norm": 0.734193472705573, "learning_rate": 1.7487104622871046e-05, "loss": 0.686, "step": 7299 }, { "epoch": 0.21313246328574348, "grad_norm": 0.7137580037249496, "learning_rate": 1.7486455798864558e-05, "loss": 0.6936, "step": 7300 }, { "epoch": 0.21316165951359084, "grad_norm": 0.656578061528843, "learning_rate": 1.748580697485807e-05, "loss": 0.6478, "step": 7301 }, { "epoch": 0.2131908557414382, "grad_norm": 0.692054271547739, "learning_rate": 1.7485158150851582e-05, "loss": 0.6743, "step": 7302 }, { "epoch": 0.21322005196928556, "grad_norm": 0.6545498438225287, "learning_rate": 1.7484509326845094e-05, "loss": 0.5752, "step": 7303 }, { "epoch": 0.21324924819713292, "grad_norm": 0.7077481343130849, "learning_rate": 1.7483860502838606e-05, "loss": 0.652, "step": 7304 }, { "epoch": 0.21327844442498028, "grad_norm": 0.7110587661397391, "learning_rate": 1.748321167883212e-05, "loss": 0.7182, "step": 7305 }, { "epoch": 0.21330764065282765, "grad_norm": 0.66872647046844, "learning_rate": 1.748256285482563e-05, "loss": 0.6521, "step": 7306 }, { "epoch": 0.213336836880675, "grad_norm": 0.6591324615805498, "learning_rate": 1.7481914030819142e-05, "loss": 0.6341, "step": 7307 }, { "epoch": 0.21336603310852237, "grad_norm": 0.7371017098101001, "learning_rate": 1.7481265206812654e-05, "loss": 0.7525, "step": 7308 }, { "epoch": 0.21339522933636973, "grad_norm": 0.7494977760790398, "learning_rate": 1.7480616382806166e-05, "loss": 0.6518, "step": 7309 }, { "epoch": 0.2134244255642171, "grad_norm": 0.6985858339557973, "learning_rate": 1.747996755879968e-05, "loss": 0.6575, "step": 7310 }, { "epoch": 0.21345362179206445, "grad_norm": 0.6358030270531504, "learning_rate": 1.747931873479319e-05, "loss": 0.6103, "step": 7311 }, { "epoch": 0.21348281801991184, "grad_norm": 0.6930908624515417, "learning_rate": 1.74786699107867e-05, "loss": 0.7034, "step": 7312 }, { "epoch": 0.2135120142477592, "grad_norm": 0.6377172108952956, "learning_rate": 1.747802108678021e-05, "loss": 0.5817, "step": 7313 }, { "epoch": 0.21354121047560656, "grad_norm": 0.6886706352551666, "learning_rate": 1.7477372262773723e-05, "loss": 0.6238, "step": 7314 }, { "epoch": 0.21357040670345392, "grad_norm": 0.6544640318642332, "learning_rate": 1.7476723438767235e-05, "loss": 0.6145, "step": 7315 }, { "epoch": 0.21359960293130129, "grad_norm": 0.6938002166437028, "learning_rate": 1.7476074614760747e-05, "loss": 0.6731, "step": 7316 }, { "epoch": 0.21362879915914865, "grad_norm": 0.7136896572983037, "learning_rate": 1.747542579075426e-05, "loss": 0.6873, "step": 7317 }, { "epoch": 0.213657995386996, "grad_norm": 0.6899098988289765, "learning_rate": 1.747477696674777e-05, "loss": 0.6864, "step": 7318 }, { "epoch": 0.21368719161484337, "grad_norm": 0.658611035152112, "learning_rate": 1.7474128142741283e-05, "loss": 0.6061, "step": 7319 }, { "epoch": 0.21371638784269073, "grad_norm": 0.6534593005603415, "learning_rate": 1.7473479318734795e-05, "loss": 0.5868, "step": 7320 }, { "epoch": 0.2137455840705381, "grad_norm": 0.6237533643180502, "learning_rate": 1.7472830494728304e-05, "loss": 0.5717, "step": 7321 }, { "epoch": 0.21377478029838545, "grad_norm": 0.657662590419593, "learning_rate": 1.7472181670721816e-05, "loss": 0.6192, "step": 7322 }, { "epoch": 0.21380397652623281, "grad_norm": 0.6864053817602717, "learning_rate": 1.7471532846715328e-05, "loss": 0.6337, "step": 7323 }, { "epoch": 0.21383317275408018, "grad_norm": 0.6887218634962955, "learning_rate": 1.7470884022708843e-05, "loss": 0.6462, "step": 7324 }, { "epoch": 0.21386236898192754, "grad_norm": 0.6593581026723154, "learning_rate": 1.7470235198702356e-05, "loss": 0.6102, "step": 7325 }, { "epoch": 0.2138915652097749, "grad_norm": 1.2778902147216549, "learning_rate": 1.7469586374695868e-05, "loss": 0.6822, "step": 7326 }, { "epoch": 0.21392076143762226, "grad_norm": 0.7307272723644725, "learning_rate": 1.7468937550689376e-05, "loss": 0.7375, "step": 7327 }, { "epoch": 0.21394995766546962, "grad_norm": 0.7432121703596404, "learning_rate": 1.7468288726682888e-05, "loss": 0.7308, "step": 7328 }, { "epoch": 0.21397915389331698, "grad_norm": 0.6830438450148176, "learning_rate": 1.74676399026764e-05, "loss": 0.6529, "step": 7329 }, { "epoch": 0.21400835012116434, "grad_norm": 0.636260939435226, "learning_rate": 1.7466991078669912e-05, "loss": 0.5914, "step": 7330 }, { "epoch": 0.2140375463490117, "grad_norm": 0.6773046139314393, "learning_rate": 1.7466342254663424e-05, "loss": 0.6633, "step": 7331 }, { "epoch": 0.21406674257685906, "grad_norm": 0.6590598711729376, "learning_rate": 1.7465693430656936e-05, "loss": 0.6424, "step": 7332 }, { "epoch": 0.21409593880470643, "grad_norm": 0.689466323613055, "learning_rate": 1.746504460665045e-05, "loss": 0.6067, "step": 7333 }, { "epoch": 0.2141251350325538, "grad_norm": 0.7496364170102169, "learning_rate": 1.746439578264396e-05, "loss": 0.5878, "step": 7334 }, { "epoch": 0.21415433126040115, "grad_norm": 0.649953111669822, "learning_rate": 1.7463746958637472e-05, "loss": 0.6028, "step": 7335 }, { "epoch": 0.2141835274882485, "grad_norm": 0.6272089932918404, "learning_rate": 1.746309813463098e-05, "loss": 0.5802, "step": 7336 }, { "epoch": 0.21421272371609587, "grad_norm": 0.6686313891640696, "learning_rate": 1.7462449310624493e-05, "loss": 0.6208, "step": 7337 }, { "epoch": 0.21424191994394323, "grad_norm": 0.6941503700771646, "learning_rate": 1.7461800486618005e-05, "loss": 0.6511, "step": 7338 }, { "epoch": 0.2142711161717906, "grad_norm": 0.68955289586201, "learning_rate": 1.7461151662611517e-05, "loss": 0.6012, "step": 7339 }, { "epoch": 0.21430031239963795, "grad_norm": 0.6734404112974305, "learning_rate": 1.746050283860503e-05, "loss": 0.6244, "step": 7340 }, { "epoch": 0.21432950862748532, "grad_norm": 0.6729366266823795, "learning_rate": 1.745985401459854e-05, "loss": 0.6151, "step": 7341 }, { "epoch": 0.2143587048553327, "grad_norm": 0.707637050023484, "learning_rate": 1.7459205190592053e-05, "loss": 0.6695, "step": 7342 }, { "epoch": 0.21438790108318007, "grad_norm": 0.7218064824231877, "learning_rate": 1.7458556366585565e-05, "loss": 0.6679, "step": 7343 }, { "epoch": 0.21441709731102743, "grad_norm": 0.6456033362896756, "learning_rate": 1.7457907542579077e-05, "loss": 0.6226, "step": 7344 }, { "epoch": 0.2144462935388748, "grad_norm": 0.6690801552443256, "learning_rate": 1.745725871857259e-05, "loss": 0.6486, "step": 7345 }, { "epoch": 0.21447548976672215, "grad_norm": 0.6547204130787512, "learning_rate": 1.74566098945661e-05, "loss": 0.5496, "step": 7346 }, { "epoch": 0.2145046859945695, "grad_norm": 0.7013021054307037, "learning_rate": 1.7455961070559613e-05, "loss": 0.6963, "step": 7347 }, { "epoch": 0.21453388222241687, "grad_norm": 0.7247377926720341, "learning_rate": 1.7455312246553125e-05, "loss": 0.6968, "step": 7348 }, { "epoch": 0.21456307845026423, "grad_norm": 0.6654318315247085, "learning_rate": 1.7454663422546637e-05, "loss": 0.5706, "step": 7349 }, { "epoch": 0.2145922746781116, "grad_norm": 0.683647657483636, "learning_rate": 1.7454014598540146e-05, "loss": 0.6791, "step": 7350 }, { "epoch": 0.21462147090595896, "grad_norm": 0.679874112873134, "learning_rate": 1.7453365774533658e-05, "loss": 0.6922, "step": 7351 }, { "epoch": 0.21465066713380632, "grad_norm": 0.6758983341414871, "learning_rate": 1.745271695052717e-05, "loss": 0.6807, "step": 7352 }, { "epoch": 0.21467986336165368, "grad_norm": 0.6954604457457088, "learning_rate": 1.7452068126520682e-05, "loss": 0.6335, "step": 7353 }, { "epoch": 0.21470905958950104, "grad_norm": 0.6363694462320462, "learning_rate": 1.7451419302514194e-05, "loss": 0.577, "step": 7354 }, { "epoch": 0.2147382558173484, "grad_norm": 0.6723004369096923, "learning_rate": 1.7450770478507706e-05, "loss": 0.6837, "step": 7355 }, { "epoch": 0.21476745204519576, "grad_norm": 0.6521457390531806, "learning_rate": 1.7450121654501218e-05, "loss": 0.6278, "step": 7356 }, { "epoch": 0.21479664827304312, "grad_norm": 0.6438725191308695, "learning_rate": 1.744947283049473e-05, "loss": 0.6391, "step": 7357 }, { "epoch": 0.21482584450089048, "grad_norm": 0.8827881041504264, "learning_rate": 1.7448824006488242e-05, "loss": 0.7577, "step": 7358 }, { "epoch": 0.21485504072873784, "grad_norm": 1.044271970720711, "learning_rate": 1.744817518248175e-05, "loss": 0.7694, "step": 7359 }, { "epoch": 0.2148842369565852, "grad_norm": 0.6506679007418986, "learning_rate": 1.7447526358475263e-05, "loss": 0.6173, "step": 7360 }, { "epoch": 0.21491343318443257, "grad_norm": 0.9779335777079805, "learning_rate": 1.7446877534468775e-05, "loss": 0.6512, "step": 7361 }, { "epoch": 0.21494262941227993, "grad_norm": 0.712979702386236, "learning_rate": 1.744622871046229e-05, "loss": 0.684, "step": 7362 }, { "epoch": 0.2149718256401273, "grad_norm": 0.6801753618347189, "learning_rate": 1.7445579886455802e-05, "loss": 0.6483, "step": 7363 }, { "epoch": 0.21500102186797465, "grad_norm": 0.6641286250815398, "learning_rate": 1.7444931062449314e-05, "loss": 0.6756, "step": 7364 }, { "epoch": 0.215030218095822, "grad_norm": 0.6356555018336827, "learning_rate": 1.7444282238442823e-05, "loss": 0.5973, "step": 7365 }, { "epoch": 0.21505941432366937, "grad_norm": 0.6586569930092475, "learning_rate": 1.7443633414436335e-05, "loss": 0.6546, "step": 7366 }, { "epoch": 0.21508861055151673, "grad_norm": 0.6974489249302501, "learning_rate": 1.7442984590429847e-05, "loss": 0.7334, "step": 7367 }, { "epoch": 0.2151178067793641, "grad_norm": 0.6714052212039453, "learning_rate": 1.744233576642336e-05, "loss": 0.6532, "step": 7368 }, { "epoch": 0.21514700300721146, "grad_norm": 0.7094409563619768, "learning_rate": 1.744168694241687e-05, "loss": 0.6413, "step": 7369 }, { "epoch": 0.21517619923505882, "grad_norm": 0.6764423134257739, "learning_rate": 1.7441038118410383e-05, "loss": 0.6268, "step": 7370 }, { "epoch": 0.21520539546290618, "grad_norm": 0.6790778015659559, "learning_rate": 1.7440389294403895e-05, "loss": 0.6335, "step": 7371 }, { "epoch": 0.21523459169075357, "grad_norm": 0.7069448676889847, "learning_rate": 1.7439740470397407e-05, "loss": 0.6938, "step": 7372 }, { "epoch": 0.21526378791860093, "grad_norm": 0.6641616323451051, "learning_rate": 1.743909164639092e-05, "loss": 0.6326, "step": 7373 }, { "epoch": 0.2152929841464483, "grad_norm": 0.678616469717154, "learning_rate": 1.7438442822384428e-05, "loss": 0.6014, "step": 7374 }, { "epoch": 0.21532218037429565, "grad_norm": 0.7449191913164415, "learning_rate": 1.743779399837794e-05, "loss": 0.7686, "step": 7375 }, { "epoch": 0.215351376602143, "grad_norm": 0.6907729609294676, "learning_rate": 1.7437145174371452e-05, "loss": 0.7329, "step": 7376 }, { "epoch": 0.21538057282999037, "grad_norm": 1.116183423673109, "learning_rate": 1.7436496350364964e-05, "loss": 0.6387, "step": 7377 }, { "epoch": 0.21540976905783774, "grad_norm": 0.6483409127456496, "learning_rate": 1.7435847526358476e-05, "loss": 0.5766, "step": 7378 }, { "epoch": 0.2154389652856851, "grad_norm": 0.6930256915062545, "learning_rate": 1.7435198702351988e-05, "loss": 0.6673, "step": 7379 }, { "epoch": 0.21546816151353246, "grad_norm": 0.6541060364981351, "learning_rate": 1.74345498783455e-05, "loss": 0.5902, "step": 7380 }, { "epoch": 0.21549735774137982, "grad_norm": 0.7311843241624528, "learning_rate": 1.7433901054339012e-05, "loss": 0.5559, "step": 7381 }, { "epoch": 0.21552655396922718, "grad_norm": 0.6693689896956242, "learning_rate": 1.7433252230332524e-05, "loss": 0.5859, "step": 7382 }, { "epoch": 0.21555575019707454, "grad_norm": 0.6619649664540657, "learning_rate": 1.7432603406326036e-05, "loss": 0.705, "step": 7383 }, { "epoch": 0.2155849464249219, "grad_norm": 0.7097296811676774, "learning_rate": 1.7431954582319548e-05, "loss": 0.6927, "step": 7384 }, { "epoch": 0.21561414265276926, "grad_norm": 0.7412470481877185, "learning_rate": 1.743130575831306e-05, "loss": 0.7462, "step": 7385 }, { "epoch": 0.21564333888061663, "grad_norm": 0.6876928517486568, "learning_rate": 1.7430656934306572e-05, "loss": 0.7092, "step": 7386 }, { "epoch": 0.215672535108464, "grad_norm": 0.634618787653116, "learning_rate": 1.7430008110300084e-05, "loss": 0.5599, "step": 7387 }, { "epoch": 0.21570173133631135, "grad_norm": 0.7001773134329509, "learning_rate": 1.7429359286293593e-05, "loss": 0.6581, "step": 7388 }, { "epoch": 0.2157309275641587, "grad_norm": 1.112270644206109, "learning_rate": 1.7428710462287105e-05, "loss": 0.7371, "step": 7389 }, { "epoch": 0.21576012379200607, "grad_norm": 0.6949895868009663, "learning_rate": 1.7428061638280617e-05, "loss": 0.635, "step": 7390 }, { "epoch": 0.21578932001985343, "grad_norm": 0.6656287334875854, "learning_rate": 1.742741281427413e-05, "loss": 0.6201, "step": 7391 }, { "epoch": 0.2158185162477008, "grad_norm": 0.6887669440705647, "learning_rate": 1.742676399026764e-05, "loss": 0.6684, "step": 7392 }, { "epoch": 0.21584771247554815, "grad_norm": 0.9412351935499256, "learning_rate": 1.7426115166261153e-05, "loss": 0.7061, "step": 7393 }, { "epoch": 0.21587690870339551, "grad_norm": 0.6743556260197843, "learning_rate": 1.7425466342254665e-05, "loss": 0.6494, "step": 7394 }, { "epoch": 0.21590610493124288, "grad_norm": 0.6851928243904827, "learning_rate": 1.7424817518248177e-05, "loss": 0.6582, "step": 7395 }, { "epoch": 0.21593530115909024, "grad_norm": 0.7568727040676546, "learning_rate": 1.742416869424169e-05, "loss": 0.7777, "step": 7396 }, { "epoch": 0.2159644973869376, "grad_norm": 0.6395054773753027, "learning_rate": 1.7423519870235198e-05, "loss": 0.5741, "step": 7397 }, { "epoch": 0.21599369361478496, "grad_norm": 0.6613226456981347, "learning_rate": 1.742287104622871e-05, "loss": 0.603, "step": 7398 }, { "epoch": 0.21602288984263232, "grad_norm": 0.6782932369761913, "learning_rate": 1.7422222222222222e-05, "loss": 0.6515, "step": 7399 }, { "epoch": 0.21605208607047968, "grad_norm": 0.6744424833928051, "learning_rate": 1.7421573398215737e-05, "loss": 0.6428, "step": 7400 }, { "epoch": 0.21608128229832704, "grad_norm": 0.621886570642861, "learning_rate": 1.742092457420925e-05, "loss": 0.5362, "step": 7401 }, { "epoch": 0.21611047852617443, "grad_norm": 0.6178805472962453, "learning_rate": 1.742027575020276e-05, "loss": 0.5483, "step": 7402 }, { "epoch": 0.2161396747540218, "grad_norm": 0.6341665401839812, "learning_rate": 1.741962692619627e-05, "loss": 0.6125, "step": 7403 }, { "epoch": 0.21616887098186915, "grad_norm": 0.7184401214316193, "learning_rate": 1.7418978102189782e-05, "loss": 0.6858, "step": 7404 }, { "epoch": 0.21619806720971652, "grad_norm": 0.6411100948376628, "learning_rate": 1.7418329278183294e-05, "loss": 0.5634, "step": 7405 }, { "epoch": 0.21622726343756388, "grad_norm": 0.6999712182891883, "learning_rate": 1.7417680454176806e-05, "loss": 0.5785, "step": 7406 }, { "epoch": 0.21625645966541124, "grad_norm": 0.8392177398666046, "learning_rate": 1.7417031630170318e-05, "loss": 0.6971, "step": 7407 }, { "epoch": 0.2162856558932586, "grad_norm": 0.7129749609621034, "learning_rate": 1.741638280616383e-05, "loss": 0.6765, "step": 7408 }, { "epoch": 0.21631485212110596, "grad_norm": 0.6442038845045237, "learning_rate": 1.7415733982157342e-05, "loss": 0.6411, "step": 7409 }, { "epoch": 0.21634404834895332, "grad_norm": 0.6566546291288448, "learning_rate": 1.7415085158150854e-05, "loss": 0.6134, "step": 7410 }, { "epoch": 0.21637324457680068, "grad_norm": 0.6516451486406162, "learning_rate": 1.7414436334144366e-05, "loss": 0.663, "step": 7411 }, { "epoch": 0.21640244080464804, "grad_norm": 0.7048276850054199, "learning_rate": 1.7413787510137875e-05, "loss": 0.7207, "step": 7412 }, { "epoch": 0.2164316370324954, "grad_norm": 0.7191953607253414, "learning_rate": 1.7413138686131387e-05, "loss": 0.6398, "step": 7413 }, { "epoch": 0.21646083326034277, "grad_norm": 0.6216188621212514, "learning_rate": 1.74124898621249e-05, "loss": 0.5969, "step": 7414 }, { "epoch": 0.21649002948819013, "grad_norm": 0.7565916521295641, "learning_rate": 1.741184103811841e-05, "loss": 0.7232, "step": 7415 }, { "epoch": 0.2165192257160375, "grad_norm": 0.7120749559689371, "learning_rate": 1.7411192214111923e-05, "loss": 0.7124, "step": 7416 }, { "epoch": 0.21654842194388485, "grad_norm": 0.7063663104999643, "learning_rate": 1.7410543390105435e-05, "loss": 0.6965, "step": 7417 }, { "epoch": 0.2165776181717322, "grad_norm": 0.7147271363149754, "learning_rate": 1.7409894566098947e-05, "loss": 0.7166, "step": 7418 }, { "epoch": 0.21660681439957957, "grad_norm": 0.7028289906395485, "learning_rate": 1.740924574209246e-05, "loss": 0.661, "step": 7419 }, { "epoch": 0.21663601062742693, "grad_norm": 0.7457625067214662, "learning_rate": 1.740859691808597e-05, "loss": 0.6794, "step": 7420 }, { "epoch": 0.2166652068552743, "grad_norm": 0.6275807504512749, "learning_rate": 1.7407948094079483e-05, "loss": 0.5655, "step": 7421 }, { "epoch": 0.21669440308312166, "grad_norm": 0.7489226803826953, "learning_rate": 1.7407299270072995e-05, "loss": 0.7335, "step": 7422 }, { "epoch": 0.21672359931096902, "grad_norm": 0.6340360908104126, "learning_rate": 1.7406650446066507e-05, "loss": 0.6133, "step": 7423 }, { "epoch": 0.21675279553881638, "grad_norm": 0.701327185917628, "learning_rate": 1.740600162206002e-05, "loss": 0.6887, "step": 7424 }, { "epoch": 0.21678199176666374, "grad_norm": 0.6424234679704716, "learning_rate": 1.740535279805353e-05, "loss": 0.5822, "step": 7425 }, { "epoch": 0.2168111879945111, "grad_norm": 0.678763780267833, "learning_rate": 1.740470397404704e-05, "loss": 0.663, "step": 7426 }, { "epoch": 0.21684038422235846, "grad_norm": 0.6833462854605458, "learning_rate": 1.7404055150040552e-05, "loss": 0.6517, "step": 7427 }, { "epoch": 0.21686958045020582, "grad_norm": 0.6074451157621901, "learning_rate": 1.7403406326034064e-05, "loss": 0.5648, "step": 7428 }, { "epoch": 0.21689877667805318, "grad_norm": 0.6459376166150083, "learning_rate": 1.7402757502027576e-05, "loss": 0.6503, "step": 7429 }, { "epoch": 0.21692797290590055, "grad_norm": 0.7158907140790276, "learning_rate": 1.7402108678021088e-05, "loss": 0.7071, "step": 7430 }, { "epoch": 0.2169571691337479, "grad_norm": 0.7188878368362537, "learning_rate": 1.74014598540146e-05, "loss": 0.6623, "step": 7431 }, { "epoch": 0.21698636536159527, "grad_norm": 0.6850074401986416, "learning_rate": 1.7400811030008112e-05, "loss": 0.6629, "step": 7432 }, { "epoch": 0.21701556158944266, "grad_norm": 0.6945003321013873, "learning_rate": 1.7400162206001624e-05, "loss": 0.5812, "step": 7433 }, { "epoch": 0.21704475781729002, "grad_norm": 0.7418299678377621, "learning_rate": 1.7399513381995136e-05, "loss": 0.7781, "step": 7434 }, { "epoch": 0.21707395404513738, "grad_norm": 0.653645781547825, "learning_rate": 1.7398864557988645e-05, "loss": 0.5465, "step": 7435 }, { "epoch": 0.21710315027298474, "grad_norm": 0.6973426384229964, "learning_rate": 1.7398215733982157e-05, "loss": 0.6513, "step": 7436 }, { "epoch": 0.2171323465008321, "grad_norm": 0.6695917127946545, "learning_rate": 1.739756690997567e-05, "loss": 0.661, "step": 7437 }, { "epoch": 0.21716154272867946, "grad_norm": 0.7831465446134587, "learning_rate": 1.7396918085969184e-05, "loss": 0.7868, "step": 7438 }, { "epoch": 0.21719073895652682, "grad_norm": 0.7088901205169428, "learning_rate": 1.7396269261962696e-05, "loss": 0.7384, "step": 7439 }, { "epoch": 0.21721993518437419, "grad_norm": 0.6563011559163662, "learning_rate": 1.7395620437956208e-05, "loss": 0.6475, "step": 7440 }, { "epoch": 0.21724913141222155, "grad_norm": 0.6664922230478735, "learning_rate": 1.7394971613949717e-05, "loss": 0.6395, "step": 7441 }, { "epoch": 0.2172783276400689, "grad_norm": 0.6084422697009129, "learning_rate": 1.739432278994323e-05, "loss": 0.5061, "step": 7442 }, { "epoch": 0.21730752386791627, "grad_norm": 0.675536863680636, "learning_rate": 1.739367396593674e-05, "loss": 0.5948, "step": 7443 }, { "epoch": 0.21733672009576363, "grad_norm": 0.7584777872119345, "learning_rate": 1.7393025141930253e-05, "loss": 0.7241, "step": 7444 }, { "epoch": 0.217365916323611, "grad_norm": 0.6904754694824986, "learning_rate": 1.7392376317923765e-05, "loss": 0.6828, "step": 7445 }, { "epoch": 0.21739511255145835, "grad_norm": 0.6353424016593748, "learning_rate": 1.7391727493917277e-05, "loss": 0.6037, "step": 7446 }, { "epoch": 0.2174243087793057, "grad_norm": 0.6829922181848008, "learning_rate": 1.739107866991079e-05, "loss": 0.6585, "step": 7447 }, { "epoch": 0.21745350500715308, "grad_norm": 0.688318553993765, "learning_rate": 1.73904298459043e-05, "loss": 0.6911, "step": 7448 }, { "epoch": 0.21748270123500044, "grad_norm": 0.6204030767972677, "learning_rate": 1.7389781021897813e-05, "loss": 0.5402, "step": 7449 }, { "epoch": 0.2175118974628478, "grad_norm": 0.6088645029614002, "learning_rate": 1.738913219789132e-05, "loss": 0.5194, "step": 7450 }, { "epoch": 0.21754109369069516, "grad_norm": 0.6837164571357793, "learning_rate": 1.7388483373884834e-05, "loss": 0.6814, "step": 7451 }, { "epoch": 0.21757028991854252, "grad_norm": 0.6090204678880432, "learning_rate": 1.7387834549878346e-05, "loss": 0.5601, "step": 7452 }, { "epoch": 0.21759948614638988, "grad_norm": 0.7162670792835151, "learning_rate": 1.7387185725871858e-05, "loss": 0.6594, "step": 7453 }, { "epoch": 0.21762868237423724, "grad_norm": 0.6847701880051191, "learning_rate": 1.738653690186537e-05, "loss": 0.6468, "step": 7454 }, { "epoch": 0.2176578786020846, "grad_norm": 0.7040208475609576, "learning_rate": 1.7385888077858882e-05, "loss": 0.6612, "step": 7455 }, { "epoch": 0.21768707482993196, "grad_norm": 0.6887255568569857, "learning_rate": 1.7385239253852394e-05, "loss": 0.6104, "step": 7456 }, { "epoch": 0.21771627105777933, "grad_norm": 0.7525471277992354, "learning_rate": 1.7384590429845906e-05, "loss": 0.7026, "step": 7457 }, { "epoch": 0.2177454672856267, "grad_norm": 0.6525899474270039, "learning_rate": 1.7383941605839418e-05, "loss": 0.6212, "step": 7458 }, { "epoch": 0.21777466351347405, "grad_norm": 0.6881729840744598, "learning_rate": 1.738329278183293e-05, "loss": 0.6643, "step": 7459 }, { "epoch": 0.2178038597413214, "grad_norm": 1.3680017320635245, "learning_rate": 1.7382643957826442e-05, "loss": 0.6125, "step": 7460 }, { "epoch": 0.21783305596916877, "grad_norm": 0.6332809949350071, "learning_rate": 1.7381995133819954e-05, "loss": 0.6264, "step": 7461 }, { "epoch": 0.21786225219701613, "grad_norm": 0.666333111756601, "learning_rate": 1.7381346309813466e-05, "loss": 0.6677, "step": 7462 }, { "epoch": 0.21789144842486352, "grad_norm": 0.6950982377467093, "learning_rate": 1.7380697485806978e-05, "loss": 0.6544, "step": 7463 }, { "epoch": 0.21792064465271088, "grad_norm": 0.6756984912978811, "learning_rate": 1.7380048661800487e-05, "loss": 0.6263, "step": 7464 }, { "epoch": 0.21794984088055824, "grad_norm": 0.7173186782496, "learning_rate": 1.7379399837794e-05, "loss": 0.7823, "step": 7465 }, { "epoch": 0.2179790371084056, "grad_norm": 0.7424286543439134, "learning_rate": 1.737875101378751e-05, "loss": 0.7049, "step": 7466 }, { "epoch": 0.21800823333625297, "grad_norm": 0.6669264847473386, "learning_rate": 1.7378102189781023e-05, "loss": 0.6458, "step": 7467 }, { "epoch": 0.21803742956410033, "grad_norm": 0.6578541342792659, "learning_rate": 1.7377453365774535e-05, "loss": 0.6414, "step": 7468 }, { "epoch": 0.2180666257919477, "grad_norm": 0.6174553085014338, "learning_rate": 1.7376804541768047e-05, "loss": 0.5577, "step": 7469 }, { "epoch": 0.21809582201979505, "grad_norm": 0.6863743230216035, "learning_rate": 1.737615571776156e-05, "loss": 0.6297, "step": 7470 }, { "epoch": 0.2181250182476424, "grad_norm": 0.6179304094215258, "learning_rate": 1.737550689375507e-05, "loss": 0.5962, "step": 7471 }, { "epoch": 0.21815421447548977, "grad_norm": 0.6700589368445139, "learning_rate": 1.7374858069748583e-05, "loss": 0.6203, "step": 7472 }, { "epoch": 0.21818341070333713, "grad_norm": 0.7510429215447952, "learning_rate": 1.737420924574209e-05, "loss": 0.7407, "step": 7473 }, { "epoch": 0.2182126069311845, "grad_norm": 0.6964907584783007, "learning_rate": 1.7373560421735604e-05, "loss": 0.7011, "step": 7474 }, { "epoch": 0.21824180315903186, "grad_norm": 0.7491952398701666, "learning_rate": 1.737291159772912e-05, "loss": 0.6191, "step": 7475 }, { "epoch": 0.21827099938687922, "grad_norm": 0.6236611463637183, "learning_rate": 1.737226277372263e-05, "loss": 0.556, "step": 7476 }, { "epoch": 0.21830019561472658, "grad_norm": 1.3648198547568922, "learning_rate": 1.7371613949716143e-05, "loss": 0.6432, "step": 7477 }, { "epoch": 0.21832939184257394, "grad_norm": 0.6788578604918224, "learning_rate": 1.7370965125709655e-05, "loss": 0.6354, "step": 7478 }, { "epoch": 0.2183585880704213, "grad_norm": 0.6829563522451618, "learning_rate": 1.7370316301703164e-05, "loss": 0.5869, "step": 7479 }, { "epoch": 0.21838778429826866, "grad_norm": 0.640567319455553, "learning_rate": 1.7369667477696676e-05, "loss": 0.6326, "step": 7480 }, { "epoch": 0.21841698052611602, "grad_norm": 0.6362794670302553, "learning_rate": 1.7369018653690188e-05, "loss": 0.546, "step": 7481 }, { "epoch": 0.21844617675396338, "grad_norm": 0.6976360852635116, "learning_rate": 1.73683698296837e-05, "loss": 0.5636, "step": 7482 }, { "epoch": 0.21847537298181074, "grad_norm": 0.6251725395182017, "learning_rate": 1.7367721005677212e-05, "loss": 0.5409, "step": 7483 }, { "epoch": 0.2185045692096581, "grad_norm": 0.6974793276092384, "learning_rate": 1.7367072181670724e-05, "loss": 0.7071, "step": 7484 }, { "epoch": 0.21853376543750547, "grad_norm": 0.684660383458802, "learning_rate": 1.7366423357664236e-05, "loss": 0.6402, "step": 7485 }, { "epoch": 0.21856296166535283, "grad_norm": 0.6594643578902768, "learning_rate": 1.7365774533657748e-05, "loss": 0.6398, "step": 7486 }, { "epoch": 0.2185921578932002, "grad_norm": 0.6711228557927532, "learning_rate": 1.736512570965126e-05, "loss": 0.6126, "step": 7487 }, { "epoch": 0.21862135412104755, "grad_norm": 0.7599900332491732, "learning_rate": 1.736447688564477e-05, "loss": 0.674, "step": 7488 }, { "epoch": 0.2186505503488949, "grad_norm": 0.6924574016806403, "learning_rate": 1.736382806163828e-05, "loss": 0.6501, "step": 7489 }, { "epoch": 0.21867974657674227, "grad_norm": 0.6779187098407283, "learning_rate": 1.7363179237631793e-05, "loss": 0.6542, "step": 7490 }, { "epoch": 0.21870894280458963, "grad_norm": 0.7119770823346435, "learning_rate": 1.7362530413625305e-05, "loss": 0.6501, "step": 7491 }, { "epoch": 0.218738139032437, "grad_norm": 0.6311273836055621, "learning_rate": 1.7361881589618817e-05, "loss": 0.5707, "step": 7492 }, { "epoch": 0.21876733526028438, "grad_norm": 0.6380031439086841, "learning_rate": 1.736123276561233e-05, "loss": 0.6287, "step": 7493 }, { "epoch": 0.21879653148813175, "grad_norm": 0.6966962602092559, "learning_rate": 1.736058394160584e-05, "loss": 0.675, "step": 7494 }, { "epoch": 0.2188257277159791, "grad_norm": 0.6706800584435492, "learning_rate": 1.7359935117599353e-05, "loss": 0.6031, "step": 7495 }, { "epoch": 0.21885492394382647, "grad_norm": 0.652737774359918, "learning_rate": 1.7359286293592865e-05, "loss": 0.6301, "step": 7496 }, { "epoch": 0.21888412017167383, "grad_norm": 0.7018167285672982, "learning_rate": 1.7358637469586377e-05, "loss": 0.658, "step": 7497 }, { "epoch": 0.2189133163995212, "grad_norm": 0.6677736375484356, "learning_rate": 1.735798864557989e-05, "loss": 0.6388, "step": 7498 }, { "epoch": 0.21894251262736855, "grad_norm": 0.6573495810650537, "learning_rate": 1.73573398215734e-05, "loss": 0.5999, "step": 7499 }, { "epoch": 0.2189717088552159, "grad_norm": 0.6740043868424782, "learning_rate": 1.7356690997566913e-05, "loss": 0.6171, "step": 7500 }, { "epoch": 0.21900090508306327, "grad_norm": 0.63916929018512, "learning_rate": 1.7356042173560425e-05, "loss": 0.6287, "step": 7501 }, { "epoch": 0.21903010131091064, "grad_norm": 0.9938716814731466, "learning_rate": 1.7355393349553934e-05, "loss": 0.6391, "step": 7502 }, { "epoch": 0.219059297538758, "grad_norm": 0.7058636675897894, "learning_rate": 1.7354744525547446e-05, "loss": 0.6558, "step": 7503 }, { "epoch": 0.21908849376660536, "grad_norm": 0.6951250826533235, "learning_rate": 1.7354095701540958e-05, "loss": 0.663, "step": 7504 }, { "epoch": 0.21911768999445272, "grad_norm": 0.6368369045973772, "learning_rate": 1.735344687753447e-05, "loss": 0.5819, "step": 7505 }, { "epoch": 0.21914688622230008, "grad_norm": 0.7127660002807839, "learning_rate": 1.735279805352798e-05, "loss": 0.6663, "step": 7506 }, { "epoch": 0.21917608245014744, "grad_norm": 0.6677917986947444, "learning_rate": 1.7352149229521494e-05, "loss": 0.5915, "step": 7507 }, { "epoch": 0.2192052786779948, "grad_norm": 0.651213452286966, "learning_rate": 1.7351500405515006e-05, "loss": 0.6336, "step": 7508 }, { "epoch": 0.21923447490584216, "grad_norm": 0.6705774129470338, "learning_rate": 1.7350851581508518e-05, "loss": 0.6552, "step": 7509 }, { "epoch": 0.21926367113368952, "grad_norm": 0.685705916262277, "learning_rate": 1.735020275750203e-05, "loss": 0.6563, "step": 7510 }, { "epoch": 0.2192928673615369, "grad_norm": 0.6678137044519462, "learning_rate": 1.734955393349554e-05, "loss": 0.6337, "step": 7511 }, { "epoch": 0.21932206358938425, "grad_norm": 0.7242066168149737, "learning_rate": 1.734890510948905e-05, "loss": 0.7221, "step": 7512 }, { "epoch": 0.2193512598172316, "grad_norm": 0.7250829109447409, "learning_rate": 1.7348256285482566e-05, "loss": 0.7272, "step": 7513 }, { "epoch": 0.21938045604507897, "grad_norm": 0.6904475505889033, "learning_rate": 1.7347607461476078e-05, "loss": 0.7111, "step": 7514 }, { "epoch": 0.21940965227292633, "grad_norm": 0.7226812082472506, "learning_rate": 1.734695863746959e-05, "loss": 0.725, "step": 7515 }, { "epoch": 0.2194388485007737, "grad_norm": 0.9291240943323179, "learning_rate": 1.7346309813463102e-05, "loss": 0.6935, "step": 7516 }, { "epoch": 0.21946804472862105, "grad_norm": 0.6677925411678135, "learning_rate": 1.734566098945661e-05, "loss": 0.5952, "step": 7517 }, { "epoch": 0.21949724095646841, "grad_norm": 0.6323622294722004, "learning_rate": 1.7345012165450123e-05, "loss": 0.5584, "step": 7518 }, { "epoch": 0.21952643718431578, "grad_norm": 0.7373060221669387, "learning_rate": 1.7344363341443635e-05, "loss": 0.632, "step": 7519 }, { "epoch": 0.21955563341216314, "grad_norm": 0.7410239886883444, "learning_rate": 1.7343714517437147e-05, "loss": 0.7367, "step": 7520 }, { "epoch": 0.2195848296400105, "grad_norm": 0.6593701288053082, "learning_rate": 1.734306569343066e-05, "loss": 0.6862, "step": 7521 }, { "epoch": 0.21961402586785786, "grad_norm": 0.6515377026367252, "learning_rate": 1.734241686942417e-05, "loss": 0.6049, "step": 7522 }, { "epoch": 0.21964322209570525, "grad_norm": 0.6625849019339286, "learning_rate": 1.7341768045417683e-05, "loss": 0.6357, "step": 7523 }, { "epoch": 0.2196724183235526, "grad_norm": 0.7091123866580611, "learning_rate": 1.7341119221411195e-05, "loss": 0.6318, "step": 7524 }, { "epoch": 0.21970161455139997, "grad_norm": 0.6748812857839221, "learning_rate": 1.7340470397404707e-05, "loss": 0.715, "step": 7525 }, { "epoch": 0.21973081077924733, "grad_norm": 0.6737067816793559, "learning_rate": 1.7339821573398215e-05, "loss": 0.6336, "step": 7526 }, { "epoch": 0.2197600070070947, "grad_norm": 0.6464877337099093, "learning_rate": 1.7339172749391727e-05, "loss": 0.6137, "step": 7527 }, { "epoch": 0.21978920323494205, "grad_norm": 0.6969088619031245, "learning_rate": 1.733852392538524e-05, "loss": 0.577, "step": 7528 }, { "epoch": 0.21981839946278942, "grad_norm": 0.7089938998002518, "learning_rate": 1.733787510137875e-05, "loss": 0.6586, "step": 7529 }, { "epoch": 0.21984759569063678, "grad_norm": 0.6577644333808922, "learning_rate": 1.7337226277372263e-05, "loss": 0.5871, "step": 7530 }, { "epoch": 0.21987679191848414, "grad_norm": 0.6760468224114974, "learning_rate": 1.7336577453365776e-05, "loss": 0.6868, "step": 7531 }, { "epoch": 0.2199059881463315, "grad_norm": 0.7272649050076097, "learning_rate": 1.7335928629359288e-05, "loss": 0.7148, "step": 7532 }, { "epoch": 0.21993518437417886, "grad_norm": 0.6877447264918571, "learning_rate": 1.73352798053528e-05, "loss": 0.6498, "step": 7533 }, { "epoch": 0.21996438060202622, "grad_norm": 0.7086312468829089, "learning_rate": 1.733463098134631e-05, "loss": 0.6239, "step": 7534 }, { "epoch": 0.21999357682987358, "grad_norm": 0.6683709891041575, "learning_rate": 1.7333982157339824e-05, "loss": 0.6252, "step": 7535 }, { "epoch": 0.22002277305772094, "grad_norm": 0.6015236956279867, "learning_rate": 1.7333333333333336e-05, "loss": 0.5253, "step": 7536 }, { "epoch": 0.2200519692855683, "grad_norm": 0.5878401925367809, "learning_rate": 1.7332684509326848e-05, "loss": 0.5178, "step": 7537 }, { "epoch": 0.22008116551341567, "grad_norm": 0.6757565808075292, "learning_rate": 1.733203568532036e-05, "loss": 0.6292, "step": 7538 }, { "epoch": 0.22011036174126303, "grad_norm": 0.6694470028524031, "learning_rate": 1.7331386861313872e-05, "loss": 0.6019, "step": 7539 }, { "epoch": 0.2201395579691104, "grad_norm": 0.7023239995060009, "learning_rate": 1.733073803730738e-05, "loss": 0.6511, "step": 7540 }, { "epoch": 0.22016875419695775, "grad_norm": 0.6280999250571709, "learning_rate": 1.7330089213300892e-05, "loss": 0.5937, "step": 7541 }, { "epoch": 0.2201979504248051, "grad_norm": 0.7175432048423093, "learning_rate": 1.7329440389294404e-05, "loss": 0.6345, "step": 7542 }, { "epoch": 0.22022714665265247, "grad_norm": 0.6583298470470033, "learning_rate": 1.7328791565287916e-05, "loss": 0.5591, "step": 7543 }, { "epoch": 0.22025634288049983, "grad_norm": 0.7429602607838175, "learning_rate": 1.732814274128143e-05, "loss": 0.6813, "step": 7544 }, { "epoch": 0.2202855391083472, "grad_norm": 0.6918795125294027, "learning_rate": 1.732749391727494e-05, "loss": 0.6603, "step": 7545 }, { "epoch": 0.22031473533619456, "grad_norm": 0.7433696112601998, "learning_rate": 1.7326845093268453e-05, "loss": 0.7744, "step": 7546 }, { "epoch": 0.22034393156404192, "grad_norm": 0.6771113290531882, "learning_rate": 1.7326196269261965e-05, "loss": 0.6402, "step": 7547 }, { "epoch": 0.22037312779188928, "grad_norm": 0.7174648323397389, "learning_rate": 1.7325547445255477e-05, "loss": 0.6911, "step": 7548 }, { "epoch": 0.22040232401973664, "grad_norm": 0.6831002765646107, "learning_rate": 1.7324898621248985e-05, "loss": 0.6334, "step": 7549 }, { "epoch": 0.220431520247584, "grad_norm": 0.7129821393555775, "learning_rate": 1.7324249797242497e-05, "loss": 0.7312, "step": 7550 }, { "epoch": 0.22046071647543136, "grad_norm": 0.6696928737052512, "learning_rate": 1.7323600973236013e-05, "loss": 0.6192, "step": 7551 }, { "epoch": 0.22048991270327872, "grad_norm": 0.5793405450718071, "learning_rate": 1.7322952149229525e-05, "loss": 0.537, "step": 7552 }, { "epoch": 0.2205191089311261, "grad_norm": 0.6493409024306697, "learning_rate": 1.7322303325223037e-05, "loss": 0.6276, "step": 7553 }, { "epoch": 0.22054830515897347, "grad_norm": 0.6679258924498511, "learning_rate": 1.732165450121655e-05, "loss": 0.627, "step": 7554 }, { "epoch": 0.22057750138682083, "grad_norm": 0.658237417592581, "learning_rate": 1.7321005677210057e-05, "loss": 0.6094, "step": 7555 }, { "epoch": 0.2206066976146682, "grad_norm": 0.818067042565357, "learning_rate": 1.732035685320357e-05, "loss": 0.7361, "step": 7556 }, { "epoch": 0.22063589384251556, "grad_norm": 0.6682574304957113, "learning_rate": 1.731970802919708e-05, "loss": 0.6415, "step": 7557 }, { "epoch": 0.22066509007036292, "grad_norm": 0.6657206518142763, "learning_rate": 1.7319059205190593e-05, "loss": 0.6157, "step": 7558 }, { "epoch": 0.22069428629821028, "grad_norm": 0.7319000365831402, "learning_rate": 1.7318410381184106e-05, "loss": 0.6767, "step": 7559 }, { "epoch": 0.22072348252605764, "grad_norm": 0.8016273121352673, "learning_rate": 1.7317761557177618e-05, "loss": 0.6903, "step": 7560 }, { "epoch": 0.220752678753905, "grad_norm": 0.7037048329919137, "learning_rate": 1.731711273317113e-05, "loss": 0.707, "step": 7561 }, { "epoch": 0.22078187498175236, "grad_norm": 0.6599712802636807, "learning_rate": 1.731646390916464e-05, "loss": 0.6265, "step": 7562 }, { "epoch": 0.22081107120959972, "grad_norm": 0.7078129430765316, "learning_rate": 1.731581508515815e-05, "loss": 0.6945, "step": 7563 }, { "epoch": 0.22084026743744709, "grad_norm": 0.7370561940428693, "learning_rate": 1.7315166261151662e-05, "loss": 0.6656, "step": 7564 }, { "epoch": 0.22086946366529445, "grad_norm": 0.6848330173901078, "learning_rate": 1.7314517437145174e-05, "loss": 0.6761, "step": 7565 }, { "epoch": 0.2208986598931418, "grad_norm": 0.7214504700153647, "learning_rate": 1.7313868613138686e-05, "loss": 0.667, "step": 7566 }, { "epoch": 0.22092785612098917, "grad_norm": 0.6555815928351817, "learning_rate": 1.73132197891322e-05, "loss": 0.633, "step": 7567 }, { "epoch": 0.22095705234883653, "grad_norm": 0.6651750383421515, "learning_rate": 1.731257096512571e-05, "loss": 0.6359, "step": 7568 }, { "epoch": 0.2209862485766839, "grad_norm": 0.7345977526910084, "learning_rate": 1.7311922141119222e-05, "loss": 0.6872, "step": 7569 }, { "epoch": 0.22101544480453125, "grad_norm": 0.6657653118312916, "learning_rate": 1.7311273317112734e-05, "loss": 0.6602, "step": 7570 }, { "epoch": 0.2210446410323786, "grad_norm": 0.6213399215289708, "learning_rate": 1.7310624493106246e-05, "loss": 0.5414, "step": 7571 }, { "epoch": 0.22107383726022597, "grad_norm": 0.6699337110725674, "learning_rate": 1.730997566909976e-05, "loss": 0.6646, "step": 7572 }, { "epoch": 0.22110303348807334, "grad_norm": 0.7403468872589939, "learning_rate": 1.730932684509327e-05, "loss": 0.7365, "step": 7573 }, { "epoch": 0.2211322297159207, "grad_norm": 0.6857852360457348, "learning_rate": 1.7308678021086783e-05, "loss": 0.6481, "step": 7574 }, { "epoch": 0.22116142594376806, "grad_norm": 0.6969631332791494, "learning_rate": 1.7308029197080295e-05, "loss": 0.7104, "step": 7575 }, { "epoch": 0.22119062217161542, "grad_norm": 0.6663333504291363, "learning_rate": 1.7307380373073807e-05, "loss": 0.6358, "step": 7576 }, { "epoch": 0.22121981839946278, "grad_norm": 0.7058301947009256, "learning_rate": 1.730673154906732e-05, "loss": 0.5961, "step": 7577 }, { "epoch": 0.22124901462731014, "grad_norm": 0.6553733851156117, "learning_rate": 1.7306082725060827e-05, "loss": 0.6505, "step": 7578 }, { "epoch": 0.2212782108551575, "grad_norm": 0.7293021689753307, "learning_rate": 1.730543390105434e-05, "loss": 0.7537, "step": 7579 }, { "epoch": 0.22130740708300486, "grad_norm": 0.671581680908539, "learning_rate": 1.730478507704785e-05, "loss": 0.6535, "step": 7580 }, { "epoch": 0.22133660331085223, "grad_norm": 0.7028364316097099, "learning_rate": 1.7304136253041363e-05, "loss": 0.666, "step": 7581 }, { "epoch": 0.2213657995386996, "grad_norm": 0.6908330338423269, "learning_rate": 1.7303487429034875e-05, "loss": 0.6571, "step": 7582 }, { "epoch": 0.22139499576654698, "grad_norm": 0.6843043245951355, "learning_rate": 1.7302838605028387e-05, "loss": 0.7261, "step": 7583 }, { "epoch": 0.22142419199439434, "grad_norm": 0.6555958190819875, "learning_rate": 1.73021897810219e-05, "loss": 0.6754, "step": 7584 }, { "epoch": 0.2214533882222417, "grad_norm": 0.7202321577593301, "learning_rate": 1.730154095701541e-05, "loss": 0.7026, "step": 7585 }, { "epoch": 0.22148258445008906, "grad_norm": 0.6621308557817464, "learning_rate": 1.7300892133008923e-05, "loss": 0.6242, "step": 7586 }, { "epoch": 0.22151178067793642, "grad_norm": 0.7058824994215109, "learning_rate": 1.7300243309002432e-05, "loss": 0.7251, "step": 7587 }, { "epoch": 0.22154097690578378, "grad_norm": 0.6913735891781567, "learning_rate": 1.7299594484995944e-05, "loss": 0.6874, "step": 7588 }, { "epoch": 0.22157017313363114, "grad_norm": 0.6810849315974935, "learning_rate": 1.729894566098946e-05, "loss": 0.6716, "step": 7589 }, { "epoch": 0.2215993693614785, "grad_norm": 0.6518234537445161, "learning_rate": 1.729829683698297e-05, "loss": 0.6366, "step": 7590 }, { "epoch": 0.22162856558932587, "grad_norm": 0.6471589483368777, "learning_rate": 1.7297648012976484e-05, "loss": 0.6197, "step": 7591 }, { "epoch": 0.22165776181717323, "grad_norm": 0.6651417265245533, "learning_rate": 1.7296999188969996e-05, "loss": 0.6301, "step": 7592 }, { "epoch": 0.2216869580450206, "grad_norm": 0.744875051906087, "learning_rate": 1.7296350364963504e-05, "loss": 0.7137, "step": 7593 }, { "epoch": 0.22171615427286795, "grad_norm": 0.7240475798957945, "learning_rate": 1.7295701540957016e-05, "loss": 0.7117, "step": 7594 }, { "epoch": 0.2217453505007153, "grad_norm": 0.7628067132040317, "learning_rate": 1.729505271695053e-05, "loss": 0.7811, "step": 7595 }, { "epoch": 0.22177454672856267, "grad_norm": 0.6585722435873993, "learning_rate": 1.729440389294404e-05, "loss": 0.6396, "step": 7596 }, { "epoch": 0.22180374295641003, "grad_norm": 0.6680788027227273, "learning_rate": 1.7293755068937552e-05, "loss": 0.5896, "step": 7597 }, { "epoch": 0.2218329391842574, "grad_norm": 0.6994693828566717, "learning_rate": 1.7293106244931064e-05, "loss": 0.6699, "step": 7598 }, { "epoch": 0.22186213541210476, "grad_norm": 0.7392233218399246, "learning_rate": 1.7292457420924576e-05, "loss": 0.7407, "step": 7599 }, { "epoch": 0.22189133163995212, "grad_norm": 0.6911563482052696, "learning_rate": 1.729180859691809e-05, "loss": 0.5963, "step": 7600 }, { "epoch": 0.22192052786779948, "grad_norm": 0.6841050805031946, "learning_rate": 1.7291159772911597e-05, "loss": 0.6389, "step": 7601 }, { "epoch": 0.22194972409564684, "grad_norm": 0.6775372051006567, "learning_rate": 1.729051094890511e-05, "loss": 0.6167, "step": 7602 }, { "epoch": 0.2219789203234942, "grad_norm": 0.7347575867463346, "learning_rate": 1.728986212489862e-05, "loss": 0.6348, "step": 7603 }, { "epoch": 0.22200811655134156, "grad_norm": 0.67739596785855, "learning_rate": 1.7289213300892133e-05, "loss": 0.6055, "step": 7604 }, { "epoch": 0.22203731277918892, "grad_norm": 0.6892801043397144, "learning_rate": 1.7288564476885645e-05, "loss": 0.6224, "step": 7605 }, { "epoch": 0.22206650900703628, "grad_norm": 0.7047049045818651, "learning_rate": 1.7287915652879157e-05, "loss": 0.6766, "step": 7606 }, { "epoch": 0.22209570523488364, "grad_norm": 0.8021198181379923, "learning_rate": 1.728726682887267e-05, "loss": 0.7807, "step": 7607 }, { "epoch": 0.222124901462731, "grad_norm": 0.6680436667522985, "learning_rate": 1.728661800486618e-05, "loss": 0.6447, "step": 7608 }, { "epoch": 0.22215409769057837, "grad_norm": 0.6257190850028757, "learning_rate": 1.7285969180859693e-05, "loss": 0.5864, "step": 7609 }, { "epoch": 0.22218329391842573, "grad_norm": 0.6872435587234214, "learning_rate": 1.7285320356853205e-05, "loss": 0.6364, "step": 7610 }, { "epoch": 0.2222124901462731, "grad_norm": 0.694203335044585, "learning_rate": 1.7284671532846717e-05, "loss": 0.5702, "step": 7611 }, { "epoch": 0.22224168637412045, "grad_norm": 0.6993477313038541, "learning_rate": 1.728402270884023e-05, "loss": 0.7108, "step": 7612 }, { "epoch": 0.2222708826019678, "grad_norm": 0.6920911805393309, "learning_rate": 1.728337388483374e-05, "loss": 0.6719, "step": 7613 }, { "epoch": 0.2223000788298152, "grad_norm": 0.8205969738508101, "learning_rate": 1.7282725060827253e-05, "loss": 0.6939, "step": 7614 }, { "epoch": 0.22232927505766256, "grad_norm": 0.6738328143629366, "learning_rate": 1.7282076236820765e-05, "loss": 0.661, "step": 7615 }, { "epoch": 0.22235847128550992, "grad_norm": 0.6388957462822221, "learning_rate": 1.7281427412814274e-05, "loss": 0.6329, "step": 7616 }, { "epoch": 0.22238766751335728, "grad_norm": 0.5962997236292364, "learning_rate": 1.7280778588807786e-05, "loss": 0.5334, "step": 7617 }, { "epoch": 0.22241686374120465, "grad_norm": 0.7588787354973845, "learning_rate": 1.7280129764801298e-05, "loss": 0.6725, "step": 7618 }, { "epoch": 0.222446059969052, "grad_norm": 0.6504324433328187, "learning_rate": 1.727948094079481e-05, "loss": 0.5775, "step": 7619 }, { "epoch": 0.22247525619689937, "grad_norm": 0.6242064075099324, "learning_rate": 1.7278832116788322e-05, "loss": 0.5858, "step": 7620 }, { "epoch": 0.22250445242474673, "grad_norm": 0.6465300404829731, "learning_rate": 1.7278183292781834e-05, "loss": 0.6087, "step": 7621 }, { "epoch": 0.2225336486525941, "grad_norm": 0.6748504642956259, "learning_rate": 1.7277534468775346e-05, "loss": 0.6878, "step": 7622 }, { "epoch": 0.22256284488044145, "grad_norm": 0.6812838827191036, "learning_rate": 1.7276885644768858e-05, "loss": 0.6564, "step": 7623 }, { "epoch": 0.2225920411082888, "grad_norm": 0.6528113960081728, "learning_rate": 1.727623682076237e-05, "loss": 0.6771, "step": 7624 }, { "epoch": 0.22262123733613617, "grad_norm": 0.7138683281909174, "learning_rate": 1.727558799675588e-05, "loss": 0.7225, "step": 7625 }, { "epoch": 0.22265043356398354, "grad_norm": 0.9414667187767419, "learning_rate": 1.7274939172749394e-05, "loss": 0.636, "step": 7626 }, { "epoch": 0.2226796297918309, "grad_norm": 0.7252875187368648, "learning_rate": 1.7274290348742906e-05, "loss": 0.6692, "step": 7627 }, { "epoch": 0.22270882601967826, "grad_norm": 0.7392879027590711, "learning_rate": 1.727364152473642e-05, "loss": 0.6649, "step": 7628 }, { "epoch": 0.22273802224752562, "grad_norm": 0.7738374298896378, "learning_rate": 1.727299270072993e-05, "loss": 0.6366, "step": 7629 }, { "epoch": 0.22276721847537298, "grad_norm": 0.6657061869259905, "learning_rate": 1.7272343876723442e-05, "loss": 0.6321, "step": 7630 }, { "epoch": 0.22279641470322034, "grad_norm": 0.6747015452258098, "learning_rate": 1.727169505271695e-05, "loss": 0.6578, "step": 7631 }, { "epoch": 0.2228256109310677, "grad_norm": 0.7084733384716755, "learning_rate": 1.7271046228710463e-05, "loss": 0.7068, "step": 7632 }, { "epoch": 0.22285480715891506, "grad_norm": 0.658747474359655, "learning_rate": 1.7270397404703975e-05, "loss": 0.5947, "step": 7633 }, { "epoch": 0.22288400338676242, "grad_norm": 0.6840982232649204, "learning_rate": 1.7269748580697487e-05, "loss": 0.7112, "step": 7634 }, { "epoch": 0.22291319961460979, "grad_norm": 0.684857352321933, "learning_rate": 1.7269099756691e-05, "loss": 0.6483, "step": 7635 }, { "epoch": 0.22294239584245715, "grad_norm": 0.8816816499978855, "learning_rate": 1.726845093268451e-05, "loss": 0.8804, "step": 7636 }, { "epoch": 0.2229715920703045, "grad_norm": 0.6964706624481632, "learning_rate": 1.7267802108678023e-05, "loss": 0.6815, "step": 7637 }, { "epoch": 0.22300078829815187, "grad_norm": 0.7148712090767344, "learning_rate": 1.7267153284671535e-05, "loss": 0.7831, "step": 7638 }, { "epoch": 0.22302998452599923, "grad_norm": 0.696388365888987, "learning_rate": 1.7266504460665044e-05, "loss": 0.7068, "step": 7639 }, { "epoch": 0.2230591807538466, "grad_norm": 0.6955860129378628, "learning_rate": 1.7265855636658556e-05, "loss": 0.6704, "step": 7640 }, { "epoch": 0.22308837698169395, "grad_norm": 0.6150751451212497, "learning_rate": 1.7265206812652068e-05, "loss": 0.6019, "step": 7641 }, { "epoch": 0.22311757320954131, "grad_norm": 0.6672944850861529, "learning_rate": 1.726455798864558e-05, "loss": 0.6367, "step": 7642 }, { "epoch": 0.22314676943738868, "grad_norm": 0.6914165422868186, "learning_rate": 1.7263909164639092e-05, "loss": 0.6544, "step": 7643 }, { "epoch": 0.22317596566523606, "grad_norm": 0.6577317782458715, "learning_rate": 1.7263260340632604e-05, "loss": 0.6038, "step": 7644 }, { "epoch": 0.22320516189308343, "grad_norm": 0.8981546545737134, "learning_rate": 1.7262611516626116e-05, "loss": 0.7285, "step": 7645 }, { "epoch": 0.2232343581209308, "grad_norm": 0.6799906656755677, "learning_rate": 1.7261962692619628e-05, "loss": 0.6706, "step": 7646 }, { "epoch": 0.22326355434877815, "grad_norm": 0.6467681272957476, "learning_rate": 1.726131386861314e-05, "loss": 0.6284, "step": 7647 }, { "epoch": 0.2232927505766255, "grad_norm": 0.6564771558551687, "learning_rate": 1.7260665044606652e-05, "loss": 0.5778, "step": 7648 }, { "epoch": 0.22332194680447287, "grad_norm": 0.7243876261634151, "learning_rate": 1.7260016220600164e-05, "loss": 0.6308, "step": 7649 }, { "epoch": 0.22335114303232023, "grad_norm": 0.7935588409724711, "learning_rate": 1.7259367396593676e-05, "loss": 0.5896, "step": 7650 }, { "epoch": 0.2233803392601676, "grad_norm": 0.6914919874075874, "learning_rate": 1.7258718572587188e-05, "loss": 0.7012, "step": 7651 }, { "epoch": 0.22340953548801495, "grad_norm": 0.6693231595788544, "learning_rate": 1.72580697485807e-05, "loss": 0.6385, "step": 7652 }, { "epoch": 0.22343873171586232, "grad_norm": 0.6646559613674191, "learning_rate": 1.7257420924574212e-05, "loss": 0.6241, "step": 7653 }, { "epoch": 0.22346792794370968, "grad_norm": 0.6626289159514023, "learning_rate": 1.725677210056772e-05, "loss": 0.591, "step": 7654 }, { "epoch": 0.22349712417155704, "grad_norm": 0.7608078342421803, "learning_rate": 1.7256123276561233e-05, "loss": 0.6865, "step": 7655 }, { "epoch": 0.2235263203994044, "grad_norm": 0.7301863645290227, "learning_rate": 1.7255474452554745e-05, "loss": 0.6819, "step": 7656 }, { "epoch": 0.22355551662725176, "grad_norm": 0.6429880524503148, "learning_rate": 1.7254825628548257e-05, "loss": 0.6021, "step": 7657 }, { "epoch": 0.22358471285509912, "grad_norm": 0.662146392224384, "learning_rate": 1.725417680454177e-05, "loss": 0.6782, "step": 7658 }, { "epoch": 0.22361390908294648, "grad_norm": 0.6521032671509717, "learning_rate": 1.725352798053528e-05, "loss": 0.6571, "step": 7659 }, { "epoch": 0.22364310531079384, "grad_norm": 0.7337142805236357, "learning_rate": 1.7252879156528793e-05, "loss": 0.7754, "step": 7660 }, { "epoch": 0.2236723015386412, "grad_norm": 0.641505491321727, "learning_rate": 1.7252230332522305e-05, "loss": 0.6353, "step": 7661 }, { "epoch": 0.22370149776648857, "grad_norm": 0.7233205273468003, "learning_rate": 1.7251581508515817e-05, "loss": 0.7051, "step": 7662 }, { "epoch": 0.22373069399433593, "grad_norm": 0.6542880974918555, "learning_rate": 1.7250932684509326e-05, "loss": 0.6507, "step": 7663 }, { "epoch": 0.2237598902221833, "grad_norm": 0.6983101033610454, "learning_rate": 1.725028386050284e-05, "loss": 0.7164, "step": 7664 }, { "epoch": 0.22378908645003065, "grad_norm": 0.666003359568277, "learning_rate": 1.7249635036496353e-05, "loss": 0.6797, "step": 7665 }, { "epoch": 0.223818282677878, "grad_norm": 0.641679874980061, "learning_rate": 1.7248986212489865e-05, "loss": 0.5848, "step": 7666 }, { "epoch": 0.22384747890572537, "grad_norm": 0.758342794435473, "learning_rate": 1.7248337388483377e-05, "loss": 0.82, "step": 7667 }, { "epoch": 0.22387667513357273, "grad_norm": 0.6452440567167953, "learning_rate": 1.7247688564476886e-05, "loss": 0.6236, "step": 7668 }, { "epoch": 0.2239058713614201, "grad_norm": 0.7341985758979095, "learning_rate": 1.7247039740470398e-05, "loss": 0.6319, "step": 7669 }, { "epoch": 0.22393506758926746, "grad_norm": 0.6550833863655624, "learning_rate": 1.724639091646391e-05, "loss": 0.6522, "step": 7670 }, { "epoch": 0.22396426381711482, "grad_norm": 0.6745103955069729, "learning_rate": 1.7245742092457422e-05, "loss": 0.6642, "step": 7671 }, { "epoch": 0.22399346004496218, "grad_norm": 0.5907923858105371, "learning_rate": 1.7245093268450934e-05, "loss": 0.5525, "step": 7672 }, { "epoch": 0.22402265627280954, "grad_norm": 0.6471532622329148, "learning_rate": 1.7244444444444446e-05, "loss": 0.631, "step": 7673 }, { "epoch": 0.22405185250065693, "grad_norm": 0.6164657842452995, "learning_rate": 1.7243795620437958e-05, "loss": 0.5597, "step": 7674 }, { "epoch": 0.2240810487285043, "grad_norm": 0.6671343798948015, "learning_rate": 1.724314679643147e-05, "loss": 0.69, "step": 7675 }, { "epoch": 0.22411024495635165, "grad_norm": 0.6962631420298349, "learning_rate": 1.7242497972424982e-05, "loss": 0.6742, "step": 7676 }, { "epoch": 0.224139441184199, "grad_norm": 0.6495662748292145, "learning_rate": 1.724184914841849e-05, "loss": 0.5644, "step": 7677 }, { "epoch": 0.22416863741204637, "grad_norm": 0.7003297746135253, "learning_rate": 1.7241200324412003e-05, "loss": 0.7046, "step": 7678 }, { "epoch": 0.22419783363989373, "grad_norm": 0.6602760360239154, "learning_rate": 1.7240551500405515e-05, "loss": 0.646, "step": 7679 }, { "epoch": 0.2242270298677411, "grad_norm": 0.6809092901552912, "learning_rate": 1.7239902676399027e-05, "loss": 0.6522, "step": 7680 }, { "epoch": 0.22425622609558846, "grad_norm": 0.6796447409870797, "learning_rate": 1.723925385239254e-05, "loss": 0.6705, "step": 7681 }, { "epoch": 0.22428542232343582, "grad_norm": 0.6693712119457343, "learning_rate": 1.723860502838605e-05, "loss": 0.6223, "step": 7682 }, { "epoch": 0.22431461855128318, "grad_norm": 0.6786655222735742, "learning_rate": 1.7237956204379563e-05, "loss": 0.6318, "step": 7683 }, { "epoch": 0.22434381477913054, "grad_norm": 0.7063971948309766, "learning_rate": 1.7237307380373075e-05, "loss": 0.6502, "step": 7684 }, { "epoch": 0.2243730110069779, "grad_norm": 0.6884740229880704, "learning_rate": 1.7236658556366587e-05, "loss": 0.633, "step": 7685 }, { "epoch": 0.22440220723482526, "grad_norm": 0.6369925773979567, "learning_rate": 1.72360097323601e-05, "loss": 0.6185, "step": 7686 }, { "epoch": 0.22443140346267262, "grad_norm": 0.7124656698195038, "learning_rate": 1.723536090835361e-05, "loss": 0.7261, "step": 7687 }, { "epoch": 0.22446059969051999, "grad_norm": 0.7207054894751196, "learning_rate": 1.7234712084347123e-05, "loss": 0.6739, "step": 7688 }, { "epoch": 0.22448979591836735, "grad_norm": 0.7204437985802897, "learning_rate": 1.7234063260340635e-05, "loss": 0.718, "step": 7689 }, { "epoch": 0.2245189921462147, "grad_norm": 0.8032661530765316, "learning_rate": 1.7233414436334147e-05, "loss": 0.7314, "step": 7690 }, { "epoch": 0.22454818837406207, "grad_norm": 0.6632613514254048, "learning_rate": 1.723276561232766e-05, "loss": 0.6636, "step": 7691 }, { "epoch": 0.22457738460190943, "grad_norm": 0.6489607835901653, "learning_rate": 1.7232116788321168e-05, "loss": 0.63, "step": 7692 }, { "epoch": 0.2246065808297568, "grad_norm": 0.6173886800814617, "learning_rate": 1.723146796431468e-05, "loss": 0.5497, "step": 7693 }, { "epoch": 0.22463577705760415, "grad_norm": 0.7378996058379653, "learning_rate": 1.7230819140308192e-05, "loss": 0.6576, "step": 7694 }, { "epoch": 0.2246649732854515, "grad_norm": 0.6290786596773175, "learning_rate": 1.7230170316301704e-05, "loss": 0.5508, "step": 7695 }, { "epoch": 0.22469416951329887, "grad_norm": 0.7133802655813539, "learning_rate": 1.7229521492295216e-05, "loss": 0.6479, "step": 7696 }, { "epoch": 0.22472336574114624, "grad_norm": 0.661494639474178, "learning_rate": 1.7228872668288728e-05, "loss": 0.586, "step": 7697 }, { "epoch": 0.2247525619689936, "grad_norm": 0.6391628872045164, "learning_rate": 1.722822384428224e-05, "loss": 0.6198, "step": 7698 }, { "epoch": 0.22478175819684096, "grad_norm": 0.7301633154291419, "learning_rate": 1.7227575020275752e-05, "loss": 0.7401, "step": 7699 }, { "epoch": 0.22481095442468832, "grad_norm": 0.6361872619757861, "learning_rate": 1.7226926196269264e-05, "loss": 0.6271, "step": 7700 }, { "epoch": 0.22484015065253568, "grad_norm": 0.6704918875958092, "learning_rate": 1.7226277372262773e-05, "loss": 0.6936, "step": 7701 }, { "epoch": 0.22486934688038304, "grad_norm": 0.735656125397129, "learning_rate": 1.7225628548256288e-05, "loss": 0.6987, "step": 7702 }, { "epoch": 0.2248985431082304, "grad_norm": 0.7028823503876901, "learning_rate": 1.72249797242498e-05, "loss": 0.6415, "step": 7703 }, { "epoch": 0.2249277393360778, "grad_norm": 0.692922484813433, "learning_rate": 1.7224330900243312e-05, "loss": 0.6693, "step": 7704 }, { "epoch": 0.22495693556392515, "grad_norm": 0.7107021587776304, "learning_rate": 1.7223682076236824e-05, "loss": 0.6782, "step": 7705 }, { "epoch": 0.22498613179177251, "grad_norm": 0.7189812750578967, "learning_rate": 1.7223033252230333e-05, "loss": 0.714, "step": 7706 }, { "epoch": 0.22501532801961988, "grad_norm": 0.7063607791310693, "learning_rate": 1.7222384428223845e-05, "loss": 0.688, "step": 7707 }, { "epoch": 0.22504452424746724, "grad_norm": 0.6634408586912691, "learning_rate": 1.7221735604217357e-05, "loss": 0.6133, "step": 7708 }, { "epoch": 0.2250737204753146, "grad_norm": 0.6578417502353107, "learning_rate": 1.722108678021087e-05, "loss": 0.6455, "step": 7709 }, { "epoch": 0.22510291670316196, "grad_norm": 0.6718449789921367, "learning_rate": 1.722043795620438e-05, "loss": 0.6995, "step": 7710 }, { "epoch": 0.22513211293100932, "grad_norm": 0.6426909091275407, "learning_rate": 1.7219789132197893e-05, "loss": 0.6108, "step": 7711 }, { "epoch": 0.22516130915885668, "grad_norm": 0.6574904839469712, "learning_rate": 1.7219140308191405e-05, "loss": 0.5786, "step": 7712 }, { "epoch": 0.22519050538670404, "grad_norm": 0.6960054903766836, "learning_rate": 1.7218491484184917e-05, "loss": 0.6426, "step": 7713 }, { "epoch": 0.2252197016145514, "grad_norm": 0.6545900927072019, "learning_rate": 1.721784266017843e-05, "loss": 0.6354, "step": 7714 }, { "epoch": 0.22524889784239877, "grad_norm": 0.686649318257089, "learning_rate": 1.7217193836171938e-05, "loss": 0.7045, "step": 7715 }, { "epoch": 0.22527809407024613, "grad_norm": 0.6721073572132619, "learning_rate": 1.721654501216545e-05, "loss": 0.5908, "step": 7716 }, { "epoch": 0.2253072902980935, "grad_norm": 0.70565164565378, "learning_rate": 1.7215896188158962e-05, "loss": 0.7286, "step": 7717 }, { "epoch": 0.22533648652594085, "grad_norm": 0.707050031807442, "learning_rate": 1.7215247364152474e-05, "loss": 0.7068, "step": 7718 }, { "epoch": 0.2253656827537882, "grad_norm": 0.678655674055642, "learning_rate": 1.7214598540145986e-05, "loss": 0.661, "step": 7719 }, { "epoch": 0.22539487898163557, "grad_norm": 0.6823979121520265, "learning_rate": 1.7213949716139498e-05, "loss": 0.6839, "step": 7720 }, { "epoch": 0.22542407520948293, "grad_norm": 0.6953563941403913, "learning_rate": 1.721330089213301e-05, "loss": 0.6494, "step": 7721 }, { "epoch": 0.2254532714373303, "grad_norm": 0.9205540485574358, "learning_rate": 1.7212652068126522e-05, "loss": 0.7482, "step": 7722 }, { "epoch": 0.22548246766517765, "grad_norm": 0.6644488723906968, "learning_rate": 1.7212003244120034e-05, "loss": 0.6205, "step": 7723 }, { "epoch": 0.22551166389302502, "grad_norm": 0.688991324722344, "learning_rate": 1.7211354420113546e-05, "loss": 0.6617, "step": 7724 }, { "epoch": 0.22554086012087238, "grad_norm": 0.6638983841790362, "learning_rate": 1.7210705596107058e-05, "loss": 0.6506, "step": 7725 }, { "epoch": 0.22557005634871974, "grad_norm": 0.7122916754672994, "learning_rate": 1.721005677210057e-05, "loss": 0.6308, "step": 7726 }, { "epoch": 0.2255992525765671, "grad_norm": 0.5974958941045044, "learning_rate": 1.7209407948094082e-05, "loss": 0.5344, "step": 7727 }, { "epoch": 0.22562844880441446, "grad_norm": 0.7334058605485078, "learning_rate": 1.7208759124087594e-05, "loss": 0.5838, "step": 7728 }, { "epoch": 0.22565764503226182, "grad_norm": 0.6715627479731507, "learning_rate": 1.7208110300081106e-05, "loss": 0.6409, "step": 7729 }, { "epoch": 0.22568684126010918, "grad_norm": 0.6601858454990776, "learning_rate": 1.7207461476074615e-05, "loss": 0.6407, "step": 7730 }, { "epoch": 0.22571603748795654, "grad_norm": 0.6741713334686128, "learning_rate": 1.7206812652068127e-05, "loss": 0.6446, "step": 7731 }, { "epoch": 0.2257452337158039, "grad_norm": 0.6708936062280192, "learning_rate": 1.720616382806164e-05, "loss": 0.5794, "step": 7732 }, { "epoch": 0.22577442994365127, "grad_norm": 0.7133875968487744, "learning_rate": 1.720551500405515e-05, "loss": 0.6443, "step": 7733 }, { "epoch": 0.22580362617149866, "grad_norm": 0.6694580076929725, "learning_rate": 1.7204866180048663e-05, "loss": 0.6295, "step": 7734 }, { "epoch": 0.22583282239934602, "grad_norm": 0.6963506470507229, "learning_rate": 1.7204217356042175e-05, "loss": 0.6531, "step": 7735 }, { "epoch": 0.22586201862719338, "grad_norm": 0.6852863920089187, "learning_rate": 1.7203568532035687e-05, "loss": 0.6709, "step": 7736 }, { "epoch": 0.22589121485504074, "grad_norm": 0.642320510698941, "learning_rate": 1.72029197080292e-05, "loss": 0.6084, "step": 7737 }, { "epoch": 0.2259204110828881, "grad_norm": 0.6975640352636197, "learning_rate": 1.720227088402271e-05, "loss": 0.6183, "step": 7738 }, { "epoch": 0.22594960731073546, "grad_norm": 0.6491472795500324, "learning_rate": 1.720162206001622e-05, "loss": 0.644, "step": 7739 }, { "epoch": 0.22597880353858282, "grad_norm": 0.6594513490644652, "learning_rate": 1.7200973236009735e-05, "loss": 0.6122, "step": 7740 }, { "epoch": 0.22600799976643018, "grad_norm": 0.7045349359937838, "learning_rate": 1.7200324412003247e-05, "loss": 0.6664, "step": 7741 }, { "epoch": 0.22603719599427755, "grad_norm": 0.66531655963136, "learning_rate": 1.719967558799676e-05, "loss": 0.5888, "step": 7742 }, { "epoch": 0.2260663922221249, "grad_norm": 0.6842424479448344, "learning_rate": 1.719902676399027e-05, "loss": 0.6396, "step": 7743 }, { "epoch": 0.22609558844997227, "grad_norm": 0.7360931733885612, "learning_rate": 1.719837793998378e-05, "loss": 0.7039, "step": 7744 }, { "epoch": 0.22612478467781963, "grad_norm": 0.7225676303004159, "learning_rate": 1.7197729115977292e-05, "loss": 0.6983, "step": 7745 }, { "epoch": 0.226153980905667, "grad_norm": 0.7202423266396751, "learning_rate": 1.7197080291970804e-05, "loss": 0.6924, "step": 7746 }, { "epoch": 0.22618317713351435, "grad_norm": 0.7460927852324375, "learning_rate": 1.7196431467964316e-05, "loss": 0.7535, "step": 7747 }, { "epoch": 0.2262123733613617, "grad_norm": 0.672060348026156, "learning_rate": 1.7195782643957828e-05, "loss": 0.5956, "step": 7748 }, { "epoch": 0.22624156958920907, "grad_norm": 0.6964374903559621, "learning_rate": 1.719513381995134e-05, "loss": 0.596, "step": 7749 }, { "epoch": 0.22627076581705644, "grad_norm": 1.422010285140936, "learning_rate": 1.7194484995944852e-05, "loss": 0.6982, "step": 7750 }, { "epoch": 0.2262999620449038, "grad_norm": 0.7198482707107097, "learning_rate": 1.7193836171938364e-05, "loss": 0.7225, "step": 7751 }, { "epoch": 0.22632915827275116, "grad_norm": 0.6842210919765627, "learning_rate": 1.7193187347931876e-05, "loss": 0.6208, "step": 7752 }, { "epoch": 0.22635835450059852, "grad_norm": 0.7544618800856584, "learning_rate": 1.7192538523925385e-05, "loss": 0.6802, "step": 7753 }, { "epoch": 0.22638755072844588, "grad_norm": 0.7306341071400404, "learning_rate": 1.7191889699918897e-05, "loss": 0.6832, "step": 7754 }, { "epoch": 0.22641674695629324, "grad_norm": 0.6471416240861253, "learning_rate": 1.719124087591241e-05, "loss": 0.6132, "step": 7755 }, { "epoch": 0.2264459431841406, "grad_norm": 0.7110859788351236, "learning_rate": 1.719059205190592e-05, "loss": 0.624, "step": 7756 }, { "epoch": 0.22647513941198796, "grad_norm": 0.636446749068026, "learning_rate": 1.7189943227899433e-05, "loss": 0.6041, "step": 7757 }, { "epoch": 0.22650433563983532, "grad_norm": 0.6683419784573591, "learning_rate": 1.7189294403892945e-05, "loss": 0.6297, "step": 7758 }, { "epoch": 0.22653353186768269, "grad_norm": 0.7307630200150537, "learning_rate": 1.7188645579886457e-05, "loss": 0.7287, "step": 7759 }, { "epoch": 0.22656272809553005, "grad_norm": 0.6915589796999346, "learning_rate": 1.718799675587997e-05, "loss": 0.6568, "step": 7760 }, { "epoch": 0.2265919243233774, "grad_norm": 0.6649304285605345, "learning_rate": 1.718734793187348e-05, "loss": 0.6363, "step": 7761 }, { "epoch": 0.22662112055122477, "grad_norm": 0.6874432654199791, "learning_rate": 1.7186699107866993e-05, "loss": 0.7125, "step": 7762 }, { "epoch": 0.22665031677907213, "grad_norm": 0.6952207026517094, "learning_rate": 1.7186050283860505e-05, "loss": 0.633, "step": 7763 }, { "epoch": 0.22667951300691952, "grad_norm": 0.6520138984146462, "learning_rate": 1.7185401459854017e-05, "loss": 0.5977, "step": 7764 }, { "epoch": 0.22670870923476688, "grad_norm": 0.5961480565163686, "learning_rate": 1.718475263584753e-05, "loss": 0.5613, "step": 7765 }, { "epoch": 0.22673790546261424, "grad_norm": 0.6712649806928161, "learning_rate": 1.718410381184104e-05, "loss": 0.6148, "step": 7766 }, { "epoch": 0.2267671016904616, "grad_norm": 0.6344474060841294, "learning_rate": 1.7183454987834553e-05, "loss": 0.6087, "step": 7767 }, { "epoch": 0.22679629791830896, "grad_norm": 0.6823893162346096, "learning_rate": 1.718280616382806e-05, "loss": 0.6757, "step": 7768 }, { "epoch": 0.22682549414615633, "grad_norm": 0.5788041436258303, "learning_rate": 1.7182157339821574e-05, "loss": 0.5005, "step": 7769 }, { "epoch": 0.2268546903740037, "grad_norm": 0.6965036120161057, "learning_rate": 1.7181508515815086e-05, "loss": 0.6614, "step": 7770 }, { "epoch": 0.22688388660185105, "grad_norm": 0.654029187012568, "learning_rate": 1.7180859691808598e-05, "loss": 0.654, "step": 7771 }, { "epoch": 0.2269130828296984, "grad_norm": 0.6731345719054003, "learning_rate": 1.718021086780211e-05, "loss": 0.6411, "step": 7772 }, { "epoch": 0.22694227905754577, "grad_norm": 0.6336920741492261, "learning_rate": 1.7179562043795622e-05, "loss": 0.6237, "step": 7773 }, { "epoch": 0.22697147528539313, "grad_norm": 0.7292927177160085, "learning_rate": 1.7178913219789134e-05, "loss": 0.7526, "step": 7774 }, { "epoch": 0.2270006715132405, "grad_norm": 0.6940781350233196, "learning_rate": 1.7178264395782646e-05, "loss": 0.6227, "step": 7775 }, { "epoch": 0.22702986774108785, "grad_norm": 0.6922730693785942, "learning_rate": 1.7177615571776158e-05, "loss": 0.6317, "step": 7776 }, { "epoch": 0.22705906396893522, "grad_norm": 0.6509594393518324, "learning_rate": 1.717696674776967e-05, "loss": 0.5939, "step": 7777 }, { "epoch": 0.22708826019678258, "grad_norm": 0.6578328982628427, "learning_rate": 1.7176317923763182e-05, "loss": 0.6203, "step": 7778 }, { "epoch": 0.22711745642462994, "grad_norm": 0.6472187971205108, "learning_rate": 1.7175669099756694e-05, "loss": 0.5951, "step": 7779 }, { "epoch": 0.2271466526524773, "grad_norm": 0.6417236629885285, "learning_rate": 1.7175020275750206e-05, "loss": 0.6169, "step": 7780 }, { "epoch": 0.22717584888032466, "grad_norm": 0.6920634382921724, "learning_rate": 1.7174371451743718e-05, "loss": 0.6541, "step": 7781 }, { "epoch": 0.22720504510817202, "grad_norm": 0.7149252340222533, "learning_rate": 1.7173722627737227e-05, "loss": 0.7169, "step": 7782 }, { "epoch": 0.22723424133601938, "grad_norm": 0.6300178871684613, "learning_rate": 1.717307380373074e-05, "loss": 0.5877, "step": 7783 }, { "epoch": 0.22726343756386674, "grad_norm": 0.6339010558784232, "learning_rate": 1.717242497972425e-05, "loss": 0.5554, "step": 7784 }, { "epoch": 0.2272926337917141, "grad_norm": 0.6650372411680482, "learning_rate": 1.7171776155717763e-05, "loss": 0.6291, "step": 7785 }, { "epoch": 0.22732183001956147, "grad_norm": 0.6561686595679114, "learning_rate": 1.7171127331711275e-05, "loss": 0.6028, "step": 7786 }, { "epoch": 0.22735102624740883, "grad_norm": 0.6213861180126116, "learning_rate": 1.7170478507704787e-05, "loss": 0.5811, "step": 7787 }, { "epoch": 0.2273802224752562, "grad_norm": 0.6796844034673978, "learning_rate": 1.71698296836983e-05, "loss": 0.6452, "step": 7788 }, { "epoch": 0.22740941870310355, "grad_norm": 0.7626296024179747, "learning_rate": 1.716918085969181e-05, "loss": 0.6639, "step": 7789 }, { "epoch": 0.2274386149309509, "grad_norm": 0.6574152627417092, "learning_rate": 1.7168532035685323e-05, "loss": 0.6412, "step": 7790 }, { "epoch": 0.22746781115879827, "grad_norm": 0.6995709349471889, "learning_rate": 1.716788321167883e-05, "loss": 0.648, "step": 7791 }, { "epoch": 0.22749700738664563, "grad_norm": 0.6874179040124192, "learning_rate": 1.7167234387672343e-05, "loss": 0.6607, "step": 7792 }, { "epoch": 0.227526203614493, "grad_norm": 0.6853397933028021, "learning_rate": 1.7166585563665855e-05, "loss": 0.6978, "step": 7793 }, { "epoch": 0.22755539984234038, "grad_norm": 0.6796539708641435, "learning_rate": 1.7165936739659368e-05, "loss": 0.6221, "step": 7794 }, { "epoch": 0.22758459607018774, "grad_norm": 0.6868336473418799, "learning_rate": 1.716528791565288e-05, "loss": 0.6316, "step": 7795 }, { "epoch": 0.2276137922980351, "grad_norm": 0.6768307845007602, "learning_rate": 1.7164639091646395e-05, "loss": 0.6502, "step": 7796 }, { "epoch": 0.22764298852588247, "grad_norm": 0.8135229964036815, "learning_rate": 1.7163990267639904e-05, "loss": 0.7433, "step": 7797 }, { "epoch": 0.22767218475372983, "grad_norm": 0.6725663943259476, "learning_rate": 1.7163341443633416e-05, "loss": 0.6423, "step": 7798 }, { "epoch": 0.2277013809815772, "grad_norm": 0.6287924934527428, "learning_rate": 1.7162692619626928e-05, "loss": 0.5945, "step": 7799 }, { "epoch": 0.22773057720942455, "grad_norm": 0.6947105534747581, "learning_rate": 1.716204379562044e-05, "loss": 0.6064, "step": 7800 }, { "epoch": 0.2277597734372719, "grad_norm": 0.7041735360889512, "learning_rate": 1.7161394971613952e-05, "loss": 0.6138, "step": 7801 }, { "epoch": 0.22778896966511927, "grad_norm": 0.7323568695853832, "learning_rate": 1.7160746147607464e-05, "loss": 0.6661, "step": 7802 }, { "epoch": 0.22781816589296663, "grad_norm": 0.6760832871306911, "learning_rate": 1.7160097323600976e-05, "loss": 0.607, "step": 7803 }, { "epoch": 0.227847362120814, "grad_norm": 0.6692098055824768, "learning_rate": 1.7159448499594488e-05, "loss": 0.6657, "step": 7804 }, { "epoch": 0.22787655834866136, "grad_norm": 0.6288069281661792, "learning_rate": 1.7158799675588e-05, "loss": 0.5348, "step": 7805 }, { "epoch": 0.22790575457650872, "grad_norm": 0.6898367986044167, "learning_rate": 1.715815085158151e-05, "loss": 0.6958, "step": 7806 }, { "epoch": 0.22793495080435608, "grad_norm": 0.6087725878028507, "learning_rate": 1.715750202757502e-05, "loss": 0.5687, "step": 7807 }, { "epoch": 0.22796414703220344, "grad_norm": 0.6726764177606007, "learning_rate": 1.7156853203568533e-05, "loss": 0.6956, "step": 7808 }, { "epoch": 0.2279933432600508, "grad_norm": 0.661246796447546, "learning_rate": 1.7156204379562045e-05, "loss": 0.6452, "step": 7809 }, { "epoch": 0.22802253948789816, "grad_norm": 0.6902065310641624, "learning_rate": 1.7155555555555557e-05, "loss": 0.6655, "step": 7810 }, { "epoch": 0.22805173571574552, "grad_norm": 0.680934023853407, "learning_rate": 1.715490673154907e-05, "loss": 0.6895, "step": 7811 }, { "epoch": 0.22808093194359289, "grad_norm": 0.6193247489942549, "learning_rate": 1.715425790754258e-05, "loss": 0.5482, "step": 7812 }, { "epoch": 0.22811012817144025, "grad_norm": 0.6830112801932028, "learning_rate": 1.7153609083536093e-05, "loss": 0.6733, "step": 7813 }, { "epoch": 0.2281393243992876, "grad_norm": 0.6909802262438464, "learning_rate": 1.71529602595296e-05, "loss": 0.6433, "step": 7814 }, { "epoch": 0.22816852062713497, "grad_norm": 0.65717226515085, "learning_rate": 1.7152311435523117e-05, "loss": 0.6222, "step": 7815 }, { "epoch": 0.22819771685498233, "grad_norm": 0.6241418981420181, "learning_rate": 1.715166261151663e-05, "loss": 0.5962, "step": 7816 }, { "epoch": 0.2282269130828297, "grad_norm": 0.69241544762436, "learning_rate": 1.715101378751014e-05, "loss": 0.6476, "step": 7817 }, { "epoch": 0.22825610931067705, "grad_norm": 0.6879210896501108, "learning_rate": 1.7150364963503653e-05, "loss": 0.6767, "step": 7818 }, { "epoch": 0.2282853055385244, "grad_norm": 0.7544902238104809, "learning_rate": 1.7149716139497165e-05, "loss": 0.7088, "step": 7819 }, { "epoch": 0.22831450176637177, "grad_norm": 0.6284390311628196, "learning_rate": 1.7149067315490673e-05, "loss": 0.605, "step": 7820 }, { "epoch": 0.22834369799421914, "grad_norm": 0.6528047529351512, "learning_rate": 1.7148418491484185e-05, "loss": 0.6177, "step": 7821 }, { "epoch": 0.2283728942220665, "grad_norm": 0.6677724982834546, "learning_rate": 1.7147769667477698e-05, "loss": 0.5877, "step": 7822 }, { "epoch": 0.22840209044991386, "grad_norm": 0.689986488192347, "learning_rate": 1.714712084347121e-05, "loss": 0.6874, "step": 7823 }, { "epoch": 0.22843128667776122, "grad_norm": 0.6246621881368133, "learning_rate": 1.714647201946472e-05, "loss": 0.5921, "step": 7824 }, { "epoch": 0.2284604829056086, "grad_norm": 0.64827351369187, "learning_rate": 1.7145823195458234e-05, "loss": 0.6278, "step": 7825 }, { "epoch": 0.22848967913345597, "grad_norm": 0.6829051108950955, "learning_rate": 1.7145174371451746e-05, "loss": 0.6145, "step": 7826 }, { "epoch": 0.22851887536130333, "grad_norm": 0.5886817744285656, "learning_rate": 1.7144525547445258e-05, "loss": 0.5213, "step": 7827 }, { "epoch": 0.2285480715891507, "grad_norm": 0.6191325535477157, "learning_rate": 1.714387672343877e-05, "loss": 0.5117, "step": 7828 }, { "epoch": 0.22857726781699805, "grad_norm": 0.8123781823308079, "learning_rate": 1.714322789943228e-05, "loss": 0.6011, "step": 7829 }, { "epoch": 0.22860646404484541, "grad_norm": 0.7169806996677456, "learning_rate": 1.714257907542579e-05, "loss": 0.6023, "step": 7830 }, { "epoch": 0.22863566027269278, "grad_norm": 0.6652413834948311, "learning_rate": 1.7141930251419302e-05, "loss": 0.6194, "step": 7831 }, { "epoch": 0.22866485650054014, "grad_norm": 0.7200760249479049, "learning_rate": 1.7141281427412814e-05, "loss": 0.6817, "step": 7832 }, { "epoch": 0.2286940527283875, "grad_norm": 0.710411693898224, "learning_rate": 1.7140632603406326e-05, "loss": 0.7191, "step": 7833 }, { "epoch": 0.22872324895623486, "grad_norm": 0.7346785016936699, "learning_rate": 1.7139983779399842e-05, "loss": 0.7377, "step": 7834 }, { "epoch": 0.22875244518408222, "grad_norm": 0.6760764324183021, "learning_rate": 1.713933495539335e-05, "loss": 0.6297, "step": 7835 }, { "epoch": 0.22878164141192958, "grad_norm": 0.67434626714416, "learning_rate": 1.7138686131386862e-05, "loss": 0.6328, "step": 7836 }, { "epoch": 0.22881083763977694, "grad_norm": 0.6365980590746965, "learning_rate": 1.7138037307380375e-05, "loss": 0.5936, "step": 7837 }, { "epoch": 0.2288400338676243, "grad_norm": 0.7191870569861314, "learning_rate": 1.7137388483373887e-05, "loss": 0.7426, "step": 7838 }, { "epoch": 0.22886923009547167, "grad_norm": 0.669055972298261, "learning_rate": 1.71367396593674e-05, "loss": 0.6458, "step": 7839 }, { "epoch": 0.22889842632331903, "grad_norm": 0.672837929620525, "learning_rate": 1.713609083536091e-05, "loss": 0.6678, "step": 7840 }, { "epoch": 0.2289276225511664, "grad_norm": 0.6888743070536472, "learning_rate": 1.7135442011354423e-05, "loss": 0.6833, "step": 7841 }, { "epoch": 0.22895681877901375, "grad_norm": 0.9000922565659526, "learning_rate": 1.7134793187347935e-05, "loss": 0.6167, "step": 7842 }, { "epoch": 0.2289860150068611, "grad_norm": 0.5807592262179778, "learning_rate": 1.7134144363341447e-05, "loss": 0.5189, "step": 7843 }, { "epoch": 0.22901521123470847, "grad_norm": 0.7116127453835206, "learning_rate": 1.7133495539334955e-05, "loss": 0.6971, "step": 7844 }, { "epoch": 0.22904440746255583, "grad_norm": 0.6053319592510574, "learning_rate": 1.7132846715328467e-05, "loss": 0.5624, "step": 7845 }, { "epoch": 0.2290736036904032, "grad_norm": 0.6441798280266573, "learning_rate": 1.713219789132198e-05, "loss": 0.6176, "step": 7846 }, { "epoch": 0.22910279991825055, "grad_norm": 0.6566376389228151, "learning_rate": 1.713154906731549e-05, "loss": 0.6186, "step": 7847 }, { "epoch": 0.22913199614609792, "grad_norm": 0.6621598939339144, "learning_rate": 1.7130900243309003e-05, "loss": 0.6474, "step": 7848 }, { "epoch": 0.22916119237394528, "grad_norm": 6.538521594002857, "learning_rate": 1.7130251419302515e-05, "loss": 0.7505, "step": 7849 }, { "epoch": 0.22919038860179264, "grad_norm": 0.7386978739980231, "learning_rate": 1.7129602595296027e-05, "loss": 0.7286, "step": 7850 }, { "epoch": 0.22921958482964, "grad_norm": 0.6538686671677948, "learning_rate": 1.712895377128954e-05, "loss": 0.6002, "step": 7851 }, { "epoch": 0.22924878105748736, "grad_norm": 0.613545007985348, "learning_rate": 1.7128304947283048e-05, "loss": 0.5665, "step": 7852 }, { "epoch": 0.22927797728533472, "grad_norm": 0.6631632356062379, "learning_rate": 1.7127656123276564e-05, "loss": 0.6344, "step": 7853 }, { "epoch": 0.22930717351318208, "grad_norm": 0.6883527049922642, "learning_rate": 1.7127007299270076e-05, "loss": 0.6547, "step": 7854 }, { "epoch": 0.22933636974102947, "grad_norm": 0.7116794233681305, "learning_rate": 1.7126358475263588e-05, "loss": 0.6985, "step": 7855 }, { "epoch": 0.22936556596887683, "grad_norm": 0.743820037832224, "learning_rate": 1.71257096512571e-05, "loss": 0.7633, "step": 7856 }, { "epoch": 0.2293947621967242, "grad_norm": 0.7210831832289666, "learning_rate": 1.712506082725061e-05, "loss": 0.624, "step": 7857 }, { "epoch": 0.22942395842457156, "grad_norm": 0.7032334969109343, "learning_rate": 1.712441200324412e-05, "loss": 0.6704, "step": 7858 }, { "epoch": 0.22945315465241892, "grad_norm": 0.6226811617563316, "learning_rate": 1.7123763179237632e-05, "loss": 0.5184, "step": 7859 }, { "epoch": 0.22948235088026628, "grad_norm": 0.6568400380338426, "learning_rate": 1.7123114355231144e-05, "loss": 0.6366, "step": 7860 }, { "epoch": 0.22951154710811364, "grad_norm": 0.6369848556569542, "learning_rate": 1.7122465531224656e-05, "loss": 0.6248, "step": 7861 }, { "epoch": 0.229540743335961, "grad_norm": 0.6771691804027754, "learning_rate": 1.712181670721817e-05, "loss": 0.6967, "step": 7862 }, { "epoch": 0.22956993956380836, "grad_norm": 0.7569234507018272, "learning_rate": 1.712116788321168e-05, "loss": 0.7502, "step": 7863 }, { "epoch": 0.22959913579165572, "grad_norm": 0.7177490018438343, "learning_rate": 1.7120519059205192e-05, "loss": 0.7287, "step": 7864 }, { "epoch": 0.22962833201950308, "grad_norm": 0.6571301717815868, "learning_rate": 1.7119870235198705e-05, "loss": 0.5939, "step": 7865 }, { "epoch": 0.22965752824735045, "grad_norm": 0.660495859536312, "learning_rate": 1.7119221411192217e-05, "loss": 0.651, "step": 7866 }, { "epoch": 0.2296867244751978, "grad_norm": 0.6380582610030319, "learning_rate": 1.7118572587185725e-05, "loss": 0.6172, "step": 7867 }, { "epoch": 0.22971592070304517, "grad_norm": 0.6911424076841305, "learning_rate": 1.7117923763179237e-05, "loss": 0.6838, "step": 7868 }, { "epoch": 0.22974511693089253, "grad_norm": 0.632727132086522, "learning_rate": 1.711727493917275e-05, "loss": 0.6059, "step": 7869 }, { "epoch": 0.2297743131587399, "grad_norm": 0.6960420175384112, "learning_rate": 1.711662611516626e-05, "loss": 0.6982, "step": 7870 }, { "epoch": 0.22980350938658725, "grad_norm": 0.6596956104738271, "learning_rate": 1.7115977291159773e-05, "loss": 0.6223, "step": 7871 }, { "epoch": 0.2298327056144346, "grad_norm": 0.6756004430113393, "learning_rate": 1.711532846715329e-05, "loss": 0.675, "step": 7872 }, { "epoch": 0.22986190184228197, "grad_norm": 0.6664872776360653, "learning_rate": 1.7114679643146797e-05, "loss": 0.6358, "step": 7873 }, { "epoch": 0.22989109807012933, "grad_norm": 0.6977487510561144, "learning_rate": 1.711403081914031e-05, "loss": 0.674, "step": 7874 }, { "epoch": 0.2299202942979767, "grad_norm": 0.6855194511958856, "learning_rate": 1.711338199513382e-05, "loss": 0.6732, "step": 7875 }, { "epoch": 0.22994949052582406, "grad_norm": 0.7101736638950141, "learning_rate": 1.7112733171127333e-05, "loss": 0.6897, "step": 7876 }, { "epoch": 0.22997868675367142, "grad_norm": 0.6447330066733692, "learning_rate": 1.7112084347120845e-05, "loss": 0.5854, "step": 7877 }, { "epoch": 0.23000788298151878, "grad_norm": 0.6753363088823704, "learning_rate": 1.7111435523114357e-05, "loss": 0.6458, "step": 7878 }, { "epoch": 0.23003707920936614, "grad_norm": 0.7092555963365438, "learning_rate": 1.711078669910787e-05, "loss": 0.6928, "step": 7879 }, { "epoch": 0.2300662754372135, "grad_norm": 0.6595408933764704, "learning_rate": 1.711013787510138e-05, "loss": 0.6747, "step": 7880 }, { "epoch": 0.23009547166506086, "grad_norm": 0.6867190418852438, "learning_rate": 1.7109489051094894e-05, "loss": 0.706, "step": 7881 }, { "epoch": 0.23012466789290822, "grad_norm": 0.7140821967084564, "learning_rate": 1.7108840227088402e-05, "loss": 0.7218, "step": 7882 }, { "epoch": 0.23015386412075559, "grad_norm": 0.6269442411096755, "learning_rate": 1.7108191403081914e-05, "loss": 0.5735, "step": 7883 }, { "epoch": 0.23018306034860295, "grad_norm": 0.6550606893387158, "learning_rate": 1.7107542579075426e-05, "loss": 0.6219, "step": 7884 }, { "epoch": 0.23021225657645034, "grad_norm": 0.6038439523239718, "learning_rate": 1.7106893755068938e-05, "loss": 0.5302, "step": 7885 }, { "epoch": 0.2302414528042977, "grad_norm": 0.6532026587890866, "learning_rate": 1.710624493106245e-05, "loss": 0.6278, "step": 7886 }, { "epoch": 0.23027064903214506, "grad_norm": 0.6583121572339694, "learning_rate": 1.7105596107055962e-05, "loss": 0.603, "step": 7887 }, { "epoch": 0.23029984525999242, "grad_norm": 0.6687542060213594, "learning_rate": 1.7104947283049474e-05, "loss": 0.6573, "step": 7888 }, { "epoch": 0.23032904148783978, "grad_norm": 0.6659178370113333, "learning_rate": 1.7104298459042986e-05, "loss": 0.6895, "step": 7889 }, { "epoch": 0.23035823771568714, "grad_norm": 0.7219366768653424, "learning_rate": 1.7103649635036495e-05, "loss": 0.6834, "step": 7890 }, { "epoch": 0.2303874339435345, "grad_norm": 0.7105877082494311, "learning_rate": 1.710300081103001e-05, "loss": 0.7081, "step": 7891 }, { "epoch": 0.23041663017138186, "grad_norm": 0.6680156990804961, "learning_rate": 1.7102351987023522e-05, "loss": 0.6581, "step": 7892 }, { "epoch": 0.23044582639922923, "grad_norm": 0.6845551019362476, "learning_rate": 1.7101703163017034e-05, "loss": 0.6349, "step": 7893 }, { "epoch": 0.2304750226270766, "grad_norm": 0.7371026313616605, "learning_rate": 1.7101054339010547e-05, "loss": 0.7069, "step": 7894 }, { "epoch": 0.23050421885492395, "grad_norm": 0.6341631472077831, "learning_rate": 1.710040551500406e-05, "loss": 0.6018, "step": 7895 }, { "epoch": 0.2305334150827713, "grad_norm": 0.6451980560375734, "learning_rate": 1.7099756690997567e-05, "loss": 0.6148, "step": 7896 }, { "epoch": 0.23056261131061867, "grad_norm": 0.7322132215639076, "learning_rate": 1.709910786699108e-05, "loss": 0.5927, "step": 7897 }, { "epoch": 0.23059180753846603, "grad_norm": 0.6936498372194361, "learning_rate": 1.709845904298459e-05, "loss": 0.6129, "step": 7898 }, { "epoch": 0.2306210037663134, "grad_norm": 0.8807630687687624, "learning_rate": 1.7097810218978103e-05, "loss": 0.7851, "step": 7899 }, { "epoch": 0.23065019999416075, "grad_norm": 0.6592419846438957, "learning_rate": 1.7097161394971615e-05, "loss": 0.638, "step": 7900 }, { "epoch": 0.23067939622200812, "grad_norm": 0.7212102903099629, "learning_rate": 1.7096512570965127e-05, "loss": 0.7042, "step": 7901 }, { "epoch": 0.23070859244985548, "grad_norm": 0.7051519923722319, "learning_rate": 1.709586374695864e-05, "loss": 0.6637, "step": 7902 }, { "epoch": 0.23073778867770284, "grad_norm": 0.659115134306074, "learning_rate": 1.709521492295215e-05, "loss": 0.616, "step": 7903 }, { "epoch": 0.2307669849055502, "grad_norm": 0.6781076283556599, "learning_rate": 1.7094566098945663e-05, "loss": 0.6721, "step": 7904 }, { "epoch": 0.23079618113339756, "grad_norm": 0.74855448571515, "learning_rate": 1.7093917274939172e-05, "loss": 0.7824, "step": 7905 }, { "epoch": 0.23082537736124492, "grad_norm": 0.6646886342657355, "learning_rate": 1.7093268450932684e-05, "loss": 0.5406, "step": 7906 }, { "epoch": 0.23085457358909228, "grad_norm": 0.6543889508541127, "learning_rate": 1.7092619626926196e-05, "loss": 0.6431, "step": 7907 }, { "epoch": 0.23088376981693964, "grad_norm": 0.7278576603294291, "learning_rate": 1.7091970802919708e-05, "loss": 0.7115, "step": 7908 }, { "epoch": 0.230912966044787, "grad_norm": 0.6471321315973371, "learning_rate": 1.709132197891322e-05, "loss": 0.6224, "step": 7909 }, { "epoch": 0.23094216227263437, "grad_norm": 0.7173973803589344, "learning_rate": 1.7090673154906736e-05, "loss": 0.6794, "step": 7910 }, { "epoch": 0.23097135850048173, "grad_norm": 0.6491720713271767, "learning_rate": 1.7090024330900244e-05, "loss": 0.5875, "step": 7911 }, { "epoch": 0.2310005547283291, "grad_norm": 0.6526380477408775, "learning_rate": 1.7089375506893756e-05, "loss": 0.5922, "step": 7912 }, { "epoch": 0.23102975095617645, "grad_norm": 0.695781709185608, "learning_rate": 1.7088726682887268e-05, "loss": 0.643, "step": 7913 }, { "epoch": 0.2310589471840238, "grad_norm": 0.667549873749283, "learning_rate": 1.708807785888078e-05, "loss": 0.6201, "step": 7914 }, { "epoch": 0.2310881434118712, "grad_norm": 0.6433258211153465, "learning_rate": 1.7087429034874292e-05, "loss": 0.552, "step": 7915 }, { "epoch": 0.23111733963971856, "grad_norm": 0.6901484339222691, "learning_rate": 1.7086780210867804e-05, "loss": 0.7002, "step": 7916 }, { "epoch": 0.23114653586756592, "grad_norm": 0.7119136683746374, "learning_rate": 1.7086131386861316e-05, "loss": 0.714, "step": 7917 }, { "epoch": 0.23117573209541328, "grad_norm": 0.6939333881907042, "learning_rate": 1.708548256285483e-05, "loss": 0.6244, "step": 7918 }, { "epoch": 0.23120492832326064, "grad_norm": 0.6583693124063729, "learning_rate": 1.708483373884834e-05, "loss": 0.6019, "step": 7919 }, { "epoch": 0.231234124551108, "grad_norm": 0.6505674563544266, "learning_rate": 1.708418491484185e-05, "loss": 0.6609, "step": 7920 }, { "epoch": 0.23126332077895537, "grad_norm": 0.7761964153280169, "learning_rate": 1.708353609083536e-05, "loss": 0.7791, "step": 7921 }, { "epoch": 0.23129251700680273, "grad_norm": 0.7316478541969049, "learning_rate": 1.7082887266828873e-05, "loss": 0.6894, "step": 7922 }, { "epoch": 0.2313217132346501, "grad_norm": 0.7039652693837204, "learning_rate": 1.7082238442822385e-05, "loss": 0.6719, "step": 7923 }, { "epoch": 0.23135090946249745, "grad_norm": 0.6350954032419276, "learning_rate": 1.7081589618815897e-05, "loss": 0.5599, "step": 7924 }, { "epoch": 0.2313801056903448, "grad_norm": 0.6613777398374024, "learning_rate": 1.708094079480941e-05, "loss": 0.5467, "step": 7925 }, { "epoch": 0.23140930191819217, "grad_norm": 0.7038017758548961, "learning_rate": 1.708029197080292e-05, "loss": 0.6382, "step": 7926 }, { "epoch": 0.23143849814603953, "grad_norm": 0.6509486586953753, "learning_rate": 1.7079643146796433e-05, "loss": 0.601, "step": 7927 }, { "epoch": 0.2314676943738869, "grad_norm": 0.7288036051245445, "learning_rate": 1.7078994322789945e-05, "loss": 0.6586, "step": 7928 }, { "epoch": 0.23149689060173426, "grad_norm": 0.6954566527011439, "learning_rate": 1.7078345498783457e-05, "loss": 0.69, "step": 7929 }, { "epoch": 0.23152608682958162, "grad_norm": 0.6914762708033387, "learning_rate": 1.707769667477697e-05, "loss": 0.656, "step": 7930 }, { "epoch": 0.23155528305742898, "grad_norm": 0.7116807764312525, "learning_rate": 1.707704785077048e-05, "loss": 0.7246, "step": 7931 }, { "epoch": 0.23158447928527634, "grad_norm": 0.6349546805145249, "learning_rate": 1.7076399026763993e-05, "loss": 0.551, "step": 7932 }, { "epoch": 0.2316136755131237, "grad_norm": 0.8322820874840102, "learning_rate": 1.7075750202757505e-05, "loss": 0.7473, "step": 7933 }, { "epoch": 0.23164287174097106, "grad_norm": 0.7143130478534917, "learning_rate": 1.7075101378751014e-05, "loss": 0.5853, "step": 7934 }, { "epoch": 0.23167206796881842, "grad_norm": 0.672613351530996, "learning_rate": 1.7074452554744526e-05, "loss": 0.6337, "step": 7935 }, { "epoch": 0.23170126419666578, "grad_norm": 0.6892174139939244, "learning_rate": 1.7073803730738038e-05, "loss": 0.6959, "step": 7936 }, { "epoch": 0.23173046042451315, "grad_norm": 0.677007794172935, "learning_rate": 1.707315490673155e-05, "loss": 0.6163, "step": 7937 }, { "epoch": 0.2317596566523605, "grad_norm": 0.6701140622501791, "learning_rate": 1.7072506082725062e-05, "loss": 0.6623, "step": 7938 }, { "epoch": 0.23178885288020787, "grad_norm": 0.7194738880149784, "learning_rate": 1.7071857258718574e-05, "loss": 0.7015, "step": 7939 }, { "epoch": 0.23181804910805523, "grad_norm": 0.6252544484099872, "learning_rate": 1.7071208434712086e-05, "loss": 0.5223, "step": 7940 }, { "epoch": 0.2318472453359026, "grad_norm": 0.7011938628302078, "learning_rate": 1.7070559610705598e-05, "loss": 0.6567, "step": 7941 }, { "epoch": 0.23187644156374995, "grad_norm": 0.6101782177814603, "learning_rate": 1.706991078669911e-05, "loss": 0.5488, "step": 7942 }, { "epoch": 0.2319056377915973, "grad_norm": 0.701935233543458, "learning_rate": 1.706926196269262e-05, "loss": 0.695, "step": 7943 }, { "epoch": 0.23193483401944467, "grad_norm": 0.6598671330870446, "learning_rate": 1.706861313868613e-05, "loss": 0.6067, "step": 7944 }, { "epoch": 0.23196403024729206, "grad_norm": 0.6949508052457943, "learning_rate": 1.7067964314679643e-05, "loss": 0.7387, "step": 7945 }, { "epoch": 0.23199322647513942, "grad_norm": 0.6257565873048578, "learning_rate": 1.7067315490673155e-05, "loss": 0.5969, "step": 7946 }, { "epoch": 0.23202242270298679, "grad_norm": 0.6359552160263878, "learning_rate": 1.706666666666667e-05, "loss": 0.5778, "step": 7947 }, { "epoch": 0.23205161893083415, "grad_norm": 0.7077746970182124, "learning_rate": 1.7066017842660182e-05, "loss": 0.6476, "step": 7948 }, { "epoch": 0.2320808151586815, "grad_norm": 0.6519922956760256, "learning_rate": 1.706536901865369e-05, "loss": 0.5728, "step": 7949 }, { "epoch": 0.23211001138652887, "grad_norm": 1.1646141417975011, "learning_rate": 1.7064720194647203e-05, "loss": 0.6214, "step": 7950 }, { "epoch": 0.23213920761437623, "grad_norm": 0.6610601659822545, "learning_rate": 1.7064071370640715e-05, "loss": 0.6554, "step": 7951 }, { "epoch": 0.2321684038422236, "grad_norm": 0.6003209186541093, "learning_rate": 1.7063422546634227e-05, "loss": 0.5423, "step": 7952 }, { "epoch": 0.23219760007007095, "grad_norm": 0.6500352107264884, "learning_rate": 1.706277372262774e-05, "loss": 0.6307, "step": 7953 }, { "epoch": 0.23222679629791831, "grad_norm": 0.7642778375683907, "learning_rate": 1.706212489862125e-05, "loss": 0.7026, "step": 7954 }, { "epoch": 0.23225599252576568, "grad_norm": 0.6689674294919973, "learning_rate": 1.7061476074614763e-05, "loss": 0.6201, "step": 7955 }, { "epoch": 0.23228518875361304, "grad_norm": 0.7580733886312935, "learning_rate": 1.7060827250608275e-05, "loss": 0.7269, "step": 7956 }, { "epoch": 0.2323143849814604, "grad_norm": 0.7021315256244312, "learning_rate": 1.7060178426601784e-05, "loss": 0.6916, "step": 7957 }, { "epoch": 0.23234358120930776, "grad_norm": 0.7110387866015057, "learning_rate": 1.7059529602595296e-05, "loss": 0.7371, "step": 7958 }, { "epoch": 0.23237277743715512, "grad_norm": 0.6812010981458276, "learning_rate": 1.7058880778588808e-05, "loss": 0.6531, "step": 7959 }, { "epoch": 0.23240197366500248, "grad_norm": 0.7254723024899813, "learning_rate": 1.705823195458232e-05, "loss": 0.6415, "step": 7960 }, { "epoch": 0.23243116989284984, "grad_norm": 0.7001826316323287, "learning_rate": 1.7057583130575832e-05, "loss": 0.6507, "step": 7961 }, { "epoch": 0.2324603661206972, "grad_norm": 0.6524594668412174, "learning_rate": 1.7056934306569344e-05, "loss": 0.6242, "step": 7962 }, { "epoch": 0.23248956234854457, "grad_norm": 0.7045671011060276, "learning_rate": 1.7056285482562856e-05, "loss": 0.6256, "step": 7963 }, { "epoch": 0.23251875857639193, "grad_norm": 0.6610061572786492, "learning_rate": 1.7055636658556368e-05, "loss": 0.6585, "step": 7964 }, { "epoch": 0.2325479548042393, "grad_norm": 0.6885053483124066, "learning_rate": 1.705498783454988e-05, "loss": 0.6828, "step": 7965 }, { "epoch": 0.23257715103208665, "grad_norm": 0.6897116905108226, "learning_rate": 1.7054339010543392e-05, "loss": 0.6419, "step": 7966 }, { "epoch": 0.232606347259934, "grad_norm": 0.6154430918653886, "learning_rate": 1.7053690186536904e-05, "loss": 0.5671, "step": 7967 }, { "epoch": 0.23263554348778137, "grad_norm": 0.6836681281219225, "learning_rate": 1.7053041362530416e-05, "loss": 0.6435, "step": 7968 }, { "epoch": 0.23266473971562873, "grad_norm": 0.6498160490235215, "learning_rate": 1.7052392538523928e-05, "loss": 0.637, "step": 7969 }, { "epoch": 0.2326939359434761, "grad_norm": 0.701476724911002, "learning_rate": 1.705174371451744e-05, "loss": 0.7438, "step": 7970 }, { "epoch": 0.23272313217132345, "grad_norm": 0.6971504471041714, "learning_rate": 1.7051094890510952e-05, "loss": 0.699, "step": 7971 }, { "epoch": 0.23275232839917082, "grad_norm": 0.7111920379321356, "learning_rate": 1.705044606650446e-05, "loss": 0.6914, "step": 7972 }, { "epoch": 0.23278152462701818, "grad_norm": 0.59754554033742, "learning_rate": 1.7049797242497973e-05, "loss": 0.5459, "step": 7973 }, { "epoch": 0.23281072085486554, "grad_norm": 0.6550066547240422, "learning_rate": 1.7049148418491485e-05, "loss": 0.6546, "step": 7974 }, { "epoch": 0.23283991708271293, "grad_norm": 0.6536899857256581, "learning_rate": 1.7048499594484997e-05, "loss": 0.6144, "step": 7975 }, { "epoch": 0.2328691133105603, "grad_norm": 0.7186902653321319, "learning_rate": 1.704785077047851e-05, "loss": 0.6805, "step": 7976 }, { "epoch": 0.23289830953840765, "grad_norm": 0.665074453068334, "learning_rate": 1.704720194647202e-05, "loss": 0.6709, "step": 7977 }, { "epoch": 0.232927505766255, "grad_norm": 0.6323665249742432, "learning_rate": 1.7046553122465533e-05, "loss": 0.5808, "step": 7978 }, { "epoch": 0.23295670199410237, "grad_norm": 0.6903777319485614, "learning_rate": 1.7045904298459045e-05, "loss": 0.6696, "step": 7979 }, { "epoch": 0.23298589822194973, "grad_norm": 0.6504135376804623, "learning_rate": 1.7045255474452557e-05, "loss": 0.6114, "step": 7980 }, { "epoch": 0.2330150944497971, "grad_norm": 0.6492413615254866, "learning_rate": 1.7044606650446066e-05, "loss": 0.6176, "step": 7981 }, { "epoch": 0.23304429067764446, "grad_norm": 0.7059539652131006, "learning_rate": 1.7043957826439578e-05, "loss": 0.659, "step": 7982 }, { "epoch": 0.23307348690549182, "grad_norm": 0.6966585428369377, "learning_rate": 1.704330900243309e-05, "loss": 0.6471, "step": 7983 }, { "epoch": 0.23310268313333918, "grad_norm": 0.6922126184332181, "learning_rate": 1.7042660178426602e-05, "loss": 0.692, "step": 7984 }, { "epoch": 0.23313187936118654, "grad_norm": 0.7211729098323523, "learning_rate": 1.7042011354420117e-05, "loss": 0.7514, "step": 7985 }, { "epoch": 0.2331610755890339, "grad_norm": 0.6590574608365569, "learning_rate": 1.704136253041363e-05, "loss": 0.6084, "step": 7986 }, { "epoch": 0.23319027181688126, "grad_norm": 0.6478153089495133, "learning_rate": 1.7040713706407138e-05, "loss": 0.6571, "step": 7987 }, { "epoch": 0.23321946804472862, "grad_norm": 0.6783397556749101, "learning_rate": 1.704006488240065e-05, "loss": 0.638, "step": 7988 }, { "epoch": 0.23324866427257598, "grad_norm": 0.6903795353661489, "learning_rate": 1.7039416058394162e-05, "loss": 0.6831, "step": 7989 }, { "epoch": 0.23327786050042335, "grad_norm": 0.6610193364075583, "learning_rate": 1.7038767234387674e-05, "loss": 0.597, "step": 7990 }, { "epoch": 0.2333070567282707, "grad_norm": 0.7376117503263501, "learning_rate": 1.7038118410381186e-05, "loss": 0.6653, "step": 7991 }, { "epoch": 0.23333625295611807, "grad_norm": 0.7656266016908325, "learning_rate": 1.7037469586374698e-05, "loss": 0.7433, "step": 7992 }, { "epoch": 0.23336544918396543, "grad_norm": 0.6357703605195005, "learning_rate": 1.703682076236821e-05, "loss": 0.5871, "step": 7993 }, { "epoch": 0.2333946454118128, "grad_norm": 0.6205754955657301, "learning_rate": 1.7036171938361722e-05, "loss": 0.5378, "step": 7994 }, { "epoch": 0.23342384163966015, "grad_norm": 0.6392663207900363, "learning_rate": 1.703552311435523e-05, "loss": 0.6284, "step": 7995 }, { "epoch": 0.2334530378675075, "grad_norm": 0.6676067582270275, "learning_rate": 1.7034874290348743e-05, "loss": 0.5851, "step": 7996 }, { "epoch": 0.23348223409535487, "grad_norm": 0.5984866520701713, "learning_rate": 1.7034225466342255e-05, "loss": 0.5557, "step": 7997 }, { "epoch": 0.23351143032320223, "grad_norm": 0.6795984674864931, "learning_rate": 1.7033576642335767e-05, "loss": 0.7241, "step": 7998 }, { "epoch": 0.2335406265510496, "grad_norm": 0.63980349213411, "learning_rate": 1.703292781832928e-05, "loss": 0.5797, "step": 7999 }, { "epoch": 0.23356982277889696, "grad_norm": 0.6994252946720424, "learning_rate": 1.703227899432279e-05, "loss": 0.6275, "step": 8000 }, { "epoch": 0.23359901900674432, "grad_norm": 0.6849933210751605, "learning_rate": 1.7031630170316303e-05, "loss": 0.6398, "step": 8001 }, { "epoch": 0.23362821523459168, "grad_norm": 0.6864502493270108, "learning_rate": 1.7030981346309815e-05, "loss": 0.6176, "step": 8002 }, { "epoch": 0.23365741146243904, "grad_norm": 0.7377034332883405, "learning_rate": 1.7030332522303327e-05, "loss": 0.6594, "step": 8003 }, { "epoch": 0.2336866076902864, "grad_norm": 0.6875166348396538, "learning_rate": 1.702968369829684e-05, "loss": 0.6753, "step": 8004 }, { "epoch": 0.2337158039181338, "grad_norm": 0.7413708389676396, "learning_rate": 1.702903487429035e-05, "loss": 0.7176, "step": 8005 }, { "epoch": 0.23374500014598115, "grad_norm": 0.6897860236754694, "learning_rate": 1.7028386050283863e-05, "loss": 0.6261, "step": 8006 }, { "epoch": 0.2337741963738285, "grad_norm": 0.9508822864597392, "learning_rate": 1.7027737226277375e-05, "loss": 0.6354, "step": 8007 }, { "epoch": 0.23380339260167587, "grad_norm": 0.6709444396181432, "learning_rate": 1.7027088402270887e-05, "loss": 0.6712, "step": 8008 }, { "epoch": 0.23383258882952324, "grad_norm": 0.7037980885913995, "learning_rate": 1.70264395782644e-05, "loss": 0.6834, "step": 8009 }, { "epoch": 0.2338617850573706, "grad_norm": 0.6681927839530747, "learning_rate": 1.7025790754257908e-05, "loss": 0.5677, "step": 8010 }, { "epoch": 0.23389098128521796, "grad_norm": 0.683500111233209, "learning_rate": 1.702514193025142e-05, "loss": 0.6955, "step": 8011 }, { "epoch": 0.23392017751306532, "grad_norm": 0.6342578573635728, "learning_rate": 1.7024493106244932e-05, "loss": 0.5937, "step": 8012 }, { "epoch": 0.23394937374091268, "grad_norm": 0.599625824723562, "learning_rate": 1.7023844282238444e-05, "loss": 0.5324, "step": 8013 }, { "epoch": 0.23397856996876004, "grad_norm": 0.6831500778751611, "learning_rate": 1.7023195458231956e-05, "loss": 0.6572, "step": 8014 }, { "epoch": 0.2340077661966074, "grad_norm": 0.7243192707888861, "learning_rate": 1.7022546634225468e-05, "loss": 0.6899, "step": 8015 }, { "epoch": 0.23403696242445476, "grad_norm": 0.6769062060797062, "learning_rate": 1.702189781021898e-05, "loss": 0.5915, "step": 8016 }, { "epoch": 0.23406615865230213, "grad_norm": 0.7203763292118837, "learning_rate": 1.7021248986212492e-05, "loss": 0.6824, "step": 8017 }, { "epoch": 0.2340953548801495, "grad_norm": 0.6405033926471057, "learning_rate": 1.7020600162206004e-05, "loss": 0.639, "step": 8018 }, { "epoch": 0.23412455110799685, "grad_norm": 0.642647355205149, "learning_rate": 1.7019951338199513e-05, "loss": 0.5905, "step": 8019 }, { "epoch": 0.2341537473358442, "grad_norm": 0.7159123753024277, "learning_rate": 1.7019302514193025e-05, "loss": 0.6974, "step": 8020 }, { "epoch": 0.23418294356369157, "grad_norm": 0.7157053428949075, "learning_rate": 1.7018653690186537e-05, "loss": 0.6913, "step": 8021 }, { "epoch": 0.23421213979153893, "grad_norm": 0.7564398879482546, "learning_rate": 1.701800486618005e-05, "loss": 0.7381, "step": 8022 }, { "epoch": 0.2342413360193863, "grad_norm": 0.6421162161468201, "learning_rate": 1.7017356042173564e-05, "loss": 0.6462, "step": 8023 }, { "epoch": 0.23427053224723365, "grad_norm": 0.7141970004822794, "learning_rate": 1.7016707218167076e-05, "loss": 0.6416, "step": 8024 }, { "epoch": 0.23429972847508101, "grad_norm": 0.7289221890376488, "learning_rate": 1.7016058394160585e-05, "loss": 0.6483, "step": 8025 }, { "epoch": 0.23432892470292838, "grad_norm": 0.6430400582116972, "learning_rate": 1.7015409570154097e-05, "loss": 0.5564, "step": 8026 }, { "epoch": 0.23435812093077574, "grad_norm": 0.6644403857436019, "learning_rate": 1.701476074614761e-05, "loss": 0.6406, "step": 8027 }, { "epoch": 0.2343873171586231, "grad_norm": 0.6582430544249073, "learning_rate": 1.701411192214112e-05, "loss": 0.6725, "step": 8028 }, { "epoch": 0.23441651338647046, "grad_norm": 0.7655256264522553, "learning_rate": 1.7013463098134633e-05, "loss": 0.6797, "step": 8029 }, { "epoch": 0.23444570961431782, "grad_norm": 0.72493955725467, "learning_rate": 1.7012814274128145e-05, "loss": 0.6668, "step": 8030 }, { "epoch": 0.23447490584216518, "grad_norm": 0.7334411064419587, "learning_rate": 1.7012165450121657e-05, "loss": 0.6907, "step": 8031 }, { "epoch": 0.23450410207001254, "grad_norm": 0.6436629765513849, "learning_rate": 1.701151662611517e-05, "loss": 0.5978, "step": 8032 }, { "epoch": 0.2345332982978599, "grad_norm": 0.6115585497689551, "learning_rate": 1.7010867802108678e-05, "loss": 0.5775, "step": 8033 }, { "epoch": 0.23456249452570727, "grad_norm": 0.6279031591954677, "learning_rate": 1.701021897810219e-05, "loss": 0.56, "step": 8034 }, { "epoch": 0.23459169075355463, "grad_norm": 0.7066025040058219, "learning_rate": 1.7009570154095702e-05, "loss": 0.6622, "step": 8035 }, { "epoch": 0.23462088698140202, "grad_norm": 0.6071986976750684, "learning_rate": 1.7008921330089214e-05, "loss": 0.5198, "step": 8036 }, { "epoch": 0.23465008320924938, "grad_norm": 0.7103230434800187, "learning_rate": 1.7008272506082726e-05, "loss": 0.6478, "step": 8037 }, { "epoch": 0.23467927943709674, "grad_norm": 0.7178649549997728, "learning_rate": 1.7007623682076238e-05, "loss": 0.6978, "step": 8038 }, { "epoch": 0.2347084756649441, "grad_norm": 0.680459635147421, "learning_rate": 1.700697485806975e-05, "loss": 0.6717, "step": 8039 }, { "epoch": 0.23473767189279146, "grad_norm": 0.6733479592196805, "learning_rate": 1.7006326034063262e-05, "loss": 0.62, "step": 8040 }, { "epoch": 0.23476686812063882, "grad_norm": 0.6577655437340604, "learning_rate": 1.7005677210056774e-05, "loss": 0.6597, "step": 8041 }, { "epoch": 0.23479606434848618, "grad_norm": 0.7671872480466162, "learning_rate": 1.7005028386050286e-05, "loss": 0.7288, "step": 8042 }, { "epoch": 0.23482526057633354, "grad_norm": 0.7036570399484336, "learning_rate": 1.7004379562043798e-05, "loss": 0.6882, "step": 8043 }, { "epoch": 0.2348544568041809, "grad_norm": 0.6554588183684738, "learning_rate": 1.700373073803731e-05, "loss": 0.672, "step": 8044 }, { "epoch": 0.23488365303202827, "grad_norm": 0.666498470931463, "learning_rate": 1.7003081914030822e-05, "loss": 0.6512, "step": 8045 }, { "epoch": 0.23491284925987563, "grad_norm": 0.707972535505997, "learning_rate": 1.7002433090024334e-05, "loss": 0.6191, "step": 8046 }, { "epoch": 0.234942045487723, "grad_norm": 0.6604415008989207, "learning_rate": 1.7001784266017846e-05, "loss": 0.6758, "step": 8047 }, { "epoch": 0.23497124171557035, "grad_norm": 0.7614630697884001, "learning_rate": 1.7001135442011355e-05, "loss": 0.737, "step": 8048 }, { "epoch": 0.2350004379434177, "grad_norm": 0.7361516748185044, "learning_rate": 1.7000486618004867e-05, "loss": 0.74, "step": 8049 }, { "epoch": 0.23502963417126507, "grad_norm": 0.7460046564872223, "learning_rate": 1.699983779399838e-05, "loss": 0.7464, "step": 8050 }, { "epoch": 0.23505883039911243, "grad_norm": 0.6743548145294551, "learning_rate": 1.699918896999189e-05, "loss": 0.6211, "step": 8051 }, { "epoch": 0.2350880266269598, "grad_norm": 0.7106485662602178, "learning_rate": 1.6998540145985403e-05, "loss": 0.7223, "step": 8052 }, { "epoch": 0.23511722285480716, "grad_norm": 0.6829623953348767, "learning_rate": 1.6997891321978915e-05, "loss": 0.7188, "step": 8053 }, { "epoch": 0.23514641908265452, "grad_norm": 0.636628002519543, "learning_rate": 1.6997242497972427e-05, "loss": 0.6153, "step": 8054 }, { "epoch": 0.23517561531050188, "grad_norm": 0.6653665368617806, "learning_rate": 1.699659367396594e-05, "loss": 0.6367, "step": 8055 }, { "epoch": 0.23520481153834924, "grad_norm": 0.7414478192609253, "learning_rate": 1.699594484995945e-05, "loss": 0.7508, "step": 8056 }, { "epoch": 0.2352340077661966, "grad_norm": 0.6623336168625551, "learning_rate": 1.699529602595296e-05, "loss": 0.6352, "step": 8057 }, { "epoch": 0.23526320399404396, "grad_norm": 0.6757931211226632, "learning_rate": 1.699464720194647e-05, "loss": 0.6699, "step": 8058 }, { "epoch": 0.23529240022189132, "grad_norm": 0.6979707419779135, "learning_rate": 1.6993998377939984e-05, "loss": 0.6247, "step": 8059 }, { "epoch": 0.23532159644973868, "grad_norm": 0.7093647526599298, "learning_rate": 1.6993349553933496e-05, "loss": 0.6092, "step": 8060 }, { "epoch": 0.23535079267758605, "grad_norm": 0.684892776564269, "learning_rate": 1.699270072992701e-05, "loss": 0.708, "step": 8061 }, { "epoch": 0.2353799889054334, "grad_norm": 0.7182652659589732, "learning_rate": 1.699205190592052e-05, "loss": 0.6657, "step": 8062 }, { "epoch": 0.23540918513328077, "grad_norm": 0.6665632063475181, "learning_rate": 1.699140308191403e-05, "loss": 0.6264, "step": 8063 }, { "epoch": 0.23543838136112813, "grad_norm": 0.6849646880172074, "learning_rate": 1.6990754257907544e-05, "loss": 0.6329, "step": 8064 }, { "epoch": 0.2354675775889755, "grad_norm": 0.7154107587166849, "learning_rate": 1.6990105433901056e-05, "loss": 0.6269, "step": 8065 }, { "epoch": 0.23549677381682288, "grad_norm": 0.6753968763570267, "learning_rate": 1.6989456609894568e-05, "loss": 0.6339, "step": 8066 }, { "epoch": 0.23552597004467024, "grad_norm": 0.7003602546776742, "learning_rate": 1.698880778588808e-05, "loss": 0.7073, "step": 8067 }, { "epoch": 0.2355551662725176, "grad_norm": 0.6134001396083132, "learning_rate": 1.6988158961881592e-05, "loss": 0.5923, "step": 8068 }, { "epoch": 0.23558436250036496, "grad_norm": 0.6665849961386228, "learning_rate": 1.6987510137875104e-05, "loss": 0.6419, "step": 8069 }, { "epoch": 0.23561355872821232, "grad_norm": 0.6369942937294831, "learning_rate": 1.6986861313868616e-05, "loss": 0.6453, "step": 8070 }, { "epoch": 0.23564275495605969, "grad_norm": 0.6682836968129057, "learning_rate": 1.6986212489862125e-05, "loss": 0.6082, "step": 8071 }, { "epoch": 0.23567195118390705, "grad_norm": 0.6853872385111744, "learning_rate": 1.6985563665855637e-05, "loss": 0.6469, "step": 8072 }, { "epoch": 0.2357011474117544, "grad_norm": 0.640381471273155, "learning_rate": 1.698491484184915e-05, "loss": 0.6031, "step": 8073 }, { "epoch": 0.23573034363960177, "grad_norm": 0.7410419028144932, "learning_rate": 1.698426601784266e-05, "loss": 0.7501, "step": 8074 }, { "epoch": 0.23575953986744913, "grad_norm": 0.6872536665257387, "learning_rate": 1.6983617193836173e-05, "loss": 0.63, "step": 8075 }, { "epoch": 0.2357887360952965, "grad_norm": 0.6672369456953836, "learning_rate": 1.6982968369829685e-05, "loss": 0.6002, "step": 8076 }, { "epoch": 0.23581793232314385, "grad_norm": 0.7560844995562347, "learning_rate": 1.6982319545823197e-05, "loss": 0.704, "step": 8077 }, { "epoch": 0.23584712855099121, "grad_norm": 0.6777963156677423, "learning_rate": 1.698167072181671e-05, "loss": 0.6763, "step": 8078 }, { "epoch": 0.23587632477883858, "grad_norm": 0.6400500050112508, "learning_rate": 1.698102189781022e-05, "loss": 0.6378, "step": 8079 }, { "epoch": 0.23590552100668594, "grad_norm": 0.7072056227695062, "learning_rate": 1.6980373073803733e-05, "loss": 0.6839, "step": 8080 }, { "epoch": 0.2359347172345333, "grad_norm": 0.6996092997483289, "learning_rate": 1.6979724249797245e-05, "loss": 0.7118, "step": 8081 }, { "epoch": 0.23596391346238066, "grad_norm": 0.690101649855634, "learning_rate": 1.6979075425790757e-05, "loss": 0.692, "step": 8082 }, { "epoch": 0.23599310969022802, "grad_norm": 0.7162043496532166, "learning_rate": 1.697842660178427e-05, "loss": 0.7115, "step": 8083 }, { "epoch": 0.23602230591807538, "grad_norm": 0.6520917529178271, "learning_rate": 1.697777777777778e-05, "loss": 0.5822, "step": 8084 }, { "epoch": 0.23605150214592274, "grad_norm": 0.7498616692979809, "learning_rate": 1.6977128953771293e-05, "loss": 0.7179, "step": 8085 }, { "epoch": 0.2360806983737701, "grad_norm": 0.7067657747907474, "learning_rate": 1.69764801297648e-05, "loss": 0.6863, "step": 8086 }, { "epoch": 0.23610989460161746, "grad_norm": 0.676293438451832, "learning_rate": 1.6975831305758314e-05, "loss": 0.6928, "step": 8087 }, { "epoch": 0.23613909082946483, "grad_norm": 0.691394270455572, "learning_rate": 1.6975182481751826e-05, "loss": 0.7467, "step": 8088 }, { "epoch": 0.2361682870573122, "grad_norm": 0.6839270342658361, "learning_rate": 1.6974533657745338e-05, "loss": 0.6502, "step": 8089 }, { "epoch": 0.23619748328515955, "grad_norm": 0.7199993947292767, "learning_rate": 1.697388483373885e-05, "loss": 0.7778, "step": 8090 }, { "epoch": 0.2362266795130069, "grad_norm": 0.6776726060692289, "learning_rate": 1.697323600973236e-05, "loss": 0.6448, "step": 8091 }, { "epoch": 0.23625587574085427, "grad_norm": 0.681596176005117, "learning_rate": 1.6972587185725874e-05, "loss": 0.6253, "step": 8092 }, { "epoch": 0.23628507196870163, "grad_norm": 0.6885169104959387, "learning_rate": 1.6971938361719386e-05, "loss": 0.6958, "step": 8093 }, { "epoch": 0.236314268196549, "grad_norm": 0.7395953739476494, "learning_rate": 1.6971289537712898e-05, "loss": 0.6716, "step": 8094 }, { "epoch": 0.23634346442439635, "grad_norm": 0.64270233389918, "learning_rate": 1.6970640713706406e-05, "loss": 0.6007, "step": 8095 }, { "epoch": 0.23637266065224374, "grad_norm": 0.6298019173317037, "learning_rate": 1.696999188969992e-05, "loss": 0.5508, "step": 8096 }, { "epoch": 0.2364018568800911, "grad_norm": 0.6762342649585852, "learning_rate": 1.696934306569343e-05, "loss": 0.6198, "step": 8097 }, { "epoch": 0.23643105310793847, "grad_norm": 0.7388654245717811, "learning_rate": 1.6968694241686946e-05, "loss": 0.7797, "step": 8098 }, { "epoch": 0.23646024933578583, "grad_norm": 0.7075511610771695, "learning_rate": 1.6968045417680458e-05, "loss": 0.6041, "step": 8099 }, { "epoch": 0.2364894455636332, "grad_norm": 0.6737466662787764, "learning_rate": 1.6967396593673967e-05, "loss": 0.667, "step": 8100 }, { "epoch": 0.23651864179148055, "grad_norm": 0.7084981454807632, "learning_rate": 1.696674776966748e-05, "loss": 0.6943, "step": 8101 }, { "epoch": 0.2365478380193279, "grad_norm": 0.7076067397360036, "learning_rate": 1.696609894566099e-05, "loss": 0.6424, "step": 8102 }, { "epoch": 0.23657703424717527, "grad_norm": 0.6650991859965699, "learning_rate": 1.6965450121654503e-05, "loss": 0.5903, "step": 8103 }, { "epoch": 0.23660623047502263, "grad_norm": 0.6587848918939866, "learning_rate": 1.6964801297648015e-05, "loss": 0.6938, "step": 8104 }, { "epoch": 0.23663542670287, "grad_norm": 0.6646299857412948, "learning_rate": 1.6964152473641527e-05, "loss": 0.6012, "step": 8105 }, { "epoch": 0.23666462293071736, "grad_norm": 0.6502889522755427, "learning_rate": 1.696350364963504e-05, "loss": 0.6348, "step": 8106 }, { "epoch": 0.23669381915856472, "grad_norm": 0.6633066905812567, "learning_rate": 1.696285482562855e-05, "loss": 0.5995, "step": 8107 }, { "epoch": 0.23672301538641208, "grad_norm": 0.811184641148763, "learning_rate": 1.6962206001622063e-05, "loss": 0.6726, "step": 8108 }, { "epoch": 0.23675221161425944, "grad_norm": 0.6835745132347821, "learning_rate": 1.696155717761557e-05, "loss": 0.6932, "step": 8109 }, { "epoch": 0.2367814078421068, "grad_norm": 0.6213439570516949, "learning_rate": 1.6960908353609083e-05, "loss": 0.5761, "step": 8110 }, { "epoch": 0.23681060406995416, "grad_norm": 0.69568428731162, "learning_rate": 1.6960259529602595e-05, "loss": 0.6699, "step": 8111 }, { "epoch": 0.23683980029780152, "grad_norm": 0.7621642221138543, "learning_rate": 1.6959610705596107e-05, "loss": 0.8049, "step": 8112 }, { "epoch": 0.23686899652564888, "grad_norm": 0.694669549142807, "learning_rate": 1.695896188158962e-05, "loss": 0.6675, "step": 8113 }, { "epoch": 0.23689819275349625, "grad_norm": 0.6591817936868559, "learning_rate": 1.695831305758313e-05, "loss": 0.6042, "step": 8114 }, { "epoch": 0.2369273889813436, "grad_norm": 0.6752466354444491, "learning_rate": 1.6957664233576644e-05, "loss": 0.6883, "step": 8115 }, { "epoch": 0.23695658520919097, "grad_norm": 0.6194456063663774, "learning_rate": 1.6957015409570156e-05, "loss": 0.5907, "step": 8116 }, { "epoch": 0.23698578143703833, "grad_norm": 0.69008413177047, "learning_rate": 1.6956366585563668e-05, "loss": 0.6446, "step": 8117 }, { "epoch": 0.2370149776648857, "grad_norm": 0.622264479982457, "learning_rate": 1.695571776155718e-05, "loss": 0.604, "step": 8118 }, { "epoch": 0.23704417389273305, "grad_norm": 0.6971682410526175, "learning_rate": 1.695506893755069e-05, "loss": 0.6914, "step": 8119 }, { "epoch": 0.2370733701205804, "grad_norm": 0.5998654201195833, "learning_rate": 1.6954420113544204e-05, "loss": 0.5541, "step": 8120 }, { "epoch": 0.23710256634842777, "grad_norm": 0.6353074323744665, "learning_rate": 1.6953771289537716e-05, "loss": 0.6235, "step": 8121 }, { "epoch": 0.23713176257627513, "grad_norm": 0.5704808804797722, "learning_rate": 1.6953122465531228e-05, "loss": 0.4891, "step": 8122 }, { "epoch": 0.2371609588041225, "grad_norm": 0.7361218169352179, "learning_rate": 1.695247364152474e-05, "loss": 0.7291, "step": 8123 }, { "epoch": 0.23719015503196986, "grad_norm": 0.7408735694662807, "learning_rate": 1.695182481751825e-05, "loss": 0.7167, "step": 8124 }, { "epoch": 0.23721935125981722, "grad_norm": 0.6360574954872357, "learning_rate": 1.695117599351176e-05, "loss": 0.5902, "step": 8125 }, { "epoch": 0.2372485474876646, "grad_norm": 0.6733995689299515, "learning_rate": 1.6950527169505272e-05, "loss": 0.6759, "step": 8126 }, { "epoch": 0.23727774371551197, "grad_norm": 0.7383637261526712, "learning_rate": 1.6949878345498784e-05, "loss": 0.7157, "step": 8127 }, { "epoch": 0.23730693994335933, "grad_norm": 0.6789753110883093, "learning_rate": 1.6949229521492297e-05, "loss": 0.6238, "step": 8128 }, { "epoch": 0.2373361361712067, "grad_norm": 0.6550700365373087, "learning_rate": 1.694858069748581e-05, "loss": 0.5834, "step": 8129 }, { "epoch": 0.23736533239905405, "grad_norm": 0.6522198602629046, "learning_rate": 1.694793187347932e-05, "loss": 0.6148, "step": 8130 }, { "epoch": 0.2373945286269014, "grad_norm": 0.6935843747429846, "learning_rate": 1.6947283049472833e-05, "loss": 0.657, "step": 8131 }, { "epoch": 0.23742372485474877, "grad_norm": 0.6906670698371156, "learning_rate": 1.6946634225466345e-05, "loss": 0.6821, "step": 8132 }, { "epoch": 0.23745292108259614, "grad_norm": 0.691330996850899, "learning_rate": 1.6945985401459853e-05, "loss": 0.7198, "step": 8133 }, { "epoch": 0.2374821173104435, "grad_norm": 0.6793153216601379, "learning_rate": 1.6945336577453365e-05, "loss": 0.6418, "step": 8134 }, { "epoch": 0.23751131353829086, "grad_norm": 0.7188018630288341, "learning_rate": 1.6944687753446877e-05, "loss": 0.6719, "step": 8135 }, { "epoch": 0.23754050976613822, "grad_norm": 0.6334338916519405, "learning_rate": 1.6944038929440393e-05, "loss": 0.566, "step": 8136 }, { "epoch": 0.23756970599398558, "grad_norm": 0.6290384127639959, "learning_rate": 1.6943390105433905e-05, "loss": 0.6263, "step": 8137 }, { "epoch": 0.23759890222183294, "grad_norm": 0.7375304269953885, "learning_rate": 1.6942741281427413e-05, "loss": 0.6918, "step": 8138 }, { "epoch": 0.2376280984496803, "grad_norm": 0.6480656765238741, "learning_rate": 1.6942092457420925e-05, "loss": 0.5786, "step": 8139 }, { "epoch": 0.23765729467752766, "grad_norm": 0.6453638778910351, "learning_rate": 1.6941443633414437e-05, "loss": 0.6132, "step": 8140 }, { "epoch": 0.23768649090537503, "grad_norm": 0.6417134769516298, "learning_rate": 1.694079480940795e-05, "loss": 0.6467, "step": 8141 }, { "epoch": 0.2377156871332224, "grad_norm": 0.6032570072839661, "learning_rate": 1.694014598540146e-05, "loss": 0.5463, "step": 8142 }, { "epoch": 0.23774488336106975, "grad_norm": 0.6751800595727603, "learning_rate": 1.6939497161394974e-05, "loss": 0.6781, "step": 8143 }, { "epoch": 0.2377740795889171, "grad_norm": 0.6119139711883036, "learning_rate": 1.6938848337388486e-05, "loss": 0.5824, "step": 8144 }, { "epoch": 0.23780327581676447, "grad_norm": 0.8056093505967824, "learning_rate": 1.6938199513381998e-05, "loss": 0.668, "step": 8145 }, { "epoch": 0.23783247204461183, "grad_norm": 0.6573730103356435, "learning_rate": 1.693755068937551e-05, "loss": 0.6247, "step": 8146 }, { "epoch": 0.2378616682724592, "grad_norm": 0.679298403548588, "learning_rate": 1.6936901865369018e-05, "loss": 0.7045, "step": 8147 }, { "epoch": 0.23789086450030655, "grad_norm": 0.6314643690720796, "learning_rate": 1.693625304136253e-05, "loss": 0.605, "step": 8148 }, { "epoch": 0.23792006072815391, "grad_norm": 0.6564774672320964, "learning_rate": 1.6935604217356042e-05, "loss": 0.6268, "step": 8149 }, { "epoch": 0.23794925695600128, "grad_norm": 0.6068393496553665, "learning_rate": 1.6934955393349554e-05, "loss": 0.5926, "step": 8150 }, { "epoch": 0.23797845318384864, "grad_norm": 0.6308579664119884, "learning_rate": 1.6934306569343066e-05, "loss": 0.5701, "step": 8151 }, { "epoch": 0.238007649411696, "grad_norm": 0.6647316017230986, "learning_rate": 1.693365774533658e-05, "loss": 0.6635, "step": 8152 }, { "epoch": 0.23803684563954336, "grad_norm": 0.6521222595677388, "learning_rate": 1.693300892133009e-05, "loss": 0.6582, "step": 8153 }, { "epoch": 0.23806604186739072, "grad_norm": 0.737931654243514, "learning_rate": 1.6932360097323602e-05, "loss": 0.7349, "step": 8154 }, { "epoch": 0.23809523809523808, "grad_norm": 0.7264917447630787, "learning_rate": 1.6931711273317114e-05, "loss": 0.6612, "step": 8155 }, { "epoch": 0.23812443432308547, "grad_norm": 0.7197679773431611, "learning_rate": 1.6931062449310626e-05, "loss": 0.6716, "step": 8156 }, { "epoch": 0.23815363055093283, "grad_norm": 0.669583060293528, "learning_rate": 1.693041362530414e-05, "loss": 0.6058, "step": 8157 }, { "epoch": 0.2381828267787802, "grad_norm": 0.6803413543729538, "learning_rate": 1.692976480129765e-05, "loss": 0.6375, "step": 8158 }, { "epoch": 0.23821202300662755, "grad_norm": 0.6302073389933753, "learning_rate": 1.6929115977291163e-05, "loss": 0.6172, "step": 8159 }, { "epoch": 0.23824121923447492, "grad_norm": 0.720806831985892, "learning_rate": 1.6928467153284675e-05, "loss": 0.671, "step": 8160 }, { "epoch": 0.23827041546232228, "grad_norm": 0.6278850955217823, "learning_rate": 1.6927818329278187e-05, "loss": 0.5875, "step": 8161 }, { "epoch": 0.23829961169016964, "grad_norm": 0.6566194119719502, "learning_rate": 1.6927169505271695e-05, "loss": 0.5936, "step": 8162 }, { "epoch": 0.238328807918017, "grad_norm": 0.655320225010279, "learning_rate": 1.6926520681265207e-05, "loss": 0.65, "step": 8163 }, { "epoch": 0.23835800414586436, "grad_norm": 0.6912654122757522, "learning_rate": 1.692587185725872e-05, "loss": 0.6671, "step": 8164 }, { "epoch": 0.23838720037371172, "grad_norm": 0.6207138716424908, "learning_rate": 1.692522303325223e-05, "loss": 0.5789, "step": 8165 }, { "epoch": 0.23841639660155908, "grad_norm": 0.6710498948605921, "learning_rate": 1.6924574209245743e-05, "loss": 0.6178, "step": 8166 }, { "epoch": 0.23844559282940644, "grad_norm": 0.686970850189392, "learning_rate": 1.6923925385239255e-05, "loss": 0.6605, "step": 8167 }, { "epoch": 0.2384747890572538, "grad_norm": 0.6334175130503776, "learning_rate": 1.6923276561232767e-05, "loss": 0.5614, "step": 8168 }, { "epoch": 0.23850398528510117, "grad_norm": 0.6422735430929174, "learning_rate": 1.692262773722628e-05, "loss": 0.6122, "step": 8169 }, { "epoch": 0.23853318151294853, "grad_norm": 0.666626370524777, "learning_rate": 1.692197891321979e-05, "loss": 0.6911, "step": 8170 }, { "epoch": 0.2385623777407959, "grad_norm": 0.6494065708395939, "learning_rate": 1.69213300892133e-05, "loss": 0.614, "step": 8171 }, { "epoch": 0.23859157396864325, "grad_norm": 0.6956115269610664, "learning_rate": 1.6920681265206812e-05, "loss": 0.64, "step": 8172 }, { "epoch": 0.2386207701964906, "grad_norm": 0.6301295515871402, "learning_rate": 1.6920032441200324e-05, "loss": 0.5562, "step": 8173 }, { "epoch": 0.23864996642433797, "grad_norm": 0.6712239965741564, "learning_rate": 1.691938361719384e-05, "loss": 0.674, "step": 8174 }, { "epoch": 0.23867916265218533, "grad_norm": 0.7022871931451771, "learning_rate": 1.691873479318735e-05, "loss": 0.7068, "step": 8175 }, { "epoch": 0.2387083588800327, "grad_norm": 0.6295136319417486, "learning_rate": 1.691808596918086e-05, "loss": 0.5411, "step": 8176 }, { "epoch": 0.23873755510788006, "grad_norm": 0.7015720181859436, "learning_rate": 1.6917437145174372e-05, "loss": 0.7136, "step": 8177 }, { "epoch": 0.23876675133572742, "grad_norm": 0.6535220195033032, "learning_rate": 1.6916788321167884e-05, "loss": 0.6798, "step": 8178 }, { "epoch": 0.23879594756357478, "grad_norm": 0.7041995854953211, "learning_rate": 1.6916139497161396e-05, "loss": 0.71, "step": 8179 }, { "epoch": 0.23882514379142214, "grad_norm": 0.6425055079364728, "learning_rate": 1.691549067315491e-05, "loss": 0.5774, "step": 8180 }, { "epoch": 0.2388543400192695, "grad_norm": 0.6633492265665248, "learning_rate": 1.691484184914842e-05, "loss": 0.6723, "step": 8181 }, { "epoch": 0.23888353624711686, "grad_norm": 0.6641195666718711, "learning_rate": 1.6914193025141932e-05, "loss": 0.6587, "step": 8182 }, { "epoch": 0.23891273247496422, "grad_norm": 0.6634262167956969, "learning_rate": 1.6913544201135444e-05, "loss": 0.638, "step": 8183 }, { "epoch": 0.23894192870281158, "grad_norm": 0.7195290384589966, "learning_rate": 1.6912895377128956e-05, "loss": 0.718, "step": 8184 }, { "epoch": 0.23897112493065895, "grad_norm": 0.643903714650576, "learning_rate": 1.6912246553122465e-05, "loss": 0.6588, "step": 8185 }, { "epoch": 0.23900032115850633, "grad_norm": 0.8971768990356984, "learning_rate": 1.6911597729115977e-05, "loss": 0.7514, "step": 8186 }, { "epoch": 0.2390295173863537, "grad_norm": 0.6585497478481245, "learning_rate": 1.691094890510949e-05, "loss": 0.6692, "step": 8187 }, { "epoch": 0.23905871361420106, "grad_norm": 0.6607541945811285, "learning_rate": 1.6910300081103e-05, "loss": 0.6426, "step": 8188 }, { "epoch": 0.23908790984204842, "grad_norm": 0.6944000041726521, "learning_rate": 1.6909651257096513e-05, "loss": 0.6752, "step": 8189 }, { "epoch": 0.23911710606989578, "grad_norm": 0.6939059686064791, "learning_rate": 1.6909002433090025e-05, "loss": 0.6435, "step": 8190 }, { "epoch": 0.23914630229774314, "grad_norm": 0.6384788220255506, "learning_rate": 1.6908353609083537e-05, "loss": 0.6118, "step": 8191 }, { "epoch": 0.2391754985255905, "grad_norm": 0.6376308645237374, "learning_rate": 1.690770478507705e-05, "loss": 0.6333, "step": 8192 }, { "epoch": 0.23920469475343786, "grad_norm": 0.6725782979486921, "learning_rate": 1.690705596107056e-05, "loss": 0.6474, "step": 8193 }, { "epoch": 0.23923389098128522, "grad_norm": 0.6442822976525855, "learning_rate": 1.6906407137064073e-05, "loss": 0.6049, "step": 8194 }, { "epoch": 0.23926308720913259, "grad_norm": 0.6962308190873504, "learning_rate": 1.6905758313057585e-05, "loss": 0.6829, "step": 8195 }, { "epoch": 0.23929228343697995, "grad_norm": 0.6425657808106269, "learning_rate": 1.6905109489051097e-05, "loss": 0.6159, "step": 8196 }, { "epoch": 0.2393214796648273, "grad_norm": 0.7031056997257833, "learning_rate": 1.690446066504461e-05, "loss": 0.667, "step": 8197 }, { "epoch": 0.23935067589267467, "grad_norm": 0.7683272629355365, "learning_rate": 1.690381184103812e-05, "loss": 0.715, "step": 8198 }, { "epoch": 0.23937987212052203, "grad_norm": 0.6452104679443648, "learning_rate": 1.6903163017031634e-05, "loss": 0.6265, "step": 8199 }, { "epoch": 0.2394090683483694, "grad_norm": 0.6799556624919841, "learning_rate": 1.6902514193025142e-05, "loss": 0.6431, "step": 8200 }, { "epoch": 0.23943826457621675, "grad_norm": 0.7218114014690225, "learning_rate": 1.6901865369018654e-05, "loss": 0.731, "step": 8201 }, { "epoch": 0.23946746080406411, "grad_norm": 0.6822193838107716, "learning_rate": 1.6901216545012166e-05, "loss": 0.6131, "step": 8202 }, { "epoch": 0.23949665703191148, "grad_norm": 0.6425583281684434, "learning_rate": 1.6900567721005678e-05, "loss": 0.6115, "step": 8203 }, { "epoch": 0.23952585325975884, "grad_norm": 0.7273650378578103, "learning_rate": 1.689991889699919e-05, "loss": 0.7181, "step": 8204 }, { "epoch": 0.2395550494876062, "grad_norm": 0.6654306226231725, "learning_rate": 1.6899270072992702e-05, "loss": 0.623, "step": 8205 }, { "epoch": 0.23958424571545356, "grad_norm": 0.6614509708262388, "learning_rate": 1.6898621248986214e-05, "loss": 0.536, "step": 8206 }, { "epoch": 0.23961344194330092, "grad_norm": 0.712185217508956, "learning_rate": 1.6897972424979726e-05, "loss": 0.659, "step": 8207 }, { "epoch": 0.23964263817114828, "grad_norm": 0.6791810662432021, "learning_rate": 1.6897323600973235e-05, "loss": 0.6483, "step": 8208 }, { "epoch": 0.23967183439899564, "grad_norm": 0.6676443133197443, "learning_rate": 1.6896674776966747e-05, "loss": 0.6264, "step": 8209 }, { "epoch": 0.239701030626843, "grad_norm": 0.6841380027016111, "learning_rate": 1.689602595296026e-05, "loss": 0.656, "step": 8210 }, { "epoch": 0.23973022685469036, "grad_norm": 1.039309080667362, "learning_rate": 1.689537712895377e-05, "loss": 0.6353, "step": 8211 }, { "epoch": 0.23975942308253773, "grad_norm": 0.6575769592691708, "learning_rate": 1.6894728304947286e-05, "loss": 0.6242, "step": 8212 }, { "epoch": 0.2397886193103851, "grad_norm": 0.6347647489730753, "learning_rate": 1.68940794809408e-05, "loss": 0.6002, "step": 8213 }, { "epoch": 0.23981781553823245, "grad_norm": 0.698158669399008, "learning_rate": 1.6893430656934307e-05, "loss": 0.681, "step": 8214 }, { "epoch": 0.2398470117660798, "grad_norm": 0.6263513840846922, "learning_rate": 1.689278183292782e-05, "loss": 0.6177, "step": 8215 }, { "epoch": 0.23987620799392717, "grad_norm": 0.6879213580070149, "learning_rate": 1.689213300892133e-05, "loss": 0.6506, "step": 8216 }, { "epoch": 0.23990540422177456, "grad_norm": 0.6507809708826239, "learning_rate": 1.6891484184914843e-05, "loss": 0.6351, "step": 8217 }, { "epoch": 0.23993460044962192, "grad_norm": 0.6534762965937635, "learning_rate": 1.6890835360908355e-05, "loss": 0.6153, "step": 8218 }, { "epoch": 0.23996379667746928, "grad_norm": 0.707369530383296, "learning_rate": 1.6890186536901867e-05, "loss": 0.6937, "step": 8219 }, { "epoch": 0.23999299290531664, "grad_norm": 0.6777621467844274, "learning_rate": 1.688953771289538e-05, "loss": 0.6578, "step": 8220 }, { "epoch": 0.240022189133164, "grad_norm": 0.6917275860565333, "learning_rate": 1.688888888888889e-05, "loss": 0.6814, "step": 8221 }, { "epoch": 0.24005138536101137, "grad_norm": 0.6171633753520898, "learning_rate": 1.6888240064882403e-05, "loss": 0.603, "step": 8222 }, { "epoch": 0.24008058158885873, "grad_norm": 0.7765123062360496, "learning_rate": 1.6887591240875912e-05, "loss": 0.7767, "step": 8223 }, { "epoch": 0.2401097778167061, "grad_norm": 0.6655450985620236, "learning_rate": 1.6886942416869424e-05, "loss": 0.6606, "step": 8224 }, { "epoch": 0.24013897404455345, "grad_norm": 0.7253977653847031, "learning_rate": 1.6886293592862936e-05, "loss": 0.7308, "step": 8225 }, { "epoch": 0.2401681702724008, "grad_norm": 0.6654053456989835, "learning_rate": 1.6885644768856448e-05, "loss": 0.6441, "step": 8226 }, { "epoch": 0.24019736650024817, "grad_norm": 0.646532697782869, "learning_rate": 1.688499594484996e-05, "loss": 0.5923, "step": 8227 }, { "epoch": 0.24022656272809553, "grad_norm": 0.7030407240027552, "learning_rate": 1.6884347120843472e-05, "loss": 0.6702, "step": 8228 }, { "epoch": 0.2402557589559429, "grad_norm": 0.6808367285337364, "learning_rate": 1.6883698296836984e-05, "loss": 0.6931, "step": 8229 }, { "epoch": 0.24028495518379026, "grad_norm": 0.6416385517374805, "learning_rate": 1.6883049472830496e-05, "loss": 0.6215, "step": 8230 }, { "epoch": 0.24031415141163762, "grad_norm": 0.687481229941495, "learning_rate": 1.6882400648824008e-05, "loss": 0.7221, "step": 8231 }, { "epoch": 0.24034334763948498, "grad_norm": 0.6669454280446262, "learning_rate": 1.688175182481752e-05, "loss": 0.6386, "step": 8232 }, { "epoch": 0.24037254386733234, "grad_norm": 0.6220127250752939, "learning_rate": 1.6881103000811032e-05, "loss": 0.5535, "step": 8233 }, { "epoch": 0.2404017400951797, "grad_norm": 0.6064199560090074, "learning_rate": 1.6880454176804544e-05, "loss": 0.5289, "step": 8234 }, { "epoch": 0.24043093632302706, "grad_norm": 0.6806596679367378, "learning_rate": 1.6879805352798056e-05, "loss": 0.7077, "step": 8235 }, { "epoch": 0.24046013255087442, "grad_norm": 0.9398747413538372, "learning_rate": 1.687915652879157e-05, "loss": 0.7608, "step": 8236 }, { "epoch": 0.24048932877872178, "grad_norm": 0.6593565339769251, "learning_rate": 1.687850770478508e-05, "loss": 0.6277, "step": 8237 }, { "epoch": 0.24051852500656914, "grad_norm": 0.7203879434253564, "learning_rate": 1.687785888077859e-05, "loss": 0.7352, "step": 8238 }, { "epoch": 0.2405477212344165, "grad_norm": 0.7304572913119521, "learning_rate": 1.68772100567721e-05, "loss": 0.7028, "step": 8239 }, { "epoch": 0.24057691746226387, "grad_norm": 0.7100685616607626, "learning_rate": 1.6876561232765613e-05, "loss": 0.619, "step": 8240 }, { "epoch": 0.24060611369011123, "grad_norm": 0.6648608467902443, "learning_rate": 1.6875912408759125e-05, "loss": 0.6012, "step": 8241 }, { "epoch": 0.2406353099179586, "grad_norm": 0.7076164103913725, "learning_rate": 1.6875263584752637e-05, "loss": 0.7204, "step": 8242 }, { "epoch": 0.24066450614580595, "grad_norm": 0.6640443551201883, "learning_rate": 1.687461476074615e-05, "loss": 0.6008, "step": 8243 }, { "epoch": 0.2406937023736533, "grad_norm": 0.6534624161799205, "learning_rate": 1.687396593673966e-05, "loss": 0.6489, "step": 8244 }, { "epoch": 0.24072289860150067, "grad_norm": 0.6562049534842168, "learning_rate": 1.6873317112733173e-05, "loss": 0.6305, "step": 8245 }, { "epoch": 0.24075209482934803, "grad_norm": 0.6619217727524156, "learning_rate": 1.6872668288726682e-05, "loss": 0.6498, "step": 8246 }, { "epoch": 0.24078129105719542, "grad_norm": 0.7086689458011164, "learning_rate": 1.6872019464720194e-05, "loss": 0.7169, "step": 8247 }, { "epoch": 0.24081048728504278, "grad_norm": 0.7280332468395005, "learning_rate": 1.6871370640713706e-05, "loss": 0.6738, "step": 8248 }, { "epoch": 0.24083968351289015, "grad_norm": 0.697552383982276, "learning_rate": 1.687072181670722e-05, "loss": 0.6574, "step": 8249 }, { "epoch": 0.2408688797407375, "grad_norm": 0.6574115924840839, "learning_rate": 1.6870072992700733e-05, "loss": 0.6308, "step": 8250 }, { "epoch": 0.24089807596858487, "grad_norm": 0.6989565432646564, "learning_rate": 1.6869424168694245e-05, "loss": 0.7119, "step": 8251 }, { "epoch": 0.24092727219643223, "grad_norm": 0.6581160835848449, "learning_rate": 1.6868775344687754e-05, "loss": 0.5857, "step": 8252 }, { "epoch": 0.2409564684242796, "grad_norm": 0.6490099667175546, "learning_rate": 1.6868126520681266e-05, "loss": 0.5862, "step": 8253 }, { "epoch": 0.24098566465212695, "grad_norm": 0.7041559248233257, "learning_rate": 1.6867477696674778e-05, "loss": 0.6369, "step": 8254 }, { "epoch": 0.2410148608799743, "grad_norm": 0.6956365357637379, "learning_rate": 1.686682887266829e-05, "loss": 0.6681, "step": 8255 }, { "epoch": 0.24104405710782167, "grad_norm": 0.6464086707224124, "learning_rate": 1.6866180048661802e-05, "loss": 0.6007, "step": 8256 }, { "epoch": 0.24107325333566904, "grad_norm": 0.6689742893475965, "learning_rate": 1.6865531224655314e-05, "loss": 0.6533, "step": 8257 }, { "epoch": 0.2411024495635164, "grad_norm": 0.7054006584768663, "learning_rate": 1.6864882400648826e-05, "loss": 0.693, "step": 8258 }, { "epoch": 0.24113164579136376, "grad_norm": 0.6656194542655169, "learning_rate": 1.6864233576642338e-05, "loss": 0.5949, "step": 8259 }, { "epoch": 0.24116084201921112, "grad_norm": 0.6531546998291694, "learning_rate": 1.686358475263585e-05, "loss": 0.6116, "step": 8260 }, { "epoch": 0.24119003824705848, "grad_norm": 0.743491957861973, "learning_rate": 1.686293592862936e-05, "loss": 0.7543, "step": 8261 }, { "epoch": 0.24121923447490584, "grad_norm": 0.7019693929055522, "learning_rate": 1.686228710462287e-05, "loss": 0.6878, "step": 8262 }, { "epoch": 0.2412484307027532, "grad_norm": 0.6775154217157456, "learning_rate": 1.6861638280616383e-05, "loss": 0.6469, "step": 8263 }, { "epoch": 0.24127762693060056, "grad_norm": 0.6982922424960022, "learning_rate": 1.6860989456609895e-05, "loss": 0.6893, "step": 8264 }, { "epoch": 0.24130682315844793, "grad_norm": 0.6806693464384505, "learning_rate": 1.6860340632603407e-05, "loss": 0.6696, "step": 8265 }, { "epoch": 0.2413360193862953, "grad_norm": 0.7837388987374898, "learning_rate": 1.685969180859692e-05, "loss": 0.7116, "step": 8266 }, { "epoch": 0.24136521561414265, "grad_norm": 0.699044567284988, "learning_rate": 1.685904298459043e-05, "loss": 0.7026, "step": 8267 }, { "epoch": 0.24139441184199, "grad_norm": 0.6653597131193825, "learning_rate": 1.6858394160583943e-05, "loss": 0.6239, "step": 8268 }, { "epoch": 0.24142360806983737, "grad_norm": 0.6587732322559381, "learning_rate": 1.6857745336577455e-05, "loss": 0.6453, "step": 8269 }, { "epoch": 0.24145280429768473, "grad_norm": 0.6966216277036729, "learning_rate": 1.6857096512570967e-05, "loss": 0.6787, "step": 8270 }, { "epoch": 0.2414820005255321, "grad_norm": 0.6153081602340374, "learning_rate": 1.685644768856448e-05, "loss": 0.6153, "step": 8271 }, { "epoch": 0.24151119675337945, "grad_norm": 0.6709279710920245, "learning_rate": 1.685579886455799e-05, "loss": 0.6774, "step": 8272 }, { "epoch": 0.24154039298122681, "grad_norm": 0.6223988053129591, "learning_rate": 1.6855150040551503e-05, "loss": 0.5864, "step": 8273 }, { "epoch": 0.24156958920907418, "grad_norm": 0.7299371000343668, "learning_rate": 1.6854501216545015e-05, "loss": 0.69, "step": 8274 }, { "epoch": 0.24159878543692154, "grad_norm": 0.6907226451522531, "learning_rate": 1.6853852392538527e-05, "loss": 0.71, "step": 8275 }, { "epoch": 0.2416279816647689, "grad_norm": 0.6671558121366841, "learning_rate": 1.6853203568532036e-05, "loss": 0.6476, "step": 8276 }, { "epoch": 0.2416571778926163, "grad_norm": 0.6441419784185567, "learning_rate": 1.6852554744525548e-05, "loss": 0.6069, "step": 8277 }, { "epoch": 0.24168637412046365, "grad_norm": 0.681671752487693, "learning_rate": 1.685190592051906e-05, "loss": 0.6867, "step": 8278 }, { "epoch": 0.241715570348311, "grad_norm": 0.7103997499723098, "learning_rate": 1.6851257096512572e-05, "loss": 0.7256, "step": 8279 }, { "epoch": 0.24174476657615837, "grad_norm": 0.728468886460275, "learning_rate": 1.6850608272506084e-05, "loss": 0.7144, "step": 8280 }, { "epoch": 0.24177396280400573, "grad_norm": 0.6774994676355272, "learning_rate": 1.6849959448499596e-05, "loss": 0.6442, "step": 8281 }, { "epoch": 0.2418031590318531, "grad_norm": 0.6634759415793521, "learning_rate": 1.6849310624493108e-05, "loss": 0.6509, "step": 8282 }, { "epoch": 0.24183235525970045, "grad_norm": 0.6590497914803977, "learning_rate": 1.684866180048662e-05, "loss": 0.635, "step": 8283 }, { "epoch": 0.24186155148754782, "grad_norm": 0.6574103646509006, "learning_rate": 1.684801297648013e-05, "loss": 0.6338, "step": 8284 }, { "epoch": 0.24189074771539518, "grad_norm": 0.6820167889761645, "learning_rate": 1.684736415247364e-05, "loss": 0.6639, "step": 8285 }, { "epoch": 0.24191994394324254, "grad_norm": 0.6238370111319522, "learning_rate": 1.6846715328467153e-05, "loss": 0.5998, "step": 8286 }, { "epoch": 0.2419491401710899, "grad_norm": 0.6717137872055765, "learning_rate": 1.6846066504460668e-05, "loss": 0.6255, "step": 8287 }, { "epoch": 0.24197833639893726, "grad_norm": 0.6460025284675276, "learning_rate": 1.684541768045418e-05, "loss": 0.6468, "step": 8288 }, { "epoch": 0.24200753262678462, "grad_norm": 0.6551771639112532, "learning_rate": 1.6844768856447692e-05, "loss": 0.6237, "step": 8289 }, { "epoch": 0.24203672885463198, "grad_norm": 0.7239445838607085, "learning_rate": 1.68441200324412e-05, "loss": 0.7676, "step": 8290 }, { "epoch": 0.24206592508247934, "grad_norm": 0.6323649191265827, "learning_rate": 1.6843471208434713e-05, "loss": 0.6011, "step": 8291 }, { "epoch": 0.2420951213103267, "grad_norm": 0.6927360872974386, "learning_rate": 1.6842822384428225e-05, "loss": 0.6291, "step": 8292 }, { "epoch": 0.24212431753817407, "grad_norm": 0.7816323417308235, "learning_rate": 1.6842173560421737e-05, "loss": 0.744, "step": 8293 }, { "epoch": 0.24215351376602143, "grad_norm": 0.7332230948854654, "learning_rate": 1.684152473641525e-05, "loss": 0.7315, "step": 8294 }, { "epoch": 0.2421827099938688, "grad_norm": 0.639835875277794, "learning_rate": 1.684087591240876e-05, "loss": 0.5875, "step": 8295 }, { "epoch": 0.24221190622171615, "grad_norm": 0.6075527835102047, "learning_rate": 1.6840227088402273e-05, "loss": 0.5654, "step": 8296 }, { "epoch": 0.2422411024495635, "grad_norm": 0.6333301373919903, "learning_rate": 1.6839578264395785e-05, "loss": 0.5916, "step": 8297 }, { "epoch": 0.24227029867741087, "grad_norm": 0.9087558021568346, "learning_rate": 1.6838929440389297e-05, "loss": 0.6959, "step": 8298 }, { "epoch": 0.24229949490525823, "grad_norm": 0.6718276442354162, "learning_rate": 1.6838280616382806e-05, "loss": 0.6616, "step": 8299 }, { "epoch": 0.2423286911331056, "grad_norm": 0.6526793440570516, "learning_rate": 1.6837631792376318e-05, "loss": 0.6695, "step": 8300 }, { "epoch": 0.24235788736095296, "grad_norm": 0.706828525395913, "learning_rate": 1.683698296836983e-05, "loss": 0.7332, "step": 8301 }, { "epoch": 0.24238708358880032, "grad_norm": 0.6691940234221467, "learning_rate": 1.6836334144363342e-05, "loss": 0.7068, "step": 8302 }, { "epoch": 0.24241627981664768, "grad_norm": 0.688367593632489, "learning_rate": 1.6835685320356854e-05, "loss": 0.6196, "step": 8303 }, { "epoch": 0.24244547604449504, "grad_norm": 0.671813825218331, "learning_rate": 1.6835036496350366e-05, "loss": 0.6492, "step": 8304 }, { "epoch": 0.2424746722723424, "grad_norm": 0.7092915096870671, "learning_rate": 1.6834387672343878e-05, "loss": 0.6925, "step": 8305 }, { "epoch": 0.24250386850018976, "grad_norm": 0.7465414093711489, "learning_rate": 1.683373884833739e-05, "loss": 0.7355, "step": 8306 }, { "epoch": 0.24253306472803715, "grad_norm": 0.6654958867604668, "learning_rate": 1.6833090024330902e-05, "loss": 0.6673, "step": 8307 }, { "epoch": 0.2425622609558845, "grad_norm": 0.6465290600526795, "learning_rate": 1.6832441200324414e-05, "loss": 0.6244, "step": 8308 }, { "epoch": 0.24259145718373187, "grad_norm": 0.6433318099498491, "learning_rate": 1.6831792376317926e-05, "loss": 0.6182, "step": 8309 }, { "epoch": 0.24262065341157923, "grad_norm": 0.6731073349079627, "learning_rate": 1.6831143552311438e-05, "loss": 0.6943, "step": 8310 }, { "epoch": 0.2426498496394266, "grad_norm": 0.6148252922369521, "learning_rate": 1.683049472830495e-05, "loss": 0.567, "step": 8311 }, { "epoch": 0.24267904586727396, "grad_norm": 0.7547318784765747, "learning_rate": 1.6829845904298462e-05, "loss": 0.7787, "step": 8312 }, { "epoch": 0.24270824209512132, "grad_norm": 0.6989976820866703, "learning_rate": 1.682919708029197e-05, "loss": 0.6081, "step": 8313 }, { "epoch": 0.24273743832296868, "grad_norm": 0.7586643203792032, "learning_rate": 1.6828548256285483e-05, "loss": 0.7785, "step": 8314 }, { "epoch": 0.24276663455081604, "grad_norm": 0.6896345419975097, "learning_rate": 1.6827899432278995e-05, "loss": 0.6853, "step": 8315 }, { "epoch": 0.2427958307786634, "grad_norm": 0.8370168573061285, "learning_rate": 1.6827250608272507e-05, "loss": 0.6409, "step": 8316 }, { "epoch": 0.24282502700651076, "grad_norm": 0.6488023873961694, "learning_rate": 1.682660178426602e-05, "loss": 0.6361, "step": 8317 }, { "epoch": 0.24285422323435812, "grad_norm": 0.6892275298939161, "learning_rate": 1.682595296025953e-05, "loss": 0.6635, "step": 8318 }, { "epoch": 0.24288341946220549, "grad_norm": 0.7209237547239248, "learning_rate": 1.6825304136253043e-05, "loss": 0.6893, "step": 8319 }, { "epoch": 0.24291261569005285, "grad_norm": 0.6866443146844053, "learning_rate": 1.6824655312246555e-05, "loss": 0.6461, "step": 8320 }, { "epoch": 0.2429418119179002, "grad_norm": 0.6543785511536994, "learning_rate": 1.6824006488240067e-05, "loss": 0.6153, "step": 8321 }, { "epoch": 0.24297100814574757, "grad_norm": 0.6485660627672221, "learning_rate": 1.6823357664233576e-05, "loss": 0.5744, "step": 8322 }, { "epoch": 0.24300020437359493, "grad_norm": 0.6529314450368042, "learning_rate": 1.6822708840227088e-05, "loss": 0.6204, "step": 8323 }, { "epoch": 0.2430294006014423, "grad_norm": 0.7935556642241879, "learning_rate": 1.68220600162206e-05, "loss": 0.7363, "step": 8324 }, { "epoch": 0.24305859682928965, "grad_norm": 0.652655289247242, "learning_rate": 1.6821411192214115e-05, "loss": 0.6008, "step": 8325 }, { "epoch": 0.243087793057137, "grad_norm": 0.7189034725708043, "learning_rate": 1.6820762368207627e-05, "loss": 0.7426, "step": 8326 }, { "epoch": 0.24311698928498437, "grad_norm": 0.6838492883472662, "learning_rate": 1.682011354420114e-05, "loss": 0.6278, "step": 8327 }, { "epoch": 0.24314618551283174, "grad_norm": 0.6772245312279735, "learning_rate": 1.6819464720194648e-05, "loss": 0.6857, "step": 8328 }, { "epoch": 0.2431753817406791, "grad_norm": 0.6411722729939586, "learning_rate": 1.681881589618816e-05, "loss": 0.5916, "step": 8329 }, { "epoch": 0.24320457796852646, "grad_norm": 0.6417633970664027, "learning_rate": 1.6818167072181672e-05, "loss": 0.6124, "step": 8330 }, { "epoch": 0.24323377419637382, "grad_norm": 0.7091607420775168, "learning_rate": 1.6817518248175184e-05, "loss": 0.6661, "step": 8331 }, { "epoch": 0.24326297042422118, "grad_norm": 0.622449474241252, "learning_rate": 1.6816869424168696e-05, "loss": 0.5657, "step": 8332 }, { "epoch": 0.24329216665206854, "grad_norm": 0.6724334551068036, "learning_rate": 1.6816220600162208e-05, "loss": 0.6864, "step": 8333 }, { "epoch": 0.2433213628799159, "grad_norm": 0.6803988716137248, "learning_rate": 1.681557177615572e-05, "loss": 0.6271, "step": 8334 }, { "epoch": 0.24335055910776326, "grad_norm": 0.7215310080999978, "learning_rate": 1.6814922952149232e-05, "loss": 0.7169, "step": 8335 }, { "epoch": 0.24337975533561063, "grad_norm": 0.7162921133501707, "learning_rate": 1.6814274128142744e-05, "loss": 0.7201, "step": 8336 }, { "epoch": 0.24340895156345801, "grad_norm": 0.6399501115179163, "learning_rate": 1.6813625304136253e-05, "loss": 0.6018, "step": 8337 }, { "epoch": 0.24343814779130538, "grad_norm": 0.6890279410714018, "learning_rate": 1.6812976480129765e-05, "loss": 0.6614, "step": 8338 }, { "epoch": 0.24346734401915274, "grad_norm": 0.6559891864953022, "learning_rate": 1.6812327656123277e-05, "loss": 0.6135, "step": 8339 }, { "epoch": 0.2434965402470001, "grad_norm": 0.6860442827692346, "learning_rate": 1.681167883211679e-05, "loss": 0.6691, "step": 8340 }, { "epoch": 0.24352573647484746, "grad_norm": 0.7401467838154857, "learning_rate": 1.68110300081103e-05, "loss": 0.7408, "step": 8341 }, { "epoch": 0.24355493270269482, "grad_norm": 0.6630062184678104, "learning_rate": 1.6810381184103813e-05, "loss": 0.6975, "step": 8342 }, { "epoch": 0.24358412893054218, "grad_norm": 0.6283089344943371, "learning_rate": 1.6809732360097325e-05, "loss": 0.5753, "step": 8343 }, { "epoch": 0.24361332515838954, "grad_norm": 0.7202555863973775, "learning_rate": 1.6809083536090837e-05, "loss": 0.6979, "step": 8344 }, { "epoch": 0.2436425213862369, "grad_norm": 0.7153093969470692, "learning_rate": 1.680843471208435e-05, "loss": 0.7333, "step": 8345 }, { "epoch": 0.24367171761408427, "grad_norm": 0.6063975352535302, "learning_rate": 1.680778588807786e-05, "loss": 0.5597, "step": 8346 }, { "epoch": 0.24370091384193163, "grad_norm": 0.6426800920050013, "learning_rate": 1.6807137064071373e-05, "loss": 0.6077, "step": 8347 }, { "epoch": 0.243730110069779, "grad_norm": 0.7342483944429581, "learning_rate": 1.6806488240064885e-05, "loss": 0.7765, "step": 8348 }, { "epoch": 0.24375930629762635, "grad_norm": 0.6819738486231165, "learning_rate": 1.6805839416058397e-05, "loss": 0.6448, "step": 8349 }, { "epoch": 0.2437885025254737, "grad_norm": 0.6285861850850397, "learning_rate": 1.680519059205191e-05, "loss": 0.5864, "step": 8350 }, { "epoch": 0.24381769875332107, "grad_norm": 0.6782953146455973, "learning_rate": 1.6804541768045418e-05, "loss": 0.6662, "step": 8351 }, { "epoch": 0.24384689498116843, "grad_norm": 0.6534513944087279, "learning_rate": 1.680389294403893e-05, "loss": 0.6078, "step": 8352 }, { "epoch": 0.2438760912090158, "grad_norm": 0.637903005886362, "learning_rate": 1.680324412003244e-05, "loss": 0.6149, "step": 8353 }, { "epoch": 0.24390528743686316, "grad_norm": 0.6641402348411733, "learning_rate": 1.6802595296025954e-05, "loss": 0.5873, "step": 8354 }, { "epoch": 0.24393448366471052, "grad_norm": 0.6795371236904816, "learning_rate": 1.6801946472019466e-05, "loss": 0.6727, "step": 8355 }, { "epoch": 0.24396367989255788, "grad_norm": 0.6516869705024917, "learning_rate": 1.6801297648012978e-05, "loss": 0.6316, "step": 8356 }, { "epoch": 0.24399287612040524, "grad_norm": 0.7489232156910619, "learning_rate": 1.680064882400649e-05, "loss": 0.6688, "step": 8357 }, { "epoch": 0.2440220723482526, "grad_norm": 0.6720717313170986, "learning_rate": 1.6800000000000002e-05, "loss": 0.6754, "step": 8358 }, { "epoch": 0.24405126857609996, "grad_norm": 0.6993946879832258, "learning_rate": 1.6799351175993514e-05, "loss": 0.6537, "step": 8359 }, { "epoch": 0.24408046480394732, "grad_norm": 0.6600664944384343, "learning_rate": 1.6798702351987022e-05, "loss": 0.6909, "step": 8360 }, { "epoch": 0.24410966103179468, "grad_norm": 0.6451339559717296, "learning_rate": 1.6798053527980534e-05, "loss": 0.621, "step": 8361 }, { "epoch": 0.24413885725964204, "grad_norm": 0.6913563909543491, "learning_rate": 1.6797404703974047e-05, "loss": 0.6665, "step": 8362 }, { "epoch": 0.2441680534874894, "grad_norm": 0.6482208018947515, "learning_rate": 1.6796755879967562e-05, "loss": 0.5932, "step": 8363 }, { "epoch": 0.24419724971533677, "grad_norm": 0.6309976123348516, "learning_rate": 1.6796107055961074e-05, "loss": 0.5932, "step": 8364 }, { "epoch": 0.24422644594318413, "grad_norm": 0.919424540049306, "learning_rate": 1.6795458231954586e-05, "loss": 0.6138, "step": 8365 }, { "epoch": 0.2442556421710315, "grad_norm": 0.7097382463015586, "learning_rate": 1.6794809407948095e-05, "loss": 0.6812, "step": 8366 }, { "epoch": 0.24428483839887888, "grad_norm": 0.6652869889003378, "learning_rate": 1.6794160583941607e-05, "loss": 0.6476, "step": 8367 }, { "epoch": 0.24431403462672624, "grad_norm": 0.6723678294298449, "learning_rate": 1.679351175993512e-05, "loss": 0.6313, "step": 8368 }, { "epoch": 0.2443432308545736, "grad_norm": 0.6324885337329693, "learning_rate": 1.679286293592863e-05, "loss": 0.577, "step": 8369 }, { "epoch": 0.24437242708242096, "grad_norm": 0.7064358581023756, "learning_rate": 1.6792214111922143e-05, "loss": 0.689, "step": 8370 }, { "epoch": 0.24440162331026832, "grad_norm": 0.6335906573955011, "learning_rate": 1.6791565287915655e-05, "loss": 0.6181, "step": 8371 }, { "epoch": 0.24443081953811568, "grad_norm": 0.6648864041313002, "learning_rate": 1.6790916463909167e-05, "loss": 0.6359, "step": 8372 }, { "epoch": 0.24446001576596305, "grad_norm": 0.7470817275746712, "learning_rate": 1.679026763990268e-05, "loss": 0.6474, "step": 8373 }, { "epoch": 0.2444892119938104, "grad_norm": 0.6420114884797175, "learning_rate": 1.678961881589619e-05, "loss": 0.6576, "step": 8374 }, { "epoch": 0.24451840822165777, "grad_norm": 0.6756137960375638, "learning_rate": 1.67889699918897e-05, "loss": 0.6465, "step": 8375 }, { "epoch": 0.24454760444950513, "grad_norm": 0.6940571022634762, "learning_rate": 1.678832116788321e-05, "loss": 0.6929, "step": 8376 }, { "epoch": 0.2445768006773525, "grad_norm": 0.6966081090385718, "learning_rate": 1.6787672343876724e-05, "loss": 0.6412, "step": 8377 }, { "epoch": 0.24460599690519985, "grad_norm": 0.6406028183868135, "learning_rate": 1.6787023519870236e-05, "loss": 0.5009, "step": 8378 }, { "epoch": 0.2446351931330472, "grad_norm": 0.6611316612199462, "learning_rate": 1.6786374695863748e-05, "loss": 0.6688, "step": 8379 }, { "epoch": 0.24466438936089457, "grad_norm": 0.7076153142295998, "learning_rate": 1.678572587185726e-05, "loss": 0.7097, "step": 8380 }, { "epoch": 0.24469358558874194, "grad_norm": 0.717391911068937, "learning_rate": 1.678507704785077e-05, "loss": 0.665, "step": 8381 }, { "epoch": 0.2447227818165893, "grad_norm": 0.6714685046087959, "learning_rate": 1.6784428223844284e-05, "loss": 0.6555, "step": 8382 }, { "epoch": 0.24475197804443666, "grad_norm": 0.6498754160911115, "learning_rate": 1.6783779399837796e-05, "loss": 0.6138, "step": 8383 }, { "epoch": 0.24478117427228402, "grad_norm": 0.7220730941544398, "learning_rate": 1.6783130575831308e-05, "loss": 0.7312, "step": 8384 }, { "epoch": 0.24481037050013138, "grad_norm": 0.6476618163809414, "learning_rate": 1.678248175182482e-05, "loss": 0.6879, "step": 8385 }, { "epoch": 0.24483956672797874, "grad_norm": 0.6611294727665665, "learning_rate": 1.6781832927818332e-05, "loss": 0.5838, "step": 8386 }, { "epoch": 0.2448687629558261, "grad_norm": 0.6381831405339344, "learning_rate": 1.6781184103811844e-05, "loss": 0.5975, "step": 8387 }, { "epoch": 0.24489795918367346, "grad_norm": 0.6684444225471594, "learning_rate": 1.6780535279805356e-05, "loss": 0.6796, "step": 8388 }, { "epoch": 0.24492715541152082, "grad_norm": 0.6651283967211904, "learning_rate": 1.6779886455798864e-05, "loss": 0.6216, "step": 8389 }, { "epoch": 0.2449563516393682, "grad_norm": 0.6980767139060076, "learning_rate": 1.6779237631792376e-05, "loss": 0.7016, "step": 8390 }, { "epoch": 0.24498554786721555, "grad_norm": 0.6574412999744168, "learning_rate": 1.677858880778589e-05, "loss": 0.5811, "step": 8391 }, { "epoch": 0.2450147440950629, "grad_norm": 0.6480881355018335, "learning_rate": 1.67779399837794e-05, "loss": 0.6325, "step": 8392 }, { "epoch": 0.24504394032291027, "grad_norm": 0.69336994946338, "learning_rate": 1.6777291159772913e-05, "loss": 0.6706, "step": 8393 }, { "epoch": 0.24507313655075763, "grad_norm": 0.6498775077064597, "learning_rate": 1.6776642335766425e-05, "loss": 0.6113, "step": 8394 }, { "epoch": 0.245102332778605, "grad_norm": 0.6430516047101608, "learning_rate": 1.6775993511759937e-05, "loss": 0.6068, "step": 8395 }, { "epoch": 0.24513152900645235, "grad_norm": 0.7332497641375086, "learning_rate": 1.677534468775345e-05, "loss": 0.6536, "step": 8396 }, { "epoch": 0.24516072523429974, "grad_norm": 0.6751491322424269, "learning_rate": 1.677469586374696e-05, "loss": 0.6585, "step": 8397 }, { "epoch": 0.2451899214621471, "grad_norm": 0.6132115642650883, "learning_rate": 1.677404703974047e-05, "loss": 0.5621, "step": 8398 }, { "epoch": 0.24521911768999446, "grad_norm": 0.6388917215432898, "learning_rate": 1.677339821573398e-05, "loss": 0.575, "step": 8399 }, { "epoch": 0.24524831391784183, "grad_norm": 0.6230491502888046, "learning_rate": 1.6772749391727497e-05, "loss": 0.5851, "step": 8400 }, { "epoch": 0.2452775101456892, "grad_norm": 0.7063382182416673, "learning_rate": 1.677210056772101e-05, "loss": 0.6792, "step": 8401 }, { "epoch": 0.24530670637353655, "grad_norm": 0.6307267616671167, "learning_rate": 1.677145174371452e-05, "loss": 0.6014, "step": 8402 }, { "epoch": 0.2453359026013839, "grad_norm": 0.7044898103441908, "learning_rate": 1.6770802919708033e-05, "loss": 0.6772, "step": 8403 }, { "epoch": 0.24536509882923127, "grad_norm": 0.70421506182567, "learning_rate": 1.677015409570154e-05, "loss": 0.6438, "step": 8404 }, { "epoch": 0.24539429505707863, "grad_norm": 0.6352746197390683, "learning_rate": 1.6769505271695054e-05, "loss": 0.5625, "step": 8405 }, { "epoch": 0.245423491284926, "grad_norm": 0.6358136453592828, "learning_rate": 1.6768856447688566e-05, "loss": 0.6067, "step": 8406 }, { "epoch": 0.24545268751277335, "grad_norm": 0.680553098502167, "learning_rate": 1.6768207623682078e-05, "loss": 0.6996, "step": 8407 }, { "epoch": 0.24548188374062072, "grad_norm": 0.6824301961013277, "learning_rate": 1.676755879967559e-05, "loss": 0.6547, "step": 8408 }, { "epoch": 0.24551107996846808, "grad_norm": 0.6111211382523453, "learning_rate": 1.67669099756691e-05, "loss": 0.5688, "step": 8409 }, { "epoch": 0.24554027619631544, "grad_norm": 0.6340495327996934, "learning_rate": 1.6766261151662614e-05, "loss": 0.5908, "step": 8410 }, { "epoch": 0.2455694724241628, "grad_norm": 0.7081958865210278, "learning_rate": 1.6765612327656126e-05, "loss": 0.669, "step": 8411 }, { "epoch": 0.24559866865201016, "grad_norm": 0.6527708776288611, "learning_rate": 1.6764963503649638e-05, "loss": 0.6235, "step": 8412 }, { "epoch": 0.24562786487985752, "grad_norm": 0.670818466796643, "learning_rate": 1.6764314679643146e-05, "loss": 0.5823, "step": 8413 }, { "epoch": 0.24565706110770488, "grad_norm": 0.6803545070941788, "learning_rate": 1.676366585563666e-05, "loss": 0.631, "step": 8414 }, { "epoch": 0.24568625733555224, "grad_norm": 0.6142176889799946, "learning_rate": 1.676301703163017e-05, "loss": 0.5515, "step": 8415 }, { "epoch": 0.2457154535633996, "grad_norm": 0.6846835507448789, "learning_rate": 1.6762368207623682e-05, "loss": 0.6463, "step": 8416 }, { "epoch": 0.24574464979124697, "grad_norm": 0.7265284385370637, "learning_rate": 1.6761719383617194e-05, "loss": 0.7972, "step": 8417 }, { "epoch": 0.24577384601909433, "grad_norm": 0.6233122648519441, "learning_rate": 1.6761070559610706e-05, "loss": 0.6109, "step": 8418 }, { "epoch": 0.2458030422469417, "grad_norm": 0.7202353524447302, "learning_rate": 1.676042173560422e-05, "loss": 0.7069, "step": 8419 }, { "epoch": 0.24583223847478905, "grad_norm": 0.6690749013703199, "learning_rate": 1.675977291159773e-05, "loss": 0.6223, "step": 8420 }, { "epoch": 0.2458614347026364, "grad_norm": 0.6795761775423793, "learning_rate": 1.6759124087591243e-05, "loss": 0.7049, "step": 8421 }, { "epoch": 0.24589063093048377, "grad_norm": 0.6461631270243147, "learning_rate": 1.6758475263584755e-05, "loss": 0.6683, "step": 8422 }, { "epoch": 0.24591982715833113, "grad_norm": 0.6620262479372123, "learning_rate": 1.6757826439578267e-05, "loss": 0.6551, "step": 8423 }, { "epoch": 0.2459490233861785, "grad_norm": 0.7788500532100935, "learning_rate": 1.675717761557178e-05, "loss": 0.6493, "step": 8424 }, { "epoch": 0.24597821961402586, "grad_norm": 0.6869189259160517, "learning_rate": 1.675652879156529e-05, "loss": 0.6778, "step": 8425 }, { "epoch": 0.24600741584187322, "grad_norm": 0.7395976065324935, "learning_rate": 1.6755879967558803e-05, "loss": 0.6112, "step": 8426 }, { "epoch": 0.24603661206972058, "grad_norm": 0.6848777844914904, "learning_rate": 1.675523114355231e-05, "loss": 0.6904, "step": 8427 }, { "epoch": 0.24606580829756797, "grad_norm": 0.7196981553029544, "learning_rate": 1.6754582319545823e-05, "loss": 0.6911, "step": 8428 }, { "epoch": 0.24609500452541533, "grad_norm": 0.6990834154715415, "learning_rate": 1.6753933495539335e-05, "loss": 0.6865, "step": 8429 }, { "epoch": 0.2461242007532627, "grad_norm": 0.9117492230351784, "learning_rate": 1.6753284671532847e-05, "loss": 0.7553, "step": 8430 }, { "epoch": 0.24615339698111005, "grad_norm": 0.6820230205127444, "learning_rate": 1.675263584752636e-05, "loss": 0.6815, "step": 8431 }, { "epoch": 0.2461825932089574, "grad_norm": 0.6890941268748029, "learning_rate": 1.675198702351987e-05, "loss": 0.6007, "step": 8432 }, { "epoch": 0.24621178943680477, "grad_norm": 0.6641791324004382, "learning_rate": 1.6751338199513383e-05, "loss": 0.6118, "step": 8433 }, { "epoch": 0.24624098566465213, "grad_norm": 0.6604089435590498, "learning_rate": 1.6750689375506896e-05, "loss": 0.6714, "step": 8434 }, { "epoch": 0.2462701818924995, "grad_norm": 0.6633361987818304, "learning_rate": 1.6750040551500408e-05, "loss": 0.6428, "step": 8435 }, { "epoch": 0.24629937812034686, "grad_norm": 0.6783794942079615, "learning_rate": 1.6749391727493916e-05, "loss": 0.6796, "step": 8436 }, { "epoch": 0.24632857434819422, "grad_norm": 0.691545790033146, "learning_rate": 1.6748742903487428e-05, "loss": 0.6589, "step": 8437 }, { "epoch": 0.24635777057604158, "grad_norm": 0.7155109368199588, "learning_rate": 1.6748094079480944e-05, "loss": 0.6813, "step": 8438 }, { "epoch": 0.24638696680388894, "grad_norm": 0.7558713828041067, "learning_rate": 1.6747445255474456e-05, "loss": 0.7595, "step": 8439 }, { "epoch": 0.2464161630317363, "grad_norm": 0.6575943833718775, "learning_rate": 1.6746796431467968e-05, "loss": 0.6908, "step": 8440 }, { "epoch": 0.24644535925958366, "grad_norm": 0.7043808685776082, "learning_rate": 1.674614760746148e-05, "loss": 0.6318, "step": 8441 }, { "epoch": 0.24647455548743102, "grad_norm": 0.6049229863024946, "learning_rate": 1.674549878345499e-05, "loss": 0.5526, "step": 8442 }, { "epoch": 0.24650375171527839, "grad_norm": 0.6465322385436901, "learning_rate": 1.67448499594485e-05, "loss": 0.5794, "step": 8443 }, { "epoch": 0.24653294794312575, "grad_norm": 0.7219430400174731, "learning_rate": 1.6744201135442012e-05, "loss": 0.6701, "step": 8444 }, { "epoch": 0.2465621441709731, "grad_norm": 0.7508283667916276, "learning_rate": 1.6743552311435524e-05, "loss": 0.6445, "step": 8445 }, { "epoch": 0.24659134039882047, "grad_norm": 0.6732633717100105, "learning_rate": 1.6742903487429036e-05, "loss": 0.651, "step": 8446 }, { "epoch": 0.24662053662666783, "grad_norm": 0.6711676443084036, "learning_rate": 1.674225466342255e-05, "loss": 0.6284, "step": 8447 }, { "epoch": 0.2466497328545152, "grad_norm": 0.717347585891035, "learning_rate": 1.674160583941606e-05, "loss": 0.6667, "step": 8448 }, { "epoch": 0.24667892908236255, "grad_norm": 0.6773158896848671, "learning_rate": 1.6740957015409573e-05, "loss": 0.6476, "step": 8449 }, { "epoch": 0.2467081253102099, "grad_norm": 0.6418400215944808, "learning_rate": 1.6740308191403085e-05, "loss": 0.6037, "step": 8450 }, { "epoch": 0.24673732153805727, "grad_norm": 0.664564628522463, "learning_rate": 1.6739659367396593e-05, "loss": 0.6144, "step": 8451 }, { "epoch": 0.24676651776590464, "grad_norm": 0.5899508172723823, "learning_rate": 1.6739010543390105e-05, "loss": 0.5342, "step": 8452 }, { "epoch": 0.246795713993752, "grad_norm": 0.6566888551204288, "learning_rate": 1.6738361719383617e-05, "loss": 0.6423, "step": 8453 }, { "epoch": 0.24682491022159936, "grad_norm": 0.7019439892242237, "learning_rate": 1.673771289537713e-05, "loss": 0.6503, "step": 8454 }, { "epoch": 0.24685410644944672, "grad_norm": 0.639313594890209, "learning_rate": 1.673706407137064e-05, "loss": 0.5893, "step": 8455 }, { "epoch": 0.24688330267729408, "grad_norm": 0.6468225731028071, "learning_rate": 1.6736415247364153e-05, "loss": 0.6471, "step": 8456 }, { "epoch": 0.24691249890514144, "grad_norm": 0.6201272757632248, "learning_rate": 1.6735766423357665e-05, "loss": 0.5448, "step": 8457 }, { "epoch": 0.24694169513298883, "grad_norm": 0.7237933593682708, "learning_rate": 1.6735117599351177e-05, "loss": 0.6934, "step": 8458 }, { "epoch": 0.2469708913608362, "grad_norm": 0.6148616157124515, "learning_rate": 1.673446877534469e-05, "loss": 0.5538, "step": 8459 }, { "epoch": 0.24700008758868355, "grad_norm": 0.6954151383850193, "learning_rate": 1.67338199513382e-05, "loss": 0.6227, "step": 8460 }, { "epoch": 0.24702928381653091, "grad_norm": 0.7213806024747834, "learning_rate": 1.6733171127331713e-05, "loss": 0.6708, "step": 8461 }, { "epoch": 0.24705848004437828, "grad_norm": 0.6678277486756, "learning_rate": 1.6732522303325226e-05, "loss": 0.6299, "step": 8462 }, { "epoch": 0.24708767627222564, "grad_norm": 0.6651713661803618, "learning_rate": 1.6731873479318738e-05, "loss": 0.5717, "step": 8463 }, { "epoch": 0.247116872500073, "grad_norm": 0.6819488573631973, "learning_rate": 1.673122465531225e-05, "loss": 0.6759, "step": 8464 }, { "epoch": 0.24714606872792036, "grad_norm": 0.6665207228292969, "learning_rate": 1.6730575831305758e-05, "loss": 0.6523, "step": 8465 }, { "epoch": 0.24717526495576772, "grad_norm": 0.6924906935147702, "learning_rate": 1.672992700729927e-05, "loss": 0.6581, "step": 8466 }, { "epoch": 0.24720446118361508, "grad_norm": 0.7202619419395133, "learning_rate": 1.6729278183292782e-05, "loss": 0.6994, "step": 8467 }, { "epoch": 0.24723365741146244, "grad_norm": 0.6231753763376872, "learning_rate": 1.6728629359286294e-05, "loss": 0.6279, "step": 8468 }, { "epoch": 0.2472628536393098, "grad_norm": 0.716394108944847, "learning_rate": 1.6727980535279806e-05, "loss": 0.6688, "step": 8469 }, { "epoch": 0.24729204986715717, "grad_norm": 0.7457938716909386, "learning_rate": 1.672733171127332e-05, "loss": 0.732, "step": 8470 }, { "epoch": 0.24732124609500453, "grad_norm": 0.7163331427956559, "learning_rate": 1.672668288726683e-05, "loss": 0.6547, "step": 8471 }, { "epoch": 0.2473504423228519, "grad_norm": 0.6821897463886896, "learning_rate": 1.6726034063260342e-05, "loss": 0.6882, "step": 8472 }, { "epoch": 0.24737963855069925, "grad_norm": 0.813732766074828, "learning_rate": 1.6725385239253854e-05, "loss": 0.7036, "step": 8473 }, { "epoch": 0.2474088347785466, "grad_norm": 0.6292184670390892, "learning_rate": 1.6724736415247363e-05, "loss": 0.5561, "step": 8474 }, { "epoch": 0.24743803100639397, "grad_norm": 0.7122069749580907, "learning_rate": 1.6724087591240875e-05, "loss": 0.6445, "step": 8475 }, { "epoch": 0.24746722723424133, "grad_norm": 0.6619537456213551, "learning_rate": 1.672343876723439e-05, "loss": 0.7215, "step": 8476 }, { "epoch": 0.2474964234620887, "grad_norm": 0.6868634540666941, "learning_rate": 1.6722789943227903e-05, "loss": 0.6281, "step": 8477 }, { "epoch": 0.24752561968993606, "grad_norm": 0.6747673569112121, "learning_rate": 1.6722141119221415e-05, "loss": 0.6138, "step": 8478 }, { "epoch": 0.24755481591778342, "grad_norm": 0.693797207760936, "learning_rate": 1.6721492295214927e-05, "loss": 0.6801, "step": 8479 }, { "epoch": 0.24758401214563078, "grad_norm": 0.6839546790155671, "learning_rate": 1.6720843471208435e-05, "loss": 0.6564, "step": 8480 }, { "epoch": 0.24761320837347814, "grad_norm": 0.6704486908901652, "learning_rate": 1.6720194647201947e-05, "loss": 0.6839, "step": 8481 }, { "epoch": 0.2476424046013255, "grad_norm": 0.6626219603694805, "learning_rate": 1.671954582319546e-05, "loss": 0.6275, "step": 8482 }, { "epoch": 0.24767160082917286, "grad_norm": 0.6837433108942816, "learning_rate": 1.671889699918897e-05, "loss": 0.6771, "step": 8483 }, { "epoch": 0.24770079705702022, "grad_norm": 0.6404721566665932, "learning_rate": 1.6718248175182483e-05, "loss": 0.5864, "step": 8484 }, { "epoch": 0.24772999328486758, "grad_norm": 0.6590528469719552, "learning_rate": 1.6717599351175995e-05, "loss": 0.6278, "step": 8485 }, { "epoch": 0.24775918951271494, "grad_norm": 0.7579250389479426, "learning_rate": 1.6716950527169507e-05, "loss": 0.7605, "step": 8486 }, { "epoch": 0.2477883857405623, "grad_norm": 0.6140327755341772, "learning_rate": 1.671630170316302e-05, "loss": 0.5615, "step": 8487 }, { "epoch": 0.2478175819684097, "grad_norm": 0.7207059315482804, "learning_rate": 1.671565287915653e-05, "loss": 0.6593, "step": 8488 }, { "epoch": 0.24784677819625706, "grad_norm": 0.6119388550474338, "learning_rate": 1.671500405515004e-05, "loss": 0.5667, "step": 8489 }, { "epoch": 0.24787597442410442, "grad_norm": 0.6555249836246522, "learning_rate": 1.6714355231143552e-05, "loss": 0.6457, "step": 8490 }, { "epoch": 0.24790517065195178, "grad_norm": 0.6685916116721541, "learning_rate": 1.6713706407137064e-05, "loss": 0.6164, "step": 8491 }, { "epoch": 0.24793436687979914, "grad_norm": 0.6302884881096827, "learning_rate": 1.6713057583130576e-05, "loss": 0.6028, "step": 8492 }, { "epoch": 0.2479635631076465, "grad_norm": 0.728376237605769, "learning_rate": 1.6712408759124088e-05, "loss": 0.6643, "step": 8493 }, { "epoch": 0.24799275933549386, "grad_norm": 0.7120638747798137, "learning_rate": 1.67117599351176e-05, "loss": 0.7226, "step": 8494 }, { "epoch": 0.24802195556334122, "grad_norm": 0.6393784266996606, "learning_rate": 1.6711111111111112e-05, "loss": 0.5569, "step": 8495 }, { "epoch": 0.24805115179118858, "grad_norm": 0.6707191006119984, "learning_rate": 1.6710462287104624e-05, "loss": 0.6325, "step": 8496 }, { "epoch": 0.24808034801903595, "grad_norm": 0.6838566842009894, "learning_rate": 1.6709813463098136e-05, "loss": 0.6709, "step": 8497 }, { "epoch": 0.2481095442468833, "grad_norm": 0.6406401503705487, "learning_rate": 1.670916463909165e-05, "loss": 0.6327, "step": 8498 }, { "epoch": 0.24813874047473067, "grad_norm": 0.7063844184315978, "learning_rate": 1.670851581508516e-05, "loss": 0.708, "step": 8499 }, { "epoch": 0.24816793670257803, "grad_norm": 0.7497068921838127, "learning_rate": 1.6707866991078672e-05, "loss": 0.6664, "step": 8500 }, { "epoch": 0.2481971329304254, "grad_norm": 0.6793646389774423, "learning_rate": 1.6707218167072184e-05, "loss": 0.6581, "step": 8501 }, { "epoch": 0.24822632915827275, "grad_norm": 0.6704805688618003, "learning_rate": 1.6706569343065696e-05, "loss": 0.6362, "step": 8502 }, { "epoch": 0.2482555253861201, "grad_norm": 0.7259482741414859, "learning_rate": 1.6705920519059205e-05, "loss": 0.7399, "step": 8503 }, { "epoch": 0.24828472161396747, "grad_norm": 0.6284948866299818, "learning_rate": 1.6705271695052717e-05, "loss": 0.578, "step": 8504 }, { "epoch": 0.24831391784181484, "grad_norm": 0.6177798275763345, "learning_rate": 1.670462287104623e-05, "loss": 0.5862, "step": 8505 }, { "epoch": 0.2483431140696622, "grad_norm": 0.6611736065649308, "learning_rate": 1.670397404703974e-05, "loss": 0.6693, "step": 8506 }, { "epoch": 0.24837231029750956, "grad_norm": 0.6378966687391733, "learning_rate": 1.6703325223033253e-05, "loss": 0.6021, "step": 8507 }, { "epoch": 0.24840150652535692, "grad_norm": 0.6495484377192942, "learning_rate": 1.6702676399026765e-05, "loss": 0.6702, "step": 8508 }, { "epoch": 0.24843070275320428, "grad_norm": 0.637376441576546, "learning_rate": 1.6702027575020277e-05, "loss": 0.5624, "step": 8509 }, { "epoch": 0.24845989898105164, "grad_norm": 0.6746221594387378, "learning_rate": 1.670137875101379e-05, "loss": 0.6429, "step": 8510 }, { "epoch": 0.248489095208899, "grad_norm": 0.6515209334613691, "learning_rate": 1.67007299270073e-05, "loss": 0.6542, "step": 8511 }, { "epoch": 0.24851829143674636, "grad_norm": 0.7056773299320549, "learning_rate": 1.670008110300081e-05, "loss": 0.6857, "step": 8512 }, { "epoch": 0.24854748766459372, "grad_norm": 0.6681713572326231, "learning_rate": 1.6699432278994322e-05, "loss": 0.6262, "step": 8513 }, { "epoch": 0.24857668389244109, "grad_norm": 0.6953777219234807, "learning_rate": 1.6698783454987837e-05, "loss": 0.6336, "step": 8514 }, { "epoch": 0.24860588012028845, "grad_norm": 0.7005323011348166, "learning_rate": 1.669813463098135e-05, "loss": 0.6492, "step": 8515 }, { "epoch": 0.2486350763481358, "grad_norm": 0.7096665774831287, "learning_rate": 1.669748580697486e-05, "loss": 0.6768, "step": 8516 }, { "epoch": 0.24866427257598317, "grad_norm": 0.6473461377823698, "learning_rate": 1.6696836982968373e-05, "loss": 0.5702, "step": 8517 }, { "epoch": 0.24869346880383056, "grad_norm": 0.7638198785432255, "learning_rate": 1.6696188158961882e-05, "loss": 0.6414, "step": 8518 }, { "epoch": 0.24872266503167792, "grad_norm": 0.687218160209831, "learning_rate": 1.6695539334955394e-05, "loss": 0.598, "step": 8519 }, { "epoch": 0.24875186125952528, "grad_norm": 0.6793255279621055, "learning_rate": 1.6694890510948906e-05, "loss": 0.6258, "step": 8520 }, { "epoch": 0.24878105748737264, "grad_norm": 0.6660939906751251, "learning_rate": 1.6694241686942418e-05, "loss": 0.6124, "step": 8521 }, { "epoch": 0.24881025371522, "grad_norm": 0.6862154442097163, "learning_rate": 1.669359286293593e-05, "loss": 0.6231, "step": 8522 }, { "epoch": 0.24883944994306736, "grad_norm": 0.6279593458919737, "learning_rate": 1.6692944038929442e-05, "loss": 0.6168, "step": 8523 }, { "epoch": 0.24886864617091473, "grad_norm": 0.6368165863714095, "learning_rate": 1.6692295214922954e-05, "loss": 0.6224, "step": 8524 }, { "epoch": 0.2488978423987621, "grad_norm": 0.6004653366136193, "learning_rate": 1.6691646390916466e-05, "loss": 0.5487, "step": 8525 }, { "epoch": 0.24892703862660945, "grad_norm": 0.7183402095792543, "learning_rate": 1.6690997566909978e-05, "loss": 0.6642, "step": 8526 }, { "epoch": 0.2489562348544568, "grad_norm": 0.6833784055083494, "learning_rate": 1.6690348742903487e-05, "loss": 0.6908, "step": 8527 }, { "epoch": 0.24898543108230417, "grad_norm": 0.7892655839108766, "learning_rate": 1.6689699918897e-05, "loss": 0.7052, "step": 8528 }, { "epoch": 0.24901462731015153, "grad_norm": 0.7661699794198733, "learning_rate": 1.668905109489051e-05, "loss": 0.702, "step": 8529 }, { "epoch": 0.2490438235379989, "grad_norm": 0.6248969525785889, "learning_rate": 1.6688402270884023e-05, "loss": 0.5855, "step": 8530 }, { "epoch": 0.24907301976584625, "grad_norm": 0.7407321393264924, "learning_rate": 1.6687753446877535e-05, "loss": 0.734, "step": 8531 }, { "epoch": 0.24910221599369362, "grad_norm": 0.6324858946722037, "learning_rate": 1.6687104622871047e-05, "loss": 0.5825, "step": 8532 }, { "epoch": 0.24913141222154098, "grad_norm": 0.6195682955542295, "learning_rate": 1.668645579886456e-05, "loss": 0.5876, "step": 8533 }, { "epoch": 0.24916060844938834, "grad_norm": 0.659614767930946, "learning_rate": 1.668580697485807e-05, "loss": 0.6634, "step": 8534 }, { "epoch": 0.2491898046772357, "grad_norm": 0.6805800349446808, "learning_rate": 1.6685158150851583e-05, "loss": 0.6916, "step": 8535 }, { "epoch": 0.24921900090508306, "grad_norm": 0.6743008962690283, "learning_rate": 1.6684509326845095e-05, "loss": 0.6568, "step": 8536 }, { "epoch": 0.24924819713293042, "grad_norm": 0.7524526890453813, "learning_rate": 1.6683860502838607e-05, "loss": 0.6193, "step": 8537 }, { "epoch": 0.24927739336077778, "grad_norm": 0.6653336781476322, "learning_rate": 1.668321167883212e-05, "loss": 0.6418, "step": 8538 }, { "epoch": 0.24930658958862514, "grad_norm": 0.6861196249288657, "learning_rate": 1.668256285482563e-05, "loss": 0.6933, "step": 8539 }, { "epoch": 0.2493357858164725, "grad_norm": 0.6700577088079764, "learning_rate": 1.6681914030819143e-05, "loss": 0.6426, "step": 8540 }, { "epoch": 0.24936498204431987, "grad_norm": 0.6357463035393727, "learning_rate": 1.6681265206812652e-05, "loss": 0.6128, "step": 8541 }, { "epoch": 0.24939417827216723, "grad_norm": 0.6075716838349864, "learning_rate": 1.6680616382806164e-05, "loss": 0.5332, "step": 8542 }, { "epoch": 0.2494233745000146, "grad_norm": 0.6943712677377587, "learning_rate": 1.6679967558799676e-05, "loss": 0.6267, "step": 8543 }, { "epoch": 0.24945257072786195, "grad_norm": 0.675284237956795, "learning_rate": 1.6679318734793188e-05, "loss": 0.667, "step": 8544 }, { "epoch": 0.2494817669557093, "grad_norm": 0.9440868465021691, "learning_rate": 1.66786699107867e-05, "loss": 0.6932, "step": 8545 }, { "epoch": 0.24951096318355667, "grad_norm": 0.6303762359472598, "learning_rate": 1.6678021086780212e-05, "loss": 0.5945, "step": 8546 }, { "epoch": 0.24954015941140403, "grad_norm": 0.6385108614680676, "learning_rate": 1.6677372262773724e-05, "loss": 0.6438, "step": 8547 }, { "epoch": 0.24956935563925142, "grad_norm": 0.6534177092836413, "learning_rate": 1.6676723438767236e-05, "loss": 0.6178, "step": 8548 }, { "epoch": 0.24959855186709878, "grad_norm": 0.6909835717820372, "learning_rate": 1.6676074614760748e-05, "loss": 0.6725, "step": 8549 }, { "epoch": 0.24962774809494614, "grad_norm": 0.6699955722338876, "learning_rate": 1.6675425790754257e-05, "loss": 0.5973, "step": 8550 }, { "epoch": 0.2496569443227935, "grad_norm": 0.7198545040607497, "learning_rate": 1.6674776966747772e-05, "loss": 0.7163, "step": 8551 }, { "epoch": 0.24968614055064087, "grad_norm": 0.7420133090707207, "learning_rate": 1.6674128142741284e-05, "loss": 0.6916, "step": 8552 }, { "epoch": 0.24971533677848823, "grad_norm": 0.6232715470482832, "learning_rate": 1.6673479318734796e-05, "loss": 0.5797, "step": 8553 }, { "epoch": 0.2497445330063356, "grad_norm": 0.656022784476823, "learning_rate": 1.6672830494728308e-05, "loss": 0.6307, "step": 8554 }, { "epoch": 0.24977372923418295, "grad_norm": 0.8269025426913726, "learning_rate": 1.667218167072182e-05, "loss": 0.6892, "step": 8555 }, { "epoch": 0.2498029254620303, "grad_norm": 0.672430324893781, "learning_rate": 1.667153284671533e-05, "loss": 0.653, "step": 8556 }, { "epoch": 0.24983212168987767, "grad_norm": 0.7259582087481707, "learning_rate": 1.667088402270884e-05, "loss": 0.7363, "step": 8557 }, { "epoch": 0.24986131791772503, "grad_norm": 0.7202898115063967, "learning_rate": 1.6670235198702353e-05, "loss": 0.7384, "step": 8558 }, { "epoch": 0.2498905141455724, "grad_norm": 0.7071788365185087, "learning_rate": 1.6669586374695865e-05, "loss": 0.6498, "step": 8559 }, { "epoch": 0.24991971037341976, "grad_norm": 0.6556058687791152, "learning_rate": 1.6668937550689377e-05, "loss": 0.6638, "step": 8560 }, { "epoch": 0.24994890660126712, "grad_norm": 0.7806022283957849, "learning_rate": 1.666828872668289e-05, "loss": 0.6996, "step": 8561 }, { "epoch": 0.24997810282911448, "grad_norm": 0.6678740812351555, "learning_rate": 1.66676399026764e-05, "loss": 0.7054, "step": 8562 }, { "epoch": 0.25000729905696184, "grad_norm": 0.6586547867133089, "learning_rate": 1.6666991078669913e-05, "loss": 0.6682, "step": 8563 }, { "epoch": 0.2500364952848092, "grad_norm": 0.7303905388772934, "learning_rate": 1.6666342254663425e-05, "loss": 0.7976, "step": 8564 }, { "epoch": 0.25006569151265656, "grad_norm": 0.6607725274319941, "learning_rate": 1.6665693430656934e-05, "loss": 0.6415, "step": 8565 }, { "epoch": 0.2500948877405039, "grad_norm": 0.686675550173139, "learning_rate": 1.6665044606650446e-05, "loss": 0.641, "step": 8566 }, { "epoch": 0.2501240839683513, "grad_norm": 0.6476148852676156, "learning_rate": 1.6664395782643958e-05, "loss": 0.6221, "step": 8567 }, { "epoch": 0.25015328019619865, "grad_norm": 0.7403930656252077, "learning_rate": 1.666374695863747e-05, "loss": 0.6217, "step": 8568 }, { "epoch": 0.250182476424046, "grad_norm": 0.6905954855107372, "learning_rate": 1.6663098134630982e-05, "loss": 0.6741, "step": 8569 }, { "epoch": 0.25021167265189337, "grad_norm": 0.6390103774855996, "learning_rate": 1.6662449310624494e-05, "loss": 0.6121, "step": 8570 }, { "epoch": 0.25024086887974073, "grad_norm": 0.6404964002585198, "learning_rate": 1.6661800486618006e-05, "loss": 0.631, "step": 8571 }, { "epoch": 0.2502700651075881, "grad_norm": 0.6402197119302467, "learning_rate": 1.6661151662611518e-05, "loss": 0.5845, "step": 8572 }, { "epoch": 0.25029926133543545, "grad_norm": 0.6521760518076998, "learning_rate": 1.666050283860503e-05, "loss": 0.6167, "step": 8573 }, { "epoch": 0.2503284575632828, "grad_norm": 0.612394153905207, "learning_rate": 1.6659854014598542e-05, "loss": 0.6016, "step": 8574 }, { "epoch": 0.2503576537911302, "grad_norm": 0.6395093214276869, "learning_rate": 1.6659205190592054e-05, "loss": 0.5843, "step": 8575 }, { "epoch": 0.25038685001897754, "grad_norm": 0.6351724120506786, "learning_rate": 1.6658556366585566e-05, "loss": 0.5416, "step": 8576 }, { "epoch": 0.2504160462468249, "grad_norm": 0.7064859842052591, "learning_rate": 1.6657907542579078e-05, "loss": 0.7178, "step": 8577 }, { "epoch": 0.25044524247467226, "grad_norm": 0.966862373650561, "learning_rate": 1.665725871857259e-05, "loss": 0.6421, "step": 8578 }, { "epoch": 0.2504744387025196, "grad_norm": 0.6254839775416587, "learning_rate": 1.66566098945661e-05, "loss": 0.5479, "step": 8579 }, { "epoch": 0.250503634930367, "grad_norm": 0.6708129885070884, "learning_rate": 1.665596107055961e-05, "loss": 0.5439, "step": 8580 }, { "epoch": 0.25053283115821434, "grad_norm": 0.704902116838573, "learning_rate": 1.6655312246553123e-05, "loss": 0.6554, "step": 8581 }, { "epoch": 0.2505620273860617, "grad_norm": 0.6124658567625275, "learning_rate": 1.6654663422546635e-05, "loss": 0.5786, "step": 8582 }, { "epoch": 0.25059122361390906, "grad_norm": 0.675769046881399, "learning_rate": 1.6654014598540147e-05, "loss": 0.6553, "step": 8583 }, { "epoch": 0.2506204198417564, "grad_norm": 0.714865877419772, "learning_rate": 1.665336577453366e-05, "loss": 0.681, "step": 8584 }, { "epoch": 0.2506496160696038, "grad_norm": 0.6562525341916977, "learning_rate": 1.665271695052717e-05, "loss": 0.6088, "step": 8585 }, { "epoch": 0.25067881229745115, "grad_norm": 0.6894021471116534, "learning_rate": 1.6652068126520683e-05, "loss": 0.667, "step": 8586 }, { "epoch": 0.2507080085252985, "grad_norm": 0.6538748432053677, "learning_rate": 1.6651419302514195e-05, "loss": 0.6575, "step": 8587 }, { "epoch": 0.25073720475314587, "grad_norm": 0.6580467099248516, "learning_rate": 1.6650770478507704e-05, "loss": 0.6473, "step": 8588 }, { "epoch": 0.25076640098099323, "grad_norm": 0.6897017305875819, "learning_rate": 1.665012165450122e-05, "loss": 0.6587, "step": 8589 }, { "epoch": 0.2507955972088406, "grad_norm": 0.6548675397008612, "learning_rate": 1.664947283049473e-05, "loss": 0.6068, "step": 8590 }, { "epoch": 0.25082479343668795, "grad_norm": 1.1346183336459283, "learning_rate": 1.6648824006488243e-05, "loss": 0.7577, "step": 8591 }, { "epoch": 0.2508539896645353, "grad_norm": 0.6916665317882137, "learning_rate": 1.6648175182481755e-05, "loss": 0.6453, "step": 8592 }, { "epoch": 0.25088318589238273, "grad_norm": 0.7205887195753798, "learning_rate": 1.6647526358475267e-05, "loss": 0.6893, "step": 8593 }, { "epoch": 0.2509123821202301, "grad_norm": 0.6546412244587981, "learning_rate": 1.6646877534468776e-05, "loss": 0.6556, "step": 8594 }, { "epoch": 0.25094157834807745, "grad_norm": 0.6660853995530378, "learning_rate": 1.6646228710462288e-05, "loss": 0.5853, "step": 8595 }, { "epoch": 0.2509707745759248, "grad_norm": 0.6921423438830474, "learning_rate": 1.66455798864558e-05, "loss": 0.6493, "step": 8596 }, { "epoch": 0.2509999708037722, "grad_norm": 0.6870090538134888, "learning_rate": 1.6644931062449312e-05, "loss": 0.6602, "step": 8597 }, { "epoch": 0.25102916703161954, "grad_norm": 0.9102813101519437, "learning_rate": 1.6644282238442824e-05, "loss": 0.6151, "step": 8598 }, { "epoch": 0.2510583632594669, "grad_norm": 0.6909349663942165, "learning_rate": 1.6643633414436336e-05, "loss": 0.6859, "step": 8599 }, { "epoch": 0.25108755948731426, "grad_norm": 0.6766383117514151, "learning_rate": 1.6642984590429848e-05, "loss": 0.5885, "step": 8600 }, { "epoch": 0.2511167557151616, "grad_norm": 0.6860152675577926, "learning_rate": 1.664233576642336e-05, "loss": 0.6473, "step": 8601 }, { "epoch": 0.251145951943009, "grad_norm": 0.7176880303318497, "learning_rate": 1.664168694241687e-05, "loss": 0.6014, "step": 8602 }, { "epoch": 0.25117514817085634, "grad_norm": 0.7228165903524374, "learning_rate": 1.664103811841038e-05, "loss": 0.7105, "step": 8603 }, { "epoch": 0.2512043443987037, "grad_norm": 0.6664419098276928, "learning_rate": 1.6640389294403893e-05, "loss": 0.6655, "step": 8604 }, { "epoch": 0.25123354062655107, "grad_norm": 0.6647421929477529, "learning_rate": 1.6639740470397405e-05, "loss": 0.6441, "step": 8605 }, { "epoch": 0.2512627368543984, "grad_norm": 0.699306220166719, "learning_rate": 1.6639091646390917e-05, "loss": 0.6584, "step": 8606 }, { "epoch": 0.2512919330822458, "grad_norm": 0.6911662918002663, "learning_rate": 1.663844282238443e-05, "loss": 0.7348, "step": 8607 }, { "epoch": 0.25132112931009315, "grad_norm": 0.6452369872121492, "learning_rate": 1.663779399837794e-05, "loss": 0.5853, "step": 8608 }, { "epoch": 0.2513503255379405, "grad_norm": 0.6445567441276582, "learning_rate": 1.6637145174371453e-05, "loss": 0.6451, "step": 8609 }, { "epoch": 0.25137952176578787, "grad_norm": 0.750743291241077, "learning_rate": 1.6636496350364965e-05, "loss": 0.7293, "step": 8610 }, { "epoch": 0.25140871799363523, "grad_norm": 0.6382757518631996, "learning_rate": 1.6635847526358477e-05, "loss": 0.5826, "step": 8611 }, { "epoch": 0.2514379142214826, "grad_norm": 0.641806283133195, "learning_rate": 1.663519870235199e-05, "loss": 0.6312, "step": 8612 }, { "epoch": 0.25146711044932996, "grad_norm": 0.7132493357481668, "learning_rate": 1.66345498783455e-05, "loss": 0.7394, "step": 8613 }, { "epoch": 0.2514963066771773, "grad_norm": 0.6423272900774661, "learning_rate": 1.6633901054339013e-05, "loss": 0.5999, "step": 8614 }, { "epoch": 0.2515255029050247, "grad_norm": 0.6597773278015816, "learning_rate": 1.6633252230332525e-05, "loss": 0.6709, "step": 8615 }, { "epoch": 0.25155469913287204, "grad_norm": 0.6305623660649837, "learning_rate": 1.6632603406326037e-05, "loss": 0.646, "step": 8616 }, { "epoch": 0.2515838953607194, "grad_norm": 0.643955940371328, "learning_rate": 1.6631954582319546e-05, "loss": 0.652, "step": 8617 }, { "epoch": 0.25161309158856676, "grad_norm": 0.6941609591691609, "learning_rate": 1.6631305758313058e-05, "loss": 0.6783, "step": 8618 }, { "epoch": 0.2516422878164141, "grad_norm": 0.6220812303527276, "learning_rate": 1.663065693430657e-05, "loss": 0.5995, "step": 8619 }, { "epoch": 0.2516714840442615, "grad_norm": 0.6179987639090013, "learning_rate": 1.6630008110300082e-05, "loss": 0.5851, "step": 8620 }, { "epoch": 0.25170068027210885, "grad_norm": 0.6485519590272533, "learning_rate": 1.6629359286293594e-05, "loss": 0.6325, "step": 8621 }, { "epoch": 0.2517298764999562, "grad_norm": 0.666608738272335, "learning_rate": 1.6628710462287106e-05, "loss": 0.6333, "step": 8622 }, { "epoch": 0.25175907272780357, "grad_norm": 0.7024670606735541, "learning_rate": 1.6628061638280618e-05, "loss": 0.7009, "step": 8623 }, { "epoch": 0.25178826895565093, "grad_norm": 0.674476213170332, "learning_rate": 1.662741281427413e-05, "loss": 0.6726, "step": 8624 }, { "epoch": 0.2518174651834983, "grad_norm": 0.6439262760461405, "learning_rate": 1.6626763990267642e-05, "loss": 0.658, "step": 8625 }, { "epoch": 0.25184666141134565, "grad_norm": 0.6817637975123976, "learning_rate": 1.662611516626115e-05, "loss": 0.6391, "step": 8626 }, { "epoch": 0.251875857639193, "grad_norm": 0.7625037262893192, "learning_rate": 1.6625466342254666e-05, "loss": 0.7335, "step": 8627 }, { "epoch": 0.2519050538670404, "grad_norm": 0.6306741345911377, "learning_rate": 1.6624817518248178e-05, "loss": 0.6068, "step": 8628 }, { "epoch": 0.25193425009488774, "grad_norm": 0.724000961543759, "learning_rate": 1.662416869424169e-05, "loss": 0.7545, "step": 8629 }, { "epoch": 0.2519634463227351, "grad_norm": 0.6302850587439885, "learning_rate": 1.6623519870235202e-05, "loss": 0.5989, "step": 8630 }, { "epoch": 0.25199264255058246, "grad_norm": 0.665051377493351, "learning_rate": 1.6622871046228714e-05, "loss": 0.6203, "step": 8631 }, { "epoch": 0.2520218387784298, "grad_norm": 0.6459623024067561, "learning_rate": 1.6622222222222223e-05, "loss": 0.6235, "step": 8632 }, { "epoch": 0.2520510350062772, "grad_norm": 0.6721982308547225, "learning_rate": 1.6621573398215735e-05, "loss": 0.7586, "step": 8633 }, { "epoch": 0.25208023123412454, "grad_norm": 0.6556332776563623, "learning_rate": 1.6620924574209247e-05, "loss": 0.6359, "step": 8634 }, { "epoch": 0.2521094274619719, "grad_norm": 0.6562525040280176, "learning_rate": 1.662027575020276e-05, "loss": 0.6122, "step": 8635 }, { "epoch": 0.25213862368981926, "grad_norm": 0.6459535400181988, "learning_rate": 1.661962692619627e-05, "loss": 0.616, "step": 8636 }, { "epoch": 0.2521678199176666, "grad_norm": 0.6695979919632369, "learning_rate": 1.6618978102189783e-05, "loss": 0.6182, "step": 8637 }, { "epoch": 0.252197016145514, "grad_norm": 0.6305978296850153, "learning_rate": 1.6618329278183295e-05, "loss": 0.5825, "step": 8638 }, { "epoch": 0.25222621237336135, "grad_norm": 0.6931765946602423, "learning_rate": 1.6617680454176807e-05, "loss": 0.7463, "step": 8639 }, { "epoch": 0.2522554086012087, "grad_norm": 0.6389548228864006, "learning_rate": 1.6617031630170316e-05, "loss": 0.6136, "step": 8640 }, { "epoch": 0.25228460482905607, "grad_norm": 0.6353032637135912, "learning_rate": 1.6616382806163828e-05, "loss": 0.5766, "step": 8641 }, { "epoch": 0.25231380105690343, "grad_norm": 0.6709008053788439, "learning_rate": 1.661573398215734e-05, "loss": 0.6331, "step": 8642 }, { "epoch": 0.2523429972847508, "grad_norm": 0.7575387930828617, "learning_rate": 1.661508515815085e-05, "loss": 0.6895, "step": 8643 }, { "epoch": 0.25237219351259815, "grad_norm": 0.6844919709684518, "learning_rate": 1.6614436334144364e-05, "loss": 0.6454, "step": 8644 }, { "epoch": 0.2524013897404455, "grad_norm": 0.7080760480400478, "learning_rate": 1.6613787510137876e-05, "loss": 0.7444, "step": 8645 }, { "epoch": 0.2524305859682929, "grad_norm": 0.6759156173476296, "learning_rate": 1.6613138686131388e-05, "loss": 0.6421, "step": 8646 }, { "epoch": 0.25245978219614024, "grad_norm": 0.6699914950221444, "learning_rate": 1.66124898621249e-05, "loss": 0.6259, "step": 8647 }, { "epoch": 0.2524889784239876, "grad_norm": 0.7871314736657234, "learning_rate": 1.6611841038118412e-05, "loss": 0.7601, "step": 8648 }, { "epoch": 0.25251817465183496, "grad_norm": 0.752602282887184, "learning_rate": 1.6611192214111924e-05, "loss": 0.6439, "step": 8649 }, { "epoch": 0.2525473708796823, "grad_norm": 0.7198458655934974, "learning_rate": 1.6610543390105436e-05, "loss": 0.7516, "step": 8650 }, { "epoch": 0.2525765671075297, "grad_norm": 0.7069600239791818, "learning_rate": 1.6609894566098948e-05, "loss": 0.6919, "step": 8651 }, { "epoch": 0.25260576333537704, "grad_norm": 0.7088839120062161, "learning_rate": 1.660924574209246e-05, "loss": 0.7282, "step": 8652 }, { "epoch": 0.2526349595632244, "grad_norm": 0.7129591752842486, "learning_rate": 1.6608596918085972e-05, "loss": 0.6541, "step": 8653 }, { "epoch": 0.2526641557910718, "grad_norm": 0.6741409681351527, "learning_rate": 1.6607948094079484e-05, "loss": 0.6074, "step": 8654 }, { "epoch": 0.2526933520189192, "grad_norm": 0.6729450875786754, "learning_rate": 1.6607299270072993e-05, "loss": 0.6163, "step": 8655 }, { "epoch": 0.25272254824676654, "grad_norm": 0.6404310295114015, "learning_rate": 1.6606650446066505e-05, "loss": 0.6064, "step": 8656 }, { "epoch": 0.2527517444746139, "grad_norm": 0.6714467820100469, "learning_rate": 1.6606001622060017e-05, "loss": 0.6587, "step": 8657 }, { "epoch": 0.25278094070246127, "grad_norm": 0.6481880745597189, "learning_rate": 1.660535279805353e-05, "loss": 0.6268, "step": 8658 }, { "epoch": 0.2528101369303086, "grad_norm": 0.6886951140773495, "learning_rate": 1.660470397404704e-05, "loss": 0.6852, "step": 8659 }, { "epoch": 0.252839333158156, "grad_norm": 0.7114647046535811, "learning_rate": 1.6604055150040553e-05, "loss": 0.6575, "step": 8660 }, { "epoch": 0.25286852938600335, "grad_norm": 0.7248824154175612, "learning_rate": 1.6603406326034065e-05, "loss": 0.6613, "step": 8661 }, { "epoch": 0.2528977256138507, "grad_norm": 0.7824676411259, "learning_rate": 1.6602757502027577e-05, "loss": 0.7252, "step": 8662 }, { "epoch": 0.25292692184169807, "grad_norm": 0.7488444601845253, "learning_rate": 1.660210867802109e-05, "loss": 0.7573, "step": 8663 }, { "epoch": 0.25295611806954543, "grad_norm": 0.6745311619834475, "learning_rate": 1.6601459854014597e-05, "loss": 0.6658, "step": 8664 }, { "epoch": 0.2529853142973928, "grad_norm": 0.6542553303602542, "learning_rate": 1.6600811030008113e-05, "loss": 0.649, "step": 8665 }, { "epoch": 0.25301451052524015, "grad_norm": 0.7408400849709725, "learning_rate": 1.6600162206001625e-05, "loss": 0.645, "step": 8666 }, { "epoch": 0.2530437067530875, "grad_norm": 1.517880909353058, "learning_rate": 1.6599513381995137e-05, "loss": 0.6661, "step": 8667 }, { "epoch": 0.2530729029809349, "grad_norm": 0.6994887544851858, "learning_rate": 1.659886455798865e-05, "loss": 0.7572, "step": 8668 }, { "epoch": 0.25310209920878224, "grad_norm": 0.6803140395219633, "learning_rate": 1.659821573398216e-05, "loss": 0.6598, "step": 8669 }, { "epoch": 0.2531312954366296, "grad_norm": 0.6909457070679118, "learning_rate": 1.659756690997567e-05, "loss": 0.7004, "step": 8670 }, { "epoch": 0.25316049166447696, "grad_norm": 0.679344666397936, "learning_rate": 1.659691808596918e-05, "loss": 0.6792, "step": 8671 }, { "epoch": 0.2531896878923243, "grad_norm": 0.6896617237022608, "learning_rate": 1.6596269261962694e-05, "loss": 0.7277, "step": 8672 }, { "epoch": 0.2532188841201717, "grad_norm": 0.63530139879624, "learning_rate": 1.6595620437956206e-05, "loss": 0.5797, "step": 8673 }, { "epoch": 0.25324808034801904, "grad_norm": 0.6811267800425553, "learning_rate": 1.6594971613949718e-05, "loss": 0.7236, "step": 8674 }, { "epoch": 0.2532772765758664, "grad_norm": 0.6859703243787038, "learning_rate": 1.659432278994323e-05, "loss": 0.6517, "step": 8675 }, { "epoch": 0.25330647280371377, "grad_norm": 0.6253870548147206, "learning_rate": 1.6593673965936742e-05, "loss": 0.6154, "step": 8676 }, { "epoch": 0.25333566903156113, "grad_norm": 0.7071003530274265, "learning_rate": 1.6593025141930254e-05, "loss": 0.7387, "step": 8677 }, { "epoch": 0.2533648652594085, "grad_norm": 0.6607058830780015, "learning_rate": 1.6592376317923762e-05, "loss": 0.6362, "step": 8678 }, { "epoch": 0.25339406148725585, "grad_norm": 0.6651313931913185, "learning_rate": 1.6591727493917274e-05, "loss": 0.6283, "step": 8679 }, { "epoch": 0.2534232577151032, "grad_norm": 0.73984597993478, "learning_rate": 1.6591078669910786e-05, "loss": 0.7266, "step": 8680 }, { "epoch": 0.2534524539429506, "grad_norm": 0.6357064628275679, "learning_rate": 1.65904298459043e-05, "loss": 0.5889, "step": 8681 }, { "epoch": 0.25348165017079793, "grad_norm": 0.895361817432598, "learning_rate": 1.658978102189781e-05, "loss": 0.664, "step": 8682 }, { "epoch": 0.2535108463986453, "grad_norm": 0.640084294071099, "learning_rate": 1.6589132197891323e-05, "loss": 0.6275, "step": 8683 }, { "epoch": 0.25354004262649266, "grad_norm": 0.6680984476319001, "learning_rate": 1.6588483373884835e-05, "loss": 0.6088, "step": 8684 }, { "epoch": 0.25356923885434, "grad_norm": 0.6893709596175733, "learning_rate": 1.6587834549878347e-05, "loss": 0.6889, "step": 8685 }, { "epoch": 0.2535984350821874, "grad_norm": 0.6634353119424218, "learning_rate": 1.658718572587186e-05, "loss": 0.6343, "step": 8686 }, { "epoch": 0.25362763131003474, "grad_norm": 0.7527409946266141, "learning_rate": 1.658653690186537e-05, "loss": 0.6899, "step": 8687 }, { "epoch": 0.2536568275378821, "grad_norm": 0.6550928270027172, "learning_rate": 1.6585888077858883e-05, "loss": 0.6214, "step": 8688 }, { "epoch": 0.25368602376572946, "grad_norm": 0.684626535316901, "learning_rate": 1.6585239253852395e-05, "loss": 0.6192, "step": 8689 }, { "epoch": 0.2537152199935768, "grad_norm": 0.6672187131592127, "learning_rate": 1.6584590429845907e-05, "loss": 0.6539, "step": 8690 }, { "epoch": 0.2537444162214242, "grad_norm": 0.6630597535435889, "learning_rate": 1.658394160583942e-05, "loss": 0.6619, "step": 8691 }, { "epoch": 0.25377361244927155, "grad_norm": 0.6813855607645253, "learning_rate": 1.658329278183293e-05, "loss": 0.7018, "step": 8692 }, { "epoch": 0.2538028086771189, "grad_norm": 0.6807397795998313, "learning_rate": 1.658264395782644e-05, "loss": 0.6784, "step": 8693 }, { "epoch": 0.25383200490496627, "grad_norm": 0.6679689208747709, "learning_rate": 1.658199513381995e-05, "loss": 0.6632, "step": 8694 }, { "epoch": 0.25386120113281363, "grad_norm": 0.6899970600476977, "learning_rate": 1.6581346309813463e-05, "loss": 0.6948, "step": 8695 }, { "epoch": 0.253890397360661, "grad_norm": 0.6978308115730598, "learning_rate": 1.6580697485806975e-05, "loss": 0.6526, "step": 8696 }, { "epoch": 0.25391959358850835, "grad_norm": 0.6395565498150202, "learning_rate": 1.6580048661800488e-05, "loss": 0.6014, "step": 8697 }, { "epoch": 0.2539487898163557, "grad_norm": 0.6862128777436484, "learning_rate": 1.6579399837794e-05, "loss": 0.6368, "step": 8698 }, { "epoch": 0.2539779860442031, "grad_norm": 0.7083403316016985, "learning_rate": 1.657875101378751e-05, "loss": 0.6842, "step": 8699 }, { "epoch": 0.25400718227205044, "grad_norm": 0.7233940528763937, "learning_rate": 1.6578102189781024e-05, "loss": 0.74, "step": 8700 }, { "epoch": 0.2540363784998978, "grad_norm": 0.6707549440672353, "learning_rate": 1.6577453365774536e-05, "loss": 0.6167, "step": 8701 }, { "epoch": 0.25406557472774516, "grad_norm": 0.6413737809935723, "learning_rate": 1.6576804541768048e-05, "loss": 0.5728, "step": 8702 }, { "epoch": 0.2540947709555925, "grad_norm": 0.7511744051091043, "learning_rate": 1.657615571776156e-05, "loss": 0.7251, "step": 8703 }, { "epoch": 0.2541239671834399, "grad_norm": 0.6050937721938621, "learning_rate": 1.6575506893755072e-05, "loss": 0.5731, "step": 8704 }, { "epoch": 0.25415316341128724, "grad_norm": 0.6929827221201785, "learning_rate": 1.6574858069748584e-05, "loss": 0.6684, "step": 8705 }, { "epoch": 0.2541823596391346, "grad_norm": 0.716219949842222, "learning_rate": 1.6574209245742096e-05, "loss": 0.7842, "step": 8706 }, { "epoch": 0.25421155586698196, "grad_norm": 0.6293886827937897, "learning_rate": 1.6573560421735604e-05, "loss": 0.569, "step": 8707 }, { "epoch": 0.2542407520948293, "grad_norm": 0.6421710223060717, "learning_rate": 1.6572911597729116e-05, "loss": 0.5762, "step": 8708 }, { "epoch": 0.2542699483226767, "grad_norm": 0.6263779424093457, "learning_rate": 1.657226277372263e-05, "loss": 0.6035, "step": 8709 }, { "epoch": 0.25429914455052405, "grad_norm": 0.6423549960177475, "learning_rate": 1.657161394971614e-05, "loss": 0.651, "step": 8710 }, { "epoch": 0.2543283407783714, "grad_norm": 0.74974735287169, "learning_rate": 1.6570965125709653e-05, "loss": 0.624, "step": 8711 }, { "epoch": 0.25435753700621877, "grad_norm": 0.6829435917104898, "learning_rate": 1.6570316301703165e-05, "loss": 0.7111, "step": 8712 }, { "epoch": 0.25438673323406613, "grad_norm": 0.8131521535901445, "learning_rate": 1.6569667477696677e-05, "loss": 0.7731, "step": 8713 }, { "epoch": 0.25441592946191355, "grad_norm": 0.5953793382109308, "learning_rate": 1.656901865369019e-05, "loss": 0.582, "step": 8714 }, { "epoch": 0.2544451256897609, "grad_norm": 0.6641961707032551, "learning_rate": 1.65683698296837e-05, "loss": 0.6533, "step": 8715 }, { "epoch": 0.25447432191760827, "grad_norm": 0.6366695915311539, "learning_rate": 1.656772100567721e-05, "loss": 0.6074, "step": 8716 }, { "epoch": 0.25450351814545563, "grad_norm": 0.7173839597270594, "learning_rate": 1.656707218167072e-05, "loss": 0.7063, "step": 8717 }, { "epoch": 0.254532714373303, "grad_norm": 0.6770842873580184, "learning_rate": 1.6566423357664233e-05, "loss": 0.6765, "step": 8718 }, { "epoch": 0.25456191060115035, "grad_norm": 0.6985201699018437, "learning_rate": 1.6565774533657745e-05, "loss": 0.689, "step": 8719 }, { "epoch": 0.2545911068289977, "grad_norm": 0.6944961823671703, "learning_rate": 1.6565125709651257e-05, "loss": 0.6719, "step": 8720 }, { "epoch": 0.2546203030568451, "grad_norm": 0.6725161361550839, "learning_rate": 1.6564476885644773e-05, "loss": 0.6191, "step": 8721 }, { "epoch": 0.25464949928469244, "grad_norm": 0.7351431534768802, "learning_rate": 1.656382806163828e-05, "loss": 0.7414, "step": 8722 }, { "epoch": 0.2546786955125398, "grad_norm": 0.6655102757307512, "learning_rate": 1.6563179237631793e-05, "loss": 0.6084, "step": 8723 }, { "epoch": 0.25470789174038716, "grad_norm": 0.7796852467093607, "learning_rate": 1.6562530413625305e-05, "loss": 0.6687, "step": 8724 }, { "epoch": 0.2547370879682345, "grad_norm": 0.6625606397457003, "learning_rate": 1.6561881589618818e-05, "loss": 0.6489, "step": 8725 }, { "epoch": 0.2547662841960819, "grad_norm": 0.6756246604660856, "learning_rate": 1.656123276561233e-05, "loss": 0.5926, "step": 8726 }, { "epoch": 0.25479548042392924, "grad_norm": 0.7306373990152866, "learning_rate": 1.656058394160584e-05, "loss": 0.6665, "step": 8727 }, { "epoch": 0.2548246766517766, "grad_norm": 0.6653971705999675, "learning_rate": 1.6559935117599354e-05, "loss": 0.6633, "step": 8728 }, { "epoch": 0.25485387287962397, "grad_norm": 0.6875460717108685, "learning_rate": 1.6559286293592866e-05, "loss": 0.6976, "step": 8729 }, { "epoch": 0.2548830691074713, "grad_norm": 0.600742726726413, "learning_rate": 1.6558637469586378e-05, "loss": 0.5081, "step": 8730 }, { "epoch": 0.2549122653353187, "grad_norm": 0.6928819333940434, "learning_rate": 1.6557988645579886e-05, "loss": 0.6707, "step": 8731 }, { "epoch": 0.25494146156316605, "grad_norm": 0.6345900650174044, "learning_rate": 1.65573398215734e-05, "loss": 0.5873, "step": 8732 }, { "epoch": 0.2549706577910134, "grad_norm": 0.682895830517082, "learning_rate": 1.655669099756691e-05, "loss": 0.6454, "step": 8733 }, { "epoch": 0.25499985401886077, "grad_norm": 0.6705701750541165, "learning_rate": 1.6556042173560422e-05, "loss": 0.6497, "step": 8734 }, { "epoch": 0.25502905024670813, "grad_norm": 0.6564212481652135, "learning_rate": 1.6555393349553934e-05, "loss": 0.6036, "step": 8735 }, { "epoch": 0.2550582464745555, "grad_norm": 0.6166736353942049, "learning_rate": 1.6554744525547446e-05, "loss": 0.5588, "step": 8736 }, { "epoch": 0.25508744270240286, "grad_norm": 0.6761977007992566, "learning_rate": 1.655409570154096e-05, "loss": 0.639, "step": 8737 }, { "epoch": 0.2551166389302502, "grad_norm": 0.6665009640338388, "learning_rate": 1.655344687753447e-05, "loss": 0.6639, "step": 8738 }, { "epoch": 0.2551458351580976, "grad_norm": 0.7447839980298122, "learning_rate": 1.6552798053527982e-05, "loss": 0.7424, "step": 8739 }, { "epoch": 0.25517503138594494, "grad_norm": 0.6929045030919297, "learning_rate": 1.6552149229521495e-05, "loss": 0.6741, "step": 8740 }, { "epoch": 0.2552042276137923, "grad_norm": 0.6356558604109589, "learning_rate": 1.6551500405515007e-05, "loss": 0.6382, "step": 8741 }, { "epoch": 0.25523342384163966, "grad_norm": 0.6445872646508088, "learning_rate": 1.655085158150852e-05, "loss": 0.5836, "step": 8742 }, { "epoch": 0.255262620069487, "grad_norm": 0.7811032855726031, "learning_rate": 1.655020275750203e-05, "loss": 0.7512, "step": 8743 }, { "epoch": 0.2552918162973344, "grad_norm": 0.7160638862277986, "learning_rate": 1.6549553933495543e-05, "loss": 0.6869, "step": 8744 }, { "epoch": 0.25532101252518175, "grad_norm": 0.6914723240297174, "learning_rate": 1.654890510948905e-05, "loss": 0.6739, "step": 8745 }, { "epoch": 0.2553502087530291, "grad_norm": 0.6695775394667508, "learning_rate": 1.6548256285482563e-05, "loss": 0.6485, "step": 8746 }, { "epoch": 0.25537940498087647, "grad_norm": 0.6840741351307188, "learning_rate": 1.6547607461476075e-05, "loss": 0.6581, "step": 8747 }, { "epoch": 0.25540860120872383, "grad_norm": 0.6390863056169374, "learning_rate": 1.6546958637469587e-05, "loss": 0.6121, "step": 8748 }, { "epoch": 0.2554377974365712, "grad_norm": 0.8033723939926202, "learning_rate": 1.65463098134631e-05, "loss": 0.6318, "step": 8749 }, { "epoch": 0.25546699366441855, "grad_norm": 0.6532814598107742, "learning_rate": 1.654566098945661e-05, "loss": 0.689, "step": 8750 }, { "epoch": 0.2554961898922659, "grad_norm": 0.5905565604273662, "learning_rate": 1.6545012165450123e-05, "loss": 0.5319, "step": 8751 }, { "epoch": 0.2555253861201133, "grad_norm": 0.6986783289357458, "learning_rate": 1.6544363341443635e-05, "loss": 0.6737, "step": 8752 }, { "epoch": 0.25555458234796063, "grad_norm": 0.7523890829922468, "learning_rate": 1.6543714517437147e-05, "loss": 0.667, "step": 8753 }, { "epoch": 0.255583778575808, "grad_norm": 0.7014191643716589, "learning_rate": 1.6543065693430656e-05, "loss": 0.6808, "step": 8754 }, { "epoch": 0.25561297480365536, "grad_norm": 0.7052403640670108, "learning_rate": 1.6542416869424168e-05, "loss": 0.7423, "step": 8755 }, { "epoch": 0.2556421710315027, "grad_norm": 0.6795208948758289, "learning_rate": 1.654176804541768e-05, "loss": 0.6347, "step": 8756 }, { "epoch": 0.2556713672593501, "grad_norm": 0.685154292855506, "learning_rate": 1.6541119221411192e-05, "loss": 0.6444, "step": 8757 }, { "epoch": 0.25570056348719744, "grad_norm": 0.5948331034318339, "learning_rate": 1.6540470397404704e-05, "loss": 0.5378, "step": 8758 }, { "epoch": 0.2557297597150448, "grad_norm": 0.6826802753192431, "learning_rate": 1.653982157339822e-05, "loss": 0.638, "step": 8759 }, { "epoch": 0.25575895594289216, "grad_norm": 0.6333624806937355, "learning_rate": 1.6539172749391728e-05, "loss": 0.55, "step": 8760 }, { "epoch": 0.2557881521707395, "grad_norm": 0.8180459490953403, "learning_rate": 1.653852392538524e-05, "loss": 0.7038, "step": 8761 }, { "epoch": 0.2558173483985869, "grad_norm": 0.6454372689424032, "learning_rate": 1.6537875101378752e-05, "loss": 0.5725, "step": 8762 }, { "epoch": 0.25584654462643425, "grad_norm": 0.7076639249348394, "learning_rate": 1.6537226277372264e-05, "loss": 0.702, "step": 8763 }, { "epoch": 0.2558757408542816, "grad_norm": 0.6869043733949783, "learning_rate": 1.6536577453365776e-05, "loss": 0.6736, "step": 8764 }, { "epoch": 0.25590493708212897, "grad_norm": 0.6739098498081396, "learning_rate": 1.653592862935929e-05, "loss": 0.6211, "step": 8765 }, { "epoch": 0.25593413330997633, "grad_norm": 0.6785681047043395, "learning_rate": 1.65352798053528e-05, "loss": 0.6099, "step": 8766 }, { "epoch": 0.2559633295378237, "grad_norm": 0.6656036653169646, "learning_rate": 1.6534630981346312e-05, "loss": 0.626, "step": 8767 }, { "epoch": 0.25599252576567105, "grad_norm": 0.6731258266885772, "learning_rate": 1.6533982157339825e-05, "loss": 0.6809, "step": 8768 }, { "epoch": 0.2560217219935184, "grad_norm": 0.6936464807971611, "learning_rate": 1.6533333333333333e-05, "loss": 0.6148, "step": 8769 }, { "epoch": 0.2560509182213658, "grad_norm": 0.721318009886937, "learning_rate": 1.6532684509326845e-05, "loss": 0.7038, "step": 8770 }, { "epoch": 0.25608011444921314, "grad_norm": 0.7523723994647161, "learning_rate": 1.6532035685320357e-05, "loss": 0.7144, "step": 8771 }, { "epoch": 0.2561093106770605, "grad_norm": 0.7328044532577316, "learning_rate": 1.653138686131387e-05, "loss": 0.7508, "step": 8772 }, { "epoch": 0.25613850690490786, "grad_norm": 0.733544302414074, "learning_rate": 1.653073803730738e-05, "loss": 0.7628, "step": 8773 }, { "epoch": 0.2561677031327553, "grad_norm": 0.658961977240987, "learning_rate": 1.6530089213300893e-05, "loss": 0.6567, "step": 8774 }, { "epoch": 0.25619689936060264, "grad_norm": 0.7339883430707148, "learning_rate": 1.6529440389294405e-05, "loss": 0.7037, "step": 8775 }, { "epoch": 0.25622609558845, "grad_norm": 0.682659185719944, "learning_rate": 1.6528791565287917e-05, "loss": 0.7154, "step": 8776 }, { "epoch": 0.25625529181629736, "grad_norm": 0.6345392414755006, "learning_rate": 1.652814274128143e-05, "loss": 0.5635, "step": 8777 }, { "epoch": 0.2562844880441447, "grad_norm": 0.7057654072040256, "learning_rate": 1.652749391727494e-05, "loss": 0.7201, "step": 8778 }, { "epoch": 0.2563136842719921, "grad_norm": 0.655263595925702, "learning_rate": 1.6526845093268453e-05, "loss": 0.6332, "step": 8779 }, { "epoch": 0.25634288049983944, "grad_norm": 0.6152079793742876, "learning_rate": 1.6526196269261965e-05, "loss": 0.5643, "step": 8780 }, { "epoch": 0.2563720767276868, "grad_norm": 0.6435982418289398, "learning_rate": 1.6525547445255477e-05, "loss": 0.5506, "step": 8781 }, { "epoch": 0.25640127295553417, "grad_norm": 0.6486312114717819, "learning_rate": 1.652489862124899e-05, "loss": 0.563, "step": 8782 }, { "epoch": 0.2564304691833815, "grad_norm": 0.6776153401694148, "learning_rate": 1.6524249797242498e-05, "loss": 0.6041, "step": 8783 }, { "epoch": 0.2564596654112289, "grad_norm": 0.6496615182571513, "learning_rate": 1.652360097323601e-05, "loss": 0.6347, "step": 8784 }, { "epoch": 0.25648886163907625, "grad_norm": 0.6440463051113492, "learning_rate": 1.6522952149229522e-05, "loss": 0.549, "step": 8785 }, { "epoch": 0.2565180578669236, "grad_norm": 0.6326933582935607, "learning_rate": 1.6522303325223034e-05, "loss": 0.6008, "step": 8786 }, { "epoch": 0.25654725409477097, "grad_norm": 0.6359624991570796, "learning_rate": 1.6521654501216546e-05, "loss": 0.5741, "step": 8787 }, { "epoch": 0.25657645032261833, "grad_norm": 0.6513089420790313, "learning_rate": 1.6521005677210058e-05, "loss": 0.6158, "step": 8788 }, { "epoch": 0.2566056465504657, "grad_norm": 0.655601217806492, "learning_rate": 1.652035685320357e-05, "loss": 0.6467, "step": 8789 }, { "epoch": 0.25663484277831305, "grad_norm": 0.6475966340093351, "learning_rate": 1.6519708029197082e-05, "loss": 0.5954, "step": 8790 }, { "epoch": 0.2566640390061604, "grad_norm": 0.6965424958851751, "learning_rate": 1.6519059205190594e-05, "loss": 0.6308, "step": 8791 }, { "epoch": 0.2566932352340078, "grad_norm": 0.6866722194799589, "learning_rate": 1.6518410381184103e-05, "loss": 0.6454, "step": 8792 }, { "epoch": 0.25672243146185514, "grad_norm": 0.6775741288410658, "learning_rate": 1.6517761557177615e-05, "loss": 0.6703, "step": 8793 }, { "epoch": 0.2567516276897025, "grad_norm": 0.6086914224085701, "learning_rate": 1.6517112733171127e-05, "loss": 0.5737, "step": 8794 }, { "epoch": 0.25678082391754986, "grad_norm": 0.6629337433524158, "learning_rate": 1.651646390916464e-05, "loss": 0.6173, "step": 8795 }, { "epoch": 0.2568100201453972, "grad_norm": 0.7259457475575036, "learning_rate": 1.651581508515815e-05, "loss": 0.6175, "step": 8796 }, { "epoch": 0.2568392163732446, "grad_norm": 0.6299287546089698, "learning_rate": 1.6515166261151667e-05, "loss": 0.5713, "step": 8797 }, { "epoch": 0.25686841260109194, "grad_norm": 0.6587012956495392, "learning_rate": 1.6514517437145175e-05, "loss": 0.6259, "step": 8798 }, { "epoch": 0.2568976088289393, "grad_norm": 0.6298615780628339, "learning_rate": 1.6513868613138687e-05, "loss": 0.5848, "step": 8799 }, { "epoch": 0.25692680505678667, "grad_norm": 0.6476114841919298, "learning_rate": 1.65132197891322e-05, "loss": 0.6161, "step": 8800 }, { "epoch": 0.25695600128463403, "grad_norm": 0.7045615783898252, "learning_rate": 1.651257096512571e-05, "loss": 0.6639, "step": 8801 }, { "epoch": 0.2569851975124814, "grad_norm": 0.6885528922253297, "learning_rate": 1.6511922141119223e-05, "loss": 0.6491, "step": 8802 }, { "epoch": 0.25701439374032875, "grad_norm": 0.8264691596076488, "learning_rate": 1.6511273317112735e-05, "loss": 0.6936, "step": 8803 }, { "epoch": 0.2570435899681761, "grad_norm": 0.6437990663095243, "learning_rate": 1.6510624493106247e-05, "loss": 0.6418, "step": 8804 }, { "epoch": 0.2570727861960235, "grad_norm": 0.6007571586333861, "learning_rate": 1.650997566909976e-05, "loss": 0.5788, "step": 8805 }, { "epoch": 0.25710198242387083, "grad_norm": 0.6569087044315196, "learning_rate": 1.650932684509327e-05, "loss": 0.6197, "step": 8806 }, { "epoch": 0.2571311786517182, "grad_norm": 0.6922098837867994, "learning_rate": 1.650867802108678e-05, "loss": 0.6682, "step": 8807 }, { "epoch": 0.25716037487956556, "grad_norm": 0.8514637027996061, "learning_rate": 1.6508029197080292e-05, "loss": 0.7693, "step": 8808 }, { "epoch": 0.2571895711074129, "grad_norm": 0.7052693599096693, "learning_rate": 1.6507380373073804e-05, "loss": 0.6343, "step": 8809 }, { "epoch": 0.2572187673352603, "grad_norm": 0.7524842162656069, "learning_rate": 1.6506731549067316e-05, "loss": 0.7476, "step": 8810 }, { "epoch": 0.25724796356310764, "grad_norm": 0.6100557674906082, "learning_rate": 1.6506082725060828e-05, "loss": 0.5548, "step": 8811 }, { "epoch": 0.257277159790955, "grad_norm": 0.7386168200378236, "learning_rate": 1.650543390105434e-05, "loss": 0.7026, "step": 8812 }, { "epoch": 0.25730635601880236, "grad_norm": 0.6821522181041051, "learning_rate": 1.6504785077047852e-05, "loss": 0.6942, "step": 8813 }, { "epoch": 0.2573355522466497, "grad_norm": 0.7941986206148818, "learning_rate": 1.6504136253041364e-05, "loss": 0.8155, "step": 8814 }, { "epoch": 0.2573647484744971, "grad_norm": 0.6354004950608491, "learning_rate": 1.6503487429034876e-05, "loss": 0.581, "step": 8815 }, { "epoch": 0.25739394470234445, "grad_norm": 0.6783850417205012, "learning_rate": 1.6502838605028388e-05, "loss": 0.6719, "step": 8816 }, { "epoch": 0.2574231409301918, "grad_norm": 0.6341055423270424, "learning_rate": 1.65021897810219e-05, "loss": 0.5632, "step": 8817 }, { "epoch": 0.25745233715803917, "grad_norm": 0.706215242924588, "learning_rate": 1.6501540957015412e-05, "loss": 0.6584, "step": 8818 }, { "epoch": 0.25748153338588653, "grad_norm": 0.6911682450115127, "learning_rate": 1.6500892133008924e-05, "loss": 0.749, "step": 8819 }, { "epoch": 0.2575107296137339, "grad_norm": 0.665837797685387, "learning_rate": 1.6500243309002436e-05, "loss": 0.6705, "step": 8820 }, { "epoch": 0.25753992584158125, "grad_norm": 0.6862414029087076, "learning_rate": 1.6499594484995945e-05, "loss": 0.6352, "step": 8821 }, { "epoch": 0.2575691220694286, "grad_norm": 0.6138350416476268, "learning_rate": 1.6498945660989457e-05, "loss": 0.5767, "step": 8822 }, { "epoch": 0.257598318297276, "grad_norm": 0.7134400248068085, "learning_rate": 1.649829683698297e-05, "loss": 0.6772, "step": 8823 }, { "epoch": 0.25762751452512334, "grad_norm": 0.7410169750249008, "learning_rate": 1.649764801297648e-05, "loss": 0.7272, "step": 8824 }, { "epoch": 0.2576567107529707, "grad_norm": 0.7077258661129758, "learning_rate": 1.6496999188969993e-05, "loss": 0.7415, "step": 8825 }, { "epoch": 0.25768590698081806, "grad_norm": 0.712628415804334, "learning_rate": 1.6496350364963505e-05, "loss": 0.7372, "step": 8826 }, { "epoch": 0.2577151032086654, "grad_norm": 0.6573794051481359, "learning_rate": 1.6495701540957017e-05, "loss": 0.653, "step": 8827 }, { "epoch": 0.2577442994365128, "grad_norm": 0.6705929158490197, "learning_rate": 1.649505271695053e-05, "loss": 0.6178, "step": 8828 }, { "epoch": 0.25777349566436014, "grad_norm": 0.6778149662458433, "learning_rate": 1.649440389294404e-05, "loss": 0.5729, "step": 8829 }, { "epoch": 0.2578026918922075, "grad_norm": 0.7198472669874038, "learning_rate": 1.649375506893755e-05, "loss": 0.685, "step": 8830 }, { "epoch": 0.25783188812005486, "grad_norm": 0.6584557305105253, "learning_rate": 1.6493106244931062e-05, "loss": 0.6088, "step": 8831 }, { "epoch": 0.2578610843479022, "grad_norm": 0.6633837536624823, "learning_rate": 1.6492457420924574e-05, "loss": 0.6288, "step": 8832 }, { "epoch": 0.2578902805757496, "grad_norm": 0.7466929126243013, "learning_rate": 1.6491808596918086e-05, "loss": 0.7138, "step": 8833 }, { "epoch": 0.25791947680359695, "grad_norm": 0.6379378712012312, "learning_rate": 1.6491159772911598e-05, "loss": 0.6118, "step": 8834 }, { "epoch": 0.25794867303144436, "grad_norm": 0.5878997626216468, "learning_rate": 1.6490510948905113e-05, "loss": 0.5193, "step": 8835 }, { "epoch": 0.2579778692592917, "grad_norm": 0.6481658515224269, "learning_rate": 1.6489862124898622e-05, "loss": 0.6358, "step": 8836 }, { "epoch": 0.2580070654871391, "grad_norm": 0.6505976542228888, "learning_rate": 1.6489213300892134e-05, "loss": 0.6412, "step": 8837 }, { "epoch": 0.25803626171498645, "grad_norm": 0.7060037649004738, "learning_rate": 1.6488564476885646e-05, "loss": 0.703, "step": 8838 }, { "epoch": 0.2580654579428338, "grad_norm": 0.6480771884516797, "learning_rate": 1.6487915652879158e-05, "loss": 0.6018, "step": 8839 }, { "epoch": 0.25809465417068117, "grad_norm": 0.7039708027256358, "learning_rate": 1.648726682887267e-05, "loss": 0.7669, "step": 8840 }, { "epoch": 0.25812385039852853, "grad_norm": 0.7224324649801512, "learning_rate": 1.6486618004866182e-05, "loss": 0.6572, "step": 8841 }, { "epoch": 0.2581530466263759, "grad_norm": 0.5988369838972192, "learning_rate": 1.6485969180859694e-05, "loss": 0.5286, "step": 8842 }, { "epoch": 0.25818224285422325, "grad_norm": 0.6506996176335941, "learning_rate": 1.6485320356853206e-05, "loss": 0.609, "step": 8843 }, { "epoch": 0.2582114390820706, "grad_norm": 0.609610831493459, "learning_rate": 1.6484671532846718e-05, "loss": 0.5407, "step": 8844 }, { "epoch": 0.258240635309918, "grad_norm": 0.6675280114961397, "learning_rate": 1.6484022708840227e-05, "loss": 0.607, "step": 8845 }, { "epoch": 0.25826983153776534, "grad_norm": 0.6943002079518866, "learning_rate": 1.648337388483374e-05, "loss": 0.7247, "step": 8846 }, { "epoch": 0.2582990277656127, "grad_norm": 0.6852650981047651, "learning_rate": 1.648272506082725e-05, "loss": 0.6651, "step": 8847 }, { "epoch": 0.25832822399346006, "grad_norm": 0.6397795573520515, "learning_rate": 1.6482076236820763e-05, "loss": 0.6113, "step": 8848 }, { "epoch": 0.2583574202213074, "grad_norm": 0.6645077090052703, "learning_rate": 1.6481427412814275e-05, "loss": 0.6771, "step": 8849 }, { "epoch": 0.2583866164491548, "grad_norm": 0.6392840020470165, "learning_rate": 1.6480778588807787e-05, "loss": 0.6298, "step": 8850 }, { "epoch": 0.25841581267700214, "grad_norm": 0.6875672827840347, "learning_rate": 1.64801297648013e-05, "loss": 0.6638, "step": 8851 }, { "epoch": 0.2584450089048495, "grad_norm": 0.6219715172910755, "learning_rate": 1.647948094079481e-05, "loss": 0.5629, "step": 8852 }, { "epoch": 0.25847420513269687, "grad_norm": 0.6562130050181462, "learning_rate": 1.6478832116788323e-05, "loss": 0.5702, "step": 8853 }, { "epoch": 0.2585034013605442, "grad_norm": 0.6744577066876042, "learning_rate": 1.6478183292781835e-05, "loss": 0.6706, "step": 8854 }, { "epoch": 0.2585325975883916, "grad_norm": 0.6613752079126477, "learning_rate": 1.6477534468775347e-05, "loss": 0.6291, "step": 8855 }, { "epoch": 0.25856179381623895, "grad_norm": 0.6827404711219471, "learning_rate": 1.647688564476886e-05, "loss": 0.6984, "step": 8856 }, { "epoch": 0.2585909900440863, "grad_norm": 0.6219577295732878, "learning_rate": 1.647623682076237e-05, "loss": 0.5676, "step": 8857 }, { "epoch": 0.25862018627193367, "grad_norm": 0.6260711862702146, "learning_rate": 1.6475587996755883e-05, "loss": 0.5738, "step": 8858 }, { "epoch": 0.25864938249978103, "grad_norm": 0.6788763238420357, "learning_rate": 1.6474939172749392e-05, "loss": 0.6849, "step": 8859 }, { "epoch": 0.2586785787276284, "grad_norm": 0.6581367086431703, "learning_rate": 1.6474290348742904e-05, "loss": 0.6548, "step": 8860 }, { "epoch": 0.25870777495547576, "grad_norm": 0.6578382119152593, "learning_rate": 1.6473641524736416e-05, "loss": 0.6373, "step": 8861 }, { "epoch": 0.2587369711833231, "grad_norm": 0.667074534000764, "learning_rate": 1.6472992700729928e-05, "loss": 0.6904, "step": 8862 }, { "epoch": 0.2587661674111705, "grad_norm": 0.7058893117073433, "learning_rate": 1.647234387672344e-05, "loss": 0.682, "step": 8863 }, { "epoch": 0.25879536363901784, "grad_norm": 0.6281783578973202, "learning_rate": 1.6471695052716952e-05, "loss": 0.6022, "step": 8864 }, { "epoch": 0.2588245598668652, "grad_norm": 0.658013785466213, "learning_rate": 1.6471046228710464e-05, "loss": 0.6659, "step": 8865 }, { "epoch": 0.25885375609471256, "grad_norm": 0.6405571552673827, "learning_rate": 1.6470397404703976e-05, "loss": 0.6174, "step": 8866 }, { "epoch": 0.2588829523225599, "grad_norm": 0.6897969429653783, "learning_rate": 1.6469748580697488e-05, "loss": 0.6713, "step": 8867 }, { "epoch": 0.2589121485504073, "grad_norm": 0.7412644682115219, "learning_rate": 1.6469099756690997e-05, "loss": 0.6123, "step": 8868 }, { "epoch": 0.25894134477825465, "grad_norm": 0.6691612916646257, "learning_rate": 1.646845093268451e-05, "loss": 0.5685, "step": 8869 }, { "epoch": 0.258970541006102, "grad_norm": 0.6995730064229121, "learning_rate": 1.646780210867802e-05, "loss": 0.6619, "step": 8870 }, { "epoch": 0.25899973723394937, "grad_norm": 0.6202050481774021, "learning_rate": 1.6467153284671533e-05, "loss": 0.5457, "step": 8871 }, { "epoch": 0.25902893346179673, "grad_norm": 0.6365079624155454, "learning_rate": 1.6466504460665048e-05, "loss": 0.633, "step": 8872 }, { "epoch": 0.2590581296896441, "grad_norm": 0.644792406388776, "learning_rate": 1.646585563665856e-05, "loss": 0.6434, "step": 8873 }, { "epoch": 0.25908732591749145, "grad_norm": 0.7300881758090046, "learning_rate": 1.646520681265207e-05, "loss": 0.723, "step": 8874 }, { "epoch": 0.2591165221453388, "grad_norm": 0.662227150907666, "learning_rate": 1.646455798864558e-05, "loss": 0.6644, "step": 8875 }, { "epoch": 0.2591457183731862, "grad_norm": 0.7295074839059817, "learning_rate": 1.6463909164639093e-05, "loss": 0.7365, "step": 8876 }, { "epoch": 0.25917491460103353, "grad_norm": 0.6727627577918581, "learning_rate": 1.6463260340632605e-05, "loss": 0.6999, "step": 8877 }, { "epoch": 0.2592041108288809, "grad_norm": 0.5858089018883029, "learning_rate": 1.6462611516626117e-05, "loss": 0.5095, "step": 8878 }, { "epoch": 0.25923330705672826, "grad_norm": 0.6393104480152586, "learning_rate": 1.646196269261963e-05, "loss": 0.6118, "step": 8879 }, { "epoch": 0.2592625032845756, "grad_norm": 0.7210342323703237, "learning_rate": 1.646131386861314e-05, "loss": 0.688, "step": 8880 }, { "epoch": 0.259291699512423, "grad_norm": 0.6468199911979536, "learning_rate": 1.6460665044606653e-05, "loss": 0.6017, "step": 8881 }, { "epoch": 0.25932089574027034, "grad_norm": 0.6407400893580901, "learning_rate": 1.6460016220600165e-05, "loss": 0.638, "step": 8882 }, { "epoch": 0.2593500919681177, "grad_norm": 0.6899847123130479, "learning_rate": 1.6459367396593674e-05, "loss": 0.6728, "step": 8883 }, { "epoch": 0.25937928819596506, "grad_norm": 0.5722303569748327, "learning_rate": 1.6458718572587186e-05, "loss": 0.5317, "step": 8884 }, { "epoch": 0.2594084844238124, "grad_norm": 0.6236887796265576, "learning_rate": 1.6458069748580698e-05, "loss": 0.5899, "step": 8885 }, { "epoch": 0.2594376806516598, "grad_norm": 0.6477246027710302, "learning_rate": 1.645742092457421e-05, "loss": 0.6753, "step": 8886 }, { "epoch": 0.25946687687950715, "grad_norm": 0.6868068044643978, "learning_rate": 1.6456772100567722e-05, "loss": 0.6993, "step": 8887 }, { "epoch": 0.2594960731073545, "grad_norm": 0.6440738334386198, "learning_rate": 1.6456123276561234e-05, "loss": 0.587, "step": 8888 }, { "epoch": 0.25952526933520187, "grad_norm": 0.6870612692553656, "learning_rate": 1.6455474452554746e-05, "loss": 0.6388, "step": 8889 }, { "epoch": 0.25955446556304923, "grad_norm": 0.7031390928316908, "learning_rate": 1.6454825628548258e-05, "loss": 0.6336, "step": 8890 }, { "epoch": 0.2595836617908966, "grad_norm": 0.6566789159401131, "learning_rate": 1.645417680454177e-05, "loss": 0.6554, "step": 8891 }, { "epoch": 0.25961285801874395, "grad_norm": 0.6799251880912178, "learning_rate": 1.6453527980535282e-05, "loss": 0.6287, "step": 8892 }, { "epoch": 0.2596420542465913, "grad_norm": 0.675250187247384, "learning_rate": 1.6452879156528794e-05, "loss": 0.6298, "step": 8893 }, { "epoch": 0.2596712504744387, "grad_norm": 0.6234355500416291, "learning_rate": 1.6452230332522306e-05, "loss": 0.5677, "step": 8894 }, { "epoch": 0.2597004467022861, "grad_norm": 0.7046787549496265, "learning_rate": 1.6451581508515818e-05, "loss": 0.7124, "step": 8895 }, { "epoch": 0.25972964293013345, "grad_norm": 0.6125296970903839, "learning_rate": 1.645093268450933e-05, "loss": 0.5611, "step": 8896 }, { "epoch": 0.2597588391579808, "grad_norm": 0.6539959401149669, "learning_rate": 1.645028386050284e-05, "loss": 0.6398, "step": 8897 }, { "epoch": 0.2597880353858282, "grad_norm": 0.6984344765093609, "learning_rate": 1.644963503649635e-05, "loss": 0.6594, "step": 8898 }, { "epoch": 0.25981723161367554, "grad_norm": 0.6767102666623979, "learning_rate": 1.6448986212489863e-05, "loss": 0.6473, "step": 8899 }, { "epoch": 0.2598464278415229, "grad_norm": 0.6923790742245461, "learning_rate": 1.6448337388483375e-05, "loss": 0.6743, "step": 8900 }, { "epoch": 0.25987562406937026, "grad_norm": 0.5692147184268945, "learning_rate": 1.6447688564476887e-05, "loss": 0.5234, "step": 8901 }, { "epoch": 0.2599048202972176, "grad_norm": 0.6638552602365618, "learning_rate": 1.64470397404704e-05, "loss": 0.6779, "step": 8902 }, { "epoch": 0.259934016525065, "grad_norm": 0.6519524226466836, "learning_rate": 1.644639091646391e-05, "loss": 0.67, "step": 8903 }, { "epoch": 0.25996321275291234, "grad_norm": 0.5938915648732911, "learning_rate": 1.6445742092457423e-05, "loss": 0.5164, "step": 8904 }, { "epoch": 0.2599924089807597, "grad_norm": 0.5994392419186929, "learning_rate": 1.6445093268450935e-05, "loss": 0.5355, "step": 8905 }, { "epoch": 0.26002160520860707, "grad_norm": 0.701088273455532, "learning_rate": 1.6444444444444444e-05, "loss": 0.6799, "step": 8906 }, { "epoch": 0.2600508014364544, "grad_norm": 0.6192171038220513, "learning_rate": 1.6443795620437956e-05, "loss": 0.5012, "step": 8907 }, { "epoch": 0.2600799976643018, "grad_norm": 0.6373864646678571, "learning_rate": 1.6443146796431468e-05, "loss": 0.5844, "step": 8908 }, { "epoch": 0.26010919389214915, "grad_norm": 0.6307887070647427, "learning_rate": 1.644249797242498e-05, "loss": 0.5985, "step": 8909 }, { "epoch": 0.2601383901199965, "grad_norm": 0.6572380777173128, "learning_rate": 1.6441849148418495e-05, "loss": 0.6484, "step": 8910 }, { "epoch": 0.26016758634784387, "grad_norm": 0.6854030150937561, "learning_rate": 1.6441200324412007e-05, "loss": 0.6756, "step": 8911 }, { "epoch": 0.26019678257569123, "grad_norm": 0.766966914795328, "learning_rate": 1.6440551500405516e-05, "loss": 0.7569, "step": 8912 }, { "epoch": 0.2602259788035386, "grad_norm": 0.6711426629834935, "learning_rate": 1.6439902676399028e-05, "loss": 0.705, "step": 8913 }, { "epoch": 0.26025517503138595, "grad_norm": 0.6117094435531271, "learning_rate": 1.643925385239254e-05, "loss": 0.5416, "step": 8914 }, { "epoch": 0.2602843712592333, "grad_norm": 0.704760577812277, "learning_rate": 1.6438605028386052e-05, "loss": 0.697, "step": 8915 }, { "epoch": 0.2603135674870807, "grad_norm": 0.6724697439962146, "learning_rate": 1.6437956204379564e-05, "loss": 0.6363, "step": 8916 }, { "epoch": 0.26034276371492804, "grad_norm": 0.6996208849036709, "learning_rate": 1.6437307380373076e-05, "loss": 0.6657, "step": 8917 }, { "epoch": 0.2603719599427754, "grad_norm": 0.6569391331512513, "learning_rate": 1.6436658556366588e-05, "loss": 0.629, "step": 8918 }, { "epoch": 0.26040115617062276, "grad_norm": 0.7232930910879865, "learning_rate": 1.64360097323601e-05, "loss": 0.7487, "step": 8919 }, { "epoch": 0.2604303523984701, "grad_norm": 0.6959863345364753, "learning_rate": 1.6435360908353612e-05, "loss": 0.701, "step": 8920 }, { "epoch": 0.2604595486263175, "grad_norm": 0.6190545301304213, "learning_rate": 1.643471208434712e-05, "loss": 0.5363, "step": 8921 }, { "epoch": 0.26048874485416484, "grad_norm": 0.6382765488141537, "learning_rate": 1.6434063260340633e-05, "loss": 0.599, "step": 8922 }, { "epoch": 0.2605179410820122, "grad_norm": 0.6496975811907745, "learning_rate": 1.6433414436334145e-05, "loss": 0.5903, "step": 8923 }, { "epoch": 0.26054713730985957, "grad_norm": 0.7130343205183528, "learning_rate": 1.6432765612327657e-05, "loss": 0.7213, "step": 8924 }, { "epoch": 0.26057633353770693, "grad_norm": 0.7396729058575183, "learning_rate": 1.643211678832117e-05, "loss": 0.7148, "step": 8925 }, { "epoch": 0.2606055297655543, "grad_norm": 0.6373464171419772, "learning_rate": 1.643146796431468e-05, "loss": 0.5802, "step": 8926 }, { "epoch": 0.26063472599340165, "grad_norm": 0.8074733280775748, "learning_rate": 1.6430819140308193e-05, "loss": 0.6826, "step": 8927 }, { "epoch": 0.260663922221249, "grad_norm": 0.6495491686716115, "learning_rate": 1.6430170316301705e-05, "loss": 0.6459, "step": 8928 }, { "epoch": 0.2606931184490964, "grad_norm": 0.6094811719216626, "learning_rate": 1.6429521492295217e-05, "loss": 0.5463, "step": 8929 }, { "epoch": 0.26072231467694373, "grad_norm": 0.6794653332672667, "learning_rate": 1.642887266828873e-05, "loss": 0.6841, "step": 8930 }, { "epoch": 0.2607515109047911, "grad_norm": 0.6197977659069623, "learning_rate": 1.642822384428224e-05, "loss": 0.556, "step": 8931 }, { "epoch": 0.26078070713263846, "grad_norm": 0.6221662117102708, "learning_rate": 1.6427575020275753e-05, "loss": 0.6011, "step": 8932 }, { "epoch": 0.2608099033604858, "grad_norm": 0.6739417844131137, "learning_rate": 1.6426926196269265e-05, "loss": 0.5696, "step": 8933 }, { "epoch": 0.2608390995883332, "grad_norm": 0.6252631374203788, "learning_rate": 1.6426277372262777e-05, "loss": 0.6273, "step": 8934 }, { "epoch": 0.26086829581618054, "grad_norm": 0.7224446242141445, "learning_rate": 1.6425628548256286e-05, "loss": 0.7145, "step": 8935 }, { "epoch": 0.2608974920440279, "grad_norm": 0.6801393669223217, "learning_rate": 1.6424979724249798e-05, "loss": 0.6675, "step": 8936 }, { "epoch": 0.26092668827187526, "grad_norm": 0.7150722299526809, "learning_rate": 1.642433090024331e-05, "loss": 0.7069, "step": 8937 }, { "epoch": 0.2609558844997226, "grad_norm": 0.7254870735922879, "learning_rate": 1.6423682076236822e-05, "loss": 0.723, "step": 8938 }, { "epoch": 0.26098508072757, "grad_norm": 0.6710339295809853, "learning_rate": 1.6423033252230334e-05, "loss": 0.7391, "step": 8939 }, { "epoch": 0.26101427695541735, "grad_norm": 0.6310487855576319, "learning_rate": 1.6422384428223846e-05, "loss": 0.5817, "step": 8940 }, { "epoch": 0.2610434731832647, "grad_norm": 0.6584906816281241, "learning_rate": 1.6421735604217358e-05, "loss": 0.6045, "step": 8941 }, { "epoch": 0.26107266941111207, "grad_norm": 0.6791703339192843, "learning_rate": 1.642108678021087e-05, "loss": 0.6126, "step": 8942 }, { "epoch": 0.26110186563895943, "grad_norm": 0.6387294006540941, "learning_rate": 1.6420437956204382e-05, "loss": 0.663, "step": 8943 }, { "epoch": 0.2611310618668068, "grad_norm": 0.7266601562571665, "learning_rate": 1.641978913219789e-05, "loss": 0.6916, "step": 8944 }, { "epoch": 0.26116025809465415, "grad_norm": 0.6848915800499681, "learning_rate": 1.6419140308191403e-05, "loss": 0.6853, "step": 8945 }, { "epoch": 0.2611894543225015, "grad_norm": 0.6860523999129217, "learning_rate": 1.6418491484184915e-05, "loss": 0.6115, "step": 8946 }, { "epoch": 0.2612186505503489, "grad_norm": 0.6040877803910648, "learning_rate": 1.6417842660178427e-05, "loss": 0.5736, "step": 8947 }, { "epoch": 0.26124784677819624, "grad_norm": 0.6829943153361044, "learning_rate": 1.6417193836171942e-05, "loss": 0.6535, "step": 8948 }, { "epoch": 0.2612770430060436, "grad_norm": 0.6567843253293316, "learning_rate": 1.6416545012165454e-05, "loss": 0.6591, "step": 8949 }, { "epoch": 0.26130623923389096, "grad_norm": 0.7536245439053935, "learning_rate": 1.6415896188158963e-05, "loss": 0.634, "step": 8950 }, { "epoch": 0.2613354354617383, "grad_norm": 0.662538210416626, "learning_rate": 1.6415247364152475e-05, "loss": 0.5954, "step": 8951 }, { "epoch": 0.2613646316895857, "grad_norm": 0.7030492435851059, "learning_rate": 1.6414598540145987e-05, "loss": 0.6421, "step": 8952 }, { "epoch": 0.26139382791743304, "grad_norm": 0.652641434110299, "learning_rate": 1.64139497161395e-05, "loss": 0.632, "step": 8953 }, { "epoch": 0.2614230241452804, "grad_norm": 0.6494141322214251, "learning_rate": 1.641330089213301e-05, "loss": 0.5982, "step": 8954 }, { "epoch": 0.2614522203731278, "grad_norm": 0.6202598619726788, "learning_rate": 1.6412652068126523e-05, "loss": 0.5411, "step": 8955 }, { "epoch": 0.2614814166009752, "grad_norm": 0.6234360778273197, "learning_rate": 1.6412003244120035e-05, "loss": 0.5895, "step": 8956 }, { "epoch": 0.26151061282882254, "grad_norm": 0.7333823631716865, "learning_rate": 1.6411354420113547e-05, "loss": 0.7234, "step": 8957 }, { "epoch": 0.2615398090566699, "grad_norm": 0.6676150596803512, "learning_rate": 1.6410705596107055e-05, "loss": 0.6443, "step": 8958 }, { "epoch": 0.26156900528451726, "grad_norm": 0.6495977757187654, "learning_rate": 1.6410056772100567e-05, "loss": 0.6123, "step": 8959 }, { "epoch": 0.2615982015123646, "grad_norm": 0.6879370292371918, "learning_rate": 1.640940794809408e-05, "loss": 0.6986, "step": 8960 }, { "epoch": 0.261627397740212, "grad_norm": 0.6820934817341495, "learning_rate": 1.640875912408759e-05, "loss": 0.6957, "step": 8961 }, { "epoch": 0.26165659396805935, "grad_norm": 0.6552388360415043, "learning_rate": 1.6408110300081104e-05, "loss": 0.599, "step": 8962 }, { "epoch": 0.2616857901959067, "grad_norm": 0.6889516080479481, "learning_rate": 1.6407461476074616e-05, "loss": 0.6751, "step": 8963 }, { "epoch": 0.26171498642375407, "grad_norm": 0.6544984270362764, "learning_rate": 1.6406812652068128e-05, "loss": 0.6488, "step": 8964 }, { "epoch": 0.26174418265160143, "grad_norm": 0.6179516553628606, "learning_rate": 1.640616382806164e-05, "loss": 0.5713, "step": 8965 }, { "epoch": 0.2617733788794488, "grad_norm": 0.6687794860285731, "learning_rate": 1.640551500405515e-05, "loss": 0.6062, "step": 8966 }, { "epoch": 0.26180257510729615, "grad_norm": 0.649674094220693, "learning_rate": 1.6404866180048664e-05, "loss": 0.6232, "step": 8967 }, { "epoch": 0.2618317713351435, "grad_norm": 0.6288542952092, "learning_rate": 1.6404217356042176e-05, "loss": 0.5804, "step": 8968 }, { "epoch": 0.2618609675629909, "grad_norm": 0.6920062986571597, "learning_rate": 1.6403568532035688e-05, "loss": 0.5707, "step": 8969 }, { "epoch": 0.26189016379083824, "grad_norm": 0.6802847901674126, "learning_rate": 1.64029197080292e-05, "loss": 0.6923, "step": 8970 }, { "epoch": 0.2619193600186856, "grad_norm": 0.6603326065749746, "learning_rate": 1.6402270884022712e-05, "loss": 0.6303, "step": 8971 }, { "epoch": 0.26194855624653296, "grad_norm": 0.6786737062177626, "learning_rate": 1.6401622060016224e-05, "loss": 0.6131, "step": 8972 }, { "epoch": 0.2619777524743803, "grad_norm": 0.6963520966445156, "learning_rate": 1.6400973236009732e-05, "loss": 0.7497, "step": 8973 }, { "epoch": 0.2620069487022277, "grad_norm": 0.6681826451435274, "learning_rate": 1.6400324412003245e-05, "loss": 0.6171, "step": 8974 }, { "epoch": 0.26203614493007504, "grad_norm": 0.6643068270228993, "learning_rate": 1.6399675587996757e-05, "loss": 0.6679, "step": 8975 }, { "epoch": 0.2620653411579224, "grad_norm": 0.7577132150345536, "learning_rate": 1.639902676399027e-05, "loss": 0.7017, "step": 8976 }, { "epoch": 0.26209453738576977, "grad_norm": 0.676395302978986, "learning_rate": 1.639837793998378e-05, "loss": 0.6642, "step": 8977 }, { "epoch": 0.2621237336136171, "grad_norm": 0.6388116623363611, "learning_rate": 1.6397729115977293e-05, "loss": 0.5631, "step": 8978 }, { "epoch": 0.2621529298414645, "grad_norm": 0.6941393026397958, "learning_rate": 1.6397080291970805e-05, "loss": 0.6873, "step": 8979 }, { "epoch": 0.26218212606931185, "grad_norm": 0.6867331202372623, "learning_rate": 1.6396431467964317e-05, "loss": 0.6288, "step": 8980 }, { "epoch": 0.2622113222971592, "grad_norm": 0.6499780387626141, "learning_rate": 1.639578264395783e-05, "loss": 0.6068, "step": 8981 }, { "epoch": 0.26224051852500657, "grad_norm": 0.6345525588085666, "learning_rate": 1.6395133819951337e-05, "loss": 0.5715, "step": 8982 }, { "epoch": 0.26226971475285393, "grad_norm": 0.633133502198227, "learning_rate": 1.639448499594485e-05, "loss": 0.5816, "step": 8983 }, { "epoch": 0.2622989109807013, "grad_norm": 0.6754590750258876, "learning_rate": 1.639383617193836e-05, "loss": 0.6512, "step": 8984 }, { "epoch": 0.26232810720854866, "grad_norm": 0.6154730843220758, "learning_rate": 1.6393187347931873e-05, "loss": 0.5819, "step": 8985 }, { "epoch": 0.262357303436396, "grad_norm": 0.6951390350744662, "learning_rate": 1.639253852392539e-05, "loss": 0.7246, "step": 8986 }, { "epoch": 0.2623864996642434, "grad_norm": 0.6399010150863041, "learning_rate": 1.63918896999189e-05, "loss": 0.6124, "step": 8987 }, { "epoch": 0.26241569589209074, "grad_norm": 0.703718143955937, "learning_rate": 1.639124087591241e-05, "loss": 0.6553, "step": 8988 }, { "epoch": 0.2624448921199381, "grad_norm": 0.6613052528435042, "learning_rate": 1.639059205190592e-05, "loss": 0.5417, "step": 8989 }, { "epoch": 0.26247408834778546, "grad_norm": 0.601089338330462, "learning_rate": 1.6389943227899434e-05, "loss": 0.5238, "step": 8990 }, { "epoch": 0.2625032845756328, "grad_norm": 0.6694531285157077, "learning_rate": 1.6389294403892946e-05, "loss": 0.6594, "step": 8991 }, { "epoch": 0.2625324808034802, "grad_norm": 0.6247014939133414, "learning_rate": 1.6388645579886458e-05, "loss": 0.5966, "step": 8992 }, { "epoch": 0.26256167703132754, "grad_norm": 0.6808933211369645, "learning_rate": 1.638799675587997e-05, "loss": 0.7091, "step": 8993 }, { "epoch": 0.2625908732591749, "grad_norm": 0.6807939281448679, "learning_rate": 1.638734793187348e-05, "loss": 0.6464, "step": 8994 }, { "epoch": 0.26262006948702227, "grad_norm": 0.6442553853506715, "learning_rate": 1.6386699107866994e-05, "loss": 0.6452, "step": 8995 }, { "epoch": 0.26264926571486963, "grad_norm": 0.6786718603628666, "learning_rate": 1.6386050283860502e-05, "loss": 0.6794, "step": 8996 }, { "epoch": 0.262678461942717, "grad_norm": 0.6626242398724788, "learning_rate": 1.6385401459854014e-05, "loss": 0.6601, "step": 8997 }, { "epoch": 0.26270765817056435, "grad_norm": 0.6636729412426715, "learning_rate": 1.6384752635847526e-05, "loss": 0.6244, "step": 8998 }, { "epoch": 0.2627368543984117, "grad_norm": 0.6800139380994928, "learning_rate": 1.638410381184104e-05, "loss": 0.7137, "step": 8999 }, { "epoch": 0.2627660506262591, "grad_norm": 0.6346916249366307, "learning_rate": 1.638345498783455e-05, "loss": 0.5991, "step": 9000 }, { "epoch": 0.26279524685410643, "grad_norm": 0.716525435817693, "learning_rate": 1.6382806163828062e-05, "loss": 0.6743, "step": 9001 }, { "epoch": 0.2628244430819538, "grad_norm": 0.63598937854189, "learning_rate": 1.6382157339821575e-05, "loss": 0.5427, "step": 9002 }, { "epoch": 0.26285363930980116, "grad_norm": 0.662973995290861, "learning_rate": 1.6381508515815087e-05, "loss": 0.6615, "step": 9003 }, { "epoch": 0.2628828355376485, "grad_norm": 0.6781860855545242, "learning_rate": 1.63808596918086e-05, "loss": 0.6183, "step": 9004 }, { "epoch": 0.2629120317654959, "grad_norm": 0.5805362499739593, "learning_rate": 1.638021086780211e-05, "loss": 0.5481, "step": 9005 }, { "epoch": 0.26294122799334324, "grad_norm": 0.6368622080176455, "learning_rate": 1.6379562043795623e-05, "loss": 0.593, "step": 9006 }, { "epoch": 0.2629704242211906, "grad_norm": 0.6419527216285013, "learning_rate": 1.6378913219789135e-05, "loss": 0.5988, "step": 9007 }, { "epoch": 0.26299962044903796, "grad_norm": 0.7738619827335947, "learning_rate": 1.6378264395782647e-05, "loss": 0.5501, "step": 9008 }, { "epoch": 0.2630288166768853, "grad_norm": 0.699170647086527, "learning_rate": 1.637761557177616e-05, "loss": 0.6621, "step": 9009 }, { "epoch": 0.2630580129047327, "grad_norm": 0.620561867629861, "learning_rate": 1.637696674776967e-05, "loss": 0.547, "step": 9010 }, { "epoch": 0.26308720913258005, "grad_norm": 0.6970647188968452, "learning_rate": 1.637631792376318e-05, "loss": 0.6157, "step": 9011 }, { "epoch": 0.2631164053604274, "grad_norm": 0.6655266529911502, "learning_rate": 1.637566909975669e-05, "loss": 0.6459, "step": 9012 }, { "epoch": 0.26314560158827477, "grad_norm": 0.594858664694838, "learning_rate": 1.6375020275750203e-05, "loss": 0.5322, "step": 9013 }, { "epoch": 0.26317479781612213, "grad_norm": 0.6176688738836926, "learning_rate": 1.6374371451743715e-05, "loss": 0.5456, "step": 9014 }, { "epoch": 0.2632039940439695, "grad_norm": 0.6807477232144402, "learning_rate": 1.6373722627737227e-05, "loss": 0.5763, "step": 9015 }, { "epoch": 0.2632331902718169, "grad_norm": 0.6692280350740272, "learning_rate": 1.637307380373074e-05, "loss": 0.6558, "step": 9016 }, { "epoch": 0.26326238649966427, "grad_norm": 0.7125381154756651, "learning_rate": 1.637242497972425e-05, "loss": 0.7277, "step": 9017 }, { "epoch": 0.26329158272751163, "grad_norm": 0.7009700477600179, "learning_rate": 1.6371776155717764e-05, "loss": 0.6626, "step": 9018 }, { "epoch": 0.263320778955359, "grad_norm": 0.7241324785913107, "learning_rate": 1.6371127331711276e-05, "loss": 0.7406, "step": 9019 }, { "epoch": 0.26334997518320635, "grad_norm": 0.7219541616614946, "learning_rate": 1.6370478507704784e-05, "loss": 0.7431, "step": 9020 }, { "epoch": 0.2633791714110537, "grad_norm": 0.672613368897954, "learning_rate": 1.6369829683698296e-05, "loss": 0.6598, "step": 9021 }, { "epoch": 0.2634083676389011, "grad_norm": 0.7667382440283218, "learning_rate": 1.6369180859691808e-05, "loss": 0.6796, "step": 9022 }, { "epoch": 0.26343756386674844, "grad_norm": 0.6962755781063674, "learning_rate": 1.6368532035685324e-05, "loss": 0.6839, "step": 9023 }, { "epoch": 0.2634667600945958, "grad_norm": 0.6362131085489409, "learning_rate": 1.6367883211678836e-05, "loss": 0.5608, "step": 9024 }, { "epoch": 0.26349595632244316, "grad_norm": 0.6645892347766037, "learning_rate": 1.6367234387672348e-05, "loss": 0.6321, "step": 9025 }, { "epoch": 0.2635251525502905, "grad_norm": 0.6666518508500211, "learning_rate": 1.6366585563665856e-05, "loss": 0.6222, "step": 9026 }, { "epoch": 0.2635543487781379, "grad_norm": 0.685957450837711, "learning_rate": 1.636593673965937e-05, "loss": 0.7133, "step": 9027 }, { "epoch": 0.26358354500598524, "grad_norm": 0.6733231138718845, "learning_rate": 1.636528791565288e-05, "loss": 0.5738, "step": 9028 }, { "epoch": 0.2636127412338326, "grad_norm": 0.6027237677910994, "learning_rate": 1.6364639091646392e-05, "loss": 0.5478, "step": 9029 }, { "epoch": 0.26364193746167996, "grad_norm": 0.6369283187909922, "learning_rate": 1.6363990267639904e-05, "loss": 0.5849, "step": 9030 }, { "epoch": 0.2636711336895273, "grad_norm": 0.7276719701611453, "learning_rate": 1.6363341443633417e-05, "loss": 0.7638, "step": 9031 }, { "epoch": 0.2637003299173747, "grad_norm": 0.6290101803469587, "learning_rate": 1.636269261962693e-05, "loss": 0.5457, "step": 9032 }, { "epoch": 0.26372952614522205, "grad_norm": 0.6344919730206746, "learning_rate": 1.636204379562044e-05, "loss": 0.6445, "step": 9033 }, { "epoch": 0.2637587223730694, "grad_norm": 0.6783850840227645, "learning_rate": 1.636139497161395e-05, "loss": 0.6466, "step": 9034 }, { "epoch": 0.26378791860091677, "grad_norm": 0.6441824568006554, "learning_rate": 1.636074614760746e-05, "loss": 0.5925, "step": 9035 }, { "epoch": 0.26381711482876413, "grad_norm": 0.6296321484601751, "learning_rate": 1.6360097323600973e-05, "loss": 0.5674, "step": 9036 }, { "epoch": 0.2638463110566115, "grad_norm": 0.6716638071232159, "learning_rate": 1.6359448499594485e-05, "loss": 0.6339, "step": 9037 }, { "epoch": 0.26387550728445885, "grad_norm": 0.668170382553254, "learning_rate": 1.6358799675587997e-05, "loss": 0.624, "step": 9038 }, { "epoch": 0.2639047035123062, "grad_norm": 0.6955761359510827, "learning_rate": 1.635815085158151e-05, "loss": 0.6627, "step": 9039 }, { "epoch": 0.2639338997401536, "grad_norm": 0.6863067070117082, "learning_rate": 1.635750202757502e-05, "loss": 0.6483, "step": 9040 }, { "epoch": 0.26396309596800094, "grad_norm": 0.6583626474760017, "learning_rate": 1.6356853203568533e-05, "loss": 0.5906, "step": 9041 }, { "epoch": 0.2639922921958483, "grad_norm": 0.6257619378371823, "learning_rate": 1.6356204379562045e-05, "loss": 0.5756, "step": 9042 }, { "epoch": 0.26402148842369566, "grad_norm": 0.7682473589114553, "learning_rate": 1.6355555555555557e-05, "loss": 0.6691, "step": 9043 }, { "epoch": 0.264050684651543, "grad_norm": 0.609533597162195, "learning_rate": 1.635490673154907e-05, "loss": 0.5628, "step": 9044 }, { "epoch": 0.2640798808793904, "grad_norm": 0.6192887693196906, "learning_rate": 1.635425790754258e-05, "loss": 0.5604, "step": 9045 }, { "epoch": 0.26410907710723774, "grad_norm": 0.6781886483508281, "learning_rate": 1.6353609083536094e-05, "loss": 0.5992, "step": 9046 }, { "epoch": 0.2641382733350851, "grad_norm": 0.6476834187031395, "learning_rate": 1.6352960259529606e-05, "loss": 0.5915, "step": 9047 }, { "epoch": 0.26416746956293247, "grad_norm": 0.6965185199762711, "learning_rate": 1.6352311435523118e-05, "loss": 0.6186, "step": 9048 }, { "epoch": 0.2641966657907798, "grad_norm": 0.6618424433417073, "learning_rate": 1.6351662611516626e-05, "loss": 0.5901, "step": 9049 }, { "epoch": 0.2642258620186272, "grad_norm": 0.6664963848017711, "learning_rate": 1.6351013787510138e-05, "loss": 0.6315, "step": 9050 }, { "epoch": 0.26425505824647455, "grad_norm": 0.7478599139579518, "learning_rate": 1.635036496350365e-05, "loss": 0.6643, "step": 9051 }, { "epoch": 0.2642842544743219, "grad_norm": 0.7121915318047851, "learning_rate": 1.6349716139497162e-05, "loss": 0.7208, "step": 9052 }, { "epoch": 0.2643134507021693, "grad_norm": 0.6650791724931377, "learning_rate": 1.6349067315490674e-05, "loss": 0.6656, "step": 9053 }, { "epoch": 0.26434264693001663, "grad_norm": 0.7342345611377594, "learning_rate": 1.6348418491484186e-05, "loss": 0.684, "step": 9054 }, { "epoch": 0.264371843157864, "grad_norm": 0.6265928879726596, "learning_rate": 1.63477696674777e-05, "loss": 0.5656, "step": 9055 }, { "epoch": 0.26440103938571136, "grad_norm": 0.6607070740545785, "learning_rate": 1.634712084347121e-05, "loss": 0.6499, "step": 9056 }, { "epoch": 0.2644302356135587, "grad_norm": 0.6361352301428183, "learning_rate": 1.6346472019464722e-05, "loss": 0.6193, "step": 9057 }, { "epoch": 0.2644594318414061, "grad_norm": 0.6608649858180394, "learning_rate": 1.634582319545823e-05, "loss": 0.5461, "step": 9058 }, { "epoch": 0.26448862806925344, "grad_norm": 0.6043001648417867, "learning_rate": 1.6345174371451743e-05, "loss": 0.5695, "step": 9059 }, { "epoch": 0.2645178242971008, "grad_norm": 0.6911992889474319, "learning_rate": 1.6344525547445255e-05, "loss": 0.6837, "step": 9060 }, { "epoch": 0.26454702052494816, "grad_norm": 0.6489983617792006, "learning_rate": 1.634387672343877e-05, "loss": 0.6671, "step": 9061 }, { "epoch": 0.2645762167527955, "grad_norm": 0.6771719450389928, "learning_rate": 1.6343227899432283e-05, "loss": 0.6912, "step": 9062 }, { "epoch": 0.2646054129806429, "grad_norm": 0.7212619219140323, "learning_rate": 1.6342579075425795e-05, "loss": 0.6378, "step": 9063 }, { "epoch": 0.26463460920849025, "grad_norm": 0.6994985348824205, "learning_rate": 1.6341930251419303e-05, "loss": 0.6871, "step": 9064 }, { "epoch": 0.2646638054363376, "grad_norm": 1.0854272867807748, "learning_rate": 1.6341281427412815e-05, "loss": 0.7126, "step": 9065 }, { "epoch": 0.26469300166418497, "grad_norm": 0.6374897239716951, "learning_rate": 1.6340632603406327e-05, "loss": 0.6531, "step": 9066 }, { "epoch": 0.26472219789203233, "grad_norm": 0.681958855045775, "learning_rate": 1.633998377939984e-05, "loss": 0.6554, "step": 9067 }, { "epoch": 0.2647513941198797, "grad_norm": 0.6888750933952044, "learning_rate": 1.633933495539335e-05, "loss": 0.7232, "step": 9068 }, { "epoch": 0.26478059034772705, "grad_norm": 0.7329573951453207, "learning_rate": 1.6338686131386863e-05, "loss": 0.6287, "step": 9069 }, { "epoch": 0.2648097865755744, "grad_norm": 0.6585791561849396, "learning_rate": 1.6338037307380375e-05, "loss": 0.6044, "step": 9070 }, { "epoch": 0.2648389828034218, "grad_norm": 0.7047495884378242, "learning_rate": 1.6337388483373887e-05, "loss": 0.6369, "step": 9071 }, { "epoch": 0.26486817903126914, "grad_norm": 0.6528912103958215, "learning_rate": 1.6336739659367396e-05, "loss": 0.6483, "step": 9072 }, { "epoch": 0.2648973752591165, "grad_norm": 0.6343818189794009, "learning_rate": 1.6336090835360908e-05, "loss": 0.5742, "step": 9073 }, { "epoch": 0.26492657148696386, "grad_norm": 0.679666742294596, "learning_rate": 1.633544201135442e-05, "loss": 0.6108, "step": 9074 }, { "epoch": 0.2649557677148112, "grad_norm": 0.8529725572652304, "learning_rate": 1.6334793187347932e-05, "loss": 0.7079, "step": 9075 }, { "epoch": 0.26498496394265864, "grad_norm": 0.666970738373614, "learning_rate": 1.6334144363341444e-05, "loss": 0.6542, "step": 9076 }, { "epoch": 0.265014160170506, "grad_norm": 0.6720216221872386, "learning_rate": 1.6333495539334956e-05, "loss": 0.7003, "step": 9077 }, { "epoch": 0.26504335639835336, "grad_norm": 0.6711471049087843, "learning_rate": 1.6332846715328468e-05, "loss": 0.6863, "step": 9078 }, { "epoch": 0.2650725526262007, "grad_norm": 0.7413489035054572, "learning_rate": 1.633219789132198e-05, "loss": 0.7842, "step": 9079 }, { "epoch": 0.2651017488540481, "grad_norm": 0.6510703786088842, "learning_rate": 1.6331549067315492e-05, "loss": 0.647, "step": 9080 }, { "epoch": 0.26513094508189544, "grad_norm": 0.6756693482683616, "learning_rate": 1.6330900243309004e-05, "loss": 0.6356, "step": 9081 }, { "epoch": 0.2651601413097428, "grad_norm": 0.6740146771311742, "learning_rate": 1.6330251419302516e-05, "loss": 0.6632, "step": 9082 }, { "epoch": 0.26518933753759016, "grad_norm": 0.6290615685561769, "learning_rate": 1.632960259529603e-05, "loss": 0.5961, "step": 9083 }, { "epoch": 0.2652185337654375, "grad_norm": 0.6500754944249095, "learning_rate": 1.632895377128954e-05, "loss": 0.587, "step": 9084 }, { "epoch": 0.2652477299932849, "grad_norm": 0.6124099077668826, "learning_rate": 1.6328304947283052e-05, "loss": 0.565, "step": 9085 }, { "epoch": 0.26527692622113225, "grad_norm": 0.5957108085899756, "learning_rate": 1.6327656123276564e-05, "loss": 0.5693, "step": 9086 }, { "epoch": 0.2653061224489796, "grad_norm": 0.6125794040487, "learning_rate": 1.6327007299270073e-05, "loss": 0.5848, "step": 9087 }, { "epoch": 0.26533531867682697, "grad_norm": 0.6748495770924382, "learning_rate": 1.6326358475263585e-05, "loss": 0.6582, "step": 9088 }, { "epoch": 0.26536451490467433, "grad_norm": 0.6788567563901219, "learning_rate": 1.6325709651257097e-05, "loss": 0.611, "step": 9089 }, { "epoch": 0.2653937111325217, "grad_norm": 0.6802019734370045, "learning_rate": 1.632506082725061e-05, "loss": 0.65, "step": 9090 }, { "epoch": 0.26542290736036905, "grad_norm": 0.6994317984660594, "learning_rate": 1.632441200324412e-05, "loss": 0.6925, "step": 9091 }, { "epoch": 0.2654521035882164, "grad_norm": 0.6403221755799631, "learning_rate": 1.6323763179237633e-05, "loss": 0.6074, "step": 9092 }, { "epoch": 0.2654812998160638, "grad_norm": 0.7100199725620735, "learning_rate": 1.6323114355231145e-05, "loss": 0.6799, "step": 9093 }, { "epoch": 0.26551049604391114, "grad_norm": 0.6666381229418512, "learning_rate": 1.6322465531224657e-05, "loss": 0.69, "step": 9094 }, { "epoch": 0.2655396922717585, "grad_norm": 0.6667259844295066, "learning_rate": 1.632181670721817e-05, "loss": 0.5942, "step": 9095 }, { "epoch": 0.26556888849960586, "grad_norm": 0.6935772627282148, "learning_rate": 1.6321167883211678e-05, "loss": 0.7044, "step": 9096 }, { "epoch": 0.2655980847274532, "grad_norm": 0.6083710946126792, "learning_rate": 1.632051905920519e-05, "loss": 0.5938, "step": 9097 }, { "epoch": 0.2656272809553006, "grad_norm": 0.7296831773826562, "learning_rate": 1.6319870235198702e-05, "loss": 0.7743, "step": 9098 }, { "epoch": 0.26565647718314794, "grad_norm": 0.7100563353882126, "learning_rate": 1.6319221411192217e-05, "loss": 0.7061, "step": 9099 }, { "epoch": 0.2656856734109953, "grad_norm": 0.6366639845676445, "learning_rate": 1.631857258718573e-05, "loss": 0.6008, "step": 9100 }, { "epoch": 0.26571486963884267, "grad_norm": 0.7367841488079975, "learning_rate": 1.6317923763179238e-05, "loss": 0.6645, "step": 9101 }, { "epoch": 0.26574406586669, "grad_norm": 0.6402986446829178, "learning_rate": 1.631727493917275e-05, "loss": 0.5657, "step": 9102 }, { "epoch": 0.2657732620945374, "grad_norm": 0.6340748093747952, "learning_rate": 1.6316626115166262e-05, "loss": 0.5898, "step": 9103 }, { "epoch": 0.26580245832238475, "grad_norm": 0.6791382326623896, "learning_rate": 1.6315977291159774e-05, "loss": 0.6374, "step": 9104 }, { "epoch": 0.2658316545502321, "grad_norm": 0.7178755483478472, "learning_rate": 1.6315328467153286e-05, "loss": 0.7454, "step": 9105 }, { "epoch": 0.26586085077807947, "grad_norm": 0.6526792937929696, "learning_rate": 1.6314679643146798e-05, "loss": 0.6051, "step": 9106 }, { "epoch": 0.26589004700592683, "grad_norm": 0.6720213144035164, "learning_rate": 1.631403081914031e-05, "loss": 0.6232, "step": 9107 }, { "epoch": 0.2659192432337742, "grad_norm": 0.6939251232330319, "learning_rate": 1.6313381995133822e-05, "loss": 0.6619, "step": 9108 }, { "epoch": 0.26594843946162156, "grad_norm": 0.6888632568956332, "learning_rate": 1.6312733171127334e-05, "loss": 0.593, "step": 9109 }, { "epoch": 0.2659776356894689, "grad_norm": 0.6817956143331482, "learning_rate": 1.6312084347120843e-05, "loss": 0.6799, "step": 9110 }, { "epoch": 0.2660068319173163, "grad_norm": 0.6198370570238878, "learning_rate": 1.6311435523114355e-05, "loss": 0.5505, "step": 9111 }, { "epoch": 0.26603602814516364, "grad_norm": 0.6582894082661931, "learning_rate": 1.6310786699107867e-05, "loss": 0.6436, "step": 9112 }, { "epoch": 0.266065224373011, "grad_norm": 0.6287801477879627, "learning_rate": 1.631013787510138e-05, "loss": 0.6099, "step": 9113 }, { "epoch": 0.26609442060085836, "grad_norm": 0.6534603690132481, "learning_rate": 1.630948905109489e-05, "loss": 0.6701, "step": 9114 }, { "epoch": 0.2661236168287057, "grad_norm": 0.7370237938459575, "learning_rate": 1.6308840227088403e-05, "loss": 0.6853, "step": 9115 }, { "epoch": 0.2661528130565531, "grad_norm": 0.6479214194076094, "learning_rate": 1.6308191403081915e-05, "loss": 0.6212, "step": 9116 }, { "epoch": 0.26618200928440044, "grad_norm": 0.7608115296300999, "learning_rate": 1.6307542579075427e-05, "loss": 0.6105, "step": 9117 }, { "epoch": 0.2662112055122478, "grad_norm": 0.7326684684975332, "learning_rate": 1.630689375506894e-05, "loss": 0.6993, "step": 9118 }, { "epoch": 0.26624040174009517, "grad_norm": 0.7139844739444324, "learning_rate": 1.630624493106245e-05, "loss": 0.6191, "step": 9119 }, { "epoch": 0.26626959796794253, "grad_norm": 0.6884725129717285, "learning_rate": 1.6305596107055963e-05, "loss": 0.6259, "step": 9120 }, { "epoch": 0.2662987941957899, "grad_norm": 0.6802018680686353, "learning_rate": 1.6304947283049475e-05, "loss": 0.6816, "step": 9121 }, { "epoch": 0.26632799042363725, "grad_norm": 0.6499087663973527, "learning_rate": 1.6304298459042987e-05, "loss": 0.6273, "step": 9122 }, { "epoch": 0.2663571866514846, "grad_norm": 0.644679597948256, "learning_rate": 1.63036496350365e-05, "loss": 0.6466, "step": 9123 }, { "epoch": 0.266386382879332, "grad_norm": 0.6686026028452398, "learning_rate": 1.630300081103001e-05, "loss": 0.6165, "step": 9124 }, { "epoch": 0.26641557910717933, "grad_norm": 0.6948214572006616, "learning_rate": 1.630235198702352e-05, "loss": 0.7304, "step": 9125 }, { "epoch": 0.2664447753350267, "grad_norm": 0.6888531541512184, "learning_rate": 1.6301703163017032e-05, "loss": 0.6216, "step": 9126 }, { "epoch": 0.26647397156287406, "grad_norm": 0.6613474036766558, "learning_rate": 1.6301054339010544e-05, "loss": 0.5961, "step": 9127 }, { "epoch": 0.2665031677907214, "grad_norm": 0.6624383095521048, "learning_rate": 1.6300405515004056e-05, "loss": 0.5808, "step": 9128 }, { "epoch": 0.2665323640185688, "grad_norm": 0.6197797468115144, "learning_rate": 1.6299756690997568e-05, "loss": 0.5859, "step": 9129 }, { "epoch": 0.26656156024641614, "grad_norm": 0.8017797761758986, "learning_rate": 1.629910786699108e-05, "loss": 0.6375, "step": 9130 }, { "epoch": 0.2665907564742635, "grad_norm": 0.6503768088711437, "learning_rate": 1.6298459042984592e-05, "loss": 0.6177, "step": 9131 }, { "epoch": 0.26661995270211086, "grad_norm": 0.6602179601985126, "learning_rate": 1.6297810218978104e-05, "loss": 0.6772, "step": 9132 }, { "epoch": 0.2666491489299582, "grad_norm": 0.7231329708569819, "learning_rate": 1.6297161394971616e-05, "loss": 0.6228, "step": 9133 }, { "epoch": 0.2666783451578056, "grad_norm": 0.6533436006049479, "learning_rate": 1.6296512570965125e-05, "loss": 0.623, "step": 9134 }, { "epoch": 0.26670754138565295, "grad_norm": 0.6491332568682362, "learning_rate": 1.6295863746958637e-05, "loss": 0.6357, "step": 9135 }, { "epoch": 0.26673673761350036, "grad_norm": 0.6723563758330916, "learning_rate": 1.629521492295215e-05, "loss": 0.6275, "step": 9136 }, { "epoch": 0.2667659338413477, "grad_norm": 0.6716062047620373, "learning_rate": 1.6294566098945664e-05, "loss": 0.6532, "step": 9137 }, { "epoch": 0.2667951300691951, "grad_norm": 0.6122993062541848, "learning_rate": 1.6293917274939176e-05, "loss": 0.6053, "step": 9138 }, { "epoch": 0.26682432629704245, "grad_norm": 0.680295614488018, "learning_rate": 1.6293268450932685e-05, "loss": 0.6764, "step": 9139 }, { "epoch": 0.2668535225248898, "grad_norm": 0.7203620157933249, "learning_rate": 1.6292619626926197e-05, "loss": 0.6265, "step": 9140 }, { "epoch": 0.26688271875273717, "grad_norm": 0.702030337289033, "learning_rate": 1.629197080291971e-05, "loss": 0.6328, "step": 9141 }, { "epoch": 0.26691191498058453, "grad_norm": 0.6636008889489089, "learning_rate": 1.629132197891322e-05, "loss": 0.6515, "step": 9142 }, { "epoch": 0.2669411112084319, "grad_norm": 0.6868618389840841, "learning_rate": 1.6290673154906733e-05, "loss": 0.6388, "step": 9143 }, { "epoch": 0.26697030743627925, "grad_norm": 0.688771964389305, "learning_rate": 1.6290024330900245e-05, "loss": 0.6676, "step": 9144 }, { "epoch": 0.2669995036641266, "grad_norm": 0.6643139167434955, "learning_rate": 1.6289375506893757e-05, "loss": 0.6487, "step": 9145 }, { "epoch": 0.267028699891974, "grad_norm": 0.6636465613603186, "learning_rate": 1.628872668288727e-05, "loss": 0.6483, "step": 9146 }, { "epoch": 0.26705789611982134, "grad_norm": 0.6350380177311163, "learning_rate": 1.628807785888078e-05, "loss": 0.6037, "step": 9147 }, { "epoch": 0.2670870923476687, "grad_norm": 0.6718507659730649, "learning_rate": 1.628742903487429e-05, "loss": 0.6421, "step": 9148 }, { "epoch": 0.26711628857551606, "grad_norm": 0.6137326149409308, "learning_rate": 1.6286780210867802e-05, "loss": 0.5749, "step": 9149 }, { "epoch": 0.2671454848033634, "grad_norm": 0.6707166435999201, "learning_rate": 1.6286131386861314e-05, "loss": 0.6545, "step": 9150 }, { "epoch": 0.2671746810312108, "grad_norm": 0.979208496599938, "learning_rate": 1.6285482562854826e-05, "loss": 0.6363, "step": 9151 }, { "epoch": 0.26720387725905814, "grad_norm": 0.6939448363868305, "learning_rate": 1.6284833738848338e-05, "loss": 0.7176, "step": 9152 }, { "epoch": 0.2672330734869055, "grad_norm": 0.7662646521999726, "learning_rate": 1.628418491484185e-05, "loss": 0.6844, "step": 9153 }, { "epoch": 0.26726226971475286, "grad_norm": 0.662946483595123, "learning_rate": 1.6283536090835362e-05, "loss": 0.6507, "step": 9154 }, { "epoch": 0.2672914659426002, "grad_norm": 0.695959220085246, "learning_rate": 1.6282887266828874e-05, "loss": 0.7351, "step": 9155 }, { "epoch": 0.2673206621704476, "grad_norm": 0.6880027871653839, "learning_rate": 1.6282238442822386e-05, "loss": 0.7276, "step": 9156 }, { "epoch": 0.26734985839829495, "grad_norm": 0.615376875960101, "learning_rate": 1.6281589618815898e-05, "loss": 0.5513, "step": 9157 }, { "epoch": 0.2673790546261423, "grad_norm": 0.6624406053360969, "learning_rate": 1.628094079480941e-05, "loss": 0.6413, "step": 9158 }, { "epoch": 0.26740825085398967, "grad_norm": 0.6916789581501568, "learning_rate": 1.6280291970802922e-05, "loss": 0.6976, "step": 9159 }, { "epoch": 0.26743744708183703, "grad_norm": 0.7057622566892495, "learning_rate": 1.6279643146796434e-05, "loss": 0.697, "step": 9160 }, { "epoch": 0.2674666433096844, "grad_norm": 0.6091386226852569, "learning_rate": 1.6278994322789946e-05, "loss": 0.5671, "step": 9161 }, { "epoch": 0.26749583953753175, "grad_norm": 0.7302044510235804, "learning_rate": 1.6278345498783458e-05, "loss": 0.5944, "step": 9162 }, { "epoch": 0.2675250357653791, "grad_norm": 0.6826539840651338, "learning_rate": 1.6277696674776967e-05, "loss": 0.6505, "step": 9163 }, { "epoch": 0.2675542319932265, "grad_norm": 0.6391351671602297, "learning_rate": 1.627704785077048e-05, "loss": 0.617, "step": 9164 }, { "epoch": 0.26758342822107384, "grad_norm": 0.692676536274711, "learning_rate": 1.627639902676399e-05, "loss": 0.6719, "step": 9165 }, { "epoch": 0.2676126244489212, "grad_norm": 0.6992626402447729, "learning_rate": 1.6275750202757503e-05, "loss": 0.6787, "step": 9166 }, { "epoch": 0.26764182067676856, "grad_norm": 0.6640046495963394, "learning_rate": 1.6275101378751015e-05, "loss": 0.6534, "step": 9167 }, { "epoch": 0.2676710169046159, "grad_norm": 0.7250489016461662, "learning_rate": 1.6274452554744527e-05, "loss": 0.6948, "step": 9168 }, { "epoch": 0.2677002131324633, "grad_norm": 0.5908121738788938, "learning_rate": 1.627380373073804e-05, "loss": 0.5414, "step": 9169 }, { "epoch": 0.26772940936031064, "grad_norm": 0.6833526997778027, "learning_rate": 1.627315490673155e-05, "loss": 0.6118, "step": 9170 }, { "epoch": 0.267758605588158, "grad_norm": 0.6768681857043237, "learning_rate": 1.6272506082725063e-05, "loss": 0.6821, "step": 9171 }, { "epoch": 0.26778780181600537, "grad_norm": 0.6872699196354538, "learning_rate": 1.627185725871857e-05, "loss": 0.6128, "step": 9172 }, { "epoch": 0.2678169980438527, "grad_norm": 0.6963979658702574, "learning_rate": 1.6271208434712084e-05, "loss": 0.6626, "step": 9173 }, { "epoch": 0.2678461942717001, "grad_norm": 0.7229096095036999, "learning_rate": 1.62705596107056e-05, "loss": 0.6755, "step": 9174 }, { "epoch": 0.26787539049954745, "grad_norm": 0.6892137307250984, "learning_rate": 1.626991078669911e-05, "loss": 0.7004, "step": 9175 }, { "epoch": 0.2679045867273948, "grad_norm": 0.7032272372523478, "learning_rate": 1.6269261962692623e-05, "loss": 0.6352, "step": 9176 }, { "epoch": 0.2679337829552422, "grad_norm": 0.668629115889962, "learning_rate": 1.6268613138686132e-05, "loss": 0.6801, "step": 9177 }, { "epoch": 0.26796297918308953, "grad_norm": 0.6313253881398769, "learning_rate": 1.6267964314679644e-05, "loss": 0.5454, "step": 9178 }, { "epoch": 0.2679921754109369, "grad_norm": 0.7365781111587874, "learning_rate": 1.6267315490673156e-05, "loss": 0.7561, "step": 9179 }, { "epoch": 0.26802137163878426, "grad_norm": 0.6482033617117009, "learning_rate": 1.6266666666666668e-05, "loss": 0.5434, "step": 9180 }, { "epoch": 0.2680505678666316, "grad_norm": 0.6463813463311929, "learning_rate": 1.626601784266018e-05, "loss": 0.6089, "step": 9181 }, { "epoch": 0.268079764094479, "grad_norm": 0.6587908257912007, "learning_rate": 1.6265369018653692e-05, "loss": 0.6168, "step": 9182 }, { "epoch": 0.26810896032232634, "grad_norm": 0.6819096695433121, "learning_rate": 1.6264720194647204e-05, "loss": 0.6882, "step": 9183 }, { "epoch": 0.2681381565501737, "grad_norm": 0.6422085583897502, "learning_rate": 1.6264071370640716e-05, "loss": 0.5672, "step": 9184 }, { "epoch": 0.26816735277802106, "grad_norm": 0.6635894942399517, "learning_rate": 1.6263422546634228e-05, "loss": 0.62, "step": 9185 }, { "epoch": 0.2681965490058684, "grad_norm": 0.6367270910846154, "learning_rate": 1.6262773722627737e-05, "loss": 0.5707, "step": 9186 }, { "epoch": 0.2682257452337158, "grad_norm": 0.6073360847670732, "learning_rate": 1.626212489862125e-05, "loss": 0.5443, "step": 9187 }, { "epoch": 0.26825494146156315, "grad_norm": 0.7271753445242848, "learning_rate": 1.626147607461476e-05, "loss": 0.6958, "step": 9188 }, { "epoch": 0.2682841376894105, "grad_norm": 0.6678316006046436, "learning_rate": 1.6260827250608273e-05, "loss": 0.605, "step": 9189 }, { "epoch": 0.26831333391725787, "grad_norm": 0.803622812618571, "learning_rate": 1.6260178426601785e-05, "loss": 0.7521, "step": 9190 }, { "epoch": 0.26834253014510523, "grad_norm": 0.6975235755549493, "learning_rate": 1.6259529602595297e-05, "loss": 0.698, "step": 9191 }, { "epoch": 0.2683717263729526, "grad_norm": 0.6473215946638684, "learning_rate": 1.625888077858881e-05, "loss": 0.622, "step": 9192 }, { "epoch": 0.26840092260079995, "grad_norm": 0.6597005060675031, "learning_rate": 1.625823195458232e-05, "loss": 0.6267, "step": 9193 }, { "epoch": 0.2684301188286473, "grad_norm": 0.6293825929629372, "learning_rate": 1.6257583130575833e-05, "loss": 0.6206, "step": 9194 }, { "epoch": 0.2684593150564947, "grad_norm": 0.6485679039990391, "learning_rate": 1.6256934306569345e-05, "loss": 0.6102, "step": 9195 }, { "epoch": 0.2684885112843421, "grad_norm": 0.696919264670649, "learning_rate": 1.6256285482562857e-05, "loss": 0.6978, "step": 9196 }, { "epoch": 0.26851770751218945, "grad_norm": 0.6425493550445207, "learning_rate": 1.625563665855637e-05, "loss": 0.58, "step": 9197 }, { "epoch": 0.2685469037400368, "grad_norm": 0.6865431788393774, "learning_rate": 1.625498783454988e-05, "loss": 0.6428, "step": 9198 }, { "epoch": 0.2685760999678842, "grad_norm": 0.6177611289635407, "learning_rate": 1.6254339010543393e-05, "loss": 0.5972, "step": 9199 }, { "epoch": 0.26860529619573154, "grad_norm": 0.6689601296420656, "learning_rate": 1.6253690186536905e-05, "loss": 0.6547, "step": 9200 }, { "epoch": 0.2686344924235789, "grad_norm": 0.6920266120474114, "learning_rate": 1.6253041362530414e-05, "loss": 0.7037, "step": 9201 }, { "epoch": 0.26866368865142626, "grad_norm": 0.6260253122380498, "learning_rate": 1.6252392538523926e-05, "loss": 0.5335, "step": 9202 }, { "epoch": 0.2686928848792736, "grad_norm": 0.821668635219953, "learning_rate": 1.6251743714517438e-05, "loss": 0.686, "step": 9203 }, { "epoch": 0.268722081107121, "grad_norm": 0.7565096262545056, "learning_rate": 1.625109489051095e-05, "loss": 0.657, "step": 9204 }, { "epoch": 0.26875127733496834, "grad_norm": 0.635742205503661, "learning_rate": 1.6250446066504462e-05, "loss": 0.5756, "step": 9205 }, { "epoch": 0.2687804735628157, "grad_norm": 0.6987585761905926, "learning_rate": 1.6249797242497974e-05, "loss": 0.7066, "step": 9206 }, { "epoch": 0.26880966979066306, "grad_norm": 0.609692963162277, "learning_rate": 1.6249148418491486e-05, "loss": 0.5886, "step": 9207 }, { "epoch": 0.2688388660185104, "grad_norm": 0.6242671288622434, "learning_rate": 1.6248499594484998e-05, "loss": 0.5562, "step": 9208 }, { "epoch": 0.2688680622463578, "grad_norm": 0.6282517910814244, "learning_rate": 1.624785077047851e-05, "loss": 0.5636, "step": 9209 }, { "epoch": 0.26889725847420515, "grad_norm": 0.6488640851251284, "learning_rate": 1.624720194647202e-05, "loss": 0.6447, "step": 9210 }, { "epoch": 0.2689264547020525, "grad_norm": 0.6405004047982314, "learning_rate": 1.624655312246553e-05, "loss": 0.6075, "step": 9211 }, { "epoch": 0.26895565092989987, "grad_norm": 0.7155511121417393, "learning_rate": 1.6245904298459046e-05, "loss": 0.7375, "step": 9212 }, { "epoch": 0.26898484715774723, "grad_norm": 0.6812535615744592, "learning_rate": 1.6245255474452558e-05, "loss": 0.7056, "step": 9213 }, { "epoch": 0.2690140433855946, "grad_norm": 0.73142345105412, "learning_rate": 1.624460665044607e-05, "loss": 0.6759, "step": 9214 }, { "epoch": 0.26904323961344195, "grad_norm": 0.6253264814308043, "learning_rate": 1.624395782643958e-05, "loss": 0.5982, "step": 9215 }, { "epoch": 0.2690724358412893, "grad_norm": 0.6700191525736511, "learning_rate": 1.624330900243309e-05, "loss": 0.6425, "step": 9216 }, { "epoch": 0.2691016320691367, "grad_norm": 0.7172600552992264, "learning_rate": 1.6242660178426603e-05, "loss": 0.7167, "step": 9217 }, { "epoch": 0.26913082829698404, "grad_norm": 0.6535050218297146, "learning_rate": 1.6242011354420115e-05, "loss": 0.6205, "step": 9218 }, { "epoch": 0.2691600245248314, "grad_norm": 0.6634166416036971, "learning_rate": 1.6241362530413627e-05, "loss": 0.6927, "step": 9219 }, { "epoch": 0.26918922075267876, "grad_norm": 0.6193119348616033, "learning_rate": 1.624071370640714e-05, "loss": 0.618, "step": 9220 }, { "epoch": 0.2692184169805261, "grad_norm": 0.6606277392715486, "learning_rate": 1.624006488240065e-05, "loss": 0.6204, "step": 9221 }, { "epoch": 0.2692476132083735, "grad_norm": 0.6519398008873604, "learning_rate": 1.6239416058394163e-05, "loss": 0.6315, "step": 9222 }, { "epoch": 0.26927680943622084, "grad_norm": 0.6158777615541938, "learning_rate": 1.6238767234387675e-05, "loss": 0.5637, "step": 9223 }, { "epoch": 0.2693060056640682, "grad_norm": 0.6873082126392137, "learning_rate": 1.6238118410381184e-05, "loss": 0.634, "step": 9224 }, { "epoch": 0.26933520189191557, "grad_norm": 0.7341051957808168, "learning_rate": 1.6237469586374696e-05, "loss": 0.6975, "step": 9225 }, { "epoch": 0.2693643981197629, "grad_norm": 0.6828847743283563, "learning_rate": 1.6236820762368208e-05, "loss": 0.6262, "step": 9226 }, { "epoch": 0.2693935943476103, "grad_norm": 0.6761459151394742, "learning_rate": 1.623617193836172e-05, "loss": 0.6549, "step": 9227 }, { "epoch": 0.26942279057545765, "grad_norm": 0.675769766779375, "learning_rate": 1.623552311435523e-05, "loss": 0.6099, "step": 9228 }, { "epoch": 0.269451986803305, "grad_norm": 0.6520059899679207, "learning_rate": 1.6234874290348744e-05, "loss": 0.6181, "step": 9229 }, { "epoch": 0.26948118303115237, "grad_norm": 0.712964461222694, "learning_rate": 1.6234225466342256e-05, "loss": 0.6552, "step": 9230 }, { "epoch": 0.26951037925899973, "grad_norm": 0.6815429685367489, "learning_rate": 1.6233576642335768e-05, "loss": 0.6401, "step": 9231 }, { "epoch": 0.2695395754868471, "grad_norm": 0.663685026881616, "learning_rate": 1.623292781832928e-05, "loss": 0.6455, "step": 9232 }, { "epoch": 0.26956877171469446, "grad_norm": 0.6478962676088642, "learning_rate": 1.6232278994322792e-05, "loss": 0.6292, "step": 9233 }, { "epoch": 0.2695979679425418, "grad_norm": 0.650605878366633, "learning_rate": 1.6231630170316304e-05, "loss": 0.5999, "step": 9234 }, { "epoch": 0.2696271641703892, "grad_norm": 0.7230317848612502, "learning_rate": 1.6230981346309816e-05, "loss": 0.7079, "step": 9235 }, { "epoch": 0.26965636039823654, "grad_norm": 0.6999301871230411, "learning_rate": 1.6230332522303328e-05, "loss": 0.6442, "step": 9236 }, { "epoch": 0.2696855566260839, "grad_norm": 0.7611647220232463, "learning_rate": 1.622968369829684e-05, "loss": 0.7767, "step": 9237 }, { "epoch": 0.26971475285393126, "grad_norm": 0.659804025916664, "learning_rate": 1.6229034874290352e-05, "loss": 0.6435, "step": 9238 }, { "epoch": 0.2697439490817786, "grad_norm": 0.6767304760833815, "learning_rate": 1.622838605028386e-05, "loss": 0.6093, "step": 9239 }, { "epoch": 0.269773145309626, "grad_norm": 0.710242526174738, "learning_rate": 1.6227737226277373e-05, "loss": 0.6439, "step": 9240 }, { "epoch": 0.26980234153747334, "grad_norm": 0.7131053074911274, "learning_rate": 1.6227088402270885e-05, "loss": 0.7014, "step": 9241 }, { "epoch": 0.2698315377653207, "grad_norm": 0.684748390507194, "learning_rate": 1.6226439578264397e-05, "loss": 0.6111, "step": 9242 }, { "epoch": 0.26986073399316807, "grad_norm": 0.6679423045512415, "learning_rate": 1.622579075425791e-05, "loss": 0.663, "step": 9243 }, { "epoch": 0.26988993022101543, "grad_norm": 0.6435162617467942, "learning_rate": 1.622514193025142e-05, "loss": 0.6283, "step": 9244 }, { "epoch": 0.2699191264488628, "grad_norm": 0.7066124975074608, "learning_rate": 1.6224493106244933e-05, "loss": 0.6471, "step": 9245 }, { "epoch": 0.26994832267671015, "grad_norm": 0.667586114568704, "learning_rate": 1.6223844282238445e-05, "loss": 0.6363, "step": 9246 }, { "epoch": 0.2699775189045575, "grad_norm": 0.6472720720356497, "learning_rate": 1.6223195458231953e-05, "loss": 0.5949, "step": 9247 }, { "epoch": 0.2700067151324049, "grad_norm": 0.6842757489590702, "learning_rate": 1.6222546634225465e-05, "loss": 0.7031, "step": 9248 }, { "epoch": 0.27003591136025223, "grad_norm": 0.6657055182255838, "learning_rate": 1.6221897810218977e-05, "loss": 0.6523, "step": 9249 }, { "epoch": 0.2700651075880996, "grad_norm": 0.6484203229595933, "learning_rate": 1.6221248986212493e-05, "loss": 0.6458, "step": 9250 }, { "epoch": 0.27009430381594696, "grad_norm": 0.6841498629134909, "learning_rate": 1.6220600162206005e-05, "loss": 0.6738, "step": 9251 }, { "epoch": 0.2701235000437943, "grad_norm": 0.5868896506774095, "learning_rate": 1.6219951338199517e-05, "loss": 0.517, "step": 9252 }, { "epoch": 0.2701526962716417, "grad_norm": 0.6941941847069621, "learning_rate": 1.6219302514193026e-05, "loss": 0.6428, "step": 9253 }, { "epoch": 0.27018189249948904, "grad_norm": 0.6978310264123847, "learning_rate": 1.6218653690186538e-05, "loss": 0.7284, "step": 9254 }, { "epoch": 0.2702110887273364, "grad_norm": 0.6589078510688194, "learning_rate": 1.621800486618005e-05, "loss": 0.6191, "step": 9255 }, { "epoch": 0.27024028495518376, "grad_norm": 0.6152022788039816, "learning_rate": 1.621735604217356e-05, "loss": 0.6096, "step": 9256 }, { "epoch": 0.2702694811830312, "grad_norm": 0.7106523745377604, "learning_rate": 1.6216707218167074e-05, "loss": 0.6445, "step": 9257 }, { "epoch": 0.27029867741087854, "grad_norm": 0.6643624471759518, "learning_rate": 1.6216058394160586e-05, "loss": 0.6316, "step": 9258 }, { "epoch": 0.2703278736387259, "grad_norm": 0.6016727065841253, "learning_rate": 1.6215409570154098e-05, "loss": 0.5739, "step": 9259 }, { "epoch": 0.27035706986657326, "grad_norm": 0.6935161240939376, "learning_rate": 1.621476074614761e-05, "loss": 0.6992, "step": 9260 }, { "epoch": 0.2703862660944206, "grad_norm": 0.6832736513921073, "learning_rate": 1.6214111922141122e-05, "loss": 0.6997, "step": 9261 }, { "epoch": 0.270415462322268, "grad_norm": 0.7157358668583984, "learning_rate": 1.621346309813463e-05, "loss": 0.6687, "step": 9262 }, { "epoch": 0.27044465855011535, "grad_norm": 0.6272490665546355, "learning_rate": 1.6212814274128142e-05, "loss": 0.5832, "step": 9263 }, { "epoch": 0.2704738547779627, "grad_norm": 0.7338071835467271, "learning_rate": 1.6212165450121654e-05, "loss": 0.6565, "step": 9264 }, { "epoch": 0.27050305100581007, "grad_norm": 0.6993547111160449, "learning_rate": 1.6211516626115167e-05, "loss": 0.7048, "step": 9265 }, { "epoch": 0.27053224723365743, "grad_norm": 0.6487785227937146, "learning_rate": 1.621086780210868e-05, "loss": 0.6031, "step": 9266 }, { "epoch": 0.2705614434615048, "grad_norm": 0.6416646769508955, "learning_rate": 1.621021897810219e-05, "loss": 0.637, "step": 9267 }, { "epoch": 0.27059063968935215, "grad_norm": 0.6966199055337505, "learning_rate": 1.6209570154095703e-05, "loss": 0.6762, "step": 9268 }, { "epoch": 0.2706198359171995, "grad_norm": 0.651521083870559, "learning_rate": 1.6208921330089215e-05, "loss": 0.6487, "step": 9269 }, { "epoch": 0.2706490321450469, "grad_norm": 0.6453212735635273, "learning_rate": 1.6208272506082727e-05, "loss": 0.5409, "step": 9270 }, { "epoch": 0.27067822837289424, "grad_norm": 0.6604467260992191, "learning_rate": 1.620762368207624e-05, "loss": 0.6047, "step": 9271 }, { "epoch": 0.2707074246007416, "grad_norm": 0.661680987266105, "learning_rate": 1.620697485806975e-05, "loss": 0.6322, "step": 9272 }, { "epoch": 0.27073662082858896, "grad_norm": 0.632134155871686, "learning_rate": 1.6206326034063263e-05, "loss": 0.5596, "step": 9273 }, { "epoch": 0.2707658170564363, "grad_norm": 0.628263146312821, "learning_rate": 1.6205677210056775e-05, "loss": 0.5725, "step": 9274 }, { "epoch": 0.2707950132842837, "grad_norm": 0.6722014276308838, "learning_rate": 1.6205028386050287e-05, "loss": 0.6072, "step": 9275 }, { "epoch": 0.27082420951213104, "grad_norm": 0.6307420814676843, "learning_rate": 1.62043795620438e-05, "loss": 0.6392, "step": 9276 }, { "epoch": 0.2708534057399784, "grad_norm": 0.6711859139433898, "learning_rate": 1.6203730738037307e-05, "loss": 0.6234, "step": 9277 }, { "epoch": 0.27088260196782576, "grad_norm": 0.6970610243912536, "learning_rate": 1.620308191403082e-05, "loss": 0.6593, "step": 9278 }, { "epoch": 0.2709117981956731, "grad_norm": 0.6437699042311984, "learning_rate": 1.620243309002433e-05, "loss": 0.6249, "step": 9279 }, { "epoch": 0.2709409944235205, "grad_norm": 0.6876853086479192, "learning_rate": 1.6201784266017844e-05, "loss": 0.6756, "step": 9280 }, { "epoch": 0.27097019065136785, "grad_norm": 0.6955230757924548, "learning_rate": 1.6201135442011356e-05, "loss": 0.6055, "step": 9281 }, { "epoch": 0.2709993868792152, "grad_norm": 0.6538993651406435, "learning_rate": 1.6200486618004868e-05, "loss": 0.6337, "step": 9282 }, { "epoch": 0.27102858310706257, "grad_norm": 0.6661112278888804, "learning_rate": 1.619983779399838e-05, "loss": 0.6788, "step": 9283 }, { "epoch": 0.27105777933490993, "grad_norm": 0.6543891773629974, "learning_rate": 1.619918896999189e-05, "loss": 0.6174, "step": 9284 }, { "epoch": 0.2710869755627573, "grad_norm": 0.7431410369787244, "learning_rate": 1.61985401459854e-05, "loss": 0.6431, "step": 9285 }, { "epoch": 0.27111617179060465, "grad_norm": 0.7246678439626404, "learning_rate": 1.6197891321978912e-05, "loss": 0.7239, "step": 9286 }, { "epoch": 0.271145368018452, "grad_norm": 0.6232031526714829, "learning_rate": 1.6197242497972424e-05, "loss": 0.5792, "step": 9287 }, { "epoch": 0.2711745642462994, "grad_norm": 0.7024404412377823, "learning_rate": 1.619659367396594e-05, "loss": 0.6478, "step": 9288 }, { "epoch": 0.27120376047414674, "grad_norm": 0.6710774612692098, "learning_rate": 1.6195944849959452e-05, "loss": 0.5995, "step": 9289 }, { "epoch": 0.2712329567019941, "grad_norm": 0.6413090106171683, "learning_rate": 1.6195296025952964e-05, "loss": 0.6344, "step": 9290 }, { "epoch": 0.27126215292984146, "grad_norm": 0.6629623233980624, "learning_rate": 1.6194647201946472e-05, "loss": 0.6313, "step": 9291 }, { "epoch": 0.2712913491576888, "grad_norm": 0.6402966975411, "learning_rate": 1.6193998377939984e-05, "loss": 0.6487, "step": 9292 }, { "epoch": 0.2713205453855362, "grad_norm": 0.658005733889497, "learning_rate": 1.6193349553933496e-05, "loss": 0.5835, "step": 9293 }, { "epoch": 0.27134974161338354, "grad_norm": 0.5960445279701945, "learning_rate": 1.619270072992701e-05, "loss": 0.5452, "step": 9294 }, { "epoch": 0.2713789378412309, "grad_norm": 0.6588162488649206, "learning_rate": 1.619205190592052e-05, "loss": 0.6421, "step": 9295 }, { "epoch": 0.27140813406907827, "grad_norm": 0.6103167982285126, "learning_rate": 1.6191403081914033e-05, "loss": 0.5842, "step": 9296 }, { "epoch": 0.2714373302969256, "grad_norm": 0.6626418743065043, "learning_rate": 1.6190754257907545e-05, "loss": 0.6524, "step": 9297 }, { "epoch": 0.271466526524773, "grad_norm": 0.6482694194314189, "learning_rate": 1.6190105433901057e-05, "loss": 0.5894, "step": 9298 }, { "epoch": 0.27149572275262035, "grad_norm": 0.6477405330871336, "learning_rate": 1.618945660989457e-05, "loss": 0.6161, "step": 9299 }, { "epoch": 0.2715249189804677, "grad_norm": 0.7050039827930399, "learning_rate": 1.6188807785888077e-05, "loss": 0.6745, "step": 9300 }, { "epoch": 0.27155411520831507, "grad_norm": 0.67341340987696, "learning_rate": 1.618815896188159e-05, "loss": 0.6228, "step": 9301 }, { "epoch": 0.27158331143616243, "grad_norm": 0.6709596310659973, "learning_rate": 1.61875101378751e-05, "loss": 0.6191, "step": 9302 }, { "epoch": 0.2716125076640098, "grad_norm": 0.6884654784427116, "learning_rate": 1.6186861313868613e-05, "loss": 0.6975, "step": 9303 }, { "epoch": 0.27164170389185716, "grad_norm": 0.6446602231601329, "learning_rate": 1.6186212489862125e-05, "loss": 0.6085, "step": 9304 }, { "epoch": 0.2716709001197045, "grad_norm": 0.6854464635850485, "learning_rate": 1.6185563665855637e-05, "loss": 0.6549, "step": 9305 }, { "epoch": 0.2717000963475519, "grad_norm": 0.6691998461565255, "learning_rate": 1.618491484184915e-05, "loss": 0.6258, "step": 9306 }, { "epoch": 0.27172929257539924, "grad_norm": 0.6290628747244036, "learning_rate": 1.618426601784266e-05, "loss": 0.5386, "step": 9307 }, { "epoch": 0.2717584888032466, "grad_norm": 0.6517509425683358, "learning_rate": 1.6183617193836174e-05, "loss": 0.6471, "step": 9308 }, { "epoch": 0.27178768503109396, "grad_norm": 0.6197295179345154, "learning_rate": 1.6182968369829686e-05, "loss": 0.6124, "step": 9309 }, { "epoch": 0.2718168812589413, "grad_norm": 0.6250193284082084, "learning_rate": 1.6182319545823198e-05, "loss": 0.5863, "step": 9310 }, { "epoch": 0.2718460774867887, "grad_norm": 0.6557530701641685, "learning_rate": 1.618167072181671e-05, "loss": 0.6272, "step": 9311 }, { "epoch": 0.27187527371463605, "grad_norm": 0.6839576898158218, "learning_rate": 1.618102189781022e-05, "loss": 0.6258, "step": 9312 }, { "epoch": 0.2719044699424834, "grad_norm": 0.6956947700216644, "learning_rate": 1.6180373073803734e-05, "loss": 0.6801, "step": 9313 }, { "epoch": 0.27193366617033077, "grad_norm": 0.691929938765362, "learning_rate": 1.6179724249797246e-05, "loss": 0.7554, "step": 9314 }, { "epoch": 0.27196286239817813, "grad_norm": 0.7098466098935139, "learning_rate": 1.6179075425790754e-05, "loss": 0.666, "step": 9315 }, { "epoch": 0.2719920586260255, "grad_norm": 0.7440046612832979, "learning_rate": 1.6178426601784266e-05, "loss": 0.7136, "step": 9316 }, { "epoch": 0.2720212548538729, "grad_norm": 0.660331561661618, "learning_rate": 1.617777777777778e-05, "loss": 0.6427, "step": 9317 }, { "epoch": 0.27205045108172027, "grad_norm": 0.6424629772278688, "learning_rate": 1.617712895377129e-05, "loss": 0.6355, "step": 9318 }, { "epoch": 0.27207964730956763, "grad_norm": 0.6487905964014798, "learning_rate": 1.6176480129764802e-05, "loss": 0.6363, "step": 9319 }, { "epoch": 0.272108843537415, "grad_norm": 0.6022490992615049, "learning_rate": 1.6175831305758314e-05, "loss": 0.5567, "step": 9320 }, { "epoch": 0.27213803976526235, "grad_norm": 0.7021000695221388, "learning_rate": 1.6175182481751826e-05, "loss": 0.6638, "step": 9321 }, { "epoch": 0.2721672359931097, "grad_norm": 0.6559258786057657, "learning_rate": 1.617453365774534e-05, "loss": 0.6539, "step": 9322 }, { "epoch": 0.2721964322209571, "grad_norm": 0.6740340314999286, "learning_rate": 1.6173884833738847e-05, "loss": 0.6291, "step": 9323 }, { "epoch": 0.27222562844880444, "grad_norm": 0.6366332415960504, "learning_rate": 1.617323600973236e-05, "loss": 0.6264, "step": 9324 }, { "epoch": 0.2722548246766518, "grad_norm": 0.6739804380709205, "learning_rate": 1.6172587185725875e-05, "loss": 0.6527, "step": 9325 }, { "epoch": 0.27228402090449916, "grad_norm": 0.6633156331953851, "learning_rate": 1.6171938361719387e-05, "loss": 0.6331, "step": 9326 }, { "epoch": 0.2723132171323465, "grad_norm": 0.679347446162664, "learning_rate": 1.61712895377129e-05, "loss": 0.6747, "step": 9327 }, { "epoch": 0.2723424133601939, "grad_norm": 0.6177537279091071, "learning_rate": 1.617064071370641e-05, "loss": 0.5813, "step": 9328 }, { "epoch": 0.27237160958804124, "grad_norm": 0.6479597164890621, "learning_rate": 1.616999188969992e-05, "loss": 0.6167, "step": 9329 }, { "epoch": 0.2724008058158886, "grad_norm": 0.6778888480053806, "learning_rate": 1.616934306569343e-05, "loss": 0.6922, "step": 9330 }, { "epoch": 0.27243000204373596, "grad_norm": 0.6968622916558226, "learning_rate": 1.6168694241686943e-05, "loss": 0.6541, "step": 9331 }, { "epoch": 0.2724591982715833, "grad_norm": 0.6690284574462416, "learning_rate": 1.6168045417680455e-05, "loss": 0.628, "step": 9332 }, { "epoch": 0.2724883944994307, "grad_norm": 0.605868620088932, "learning_rate": 1.6167396593673967e-05, "loss": 0.5402, "step": 9333 }, { "epoch": 0.27251759072727805, "grad_norm": 0.6547068483636381, "learning_rate": 1.616674776966748e-05, "loss": 0.6419, "step": 9334 }, { "epoch": 0.2725467869551254, "grad_norm": 0.6432687447563746, "learning_rate": 1.616609894566099e-05, "loss": 0.6333, "step": 9335 }, { "epoch": 0.27257598318297277, "grad_norm": 0.6956799935866396, "learning_rate": 1.6165450121654503e-05, "loss": 0.7092, "step": 9336 }, { "epoch": 0.27260517941082013, "grad_norm": 0.592948131625329, "learning_rate": 1.6164801297648016e-05, "loss": 0.5463, "step": 9337 }, { "epoch": 0.2726343756386675, "grad_norm": 0.759030859579097, "learning_rate": 1.6164152473641524e-05, "loss": 0.6805, "step": 9338 }, { "epoch": 0.27266357186651485, "grad_norm": 0.6597557969179211, "learning_rate": 1.6163503649635036e-05, "loss": 0.663, "step": 9339 }, { "epoch": 0.2726927680943622, "grad_norm": 0.722357326828226, "learning_rate": 1.6162854825628548e-05, "loss": 0.6779, "step": 9340 }, { "epoch": 0.2727219643222096, "grad_norm": 0.6225566828887368, "learning_rate": 1.616220600162206e-05, "loss": 0.5899, "step": 9341 }, { "epoch": 0.27275116055005694, "grad_norm": 0.7002183284218866, "learning_rate": 1.6161557177615572e-05, "loss": 0.6709, "step": 9342 }, { "epoch": 0.2727803567779043, "grad_norm": 0.8090519378785155, "learning_rate": 1.6160908353609084e-05, "loss": 0.7243, "step": 9343 }, { "epoch": 0.27280955300575166, "grad_norm": 0.6813440296806196, "learning_rate": 1.6160259529602596e-05, "loss": 0.6882, "step": 9344 }, { "epoch": 0.272838749233599, "grad_norm": 0.681015102491584, "learning_rate": 1.615961070559611e-05, "loss": 0.6454, "step": 9345 }, { "epoch": 0.2728679454614464, "grad_norm": 0.714757679617941, "learning_rate": 1.615896188158962e-05, "loss": 0.6979, "step": 9346 }, { "epoch": 0.27289714168929374, "grad_norm": 0.6793569773184541, "learning_rate": 1.6158313057583132e-05, "loss": 0.7125, "step": 9347 }, { "epoch": 0.2729263379171411, "grad_norm": 0.6225483658608675, "learning_rate": 1.6157664233576644e-05, "loss": 0.6073, "step": 9348 }, { "epoch": 0.27295553414498847, "grad_norm": 0.6714969771147982, "learning_rate": 1.6157015409570156e-05, "loss": 0.6172, "step": 9349 }, { "epoch": 0.2729847303728358, "grad_norm": 0.6839566822041466, "learning_rate": 1.615636658556367e-05, "loss": 0.7064, "step": 9350 }, { "epoch": 0.2730139266006832, "grad_norm": 0.6239342879464753, "learning_rate": 1.615571776155718e-05, "loss": 0.5693, "step": 9351 }, { "epoch": 0.27304312282853055, "grad_norm": 0.7940161611375542, "learning_rate": 1.615506893755069e-05, "loss": 0.6707, "step": 9352 }, { "epoch": 0.2730723190563779, "grad_norm": 0.6638002191638898, "learning_rate": 1.61544201135442e-05, "loss": 0.6474, "step": 9353 }, { "epoch": 0.27310151528422527, "grad_norm": 0.6870365599517484, "learning_rate": 1.6153771289537713e-05, "loss": 0.684, "step": 9354 }, { "epoch": 0.27313071151207263, "grad_norm": 0.6976611770713512, "learning_rate": 1.6153122465531225e-05, "loss": 0.635, "step": 9355 }, { "epoch": 0.27315990773992, "grad_norm": 0.6212770005506761, "learning_rate": 1.6152473641524737e-05, "loss": 0.589, "step": 9356 }, { "epoch": 0.27318910396776735, "grad_norm": 0.6572403516294797, "learning_rate": 1.615182481751825e-05, "loss": 0.6622, "step": 9357 }, { "epoch": 0.2732183001956147, "grad_norm": 0.7540552505073055, "learning_rate": 1.615117599351176e-05, "loss": 0.7607, "step": 9358 }, { "epoch": 0.2732474964234621, "grad_norm": 0.6087578104821396, "learning_rate": 1.6150527169505273e-05, "loss": 0.5659, "step": 9359 }, { "epoch": 0.27327669265130944, "grad_norm": 0.6374049437169729, "learning_rate": 1.6149878345498785e-05, "loss": 0.5815, "step": 9360 }, { "epoch": 0.2733058888791568, "grad_norm": 0.6368772079363667, "learning_rate": 1.6149229521492294e-05, "loss": 0.5894, "step": 9361 }, { "epoch": 0.27333508510700416, "grad_norm": 0.6354030217590451, "learning_rate": 1.6148580697485806e-05, "loss": 0.6167, "step": 9362 }, { "epoch": 0.2733642813348515, "grad_norm": 0.6232328359657802, "learning_rate": 1.614793187347932e-05, "loss": 0.5996, "step": 9363 }, { "epoch": 0.2733934775626989, "grad_norm": 0.6382068440318286, "learning_rate": 1.6147283049472833e-05, "loss": 0.6425, "step": 9364 }, { "epoch": 0.27342267379054624, "grad_norm": 0.6575452788873271, "learning_rate": 1.6146634225466346e-05, "loss": 0.585, "step": 9365 }, { "epoch": 0.2734518700183936, "grad_norm": 0.6114702093878088, "learning_rate": 1.6145985401459858e-05, "loss": 0.6125, "step": 9366 }, { "epoch": 0.27348106624624097, "grad_norm": 0.7322686714363281, "learning_rate": 1.6145336577453366e-05, "loss": 0.715, "step": 9367 }, { "epoch": 0.27351026247408833, "grad_norm": 0.6677773263290753, "learning_rate": 1.6144687753446878e-05, "loss": 0.7093, "step": 9368 }, { "epoch": 0.2735394587019357, "grad_norm": 0.7123817732491008, "learning_rate": 1.614403892944039e-05, "loss": 0.6222, "step": 9369 }, { "epoch": 0.27356865492978305, "grad_norm": 0.6719012630291057, "learning_rate": 1.6143390105433902e-05, "loss": 0.6878, "step": 9370 }, { "epoch": 0.2735978511576304, "grad_norm": 0.7166560286310438, "learning_rate": 1.6142741281427414e-05, "loss": 0.7261, "step": 9371 }, { "epoch": 0.2736270473854778, "grad_norm": 0.6722875639995316, "learning_rate": 1.6142092457420926e-05, "loss": 0.5519, "step": 9372 }, { "epoch": 0.27365624361332513, "grad_norm": 0.6849928709541393, "learning_rate": 1.614144363341444e-05, "loss": 0.67, "step": 9373 }, { "epoch": 0.2736854398411725, "grad_norm": 0.61616824078414, "learning_rate": 1.614079480940795e-05, "loss": 0.5997, "step": 9374 }, { "epoch": 0.27371463606901986, "grad_norm": 0.6624332335558879, "learning_rate": 1.6140145985401462e-05, "loss": 0.6296, "step": 9375 }, { "epoch": 0.2737438322968672, "grad_norm": 0.6974622875902654, "learning_rate": 1.613949716139497e-05, "loss": 0.702, "step": 9376 }, { "epoch": 0.27377302852471463, "grad_norm": 0.6359248929024922, "learning_rate": 1.6138848337388483e-05, "loss": 0.6047, "step": 9377 }, { "epoch": 0.273802224752562, "grad_norm": 0.6536306578484085, "learning_rate": 1.6138199513381995e-05, "loss": 0.5874, "step": 9378 }, { "epoch": 0.27383142098040936, "grad_norm": 0.6713486572960253, "learning_rate": 1.6137550689375507e-05, "loss": 0.648, "step": 9379 }, { "epoch": 0.2738606172082567, "grad_norm": 0.66496200952821, "learning_rate": 1.613690186536902e-05, "loss": 0.6466, "step": 9380 }, { "epoch": 0.2738898134361041, "grad_norm": 0.6258297436675067, "learning_rate": 1.613625304136253e-05, "loss": 0.5673, "step": 9381 }, { "epoch": 0.27391900966395144, "grad_norm": 0.689078029646662, "learning_rate": 1.6135604217356043e-05, "loss": 0.6591, "step": 9382 }, { "epoch": 0.2739482058917988, "grad_norm": 0.6479408411914271, "learning_rate": 1.6134955393349555e-05, "loss": 0.6346, "step": 9383 }, { "epoch": 0.27397740211964616, "grad_norm": 0.7038448477526024, "learning_rate": 1.6134306569343067e-05, "loss": 0.6682, "step": 9384 }, { "epoch": 0.2740065983474935, "grad_norm": 0.6627447374409131, "learning_rate": 1.613365774533658e-05, "loss": 0.6652, "step": 9385 }, { "epoch": 0.2740357945753409, "grad_norm": 0.6688183902724288, "learning_rate": 1.613300892133009e-05, "loss": 0.6606, "step": 9386 }, { "epoch": 0.27406499080318825, "grad_norm": 0.6672218096078317, "learning_rate": 1.6132360097323603e-05, "loss": 0.6546, "step": 9387 }, { "epoch": 0.2740941870310356, "grad_norm": 0.637084327176753, "learning_rate": 1.6131711273317115e-05, "loss": 0.5791, "step": 9388 }, { "epoch": 0.27412338325888297, "grad_norm": 0.7777880210029323, "learning_rate": 1.6131062449310627e-05, "loss": 0.6686, "step": 9389 }, { "epoch": 0.27415257948673033, "grad_norm": 0.6551093472060131, "learning_rate": 1.6130413625304136e-05, "loss": 0.6332, "step": 9390 }, { "epoch": 0.2741817757145777, "grad_norm": 0.6976861655577783, "learning_rate": 1.6129764801297648e-05, "loss": 0.6911, "step": 9391 }, { "epoch": 0.27421097194242505, "grad_norm": 0.6967728302508096, "learning_rate": 1.612911597729116e-05, "loss": 0.6969, "step": 9392 }, { "epoch": 0.2742401681702724, "grad_norm": 0.6345189658412471, "learning_rate": 1.6128467153284672e-05, "loss": 0.6114, "step": 9393 }, { "epoch": 0.2742693643981198, "grad_norm": 0.6168019301002909, "learning_rate": 1.6127818329278184e-05, "loss": 0.5362, "step": 9394 }, { "epoch": 0.27429856062596714, "grad_norm": 0.6585668797630249, "learning_rate": 1.6127169505271696e-05, "loss": 0.6627, "step": 9395 }, { "epoch": 0.2743277568538145, "grad_norm": 0.7200233379391157, "learning_rate": 1.6126520681265208e-05, "loss": 0.7177, "step": 9396 }, { "epoch": 0.27435695308166186, "grad_norm": 0.7931728209382998, "learning_rate": 1.612587185725872e-05, "loss": 0.7171, "step": 9397 }, { "epoch": 0.2743861493095092, "grad_norm": 0.6606125807641655, "learning_rate": 1.6125223033252232e-05, "loss": 0.603, "step": 9398 }, { "epoch": 0.2744153455373566, "grad_norm": 0.6692955317996092, "learning_rate": 1.612457420924574e-05, "loss": 0.6469, "step": 9399 }, { "epoch": 0.27444454176520394, "grad_norm": 0.6742900687837076, "learning_rate": 1.6123925385239253e-05, "loss": 0.6238, "step": 9400 }, { "epoch": 0.2744737379930513, "grad_norm": 0.6536779649012926, "learning_rate": 1.612327656123277e-05, "loss": 0.5845, "step": 9401 }, { "epoch": 0.27450293422089866, "grad_norm": 0.657941664562568, "learning_rate": 1.612262773722628e-05, "loss": 0.5736, "step": 9402 }, { "epoch": 0.274532130448746, "grad_norm": 0.7173536018953359, "learning_rate": 1.6121978913219792e-05, "loss": 0.609, "step": 9403 }, { "epoch": 0.2745613266765934, "grad_norm": 0.6864630977272781, "learning_rate": 1.6121330089213304e-05, "loss": 0.6597, "step": 9404 }, { "epoch": 0.27459052290444075, "grad_norm": 0.6037090727808118, "learning_rate": 1.6120681265206813e-05, "loss": 0.5565, "step": 9405 }, { "epoch": 0.2746197191322881, "grad_norm": 0.5940548857936575, "learning_rate": 1.6120032441200325e-05, "loss": 0.535, "step": 9406 }, { "epoch": 0.27464891536013547, "grad_norm": 0.6654315663639604, "learning_rate": 1.6119383617193837e-05, "loss": 0.6375, "step": 9407 }, { "epoch": 0.27467811158798283, "grad_norm": 0.7053848757374861, "learning_rate": 1.611873479318735e-05, "loss": 0.6762, "step": 9408 }, { "epoch": 0.2747073078158302, "grad_norm": 0.6716552965062722, "learning_rate": 1.611808596918086e-05, "loss": 0.685, "step": 9409 }, { "epoch": 0.27473650404367755, "grad_norm": 0.6994746788818698, "learning_rate": 1.6117437145174373e-05, "loss": 0.6825, "step": 9410 }, { "epoch": 0.2747657002715249, "grad_norm": 0.7073469673162852, "learning_rate": 1.6116788321167885e-05, "loss": 0.6885, "step": 9411 }, { "epoch": 0.2747948964993723, "grad_norm": 0.6791028542493559, "learning_rate": 1.6116139497161397e-05, "loss": 0.6653, "step": 9412 }, { "epoch": 0.27482409272721964, "grad_norm": 0.6473080311266539, "learning_rate": 1.611549067315491e-05, "loss": 0.6309, "step": 9413 }, { "epoch": 0.274853288955067, "grad_norm": 0.5988920609464029, "learning_rate": 1.6114841849148418e-05, "loss": 0.5399, "step": 9414 }, { "epoch": 0.27488248518291436, "grad_norm": 0.6347777707059841, "learning_rate": 1.611419302514193e-05, "loss": 0.5643, "step": 9415 }, { "epoch": 0.2749116814107617, "grad_norm": 0.7277448776985027, "learning_rate": 1.6113544201135442e-05, "loss": 0.7653, "step": 9416 }, { "epoch": 0.2749408776386091, "grad_norm": 0.6740182838230587, "learning_rate": 1.6112895377128954e-05, "loss": 0.6562, "step": 9417 }, { "epoch": 0.27497007386645644, "grad_norm": 0.6286393469134608, "learning_rate": 1.6112246553122466e-05, "loss": 0.6183, "step": 9418 }, { "epoch": 0.2749992700943038, "grad_norm": 0.661943119301263, "learning_rate": 1.6111597729115978e-05, "loss": 0.6361, "step": 9419 }, { "epoch": 0.27502846632215117, "grad_norm": 0.7132575533658435, "learning_rate": 1.611094890510949e-05, "loss": 0.7033, "step": 9420 }, { "epoch": 0.2750576625499985, "grad_norm": 0.7371994408593846, "learning_rate": 1.6110300081103002e-05, "loss": 0.6474, "step": 9421 }, { "epoch": 0.2750868587778459, "grad_norm": 0.6672847111996592, "learning_rate": 1.6109651257096514e-05, "loss": 0.5787, "step": 9422 }, { "epoch": 0.27511605500569325, "grad_norm": 0.6421187499153751, "learning_rate": 1.6109002433090026e-05, "loss": 0.5972, "step": 9423 }, { "epoch": 0.2751452512335406, "grad_norm": 0.6461588079807357, "learning_rate": 1.6108353609083538e-05, "loss": 0.5907, "step": 9424 }, { "epoch": 0.27517444746138797, "grad_norm": 0.7112025265746582, "learning_rate": 1.610770478507705e-05, "loss": 0.6995, "step": 9425 }, { "epoch": 0.27520364368923533, "grad_norm": 0.7468977117118761, "learning_rate": 1.6107055961070562e-05, "loss": 0.7792, "step": 9426 }, { "epoch": 0.2752328399170827, "grad_norm": 0.7023557480692416, "learning_rate": 1.6106407137064074e-05, "loss": 0.743, "step": 9427 }, { "epoch": 0.27526203614493006, "grad_norm": 0.6849531895950521, "learning_rate": 1.6105758313057583e-05, "loss": 0.6817, "step": 9428 }, { "epoch": 0.2752912323727774, "grad_norm": 0.6862034618668025, "learning_rate": 1.6105109489051095e-05, "loss": 0.6212, "step": 9429 }, { "epoch": 0.2753204286006248, "grad_norm": 0.6845807888711444, "learning_rate": 1.6104460665044607e-05, "loss": 0.6504, "step": 9430 }, { "epoch": 0.27534962482847214, "grad_norm": 0.6735260625241973, "learning_rate": 1.610381184103812e-05, "loss": 0.636, "step": 9431 }, { "epoch": 0.2753788210563195, "grad_norm": 0.6633785133829014, "learning_rate": 1.610316301703163e-05, "loss": 0.6566, "step": 9432 }, { "epoch": 0.27540801728416686, "grad_norm": 0.7056451856815532, "learning_rate": 1.6102514193025143e-05, "loss": 0.7196, "step": 9433 }, { "epoch": 0.2754372135120142, "grad_norm": 0.624743039760831, "learning_rate": 1.6101865369018655e-05, "loss": 0.5716, "step": 9434 }, { "epoch": 0.2754664097398616, "grad_norm": 0.6816754299442215, "learning_rate": 1.6101216545012167e-05, "loss": 0.6229, "step": 9435 }, { "epoch": 0.27549560596770895, "grad_norm": 0.6567430630204044, "learning_rate": 1.610056772100568e-05, "loss": 0.6093, "step": 9436 }, { "epoch": 0.2755248021955563, "grad_norm": 0.6584788289717097, "learning_rate": 1.6099918896999188e-05, "loss": 0.642, "step": 9437 }, { "epoch": 0.2755539984234037, "grad_norm": 0.6404675672087922, "learning_rate": 1.60992700729927e-05, "loss": 0.6487, "step": 9438 }, { "epoch": 0.2755831946512511, "grad_norm": 0.6324597691046427, "learning_rate": 1.6098621248986215e-05, "loss": 0.6275, "step": 9439 }, { "epoch": 0.27561239087909845, "grad_norm": 0.6693200735478647, "learning_rate": 1.6097972424979727e-05, "loss": 0.6411, "step": 9440 }, { "epoch": 0.2756415871069458, "grad_norm": 0.6652982321356383, "learning_rate": 1.609732360097324e-05, "loss": 0.6275, "step": 9441 }, { "epoch": 0.27567078333479317, "grad_norm": 0.6443655818071242, "learning_rate": 1.609667477696675e-05, "loss": 0.552, "step": 9442 }, { "epoch": 0.27569997956264053, "grad_norm": 0.7216053495415103, "learning_rate": 1.609602595296026e-05, "loss": 0.657, "step": 9443 }, { "epoch": 0.2757291757904879, "grad_norm": 0.7113569887082656, "learning_rate": 1.6095377128953772e-05, "loss": 0.6798, "step": 9444 }, { "epoch": 0.27575837201833525, "grad_norm": 0.7372846510586285, "learning_rate": 1.6094728304947284e-05, "loss": 0.7121, "step": 9445 }, { "epoch": 0.2757875682461826, "grad_norm": 0.72165517013763, "learning_rate": 1.6094079480940796e-05, "loss": 0.7313, "step": 9446 }, { "epoch": 0.27581676447403, "grad_norm": 0.7609110411664292, "learning_rate": 1.6093430656934308e-05, "loss": 0.7162, "step": 9447 }, { "epoch": 0.27584596070187734, "grad_norm": 0.6491030979205962, "learning_rate": 1.609278183292782e-05, "loss": 0.6406, "step": 9448 }, { "epoch": 0.2758751569297247, "grad_norm": 0.7021842515051739, "learning_rate": 1.6092133008921332e-05, "loss": 0.6728, "step": 9449 }, { "epoch": 0.27590435315757206, "grad_norm": 0.6783280675709729, "learning_rate": 1.6091484184914844e-05, "loss": 0.6591, "step": 9450 }, { "epoch": 0.2759335493854194, "grad_norm": 0.6751512434627264, "learning_rate": 1.6090835360908356e-05, "loss": 0.6295, "step": 9451 }, { "epoch": 0.2759627456132668, "grad_norm": 0.7016114791970044, "learning_rate": 1.6090186536901865e-05, "loss": 0.6378, "step": 9452 }, { "epoch": 0.27599194184111414, "grad_norm": 0.6493650022788411, "learning_rate": 1.6089537712895377e-05, "loss": 0.6559, "step": 9453 }, { "epoch": 0.2760211380689615, "grad_norm": 0.6812187093961546, "learning_rate": 1.608888888888889e-05, "loss": 0.6071, "step": 9454 }, { "epoch": 0.27605033429680886, "grad_norm": 0.6664408609904687, "learning_rate": 1.60882400648824e-05, "loss": 0.655, "step": 9455 }, { "epoch": 0.2760795305246562, "grad_norm": 0.6356690190114983, "learning_rate": 1.6087591240875913e-05, "loss": 0.6017, "step": 9456 }, { "epoch": 0.2761087267525036, "grad_norm": 0.6947599155492309, "learning_rate": 1.6086942416869425e-05, "loss": 0.7129, "step": 9457 }, { "epoch": 0.27613792298035095, "grad_norm": 0.6994249205616608, "learning_rate": 1.6086293592862937e-05, "loss": 0.6697, "step": 9458 }, { "epoch": 0.2761671192081983, "grad_norm": 0.6985726912211582, "learning_rate": 1.608564476885645e-05, "loss": 0.6857, "step": 9459 }, { "epoch": 0.27619631543604567, "grad_norm": 0.6675227069153773, "learning_rate": 1.608499594484996e-05, "loss": 0.6195, "step": 9460 }, { "epoch": 0.27622551166389303, "grad_norm": 0.7003349418776594, "learning_rate": 1.6084347120843473e-05, "loss": 0.6347, "step": 9461 }, { "epoch": 0.2762547078917404, "grad_norm": 0.6469346863469964, "learning_rate": 1.6083698296836985e-05, "loss": 0.6025, "step": 9462 }, { "epoch": 0.27628390411958775, "grad_norm": 0.68963091477385, "learning_rate": 1.6083049472830497e-05, "loss": 0.6833, "step": 9463 }, { "epoch": 0.2763131003474351, "grad_norm": 0.6841675236250797, "learning_rate": 1.608240064882401e-05, "loss": 0.6646, "step": 9464 }, { "epoch": 0.2763422965752825, "grad_norm": 0.7011379179696402, "learning_rate": 1.608175182481752e-05, "loss": 0.6739, "step": 9465 }, { "epoch": 0.27637149280312984, "grad_norm": 0.6388440619726299, "learning_rate": 1.608110300081103e-05, "loss": 0.5928, "step": 9466 }, { "epoch": 0.2764006890309772, "grad_norm": 0.7646690507769459, "learning_rate": 1.6080454176804542e-05, "loss": 0.7312, "step": 9467 }, { "epoch": 0.27642988525882456, "grad_norm": 0.6027418462107041, "learning_rate": 1.6079805352798054e-05, "loss": 0.5449, "step": 9468 }, { "epoch": 0.2764590814866719, "grad_norm": 0.6540738075171375, "learning_rate": 1.6079156528791566e-05, "loss": 0.5929, "step": 9469 }, { "epoch": 0.2764882777145193, "grad_norm": 0.7207179881661224, "learning_rate": 1.6078507704785078e-05, "loss": 0.6566, "step": 9470 }, { "epoch": 0.27651747394236664, "grad_norm": 0.8891038694685872, "learning_rate": 1.607785888077859e-05, "loss": 0.681, "step": 9471 }, { "epoch": 0.276546670170214, "grad_norm": 0.7017421092161837, "learning_rate": 1.6077210056772102e-05, "loss": 0.7432, "step": 9472 }, { "epoch": 0.27657586639806137, "grad_norm": 0.6606718888066955, "learning_rate": 1.6076561232765614e-05, "loss": 0.674, "step": 9473 }, { "epoch": 0.2766050626259087, "grad_norm": 0.6307621840697657, "learning_rate": 1.6075912408759126e-05, "loss": 0.5933, "step": 9474 }, { "epoch": 0.2766342588537561, "grad_norm": 0.7086299694046945, "learning_rate": 1.6075263584752635e-05, "loss": 0.7127, "step": 9475 }, { "epoch": 0.27666345508160345, "grad_norm": 0.6338526932057394, "learning_rate": 1.607461476074615e-05, "loss": 0.5847, "step": 9476 }, { "epoch": 0.2766926513094508, "grad_norm": 0.7798798075517279, "learning_rate": 1.6073965936739662e-05, "loss": 0.8147, "step": 9477 }, { "epoch": 0.27672184753729817, "grad_norm": 0.6555826733785481, "learning_rate": 1.6073317112733174e-05, "loss": 0.5906, "step": 9478 }, { "epoch": 0.27675104376514553, "grad_norm": 0.6097175127256289, "learning_rate": 1.6072668288726686e-05, "loss": 0.612, "step": 9479 }, { "epoch": 0.2767802399929929, "grad_norm": 0.626354991180818, "learning_rate": 1.6072019464720198e-05, "loss": 0.5803, "step": 9480 }, { "epoch": 0.27680943622084025, "grad_norm": 0.6484413521534257, "learning_rate": 1.6071370640713707e-05, "loss": 0.6384, "step": 9481 }, { "epoch": 0.2768386324486876, "grad_norm": 0.69559275501435, "learning_rate": 1.607072181670722e-05, "loss": 0.6847, "step": 9482 }, { "epoch": 0.276867828676535, "grad_norm": 0.6779699149520505, "learning_rate": 1.607007299270073e-05, "loss": 0.5721, "step": 9483 }, { "epoch": 0.27689702490438234, "grad_norm": 0.6405873673407079, "learning_rate": 1.6069424168694243e-05, "loss": 0.6425, "step": 9484 }, { "epoch": 0.2769262211322297, "grad_norm": 0.643219206944354, "learning_rate": 1.6068775344687755e-05, "loss": 0.6109, "step": 9485 }, { "epoch": 0.27695541736007706, "grad_norm": 0.6351275432275508, "learning_rate": 1.6068126520681267e-05, "loss": 0.5736, "step": 9486 }, { "epoch": 0.2769846135879244, "grad_norm": 0.6558205304287904, "learning_rate": 1.606747769667478e-05, "loss": 0.6621, "step": 9487 }, { "epoch": 0.2770138098157718, "grad_norm": 0.6927571851685465, "learning_rate": 1.606682887266829e-05, "loss": 0.6734, "step": 9488 }, { "epoch": 0.27704300604361914, "grad_norm": 0.68022202762514, "learning_rate": 1.6066180048661803e-05, "loss": 0.6706, "step": 9489 }, { "epoch": 0.2770722022714665, "grad_norm": 0.6898223900389119, "learning_rate": 1.606553122465531e-05, "loss": 0.7511, "step": 9490 }, { "epoch": 0.27710139849931387, "grad_norm": 0.6560568612249216, "learning_rate": 1.6064882400648824e-05, "loss": 0.6137, "step": 9491 }, { "epoch": 0.27713059472716123, "grad_norm": 0.6100219489597761, "learning_rate": 1.6064233576642336e-05, "loss": 0.5667, "step": 9492 }, { "epoch": 0.2771597909550086, "grad_norm": 0.646028753124844, "learning_rate": 1.6063584752635848e-05, "loss": 0.5944, "step": 9493 }, { "epoch": 0.27718898718285595, "grad_norm": 0.6700161149912074, "learning_rate": 1.606293592862936e-05, "loss": 0.6335, "step": 9494 }, { "epoch": 0.2772181834107033, "grad_norm": 0.6695807684658158, "learning_rate": 1.6062287104622872e-05, "loss": 0.6997, "step": 9495 }, { "epoch": 0.2772473796385507, "grad_norm": 0.6325661069089135, "learning_rate": 1.6061638280616384e-05, "loss": 0.5995, "step": 9496 }, { "epoch": 0.27727657586639803, "grad_norm": 0.6408943627049672, "learning_rate": 1.6060989456609896e-05, "loss": 0.5932, "step": 9497 }, { "epoch": 0.27730577209424545, "grad_norm": 0.7535337149724803, "learning_rate": 1.6060340632603408e-05, "loss": 0.6306, "step": 9498 }, { "epoch": 0.2773349683220928, "grad_norm": 0.6786621015520529, "learning_rate": 1.605969180859692e-05, "loss": 0.6618, "step": 9499 }, { "epoch": 0.2773641645499402, "grad_norm": 0.736674670595033, "learning_rate": 1.6059042984590432e-05, "loss": 0.582, "step": 9500 }, { "epoch": 0.27739336077778753, "grad_norm": 0.7021737960276159, "learning_rate": 1.6058394160583944e-05, "loss": 0.7034, "step": 9501 }, { "epoch": 0.2774225570056349, "grad_norm": 0.6557262218794121, "learning_rate": 1.6057745336577456e-05, "loss": 0.6463, "step": 9502 }, { "epoch": 0.27745175323348226, "grad_norm": 0.6591309032148931, "learning_rate": 1.6057096512570968e-05, "loss": 0.6503, "step": 9503 }, { "epoch": 0.2774809494613296, "grad_norm": 0.6980504201960461, "learning_rate": 1.6056447688564477e-05, "loss": 0.7046, "step": 9504 }, { "epoch": 0.277510145689177, "grad_norm": 0.6948107224292213, "learning_rate": 1.605579886455799e-05, "loss": 0.7544, "step": 9505 }, { "epoch": 0.27753934191702434, "grad_norm": 0.6515135086954572, "learning_rate": 1.60551500405515e-05, "loss": 0.622, "step": 9506 }, { "epoch": 0.2775685381448717, "grad_norm": 0.6918858481298139, "learning_rate": 1.6054501216545013e-05, "loss": 0.6427, "step": 9507 }, { "epoch": 0.27759773437271906, "grad_norm": 0.6673163127740708, "learning_rate": 1.6053852392538525e-05, "loss": 0.6374, "step": 9508 }, { "epoch": 0.2776269306005664, "grad_norm": 0.6562730226169984, "learning_rate": 1.6053203568532037e-05, "loss": 0.6351, "step": 9509 }, { "epoch": 0.2776561268284138, "grad_norm": 0.6444952751961154, "learning_rate": 1.605255474452555e-05, "loss": 0.6197, "step": 9510 }, { "epoch": 0.27768532305626115, "grad_norm": 0.7116986902993443, "learning_rate": 1.605190592051906e-05, "loss": 0.6513, "step": 9511 }, { "epoch": 0.2777145192841085, "grad_norm": 0.6889174232250339, "learning_rate": 1.6051257096512573e-05, "loss": 0.7173, "step": 9512 }, { "epoch": 0.27774371551195587, "grad_norm": 1.5281508442742573, "learning_rate": 1.605060827250608e-05, "loss": 0.6213, "step": 9513 }, { "epoch": 0.27777291173980323, "grad_norm": 0.7058709678083798, "learning_rate": 1.6049959448499597e-05, "loss": 0.7076, "step": 9514 }, { "epoch": 0.2778021079676506, "grad_norm": 0.6282326129293766, "learning_rate": 1.604931062449311e-05, "loss": 0.5687, "step": 9515 }, { "epoch": 0.27783130419549795, "grad_norm": 0.7262612968461724, "learning_rate": 1.604866180048662e-05, "loss": 0.6883, "step": 9516 }, { "epoch": 0.2778605004233453, "grad_norm": 0.7432211643348879, "learning_rate": 1.6048012976480133e-05, "loss": 0.7274, "step": 9517 }, { "epoch": 0.2778896966511927, "grad_norm": 0.6603759008465183, "learning_rate": 1.6047364152473645e-05, "loss": 0.6785, "step": 9518 }, { "epoch": 0.27791889287904004, "grad_norm": 0.6867425045017943, "learning_rate": 1.6046715328467154e-05, "loss": 0.5933, "step": 9519 }, { "epoch": 0.2779480891068874, "grad_norm": 0.8367927565298693, "learning_rate": 1.6046066504460666e-05, "loss": 0.7004, "step": 9520 }, { "epoch": 0.27797728533473476, "grad_norm": 0.6902006528282392, "learning_rate": 1.6045417680454178e-05, "loss": 0.6929, "step": 9521 }, { "epoch": 0.2780064815625821, "grad_norm": 0.6214506768033414, "learning_rate": 1.604476885644769e-05, "loss": 0.5928, "step": 9522 }, { "epoch": 0.2780356777904295, "grad_norm": 0.6978915833951879, "learning_rate": 1.6044120032441202e-05, "loss": 0.6543, "step": 9523 }, { "epoch": 0.27806487401827684, "grad_norm": 0.688498199008955, "learning_rate": 1.6043471208434714e-05, "loss": 0.6804, "step": 9524 }, { "epoch": 0.2780940702461242, "grad_norm": 0.6349848267301554, "learning_rate": 1.6042822384428226e-05, "loss": 0.6038, "step": 9525 }, { "epoch": 0.27812326647397156, "grad_norm": 0.6346685576821236, "learning_rate": 1.6042173560421738e-05, "loss": 0.5669, "step": 9526 }, { "epoch": 0.2781524627018189, "grad_norm": 0.6244578758238848, "learning_rate": 1.604152473641525e-05, "loss": 0.6055, "step": 9527 }, { "epoch": 0.2781816589296663, "grad_norm": 0.6847893100947111, "learning_rate": 1.604087591240876e-05, "loss": 0.6372, "step": 9528 }, { "epoch": 0.27821085515751365, "grad_norm": 0.658777405426882, "learning_rate": 1.604022708840227e-05, "loss": 0.6399, "step": 9529 }, { "epoch": 0.278240051385361, "grad_norm": 0.6665180917824424, "learning_rate": 1.6039578264395783e-05, "loss": 0.6221, "step": 9530 }, { "epoch": 0.27826924761320837, "grad_norm": 0.6195237390059012, "learning_rate": 1.6038929440389295e-05, "loss": 0.5566, "step": 9531 }, { "epoch": 0.27829844384105573, "grad_norm": 0.6233577788637539, "learning_rate": 1.6038280616382807e-05, "loss": 0.5866, "step": 9532 }, { "epoch": 0.2783276400689031, "grad_norm": 0.69080623034213, "learning_rate": 1.603763179237632e-05, "loss": 0.6935, "step": 9533 }, { "epoch": 0.27835683629675045, "grad_norm": 0.6998228976701393, "learning_rate": 1.603698296836983e-05, "loss": 0.7052, "step": 9534 }, { "epoch": 0.2783860325245978, "grad_norm": 0.6573851831384881, "learning_rate": 1.6036334144363343e-05, "loss": 0.6609, "step": 9535 }, { "epoch": 0.2784152287524452, "grad_norm": 0.7112289033130881, "learning_rate": 1.6035685320356855e-05, "loss": 0.6139, "step": 9536 }, { "epoch": 0.27844442498029254, "grad_norm": 0.9605383928083012, "learning_rate": 1.6035036496350367e-05, "loss": 0.6957, "step": 9537 }, { "epoch": 0.2784736212081399, "grad_norm": 0.64392017658043, "learning_rate": 1.603438767234388e-05, "loss": 0.5846, "step": 9538 }, { "epoch": 0.27850281743598726, "grad_norm": 0.620051912347529, "learning_rate": 1.603373884833739e-05, "loss": 0.5755, "step": 9539 }, { "epoch": 0.2785320136638346, "grad_norm": 0.7740160033661857, "learning_rate": 1.6033090024330903e-05, "loss": 0.686, "step": 9540 }, { "epoch": 0.278561209891682, "grad_norm": 0.6937663541695048, "learning_rate": 1.6032441200324415e-05, "loss": 0.663, "step": 9541 }, { "epoch": 0.27859040611952934, "grad_norm": 0.6449227989578116, "learning_rate": 1.6031792376317923e-05, "loss": 0.631, "step": 9542 }, { "epoch": 0.2786196023473767, "grad_norm": 0.661661854431085, "learning_rate": 1.6031143552311436e-05, "loss": 0.646, "step": 9543 }, { "epoch": 0.27864879857522407, "grad_norm": 0.6371769041159095, "learning_rate": 1.6030494728304948e-05, "loss": 0.5998, "step": 9544 }, { "epoch": 0.2786779948030714, "grad_norm": 0.6324882916679768, "learning_rate": 1.602984590429846e-05, "loss": 0.607, "step": 9545 }, { "epoch": 0.2787071910309188, "grad_norm": 0.8937925934527785, "learning_rate": 1.602919708029197e-05, "loss": 0.6612, "step": 9546 }, { "epoch": 0.27873638725876615, "grad_norm": 0.7037933429197151, "learning_rate": 1.6028548256285484e-05, "loss": 0.6223, "step": 9547 }, { "epoch": 0.2787655834866135, "grad_norm": 0.5997118701378291, "learning_rate": 1.6027899432278996e-05, "loss": 0.5161, "step": 9548 }, { "epoch": 0.27879477971446087, "grad_norm": 0.751245351288653, "learning_rate": 1.6027250608272508e-05, "loss": 0.7389, "step": 9549 }, { "epoch": 0.27882397594230823, "grad_norm": 0.6313686708107171, "learning_rate": 1.602660178426602e-05, "loss": 0.6293, "step": 9550 }, { "epoch": 0.2788531721701556, "grad_norm": 0.6463229819237932, "learning_rate": 1.602595296025953e-05, "loss": 0.6216, "step": 9551 }, { "epoch": 0.27888236839800296, "grad_norm": 0.6593986125576156, "learning_rate": 1.6025304136253044e-05, "loss": 0.6157, "step": 9552 }, { "epoch": 0.2789115646258503, "grad_norm": 0.6706789347063933, "learning_rate": 1.6024655312246556e-05, "loss": 0.5695, "step": 9553 }, { "epoch": 0.2789407608536977, "grad_norm": 0.6816116074268741, "learning_rate": 1.6024006488240068e-05, "loss": 0.6919, "step": 9554 }, { "epoch": 0.27896995708154504, "grad_norm": 0.641650447551242, "learning_rate": 1.602335766423358e-05, "loss": 0.6015, "step": 9555 }, { "epoch": 0.2789991533093924, "grad_norm": 0.6350053747804256, "learning_rate": 1.6022708840227092e-05, "loss": 0.619, "step": 9556 }, { "epoch": 0.27902834953723976, "grad_norm": 0.6680075481887775, "learning_rate": 1.60220600162206e-05, "loss": 0.6368, "step": 9557 }, { "epoch": 0.2790575457650872, "grad_norm": 0.6587777300794625, "learning_rate": 1.6021411192214113e-05, "loss": 0.5754, "step": 9558 }, { "epoch": 0.27908674199293454, "grad_norm": 0.737032891754567, "learning_rate": 1.6020762368207625e-05, "loss": 0.7365, "step": 9559 }, { "epoch": 0.2791159382207819, "grad_norm": 0.6030807991692084, "learning_rate": 1.6020113544201137e-05, "loss": 0.576, "step": 9560 }, { "epoch": 0.27914513444862926, "grad_norm": 0.7136972003376194, "learning_rate": 1.601946472019465e-05, "loss": 0.7204, "step": 9561 }, { "epoch": 0.2791743306764766, "grad_norm": 0.6546454453581674, "learning_rate": 1.601881589618816e-05, "loss": 0.6031, "step": 9562 }, { "epoch": 0.279203526904324, "grad_norm": 0.7161533410023436, "learning_rate": 1.6018167072181673e-05, "loss": 0.669, "step": 9563 }, { "epoch": 0.27923272313217135, "grad_norm": 0.646866716727234, "learning_rate": 1.6017518248175185e-05, "loss": 0.6245, "step": 9564 }, { "epoch": 0.2792619193600187, "grad_norm": 0.6359014248953215, "learning_rate": 1.6016869424168697e-05, "loss": 0.5898, "step": 9565 }, { "epoch": 0.27929111558786607, "grad_norm": 0.6441930224757996, "learning_rate": 1.6016220600162205e-05, "loss": 0.59, "step": 9566 }, { "epoch": 0.27932031181571343, "grad_norm": 0.7504681835384077, "learning_rate": 1.6015571776155717e-05, "loss": 0.7588, "step": 9567 }, { "epoch": 0.2793495080435608, "grad_norm": 0.6817284237739244, "learning_rate": 1.601492295214923e-05, "loss": 0.7018, "step": 9568 }, { "epoch": 0.27937870427140815, "grad_norm": 0.6738411178377407, "learning_rate": 1.601427412814274e-05, "loss": 0.6344, "step": 9569 }, { "epoch": 0.2794079004992555, "grad_norm": 0.6630884169278022, "learning_rate": 1.6013625304136253e-05, "loss": 0.6526, "step": 9570 }, { "epoch": 0.2794370967271029, "grad_norm": 0.6223131999638221, "learning_rate": 1.6012976480129766e-05, "loss": 0.585, "step": 9571 }, { "epoch": 0.27946629295495024, "grad_norm": 0.6914905296492251, "learning_rate": 1.6012327656123278e-05, "loss": 0.6352, "step": 9572 }, { "epoch": 0.2794954891827976, "grad_norm": 0.6706605993513388, "learning_rate": 1.601167883211679e-05, "loss": 0.665, "step": 9573 }, { "epoch": 0.27952468541064496, "grad_norm": 0.632426021830777, "learning_rate": 1.60110300081103e-05, "loss": 0.5582, "step": 9574 }, { "epoch": 0.2795538816384923, "grad_norm": 0.7511459983129366, "learning_rate": 1.6010381184103814e-05, "loss": 0.6131, "step": 9575 }, { "epoch": 0.2795830778663397, "grad_norm": 0.7326144402914289, "learning_rate": 1.6009732360097326e-05, "loss": 0.698, "step": 9576 }, { "epoch": 0.27961227409418704, "grad_norm": 0.6859541115520714, "learning_rate": 1.6009083536090838e-05, "loss": 0.6219, "step": 9577 }, { "epoch": 0.2796414703220344, "grad_norm": 0.6366103854155354, "learning_rate": 1.600843471208435e-05, "loss": 0.5734, "step": 9578 }, { "epoch": 0.27967066654988176, "grad_norm": 0.6928057954117837, "learning_rate": 1.6007785888077862e-05, "loss": 0.706, "step": 9579 }, { "epoch": 0.2796998627777291, "grad_norm": 0.7367036930898085, "learning_rate": 1.600713706407137e-05, "loss": 0.69, "step": 9580 }, { "epoch": 0.2797290590055765, "grad_norm": 0.7090027027265802, "learning_rate": 1.6006488240064882e-05, "loss": 0.7046, "step": 9581 }, { "epoch": 0.27975825523342385, "grad_norm": 0.7000926040960257, "learning_rate": 1.6005839416058394e-05, "loss": 0.6982, "step": 9582 }, { "epoch": 0.2797874514612712, "grad_norm": 0.6348166328808922, "learning_rate": 1.6005190592051906e-05, "loss": 0.609, "step": 9583 }, { "epoch": 0.27981664768911857, "grad_norm": 0.6314734063990365, "learning_rate": 1.600454176804542e-05, "loss": 0.6276, "step": 9584 }, { "epoch": 0.27984584391696593, "grad_norm": 0.6422069819601145, "learning_rate": 1.600389294403893e-05, "loss": 0.5889, "step": 9585 }, { "epoch": 0.2798750401448133, "grad_norm": 0.6823432115811585, "learning_rate": 1.6003244120032443e-05, "loss": 0.6835, "step": 9586 }, { "epoch": 0.27990423637266065, "grad_norm": 0.6837484272185848, "learning_rate": 1.6002595296025955e-05, "loss": 0.6511, "step": 9587 }, { "epoch": 0.279933432600508, "grad_norm": 0.6542557799247865, "learning_rate": 1.6001946472019467e-05, "loss": 0.6277, "step": 9588 }, { "epoch": 0.2799626288283554, "grad_norm": 0.6401245531954808, "learning_rate": 1.6001297648012975e-05, "loss": 0.6233, "step": 9589 }, { "epoch": 0.27999182505620274, "grad_norm": 0.6600821893346128, "learning_rate": 1.600064882400649e-05, "loss": 0.6215, "step": 9590 }, { "epoch": 0.2800210212840501, "grad_norm": 0.60755726085424, "learning_rate": 1.6000000000000003e-05, "loss": 0.5253, "step": 9591 }, { "epoch": 0.28005021751189746, "grad_norm": 0.6762533267777121, "learning_rate": 1.5999351175993515e-05, "loss": 0.6656, "step": 9592 }, { "epoch": 0.2800794137397448, "grad_norm": 0.6892795318658611, "learning_rate": 1.5998702351987027e-05, "loss": 0.6934, "step": 9593 }, { "epoch": 0.2801086099675922, "grad_norm": 0.6174807827049167, "learning_rate": 1.599805352798054e-05, "loss": 0.5929, "step": 9594 }, { "epoch": 0.28013780619543954, "grad_norm": 0.6548822040176764, "learning_rate": 1.5997404703974047e-05, "loss": 0.6146, "step": 9595 }, { "epoch": 0.2801670024232869, "grad_norm": 0.7284091695348481, "learning_rate": 1.599675587996756e-05, "loss": 0.6486, "step": 9596 }, { "epoch": 0.28019619865113427, "grad_norm": 0.6405061504513384, "learning_rate": 1.599610705596107e-05, "loss": 0.6363, "step": 9597 }, { "epoch": 0.2802253948789816, "grad_norm": 0.6935206762842867, "learning_rate": 1.5995458231954583e-05, "loss": 0.6638, "step": 9598 }, { "epoch": 0.280254591106829, "grad_norm": 0.6535209613372318, "learning_rate": 1.5994809407948095e-05, "loss": 0.6228, "step": 9599 }, { "epoch": 0.28028378733467635, "grad_norm": 0.6701359661380012, "learning_rate": 1.5994160583941608e-05, "loss": 0.6255, "step": 9600 }, { "epoch": 0.2803129835625237, "grad_norm": 0.6562274789515783, "learning_rate": 1.599351175993512e-05, "loss": 0.6052, "step": 9601 }, { "epoch": 0.28034217979037107, "grad_norm": 0.6937690466350577, "learning_rate": 1.599286293592863e-05, "loss": 0.6501, "step": 9602 }, { "epoch": 0.28037137601821843, "grad_norm": 0.6394507429172439, "learning_rate": 1.599221411192214e-05, "loss": 0.623, "step": 9603 }, { "epoch": 0.2804005722460658, "grad_norm": 0.6590749473336369, "learning_rate": 1.5991565287915652e-05, "loss": 0.6083, "step": 9604 }, { "epoch": 0.28042976847391315, "grad_norm": 0.7240092611434248, "learning_rate": 1.5990916463909164e-05, "loss": 0.7102, "step": 9605 }, { "epoch": 0.2804589647017605, "grad_norm": 0.7439293497165429, "learning_rate": 1.5990267639902676e-05, "loss": 0.6459, "step": 9606 }, { "epoch": 0.2804881609296079, "grad_norm": 0.6892823974328859, "learning_rate": 1.598961881589619e-05, "loss": 0.6677, "step": 9607 }, { "epoch": 0.28051735715745524, "grad_norm": 0.6435292216921351, "learning_rate": 1.5988969991889704e-05, "loss": 0.6243, "step": 9608 }, { "epoch": 0.2805465533853026, "grad_norm": 0.6906239870387278, "learning_rate": 1.5988321167883212e-05, "loss": 0.7027, "step": 9609 }, { "epoch": 0.28057574961314996, "grad_norm": 0.6091182549107167, "learning_rate": 1.5987672343876724e-05, "loss": 0.5115, "step": 9610 }, { "epoch": 0.2806049458409973, "grad_norm": 0.7812258006957922, "learning_rate": 1.5987023519870236e-05, "loss": 0.6484, "step": 9611 }, { "epoch": 0.2806341420688447, "grad_norm": 0.7249730298840762, "learning_rate": 1.598637469586375e-05, "loss": 0.7343, "step": 9612 }, { "epoch": 0.28066333829669204, "grad_norm": 0.660995862976824, "learning_rate": 1.598572587185726e-05, "loss": 0.617, "step": 9613 }, { "epoch": 0.2806925345245394, "grad_norm": 0.6383477726642655, "learning_rate": 1.5985077047850773e-05, "loss": 0.6368, "step": 9614 }, { "epoch": 0.28072173075238677, "grad_norm": 0.6939075231555786, "learning_rate": 1.5984428223844285e-05, "loss": 0.6957, "step": 9615 }, { "epoch": 0.28075092698023413, "grad_norm": 0.7160016178432563, "learning_rate": 1.5983779399837797e-05, "loss": 0.6259, "step": 9616 }, { "epoch": 0.2807801232080815, "grad_norm": 0.6963798724742664, "learning_rate": 1.598313057583131e-05, "loss": 0.6765, "step": 9617 }, { "epoch": 0.28080931943592885, "grad_norm": 0.6938710999317004, "learning_rate": 1.5982481751824817e-05, "loss": 0.6941, "step": 9618 }, { "epoch": 0.28083851566377627, "grad_norm": 0.6634563459927906, "learning_rate": 1.598183292781833e-05, "loss": 0.6816, "step": 9619 }, { "epoch": 0.28086771189162363, "grad_norm": 0.6299748651681596, "learning_rate": 1.598118410381184e-05, "loss": 0.6083, "step": 9620 }, { "epoch": 0.280896908119471, "grad_norm": 0.7172310179174105, "learning_rate": 1.5980535279805353e-05, "loss": 0.6891, "step": 9621 }, { "epoch": 0.28092610434731835, "grad_norm": 0.6509702882536885, "learning_rate": 1.5979886455798865e-05, "loss": 0.6, "step": 9622 }, { "epoch": 0.2809553005751657, "grad_norm": 0.6835607572069635, "learning_rate": 1.5979237631792377e-05, "loss": 0.66, "step": 9623 }, { "epoch": 0.2809844968030131, "grad_norm": 0.6793267495613445, "learning_rate": 1.597858880778589e-05, "loss": 0.6384, "step": 9624 }, { "epoch": 0.28101369303086043, "grad_norm": 0.7020714617474461, "learning_rate": 1.59779399837794e-05, "loss": 0.7052, "step": 9625 }, { "epoch": 0.2810428892587078, "grad_norm": 0.6633767171867545, "learning_rate": 1.5977291159772913e-05, "loss": 0.6461, "step": 9626 }, { "epoch": 0.28107208548655516, "grad_norm": 0.8055359744919299, "learning_rate": 1.5976642335766425e-05, "loss": 0.6891, "step": 9627 }, { "epoch": 0.2811012817144025, "grad_norm": 0.6403258728812793, "learning_rate": 1.5975993511759938e-05, "loss": 0.5936, "step": 9628 }, { "epoch": 0.2811304779422499, "grad_norm": 0.7529053473878199, "learning_rate": 1.597534468775345e-05, "loss": 0.712, "step": 9629 }, { "epoch": 0.28115967417009724, "grad_norm": 0.6858241018999272, "learning_rate": 1.597469586374696e-05, "loss": 0.6377, "step": 9630 }, { "epoch": 0.2811888703979446, "grad_norm": 0.660286663381381, "learning_rate": 1.5974047039740474e-05, "loss": 0.652, "step": 9631 }, { "epoch": 0.28121806662579196, "grad_norm": 0.6697919129157469, "learning_rate": 1.5973398215733986e-05, "loss": 0.6549, "step": 9632 }, { "epoch": 0.2812472628536393, "grad_norm": 0.6545003529506495, "learning_rate": 1.5972749391727494e-05, "loss": 0.5819, "step": 9633 }, { "epoch": 0.2812764590814867, "grad_norm": 0.6530947042095965, "learning_rate": 1.5972100567721006e-05, "loss": 0.632, "step": 9634 }, { "epoch": 0.28130565530933405, "grad_norm": 0.6671609657156746, "learning_rate": 1.597145174371452e-05, "loss": 0.7026, "step": 9635 }, { "epoch": 0.2813348515371814, "grad_norm": 0.7224080572394465, "learning_rate": 1.597080291970803e-05, "loss": 0.6761, "step": 9636 }, { "epoch": 0.28136404776502877, "grad_norm": 0.6605919528235334, "learning_rate": 1.5970154095701542e-05, "loss": 0.6245, "step": 9637 }, { "epoch": 0.28139324399287613, "grad_norm": 0.6032907161377152, "learning_rate": 1.5969505271695054e-05, "loss": 0.5545, "step": 9638 }, { "epoch": 0.2814224402207235, "grad_norm": 0.7009347199980663, "learning_rate": 1.5968856447688566e-05, "loss": 0.6718, "step": 9639 }, { "epoch": 0.28145163644857085, "grad_norm": 0.7193362350013779, "learning_rate": 1.596820762368208e-05, "loss": 0.7069, "step": 9640 }, { "epoch": 0.2814808326764182, "grad_norm": 0.6745269665997707, "learning_rate": 1.5967558799675587e-05, "loss": 0.6298, "step": 9641 }, { "epoch": 0.2815100289042656, "grad_norm": 0.6468846364277632, "learning_rate": 1.59669099756691e-05, "loss": 0.6153, "step": 9642 }, { "epoch": 0.28153922513211294, "grad_norm": 0.7015793043693884, "learning_rate": 1.596626115166261e-05, "loss": 0.6666, "step": 9643 }, { "epoch": 0.2815684213599603, "grad_norm": 0.7173354394357201, "learning_rate": 1.5965612327656123e-05, "loss": 0.6898, "step": 9644 }, { "epoch": 0.28159761758780766, "grad_norm": 0.6760577301973248, "learning_rate": 1.5964963503649635e-05, "loss": 0.6209, "step": 9645 }, { "epoch": 0.281626813815655, "grad_norm": 0.6560388913641335, "learning_rate": 1.596431467964315e-05, "loss": 0.628, "step": 9646 }, { "epoch": 0.2816560100435024, "grad_norm": 0.663393191161721, "learning_rate": 1.596366585563666e-05, "loss": 0.5987, "step": 9647 }, { "epoch": 0.28168520627134974, "grad_norm": 0.6420926885991884, "learning_rate": 1.596301703163017e-05, "loss": 0.5874, "step": 9648 }, { "epoch": 0.2817144024991971, "grad_norm": 0.677144961433304, "learning_rate": 1.5962368207623683e-05, "loss": 0.5835, "step": 9649 }, { "epoch": 0.28174359872704446, "grad_norm": 0.6375426212951413, "learning_rate": 1.5961719383617195e-05, "loss": 0.6024, "step": 9650 }, { "epoch": 0.2817727949548918, "grad_norm": 0.6629856498007796, "learning_rate": 1.5961070559610707e-05, "loss": 0.6254, "step": 9651 }, { "epoch": 0.2818019911827392, "grad_norm": 0.6339540437703185, "learning_rate": 1.596042173560422e-05, "loss": 0.6228, "step": 9652 }, { "epoch": 0.28183118741058655, "grad_norm": 0.6522951780057066, "learning_rate": 1.595977291159773e-05, "loss": 0.6558, "step": 9653 }, { "epoch": 0.2818603836384339, "grad_norm": 0.6104802447585227, "learning_rate": 1.5959124087591243e-05, "loss": 0.5716, "step": 9654 }, { "epoch": 0.28188957986628127, "grad_norm": 0.7436793069486131, "learning_rate": 1.5958475263584755e-05, "loss": 0.7002, "step": 9655 }, { "epoch": 0.28191877609412863, "grad_norm": 0.641364313807589, "learning_rate": 1.5957826439578264e-05, "loss": 0.6377, "step": 9656 }, { "epoch": 0.281947972321976, "grad_norm": 0.6494244899987893, "learning_rate": 1.5957177615571776e-05, "loss": 0.6342, "step": 9657 }, { "epoch": 0.28197716854982335, "grad_norm": 0.6525271344260062, "learning_rate": 1.5956528791565288e-05, "loss": 0.6457, "step": 9658 }, { "epoch": 0.2820063647776707, "grad_norm": 0.6300977633775775, "learning_rate": 1.59558799675588e-05, "loss": 0.5785, "step": 9659 }, { "epoch": 0.2820355610055181, "grad_norm": 0.6779349189265981, "learning_rate": 1.5955231143552312e-05, "loss": 0.7004, "step": 9660 }, { "epoch": 0.28206475723336544, "grad_norm": 0.6287649060756785, "learning_rate": 1.5954582319545824e-05, "loss": 0.5758, "step": 9661 }, { "epoch": 0.2820939534612128, "grad_norm": 0.6568095071093815, "learning_rate": 1.5953933495539336e-05, "loss": 0.6507, "step": 9662 }, { "epoch": 0.28212314968906016, "grad_norm": 0.6606372525720452, "learning_rate": 1.5953284671532848e-05, "loss": 0.6458, "step": 9663 }, { "epoch": 0.2821523459169075, "grad_norm": 0.6591761325427165, "learning_rate": 1.595263584752636e-05, "loss": 0.6138, "step": 9664 }, { "epoch": 0.2821815421447549, "grad_norm": 0.6558669260263835, "learning_rate": 1.5951987023519872e-05, "loss": 0.6272, "step": 9665 }, { "epoch": 0.28221073837260224, "grad_norm": 0.6931489338197973, "learning_rate": 1.5951338199513384e-05, "loss": 0.6733, "step": 9666 }, { "epoch": 0.2822399346004496, "grad_norm": 0.6577970034726596, "learning_rate": 1.5950689375506896e-05, "loss": 0.6086, "step": 9667 }, { "epoch": 0.28226913082829697, "grad_norm": 0.7081262035220435, "learning_rate": 1.595004055150041e-05, "loss": 0.6229, "step": 9668 }, { "epoch": 0.2822983270561443, "grad_norm": 0.5882506764262201, "learning_rate": 1.594939172749392e-05, "loss": 0.5327, "step": 9669 }, { "epoch": 0.2823275232839917, "grad_norm": 0.7113282102434371, "learning_rate": 1.5948742903487432e-05, "loss": 0.6824, "step": 9670 }, { "epoch": 0.28235671951183905, "grad_norm": 0.6458195033320446, "learning_rate": 1.594809407948094e-05, "loss": 0.5807, "step": 9671 }, { "epoch": 0.2823859157396864, "grad_norm": 0.5997412545014933, "learning_rate": 1.5947445255474453e-05, "loss": 0.538, "step": 9672 }, { "epoch": 0.28241511196753377, "grad_norm": 0.6186772495956216, "learning_rate": 1.5946796431467965e-05, "loss": 0.5776, "step": 9673 }, { "epoch": 0.28244430819538113, "grad_norm": 0.6345553153392617, "learning_rate": 1.5946147607461477e-05, "loss": 0.607, "step": 9674 }, { "epoch": 0.2824735044232285, "grad_norm": 0.6749578376819042, "learning_rate": 1.594549878345499e-05, "loss": 0.6343, "step": 9675 }, { "epoch": 0.28250270065107586, "grad_norm": 0.6387038663938487, "learning_rate": 1.59448499594485e-05, "loss": 0.5873, "step": 9676 }, { "epoch": 0.2825318968789232, "grad_norm": 0.7360522822436967, "learning_rate": 1.5944201135442013e-05, "loss": 0.7257, "step": 9677 }, { "epoch": 0.2825610931067706, "grad_norm": 0.6918377859777617, "learning_rate": 1.5943552311435525e-05, "loss": 0.6911, "step": 9678 }, { "epoch": 0.282590289334618, "grad_norm": 0.6943939063175679, "learning_rate": 1.5942903487429034e-05, "loss": 0.681, "step": 9679 }, { "epoch": 0.28261948556246536, "grad_norm": 0.6656911238210405, "learning_rate": 1.5942254663422546e-05, "loss": 0.6457, "step": 9680 }, { "epoch": 0.2826486817903127, "grad_norm": 0.6622984747522999, "learning_rate": 1.5941605839416058e-05, "loss": 0.591, "step": 9681 }, { "epoch": 0.2826778780181601, "grad_norm": 0.6674205633772674, "learning_rate": 1.594095701540957e-05, "loss": 0.6556, "step": 9682 }, { "epoch": 0.28270707424600744, "grad_norm": 0.6735768620232842, "learning_rate": 1.5940308191403082e-05, "loss": 0.6412, "step": 9683 }, { "epoch": 0.2827362704738548, "grad_norm": 0.7906487379033754, "learning_rate": 1.5939659367396597e-05, "loss": 0.7028, "step": 9684 }, { "epoch": 0.28276546670170216, "grad_norm": 0.6332183247578811, "learning_rate": 1.5939010543390106e-05, "loss": 0.5703, "step": 9685 }, { "epoch": 0.2827946629295495, "grad_norm": 0.7988242633440418, "learning_rate": 1.5938361719383618e-05, "loss": 0.7288, "step": 9686 }, { "epoch": 0.2828238591573969, "grad_norm": 0.6573231797639159, "learning_rate": 1.593771289537713e-05, "loss": 0.5469, "step": 9687 }, { "epoch": 0.28285305538524425, "grad_norm": 0.6589570459264268, "learning_rate": 1.5937064071370642e-05, "loss": 0.6243, "step": 9688 }, { "epoch": 0.2828822516130916, "grad_norm": 0.7395099262929267, "learning_rate": 1.5936415247364154e-05, "loss": 0.5941, "step": 9689 }, { "epoch": 0.28291144784093897, "grad_norm": 0.627139275892965, "learning_rate": 1.5935766423357666e-05, "loss": 0.586, "step": 9690 }, { "epoch": 0.28294064406878633, "grad_norm": 0.7011535963237617, "learning_rate": 1.5935117599351178e-05, "loss": 0.6892, "step": 9691 }, { "epoch": 0.2829698402966337, "grad_norm": 0.6734665527178508, "learning_rate": 1.593446877534469e-05, "loss": 0.6434, "step": 9692 }, { "epoch": 0.28299903652448105, "grad_norm": 0.7277357430834571, "learning_rate": 1.5933819951338202e-05, "loss": 0.6976, "step": 9693 }, { "epoch": 0.2830282327523284, "grad_norm": 0.6814420631432204, "learning_rate": 1.593317112733171e-05, "loss": 0.6849, "step": 9694 }, { "epoch": 0.2830574289801758, "grad_norm": 0.7325430004948077, "learning_rate": 1.5932522303325223e-05, "loss": 0.6976, "step": 9695 }, { "epoch": 0.28308662520802313, "grad_norm": 0.7111266467734573, "learning_rate": 1.5931873479318735e-05, "loss": 0.7583, "step": 9696 }, { "epoch": 0.2831158214358705, "grad_norm": 0.6714968795629239, "learning_rate": 1.5931224655312247e-05, "loss": 0.6817, "step": 9697 }, { "epoch": 0.28314501766371786, "grad_norm": 0.6069491657162144, "learning_rate": 1.593057583130576e-05, "loss": 0.5624, "step": 9698 }, { "epoch": 0.2831742138915652, "grad_norm": 0.6824681340540375, "learning_rate": 1.592992700729927e-05, "loss": 0.6789, "step": 9699 }, { "epoch": 0.2832034101194126, "grad_norm": 0.6708837331319941, "learning_rate": 1.5929278183292783e-05, "loss": 0.6878, "step": 9700 }, { "epoch": 0.28323260634725994, "grad_norm": 0.6796823754098092, "learning_rate": 1.5928629359286295e-05, "loss": 0.6456, "step": 9701 }, { "epoch": 0.2832618025751073, "grad_norm": 0.6433204945477051, "learning_rate": 1.5927980535279807e-05, "loss": 0.6354, "step": 9702 }, { "epoch": 0.28329099880295466, "grad_norm": 0.6902633460901508, "learning_rate": 1.592733171127332e-05, "loss": 0.6538, "step": 9703 }, { "epoch": 0.283320195030802, "grad_norm": 0.6483269945165396, "learning_rate": 1.592668288726683e-05, "loss": 0.6146, "step": 9704 }, { "epoch": 0.2833493912586494, "grad_norm": 0.716096095226889, "learning_rate": 1.5926034063260343e-05, "loss": 0.6394, "step": 9705 }, { "epoch": 0.28337858748649675, "grad_norm": 1.0700690176381127, "learning_rate": 1.5925385239253855e-05, "loss": 0.7104, "step": 9706 }, { "epoch": 0.2834077837143441, "grad_norm": 0.6566658363109795, "learning_rate": 1.5924736415247367e-05, "loss": 0.6065, "step": 9707 }, { "epoch": 0.28343697994219147, "grad_norm": 0.6424555632268888, "learning_rate": 1.592408759124088e-05, "loss": 0.5611, "step": 9708 }, { "epoch": 0.28346617617003883, "grad_norm": 0.6987121883565831, "learning_rate": 1.5923438767234388e-05, "loss": 0.6653, "step": 9709 }, { "epoch": 0.2834953723978862, "grad_norm": 0.659682422239115, "learning_rate": 1.59227899432279e-05, "loss": 0.5903, "step": 9710 }, { "epoch": 0.28352456862573355, "grad_norm": 0.6682755889269992, "learning_rate": 1.5922141119221412e-05, "loss": 0.6334, "step": 9711 }, { "epoch": 0.2835537648535809, "grad_norm": 0.6963666779923517, "learning_rate": 1.5921492295214924e-05, "loss": 0.6915, "step": 9712 }, { "epoch": 0.2835829610814283, "grad_norm": 0.6224858471168662, "learning_rate": 1.5920843471208436e-05, "loss": 0.5791, "step": 9713 }, { "epoch": 0.28361215730927564, "grad_norm": 0.6995646212696458, "learning_rate": 1.5920194647201948e-05, "loss": 0.6496, "step": 9714 }, { "epoch": 0.283641353537123, "grad_norm": 0.6544976977109364, "learning_rate": 1.591954582319546e-05, "loss": 0.6169, "step": 9715 }, { "epoch": 0.28367054976497036, "grad_norm": 0.6914416917983155, "learning_rate": 1.5918896999188972e-05, "loss": 0.7041, "step": 9716 }, { "epoch": 0.2836997459928177, "grad_norm": 0.6262485368687224, "learning_rate": 1.591824817518248e-05, "loss": 0.6086, "step": 9717 }, { "epoch": 0.2837289422206651, "grad_norm": 0.725429914399743, "learning_rate": 1.5917599351175993e-05, "loss": 0.7848, "step": 9718 }, { "epoch": 0.28375813844851244, "grad_norm": 0.6229803538171712, "learning_rate": 1.5916950527169505e-05, "loss": 0.6007, "step": 9719 }, { "epoch": 0.2837873346763598, "grad_norm": 0.7293308166444976, "learning_rate": 1.5916301703163017e-05, "loss": 0.6443, "step": 9720 }, { "epoch": 0.28381653090420716, "grad_norm": 0.6565014037418085, "learning_rate": 1.591565287915653e-05, "loss": 0.6881, "step": 9721 }, { "epoch": 0.2838457271320545, "grad_norm": 0.9191822586394551, "learning_rate": 1.5915004055150044e-05, "loss": 0.6613, "step": 9722 }, { "epoch": 0.2838749233599019, "grad_norm": 0.62335702312249, "learning_rate": 1.5914355231143553e-05, "loss": 0.5846, "step": 9723 }, { "epoch": 0.28390411958774925, "grad_norm": 0.7568168781539615, "learning_rate": 1.5913706407137065e-05, "loss": 0.7206, "step": 9724 }, { "epoch": 0.2839333158155966, "grad_norm": 0.63672653558226, "learning_rate": 1.5913057583130577e-05, "loss": 0.6033, "step": 9725 }, { "epoch": 0.28396251204344397, "grad_norm": 0.6777498769360721, "learning_rate": 1.591240875912409e-05, "loss": 0.6732, "step": 9726 }, { "epoch": 0.28399170827129133, "grad_norm": 0.6373135175428086, "learning_rate": 1.59117599351176e-05, "loss": 0.58, "step": 9727 }, { "epoch": 0.2840209044991387, "grad_norm": 0.6910417407169729, "learning_rate": 1.5911111111111113e-05, "loss": 0.7273, "step": 9728 }, { "epoch": 0.28405010072698605, "grad_norm": 0.6468441178151837, "learning_rate": 1.5910462287104625e-05, "loss": 0.6384, "step": 9729 }, { "epoch": 0.2840792969548334, "grad_norm": 0.6611200002572183, "learning_rate": 1.5909813463098137e-05, "loss": 0.6184, "step": 9730 }, { "epoch": 0.2841084931826808, "grad_norm": 0.7194607964097947, "learning_rate": 1.590916463909165e-05, "loss": 0.7397, "step": 9731 }, { "epoch": 0.28413768941052814, "grad_norm": 0.6958340514834487, "learning_rate": 1.5908515815085158e-05, "loss": 0.708, "step": 9732 }, { "epoch": 0.2841668856383755, "grad_norm": 0.6565498660292275, "learning_rate": 1.590786699107867e-05, "loss": 0.6881, "step": 9733 }, { "epoch": 0.28419608186622286, "grad_norm": 0.6602527159468471, "learning_rate": 1.5907218167072182e-05, "loss": 0.6439, "step": 9734 }, { "epoch": 0.2842252780940702, "grad_norm": 0.6004640998213698, "learning_rate": 1.5906569343065694e-05, "loss": 0.5266, "step": 9735 }, { "epoch": 0.2842544743219176, "grad_norm": 0.6292360132473953, "learning_rate": 1.5905920519059206e-05, "loss": 0.5718, "step": 9736 }, { "epoch": 0.28428367054976494, "grad_norm": 0.6541813291126664, "learning_rate": 1.5905271695052718e-05, "loss": 0.6254, "step": 9737 }, { "epoch": 0.2843128667776123, "grad_norm": 0.6970806416910766, "learning_rate": 1.590462287104623e-05, "loss": 0.7297, "step": 9738 }, { "epoch": 0.2843420630054597, "grad_norm": 0.6651864706502494, "learning_rate": 1.5903974047039742e-05, "loss": 0.6553, "step": 9739 }, { "epoch": 0.2843712592333071, "grad_norm": 0.6668180215708049, "learning_rate": 1.5903325223033254e-05, "loss": 0.6405, "step": 9740 }, { "epoch": 0.28440045546115444, "grad_norm": 0.6728064935433847, "learning_rate": 1.5902676399026766e-05, "loss": 0.6343, "step": 9741 }, { "epoch": 0.2844296516890018, "grad_norm": 0.679414454358651, "learning_rate": 1.5902027575020278e-05, "loss": 0.7135, "step": 9742 }, { "epoch": 0.28445884791684917, "grad_norm": 0.6339684463635893, "learning_rate": 1.590137875101379e-05, "loss": 0.6193, "step": 9743 }, { "epoch": 0.28448804414469653, "grad_norm": 0.6790379122563787, "learning_rate": 1.5900729927007302e-05, "loss": 0.7196, "step": 9744 }, { "epoch": 0.2845172403725439, "grad_norm": 0.7023430649854794, "learning_rate": 1.5900081103000814e-05, "loss": 0.7238, "step": 9745 }, { "epoch": 0.28454643660039125, "grad_norm": 0.6717517703669876, "learning_rate": 1.5899432278994323e-05, "loss": 0.669, "step": 9746 }, { "epoch": 0.2845756328282386, "grad_norm": 0.6195649975644371, "learning_rate": 1.5898783454987835e-05, "loss": 0.6, "step": 9747 }, { "epoch": 0.284604829056086, "grad_norm": 0.6784473417103506, "learning_rate": 1.5898134630981347e-05, "loss": 0.6863, "step": 9748 }, { "epoch": 0.28463402528393333, "grad_norm": 0.6469837176385841, "learning_rate": 1.589748580697486e-05, "loss": 0.6096, "step": 9749 }, { "epoch": 0.2846632215117807, "grad_norm": 0.6376958537501026, "learning_rate": 1.589683698296837e-05, "loss": 0.6441, "step": 9750 }, { "epoch": 0.28469241773962806, "grad_norm": 0.7016619704089802, "learning_rate": 1.5896188158961883e-05, "loss": 0.6422, "step": 9751 }, { "epoch": 0.2847216139674754, "grad_norm": 0.6665565021567981, "learning_rate": 1.5895539334955395e-05, "loss": 0.6137, "step": 9752 }, { "epoch": 0.2847508101953228, "grad_norm": 0.6524479312073366, "learning_rate": 1.5894890510948907e-05, "loss": 0.64, "step": 9753 }, { "epoch": 0.28478000642317014, "grad_norm": 0.7271046636563123, "learning_rate": 1.589424168694242e-05, "loss": 0.6734, "step": 9754 }, { "epoch": 0.2848092026510175, "grad_norm": 0.6044919478562577, "learning_rate": 1.5893592862935928e-05, "loss": 0.5575, "step": 9755 }, { "epoch": 0.28483839887886486, "grad_norm": 0.6673526587824793, "learning_rate": 1.589294403892944e-05, "loss": 0.6662, "step": 9756 }, { "epoch": 0.2848675951067122, "grad_norm": 0.6380982749920783, "learning_rate": 1.5892295214922952e-05, "loss": 0.6051, "step": 9757 }, { "epoch": 0.2848967913345596, "grad_norm": 0.6162492647151355, "learning_rate": 1.5891646390916464e-05, "loss": 0.5376, "step": 9758 }, { "epoch": 0.28492598756240695, "grad_norm": 0.6584243830963683, "learning_rate": 1.589099756690998e-05, "loss": 0.6101, "step": 9759 }, { "epoch": 0.2849551837902543, "grad_norm": 0.6565163501225284, "learning_rate": 1.589034874290349e-05, "loss": 0.5981, "step": 9760 }, { "epoch": 0.28498438001810167, "grad_norm": 0.6491315570489947, "learning_rate": 1.5889699918897e-05, "loss": 0.6273, "step": 9761 }, { "epoch": 0.28501357624594903, "grad_norm": 0.6658787839712048, "learning_rate": 1.5889051094890512e-05, "loss": 0.6325, "step": 9762 }, { "epoch": 0.2850427724737964, "grad_norm": 0.6032506239571127, "learning_rate": 1.5888402270884024e-05, "loss": 0.51, "step": 9763 }, { "epoch": 0.28507196870164375, "grad_norm": 0.7969140057462522, "learning_rate": 1.5887753446877536e-05, "loss": 0.6711, "step": 9764 }, { "epoch": 0.2851011649294911, "grad_norm": 0.6853214098702941, "learning_rate": 1.5887104622871048e-05, "loss": 0.5626, "step": 9765 }, { "epoch": 0.2851303611573385, "grad_norm": 0.6950649727940823, "learning_rate": 1.588645579886456e-05, "loss": 0.7056, "step": 9766 }, { "epoch": 0.28515955738518584, "grad_norm": 0.6187654705412242, "learning_rate": 1.5885806974858072e-05, "loss": 0.5574, "step": 9767 }, { "epoch": 0.2851887536130332, "grad_norm": 0.7384611054155873, "learning_rate": 1.5885158150851584e-05, "loss": 0.6454, "step": 9768 }, { "epoch": 0.28521794984088056, "grad_norm": 0.7666466402775544, "learning_rate": 1.5884509326845096e-05, "loss": 0.7649, "step": 9769 }, { "epoch": 0.2852471460687279, "grad_norm": 0.726677024683976, "learning_rate": 1.5883860502838605e-05, "loss": 0.6493, "step": 9770 }, { "epoch": 0.2852763422965753, "grad_norm": 0.6727998370127902, "learning_rate": 1.5883211678832117e-05, "loss": 0.6386, "step": 9771 }, { "epoch": 0.28530553852442264, "grad_norm": 0.6116725822479889, "learning_rate": 1.588256285482563e-05, "loss": 0.5644, "step": 9772 }, { "epoch": 0.28533473475227, "grad_norm": 0.732448929768028, "learning_rate": 1.588191403081914e-05, "loss": 0.5883, "step": 9773 }, { "epoch": 0.28536393098011736, "grad_norm": 0.6434596140814977, "learning_rate": 1.5881265206812653e-05, "loss": 0.6394, "step": 9774 }, { "epoch": 0.2853931272079647, "grad_norm": 0.6238013091317754, "learning_rate": 1.5880616382806165e-05, "loss": 0.5612, "step": 9775 }, { "epoch": 0.2854223234358121, "grad_norm": 0.6441346753312659, "learning_rate": 1.5879967558799677e-05, "loss": 0.6439, "step": 9776 }, { "epoch": 0.28545151966365945, "grad_norm": 0.6599636045266636, "learning_rate": 1.587931873479319e-05, "loss": 0.656, "step": 9777 }, { "epoch": 0.2854807158915068, "grad_norm": 0.65942297220296, "learning_rate": 1.58786699107867e-05, "loss": 0.6615, "step": 9778 }, { "epoch": 0.28550991211935417, "grad_norm": 0.6713866667378385, "learning_rate": 1.5878021086780213e-05, "loss": 0.6928, "step": 9779 }, { "epoch": 0.28553910834720153, "grad_norm": 0.6866653781746517, "learning_rate": 1.5877372262773725e-05, "loss": 0.6947, "step": 9780 }, { "epoch": 0.2855683045750489, "grad_norm": 0.6459629699462106, "learning_rate": 1.5876723438767237e-05, "loss": 0.6426, "step": 9781 }, { "epoch": 0.28559750080289625, "grad_norm": 0.6118263529344644, "learning_rate": 1.587607461476075e-05, "loss": 0.5502, "step": 9782 }, { "epoch": 0.2856266970307436, "grad_norm": 0.6599677176873626, "learning_rate": 1.587542579075426e-05, "loss": 0.6591, "step": 9783 }, { "epoch": 0.285655893258591, "grad_norm": 0.6612312453121181, "learning_rate": 1.587477696674777e-05, "loss": 0.5791, "step": 9784 }, { "epoch": 0.28568508948643834, "grad_norm": 0.6829601910288906, "learning_rate": 1.5874128142741282e-05, "loss": 0.6033, "step": 9785 }, { "epoch": 0.2857142857142857, "grad_norm": 0.6999093651339845, "learning_rate": 1.5873479318734794e-05, "loss": 0.7007, "step": 9786 }, { "epoch": 0.28574348194213306, "grad_norm": 0.658195056242319, "learning_rate": 1.5872830494728306e-05, "loss": 0.6607, "step": 9787 }, { "epoch": 0.2857726781699804, "grad_norm": 0.6377018538339925, "learning_rate": 1.5872181670721818e-05, "loss": 0.6019, "step": 9788 }, { "epoch": 0.2858018743978278, "grad_norm": 0.6993474797629844, "learning_rate": 1.587153284671533e-05, "loss": 0.6698, "step": 9789 }, { "epoch": 0.28583107062567514, "grad_norm": 0.6497077611234087, "learning_rate": 1.5870884022708842e-05, "loss": 0.6117, "step": 9790 }, { "epoch": 0.2858602668535225, "grad_norm": 0.643621945215731, "learning_rate": 1.5870235198702354e-05, "loss": 0.5931, "step": 9791 }, { "epoch": 0.28588946308136987, "grad_norm": 0.637822432187454, "learning_rate": 1.5869586374695866e-05, "loss": 0.6371, "step": 9792 }, { "epoch": 0.2859186593092172, "grad_norm": 0.6100461602888131, "learning_rate": 1.5868937550689375e-05, "loss": 0.5752, "step": 9793 }, { "epoch": 0.2859478555370646, "grad_norm": 0.6566515612051402, "learning_rate": 1.5868288726682887e-05, "loss": 0.6539, "step": 9794 }, { "epoch": 0.28597705176491195, "grad_norm": 0.667133966057617, "learning_rate": 1.58676399026764e-05, "loss": 0.6157, "step": 9795 }, { "epoch": 0.2860062479927593, "grad_norm": 0.6399672019137411, "learning_rate": 1.586699107866991e-05, "loss": 0.6271, "step": 9796 }, { "epoch": 0.28603544422060667, "grad_norm": 0.6552473523043043, "learning_rate": 1.5866342254663426e-05, "loss": 0.6015, "step": 9797 }, { "epoch": 0.28606464044845403, "grad_norm": 0.6551749080923496, "learning_rate": 1.5865693430656938e-05, "loss": 0.612, "step": 9798 }, { "epoch": 0.28609383667630145, "grad_norm": 0.7251509124547699, "learning_rate": 1.5865044606650447e-05, "loss": 0.6279, "step": 9799 }, { "epoch": 0.2861230329041488, "grad_norm": 0.6821102694633513, "learning_rate": 1.586439578264396e-05, "loss": 0.625, "step": 9800 }, { "epoch": 0.28615222913199617, "grad_norm": 0.6610574936897483, "learning_rate": 1.586374695863747e-05, "loss": 0.589, "step": 9801 }, { "epoch": 0.28618142535984353, "grad_norm": 0.597827227172578, "learning_rate": 1.5863098134630983e-05, "loss": 0.5546, "step": 9802 }, { "epoch": 0.2862106215876909, "grad_norm": 0.622986482198285, "learning_rate": 1.5862449310624495e-05, "loss": 0.5884, "step": 9803 }, { "epoch": 0.28623981781553826, "grad_norm": 0.6671729198923958, "learning_rate": 1.5861800486618007e-05, "loss": 0.6281, "step": 9804 }, { "epoch": 0.2862690140433856, "grad_norm": 0.674830676372712, "learning_rate": 1.586115166261152e-05, "loss": 0.6455, "step": 9805 }, { "epoch": 0.286298210271233, "grad_norm": 0.6813254682964887, "learning_rate": 1.586050283860503e-05, "loss": 0.6697, "step": 9806 }, { "epoch": 0.28632740649908034, "grad_norm": 0.6953628060139448, "learning_rate": 1.5859854014598543e-05, "loss": 0.697, "step": 9807 }, { "epoch": 0.2863566027269277, "grad_norm": 0.6361508355324618, "learning_rate": 1.585920519059205e-05, "loss": 0.6018, "step": 9808 }, { "epoch": 0.28638579895477506, "grad_norm": 0.6497009608900861, "learning_rate": 1.5858556366585564e-05, "loss": 0.6184, "step": 9809 }, { "epoch": 0.2864149951826224, "grad_norm": 0.668490307058886, "learning_rate": 1.5857907542579076e-05, "loss": 0.631, "step": 9810 }, { "epoch": 0.2864441914104698, "grad_norm": 0.6567286635747086, "learning_rate": 1.5857258718572588e-05, "loss": 0.6069, "step": 9811 }, { "epoch": 0.28647338763831715, "grad_norm": 0.6305662990707805, "learning_rate": 1.58566098945661e-05, "loss": 0.5801, "step": 9812 }, { "epoch": 0.2865025838661645, "grad_norm": 0.6720977188904883, "learning_rate": 1.5855961070559612e-05, "loss": 0.6589, "step": 9813 }, { "epoch": 0.28653178009401187, "grad_norm": 0.6113549905583359, "learning_rate": 1.5855312246553124e-05, "loss": 0.6006, "step": 9814 }, { "epoch": 0.28656097632185923, "grad_norm": 0.6411751720920323, "learning_rate": 1.5854663422546636e-05, "loss": 0.5848, "step": 9815 }, { "epoch": 0.2865901725497066, "grad_norm": 0.6560139889080645, "learning_rate": 1.5854014598540148e-05, "loss": 0.6326, "step": 9816 }, { "epoch": 0.28661936877755395, "grad_norm": 0.6654096561625449, "learning_rate": 1.585336577453366e-05, "loss": 0.7077, "step": 9817 }, { "epoch": 0.2866485650054013, "grad_norm": 0.6457787825475338, "learning_rate": 1.5852716950527172e-05, "loss": 0.6061, "step": 9818 }, { "epoch": 0.2866777612332487, "grad_norm": 0.6527743832810183, "learning_rate": 1.5852068126520684e-05, "loss": 0.6676, "step": 9819 }, { "epoch": 0.28670695746109603, "grad_norm": 0.7255580325719702, "learning_rate": 1.5851419302514196e-05, "loss": 0.7319, "step": 9820 }, { "epoch": 0.2867361536889434, "grad_norm": 0.6784591036918302, "learning_rate": 1.5850770478507708e-05, "loss": 0.6324, "step": 9821 }, { "epoch": 0.28676534991679076, "grad_norm": 0.6244210382809362, "learning_rate": 1.5850121654501217e-05, "loss": 0.6077, "step": 9822 }, { "epoch": 0.2867945461446381, "grad_norm": 0.631082128243611, "learning_rate": 1.584947283049473e-05, "loss": 0.587, "step": 9823 }, { "epoch": 0.2868237423724855, "grad_norm": 0.6658438558222814, "learning_rate": 1.584882400648824e-05, "loss": 0.6185, "step": 9824 }, { "epoch": 0.28685293860033284, "grad_norm": 0.6538528039939548, "learning_rate": 1.5848175182481753e-05, "loss": 0.6016, "step": 9825 }, { "epoch": 0.2868821348281802, "grad_norm": 0.7981662527886797, "learning_rate": 1.5847526358475265e-05, "loss": 0.6362, "step": 9826 }, { "epoch": 0.28691133105602756, "grad_norm": 0.6786764110725544, "learning_rate": 1.5846877534468777e-05, "loss": 0.6394, "step": 9827 }, { "epoch": 0.2869405272838749, "grad_norm": 0.6928516736444544, "learning_rate": 1.584622871046229e-05, "loss": 0.6914, "step": 9828 }, { "epoch": 0.2869697235117223, "grad_norm": 0.6839199058466532, "learning_rate": 1.58455798864558e-05, "loss": 0.6131, "step": 9829 }, { "epoch": 0.28699891973956965, "grad_norm": 0.6640279057877558, "learning_rate": 1.5844931062449313e-05, "loss": 0.6128, "step": 9830 }, { "epoch": 0.287028115967417, "grad_norm": 0.7044250074705001, "learning_rate": 1.584428223844282e-05, "loss": 0.6756, "step": 9831 }, { "epoch": 0.28705731219526437, "grad_norm": 0.6971582943257646, "learning_rate": 1.5843633414436333e-05, "loss": 0.66, "step": 9832 }, { "epoch": 0.28708650842311173, "grad_norm": 0.6253989030326653, "learning_rate": 1.5842984590429845e-05, "loss": 0.6325, "step": 9833 }, { "epoch": 0.2871157046509591, "grad_norm": 0.6603534737630926, "learning_rate": 1.5842335766423358e-05, "loss": 0.6159, "step": 9834 }, { "epoch": 0.28714490087880645, "grad_norm": 0.7029976005236948, "learning_rate": 1.5841686942416873e-05, "loss": 0.6857, "step": 9835 }, { "epoch": 0.2871740971066538, "grad_norm": 0.6431860566334358, "learning_rate": 1.5841038118410385e-05, "loss": 0.6179, "step": 9836 }, { "epoch": 0.2872032933345012, "grad_norm": 0.6462580801087467, "learning_rate": 1.5840389294403894e-05, "loss": 0.6485, "step": 9837 }, { "epoch": 0.28723248956234854, "grad_norm": 0.827597456739683, "learning_rate": 1.5839740470397406e-05, "loss": 0.6479, "step": 9838 }, { "epoch": 0.2872616857901959, "grad_norm": 0.6452430323028934, "learning_rate": 1.5839091646390918e-05, "loss": 0.6335, "step": 9839 }, { "epoch": 0.28729088201804326, "grad_norm": 0.7043657449458965, "learning_rate": 1.583844282238443e-05, "loss": 0.6781, "step": 9840 }, { "epoch": 0.2873200782458906, "grad_norm": 0.7029901026693127, "learning_rate": 1.5837793998377942e-05, "loss": 0.6912, "step": 9841 }, { "epoch": 0.287349274473738, "grad_norm": 0.6864205087025634, "learning_rate": 1.5837145174371454e-05, "loss": 0.7096, "step": 9842 }, { "epoch": 0.28737847070158534, "grad_norm": 0.6543091533035805, "learning_rate": 1.5836496350364966e-05, "loss": 0.6196, "step": 9843 }, { "epoch": 0.2874076669294327, "grad_norm": 0.7559296325830802, "learning_rate": 1.5835847526358478e-05, "loss": 0.6472, "step": 9844 }, { "epoch": 0.28743686315728006, "grad_norm": 0.5969392127067058, "learning_rate": 1.583519870235199e-05, "loss": 0.5528, "step": 9845 }, { "epoch": 0.2874660593851274, "grad_norm": 0.6585280904512377, "learning_rate": 1.58345498783455e-05, "loss": 0.6121, "step": 9846 }, { "epoch": 0.2874952556129748, "grad_norm": 0.6227187713583733, "learning_rate": 1.583390105433901e-05, "loss": 0.5557, "step": 9847 }, { "epoch": 0.28752445184082215, "grad_norm": 0.6852748870012727, "learning_rate": 1.5833252230332523e-05, "loss": 0.6369, "step": 9848 }, { "epoch": 0.2875536480686695, "grad_norm": 0.6691068491152204, "learning_rate": 1.5832603406326035e-05, "loss": 0.6681, "step": 9849 }, { "epoch": 0.28758284429651687, "grad_norm": 0.6356851517915978, "learning_rate": 1.5831954582319547e-05, "loss": 0.5884, "step": 9850 }, { "epoch": 0.28761204052436423, "grad_norm": 0.7004916093583379, "learning_rate": 1.583130575831306e-05, "loss": 0.7115, "step": 9851 }, { "epoch": 0.2876412367522116, "grad_norm": 0.6177876960086733, "learning_rate": 1.583065693430657e-05, "loss": 0.5272, "step": 9852 }, { "epoch": 0.28767043298005895, "grad_norm": 0.6266177217512936, "learning_rate": 1.5830008110300083e-05, "loss": 0.5761, "step": 9853 }, { "epoch": 0.2876996292079063, "grad_norm": 0.6725413679288632, "learning_rate": 1.5829359286293595e-05, "loss": 0.6431, "step": 9854 }, { "epoch": 0.2877288254357537, "grad_norm": 0.6658116211404079, "learning_rate": 1.5828710462287107e-05, "loss": 0.6401, "step": 9855 }, { "epoch": 0.28775802166360104, "grad_norm": 0.6469397745577352, "learning_rate": 1.582806163828062e-05, "loss": 0.5621, "step": 9856 }, { "epoch": 0.2877872178914484, "grad_norm": 0.7624205107950051, "learning_rate": 1.582741281427413e-05, "loss": 0.7954, "step": 9857 }, { "epoch": 0.28781641411929576, "grad_norm": 0.6157012431156554, "learning_rate": 1.5826763990267643e-05, "loss": 0.5702, "step": 9858 }, { "epoch": 0.2878456103471431, "grad_norm": 0.6555896419604619, "learning_rate": 1.5826115166261155e-05, "loss": 0.5972, "step": 9859 }, { "epoch": 0.28787480657499054, "grad_norm": 0.6729188950001325, "learning_rate": 1.5825466342254663e-05, "loss": 0.6682, "step": 9860 }, { "epoch": 0.2879040028028379, "grad_norm": 0.6630220380989348, "learning_rate": 1.5824817518248175e-05, "loss": 0.6437, "step": 9861 }, { "epoch": 0.28793319903068526, "grad_norm": 0.7151420154043351, "learning_rate": 1.5824168694241687e-05, "loss": 0.6974, "step": 9862 }, { "epoch": 0.2879623952585326, "grad_norm": 0.6072702896174157, "learning_rate": 1.58235198702352e-05, "loss": 0.5251, "step": 9863 }, { "epoch": 0.28799159148638, "grad_norm": 0.6639353411413066, "learning_rate": 1.582287104622871e-05, "loss": 0.64, "step": 9864 }, { "epoch": 0.28802078771422734, "grad_norm": 0.6742614664038444, "learning_rate": 1.5822222222222224e-05, "loss": 0.6421, "step": 9865 }, { "epoch": 0.2880499839420747, "grad_norm": 0.7070171116065119, "learning_rate": 1.5821573398215736e-05, "loss": 0.6191, "step": 9866 }, { "epoch": 0.28807918016992207, "grad_norm": 0.7160897375059748, "learning_rate": 1.5820924574209248e-05, "loss": 0.641, "step": 9867 }, { "epoch": 0.28810837639776943, "grad_norm": 0.6345732215770653, "learning_rate": 1.582027575020276e-05, "loss": 0.6226, "step": 9868 }, { "epoch": 0.2881375726256168, "grad_norm": 0.6529461529553446, "learning_rate": 1.5819626926196268e-05, "loss": 0.6323, "step": 9869 }, { "epoch": 0.28816676885346415, "grad_norm": 0.6636766379927471, "learning_rate": 1.581897810218978e-05, "loss": 0.6316, "step": 9870 }, { "epoch": 0.2881959650813115, "grad_norm": 0.588602792683081, "learning_rate": 1.5818329278183292e-05, "loss": 0.5192, "step": 9871 }, { "epoch": 0.2882251613091589, "grad_norm": 0.6721301409778198, "learning_rate": 1.5817680454176804e-05, "loss": 0.6545, "step": 9872 }, { "epoch": 0.28825435753700623, "grad_norm": 24.931981434984788, "learning_rate": 1.581703163017032e-05, "loss": 1.0786, "step": 9873 }, { "epoch": 0.2882835537648536, "grad_norm": 0.6461468341102249, "learning_rate": 1.5816382806163832e-05, "loss": 0.6377, "step": 9874 }, { "epoch": 0.28831274999270096, "grad_norm": 0.6816349489615846, "learning_rate": 1.581573398215734e-05, "loss": 0.6755, "step": 9875 }, { "epoch": 0.2883419462205483, "grad_norm": 0.6651278124362153, "learning_rate": 1.5815085158150852e-05, "loss": 0.6663, "step": 9876 }, { "epoch": 0.2883711424483957, "grad_norm": 0.6379496920218236, "learning_rate": 1.5814436334144365e-05, "loss": 0.627, "step": 9877 }, { "epoch": 0.28840033867624304, "grad_norm": 0.6632524420982423, "learning_rate": 1.5813787510137877e-05, "loss": 0.6238, "step": 9878 }, { "epoch": 0.2884295349040904, "grad_norm": 0.7752699510558031, "learning_rate": 1.581313868613139e-05, "loss": 0.6729, "step": 9879 }, { "epoch": 0.28845873113193776, "grad_norm": 0.7800051905408373, "learning_rate": 1.58124898621249e-05, "loss": 0.7383, "step": 9880 }, { "epoch": 0.2884879273597851, "grad_norm": 0.6712397045773172, "learning_rate": 1.5811841038118413e-05, "loss": 0.5741, "step": 9881 }, { "epoch": 0.2885171235876325, "grad_norm": 0.6112342889271917, "learning_rate": 1.5811192214111925e-05, "loss": 0.5516, "step": 9882 }, { "epoch": 0.28854631981547985, "grad_norm": 0.6098308860772608, "learning_rate": 1.5810543390105437e-05, "loss": 0.5648, "step": 9883 }, { "epoch": 0.2885755160433272, "grad_norm": 0.6896350463852478, "learning_rate": 1.5809894566098945e-05, "loss": 0.7089, "step": 9884 }, { "epoch": 0.28860471227117457, "grad_norm": 0.7006135553292678, "learning_rate": 1.5809245742092457e-05, "loss": 0.6557, "step": 9885 }, { "epoch": 0.28863390849902193, "grad_norm": 0.6846279672685389, "learning_rate": 1.580859691808597e-05, "loss": 0.6885, "step": 9886 }, { "epoch": 0.2886631047268693, "grad_norm": 0.7008780418097939, "learning_rate": 1.580794809407948e-05, "loss": 0.6877, "step": 9887 }, { "epoch": 0.28869230095471665, "grad_norm": 0.6436433156893003, "learning_rate": 1.5807299270072993e-05, "loss": 0.5715, "step": 9888 }, { "epoch": 0.288721497182564, "grad_norm": 0.6690343820882958, "learning_rate": 1.5806650446066505e-05, "loss": 0.6562, "step": 9889 }, { "epoch": 0.2887506934104114, "grad_norm": 0.654784967588492, "learning_rate": 1.5806001622060017e-05, "loss": 0.596, "step": 9890 }, { "epoch": 0.28877988963825874, "grad_norm": 0.6698732027625253, "learning_rate": 1.580535279805353e-05, "loss": 0.6673, "step": 9891 }, { "epoch": 0.2888090858661061, "grad_norm": 0.677772162565677, "learning_rate": 1.580470397404704e-05, "loss": 0.6856, "step": 9892 }, { "epoch": 0.28883828209395346, "grad_norm": 0.6178814083082748, "learning_rate": 1.5804055150040554e-05, "loss": 0.5347, "step": 9893 }, { "epoch": 0.2888674783218008, "grad_norm": 0.6773244302677247, "learning_rate": 1.5803406326034066e-05, "loss": 0.6223, "step": 9894 }, { "epoch": 0.2888966745496482, "grad_norm": 0.673474274189093, "learning_rate": 1.5802757502027578e-05, "loss": 0.6399, "step": 9895 }, { "epoch": 0.28892587077749554, "grad_norm": 0.698473415791553, "learning_rate": 1.580210867802109e-05, "loss": 0.6583, "step": 9896 }, { "epoch": 0.2889550670053429, "grad_norm": 0.695669910354655, "learning_rate": 1.58014598540146e-05, "loss": 0.7095, "step": 9897 }, { "epoch": 0.28898426323319026, "grad_norm": 0.7343347090733768, "learning_rate": 1.580081103000811e-05, "loss": 0.7441, "step": 9898 }, { "epoch": 0.2890134594610376, "grad_norm": 0.6689078189491964, "learning_rate": 1.5800162206001622e-05, "loss": 0.6636, "step": 9899 }, { "epoch": 0.289042655688885, "grad_norm": 0.7049072743133916, "learning_rate": 1.5799513381995134e-05, "loss": 0.7345, "step": 9900 }, { "epoch": 0.28907185191673235, "grad_norm": 0.7352263390861712, "learning_rate": 1.5798864557988646e-05, "loss": 0.6718, "step": 9901 }, { "epoch": 0.2891010481445797, "grad_norm": 0.628110900397938, "learning_rate": 1.579821573398216e-05, "loss": 0.5653, "step": 9902 }, { "epoch": 0.28913024437242707, "grad_norm": 0.6665299671807309, "learning_rate": 1.579756690997567e-05, "loss": 0.6628, "step": 9903 }, { "epoch": 0.28915944060027443, "grad_norm": 0.6491869910680438, "learning_rate": 1.5796918085969182e-05, "loss": 0.6209, "step": 9904 }, { "epoch": 0.2891886368281218, "grad_norm": 0.6820861043420418, "learning_rate": 1.5796269261962695e-05, "loss": 0.615, "step": 9905 }, { "epoch": 0.28921783305596915, "grad_norm": 0.6719254570566281, "learning_rate": 1.5795620437956207e-05, "loss": 0.6741, "step": 9906 }, { "epoch": 0.2892470292838165, "grad_norm": 0.6935986272869163, "learning_rate": 1.5794971613949715e-05, "loss": 0.6726, "step": 9907 }, { "epoch": 0.2892762255116639, "grad_norm": 0.7261235463961718, "learning_rate": 1.5794322789943227e-05, "loss": 0.7311, "step": 9908 }, { "epoch": 0.28930542173951124, "grad_norm": 0.6715709962325016, "learning_rate": 1.579367396593674e-05, "loss": 0.6299, "step": 9909 }, { "epoch": 0.2893346179673586, "grad_norm": 0.6777595964144855, "learning_rate": 1.579302514193025e-05, "loss": 0.6873, "step": 9910 }, { "epoch": 0.28936381419520596, "grad_norm": 0.6990636772098963, "learning_rate": 1.5792376317923767e-05, "loss": 0.7377, "step": 9911 }, { "epoch": 0.2893930104230533, "grad_norm": 0.6704621932559964, "learning_rate": 1.579172749391728e-05, "loss": 0.6583, "step": 9912 }, { "epoch": 0.2894222066509007, "grad_norm": 0.6226054203333273, "learning_rate": 1.5791078669910787e-05, "loss": 0.5584, "step": 9913 }, { "epoch": 0.28945140287874804, "grad_norm": 0.6167045569642735, "learning_rate": 1.57904298459043e-05, "loss": 0.5955, "step": 9914 }, { "epoch": 0.2894805991065954, "grad_norm": 0.6518110091137795, "learning_rate": 1.578978102189781e-05, "loss": 0.6393, "step": 9915 }, { "epoch": 0.28950979533444277, "grad_norm": 0.7615878116904626, "learning_rate": 1.5789132197891323e-05, "loss": 0.6809, "step": 9916 }, { "epoch": 0.2895389915622901, "grad_norm": 0.6040593481693447, "learning_rate": 1.5788483373884835e-05, "loss": 0.5532, "step": 9917 }, { "epoch": 0.2895681877901375, "grad_norm": 0.6225452121170404, "learning_rate": 1.5787834549878347e-05, "loss": 0.5757, "step": 9918 }, { "epoch": 0.28959738401798485, "grad_norm": 0.6711789965568633, "learning_rate": 1.578718572587186e-05, "loss": 0.6448, "step": 9919 }, { "epoch": 0.28962658024583227, "grad_norm": 0.7269218992367132, "learning_rate": 1.578653690186537e-05, "loss": 0.7494, "step": 9920 }, { "epoch": 0.2896557764736796, "grad_norm": 0.6651087983233737, "learning_rate": 1.5785888077858884e-05, "loss": 0.6302, "step": 9921 }, { "epoch": 0.289684972701527, "grad_norm": 0.6612008393723052, "learning_rate": 1.5785239253852392e-05, "loss": 0.6109, "step": 9922 }, { "epoch": 0.28971416892937435, "grad_norm": 0.6774856667431232, "learning_rate": 1.5784590429845904e-05, "loss": 0.6354, "step": 9923 }, { "epoch": 0.2897433651572217, "grad_norm": 0.6577178179349656, "learning_rate": 1.5783941605839416e-05, "loss": 0.5946, "step": 9924 }, { "epoch": 0.28977256138506907, "grad_norm": 0.6033264945404309, "learning_rate": 1.5783292781832928e-05, "loss": 0.5673, "step": 9925 }, { "epoch": 0.28980175761291643, "grad_norm": 0.6650834836129895, "learning_rate": 1.578264395782644e-05, "loss": 0.6415, "step": 9926 }, { "epoch": 0.2898309538407638, "grad_norm": 0.661324843119778, "learning_rate": 1.5781995133819952e-05, "loss": 0.6614, "step": 9927 }, { "epoch": 0.28986015006861116, "grad_norm": 0.706842184342334, "learning_rate": 1.5781346309813464e-05, "loss": 0.704, "step": 9928 }, { "epoch": 0.2898893462964585, "grad_norm": 0.6418593534320836, "learning_rate": 1.5780697485806976e-05, "loss": 0.5759, "step": 9929 }, { "epoch": 0.2899185425243059, "grad_norm": 0.684425565742308, "learning_rate": 1.578004866180049e-05, "loss": 0.6578, "step": 9930 }, { "epoch": 0.28994773875215324, "grad_norm": 0.6319987030259974, "learning_rate": 1.5779399837794e-05, "loss": 0.6158, "step": 9931 }, { "epoch": 0.2899769349800006, "grad_norm": 0.8160532107914479, "learning_rate": 1.5778751013787512e-05, "loss": 0.6537, "step": 9932 }, { "epoch": 0.29000613120784796, "grad_norm": 0.6968696453727891, "learning_rate": 1.5778102189781024e-05, "loss": 0.6905, "step": 9933 }, { "epoch": 0.2900353274356953, "grad_norm": 0.6340584981969868, "learning_rate": 1.5777453365774537e-05, "loss": 0.567, "step": 9934 }, { "epoch": 0.2900645236635427, "grad_norm": 0.6548051542580411, "learning_rate": 1.577680454176805e-05, "loss": 0.648, "step": 9935 }, { "epoch": 0.29009371989139004, "grad_norm": 0.6420423262879108, "learning_rate": 1.5776155717761557e-05, "loss": 0.6601, "step": 9936 }, { "epoch": 0.2901229161192374, "grad_norm": 0.6529421815914255, "learning_rate": 1.577550689375507e-05, "loss": 0.6021, "step": 9937 }, { "epoch": 0.29015211234708477, "grad_norm": 0.6690019727063485, "learning_rate": 1.577485806974858e-05, "loss": 0.6753, "step": 9938 }, { "epoch": 0.29018130857493213, "grad_norm": 0.666609416687134, "learning_rate": 1.5774209245742093e-05, "loss": 0.5996, "step": 9939 }, { "epoch": 0.2902105048027795, "grad_norm": 0.6907670373868202, "learning_rate": 1.5773560421735605e-05, "loss": 0.6993, "step": 9940 }, { "epoch": 0.29023970103062685, "grad_norm": 0.7292218418234382, "learning_rate": 1.5772911597729117e-05, "loss": 0.7565, "step": 9941 }, { "epoch": 0.2902688972584742, "grad_norm": 0.6426088097843158, "learning_rate": 1.577226277372263e-05, "loss": 0.6619, "step": 9942 }, { "epoch": 0.2902980934863216, "grad_norm": 0.6855280437263648, "learning_rate": 1.577161394971614e-05, "loss": 0.6365, "step": 9943 }, { "epoch": 0.29032728971416893, "grad_norm": 0.6480338484629171, "learning_rate": 1.5770965125709653e-05, "loss": 0.6324, "step": 9944 }, { "epoch": 0.2903564859420163, "grad_norm": 0.6821266339172789, "learning_rate": 1.5770316301703162e-05, "loss": 0.6538, "step": 9945 }, { "epoch": 0.29038568216986366, "grad_norm": 0.6878087319986494, "learning_rate": 1.5769667477696674e-05, "loss": 0.6441, "step": 9946 }, { "epoch": 0.290414878397711, "grad_norm": 0.6962151706050502, "learning_rate": 1.5769018653690186e-05, "loss": 0.6545, "step": 9947 }, { "epoch": 0.2904440746255584, "grad_norm": 0.6579932910968168, "learning_rate": 1.57683698296837e-05, "loss": 0.6365, "step": 9948 }, { "epoch": 0.29047327085340574, "grad_norm": 0.7521021805228613, "learning_rate": 1.5767721005677214e-05, "loss": 0.7419, "step": 9949 }, { "epoch": 0.2905024670812531, "grad_norm": 0.6618956794321038, "learning_rate": 1.5767072181670726e-05, "loss": 0.6228, "step": 9950 }, { "epoch": 0.29053166330910046, "grad_norm": 0.6632897364468509, "learning_rate": 1.5766423357664234e-05, "loss": 0.6293, "step": 9951 }, { "epoch": 0.2905608595369478, "grad_norm": 0.6739261173027272, "learning_rate": 1.5765774533657746e-05, "loss": 0.6154, "step": 9952 }, { "epoch": 0.2905900557647952, "grad_norm": 0.7176013524809972, "learning_rate": 1.5765125709651258e-05, "loss": 0.6959, "step": 9953 }, { "epoch": 0.29061925199264255, "grad_norm": 0.649030337223919, "learning_rate": 1.576447688564477e-05, "loss": 0.6, "step": 9954 }, { "epoch": 0.2906484482204899, "grad_norm": 0.6647368292417529, "learning_rate": 1.5763828061638282e-05, "loss": 0.6126, "step": 9955 }, { "epoch": 0.29067764444833727, "grad_norm": 0.6139665073327832, "learning_rate": 1.5763179237631794e-05, "loss": 0.5596, "step": 9956 }, { "epoch": 0.29070684067618463, "grad_norm": 0.6299321620821801, "learning_rate": 1.5762530413625306e-05, "loss": 0.5653, "step": 9957 }, { "epoch": 0.290736036904032, "grad_norm": 0.6699525885198927, "learning_rate": 1.576188158961882e-05, "loss": 0.6599, "step": 9958 }, { "epoch": 0.29076523313187935, "grad_norm": 0.6602823990252376, "learning_rate": 1.576123276561233e-05, "loss": 0.6163, "step": 9959 }, { "epoch": 0.2907944293597267, "grad_norm": 0.7475698021412785, "learning_rate": 1.576058394160584e-05, "loss": 0.7546, "step": 9960 }, { "epoch": 0.2908236255875741, "grad_norm": 0.679101383806202, "learning_rate": 1.575993511759935e-05, "loss": 0.6537, "step": 9961 }, { "epoch": 0.29085282181542144, "grad_norm": 0.6957601313257702, "learning_rate": 1.5759286293592863e-05, "loss": 0.6886, "step": 9962 }, { "epoch": 0.2908820180432688, "grad_norm": 0.6448722987439852, "learning_rate": 1.5758637469586375e-05, "loss": 0.6207, "step": 9963 }, { "epoch": 0.29091121427111616, "grad_norm": 0.6455635163623425, "learning_rate": 1.5757988645579887e-05, "loss": 0.6172, "step": 9964 }, { "epoch": 0.2909404104989635, "grad_norm": 0.6318053665573554, "learning_rate": 1.57573398215734e-05, "loss": 0.6098, "step": 9965 }, { "epoch": 0.2909696067268109, "grad_norm": 0.6462415139469668, "learning_rate": 1.575669099756691e-05, "loss": 0.5932, "step": 9966 }, { "epoch": 0.29099880295465824, "grad_norm": 0.7394609152534914, "learning_rate": 1.5756042173560423e-05, "loss": 0.6484, "step": 9967 }, { "epoch": 0.2910279991825056, "grad_norm": 0.6639408845695767, "learning_rate": 1.5755393349553935e-05, "loss": 0.5913, "step": 9968 }, { "epoch": 0.29105719541035296, "grad_norm": 0.7230245540244589, "learning_rate": 1.5754744525547447e-05, "loss": 0.7145, "step": 9969 }, { "epoch": 0.2910863916382003, "grad_norm": 0.6078989261695288, "learning_rate": 1.575409570154096e-05, "loss": 0.5791, "step": 9970 }, { "epoch": 0.2911155878660477, "grad_norm": 0.69042407820847, "learning_rate": 1.575344687753447e-05, "loss": 0.6501, "step": 9971 }, { "epoch": 0.29114478409389505, "grad_norm": 0.7022135224639279, "learning_rate": 1.5752798053527983e-05, "loss": 0.7539, "step": 9972 }, { "epoch": 0.2911739803217424, "grad_norm": 0.6333644414371925, "learning_rate": 1.5752149229521495e-05, "loss": 0.637, "step": 9973 }, { "epoch": 0.29120317654958977, "grad_norm": 0.6780721015746748, "learning_rate": 1.5751500405515004e-05, "loss": 0.6981, "step": 9974 }, { "epoch": 0.29123237277743713, "grad_norm": 0.657785300747489, "learning_rate": 1.5750851581508516e-05, "loss": 0.6124, "step": 9975 }, { "epoch": 0.2912615690052845, "grad_norm": 0.5985140833305256, "learning_rate": 1.5750202757502028e-05, "loss": 0.5574, "step": 9976 }, { "epoch": 0.29129076523313185, "grad_norm": 0.6521781321944163, "learning_rate": 1.574955393349554e-05, "loss": 0.7049, "step": 9977 }, { "epoch": 0.2913199614609792, "grad_norm": 0.7281634281856919, "learning_rate": 1.5748905109489052e-05, "loss": 0.6896, "step": 9978 }, { "epoch": 0.2913491576888266, "grad_norm": 0.6654529846410653, "learning_rate": 1.5748256285482564e-05, "loss": 0.6952, "step": 9979 }, { "epoch": 0.291378353916674, "grad_norm": 0.6383394237754945, "learning_rate": 1.5747607461476076e-05, "loss": 0.5878, "step": 9980 }, { "epoch": 0.29140755014452135, "grad_norm": 0.6513272266971984, "learning_rate": 1.5746958637469588e-05, "loss": 0.5606, "step": 9981 }, { "epoch": 0.2914367463723687, "grad_norm": 0.6774173576151152, "learning_rate": 1.57463098134631e-05, "loss": 0.6161, "step": 9982 }, { "epoch": 0.2914659426002161, "grad_norm": 0.6786554188312465, "learning_rate": 1.574566098945661e-05, "loss": 0.6895, "step": 9983 }, { "epoch": 0.29149513882806344, "grad_norm": 0.6688756602194672, "learning_rate": 1.574501216545012e-05, "loss": 0.6562, "step": 9984 }, { "epoch": 0.2915243350559108, "grad_norm": 0.6482259156834407, "learning_rate": 1.5744363341443633e-05, "loss": 0.6553, "step": 9985 }, { "epoch": 0.29155353128375816, "grad_norm": 0.6828533179587332, "learning_rate": 1.574371451743715e-05, "loss": 0.6745, "step": 9986 }, { "epoch": 0.2915827275116055, "grad_norm": 0.6878085193067323, "learning_rate": 1.574306569343066e-05, "loss": 0.6687, "step": 9987 }, { "epoch": 0.2916119237394529, "grad_norm": 0.7334415902627649, "learning_rate": 1.5742416869424172e-05, "loss": 0.6882, "step": 9988 }, { "epoch": 0.29164111996730024, "grad_norm": 0.6635531528045501, "learning_rate": 1.574176804541768e-05, "loss": 0.6615, "step": 9989 }, { "epoch": 0.2916703161951476, "grad_norm": 0.686652665719682, "learning_rate": 1.5741119221411193e-05, "loss": 0.6695, "step": 9990 }, { "epoch": 0.29169951242299497, "grad_norm": 0.6575317369683075, "learning_rate": 1.5740470397404705e-05, "loss": 0.6744, "step": 9991 }, { "epoch": 0.2917287086508423, "grad_norm": 0.6728945883673266, "learning_rate": 1.5739821573398217e-05, "loss": 0.6571, "step": 9992 }, { "epoch": 0.2917579048786897, "grad_norm": 0.7181889337002835, "learning_rate": 1.573917274939173e-05, "loss": 0.723, "step": 9993 }, { "epoch": 0.29178710110653705, "grad_norm": 0.674342517033585, "learning_rate": 1.573852392538524e-05, "loss": 0.6423, "step": 9994 }, { "epoch": 0.2918162973343844, "grad_norm": 0.7274173081135301, "learning_rate": 1.5737875101378753e-05, "loss": 0.6443, "step": 9995 }, { "epoch": 0.2918454935622318, "grad_norm": 0.8195321773204678, "learning_rate": 1.5737226277372265e-05, "loss": 0.7371, "step": 9996 }, { "epoch": 0.29187468979007913, "grad_norm": 0.634347446549353, "learning_rate": 1.5736577453365774e-05, "loss": 0.5948, "step": 9997 }, { "epoch": 0.2919038860179265, "grad_norm": 0.7808700892229988, "learning_rate": 1.5735928629359286e-05, "loss": 0.6969, "step": 9998 }, { "epoch": 0.29193308224577386, "grad_norm": 0.6534159407392583, "learning_rate": 1.5735279805352798e-05, "loss": 0.6126, "step": 9999 }, { "epoch": 0.2919622784736212, "grad_norm": 0.6910861799476926, "learning_rate": 1.573463098134631e-05, "loss": 0.6955, "step": 10000 }, { "epoch": 0.2919914747014686, "grad_norm": 0.6490835672762985, "learning_rate": 1.5733982157339822e-05, "loss": 0.5605, "step": 10001 }, { "epoch": 0.29202067092931594, "grad_norm": 0.6325219955744443, "learning_rate": 1.5733333333333334e-05, "loss": 0.5763, "step": 10002 }, { "epoch": 0.2920498671571633, "grad_norm": 0.7111769975001254, "learning_rate": 1.5732684509326846e-05, "loss": 0.7376, "step": 10003 }, { "epoch": 0.29207906338501066, "grad_norm": 0.6718083011083472, "learning_rate": 1.5732035685320358e-05, "loss": 0.664, "step": 10004 }, { "epoch": 0.292108259612858, "grad_norm": 0.679451324661685, "learning_rate": 1.573138686131387e-05, "loss": 0.6553, "step": 10005 }, { "epoch": 0.2921374558407054, "grad_norm": 0.5899072444815303, "learning_rate": 1.5730738037307382e-05, "loss": 0.5483, "step": 10006 }, { "epoch": 0.29216665206855275, "grad_norm": 0.6224704392310179, "learning_rate": 1.5730089213300894e-05, "loss": 0.604, "step": 10007 }, { "epoch": 0.2921958482964001, "grad_norm": 0.6720672848813245, "learning_rate": 1.5729440389294406e-05, "loss": 0.6911, "step": 10008 }, { "epoch": 0.29222504452424747, "grad_norm": 0.6614974281358883, "learning_rate": 1.5728791565287918e-05, "loss": 0.6272, "step": 10009 }, { "epoch": 0.29225424075209483, "grad_norm": 0.6617094325060084, "learning_rate": 1.572814274128143e-05, "loss": 0.6155, "step": 10010 }, { "epoch": 0.2922834369799422, "grad_norm": 0.6228532002154571, "learning_rate": 1.5727493917274942e-05, "loss": 0.566, "step": 10011 }, { "epoch": 0.29231263320778955, "grad_norm": 0.7206575088020877, "learning_rate": 1.572684509326845e-05, "loss": 0.6221, "step": 10012 }, { "epoch": 0.2923418294356369, "grad_norm": 0.6763823519249837, "learning_rate": 1.5726196269261963e-05, "loss": 0.6668, "step": 10013 }, { "epoch": 0.2923710256634843, "grad_norm": 0.6515207785210826, "learning_rate": 1.5725547445255475e-05, "loss": 0.5979, "step": 10014 }, { "epoch": 0.29240022189133164, "grad_norm": 0.6419511277041666, "learning_rate": 1.5724898621248987e-05, "loss": 0.5512, "step": 10015 }, { "epoch": 0.292429418119179, "grad_norm": 0.6631929983355535, "learning_rate": 1.57242497972425e-05, "loss": 0.6431, "step": 10016 }, { "epoch": 0.29245861434702636, "grad_norm": 0.6244862807244747, "learning_rate": 1.572360097323601e-05, "loss": 0.5917, "step": 10017 }, { "epoch": 0.2924878105748737, "grad_norm": 0.687824120028208, "learning_rate": 1.5722952149229523e-05, "loss": 0.7049, "step": 10018 }, { "epoch": 0.2925170068027211, "grad_norm": 0.6694949073519701, "learning_rate": 1.5722303325223035e-05, "loss": 0.6494, "step": 10019 }, { "epoch": 0.29254620303056844, "grad_norm": 0.6655218308164659, "learning_rate": 1.5721654501216547e-05, "loss": 0.6601, "step": 10020 }, { "epoch": 0.2925753992584158, "grad_norm": 0.7006784615952064, "learning_rate": 1.5721005677210056e-05, "loss": 0.6823, "step": 10021 }, { "epoch": 0.29260459548626316, "grad_norm": 0.6606193355017397, "learning_rate": 1.5720356853203568e-05, "loss": 0.6471, "step": 10022 }, { "epoch": 0.2926337917141105, "grad_norm": 0.6803655642444596, "learning_rate": 1.571970802919708e-05, "loss": 0.6505, "step": 10023 }, { "epoch": 0.2926629879419579, "grad_norm": 0.6796587761965143, "learning_rate": 1.5719059205190595e-05, "loss": 0.6916, "step": 10024 }, { "epoch": 0.29269218416980525, "grad_norm": 0.6687915986091122, "learning_rate": 1.5718410381184107e-05, "loss": 0.5774, "step": 10025 }, { "epoch": 0.2927213803976526, "grad_norm": 0.6642440528579973, "learning_rate": 1.571776155717762e-05, "loss": 0.6024, "step": 10026 }, { "epoch": 0.29275057662549997, "grad_norm": 0.6626347882649386, "learning_rate": 1.5717112733171128e-05, "loss": 0.6561, "step": 10027 }, { "epoch": 0.29277977285334733, "grad_norm": 0.6712372595829739, "learning_rate": 1.571646390916464e-05, "loss": 0.6564, "step": 10028 }, { "epoch": 0.2928089690811947, "grad_norm": 0.7595280820474885, "learning_rate": 1.5715815085158152e-05, "loss": 0.6884, "step": 10029 }, { "epoch": 0.29283816530904205, "grad_norm": 0.6473489687938258, "learning_rate": 1.5715166261151664e-05, "loss": 0.624, "step": 10030 }, { "epoch": 0.2928673615368894, "grad_norm": 0.6615110340945816, "learning_rate": 1.5714517437145176e-05, "loss": 0.5333, "step": 10031 }, { "epoch": 0.2928965577647368, "grad_norm": 0.6479222930226287, "learning_rate": 1.5713868613138688e-05, "loss": 0.6294, "step": 10032 }, { "epoch": 0.29292575399258414, "grad_norm": 0.6045633946352936, "learning_rate": 1.57132197891322e-05, "loss": 0.5611, "step": 10033 }, { "epoch": 0.2929549502204315, "grad_norm": 0.6178450641601346, "learning_rate": 1.5712570965125712e-05, "loss": 0.5763, "step": 10034 }, { "epoch": 0.29298414644827886, "grad_norm": 0.6540158270777198, "learning_rate": 1.571192214111922e-05, "loss": 0.5257, "step": 10035 }, { "epoch": 0.2930133426761262, "grad_norm": 0.6346383834749103, "learning_rate": 1.5711273317112733e-05, "loss": 0.5655, "step": 10036 }, { "epoch": 0.2930425389039736, "grad_norm": 0.6710252943195114, "learning_rate": 1.5710624493106245e-05, "loss": 0.6907, "step": 10037 }, { "epoch": 0.29307173513182094, "grad_norm": 0.6633201286704405, "learning_rate": 1.5709975669099757e-05, "loss": 0.6637, "step": 10038 }, { "epoch": 0.2931009313596683, "grad_norm": 0.6512881447563056, "learning_rate": 1.570932684509327e-05, "loss": 0.6316, "step": 10039 }, { "epoch": 0.29313012758751567, "grad_norm": 0.7284599740454123, "learning_rate": 1.570867802108678e-05, "loss": 0.7428, "step": 10040 }, { "epoch": 0.2931593238153631, "grad_norm": 0.6668176253735094, "learning_rate": 1.5708029197080293e-05, "loss": 0.6722, "step": 10041 }, { "epoch": 0.29318852004321044, "grad_norm": 0.6313884039069291, "learning_rate": 1.5707380373073805e-05, "loss": 0.6018, "step": 10042 }, { "epoch": 0.2932177162710578, "grad_norm": 0.740042370914034, "learning_rate": 1.5706731549067317e-05, "loss": 0.7904, "step": 10043 }, { "epoch": 0.29324691249890517, "grad_norm": 0.6862504179114248, "learning_rate": 1.570608272506083e-05, "loss": 0.7105, "step": 10044 }, { "epoch": 0.2932761087267525, "grad_norm": 0.6196669954417549, "learning_rate": 1.570543390105434e-05, "loss": 0.6113, "step": 10045 }, { "epoch": 0.2933053049545999, "grad_norm": 0.6682091004213473, "learning_rate": 1.5704785077047853e-05, "loss": 0.6935, "step": 10046 }, { "epoch": 0.29333450118244725, "grad_norm": 0.6618000637651948, "learning_rate": 1.5704136253041365e-05, "loss": 0.657, "step": 10047 }, { "epoch": 0.2933636974102946, "grad_norm": 0.6830400099149132, "learning_rate": 1.5703487429034877e-05, "loss": 0.6648, "step": 10048 }, { "epoch": 0.29339289363814197, "grad_norm": 0.6828912060648012, "learning_rate": 1.570283860502839e-05, "loss": 0.6734, "step": 10049 }, { "epoch": 0.29342208986598933, "grad_norm": 0.6664556293345306, "learning_rate": 1.5702189781021898e-05, "loss": 0.6409, "step": 10050 }, { "epoch": 0.2934512860938367, "grad_norm": 0.6255681047045232, "learning_rate": 1.570154095701541e-05, "loss": 0.6305, "step": 10051 }, { "epoch": 0.29348048232168406, "grad_norm": 0.6438861778544824, "learning_rate": 1.5700892133008922e-05, "loss": 0.5982, "step": 10052 }, { "epoch": 0.2935096785495314, "grad_norm": 0.6543373549603284, "learning_rate": 1.5700243309002434e-05, "loss": 0.6551, "step": 10053 }, { "epoch": 0.2935388747773788, "grad_norm": 0.6555503615395883, "learning_rate": 1.5699594484995946e-05, "loss": 0.5915, "step": 10054 }, { "epoch": 0.29356807100522614, "grad_norm": 0.666771127221499, "learning_rate": 1.5698945660989458e-05, "loss": 0.6891, "step": 10055 }, { "epoch": 0.2935972672330735, "grad_norm": 0.6226092929304742, "learning_rate": 1.569829683698297e-05, "loss": 0.5602, "step": 10056 }, { "epoch": 0.29362646346092086, "grad_norm": 0.653194288274288, "learning_rate": 1.5697648012976482e-05, "loss": 0.6378, "step": 10057 }, { "epoch": 0.2936556596887682, "grad_norm": 0.6901647595314998, "learning_rate": 1.5696999188969994e-05, "loss": 0.6186, "step": 10058 }, { "epoch": 0.2936848559166156, "grad_norm": 0.6692305441750703, "learning_rate": 1.5696350364963503e-05, "loss": 0.6309, "step": 10059 }, { "epoch": 0.29371405214446294, "grad_norm": 0.7088236682381265, "learning_rate": 1.5695701540957015e-05, "loss": 0.6984, "step": 10060 }, { "epoch": 0.2937432483723103, "grad_norm": 0.6266903596899989, "learning_rate": 1.5695052716950527e-05, "loss": 0.5822, "step": 10061 }, { "epoch": 0.29377244460015767, "grad_norm": 0.6789322223139221, "learning_rate": 1.5694403892944042e-05, "loss": 0.647, "step": 10062 }, { "epoch": 0.29380164082800503, "grad_norm": 0.6396441788528434, "learning_rate": 1.5693755068937554e-05, "loss": 0.6196, "step": 10063 }, { "epoch": 0.2938308370558524, "grad_norm": 0.6099844350530231, "learning_rate": 1.5693106244931066e-05, "loss": 0.6172, "step": 10064 }, { "epoch": 0.29386003328369975, "grad_norm": 0.7056943596695405, "learning_rate": 1.5692457420924575e-05, "loss": 0.6399, "step": 10065 }, { "epoch": 0.2938892295115471, "grad_norm": 0.6699134397216844, "learning_rate": 1.5691808596918087e-05, "loss": 0.6273, "step": 10066 }, { "epoch": 0.2939184257393945, "grad_norm": 0.7024489522695156, "learning_rate": 1.56911597729116e-05, "loss": 0.7011, "step": 10067 }, { "epoch": 0.29394762196724183, "grad_norm": 0.71683953007272, "learning_rate": 1.569051094890511e-05, "loss": 0.6911, "step": 10068 }, { "epoch": 0.2939768181950892, "grad_norm": 0.6309675349830857, "learning_rate": 1.5689862124898623e-05, "loss": 0.5835, "step": 10069 }, { "epoch": 0.29400601442293656, "grad_norm": 0.6709486106858119, "learning_rate": 1.5689213300892135e-05, "loss": 0.653, "step": 10070 }, { "epoch": 0.2940352106507839, "grad_norm": 0.7132413439485349, "learning_rate": 1.5688564476885647e-05, "loss": 0.6911, "step": 10071 }, { "epoch": 0.2940644068786313, "grad_norm": 0.7637996087654613, "learning_rate": 1.568791565287916e-05, "loss": 0.7121, "step": 10072 }, { "epoch": 0.29409360310647864, "grad_norm": 0.7533702656603486, "learning_rate": 1.5687266828872668e-05, "loss": 0.7644, "step": 10073 }, { "epoch": 0.294122799334326, "grad_norm": 0.6340583413298921, "learning_rate": 1.568661800486618e-05, "loss": 0.6001, "step": 10074 }, { "epoch": 0.29415199556217336, "grad_norm": 0.613822321735626, "learning_rate": 1.568596918085969e-05, "loss": 0.577, "step": 10075 }, { "epoch": 0.2941811917900207, "grad_norm": 0.6166723773148245, "learning_rate": 1.5685320356853204e-05, "loss": 0.5546, "step": 10076 }, { "epoch": 0.2942103880178681, "grad_norm": 0.6661184409755276, "learning_rate": 1.5684671532846716e-05, "loss": 0.6499, "step": 10077 }, { "epoch": 0.29423958424571545, "grad_norm": 0.656497303873201, "learning_rate": 1.5684022708840228e-05, "loss": 0.6518, "step": 10078 }, { "epoch": 0.2942687804735628, "grad_norm": 0.6451190574606634, "learning_rate": 1.568337388483374e-05, "loss": 0.6233, "step": 10079 }, { "epoch": 0.29429797670141017, "grad_norm": 0.6329670090838211, "learning_rate": 1.5682725060827252e-05, "loss": 0.6096, "step": 10080 }, { "epoch": 0.29432717292925753, "grad_norm": 0.6746947437765947, "learning_rate": 1.5682076236820764e-05, "loss": 0.6321, "step": 10081 }, { "epoch": 0.2943563691571049, "grad_norm": 0.7895022263518938, "learning_rate": 1.5681427412814276e-05, "loss": 0.6577, "step": 10082 }, { "epoch": 0.29438556538495225, "grad_norm": 0.8384115852918986, "learning_rate": 1.5680778588807788e-05, "loss": 0.8117, "step": 10083 }, { "epoch": 0.2944147616127996, "grad_norm": 0.6094615896963079, "learning_rate": 1.56801297648013e-05, "loss": 0.6004, "step": 10084 }, { "epoch": 0.294443957840647, "grad_norm": 0.647739218494335, "learning_rate": 1.5679480940794812e-05, "loss": 0.6179, "step": 10085 }, { "epoch": 0.29447315406849434, "grad_norm": 0.6898625497131882, "learning_rate": 1.5678832116788324e-05, "loss": 0.6138, "step": 10086 }, { "epoch": 0.2945023502963417, "grad_norm": 0.8633495244316107, "learning_rate": 1.5678183292781836e-05, "loss": 0.7117, "step": 10087 }, { "epoch": 0.29453154652418906, "grad_norm": 0.7188048981252275, "learning_rate": 1.5677534468775345e-05, "loss": 0.6862, "step": 10088 }, { "epoch": 0.2945607427520364, "grad_norm": 0.6602154601330311, "learning_rate": 1.5676885644768857e-05, "loss": 0.5836, "step": 10089 }, { "epoch": 0.2945899389798838, "grad_norm": 0.7043968955294949, "learning_rate": 1.567623682076237e-05, "loss": 0.6385, "step": 10090 }, { "epoch": 0.29461913520773114, "grad_norm": 0.6788885718989276, "learning_rate": 1.567558799675588e-05, "loss": 0.7124, "step": 10091 }, { "epoch": 0.2946483314355785, "grad_norm": 0.6597345052980041, "learning_rate": 1.5674939172749393e-05, "loss": 0.6394, "step": 10092 }, { "epoch": 0.29467752766342586, "grad_norm": 0.6378947842213628, "learning_rate": 1.5674290348742905e-05, "loss": 0.5955, "step": 10093 }, { "epoch": 0.2947067238912732, "grad_norm": 0.6505845570182729, "learning_rate": 1.5673641524736417e-05, "loss": 0.6691, "step": 10094 }, { "epoch": 0.2947359201191206, "grad_norm": 0.6789915440236167, "learning_rate": 1.567299270072993e-05, "loss": 0.6165, "step": 10095 }, { "epoch": 0.29476511634696795, "grad_norm": 0.6341227310129737, "learning_rate": 1.567234387672344e-05, "loss": 0.6057, "step": 10096 }, { "epoch": 0.2947943125748153, "grad_norm": 0.8470239780427493, "learning_rate": 1.567169505271695e-05, "loss": 0.6604, "step": 10097 }, { "epoch": 0.29482350880266267, "grad_norm": 0.6789391757232428, "learning_rate": 1.567104622871046e-05, "loss": 0.5875, "step": 10098 }, { "epoch": 0.29485270503051003, "grad_norm": 0.6035916705339401, "learning_rate": 1.5670397404703977e-05, "loss": 0.5711, "step": 10099 }, { "epoch": 0.2948819012583574, "grad_norm": 0.7096728854504023, "learning_rate": 1.566974858069749e-05, "loss": 0.6926, "step": 10100 }, { "epoch": 0.2949110974862048, "grad_norm": 0.7282845192614577, "learning_rate": 1.5669099756691e-05, "loss": 0.7424, "step": 10101 }, { "epoch": 0.29494029371405217, "grad_norm": 0.6460658002179795, "learning_rate": 1.566845093268451e-05, "loss": 0.6033, "step": 10102 }, { "epoch": 0.29496948994189953, "grad_norm": 0.6684555526914406, "learning_rate": 1.566780210867802e-05, "loss": 0.6573, "step": 10103 }, { "epoch": 0.2949986861697469, "grad_norm": 0.6998250064327197, "learning_rate": 1.5667153284671534e-05, "loss": 0.688, "step": 10104 }, { "epoch": 0.29502788239759425, "grad_norm": 0.6800617003943147, "learning_rate": 1.5666504460665046e-05, "loss": 0.6923, "step": 10105 }, { "epoch": 0.2950570786254416, "grad_norm": 0.7245844514867489, "learning_rate": 1.5665855636658558e-05, "loss": 0.6816, "step": 10106 }, { "epoch": 0.295086274853289, "grad_norm": 0.7042989959856523, "learning_rate": 1.566520681265207e-05, "loss": 0.6725, "step": 10107 }, { "epoch": 0.29511547108113634, "grad_norm": 0.6475224960037219, "learning_rate": 1.5664557988645582e-05, "loss": 0.5927, "step": 10108 }, { "epoch": 0.2951446673089837, "grad_norm": 0.7064835698643155, "learning_rate": 1.5663909164639094e-05, "loss": 0.7125, "step": 10109 }, { "epoch": 0.29517386353683106, "grad_norm": 0.6541641479778839, "learning_rate": 1.5663260340632606e-05, "loss": 0.6536, "step": 10110 }, { "epoch": 0.2952030597646784, "grad_norm": 0.6083343274548758, "learning_rate": 1.5662611516626115e-05, "loss": 0.5744, "step": 10111 }, { "epoch": 0.2952322559925258, "grad_norm": 0.6821777102249441, "learning_rate": 1.5661962692619627e-05, "loss": 0.6602, "step": 10112 }, { "epoch": 0.29526145222037314, "grad_norm": 0.6824599471220469, "learning_rate": 1.566131386861314e-05, "loss": 0.6095, "step": 10113 }, { "epoch": 0.2952906484482205, "grad_norm": 0.6528323876465207, "learning_rate": 1.566066504460665e-05, "loss": 0.6613, "step": 10114 }, { "epoch": 0.29531984467606787, "grad_norm": 0.78096738299202, "learning_rate": 1.5660016220600163e-05, "loss": 0.5924, "step": 10115 }, { "epoch": 0.2953490409039152, "grad_norm": 0.6214117907700393, "learning_rate": 1.5659367396593675e-05, "loss": 0.5633, "step": 10116 }, { "epoch": 0.2953782371317626, "grad_norm": 0.6653501014823798, "learning_rate": 1.5658718572587187e-05, "loss": 0.6799, "step": 10117 }, { "epoch": 0.29540743335960995, "grad_norm": 0.6499256965752465, "learning_rate": 1.56580697485807e-05, "loss": 0.618, "step": 10118 }, { "epoch": 0.2954366295874573, "grad_norm": 0.6524286673749817, "learning_rate": 1.565742092457421e-05, "loss": 0.6311, "step": 10119 }, { "epoch": 0.2954658258153047, "grad_norm": 0.684035059512209, "learning_rate": 1.5656772100567723e-05, "loss": 0.6557, "step": 10120 }, { "epoch": 0.29549502204315203, "grad_norm": 0.6222409473910138, "learning_rate": 1.5656123276561235e-05, "loss": 0.5707, "step": 10121 }, { "epoch": 0.2955242182709994, "grad_norm": 0.6793522209311559, "learning_rate": 1.5655474452554747e-05, "loss": 0.7039, "step": 10122 }, { "epoch": 0.29555341449884676, "grad_norm": 0.6631033382752083, "learning_rate": 1.565482562854826e-05, "loss": 0.6438, "step": 10123 }, { "epoch": 0.2955826107266941, "grad_norm": 0.6938865730642803, "learning_rate": 1.565417680454177e-05, "loss": 0.7222, "step": 10124 }, { "epoch": 0.2956118069545415, "grad_norm": 0.6081213733105796, "learning_rate": 1.5653527980535283e-05, "loss": 0.581, "step": 10125 }, { "epoch": 0.29564100318238884, "grad_norm": 0.6298457787478552, "learning_rate": 1.565287915652879e-05, "loss": 0.6319, "step": 10126 }, { "epoch": 0.2956701994102362, "grad_norm": 0.6534798048208814, "learning_rate": 1.5652230332522304e-05, "loss": 0.6032, "step": 10127 }, { "epoch": 0.29569939563808356, "grad_norm": 0.7578947630391211, "learning_rate": 1.5651581508515816e-05, "loss": 0.7213, "step": 10128 }, { "epoch": 0.2957285918659309, "grad_norm": 0.7348629964973542, "learning_rate": 1.5650932684509328e-05, "loss": 0.6997, "step": 10129 }, { "epoch": 0.2957577880937783, "grad_norm": 0.682492020159769, "learning_rate": 1.565028386050284e-05, "loss": 0.6603, "step": 10130 }, { "epoch": 0.29578698432162565, "grad_norm": 0.6363254857679274, "learning_rate": 1.564963503649635e-05, "loss": 0.553, "step": 10131 }, { "epoch": 0.295816180549473, "grad_norm": 0.7014059958322499, "learning_rate": 1.5648986212489864e-05, "loss": 0.6991, "step": 10132 }, { "epoch": 0.29584537677732037, "grad_norm": 0.6424935562142878, "learning_rate": 1.5648337388483376e-05, "loss": 0.6164, "step": 10133 }, { "epoch": 0.29587457300516773, "grad_norm": 0.6698825360819062, "learning_rate": 1.5647688564476888e-05, "loss": 0.6607, "step": 10134 }, { "epoch": 0.2959037692330151, "grad_norm": 0.6502762967231226, "learning_rate": 1.5647039740470396e-05, "loss": 0.5874, "step": 10135 }, { "epoch": 0.29593296546086245, "grad_norm": 0.6072271720007957, "learning_rate": 1.564639091646391e-05, "loss": 0.5192, "step": 10136 }, { "epoch": 0.2959621616887098, "grad_norm": 0.6872185013606383, "learning_rate": 1.5645742092457424e-05, "loss": 0.6532, "step": 10137 }, { "epoch": 0.2959913579165572, "grad_norm": 0.7256274115607805, "learning_rate": 1.5645093268450936e-05, "loss": 0.6666, "step": 10138 }, { "epoch": 0.29602055414440454, "grad_norm": 0.6832568713822137, "learning_rate": 1.5644444444444448e-05, "loss": 0.6658, "step": 10139 }, { "epoch": 0.2960497503722519, "grad_norm": 0.6905152077630521, "learning_rate": 1.5643795620437957e-05, "loss": 0.6746, "step": 10140 }, { "epoch": 0.29607894660009926, "grad_norm": 0.6190367005486584, "learning_rate": 1.564314679643147e-05, "loss": 0.6372, "step": 10141 }, { "epoch": 0.2961081428279466, "grad_norm": 0.7331471510000113, "learning_rate": 1.564249797242498e-05, "loss": 0.7126, "step": 10142 }, { "epoch": 0.296137339055794, "grad_norm": 0.7107349134731696, "learning_rate": 1.5641849148418493e-05, "loss": 0.6646, "step": 10143 }, { "epoch": 0.29616653528364134, "grad_norm": 0.681542339345488, "learning_rate": 1.5641200324412005e-05, "loss": 0.622, "step": 10144 }, { "epoch": 0.2961957315114887, "grad_norm": 0.6230787169666003, "learning_rate": 1.5640551500405517e-05, "loss": 0.5895, "step": 10145 }, { "epoch": 0.29622492773933606, "grad_norm": 0.8042825301041531, "learning_rate": 1.563990267639903e-05, "loss": 0.6584, "step": 10146 }, { "epoch": 0.2962541239671834, "grad_norm": 0.7282701937049766, "learning_rate": 1.563925385239254e-05, "loss": 0.82, "step": 10147 }, { "epoch": 0.2962833201950308, "grad_norm": 0.7023687921726482, "learning_rate": 1.5638605028386053e-05, "loss": 0.6778, "step": 10148 }, { "epoch": 0.29631251642287815, "grad_norm": 0.6529466558143393, "learning_rate": 1.563795620437956e-05, "loss": 0.6065, "step": 10149 }, { "epoch": 0.2963417126507255, "grad_norm": 0.6448819630372056, "learning_rate": 1.5637307380373073e-05, "loss": 0.6261, "step": 10150 }, { "epoch": 0.29637090887857287, "grad_norm": 0.6690963147355311, "learning_rate": 1.5636658556366585e-05, "loss": 0.6643, "step": 10151 }, { "epoch": 0.29640010510642023, "grad_norm": 0.7832265786556845, "learning_rate": 1.5636009732360097e-05, "loss": 0.5788, "step": 10152 }, { "epoch": 0.2964293013342676, "grad_norm": 0.636292172990625, "learning_rate": 1.563536090835361e-05, "loss": 0.5952, "step": 10153 }, { "epoch": 0.29645849756211495, "grad_norm": 0.6096770291559352, "learning_rate": 1.563471208434712e-05, "loss": 0.5505, "step": 10154 }, { "epoch": 0.2964876937899623, "grad_norm": 0.6536401990335489, "learning_rate": 1.5634063260340634e-05, "loss": 0.6209, "step": 10155 }, { "epoch": 0.2965168900178097, "grad_norm": 0.6733850178940466, "learning_rate": 1.5633414436334146e-05, "loss": 0.6348, "step": 10156 }, { "epoch": 0.29654608624565704, "grad_norm": 0.697978573356492, "learning_rate": 1.5632765612327658e-05, "loss": 0.6708, "step": 10157 }, { "epoch": 0.2965752824735044, "grad_norm": 0.6593962778417171, "learning_rate": 1.563211678832117e-05, "loss": 0.6294, "step": 10158 }, { "epoch": 0.29660447870135176, "grad_norm": 0.6731594953453452, "learning_rate": 1.563146796431468e-05, "loss": 0.637, "step": 10159 }, { "epoch": 0.2966336749291991, "grad_norm": 0.6888738688497702, "learning_rate": 1.5630819140308194e-05, "loss": 0.7035, "step": 10160 }, { "epoch": 0.29666287115704654, "grad_norm": 0.6549758910656195, "learning_rate": 1.5630170316301706e-05, "loss": 0.6649, "step": 10161 }, { "epoch": 0.2966920673848939, "grad_norm": 0.7620744065762656, "learning_rate": 1.5629521492295218e-05, "loss": 0.6438, "step": 10162 }, { "epoch": 0.29672126361274126, "grad_norm": 0.6373479541227723, "learning_rate": 1.562887266828873e-05, "loss": 0.5769, "step": 10163 }, { "epoch": 0.2967504598405886, "grad_norm": 0.6847739930753137, "learning_rate": 1.562822384428224e-05, "loss": 0.6907, "step": 10164 }, { "epoch": 0.296779656068436, "grad_norm": 0.6526711601674433, "learning_rate": 1.562757502027575e-05, "loss": 0.5942, "step": 10165 }, { "epoch": 0.29680885229628334, "grad_norm": 0.7428836336669905, "learning_rate": 1.5626926196269262e-05, "loss": 0.701, "step": 10166 }, { "epoch": 0.2968380485241307, "grad_norm": 0.6278924650928216, "learning_rate": 1.5626277372262774e-05, "loss": 0.5915, "step": 10167 }, { "epoch": 0.29686724475197807, "grad_norm": 0.6813636438610562, "learning_rate": 1.5625628548256287e-05, "loss": 0.6523, "step": 10168 }, { "epoch": 0.2968964409798254, "grad_norm": 0.7830257532488536, "learning_rate": 1.56249797242498e-05, "loss": 0.8304, "step": 10169 }, { "epoch": 0.2969256372076728, "grad_norm": 0.6494999034172612, "learning_rate": 1.562433090024331e-05, "loss": 0.6315, "step": 10170 }, { "epoch": 0.29695483343552015, "grad_norm": 0.6676806007225443, "learning_rate": 1.5623682076236823e-05, "loss": 0.6775, "step": 10171 }, { "epoch": 0.2969840296633675, "grad_norm": 0.6756874147769215, "learning_rate": 1.5623033252230335e-05, "loss": 0.6252, "step": 10172 }, { "epoch": 0.29701322589121487, "grad_norm": 0.6179722119569662, "learning_rate": 1.5622384428223843e-05, "loss": 0.5481, "step": 10173 }, { "epoch": 0.29704242211906223, "grad_norm": 0.6840105289072109, "learning_rate": 1.5621735604217355e-05, "loss": 0.639, "step": 10174 }, { "epoch": 0.2970716183469096, "grad_norm": 0.6923716268755105, "learning_rate": 1.562108678021087e-05, "loss": 0.734, "step": 10175 }, { "epoch": 0.29710081457475696, "grad_norm": 0.6177221824984459, "learning_rate": 1.5620437956204383e-05, "loss": 0.6128, "step": 10176 }, { "epoch": 0.2971300108026043, "grad_norm": 0.7413827615853428, "learning_rate": 1.5619789132197895e-05, "loss": 0.7919, "step": 10177 }, { "epoch": 0.2971592070304517, "grad_norm": 0.6848191680545325, "learning_rate": 1.5619140308191403e-05, "loss": 0.6523, "step": 10178 }, { "epoch": 0.29718840325829904, "grad_norm": 0.663827674125369, "learning_rate": 1.5618491484184915e-05, "loss": 0.6879, "step": 10179 }, { "epoch": 0.2972175994861464, "grad_norm": 0.6304264478302367, "learning_rate": 1.5617842660178427e-05, "loss": 0.6258, "step": 10180 }, { "epoch": 0.29724679571399376, "grad_norm": 0.6520518424823214, "learning_rate": 1.561719383617194e-05, "loss": 0.6373, "step": 10181 }, { "epoch": 0.2972759919418411, "grad_norm": 0.7305994023462489, "learning_rate": 1.561654501216545e-05, "loss": 0.7199, "step": 10182 }, { "epoch": 0.2973051881696885, "grad_norm": 0.7255814410687195, "learning_rate": 1.5615896188158964e-05, "loss": 0.7545, "step": 10183 }, { "epoch": 0.29733438439753584, "grad_norm": 0.6955070798528606, "learning_rate": 1.5615247364152476e-05, "loss": 0.7141, "step": 10184 }, { "epoch": 0.2973635806253832, "grad_norm": 0.6724712590568762, "learning_rate": 1.5614598540145988e-05, "loss": 0.6676, "step": 10185 }, { "epoch": 0.29739277685323057, "grad_norm": 0.6312371241012338, "learning_rate": 1.56139497161395e-05, "loss": 0.601, "step": 10186 }, { "epoch": 0.29742197308107793, "grad_norm": 0.6315248377650704, "learning_rate": 1.5613300892133008e-05, "loss": 0.6347, "step": 10187 }, { "epoch": 0.2974511693089253, "grad_norm": 0.6581271650215232, "learning_rate": 1.561265206812652e-05, "loss": 0.6707, "step": 10188 }, { "epoch": 0.29748036553677265, "grad_norm": 0.8360046973921843, "learning_rate": 1.5612003244120032e-05, "loss": 0.718, "step": 10189 }, { "epoch": 0.29750956176462, "grad_norm": 0.6567187051717004, "learning_rate": 1.5611354420113544e-05, "loss": 0.5927, "step": 10190 }, { "epoch": 0.2975387579924674, "grad_norm": 0.6797876817420629, "learning_rate": 1.5610705596107056e-05, "loss": 0.5837, "step": 10191 }, { "epoch": 0.29756795422031473, "grad_norm": 0.706893054511665, "learning_rate": 1.561005677210057e-05, "loss": 0.6468, "step": 10192 }, { "epoch": 0.2975971504481621, "grad_norm": 0.688578951571641, "learning_rate": 1.560940794809408e-05, "loss": 0.6726, "step": 10193 }, { "epoch": 0.29762634667600946, "grad_norm": 0.6591538059461834, "learning_rate": 1.5608759124087592e-05, "loss": 0.6663, "step": 10194 }, { "epoch": 0.2976555429038568, "grad_norm": 0.7232138951854771, "learning_rate": 1.5608110300081104e-05, "loss": 0.7777, "step": 10195 }, { "epoch": 0.2976847391317042, "grad_norm": 0.7858759193055198, "learning_rate": 1.5607461476074616e-05, "loss": 0.6936, "step": 10196 }, { "epoch": 0.29771393535955154, "grad_norm": 0.6448191214877624, "learning_rate": 1.560681265206813e-05, "loss": 0.6541, "step": 10197 }, { "epoch": 0.2977431315873989, "grad_norm": 0.7452112730918509, "learning_rate": 1.560616382806164e-05, "loss": 0.6887, "step": 10198 }, { "epoch": 0.29777232781524626, "grad_norm": 0.7401146654474514, "learning_rate": 1.5605515004055153e-05, "loss": 0.6681, "step": 10199 }, { "epoch": 0.2978015240430936, "grad_norm": 0.6611408989922147, "learning_rate": 1.5604866180048665e-05, "loss": 0.6215, "step": 10200 }, { "epoch": 0.297830720270941, "grad_norm": 0.6738538131782157, "learning_rate": 1.5604217356042177e-05, "loss": 0.6344, "step": 10201 }, { "epoch": 0.29785991649878835, "grad_norm": 0.6399788994795973, "learning_rate": 1.5603568532035685e-05, "loss": 0.5644, "step": 10202 }, { "epoch": 0.2978891127266357, "grad_norm": 0.6883633874426495, "learning_rate": 1.5602919708029197e-05, "loss": 0.6523, "step": 10203 }, { "epoch": 0.29791830895448307, "grad_norm": 0.6666802267287355, "learning_rate": 1.560227088402271e-05, "loss": 0.6578, "step": 10204 }, { "epoch": 0.29794750518233043, "grad_norm": 0.6419396029709992, "learning_rate": 1.560162206001622e-05, "loss": 0.6042, "step": 10205 }, { "epoch": 0.2979767014101778, "grad_norm": 0.6361019059996321, "learning_rate": 1.5600973236009733e-05, "loss": 0.6177, "step": 10206 }, { "epoch": 0.29800589763802515, "grad_norm": 0.6587617027846613, "learning_rate": 1.5600324412003245e-05, "loss": 0.6547, "step": 10207 }, { "epoch": 0.2980350938658725, "grad_norm": 0.669403091104363, "learning_rate": 1.5599675587996757e-05, "loss": 0.6236, "step": 10208 }, { "epoch": 0.2980642900937199, "grad_norm": 0.6817635456757581, "learning_rate": 1.559902676399027e-05, "loss": 0.6095, "step": 10209 }, { "epoch": 0.29809348632156724, "grad_norm": 0.7003805528948931, "learning_rate": 1.559837793998378e-05, "loss": 0.6849, "step": 10210 }, { "epoch": 0.2981226825494146, "grad_norm": 0.6399820806969114, "learning_rate": 1.559772911597729e-05, "loss": 0.5598, "step": 10211 }, { "epoch": 0.29815187877726196, "grad_norm": 0.6376884027628865, "learning_rate": 1.5597080291970802e-05, "loss": 0.5781, "step": 10212 }, { "epoch": 0.2981810750051093, "grad_norm": 0.6178164050399527, "learning_rate": 1.5596431467964318e-05, "loss": 0.5486, "step": 10213 }, { "epoch": 0.2982102712329567, "grad_norm": 0.6550168583777723, "learning_rate": 1.559578264395783e-05, "loss": 0.6324, "step": 10214 }, { "epoch": 0.29823946746080404, "grad_norm": 0.6737149790716226, "learning_rate": 1.559513381995134e-05, "loss": 0.6278, "step": 10215 }, { "epoch": 0.2982686636886514, "grad_norm": 0.6330296377005596, "learning_rate": 1.559448499594485e-05, "loss": 0.6064, "step": 10216 }, { "epoch": 0.29829785991649876, "grad_norm": 0.7337324344155627, "learning_rate": 1.5593836171938362e-05, "loss": 0.7272, "step": 10217 }, { "epoch": 0.2983270561443461, "grad_norm": 0.6443996900623207, "learning_rate": 1.5593187347931874e-05, "loss": 0.605, "step": 10218 }, { "epoch": 0.2983562523721935, "grad_norm": 0.6786399878654339, "learning_rate": 1.5592538523925386e-05, "loss": 0.5935, "step": 10219 }, { "epoch": 0.29838544860004085, "grad_norm": 0.6436575492103102, "learning_rate": 1.55918896999189e-05, "loss": 0.6146, "step": 10220 }, { "epoch": 0.2984146448278882, "grad_norm": 0.6986434624406126, "learning_rate": 1.559124087591241e-05, "loss": 0.6066, "step": 10221 }, { "epoch": 0.2984438410557356, "grad_norm": 0.6189013214452862, "learning_rate": 1.5590592051905922e-05, "loss": 0.5834, "step": 10222 }, { "epoch": 0.298473037283583, "grad_norm": 0.6316734820409079, "learning_rate": 1.5589943227899434e-05, "loss": 0.5502, "step": 10223 }, { "epoch": 0.29850223351143035, "grad_norm": 0.5973319608168304, "learning_rate": 1.5589294403892946e-05, "loss": 0.546, "step": 10224 }, { "epoch": 0.2985314297392777, "grad_norm": 0.8154042078715359, "learning_rate": 1.5588645579886455e-05, "loss": 0.7096, "step": 10225 }, { "epoch": 0.29856062596712507, "grad_norm": 0.6935683291308946, "learning_rate": 1.5587996755879967e-05, "loss": 0.6617, "step": 10226 }, { "epoch": 0.29858982219497243, "grad_norm": 0.6420211357637143, "learning_rate": 1.558734793187348e-05, "loss": 0.5948, "step": 10227 }, { "epoch": 0.2986190184228198, "grad_norm": 0.6548135724382356, "learning_rate": 1.558669910786699e-05, "loss": 0.6031, "step": 10228 }, { "epoch": 0.29864821465066715, "grad_norm": 0.5950756245882303, "learning_rate": 1.5586050283860503e-05, "loss": 0.5631, "step": 10229 }, { "epoch": 0.2986774108785145, "grad_norm": 0.6507529256333331, "learning_rate": 1.5585401459854015e-05, "loss": 0.5853, "step": 10230 }, { "epoch": 0.2987066071063619, "grad_norm": 0.6820801785928523, "learning_rate": 1.5584752635847527e-05, "loss": 0.6829, "step": 10231 }, { "epoch": 0.29873580333420924, "grad_norm": 0.7093192595873197, "learning_rate": 1.558410381184104e-05, "loss": 0.7141, "step": 10232 }, { "epoch": 0.2987649995620566, "grad_norm": 0.7218805326661977, "learning_rate": 1.558345498783455e-05, "loss": 0.7203, "step": 10233 }, { "epoch": 0.29879419578990396, "grad_norm": 0.709713011256076, "learning_rate": 1.5582806163828063e-05, "loss": 0.7177, "step": 10234 }, { "epoch": 0.2988233920177513, "grad_norm": 0.6069109890243949, "learning_rate": 1.5582157339821575e-05, "loss": 0.5586, "step": 10235 }, { "epoch": 0.2988525882455987, "grad_norm": 0.6577352837296161, "learning_rate": 1.5581508515815087e-05, "loss": 0.5962, "step": 10236 }, { "epoch": 0.29888178447344604, "grad_norm": 0.5930809636990201, "learning_rate": 1.55808596918086e-05, "loss": 0.5062, "step": 10237 }, { "epoch": 0.2989109807012934, "grad_norm": 0.6316488580942496, "learning_rate": 1.558021086780211e-05, "loss": 0.5765, "step": 10238 }, { "epoch": 0.29894017692914077, "grad_norm": 0.7321623540703572, "learning_rate": 1.5579562043795623e-05, "loss": 0.7103, "step": 10239 }, { "epoch": 0.2989693731569881, "grad_norm": 0.66540882163625, "learning_rate": 1.5578913219789132e-05, "loss": 0.6572, "step": 10240 }, { "epoch": 0.2989985693848355, "grad_norm": 0.7836287183499769, "learning_rate": 1.5578264395782644e-05, "loss": 0.7189, "step": 10241 }, { "epoch": 0.29902776561268285, "grad_norm": 0.6368225248954905, "learning_rate": 1.5577615571776156e-05, "loss": 0.5533, "step": 10242 }, { "epoch": 0.2990569618405302, "grad_norm": 0.7028561019101741, "learning_rate": 1.5576966747769668e-05, "loss": 0.6462, "step": 10243 }, { "epoch": 0.29908615806837757, "grad_norm": 0.6494708647674123, "learning_rate": 1.557631792376318e-05, "loss": 0.6492, "step": 10244 }, { "epoch": 0.29911535429622493, "grad_norm": 0.660613005266635, "learning_rate": 1.5575669099756692e-05, "loss": 0.6452, "step": 10245 }, { "epoch": 0.2991445505240723, "grad_norm": 0.6623102463195278, "learning_rate": 1.5575020275750204e-05, "loss": 0.653, "step": 10246 }, { "epoch": 0.29917374675191966, "grad_norm": 0.6098872290306554, "learning_rate": 1.5574371451743716e-05, "loss": 0.5395, "step": 10247 }, { "epoch": 0.299202942979767, "grad_norm": 0.6644756757801205, "learning_rate": 1.5573722627737225e-05, "loss": 0.6093, "step": 10248 }, { "epoch": 0.2992321392076144, "grad_norm": 0.6656914034628496, "learning_rate": 1.5573073803730737e-05, "loss": 0.624, "step": 10249 }, { "epoch": 0.29926133543546174, "grad_norm": 0.6783486359704394, "learning_rate": 1.5572424979724252e-05, "loss": 0.6661, "step": 10250 }, { "epoch": 0.2992905316633091, "grad_norm": 0.7236502592831373, "learning_rate": 1.5571776155717764e-05, "loss": 0.6679, "step": 10251 }, { "epoch": 0.29931972789115646, "grad_norm": 0.6534888863643801, "learning_rate": 1.5571127331711276e-05, "loss": 0.649, "step": 10252 }, { "epoch": 0.2993489241190038, "grad_norm": 0.597912337533624, "learning_rate": 1.557047850770479e-05, "loss": 0.516, "step": 10253 }, { "epoch": 0.2993781203468512, "grad_norm": 0.8435992083892652, "learning_rate": 1.5569829683698297e-05, "loss": 0.6274, "step": 10254 }, { "epoch": 0.29940731657469855, "grad_norm": 0.7046623571034154, "learning_rate": 1.556918085969181e-05, "loss": 0.6873, "step": 10255 }, { "epoch": 0.2994365128025459, "grad_norm": 0.7141757121714518, "learning_rate": 1.556853203568532e-05, "loss": 0.6793, "step": 10256 }, { "epoch": 0.29946570903039327, "grad_norm": 0.6280369623865496, "learning_rate": 1.5567883211678833e-05, "loss": 0.6125, "step": 10257 }, { "epoch": 0.29949490525824063, "grad_norm": 0.7236137202772333, "learning_rate": 1.5567234387672345e-05, "loss": 0.5689, "step": 10258 }, { "epoch": 0.299524101486088, "grad_norm": 0.7167426983907637, "learning_rate": 1.5566585563665857e-05, "loss": 0.6915, "step": 10259 }, { "epoch": 0.29955329771393535, "grad_norm": 0.6690618229007729, "learning_rate": 1.556593673965937e-05, "loss": 0.6136, "step": 10260 }, { "epoch": 0.2995824939417827, "grad_norm": 0.6603980266877341, "learning_rate": 1.556528791565288e-05, "loss": 0.6599, "step": 10261 }, { "epoch": 0.2996116901696301, "grad_norm": 0.6976399931093487, "learning_rate": 1.5564639091646393e-05, "loss": 0.6752, "step": 10262 }, { "epoch": 0.29964088639747744, "grad_norm": 0.6543585384395127, "learning_rate": 1.5563990267639902e-05, "loss": 0.5325, "step": 10263 }, { "epoch": 0.2996700826253248, "grad_norm": 0.7202698018672431, "learning_rate": 1.5563341443633414e-05, "loss": 0.6542, "step": 10264 }, { "epoch": 0.29969927885317216, "grad_norm": 0.6365271145756841, "learning_rate": 1.5562692619626926e-05, "loss": 0.6007, "step": 10265 }, { "epoch": 0.2997284750810195, "grad_norm": 0.6582971783609326, "learning_rate": 1.5562043795620438e-05, "loss": 0.6604, "step": 10266 }, { "epoch": 0.2997576713088669, "grad_norm": 0.6992125718214537, "learning_rate": 1.556139497161395e-05, "loss": 0.7126, "step": 10267 }, { "epoch": 0.29978686753671424, "grad_norm": 0.6749955983074007, "learning_rate": 1.5560746147607462e-05, "loss": 0.6694, "step": 10268 }, { "epoch": 0.2998160637645616, "grad_norm": 0.8262928988233966, "learning_rate": 1.5560097323600974e-05, "loss": 0.6521, "step": 10269 }, { "epoch": 0.29984525999240896, "grad_norm": 0.6750327454994406, "learning_rate": 1.5559448499594486e-05, "loss": 0.6445, "step": 10270 }, { "epoch": 0.2998744562202563, "grad_norm": 0.6619032037899188, "learning_rate": 1.5558799675587998e-05, "loss": 0.6454, "step": 10271 }, { "epoch": 0.2999036524481037, "grad_norm": 0.6698582330957145, "learning_rate": 1.555815085158151e-05, "loss": 0.671, "step": 10272 }, { "epoch": 0.29993284867595105, "grad_norm": 0.6936663486342505, "learning_rate": 1.5557502027575022e-05, "loss": 0.7002, "step": 10273 }, { "epoch": 0.2999620449037984, "grad_norm": 0.6549796639400903, "learning_rate": 1.5556853203568534e-05, "loss": 0.5976, "step": 10274 }, { "epoch": 0.29999124113164577, "grad_norm": 0.5976622972876344, "learning_rate": 1.5556204379562046e-05, "loss": 0.5076, "step": 10275 }, { "epoch": 0.30002043735949313, "grad_norm": 0.6441026329582008, "learning_rate": 1.555555555555556e-05, "loss": 0.5834, "step": 10276 }, { "epoch": 0.3000496335873405, "grad_norm": 0.699700104985492, "learning_rate": 1.555490673154907e-05, "loss": 0.6626, "step": 10277 }, { "epoch": 0.30007882981518785, "grad_norm": 0.7109536383813972, "learning_rate": 1.555425790754258e-05, "loss": 0.7212, "step": 10278 }, { "epoch": 0.3001080260430352, "grad_norm": 0.5947439351725863, "learning_rate": 1.555360908353609e-05, "loss": 0.5601, "step": 10279 }, { "epoch": 0.3001372222708826, "grad_norm": 0.6855027899992417, "learning_rate": 1.5552960259529603e-05, "loss": 0.6181, "step": 10280 }, { "epoch": 0.30016641849872994, "grad_norm": 0.7208994425511908, "learning_rate": 1.5552311435523115e-05, "loss": 0.6962, "step": 10281 }, { "epoch": 0.30019561472657735, "grad_norm": 0.6539311631583367, "learning_rate": 1.5551662611516627e-05, "loss": 0.6386, "step": 10282 }, { "epoch": 0.3002248109544247, "grad_norm": 0.6659999007036993, "learning_rate": 1.555101378751014e-05, "loss": 0.6638, "step": 10283 }, { "epoch": 0.3002540071822721, "grad_norm": 0.6729964221590526, "learning_rate": 1.555036496350365e-05, "loss": 0.6986, "step": 10284 }, { "epoch": 0.30028320341011944, "grad_norm": 0.6557587120405123, "learning_rate": 1.5549716139497163e-05, "loss": 0.6526, "step": 10285 }, { "epoch": 0.3003123996379668, "grad_norm": 0.7113597877019451, "learning_rate": 1.5549067315490672e-05, "loss": 0.6521, "step": 10286 }, { "epoch": 0.30034159586581416, "grad_norm": 0.6390343374870682, "learning_rate": 1.5548418491484184e-05, "loss": 0.55, "step": 10287 }, { "epoch": 0.3003707920936615, "grad_norm": 0.6627216238191587, "learning_rate": 1.55477696674777e-05, "loss": 0.6675, "step": 10288 }, { "epoch": 0.3003999883215089, "grad_norm": 0.7124776357037567, "learning_rate": 1.554712084347121e-05, "loss": 0.7063, "step": 10289 }, { "epoch": 0.30042918454935624, "grad_norm": 0.6171510908122617, "learning_rate": 1.5546472019464723e-05, "loss": 0.6009, "step": 10290 }, { "epoch": 0.3004583807772036, "grad_norm": 0.7538572241615443, "learning_rate": 1.5545823195458235e-05, "loss": 0.6525, "step": 10291 }, { "epoch": 0.30048757700505097, "grad_norm": 0.6520066640546167, "learning_rate": 1.5545174371451744e-05, "loss": 0.6341, "step": 10292 }, { "epoch": 0.3005167732328983, "grad_norm": 0.66370872759978, "learning_rate": 1.5544525547445256e-05, "loss": 0.6718, "step": 10293 }, { "epoch": 0.3005459694607457, "grad_norm": 0.6964836767143665, "learning_rate": 1.5543876723438768e-05, "loss": 0.7004, "step": 10294 }, { "epoch": 0.30057516568859305, "grad_norm": 0.7915319322953721, "learning_rate": 1.554322789943228e-05, "loss": 0.6616, "step": 10295 }, { "epoch": 0.3006043619164404, "grad_norm": 0.6629887431993988, "learning_rate": 1.5542579075425792e-05, "loss": 0.6437, "step": 10296 }, { "epoch": 0.30063355814428777, "grad_norm": 0.6850639892971622, "learning_rate": 1.5541930251419304e-05, "loss": 0.6693, "step": 10297 }, { "epoch": 0.30066275437213513, "grad_norm": 0.6464244987111795, "learning_rate": 1.5541281427412816e-05, "loss": 0.6023, "step": 10298 }, { "epoch": 0.3006919505999825, "grad_norm": 0.6638621682251755, "learning_rate": 1.5540632603406328e-05, "loss": 0.6769, "step": 10299 }, { "epoch": 0.30072114682782985, "grad_norm": 0.661359613726047, "learning_rate": 1.553998377939984e-05, "loss": 0.6659, "step": 10300 }, { "epoch": 0.3007503430556772, "grad_norm": 0.6710028168281086, "learning_rate": 1.553933495539335e-05, "loss": 0.6316, "step": 10301 }, { "epoch": 0.3007795392835246, "grad_norm": 0.6248004032583071, "learning_rate": 1.553868613138686e-05, "loss": 0.591, "step": 10302 }, { "epoch": 0.30080873551137194, "grad_norm": 0.6438491903389119, "learning_rate": 1.5538037307380373e-05, "loss": 0.6011, "step": 10303 }, { "epoch": 0.3008379317392193, "grad_norm": 0.6368362936193536, "learning_rate": 1.5537388483373885e-05, "loss": 0.6145, "step": 10304 }, { "epoch": 0.30086712796706666, "grad_norm": 0.6986173166682084, "learning_rate": 1.5536739659367397e-05, "loss": 0.6419, "step": 10305 }, { "epoch": 0.300896324194914, "grad_norm": 0.6873115785464416, "learning_rate": 1.553609083536091e-05, "loss": 0.6475, "step": 10306 }, { "epoch": 0.3009255204227614, "grad_norm": 0.636122440565995, "learning_rate": 1.553544201135442e-05, "loss": 0.5725, "step": 10307 }, { "epoch": 0.30095471665060874, "grad_norm": 0.6892262659029176, "learning_rate": 1.5534793187347933e-05, "loss": 0.6262, "step": 10308 }, { "epoch": 0.3009839128784561, "grad_norm": 0.6220896368493306, "learning_rate": 1.5534144363341445e-05, "loss": 0.6036, "step": 10309 }, { "epoch": 0.30101310910630347, "grad_norm": 0.797727811358789, "learning_rate": 1.5533495539334957e-05, "loss": 0.6978, "step": 10310 }, { "epoch": 0.30104230533415083, "grad_norm": 0.6526787000586509, "learning_rate": 1.553284671532847e-05, "loss": 0.6117, "step": 10311 }, { "epoch": 0.3010715015619982, "grad_norm": 0.666733504587576, "learning_rate": 1.553219789132198e-05, "loss": 0.6301, "step": 10312 }, { "epoch": 0.30110069778984555, "grad_norm": 0.6739479921370373, "learning_rate": 1.5531549067315493e-05, "loss": 0.6385, "step": 10313 }, { "epoch": 0.3011298940176929, "grad_norm": 0.6904404210323462, "learning_rate": 1.5530900243309005e-05, "loss": 0.6849, "step": 10314 }, { "epoch": 0.3011590902455403, "grad_norm": 0.6304777784949238, "learning_rate": 1.5530251419302517e-05, "loss": 0.5955, "step": 10315 }, { "epoch": 0.30118828647338763, "grad_norm": 0.6982477016070003, "learning_rate": 1.5529602595296026e-05, "loss": 0.6458, "step": 10316 }, { "epoch": 0.301217482701235, "grad_norm": 0.6470831379820481, "learning_rate": 1.5528953771289538e-05, "loss": 0.6426, "step": 10317 }, { "epoch": 0.30124667892908236, "grad_norm": 0.6910194981736083, "learning_rate": 1.552830494728305e-05, "loss": 0.7005, "step": 10318 }, { "epoch": 0.3012758751569297, "grad_norm": 0.6920552863875514, "learning_rate": 1.5527656123276562e-05, "loss": 0.6618, "step": 10319 }, { "epoch": 0.3013050713847771, "grad_norm": 0.6280640855230567, "learning_rate": 1.5527007299270074e-05, "loss": 0.602, "step": 10320 }, { "epoch": 0.30133426761262444, "grad_norm": 0.7140990370352781, "learning_rate": 1.5526358475263586e-05, "loss": 0.7508, "step": 10321 }, { "epoch": 0.3013634638404718, "grad_norm": 0.6916307037078252, "learning_rate": 1.5525709651257098e-05, "loss": 0.6995, "step": 10322 }, { "epoch": 0.30139266006831916, "grad_norm": 0.6741691278707982, "learning_rate": 1.552506082725061e-05, "loss": 0.6212, "step": 10323 }, { "epoch": 0.3014218562961665, "grad_norm": 0.638917386754866, "learning_rate": 1.552441200324412e-05, "loss": 0.6096, "step": 10324 }, { "epoch": 0.3014510525240139, "grad_norm": 0.6320839252685828, "learning_rate": 1.552376317923763e-05, "loss": 0.6093, "step": 10325 }, { "epoch": 0.30148024875186125, "grad_norm": 0.6637861940943622, "learning_rate": 1.5523114355231146e-05, "loss": 0.6656, "step": 10326 }, { "epoch": 0.3015094449797086, "grad_norm": 0.6892113365055875, "learning_rate": 1.5522465531224658e-05, "loss": 0.6662, "step": 10327 }, { "epoch": 0.30153864120755597, "grad_norm": 0.6319772594252336, "learning_rate": 1.552181670721817e-05, "loss": 0.5992, "step": 10328 }, { "epoch": 0.30156783743540333, "grad_norm": 0.6952193298236511, "learning_rate": 1.5521167883211682e-05, "loss": 0.6801, "step": 10329 }, { "epoch": 0.3015970336632507, "grad_norm": 0.7132798636367506, "learning_rate": 1.552051905920519e-05, "loss": 0.684, "step": 10330 }, { "epoch": 0.30162622989109805, "grad_norm": 0.631401003102337, "learning_rate": 1.5519870235198703e-05, "loss": 0.6122, "step": 10331 }, { "epoch": 0.3016554261189454, "grad_norm": 0.6569256786713555, "learning_rate": 1.5519221411192215e-05, "loss": 0.6441, "step": 10332 }, { "epoch": 0.3016846223467928, "grad_norm": 0.6910852133431803, "learning_rate": 1.5518572587185727e-05, "loss": 0.6626, "step": 10333 }, { "epoch": 0.30171381857464014, "grad_norm": 0.6936726555644958, "learning_rate": 1.551792376317924e-05, "loss": 0.6845, "step": 10334 }, { "epoch": 0.3017430148024875, "grad_norm": 0.6314974188487433, "learning_rate": 1.551727493917275e-05, "loss": 0.6201, "step": 10335 }, { "epoch": 0.30177221103033486, "grad_norm": 0.7158651257840236, "learning_rate": 1.5516626115166263e-05, "loss": 0.7277, "step": 10336 }, { "epoch": 0.3018014072581822, "grad_norm": 0.6767714808361859, "learning_rate": 1.5515977291159775e-05, "loss": 0.6911, "step": 10337 }, { "epoch": 0.3018306034860296, "grad_norm": 0.6838646356998336, "learning_rate": 1.5515328467153287e-05, "loss": 0.6942, "step": 10338 }, { "epoch": 0.30185979971387694, "grad_norm": 0.6992978836983865, "learning_rate": 1.5514679643146796e-05, "loss": 0.6853, "step": 10339 }, { "epoch": 0.3018889959417243, "grad_norm": 0.6817636327780195, "learning_rate": 1.5514030819140308e-05, "loss": 0.61, "step": 10340 }, { "epoch": 0.30191819216957166, "grad_norm": 0.6238458192005967, "learning_rate": 1.551338199513382e-05, "loss": 0.5852, "step": 10341 }, { "epoch": 0.3019473883974191, "grad_norm": 0.6938661314434358, "learning_rate": 1.5512733171127332e-05, "loss": 0.6973, "step": 10342 }, { "epoch": 0.30197658462526644, "grad_norm": 0.6803838519202012, "learning_rate": 1.5512084347120844e-05, "loss": 0.6594, "step": 10343 }, { "epoch": 0.3020057808531138, "grad_norm": 0.6191166604327584, "learning_rate": 1.5511435523114356e-05, "loss": 0.5787, "step": 10344 }, { "epoch": 0.30203497708096116, "grad_norm": 0.7191761565548263, "learning_rate": 1.5510786699107868e-05, "loss": 0.7871, "step": 10345 }, { "epoch": 0.3020641733088085, "grad_norm": 0.6810836493196226, "learning_rate": 1.551013787510138e-05, "loss": 0.7066, "step": 10346 }, { "epoch": 0.3020933695366559, "grad_norm": 0.6312981414882977, "learning_rate": 1.5509489051094892e-05, "loss": 0.5655, "step": 10347 }, { "epoch": 0.30212256576450325, "grad_norm": 0.6502640069381829, "learning_rate": 1.5508840227088404e-05, "loss": 0.5993, "step": 10348 }, { "epoch": 0.3021517619923506, "grad_norm": 0.6286806554363382, "learning_rate": 1.5508191403081916e-05, "loss": 0.6281, "step": 10349 }, { "epoch": 0.30218095822019797, "grad_norm": 0.6432325185893629, "learning_rate": 1.5507542579075428e-05, "loss": 0.6022, "step": 10350 }, { "epoch": 0.30221015444804533, "grad_norm": 0.6053511757403794, "learning_rate": 1.550689375506894e-05, "loss": 0.5613, "step": 10351 }, { "epoch": 0.3022393506758927, "grad_norm": 0.6961057661911844, "learning_rate": 1.5506244931062452e-05, "loss": 0.6241, "step": 10352 }, { "epoch": 0.30226854690374005, "grad_norm": 0.6178808086719355, "learning_rate": 1.5505596107055964e-05, "loss": 0.5838, "step": 10353 }, { "epoch": 0.3022977431315874, "grad_norm": 0.681129204850636, "learning_rate": 1.5504947283049473e-05, "loss": 0.6316, "step": 10354 }, { "epoch": 0.3023269393594348, "grad_norm": 0.6809509537644174, "learning_rate": 1.5504298459042985e-05, "loss": 0.6753, "step": 10355 }, { "epoch": 0.30235613558728214, "grad_norm": 0.6770958228537052, "learning_rate": 1.5503649635036497e-05, "loss": 0.6606, "step": 10356 }, { "epoch": 0.3023853318151295, "grad_norm": 0.7011419404434149, "learning_rate": 1.550300081103001e-05, "loss": 0.7146, "step": 10357 }, { "epoch": 0.30241452804297686, "grad_norm": 0.6792519768728452, "learning_rate": 1.550235198702352e-05, "loss": 0.644, "step": 10358 }, { "epoch": 0.3024437242708242, "grad_norm": 0.6370507156161445, "learning_rate": 1.5501703163017033e-05, "loss": 0.6482, "step": 10359 }, { "epoch": 0.3024729204986716, "grad_norm": 0.6472884912522684, "learning_rate": 1.5501054339010545e-05, "loss": 0.5782, "step": 10360 }, { "epoch": 0.30250211672651894, "grad_norm": 0.6706708394301554, "learning_rate": 1.5500405515004057e-05, "loss": 0.6149, "step": 10361 }, { "epoch": 0.3025313129543663, "grad_norm": 0.6567831919754217, "learning_rate": 1.5499756690997566e-05, "loss": 0.613, "step": 10362 }, { "epoch": 0.30256050918221367, "grad_norm": 0.6647115254615589, "learning_rate": 1.5499107866991078e-05, "loss": 0.6293, "step": 10363 }, { "epoch": 0.302589705410061, "grad_norm": 0.8197168295733473, "learning_rate": 1.5498459042984593e-05, "loss": 0.7303, "step": 10364 }, { "epoch": 0.3026189016379084, "grad_norm": 0.6300911686679644, "learning_rate": 1.5497810218978105e-05, "loss": 0.6276, "step": 10365 }, { "epoch": 0.30264809786575575, "grad_norm": 0.6885188398399622, "learning_rate": 1.5497161394971617e-05, "loss": 0.7478, "step": 10366 }, { "epoch": 0.3026772940936031, "grad_norm": 0.6610861640301657, "learning_rate": 1.549651257096513e-05, "loss": 0.6046, "step": 10367 }, { "epoch": 0.30270649032145047, "grad_norm": 0.6276326954786101, "learning_rate": 1.5495863746958638e-05, "loss": 0.6382, "step": 10368 }, { "epoch": 0.30273568654929783, "grad_norm": 0.6094112614140065, "learning_rate": 1.549521492295215e-05, "loss": 0.5474, "step": 10369 }, { "epoch": 0.3027648827771452, "grad_norm": 0.6002765099660324, "learning_rate": 1.5494566098945662e-05, "loss": 0.5382, "step": 10370 }, { "epoch": 0.30279407900499256, "grad_norm": 0.6916465074009203, "learning_rate": 1.5493917274939174e-05, "loss": 0.6352, "step": 10371 }, { "epoch": 0.3028232752328399, "grad_norm": 0.6854439298291426, "learning_rate": 1.5493268450932686e-05, "loss": 0.7049, "step": 10372 }, { "epoch": 0.3028524714606873, "grad_norm": 0.6818588057939571, "learning_rate": 1.5492619626926198e-05, "loss": 0.7257, "step": 10373 }, { "epoch": 0.30288166768853464, "grad_norm": 0.6900136683431959, "learning_rate": 1.549197080291971e-05, "loss": 0.7234, "step": 10374 }, { "epoch": 0.302910863916382, "grad_norm": 0.7221762257498554, "learning_rate": 1.5491321978913222e-05, "loss": 0.6942, "step": 10375 }, { "epoch": 0.30294006014422936, "grad_norm": 0.6791920848000702, "learning_rate": 1.5490673154906734e-05, "loss": 0.6542, "step": 10376 }, { "epoch": 0.3029692563720767, "grad_norm": 0.6146036069094907, "learning_rate": 1.5490024330900243e-05, "loss": 0.5539, "step": 10377 }, { "epoch": 0.3029984525999241, "grad_norm": 0.6841001460549332, "learning_rate": 1.5489375506893755e-05, "loss": 0.6855, "step": 10378 }, { "epoch": 0.30302764882777145, "grad_norm": 0.6880547722008684, "learning_rate": 1.5488726682887267e-05, "loss": 0.6603, "step": 10379 }, { "epoch": 0.3030568450556188, "grad_norm": 0.6230762896579276, "learning_rate": 1.548807785888078e-05, "loss": 0.638, "step": 10380 }, { "epoch": 0.30308604128346617, "grad_norm": 0.649437394594606, "learning_rate": 1.548742903487429e-05, "loss": 0.644, "step": 10381 }, { "epoch": 0.30311523751131353, "grad_norm": 0.6773124149536353, "learning_rate": 1.5486780210867806e-05, "loss": 0.7226, "step": 10382 }, { "epoch": 0.3031444337391609, "grad_norm": 0.8412303169660897, "learning_rate": 1.5486131386861315e-05, "loss": 0.7055, "step": 10383 }, { "epoch": 0.30317362996700825, "grad_norm": 0.605878680203661, "learning_rate": 1.5485482562854827e-05, "loss": 0.581, "step": 10384 }, { "epoch": 0.3032028261948556, "grad_norm": 0.7339101975362363, "learning_rate": 1.548483373884834e-05, "loss": 0.6609, "step": 10385 }, { "epoch": 0.303232022422703, "grad_norm": 0.7137281783020996, "learning_rate": 1.548418491484185e-05, "loss": 0.6878, "step": 10386 }, { "epoch": 0.30326121865055033, "grad_norm": 0.6512434123344737, "learning_rate": 1.5483536090835363e-05, "loss": 0.526, "step": 10387 }, { "epoch": 0.3032904148783977, "grad_norm": 0.6302880322242397, "learning_rate": 1.5482887266828875e-05, "loss": 0.5472, "step": 10388 }, { "epoch": 0.30331961110624506, "grad_norm": 0.6581145031695907, "learning_rate": 1.5482238442822387e-05, "loss": 0.6508, "step": 10389 }, { "epoch": 0.3033488073340924, "grad_norm": 0.7087723102768689, "learning_rate": 1.54815896188159e-05, "loss": 0.6975, "step": 10390 }, { "epoch": 0.3033780035619398, "grad_norm": 0.7164769478668471, "learning_rate": 1.5480940794809408e-05, "loss": 0.6732, "step": 10391 }, { "epoch": 0.30340719978978714, "grad_norm": 0.6628993457233125, "learning_rate": 1.548029197080292e-05, "loss": 0.6236, "step": 10392 }, { "epoch": 0.3034363960176345, "grad_norm": 0.649928699331041, "learning_rate": 1.547964314679643e-05, "loss": 0.6471, "step": 10393 }, { "epoch": 0.30346559224548186, "grad_norm": 0.6269063841048603, "learning_rate": 1.5478994322789944e-05, "loss": 0.5725, "step": 10394 }, { "epoch": 0.3034947884733292, "grad_norm": 0.6987466147275064, "learning_rate": 1.5478345498783456e-05, "loss": 0.695, "step": 10395 }, { "epoch": 0.3035239847011766, "grad_norm": 0.6895797714272264, "learning_rate": 1.5477696674776968e-05, "loss": 0.7509, "step": 10396 }, { "epoch": 0.30355318092902395, "grad_norm": 0.6283512935467344, "learning_rate": 1.547704785077048e-05, "loss": 0.6086, "step": 10397 }, { "epoch": 0.3035823771568713, "grad_norm": 0.649878469960685, "learning_rate": 1.5476399026763992e-05, "loss": 0.6023, "step": 10398 }, { "epoch": 0.30361157338471867, "grad_norm": 0.661873855257127, "learning_rate": 1.5475750202757504e-05, "loss": 0.6411, "step": 10399 }, { "epoch": 0.30364076961256603, "grad_norm": 0.6192652250017746, "learning_rate": 1.5475101378751012e-05, "loss": 0.5448, "step": 10400 }, { "epoch": 0.3036699658404134, "grad_norm": 0.6503183624938922, "learning_rate": 1.5474452554744528e-05, "loss": 0.6363, "step": 10401 }, { "epoch": 0.3036991620682608, "grad_norm": 0.7201940565786625, "learning_rate": 1.547380373073804e-05, "loss": 0.7153, "step": 10402 }, { "epoch": 0.30372835829610817, "grad_norm": 0.7719079609693723, "learning_rate": 1.5473154906731552e-05, "loss": 0.667, "step": 10403 }, { "epoch": 0.30375755452395553, "grad_norm": 0.6886920962599065, "learning_rate": 1.5472506082725064e-05, "loss": 0.6706, "step": 10404 }, { "epoch": 0.3037867507518029, "grad_norm": 0.7818150923622226, "learning_rate": 1.5471857258718576e-05, "loss": 0.6474, "step": 10405 }, { "epoch": 0.30381594697965025, "grad_norm": 0.6627641971735514, "learning_rate": 1.5471208434712085e-05, "loss": 0.6621, "step": 10406 }, { "epoch": 0.3038451432074976, "grad_norm": 0.6042256060666086, "learning_rate": 1.5470559610705597e-05, "loss": 0.6095, "step": 10407 }, { "epoch": 0.303874339435345, "grad_norm": 0.7753488677950723, "learning_rate": 1.546991078669911e-05, "loss": 0.6465, "step": 10408 }, { "epoch": 0.30390353566319234, "grad_norm": 0.6636427120841305, "learning_rate": 1.546926196269262e-05, "loss": 0.5848, "step": 10409 }, { "epoch": 0.3039327318910397, "grad_norm": 0.67172272129392, "learning_rate": 1.5468613138686133e-05, "loss": 0.6667, "step": 10410 }, { "epoch": 0.30396192811888706, "grad_norm": 0.7187917765778858, "learning_rate": 1.5467964314679645e-05, "loss": 0.6785, "step": 10411 }, { "epoch": 0.3039911243467344, "grad_norm": 0.6972519893637451, "learning_rate": 1.5467315490673157e-05, "loss": 0.7498, "step": 10412 }, { "epoch": 0.3040203205745818, "grad_norm": 0.7066999035877173, "learning_rate": 1.546666666666667e-05, "loss": 0.7102, "step": 10413 }, { "epoch": 0.30404951680242914, "grad_norm": 0.7099307936411026, "learning_rate": 1.546601784266018e-05, "loss": 0.7039, "step": 10414 }, { "epoch": 0.3040787130302765, "grad_norm": 0.590038292832043, "learning_rate": 1.546536901865369e-05, "loss": 0.5562, "step": 10415 }, { "epoch": 0.30410790925812387, "grad_norm": 0.6571313937803503, "learning_rate": 1.54647201946472e-05, "loss": 0.6383, "step": 10416 }, { "epoch": 0.3041371054859712, "grad_norm": 0.6091697809606981, "learning_rate": 1.5464071370640714e-05, "loss": 0.5672, "step": 10417 }, { "epoch": 0.3041663017138186, "grad_norm": 0.6371027934341118, "learning_rate": 1.5463422546634226e-05, "loss": 0.5758, "step": 10418 }, { "epoch": 0.30419549794166595, "grad_norm": 0.6569612313272309, "learning_rate": 1.5462773722627738e-05, "loss": 0.6776, "step": 10419 }, { "epoch": 0.3042246941695133, "grad_norm": 0.6452664960440309, "learning_rate": 1.5462124898621253e-05, "loss": 0.5902, "step": 10420 }, { "epoch": 0.30425389039736067, "grad_norm": 0.6960835571928827, "learning_rate": 1.546147607461476e-05, "loss": 0.6604, "step": 10421 }, { "epoch": 0.30428308662520803, "grad_norm": 0.5977540923142004, "learning_rate": 1.5460827250608274e-05, "loss": 0.579, "step": 10422 }, { "epoch": 0.3043122828530554, "grad_norm": 0.6367575868663697, "learning_rate": 1.5460178426601786e-05, "loss": 0.5815, "step": 10423 }, { "epoch": 0.30434147908090275, "grad_norm": 0.649016213902945, "learning_rate": 1.5459529602595298e-05, "loss": 0.6749, "step": 10424 }, { "epoch": 0.3043706753087501, "grad_norm": 0.7379250397760342, "learning_rate": 1.545888077858881e-05, "loss": 0.6765, "step": 10425 }, { "epoch": 0.3043998715365975, "grad_norm": 0.6862967151409157, "learning_rate": 1.5458231954582322e-05, "loss": 0.6534, "step": 10426 }, { "epoch": 0.30442906776444484, "grad_norm": 0.6018394203187403, "learning_rate": 1.5457583130575834e-05, "loss": 0.5474, "step": 10427 }, { "epoch": 0.3044582639922922, "grad_norm": 0.9071503749901737, "learning_rate": 1.5456934306569346e-05, "loss": 0.684, "step": 10428 }, { "epoch": 0.30448746022013956, "grad_norm": 0.6662438322513973, "learning_rate": 1.5456285482562854e-05, "loss": 0.664, "step": 10429 }, { "epoch": 0.3045166564479869, "grad_norm": 0.6419190042777457, "learning_rate": 1.5455636658556366e-05, "loss": 0.5814, "step": 10430 }, { "epoch": 0.3045458526758343, "grad_norm": 0.6570230946917972, "learning_rate": 1.545498783454988e-05, "loss": 0.6531, "step": 10431 }, { "epoch": 0.30457504890368164, "grad_norm": 0.6817708358687562, "learning_rate": 1.545433901054339e-05, "loss": 0.6177, "step": 10432 }, { "epoch": 0.304604245131529, "grad_norm": 0.6831279367081078, "learning_rate": 1.5453690186536903e-05, "loss": 0.672, "step": 10433 }, { "epoch": 0.30463344135937637, "grad_norm": 0.7136480601162335, "learning_rate": 1.5453041362530415e-05, "loss": 0.707, "step": 10434 }, { "epoch": 0.30466263758722373, "grad_norm": 0.6668521692888678, "learning_rate": 1.5452392538523927e-05, "loss": 0.6663, "step": 10435 }, { "epoch": 0.3046918338150711, "grad_norm": 0.7962133738453057, "learning_rate": 1.545174371451744e-05, "loss": 0.8089, "step": 10436 }, { "epoch": 0.30472103004291845, "grad_norm": 0.692456914265686, "learning_rate": 1.545109489051095e-05, "loss": 0.6207, "step": 10437 }, { "epoch": 0.3047502262707658, "grad_norm": 0.7008816294623772, "learning_rate": 1.545044606650446e-05, "loss": 0.7062, "step": 10438 }, { "epoch": 0.3047794224986132, "grad_norm": 0.596164339662193, "learning_rate": 1.5449797242497975e-05, "loss": 0.5081, "step": 10439 }, { "epoch": 0.30480861872646053, "grad_norm": 0.7000406076128083, "learning_rate": 1.5449148418491487e-05, "loss": 0.7115, "step": 10440 }, { "epoch": 0.3048378149543079, "grad_norm": 0.651142292538668, "learning_rate": 1.5448499594485e-05, "loss": 0.609, "step": 10441 }, { "epoch": 0.30486701118215526, "grad_norm": 0.6781029353574075, "learning_rate": 1.544785077047851e-05, "loss": 0.6414, "step": 10442 }, { "epoch": 0.3048962074100026, "grad_norm": 0.6721381105972517, "learning_rate": 1.5447201946472023e-05, "loss": 0.6532, "step": 10443 }, { "epoch": 0.30492540363785, "grad_norm": 0.6325966703886674, "learning_rate": 1.544655312246553e-05, "loss": 0.6044, "step": 10444 }, { "epoch": 0.30495459986569734, "grad_norm": 0.6510705031915103, "learning_rate": 1.5445904298459043e-05, "loss": 0.5892, "step": 10445 }, { "epoch": 0.3049837960935447, "grad_norm": 0.791351993839998, "learning_rate": 1.5445255474452556e-05, "loss": 0.6213, "step": 10446 }, { "epoch": 0.30501299232139206, "grad_norm": 0.6551938878079335, "learning_rate": 1.5444606650446068e-05, "loss": 0.6571, "step": 10447 }, { "epoch": 0.3050421885492394, "grad_norm": 0.6719341005396787, "learning_rate": 1.544395782643958e-05, "loss": 0.6337, "step": 10448 }, { "epoch": 0.3050713847770868, "grad_norm": 0.6142073416192263, "learning_rate": 1.544330900243309e-05, "loss": 0.5854, "step": 10449 }, { "epoch": 0.30510058100493415, "grad_norm": 0.6199474053807417, "learning_rate": 1.5442660178426604e-05, "loss": 0.6115, "step": 10450 }, { "epoch": 0.3051297772327815, "grad_norm": 0.6480502784025304, "learning_rate": 1.5442011354420116e-05, "loss": 0.6036, "step": 10451 }, { "epoch": 0.30515897346062887, "grad_norm": 0.6542637920254545, "learning_rate": 1.5441362530413628e-05, "loss": 0.6285, "step": 10452 }, { "epoch": 0.30518816968847623, "grad_norm": 0.630352843731762, "learning_rate": 1.5440713706407136e-05, "loss": 0.6242, "step": 10453 }, { "epoch": 0.3052173659163236, "grad_norm": 0.6395036121438713, "learning_rate": 1.544006488240065e-05, "loss": 0.5933, "step": 10454 }, { "epoch": 0.30524656214417095, "grad_norm": 0.6659077262680634, "learning_rate": 1.543941605839416e-05, "loss": 0.6768, "step": 10455 }, { "epoch": 0.3052757583720183, "grad_norm": 0.6782305669125039, "learning_rate": 1.5438767234387672e-05, "loss": 0.6731, "step": 10456 }, { "epoch": 0.3053049545998657, "grad_norm": 0.6529823281870095, "learning_rate": 1.5438118410381184e-05, "loss": 0.6025, "step": 10457 }, { "epoch": 0.30533415082771304, "grad_norm": 0.6169796059631037, "learning_rate": 1.54374695863747e-05, "loss": 0.5728, "step": 10458 }, { "epoch": 0.3053633470555604, "grad_norm": 0.6753849316022821, "learning_rate": 1.543682076236821e-05, "loss": 0.6715, "step": 10459 }, { "epoch": 0.30539254328340776, "grad_norm": 0.6447620607893023, "learning_rate": 1.543617193836172e-05, "loss": 0.5861, "step": 10460 }, { "epoch": 0.3054217395112551, "grad_norm": 0.5853405319309698, "learning_rate": 1.5435523114355233e-05, "loss": 0.5074, "step": 10461 }, { "epoch": 0.3054509357391025, "grad_norm": 0.6513591256886976, "learning_rate": 1.5434874290348745e-05, "loss": 0.6047, "step": 10462 }, { "epoch": 0.3054801319669499, "grad_norm": 0.6510517379666996, "learning_rate": 1.5434225466342257e-05, "loss": 0.6048, "step": 10463 }, { "epoch": 0.30550932819479726, "grad_norm": 0.6712739765354004, "learning_rate": 1.543357664233577e-05, "loss": 0.6755, "step": 10464 }, { "epoch": 0.3055385244226446, "grad_norm": 0.6316366589648051, "learning_rate": 1.543292781832928e-05, "loss": 0.6203, "step": 10465 }, { "epoch": 0.305567720650492, "grad_norm": 0.7096024527239851, "learning_rate": 1.5432278994322793e-05, "loss": 0.7091, "step": 10466 }, { "epoch": 0.30559691687833934, "grad_norm": 0.6829044156095624, "learning_rate": 1.54316301703163e-05, "loss": 0.714, "step": 10467 }, { "epoch": 0.3056261131061867, "grad_norm": 0.6315388139710474, "learning_rate": 1.5430981346309813e-05, "loss": 0.6167, "step": 10468 }, { "epoch": 0.30565530933403406, "grad_norm": 0.6785628481635683, "learning_rate": 1.5430332522303325e-05, "loss": 0.6248, "step": 10469 }, { "epoch": 0.3056845055618814, "grad_norm": 0.6877095104093853, "learning_rate": 1.5429683698296837e-05, "loss": 0.6834, "step": 10470 }, { "epoch": 0.3057137017897288, "grad_norm": 0.6954097908785578, "learning_rate": 1.542903487429035e-05, "loss": 0.7309, "step": 10471 }, { "epoch": 0.30574289801757615, "grad_norm": 0.6243802115847525, "learning_rate": 1.542838605028386e-05, "loss": 0.5706, "step": 10472 }, { "epoch": 0.3057720942454235, "grad_norm": 0.6407587553397471, "learning_rate": 1.5427737226277373e-05, "loss": 0.6586, "step": 10473 }, { "epoch": 0.30580129047327087, "grad_norm": 0.6603721512105278, "learning_rate": 1.5427088402270886e-05, "loss": 0.5991, "step": 10474 }, { "epoch": 0.30583048670111823, "grad_norm": 0.6554843520411463, "learning_rate": 1.5426439578264398e-05, "loss": 0.6521, "step": 10475 }, { "epoch": 0.3058596829289656, "grad_norm": 0.6850638675304002, "learning_rate": 1.5425790754257906e-05, "loss": 0.7034, "step": 10476 }, { "epoch": 0.30588887915681295, "grad_norm": 0.6451343077052408, "learning_rate": 1.542514193025142e-05, "loss": 0.6325, "step": 10477 }, { "epoch": 0.3059180753846603, "grad_norm": 0.6601342170081382, "learning_rate": 1.5424493106244934e-05, "loss": 0.6247, "step": 10478 }, { "epoch": 0.3059472716125077, "grad_norm": 0.6594038534429602, "learning_rate": 1.5423844282238446e-05, "loss": 0.6348, "step": 10479 }, { "epoch": 0.30597646784035504, "grad_norm": 0.5722970724662039, "learning_rate": 1.5423195458231958e-05, "loss": 0.5343, "step": 10480 }, { "epoch": 0.3060056640682024, "grad_norm": 0.6022473813789856, "learning_rate": 1.542254663422547e-05, "loss": 0.5695, "step": 10481 }, { "epoch": 0.30603486029604976, "grad_norm": 0.676981540101079, "learning_rate": 1.542189781021898e-05, "loss": 0.6703, "step": 10482 }, { "epoch": 0.3060640565238971, "grad_norm": 0.6373928681761155, "learning_rate": 1.542124898621249e-05, "loss": 0.6044, "step": 10483 }, { "epoch": 0.3060932527517445, "grad_norm": 0.6516598076160421, "learning_rate": 1.5420600162206002e-05, "loss": 0.6071, "step": 10484 }, { "epoch": 0.30612244897959184, "grad_norm": 0.6918714495947677, "learning_rate": 1.5419951338199514e-05, "loss": 0.7071, "step": 10485 }, { "epoch": 0.3061516452074392, "grad_norm": 0.6340892719598095, "learning_rate": 1.5419302514193026e-05, "loss": 0.5849, "step": 10486 }, { "epoch": 0.30618084143528657, "grad_norm": 0.6407978671742451, "learning_rate": 1.541865369018654e-05, "loss": 0.5789, "step": 10487 }, { "epoch": 0.3062100376631339, "grad_norm": 0.6443970784664855, "learning_rate": 1.541800486618005e-05, "loss": 0.5721, "step": 10488 }, { "epoch": 0.3062392338909813, "grad_norm": 0.6701489698529292, "learning_rate": 1.5417356042173563e-05, "loss": 0.5893, "step": 10489 }, { "epoch": 0.30626843011882865, "grad_norm": 0.6273965383718815, "learning_rate": 1.5416707218167075e-05, "loss": 0.6127, "step": 10490 }, { "epoch": 0.306297626346676, "grad_norm": 0.6850329227236577, "learning_rate": 1.5416058394160583e-05, "loss": 0.6571, "step": 10491 }, { "epoch": 0.30632682257452337, "grad_norm": 0.6684734310097549, "learning_rate": 1.5415409570154095e-05, "loss": 0.6905, "step": 10492 }, { "epoch": 0.30635601880237073, "grad_norm": 0.6829941444089395, "learning_rate": 1.5414760746147607e-05, "loss": 0.679, "step": 10493 }, { "epoch": 0.3063852150302181, "grad_norm": 0.6257421405412954, "learning_rate": 1.541411192214112e-05, "loss": 0.5964, "step": 10494 }, { "epoch": 0.30641441125806546, "grad_norm": 0.653584685694622, "learning_rate": 1.541346309813463e-05, "loss": 0.6425, "step": 10495 }, { "epoch": 0.3064436074859128, "grad_norm": 0.6635909782470947, "learning_rate": 1.5412814274128143e-05, "loss": 0.6839, "step": 10496 }, { "epoch": 0.3064728037137602, "grad_norm": 0.6835113026170865, "learning_rate": 1.5412165450121655e-05, "loss": 0.6488, "step": 10497 }, { "epoch": 0.30650199994160754, "grad_norm": 0.6948388498069592, "learning_rate": 1.5411516626115167e-05, "loss": 0.6784, "step": 10498 }, { "epoch": 0.3065311961694549, "grad_norm": 0.6539062232072064, "learning_rate": 1.541086780210868e-05, "loss": 0.6299, "step": 10499 }, { "epoch": 0.30656039239730226, "grad_norm": 0.657266118875856, "learning_rate": 1.541021897810219e-05, "loss": 0.6483, "step": 10500 }, { "epoch": 0.3065895886251496, "grad_norm": 0.6633345165188719, "learning_rate": 1.5409570154095703e-05, "loss": 0.6111, "step": 10501 }, { "epoch": 0.306618784852997, "grad_norm": 0.6168032041121865, "learning_rate": 1.5408921330089215e-05, "loss": 0.5831, "step": 10502 }, { "epoch": 0.30664798108084435, "grad_norm": 0.6690135576992021, "learning_rate": 1.5408272506082728e-05, "loss": 0.6184, "step": 10503 }, { "epoch": 0.3066771773086917, "grad_norm": 0.6034467414017805, "learning_rate": 1.540762368207624e-05, "loss": 0.5581, "step": 10504 }, { "epoch": 0.30670637353653907, "grad_norm": 0.63468896224004, "learning_rate": 1.5406974858069748e-05, "loss": 0.5702, "step": 10505 }, { "epoch": 0.30673556976438643, "grad_norm": 0.6285450263953805, "learning_rate": 1.540632603406326e-05, "loss": 0.5965, "step": 10506 }, { "epoch": 0.3067647659922338, "grad_norm": 0.6247523165424289, "learning_rate": 1.5405677210056772e-05, "loss": 0.5717, "step": 10507 }, { "epoch": 0.30679396222008115, "grad_norm": 0.6628939605307416, "learning_rate": 1.5405028386050284e-05, "loss": 0.6683, "step": 10508 }, { "epoch": 0.3068231584479285, "grad_norm": 0.6598682131068452, "learning_rate": 1.5404379562043796e-05, "loss": 0.6188, "step": 10509 }, { "epoch": 0.3068523546757759, "grad_norm": 0.7099150095343542, "learning_rate": 1.540373073803731e-05, "loss": 0.6747, "step": 10510 }, { "epoch": 0.30688155090362323, "grad_norm": 0.6980078652371742, "learning_rate": 1.540308191403082e-05, "loss": 0.6358, "step": 10511 }, { "epoch": 0.3069107471314706, "grad_norm": 0.6723326755341219, "learning_rate": 1.5402433090024332e-05, "loss": 0.6641, "step": 10512 }, { "epoch": 0.30693994335931796, "grad_norm": 0.6983933890646685, "learning_rate": 1.5401784266017844e-05, "loss": 0.6494, "step": 10513 }, { "epoch": 0.3069691395871653, "grad_norm": 0.6848854286429948, "learning_rate": 1.5401135442011353e-05, "loss": 0.7014, "step": 10514 }, { "epoch": 0.3069983358150127, "grad_norm": 0.6899252259262897, "learning_rate": 1.540048661800487e-05, "loss": 0.6544, "step": 10515 }, { "epoch": 0.30702753204286004, "grad_norm": 0.63965276593004, "learning_rate": 1.539983779399838e-05, "loss": 0.582, "step": 10516 }, { "epoch": 0.3070567282707074, "grad_norm": 0.7296746104521978, "learning_rate": 1.5399188969991893e-05, "loss": 0.7293, "step": 10517 }, { "epoch": 0.30708592449855476, "grad_norm": 0.6254207693764217, "learning_rate": 1.5398540145985405e-05, "loss": 0.5522, "step": 10518 }, { "epoch": 0.3071151207264021, "grad_norm": 0.6255208218993003, "learning_rate": 1.5397891321978917e-05, "loss": 0.5966, "step": 10519 }, { "epoch": 0.3071443169542495, "grad_norm": 0.6084433503398828, "learning_rate": 1.5397242497972425e-05, "loss": 0.5394, "step": 10520 }, { "epoch": 0.30717351318209685, "grad_norm": 0.6833966346079277, "learning_rate": 1.5396593673965937e-05, "loss": 0.668, "step": 10521 }, { "epoch": 0.3072027094099442, "grad_norm": 0.6705430834999934, "learning_rate": 1.539594484995945e-05, "loss": 0.6804, "step": 10522 }, { "epoch": 0.3072319056377916, "grad_norm": 0.6558804526974558, "learning_rate": 1.539529602595296e-05, "loss": 0.6459, "step": 10523 }, { "epoch": 0.307261101865639, "grad_norm": 0.7630399254287848, "learning_rate": 1.5394647201946473e-05, "loss": 0.6836, "step": 10524 }, { "epoch": 0.30729029809348635, "grad_norm": 0.6713892309046716, "learning_rate": 1.5393998377939985e-05, "loss": 0.6844, "step": 10525 }, { "epoch": 0.3073194943213337, "grad_norm": 0.7093277839227037, "learning_rate": 1.5393349553933497e-05, "loss": 0.7148, "step": 10526 }, { "epoch": 0.30734869054918107, "grad_norm": 0.6462739305233582, "learning_rate": 1.539270072992701e-05, "loss": 0.6145, "step": 10527 }, { "epoch": 0.30737788677702843, "grad_norm": 0.6892791268523898, "learning_rate": 1.539205190592052e-05, "loss": 0.6768, "step": 10528 }, { "epoch": 0.3074070830048758, "grad_norm": 0.6712550284759198, "learning_rate": 1.539140308191403e-05, "loss": 0.6343, "step": 10529 }, { "epoch": 0.30743627923272315, "grad_norm": 0.6361177871261195, "learning_rate": 1.5390754257907542e-05, "loss": 0.5975, "step": 10530 }, { "epoch": 0.3074654754605705, "grad_norm": 0.6702935762016069, "learning_rate": 1.5390105433901054e-05, "loss": 0.6571, "step": 10531 }, { "epoch": 0.3074946716884179, "grad_norm": 0.6964587869045884, "learning_rate": 1.5389456609894566e-05, "loss": 0.6117, "step": 10532 }, { "epoch": 0.30752386791626524, "grad_norm": 0.6671026337524942, "learning_rate": 1.538880778588808e-05, "loss": 0.6382, "step": 10533 }, { "epoch": 0.3075530641441126, "grad_norm": 0.6627306206417352, "learning_rate": 1.538815896188159e-05, "loss": 0.6442, "step": 10534 }, { "epoch": 0.30758226037195996, "grad_norm": 0.7106911839289108, "learning_rate": 1.5387510137875102e-05, "loss": 0.6996, "step": 10535 }, { "epoch": 0.3076114565998073, "grad_norm": 0.6744879671479539, "learning_rate": 1.5386861313868614e-05, "loss": 0.6617, "step": 10536 }, { "epoch": 0.3076406528276547, "grad_norm": 0.6200167076012875, "learning_rate": 1.5386212489862126e-05, "loss": 0.6104, "step": 10537 }, { "epoch": 0.30766984905550204, "grad_norm": 0.6358427009298028, "learning_rate": 1.538556366585564e-05, "loss": 0.6316, "step": 10538 }, { "epoch": 0.3076990452833494, "grad_norm": 0.7118649472347536, "learning_rate": 1.538491484184915e-05, "loss": 0.6716, "step": 10539 }, { "epoch": 0.30772824151119677, "grad_norm": 0.6661530035107186, "learning_rate": 1.5384266017842662e-05, "loss": 0.6484, "step": 10540 }, { "epoch": 0.3077574377390441, "grad_norm": 0.6854580911063645, "learning_rate": 1.5383617193836174e-05, "loss": 0.6557, "step": 10541 }, { "epoch": 0.3077866339668915, "grad_norm": 0.6519453748709754, "learning_rate": 1.5382968369829686e-05, "loss": 0.6407, "step": 10542 }, { "epoch": 0.30781583019473885, "grad_norm": 0.6587579537530784, "learning_rate": 1.5382319545823195e-05, "loss": 0.6189, "step": 10543 }, { "epoch": 0.3078450264225862, "grad_norm": 0.6845941513108946, "learning_rate": 1.5381670721816707e-05, "loss": 0.6735, "step": 10544 }, { "epoch": 0.30787422265043357, "grad_norm": 0.7142594132994023, "learning_rate": 1.538102189781022e-05, "loss": 0.6729, "step": 10545 }, { "epoch": 0.30790341887828093, "grad_norm": 0.6718822048295191, "learning_rate": 1.538037307380373e-05, "loss": 0.6248, "step": 10546 }, { "epoch": 0.3079326151061283, "grad_norm": 0.6120849777066097, "learning_rate": 1.5379724249797243e-05, "loss": 0.6063, "step": 10547 }, { "epoch": 0.30796181133397565, "grad_norm": 0.6726638437206363, "learning_rate": 1.5379075425790755e-05, "loss": 0.6645, "step": 10548 }, { "epoch": 0.307991007561823, "grad_norm": 0.645426746896575, "learning_rate": 1.5378426601784267e-05, "loss": 0.6263, "step": 10549 }, { "epoch": 0.3080202037896704, "grad_norm": 0.6331245360519219, "learning_rate": 1.537777777777778e-05, "loss": 0.5913, "step": 10550 }, { "epoch": 0.30804940001751774, "grad_norm": 0.6337953699071416, "learning_rate": 1.537712895377129e-05, "loss": 0.6384, "step": 10551 }, { "epoch": 0.3080785962453651, "grad_norm": 0.711563149488063, "learning_rate": 1.5376480129764803e-05, "loss": 0.7472, "step": 10552 }, { "epoch": 0.30810779247321246, "grad_norm": 0.5730984426043348, "learning_rate": 1.5375831305758315e-05, "loss": 0.5261, "step": 10553 }, { "epoch": 0.3081369887010598, "grad_norm": 0.6562685347605985, "learning_rate": 1.5375182481751827e-05, "loss": 0.652, "step": 10554 }, { "epoch": 0.3081661849289072, "grad_norm": 0.649369942219095, "learning_rate": 1.537453365774534e-05, "loss": 0.6464, "step": 10555 }, { "epoch": 0.30819538115675454, "grad_norm": 0.6416946614853478, "learning_rate": 1.537388483373885e-05, "loss": 0.5981, "step": 10556 }, { "epoch": 0.3082245773846019, "grad_norm": 0.6361535769823334, "learning_rate": 1.5373236009732363e-05, "loss": 0.5936, "step": 10557 }, { "epoch": 0.30825377361244927, "grad_norm": 0.727491680450938, "learning_rate": 1.5372587185725872e-05, "loss": 0.752, "step": 10558 }, { "epoch": 0.30828296984029663, "grad_norm": 0.6486627009619109, "learning_rate": 1.5371938361719384e-05, "loss": 0.5816, "step": 10559 }, { "epoch": 0.308312166068144, "grad_norm": 0.6652484142716293, "learning_rate": 1.5371289537712896e-05, "loss": 0.6487, "step": 10560 }, { "epoch": 0.30834136229599135, "grad_norm": 0.6671900425309532, "learning_rate": 1.5370640713706408e-05, "loss": 0.6813, "step": 10561 }, { "epoch": 0.3083705585238387, "grad_norm": 0.6332511422355925, "learning_rate": 1.536999188969992e-05, "loss": 0.6098, "step": 10562 }, { "epoch": 0.3083997547516861, "grad_norm": 0.7153424695043556, "learning_rate": 1.5369343065693432e-05, "loss": 0.7216, "step": 10563 }, { "epoch": 0.30842895097953343, "grad_norm": 0.6544425143705859, "learning_rate": 1.5368694241686944e-05, "loss": 0.6533, "step": 10564 }, { "epoch": 0.3084581472073808, "grad_norm": 0.6858245441917332, "learning_rate": 1.5368045417680456e-05, "loss": 0.7426, "step": 10565 }, { "epoch": 0.30848734343522816, "grad_norm": 0.6582624736698157, "learning_rate": 1.5367396593673968e-05, "loss": 0.6973, "step": 10566 }, { "epoch": 0.3085165396630755, "grad_norm": 0.751994816453601, "learning_rate": 1.5366747769667477e-05, "loss": 0.7104, "step": 10567 }, { "epoch": 0.3085457358909229, "grad_norm": 0.6030628152305777, "learning_rate": 1.536609894566099e-05, "loss": 0.5794, "step": 10568 }, { "epoch": 0.30857493211877024, "grad_norm": 0.7410324351924292, "learning_rate": 1.53654501216545e-05, "loss": 0.6977, "step": 10569 }, { "epoch": 0.3086041283466176, "grad_norm": 0.6702820738119308, "learning_rate": 1.5364801297648013e-05, "loss": 0.6399, "step": 10570 }, { "epoch": 0.30863332457446496, "grad_norm": 0.6302059080493327, "learning_rate": 1.536415247364153e-05, "loss": 0.6121, "step": 10571 }, { "epoch": 0.3086625208023123, "grad_norm": 0.6851018604981554, "learning_rate": 1.5363503649635037e-05, "loss": 0.7235, "step": 10572 }, { "epoch": 0.3086917170301597, "grad_norm": 0.6995029610690914, "learning_rate": 1.536285482562855e-05, "loss": 0.6769, "step": 10573 }, { "epoch": 0.30872091325800705, "grad_norm": 0.5747924768126151, "learning_rate": 1.536220600162206e-05, "loss": 0.5113, "step": 10574 }, { "epoch": 0.3087501094858544, "grad_norm": 0.6171834606550163, "learning_rate": 1.5361557177615573e-05, "loss": 0.6043, "step": 10575 }, { "epoch": 0.30877930571370177, "grad_norm": 0.5843490116708454, "learning_rate": 1.5360908353609085e-05, "loss": 0.5252, "step": 10576 }, { "epoch": 0.30880850194154913, "grad_norm": 0.6236991615470527, "learning_rate": 1.5360259529602597e-05, "loss": 0.5883, "step": 10577 }, { "epoch": 0.3088376981693965, "grad_norm": 0.7375235447690737, "learning_rate": 1.535961070559611e-05, "loss": 0.6894, "step": 10578 }, { "epoch": 0.30886689439724385, "grad_norm": 0.5947121333177274, "learning_rate": 1.535896188158962e-05, "loss": 0.5227, "step": 10579 }, { "epoch": 0.3088960906250912, "grad_norm": 0.6103339641479681, "learning_rate": 1.5358313057583133e-05, "loss": 0.5842, "step": 10580 }, { "epoch": 0.3089252868529386, "grad_norm": 0.6231861612866052, "learning_rate": 1.5357664233576642e-05, "loss": 0.549, "step": 10581 }, { "epoch": 0.30895448308078594, "grad_norm": 0.6007439809679757, "learning_rate": 1.5357015409570154e-05, "loss": 0.5441, "step": 10582 }, { "epoch": 0.30898367930863335, "grad_norm": 0.7120739036117044, "learning_rate": 1.5356366585563666e-05, "loss": 0.6613, "step": 10583 }, { "epoch": 0.3090128755364807, "grad_norm": 0.663956884871976, "learning_rate": 1.5355717761557178e-05, "loss": 0.6267, "step": 10584 }, { "epoch": 0.3090420717643281, "grad_norm": 1.2781492712178484, "learning_rate": 1.535506893755069e-05, "loss": 0.5501, "step": 10585 }, { "epoch": 0.30907126799217544, "grad_norm": 0.6519853262208319, "learning_rate": 1.5354420113544202e-05, "loss": 0.5909, "step": 10586 }, { "epoch": 0.3091004642200228, "grad_norm": 0.6853274623040727, "learning_rate": 1.5353771289537714e-05, "loss": 0.7343, "step": 10587 }, { "epoch": 0.30912966044787016, "grad_norm": 0.665298655900243, "learning_rate": 1.5353122465531226e-05, "loss": 0.5962, "step": 10588 }, { "epoch": 0.3091588566757175, "grad_norm": 0.6714879263428134, "learning_rate": 1.5352473641524738e-05, "loss": 0.6568, "step": 10589 }, { "epoch": 0.3091880529035649, "grad_norm": 0.8497751191664581, "learning_rate": 1.535182481751825e-05, "loss": 0.6386, "step": 10590 }, { "epoch": 0.30921724913141224, "grad_norm": 0.6797771209709526, "learning_rate": 1.5351175993511762e-05, "loss": 0.7008, "step": 10591 }, { "epoch": 0.3092464453592596, "grad_norm": 0.6716682956208291, "learning_rate": 1.5350527169505274e-05, "loss": 0.6654, "step": 10592 }, { "epoch": 0.30927564158710696, "grad_norm": 0.6761198088448366, "learning_rate": 1.5349878345498786e-05, "loss": 0.6873, "step": 10593 }, { "epoch": 0.3093048378149543, "grad_norm": 0.6549399781406668, "learning_rate": 1.5349229521492298e-05, "loss": 0.6152, "step": 10594 }, { "epoch": 0.3093340340428017, "grad_norm": 0.6871822154505031, "learning_rate": 1.534858069748581e-05, "loss": 0.6714, "step": 10595 }, { "epoch": 0.30936323027064905, "grad_norm": 0.6203137184570325, "learning_rate": 1.534793187347932e-05, "loss": 0.5524, "step": 10596 }, { "epoch": 0.3093924264984964, "grad_norm": 0.6976221497480808, "learning_rate": 1.534728304947283e-05, "loss": 0.7346, "step": 10597 }, { "epoch": 0.30942162272634377, "grad_norm": 0.6555022284842518, "learning_rate": 1.5346634225466343e-05, "loss": 0.6124, "step": 10598 }, { "epoch": 0.30945081895419113, "grad_norm": 0.6328779615100473, "learning_rate": 1.5345985401459855e-05, "loss": 0.5888, "step": 10599 }, { "epoch": 0.3094800151820385, "grad_norm": 0.673388069173822, "learning_rate": 1.5345336577453367e-05, "loss": 0.6373, "step": 10600 }, { "epoch": 0.30950921140988585, "grad_norm": 0.6942774130009596, "learning_rate": 1.534468775344688e-05, "loss": 0.75, "step": 10601 }, { "epoch": 0.3095384076377332, "grad_norm": 0.6810905002177842, "learning_rate": 1.534403892944039e-05, "loss": 0.6384, "step": 10602 }, { "epoch": 0.3095676038655806, "grad_norm": 0.6692806579380353, "learning_rate": 1.5343390105433903e-05, "loss": 0.6512, "step": 10603 }, { "epoch": 0.30959680009342794, "grad_norm": 0.6480289867578832, "learning_rate": 1.5342741281427415e-05, "loss": 0.596, "step": 10604 }, { "epoch": 0.3096259963212753, "grad_norm": 0.6713491898006644, "learning_rate": 1.5342092457420924e-05, "loss": 0.6648, "step": 10605 }, { "epoch": 0.30965519254912266, "grad_norm": 0.666034582960814, "learning_rate": 1.5341443633414436e-05, "loss": 0.6382, "step": 10606 }, { "epoch": 0.30968438877697, "grad_norm": 0.696175857378616, "learning_rate": 1.5340794809407948e-05, "loss": 0.6428, "step": 10607 }, { "epoch": 0.3097135850048174, "grad_norm": 0.6151201230364497, "learning_rate": 1.534014598540146e-05, "loss": 0.576, "step": 10608 }, { "epoch": 0.30974278123266474, "grad_norm": 0.7009985531444936, "learning_rate": 1.5339497161394975e-05, "loss": 0.6524, "step": 10609 }, { "epoch": 0.3097719774605121, "grad_norm": 2.091496700194508, "learning_rate": 1.5338848337388484e-05, "loss": 0.7179, "step": 10610 }, { "epoch": 0.30980117368835947, "grad_norm": 0.6843057268062789, "learning_rate": 1.5338199513381996e-05, "loss": 0.6832, "step": 10611 }, { "epoch": 0.3098303699162068, "grad_norm": 0.6608392352532904, "learning_rate": 1.5337550689375508e-05, "loss": 0.5828, "step": 10612 }, { "epoch": 0.3098595661440542, "grad_norm": 0.6710278694739421, "learning_rate": 1.533690186536902e-05, "loss": 0.6219, "step": 10613 }, { "epoch": 0.30988876237190155, "grad_norm": 0.6738233767465922, "learning_rate": 1.5336253041362532e-05, "loss": 0.698, "step": 10614 }, { "epoch": 0.3099179585997489, "grad_norm": 0.6645823940941306, "learning_rate": 1.5335604217356044e-05, "loss": 0.6741, "step": 10615 }, { "epoch": 0.30994715482759627, "grad_norm": 0.6813192846160048, "learning_rate": 1.5334955393349556e-05, "loss": 0.6673, "step": 10616 }, { "epoch": 0.30997635105544363, "grad_norm": 0.7011951267848961, "learning_rate": 1.5334306569343068e-05, "loss": 0.6484, "step": 10617 }, { "epoch": 0.310005547283291, "grad_norm": 0.7197670064590751, "learning_rate": 1.533365774533658e-05, "loss": 0.7104, "step": 10618 }, { "epoch": 0.31003474351113836, "grad_norm": 0.6879867784790051, "learning_rate": 1.533300892133009e-05, "loss": 0.6252, "step": 10619 }, { "epoch": 0.3100639397389857, "grad_norm": 0.6276268811635617, "learning_rate": 1.53323600973236e-05, "loss": 0.5761, "step": 10620 }, { "epoch": 0.3100931359668331, "grad_norm": 0.7107295813407599, "learning_rate": 1.5331711273317113e-05, "loss": 0.7323, "step": 10621 }, { "epoch": 0.31012233219468044, "grad_norm": 0.6318943431006829, "learning_rate": 1.5331062449310625e-05, "loss": 0.5861, "step": 10622 }, { "epoch": 0.3101515284225278, "grad_norm": 0.7302814171717483, "learning_rate": 1.5330413625304137e-05, "loss": 0.763, "step": 10623 }, { "epoch": 0.31018072465037516, "grad_norm": 0.6454076963892269, "learning_rate": 1.532976480129765e-05, "loss": 0.5982, "step": 10624 }, { "epoch": 0.3102099208782225, "grad_norm": 0.6471017919060653, "learning_rate": 1.532911597729116e-05, "loss": 0.6081, "step": 10625 }, { "epoch": 0.3102391171060699, "grad_norm": 0.6462563588952828, "learning_rate": 1.5328467153284673e-05, "loss": 0.6341, "step": 10626 }, { "epoch": 0.31026831333391724, "grad_norm": 0.6857684551730093, "learning_rate": 1.5327818329278185e-05, "loss": 0.7089, "step": 10627 }, { "epoch": 0.3102975095617646, "grad_norm": 0.6620071298474507, "learning_rate": 1.5327169505271697e-05, "loss": 0.6243, "step": 10628 }, { "epoch": 0.31032670578961197, "grad_norm": 0.6681834052883128, "learning_rate": 1.532652068126521e-05, "loss": 0.5919, "step": 10629 }, { "epoch": 0.31035590201745933, "grad_norm": 0.7090530177096315, "learning_rate": 1.532587185725872e-05, "loss": 0.6121, "step": 10630 }, { "epoch": 0.3103850982453067, "grad_norm": 0.7063154818311026, "learning_rate": 1.5325223033252233e-05, "loss": 0.6916, "step": 10631 }, { "epoch": 0.31041429447315405, "grad_norm": 0.6427670656104316, "learning_rate": 1.5324574209245745e-05, "loss": 0.6235, "step": 10632 }, { "epoch": 0.3104434907010014, "grad_norm": 0.7824685682733326, "learning_rate": 1.5323925385239257e-05, "loss": 0.6502, "step": 10633 }, { "epoch": 0.3104726869288488, "grad_norm": 0.652827561652944, "learning_rate": 1.5323276561232766e-05, "loss": 0.6257, "step": 10634 }, { "epoch": 0.31050188315669613, "grad_norm": 0.5971912786193011, "learning_rate": 1.5322627737226278e-05, "loss": 0.5644, "step": 10635 }, { "epoch": 0.3105310793845435, "grad_norm": 0.6429118020150851, "learning_rate": 1.532197891321979e-05, "loss": 0.6143, "step": 10636 }, { "epoch": 0.31056027561239086, "grad_norm": 0.6274301391587399, "learning_rate": 1.5321330089213302e-05, "loss": 0.6038, "step": 10637 }, { "epoch": 0.3105894718402382, "grad_norm": 0.6854429046445604, "learning_rate": 1.5320681265206814e-05, "loss": 0.6478, "step": 10638 }, { "epoch": 0.3106186680680856, "grad_norm": 0.6498569845199307, "learning_rate": 1.5320032441200326e-05, "loss": 0.6299, "step": 10639 }, { "epoch": 0.31064786429593294, "grad_norm": 0.6932184494311961, "learning_rate": 1.5319383617193838e-05, "loss": 0.6694, "step": 10640 }, { "epoch": 0.3106770605237803, "grad_norm": 0.6674535995044385, "learning_rate": 1.531873479318735e-05, "loss": 0.6468, "step": 10641 }, { "epoch": 0.31070625675162766, "grad_norm": 0.6935560954093714, "learning_rate": 1.531808596918086e-05, "loss": 0.7274, "step": 10642 }, { "epoch": 0.310735452979475, "grad_norm": 0.6220041872022031, "learning_rate": 1.531743714517437e-05, "loss": 0.5993, "step": 10643 }, { "epoch": 0.31076464920732244, "grad_norm": 0.6362133328585661, "learning_rate": 1.5316788321167883e-05, "loss": 0.5557, "step": 10644 }, { "epoch": 0.3107938454351698, "grad_norm": 0.6534280409542293, "learning_rate": 1.5316139497161395e-05, "loss": 0.6323, "step": 10645 }, { "epoch": 0.31082304166301716, "grad_norm": 0.633405994497021, "learning_rate": 1.5315490673154907e-05, "loss": 0.5898, "step": 10646 }, { "epoch": 0.3108522378908645, "grad_norm": 0.6763993403391085, "learning_rate": 1.5314841849148422e-05, "loss": 0.6426, "step": 10647 }, { "epoch": 0.3108814341187119, "grad_norm": 0.6422406410233598, "learning_rate": 1.531419302514193e-05, "loss": 0.6048, "step": 10648 }, { "epoch": 0.31091063034655925, "grad_norm": 0.6329704511341434, "learning_rate": 1.5313544201135443e-05, "loss": 0.5753, "step": 10649 }, { "epoch": 0.3109398265744066, "grad_norm": 0.714311866624212, "learning_rate": 1.5312895377128955e-05, "loss": 0.6866, "step": 10650 }, { "epoch": 0.31096902280225397, "grad_norm": 0.7175286391658068, "learning_rate": 1.5312246553122467e-05, "loss": 0.7471, "step": 10651 }, { "epoch": 0.31099821903010133, "grad_norm": 0.6400336632152597, "learning_rate": 1.531159772911598e-05, "loss": 0.5842, "step": 10652 }, { "epoch": 0.3110274152579487, "grad_norm": 0.6937760373882403, "learning_rate": 1.531094890510949e-05, "loss": 0.6741, "step": 10653 }, { "epoch": 0.31105661148579605, "grad_norm": 0.6068999218439615, "learning_rate": 1.5310300081103003e-05, "loss": 0.559, "step": 10654 }, { "epoch": 0.3110858077136434, "grad_norm": 0.6902344188329219, "learning_rate": 1.5309651257096515e-05, "loss": 0.7035, "step": 10655 }, { "epoch": 0.3111150039414908, "grad_norm": 0.6455879200619633, "learning_rate": 1.5309002433090027e-05, "loss": 0.5518, "step": 10656 }, { "epoch": 0.31114420016933814, "grad_norm": 0.6495599631690432, "learning_rate": 1.5308353609083536e-05, "loss": 0.6427, "step": 10657 }, { "epoch": 0.3111733963971855, "grad_norm": 0.7038692990771983, "learning_rate": 1.5307704785077048e-05, "loss": 0.653, "step": 10658 }, { "epoch": 0.31120259262503286, "grad_norm": 0.6421423961370094, "learning_rate": 1.530705596107056e-05, "loss": 0.6257, "step": 10659 }, { "epoch": 0.3112317888528802, "grad_norm": 0.6801733405254218, "learning_rate": 1.5306407137064072e-05, "loss": 0.6079, "step": 10660 }, { "epoch": 0.3112609850807276, "grad_norm": 0.6538237540022356, "learning_rate": 1.5305758313057584e-05, "loss": 0.626, "step": 10661 }, { "epoch": 0.31129018130857494, "grad_norm": 0.6609244828260034, "learning_rate": 1.5305109489051096e-05, "loss": 0.6682, "step": 10662 }, { "epoch": 0.3113193775364223, "grad_norm": 0.6663123613524496, "learning_rate": 1.5304460665044608e-05, "loss": 0.6618, "step": 10663 }, { "epoch": 0.31134857376426966, "grad_norm": 0.6342540154833393, "learning_rate": 1.530381184103812e-05, "loss": 0.57, "step": 10664 }, { "epoch": 0.311377769992117, "grad_norm": 0.679935040125354, "learning_rate": 1.5303163017031632e-05, "loss": 0.6462, "step": 10665 }, { "epoch": 0.3114069662199644, "grad_norm": 0.6117243381486617, "learning_rate": 1.5302514193025144e-05, "loss": 0.502, "step": 10666 }, { "epoch": 0.31143616244781175, "grad_norm": 0.6775669722184053, "learning_rate": 1.5301865369018656e-05, "loss": 0.6702, "step": 10667 }, { "epoch": 0.3114653586756591, "grad_norm": 0.6389032973859977, "learning_rate": 1.5301216545012168e-05, "loss": 0.6395, "step": 10668 }, { "epoch": 0.31149455490350647, "grad_norm": 0.7017608367371463, "learning_rate": 1.530056772100568e-05, "loss": 0.6939, "step": 10669 }, { "epoch": 0.31152375113135383, "grad_norm": 0.6212285570598441, "learning_rate": 1.5299918896999192e-05, "loss": 0.5765, "step": 10670 }, { "epoch": 0.3115529473592012, "grad_norm": 0.7051208076697854, "learning_rate": 1.5299270072992704e-05, "loss": 0.7149, "step": 10671 }, { "epoch": 0.31158214358704855, "grad_norm": 0.6531954880786811, "learning_rate": 1.5298621248986213e-05, "loss": 0.6688, "step": 10672 }, { "epoch": 0.3116113398148959, "grad_norm": 0.6400489382336652, "learning_rate": 1.5297972424979725e-05, "loss": 0.6159, "step": 10673 }, { "epoch": 0.3116405360427433, "grad_norm": 0.617591939088925, "learning_rate": 1.5297323600973237e-05, "loss": 0.5593, "step": 10674 }, { "epoch": 0.31166973227059064, "grad_norm": 0.6365877645791124, "learning_rate": 1.529667477696675e-05, "loss": 0.5983, "step": 10675 }, { "epoch": 0.311698928498438, "grad_norm": 0.6531367672035379, "learning_rate": 1.529602595296026e-05, "loss": 0.6141, "step": 10676 }, { "epoch": 0.31172812472628536, "grad_norm": 0.6101540905434585, "learning_rate": 1.5295377128953773e-05, "loss": 0.5677, "step": 10677 }, { "epoch": 0.3117573209541327, "grad_norm": 0.6589069350570882, "learning_rate": 1.5294728304947285e-05, "loss": 0.6345, "step": 10678 }, { "epoch": 0.3117865171819801, "grad_norm": 0.6580509709319452, "learning_rate": 1.5294079480940797e-05, "loss": 0.6261, "step": 10679 }, { "epoch": 0.31181571340982744, "grad_norm": 0.6693613575158203, "learning_rate": 1.5293430656934306e-05, "loss": 0.6127, "step": 10680 }, { "epoch": 0.3118449096376748, "grad_norm": 1.2066568134323155, "learning_rate": 1.5292781832927818e-05, "loss": 0.6513, "step": 10681 }, { "epoch": 0.31187410586552217, "grad_norm": 0.6374882425588738, "learning_rate": 1.529213300892133e-05, "loss": 0.5885, "step": 10682 }, { "epoch": 0.3119033020933695, "grad_norm": 0.7021505964923129, "learning_rate": 1.529148418491484e-05, "loss": 0.7184, "step": 10683 }, { "epoch": 0.3119324983212169, "grad_norm": 0.6584994460845798, "learning_rate": 1.5290835360908357e-05, "loss": 0.6383, "step": 10684 }, { "epoch": 0.31196169454906425, "grad_norm": 0.6258493334465364, "learning_rate": 1.529018653690187e-05, "loss": 0.6172, "step": 10685 }, { "epoch": 0.3119908907769116, "grad_norm": 0.6616847272299011, "learning_rate": 1.5289537712895378e-05, "loss": 0.6677, "step": 10686 }, { "epoch": 0.312020087004759, "grad_norm": 0.6786591037229929, "learning_rate": 1.528888888888889e-05, "loss": 0.6558, "step": 10687 }, { "epoch": 0.31204928323260633, "grad_norm": 0.6191656678607526, "learning_rate": 1.5288240064882402e-05, "loss": 0.5807, "step": 10688 }, { "epoch": 0.3120784794604537, "grad_norm": 0.6640024351145742, "learning_rate": 1.5287591240875914e-05, "loss": 0.6642, "step": 10689 }, { "epoch": 0.31210767568830106, "grad_norm": 0.6342362552467564, "learning_rate": 1.5286942416869426e-05, "loss": 0.6011, "step": 10690 }, { "epoch": 0.3121368719161484, "grad_norm": 0.6860209500281056, "learning_rate": 1.5286293592862938e-05, "loss": 0.6384, "step": 10691 }, { "epoch": 0.3121660681439958, "grad_norm": 0.6100530587781994, "learning_rate": 1.528564476885645e-05, "loss": 0.5299, "step": 10692 }, { "epoch": 0.31219526437184314, "grad_norm": 0.6390642683483045, "learning_rate": 1.5284995944849962e-05, "loss": 0.5838, "step": 10693 }, { "epoch": 0.3122244605996905, "grad_norm": 0.668395770218627, "learning_rate": 1.5284347120843474e-05, "loss": 0.6207, "step": 10694 }, { "epoch": 0.31225365682753786, "grad_norm": 0.648510161911493, "learning_rate": 1.5283698296836983e-05, "loss": 0.603, "step": 10695 }, { "epoch": 0.3122828530553852, "grad_norm": 0.6572066995366103, "learning_rate": 1.5283049472830495e-05, "loss": 0.6381, "step": 10696 }, { "epoch": 0.3123120492832326, "grad_norm": 0.6269268758280689, "learning_rate": 1.5282400648824007e-05, "loss": 0.6071, "step": 10697 }, { "epoch": 0.31234124551107995, "grad_norm": 0.6676200253686686, "learning_rate": 1.528175182481752e-05, "loss": 0.6187, "step": 10698 }, { "epoch": 0.3123704417389273, "grad_norm": 0.6772855408939489, "learning_rate": 1.528110300081103e-05, "loss": 0.6747, "step": 10699 }, { "epoch": 0.31239963796677467, "grad_norm": 0.7210000658055616, "learning_rate": 1.5280454176804543e-05, "loss": 0.6967, "step": 10700 }, { "epoch": 0.31242883419462203, "grad_norm": 0.6451246168597476, "learning_rate": 1.5279805352798055e-05, "loss": 0.6219, "step": 10701 }, { "epoch": 0.3124580304224694, "grad_norm": 0.660314665756386, "learning_rate": 1.5279156528791567e-05, "loss": 0.6234, "step": 10702 }, { "epoch": 0.31248722665031675, "grad_norm": 0.6628079008382579, "learning_rate": 1.527850770478508e-05, "loss": 0.6646, "step": 10703 }, { "epoch": 0.31251642287816417, "grad_norm": 0.6501192705281458, "learning_rate": 1.527785888077859e-05, "loss": 0.6274, "step": 10704 }, { "epoch": 0.31254561910601153, "grad_norm": 0.7396133777413422, "learning_rate": 1.5277210056772103e-05, "loss": 0.7171, "step": 10705 }, { "epoch": 0.3125748153338589, "grad_norm": 0.6129527889546453, "learning_rate": 1.5276561232765615e-05, "loss": 0.5533, "step": 10706 }, { "epoch": 0.31260401156170625, "grad_norm": 0.6284968779694443, "learning_rate": 1.5275912408759127e-05, "loss": 0.5876, "step": 10707 }, { "epoch": 0.3126332077895536, "grad_norm": 0.6354394358017246, "learning_rate": 1.527526358475264e-05, "loss": 0.5702, "step": 10708 }, { "epoch": 0.312662404017401, "grad_norm": 0.6766608279703258, "learning_rate": 1.527461476074615e-05, "loss": 0.7376, "step": 10709 }, { "epoch": 0.31269160024524834, "grad_norm": 0.6943213828438476, "learning_rate": 1.527396593673966e-05, "loss": 0.6783, "step": 10710 }, { "epoch": 0.3127207964730957, "grad_norm": 0.6239953727338283, "learning_rate": 1.527331711273317e-05, "loss": 0.59, "step": 10711 }, { "epoch": 0.31274999270094306, "grad_norm": 0.657122587353226, "learning_rate": 1.5272668288726684e-05, "loss": 0.668, "step": 10712 }, { "epoch": 0.3127791889287904, "grad_norm": 0.6542026196967307, "learning_rate": 1.5272019464720196e-05, "loss": 0.6265, "step": 10713 }, { "epoch": 0.3128083851566378, "grad_norm": 0.6568987217572502, "learning_rate": 1.5271370640713708e-05, "loss": 0.6644, "step": 10714 }, { "epoch": 0.31283758138448514, "grad_norm": 0.6669933680850701, "learning_rate": 1.527072181670722e-05, "loss": 0.6834, "step": 10715 }, { "epoch": 0.3128667776123325, "grad_norm": 0.7098662375055185, "learning_rate": 1.5270072992700732e-05, "loss": 0.7129, "step": 10716 }, { "epoch": 0.31289597384017986, "grad_norm": 0.712569198824068, "learning_rate": 1.5269424168694244e-05, "loss": 0.6259, "step": 10717 }, { "epoch": 0.3129251700680272, "grad_norm": 0.6892613317464038, "learning_rate": 1.5268775344687752e-05, "loss": 0.6903, "step": 10718 }, { "epoch": 0.3129543662958746, "grad_norm": 0.6658578791845695, "learning_rate": 1.5268126520681264e-05, "loss": 0.6211, "step": 10719 }, { "epoch": 0.31298356252372195, "grad_norm": 0.6594481174949214, "learning_rate": 1.5267477696674776e-05, "loss": 0.6492, "step": 10720 }, { "epoch": 0.3130127587515693, "grad_norm": 0.6526438744789109, "learning_rate": 1.526682887266829e-05, "loss": 0.6568, "step": 10721 }, { "epoch": 0.31304195497941667, "grad_norm": 0.6475511994214758, "learning_rate": 1.5266180048661804e-05, "loss": 0.593, "step": 10722 }, { "epoch": 0.31307115120726403, "grad_norm": 0.7391544780462226, "learning_rate": 1.5265531224655316e-05, "loss": 0.6168, "step": 10723 }, { "epoch": 0.3131003474351114, "grad_norm": 0.6587883573772911, "learning_rate": 1.5264882400648825e-05, "loss": 0.657, "step": 10724 }, { "epoch": 0.31312954366295875, "grad_norm": 0.642224295305335, "learning_rate": 1.5264233576642337e-05, "loss": 0.6105, "step": 10725 }, { "epoch": 0.3131587398908061, "grad_norm": 0.6451467873192914, "learning_rate": 1.526358475263585e-05, "loss": 0.5917, "step": 10726 }, { "epoch": 0.3131879361186535, "grad_norm": 0.6384235010283252, "learning_rate": 1.526293592862936e-05, "loss": 0.6208, "step": 10727 }, { "epoch": 0.31321713234650084, "grad_norm": 0.6610331986737171, "learning_rate": 1.5262287104622873e-05, "loss": 0.6874, "step": 10728 }, { "epoch": 0.3132463285743482, "grad_norm": 0.6614583229344425, "learning_rate": 1.5261638280616385e-05, "loss": 0.6172, "step": 10729 }, { "epoch": 0.31327552480219556, "grad_norm": 0.6177192348010063, "learning_rate": 1.5260989456609897e-05, "loss": 0.5105, "step": 10730 }, { "epoch": 0.3133047210300429, "grad_norm": 0.6465863118609566, "learning_rate": 1.526034063260341e-05, "loss": 0.5912, "step": 10731 }, { "epoch": 0.3133339172578903, "grad_norm": 0.6572688489480283, "learning_rate": 1.525969180859692e-05, "loss": 0.6162, "step": 10732 }, { "epoch": 0.31336311348573764, "grad_norm": 0.7607079956805101, "learning_rate": 1.525904298459043e-05, "loss": 0.7784, "step": 10733 }, { "epoch": 0.313392309713585, "grad_norm": 0.6337516648819955, "learning_rate": 1.5258394160583943e-05, "loss": 0.6392, "step": 10734 }, { "epoch": 0.31342150594143237, "grad_norm": 0.6544010403029908, "learning_rate": 1.5257745336577453e-05, "loss": 0.6243, "step": 10735 }, { "epoch": 0.3134507021692797, "grad_norm": 0.6458007106465843, "learning_rate": 1.5257096512570965e-05, "loss": 0.5891, "step": 10736 }, { "epoch": 0.3134798983971271, "grad_norm": 0.6619804446191044, "learning_rate": 1.5256447688564478e-05, "loss": 0.6344, "step": 10737 }, { "epoch": 0.31350909462497445, "grad_norm": 0.6963682840353722, "learning_rate": 1.525579886455799e-05, "loss": 0.6421, "step": 10738 }, { "epoch": 0.3135382908528218, "grad_norm": 0.6164159414383266, "learning_rate": 1.5255150040551502e-05, "loss": 0.5946, "step": 10739 }, { "epoch": 0.31356748708066917, "grad_norm": 0.6192358004282957, "learning_rate": 1.5254501216545012e-05, "loss": 0.5909, "step": 10740 }, { "epoch": 0.31359668330851653, "grad_norm": 0.6137675140576582, "learning_rate": 1.5253852392538526e-05, "loss": 0.5576, "step": 10741 }, { "epoch": 0.3136258795363639, "grad_norm": 0.6602367145244471, "learning_rate": 1.5253203568532038e-05, "loss": 0.663, "step": 10742 }, { "epoch": 0.31365507576421126, "grad_norm": 0.6764479839420509, "learning_rate": 1.525255474452555e-05, "loss": 0.6623, "step": 10743 }, { "epoch": 0.3136842719920586, "grad_norm": 0.6571951912437038, "learning_rate": 1.5251905920519062e-05, "loss": 0.696, "step": 10744 }, { "epoch": 0.313713468219906, "grad_norm": 0.6509157380822532, "learning_rate": 1.5251257096512572e-05, "loss": 0.6274, "step": 10745 }, { "epoch": 0.31374266444775334, "grad_norm": 0.6670519978133764, "learning_rate": 1.5250608272506084e-05, "loss": 0.6621, "step": 10746 }, { "epoch": 0.3137718606756007, "grad_norm": 0.5915791860125487, "learning_rate": 1.5249959448499596e-05, "loss": 0.5355, "step": 10747 }, { "epoch": 0.31380105690344806, "grad_norm": 0.6885029952448432, "learning_rate": 1.5249310624493108e-05, "loss": 0.662, "step": 10748 }, { "epoch": 0.3138302531312954, "grad_norm": 0.7102899029041108, "learning_rate": 1.524866180048662e-05, "loss": 0.7202, "step": 10749 }, { "epoch": 0.3138594493591428, "grad_norm": 0.656539060825472, "learning_rate": 1.524801297648013e-05, "loss": 0.6292, "step": 10750 }, { "epoch": 0.31388864558699014, "grad_norm": 0.6404681968156042, "learning_rate": 1.5247364152473643e-05, "loss": 0.6429, "step": 10751 }, { "epoch": 0.3139178418148375, "grad_norm": 0.6457841430533848, "learning_rate": 1.5246715328467155e-05, "loss": 0.6059, "step": 10752 }, { "epoch": 0.31394703804268487, "grad_norm": 0.7062500704850366, "learning_rate": 1.5246066504460667e-05, "loss": 0.6768, "step": 10753 }, { "epoch": 0.31397623427053223, "grad_norm": 0.7265493554372906, "learning_rate": 1.5245417680454177e-05, "loss": 0.6373, "step": 10754 }, { "epoch": 0.3140054304983796, "grad_norm": 0.6745820828096423, "learning_rate": 1.5244768856447689e-05, "loss": 0.6962, "step": 10755 }, { "epoch": 0.31403462672622695, "grad_norm": 0.6254870305155925, "learning_rate": 1.5244120032441201e-05, "loss": 0.6257, "step": 10756 }, { "epoch": 0.3140638229540743, "grad_norm": 0.5900536959046685, "learning_rate": 1.5243471208434713e-05, "loss": 0.5504, "step": 10757 }, { "epoch": 0.3140930191819217, "grad_norm": 0.6208547965892226, "learning_rate": 1.5242822384428223e-05, "loss": 0.558, "step": 10758 }, { "epoch": 0.31412221540976903, "grad_norm": 0.6621357933381458, "learning_rate": 1.5242173560421735e-05, "loss": 0.644, "step": 10759 }, { "epoch": 0.3141514116376164, "grad_norm": 0.6566482682746781, "learning_rate": 1.5241524736415249e-05, "loss": 0.6197, "step": 10760 }, { "epoch": 0.31418060786546376, "grad_norm": 0.6455843394850995, "learning_rate": 1.5240875912408761e-05, "loss": 0.6469, "step": 10761 }, { "epoch": 0.3142098040933111, "grad_norm": 0.7007493171252266, "learning_rate": 1.5240227088402273e-05, "loss": 0.6507, "step": 10762 }, { "epoch": 0.3142390003211585, "grad_norm": 0.6375671113680197, "learning_rate": 1.5239578264395785e-05, "loss": 0.5096, "step": 10763 }, { "epoch": 0.3142681965490059, "grad_norm": 0.6671563865418645, "learning_rate": 1.5238929440389295e-05, "loss": 0.5594, "step": 10764 }, { "epoch": 0.31429739277685326, "grad_norm": 0.6780488479064615, "learning_rate": 1.5238280616382807e-05, "loss": 0.7216, "step": 10765 }, { "epoch": 0.3143265890047006, "grad_norm": 0.6579474408884847, "learning_rate": 1.523763179237632e-05, "loss": 0.6002, "step": 10766 }, { "epoch": 0.314355785232548, "grad_norm": 0.6307399334102995, "learning_rate": 1.5236982968369832e-05, "loss": 0.5729, "step": 10767 }, { "epoch": 0.31438498146039534, "grad_norm": 0.7024670433067881, "learning_rate": 1.5236334144363344e-05, "loss": 0.7358, "step": 10768 }, { "epoch": 0.3144141776882427, "grad_norm": 0.6982146704671752, "learning_rate": 1.5235685320356854e-05, "loss": 0.6583, "step": 10769 }, { "epoch": 0.31444337391609006, "grad_norm": 0.6666629791596997, "learning_rate": 1.5235036496350366e-05, "loss": 0.6647, "step": 10770 }, { "epoch": 0.3144725701439374, "grad_norm": 0.6904589897788956, "learning_rate": 1.5234387672343878e-05, "loss": 0.5745, "step": 10771 }, { "epoch": 0.3145017663717848, "grad_norm": 0.6376831938854629, "learning_rate": 1.523373884833739e-05, "loss": 0.6076, "step": 10772 }, { "epoch": 0.31453096259963215, "grad_norm": 0.6100619725128805, "learning_rate": 1.52330900243309e-05, "loss": 0.5648, "step": 10773 }, { "epoch": 0.3145601588274795, "grad_norm": 0.6077080320924046, "learning_rate": 1.5232441200324412e-05, "loss": 0.5881, "step": 10774 }, { "epoch": 0.31458935505532687, "grad_norm": 0.6257230132778925, "learning_rate": 1.5231792376317924e-05, "loss": 0.6143, "step": 10775 }, { "epoch": 0.31461855128317423, "grad_norm": 0.8372044626694372, "learning_rate": 1.5231143552311436e-05, "loss": 0.6959, "step": 10776 }, { "epoch": 0.3146477475110216, "grad_norm": 0.6484188281123857, "learning_rate": 1.5230494728304947e-05, "loss": 0.657, "step": 10777 }, { "epoch": 0.31467694373886895, "grad_norm": 0.6397722174777778, "learning_rate": 1.5229845904298459e-05, "loss": 0.6526, "step": 10778 }, { "epoch": 0.3147061399667163, "grad_norm": 0.6383917298619376, "learning_rate": 1.5229197080291972e-05, "loss": 0.5709, "step": 10779 }, { "epoch": 0.3147353361945637, "grad_norm": 0.6452330021313605, "learning_rate": 1.5228548256285485e-05, "loss": 0.6129, "step": 10780 }, { "epoch": 0.31476453242241104, "grad_norm": 0.6646179101788631, "learning_rate": 1.5227899432278997e-05, "loss": 0.646, "step": 10781 }, { "epoch": 0.3147937286502584, "grad_norm": 0.6758650315641399, "learning_rate": 1.5227250608272509e-05, "loss": 0.6421, "step": 10782 }, { "epoch": 0.31482292487810576, "grad_norm": 0.6383457774237571, "learning_rate": 1.5226601784266019e-05, "loss": 0.6313, "step": 10783 }, { "epoch": 0.3148521211059531, "grad_norm": 0.727603151880523, "learning_rate": 1.5225952960259531e-05, "loss": 0.8002, "step": 10784 }, { "epoch": 0.3148813173338005, "grad_norm": 0.745320127461281, "learning_rate": 1.5225304136253043e-05, "loss": 0.739, "step": 10785 }, { "epoch": 0.31491051356164784, "grad_norm": 0.6733097497935235, "learning_rate": 1.5224655312246555e-05, "loss": 0.6953, "step": 10786 }, { "epoch": 0.3149397097894952, "grad_norm": 0.6786590044281543, "learning_rate": 1.5224006488240067e-05, "loss": 0.6593, "step": 10787 }, { "epoch": 0.31496890601734256, "grad_norm": 0.6891510163182619, "learning_rate": 1.5223357664233577e-05, "loss": 0.6901, "step": 10788 }, { "epoch": 0.3149981022451899, "grad_norm": 0.6677542680166587, "learning_rate": 1.522270884022709e-05, "loss": 0.6541, "step": 10789 }, { "epoch": 0.3150272984730373, "grad_norm": 0.6561403210618546, "learning_rate": 1.5222060016220601e-05, "loss": 0.6007, "step": 10790 }, { "epoch": 0.31505649470088465, "grad_norm": 0.643336837401761, "learning_rate": 1.5221411192214113e-05, "loss": 0.6316, "step": 10791 }, { "epoch": 0.315085690928732, "grad_norm": 0.6250517106363915, "learning_rate": 1.5220762368207624e-05, "loss": 0.5828, "step": 10792 }, { "epoch": 0.31511488715657937, "grad_norm": 0.6873771792435278, "learning_rate": 1.5220113544201136e-05, "loss": 0.6981, "step": 10793 }, { "epoch": 0.31514408338442673, "grad_norm": 0.662515631332877, "learning_rate": 1.5219464720194648e-05, "loss": 0.6362, "step": 10794 }, { "epoch": 0.3151732796122741, "grad_norm": 0.6850010825934182, "learning_rate": 1.521881589618816e-05, "loss": 0.6836, "step": 10795 }, { "epoch": 0.31520247584012145, "grad_norm": 0.6868909509533265, "learning_rate": 1.521816707218167e-05, "loss": 0.6304, "step": 10796 }, { "epoch": 0.3152316720679688, "grad_norm": 0.6583093137172232, "learning_rate": 1.5217518248175182e-05, "loss": 0.6647, "step": 10797 }, { "epoch": 0.3152608682958162, "grad_norm": 0.6183533196480949, "learning_rate": 1.5216869424168696e-05, "loss": 0.6051, "step": 10798 }, { "epoch": 0.31529006452366354, "grad_norm": 0.6804053926199961, "learning_rate": 1.5216220600162208e-05, "loss": 0.6652, "step": 10799 }, { "epoch": 0.3153192607515109, "grad_norm": 0.7018468329382244, "learning_rate": 1.521557177615572e-05, "loss": 0.634, "step": 10800 }, { "epoch": 0.31534845697935826, "grad_norm": 0.6898102592242596, "learning_rate": 1.5214922952149232e-05, "loss": 0.7141, "step": 10801 }, { "epoch": 0.3153776532072056, "grad_norm": 0.6428345319251723, "learning_rate": 1.5214274128142742e-05, "loss": 0.5978, "step": 10802 }, { "epoch": 0.315406849435053, "grad_norm": 0.6937336727285875, "learning_rate": 1.5213625304136254e-05, "loss": 0.6962, "step": 10803 }, { "epoch": 0.31543604566290034, "grad_norm": 0.6274922894136632, "learning_rate": 1.5212976480129766e-05, "loss": 0.5863, "step": 10804 }, { "epoch": 0.3154652418907477, "grad_norm": 0.6853699378207532, "learning_rate": 1.5212327656123278e-05, "loss": 0.6899, "step": 10805 }, { "epoch": 0.31549443811859507, "grad_norm": 0.6346345462722974, "learning_rate": 1.521167883211679e-05, "loss": 0.6405, "step": 10806 }, { "epoch": 0.3155236343464424, "grad_norm": 0.6082793012626916, "learning_rate": 1.52110300081103e-05, "loss": 0.5652, "step": 10807 }, { "epoch": 0.3155528305742898, "grad_norm": 0.6523717304872831, "learning_rate": 1.5210381184103813e-05, "loss": 0.616, "step": 10808 }, { "epoch": 0.31558202680213715, "grad_norm": 0.6311193389442451, "learning_rate": 1.5209732360097325e-05, "loss": 0.6298, "step": 10809 }, { "epoch": 0.3156112230299845, "grad_norm": 0.6331392276044093, "learning_rate": 1.5209083536090837e-05, "loss": 0.5802, "step": 10810 }, { "epoch": 0.3156404192578319, "grad_norm": 0.6813659553142225, "learning_rate": 1.5208434712084347e-05, "loss": 0.669, "step": 10811 }, { "epoch": 0.31566961548567923, "grad_norm": 0.7128748036412964, "learning_rate": 1.520778588807786e-05, "loss": 0.6894, "step": 10812 }, { "epoch": 0.3156988117135266, "grad_norm": 0.6415019647700086, "learning_rate": 1.5207137064071371e-05, "loss": 0.6059, "step": 10813 }, { "epoch": 0.31572800794137396, "grad_norm": 0.6913559074054884, "learning_rate": 1.5206488240064883e-05, "loss": 0.617, "step": 10814 }, { "epoch": 0.3157572041692213, "grad_norm": 0.6350153838886109, "learning_rate": 1.5205839416058394e-05, "loss": 0.5799, "step": 10815 }, { "epoch": 0.3157864003970687, "grad_norm": 0.6976498834141982, "learning_rate": 1.5205190592051906e-05, "loss": 0.6772, "step": 10816 }, { "epoch": 0.31581559662491604, "grad_norm": 0.7135698929483151, "learning_rate": 1.520454176804542e-05, "loss": 0.6817, "step": 10817 }, { "epoch": 0.3158447928527634, "grad_norm": 0.6454620588199151, "learning_rate": 1.5203892944038931e-05, "loss": 0.6217, "step": 10818 }, { "epoch": 0.31587398908061076, "grad_norm": 0.6062434547559196, "learning_rate": 1.5203244120032443e-05, "loss": 0.5459, "step": 10819 }, { "epoch": 0.3159031853084581, "grad_norm": 0.67864983401436, "learning_rate": 1.5202595296025955e-05, "loss": 0.7242, "step": 10820 }, { "epoch": 0.3159323815363055, "grad_norm": 0.6181891106069374, "learning_rate": 1.5201946472019466e-05, "loss": 0.5458, "step": 10821 }, { "epoch": 0.31596157776415285, "grad_norm": 0.6685099767122407, "learning_rate": 1.5201297648012978e-05, "loss": 0.6222, "step": 10822 }, { "epoch": 0.3159907739920002, "grad_norm": 0.6700329125789342, "learning_rate": 1.520064882400649e-05, "loss": 0.6524, "step": 10823 }, { "epoch": 0.31601997021984757, "grad_norm": 0.6859360934726726, "learning_rate": 1.5200000000000002e-05, "loss": 0.6958, "step": 10824 }, { "epoch": 0.316049166447695, "grad_norm": 0.6652325933343544, "learning_rate": 1.5199351175993514e-05, "loss": 0.5992, "step": 10825 }, { "epoch": 0.31607836267554235, "grad_norm": 0.6966437348829674, "learning_rate": 1.5198702351987024e-05, "loss": 0.6967, "step": 10826 }, { "epoch": 0.3161075589033897, "grad_norm": 0.6601085221134538, "learning_rate": 1.5198053527980536e-05, "loss": 0.6626, "step": 10827 }, { "epoch": 0.31613675513123707, "grad_norm": 0.6565142336237733, "learning_rate": 1.5197404703974048e-05, "loss": 0.6275, "step": 10828 }, { "epoch": 0.31616595135908443, "grad_norm": 0.7406731028358056, "learning_rate": 1.519675587996756e-05, "loss": 0.6466, "step": 10829 }, { "epoch": 0.3161951475869318, "grad_norm": 0.7264493306339365, "learning_rate": 1.519610705596107e-05, "loss": 0.7475, "step": 10830 }, { "epoch": 0.31622434381477915, "grad_norm": 0.6910590537488513, "learning_rate": 1.5195458231954583e-05, "loss": 0.6443, "step": 10831 }, { "epoch": 0.3162535400426265, "grad_norm": 0.6454441099196052, "learning_rate": 1.5194809407948095e-05, "loss": 0.5833, "step": 10832 }, { "epoch": 0.3162827362704739, "grad_norm": 0.672257971917054, "learning_rate": 1.5194160583941607e-05, "loss": 0.6347, "step": 10833 }, { "epoch": 0.31631193249832124, "grad_norm": 0.6983309961014696, "learning_rate": 1.5193511759935117e-05, "loss": 0.6624, "step": 10834 }, { "epoch": 0.3163411287261686, "grad_norm": 0.6537562389103917, "learning_rate": 1.5192862935928632e-05, "loss": 0.5636, "step": 10835 }, { "epoch": 0.31637032495401596, "grad_norm": 0.6238458081836079, "learning_rate": 1.5192214111922143e-05, "loss": 0.6132, "step": 10836 }, { "epoch": 0.3163995211818633, "grad_norm": 0.6264257999515371, "learning_rate": 1.5191565287915655e-05, "loss": 0.5724, "step": 10837 }, { "epoch": 0.3164287174097107, "grad_norm": 0.6880639434235828, "learning_rate": 1.5190916463909167e-05, "loss": 0.672, "step": 10838 }, { "epoch": 0.31645791363755804, "grad_norm": 0.7023014067440206, "learning_rate": 1.5190267639902679e-05, "loss": 0.6177, "step": 10839 }, { "epoch": 0.3164871098654054, "grad_norm": 0.5975430379522841, "learning_rate": 1.518961881589619e-05, "loss": 0.562, "step": 10840 }, { "epoch": 0.31651630609325276, "grad_norm": 0.6460018653693569, "learning_rate": 1.5188969991889701e-05, "loss": 0.6793, "step": 10841 }, { "epoch": 0.3165455023211001, "grad_norm": 0.6552976888283698, "learning_rate": 1.5188321167883213e-05, "loss": 0.6395, "step": 10842 }, { "epoch": 0.3165746985489475, "grad_norm": 0.7292732980093325, "learning_rate": 1.5187672343876725e-05, "loss": 0.6713, "step": 10843 }, { "epoch": 0.31660389477679485, "grad_norm": 0.6370526921320916, "learning_rate": 1.5187023519870237e-05, "loss": 0.6173, "step": 10844 }, { "epoch": 0.3166330910046422, "grad_norm": 0.6487256290422228, "learning_rate": 1.5186374695863748e-05, "loss": 0.6366, "step": 10845 }, { "epoch": 0.31666228723248957, "grad_norm": 0.6798517220115151, "learning_rate": 1.518572587185726e-05, "loss": 0.6658, "step": 10846 }, { "epoch": 0.31669148346033693, "grad_norm": 0.5904685244088136, "learning_rate": 1.5185077047850772e-05, "loss": 0.5987, "step": 10847 }, { "epoch": 0.3167206796881843, "grad_norm": 0.6201692774755438, "learning_rate": 1.5184428223844284e-05, "loss": 0.5943, "step": 10848 }, { "epoch": 0.31674987591603165, "grad_norm": 0.6717808781903756, "learning_rate": 1.5183779399837794e-05, "loss": 0.6531, "step": 10849 }, { "epoch": 0.316779072143879, "grad_norm": 0.6594441339795596, "learning_rate": 1.5183130575831306e-05, "loss": 0.6802, "step": 10850 }, { "epoch": 0.3168082683717264, "grad_norm": 0.6415844356885291, "learning_rate": 1.5182481751824818e-05, "loss": 0.5798, "step": 10851 }, { "epoch": 0.31683746459957374, "grad_norm": 0.6945043797674324, "learning_rate": 1.518183292781833e-05, "loss": 0.6675, "step": 10852 }, { "epoch": 0.3168666608274211, "grad_norm": 0.6944049465180505, "learning_rate": 1.518118410381184e-05, "loss": 0.6934, "step": 10853 }, { "epoch": 0.31689585705526846, "grad_norm": 0.6514361020508702, "learning_rate": 1.5180535279805356e-05, "loss": 0.6416, "step": 10854 }, { "epoch": 0.3169250532831158, "grad_norm": 0.6630427431455815, "learning_rate": 1.5179886455798866e-05, "loss": 0.6201, "step": 10855 }, { "epoch": 0.3169542495109632, "grad_norm": 0.6516859258053969, "learning_rate": 1.5179237631792378e-05, "loss": 0.6248, "step": 10856 }, { "epoch": 0.31698344573881054, "grad_norm": 0.7110509646232113, "learning_rate": 1.517858880778589e-05, "loss": 0.7628, "step": 10857 }, { "epoch": 0.3170126419666579, "grad_norm": 0.7223133247047214, "learning_rate": 1.5177939983779402e-05, "loss": 0.7141, "step": 10858 }, { "epoch": 0.31704183819450527, "grad_norm": 0.6302629597744369, "learning_rate": 1.5177291159772913e-05, "loss": 0.6014, "step": 10859 }, { "epoch": 0.3170710344223526, "grad_norm": 0.6672592169733788, "learning_rate": 1.5176642335766425e-05, "loss": 0.6722, "step": 10860 }, { "epoch": 0.3171002306502, "grad_norm": 0.6928873728011056, "learning_rate": 1.5175993511759937e-05, "loss": 0.6414, "step": 10861 }, { "epoch": 0.31712942687804735, "grad_norm": 0.6061605897553802, "learning_rate": 1.5175344687753449e-05, "loss": 0.5467, "step": 10862 }, { "epoch": 0.3171586231058947, "grad_norm": 0.6458292969246515, "learning_rate": 1.517469586374696e-05, "loss": 0.6054, "step": 10863 }, { "epoch": 0.31718781933374207, "grad_norm": 0.6556359685443085, "learning_rate": 1.5174047039740471e-05, "loss": 0.6358, "step": 10864 }, { "epoch": 0.31721701556158943, "grad_norm": 0.6889727448732047, "learning_rate": 1.5173398215733983e-05, "loss": 0.6953, "step": 10865 }, { "epoch": 0.3172462117894368, "grad_norm": 0.6614856012677265, "learning_rate": 1.5172749391727495e-05, "loss": 0.6376, "step": 10866 }, { "epoch": 0.31727540801728416, "grad_norm": 0.6062618596617787, "learning_rate": 1.5172100567721007e-05, "loss": 0.5566, "step": 10867 }, { "epoch": 0.3173046042451315, "grad_norm": 0.6470194570549189, "learning_rate": 1.5171451743714517e-05, "loss": 0.5836, "step": 10868 }, { "epoch": 0.3173338004729789, "grad_norm": 0.6487075344731592, "learning_rate": 1.517080291970803e-05, "loss": 0.5843, "step": 10869 }, { "epoch": 0.31736299670082624, "grad_norm": 0.6792032616500099, "learning_rate": 1.5170154095701542e-05, "loss": 0.6701, "step": 10870 }, { "epoch": 0.3173921929286736, "grad_norm": 0.6416174960861578, "learning_rate": 1.5169505271695054e-05, "loss": 0.6211, "step": 10871 }, { "epoch": 0.31742138915652096, "grad_norm": 0.6578210988117712, "learning_rate": 1.5168856447688564e-05, "loss": 0.645, "step": 10872 }, { "epoch": 0.3174505853843683, "grad_norm": 0.7024098939153554, "learning_rate": 1.516820762368208e-05, "loss": 0.7464, "step": 10873 }, { "epoch": 0.3174797816122157, "grad_norm": 0.672861498098201, "learning_rate": 1.516755879967559e-05, "loss": 0.6784, "step": 10874 }, { "epoch": 0.31750897784006304, "grad_norm": 0.6661567242485495, "learning_rate": 1.5166909975669102e-05, "loss": 0.6647, "step": 10875 }, { "epoch": 0.3175381740679104, "grad_norm": 0.6645625616064728, "learning_rate": 1.5166261151662614e-05, "loss": 0.6599, "step": 10876 }, { "epoch": 0.31756737029575777, "grad_norm": 0.6663749322528446, "learning_rate": 1.5165612327656126e-05, "loss": 0.6685, "step": 10877 }, { "epoch": 0.31759656652360513, "grad_norm": 0.6280166464154142, "learning_rate": 1.5164963503649636e-05, "loss": 0.561, "step": 10878 }, { "epoch": 0.3176257627514525, "grad_norm": 0.6574272237297936, "learning_rate": 1.5164314679643148e-05, "loss": 0.6092, "step": 10879 }, { "epoch": 0.31765495897929985, "grad_norm": 0.6137836462354658, "learning_rate": 1.516366585563666e-05, "loss": 0.5713, "step": 10880 }, { "epoch": 0.3176841552071472, "grad_norm": 0.6357026908223946, "learning_rate": 1.5163017031630172e-05, "loss": 0.5882, "step": 10881 }, { "epoch": 0.3177133514349946, "grad_norm": 0.61928608480349, "learning_rate": 1.5162368207623682e-05, "loss": 0.5273, "step": 10882 }, { "epoch": 0.31774254766284193, "grad_norm": 0.6653099784169969, "learning_rate": 1.5161719383617195e-05, "loss": 0.6547, "step": 10883 }, { "epoch": 0.3177717438906893, "grad_norm": 0.6533250084420291, "learning_rate": 1.5161070559610707e-05, "loss": 0.6347, "step": 10884 }, { "epoch": 0.3178009401185367, "grad_norm": 0.6367267768464531, "learning_rate": 1.5160421735604219e-05, "loss": 0.6204, "step": 10885 }, { "epoch": 0.3178301363463841, "grad_norm": 0.6800769583296996, "learning_rate": 1.515977291159773e-05, "loss": 0.682, "step": 10886 }, { "epoch": 0.31785933257423143, "grad_norm": 0.6230801862609364, "learning_rate": 1.5159124087591241e-05, "loss": 0.565, "step": 10887 }, { "epoch": 0.3178885288020788, "grad_norm": 0.675024487711066, "learning_rate": 1.5158475263584753e-05, "loss": 0.6911, "step": 10888 }, { "epoch": 0.31791772502992616, "grad_norm": 0.6698036045586182, "learning_rate": 1.5157826439578265e-05, "loss": 0.6469, "step": 10889 }, { "epoch": 0.3179469212577735, "grad_norm": 0.6435033863522974, "learning_rate": 1.5157177615571777e-05, "loss": 0.61, "step": 10890 }, { "epoch": 0.3179761174856209, "grad_norm": 0.6408489265992192, "learning_rate": 1.5156528791565287e-05, "loss": 0.5991, "step": 10891 }, { "epoch": 0.31800531371346824, "grad_norm": 0.7417267003795539, "learning_rate": 1.5155879967558803e-05, "loss": 0.7379, "step": 10892 }, { "epoch": 0.3180345099413156, "grad_norm": 0.6208904820487127, "learning_rate": 1.5155231143552313e-05, "loss": 0.5862, "step": 10893 }, { "epoch": 0.31806370616916296, "grad_norm": 0.6481299316880893, "learning_rate": 1.5154582319545825e-05, "loss": 0.5794, "step": 10894 }, { "epoch": 0.3180929023970103, "grad_norm": 0.7137691789648973, "learning_rate": 1.5153933495539337e-05, "loss": 0.7119, "step": 10895 }, { "epoch": 0.3181220986248577, "grad_norm": 0.6217414895931713, "learning_rate": 1.515328467153285e-05, "loss": 0.5699, "step": 10896 }, { "epoch": 0.31815129485270505, "grad_norm": 0.6950249475763359, "learning_rate": 1.515263584752636e-05, "loss": 0.6721, "step": 10897 }, { "epoch": 0.3181804910805524, "grad_norm": 0.6347482160129645, "learning_rate": 1.5151987023519872e-05, "loss": 0.6282, "step": 10898 }, { "epoch": 0.31820968730839977, "grad_norm": 0.6745158138686308, "learning_rate": 1.5151338199513384e-05, "loss": 0.6394, "step": 10899 }, { "epoch": 0.31823888353624713, "grad_norm": 0.6723791117644243, "learning_rate": 1.5150689375506896e-05, "loss": 0.6443, "step": 10900 }, { "epoch": 0.3182680797640945, "grad_norm": 0.6781073559133666, "learning_rate": 1.5150040551500406e-05, "loss": 0.6303, "step": 10901 }, { "epoch": 0.31829727599194185, "grad_norm": 0.7615829910851151, "learning_rate": 1.5149391727493918e-05, "loss": 0.7303, "step": 10902 }, { "epoch": 0.3183264722197892, "grad_norm": 0.6212160724262208, "learning_rate": 1.514874290348743e-05, "loss": 0.5442, "step": 10903 }, { "epoch": 0.3183556684476366, "grad_norm": 0.6871477363977939, "learning_rate": 1.5148094079480942e-05, "loss": 0.6937, "step": 10904 }, { "epoch": 0.31838486467548394, "grad_norm": 0.6892717975223002, "learning_rate": 1.5147445255474454e-05, "loss": 0.6591, "step": 10905 }, { "epoch": 0.3184140609033313, "grad_norm": 0.631429951045919, "learning_rate": 1.5146796431467964e-05, "loss": 0.6208, "step": 10906 }, { "epoch": 0.31844325713117866, "grad_norm": 0.7015573103468249, "learning_rate": 1.5146147607461476e-05, "loss": 0.7057, "step": 10907 }, { "epoch": 0.318472453359026, "grad_norm": 0.6430203755707776, "learning_rate": 1.5145498783454988e-05, "loss": 0.5739, "step": 10908 }, { "epoch": 0.3185016495868734, "grad_norm": 0.6996635584123186, "learning_rate": 1.51448499594485e-05, "loss": 0.7365, "step": 10909 }, { "epoch": 0.31853084581472074, "grad_norm": 0.7788673972893259, "learning_rate": 1.514420113544201e-05, "loss": 0.7457, "step": 10910 }, { "epoch": 0.3185600420425681, "grad_norm": 0.7000081299383432, "learning_rate": 1.5143552311435526e-05, "loss": 0.6251, "step": 10911 }, { "epoch": 0.31858923827041546, "grad_norm": 0.6444966699749961, "learning_rate": 1.5142903487429037e-05, "loss": 0.6324, "step": 10912 }, { "epoch": 0.3186184344982628, "grad_norm": 0.6531320558967393, "learning_rate": 1.5142254663422549e-05, "loss": 0.6373, "step": 10913 }, { "epoch": 0.3186476307261102, "grad_norm": 0.6411983771298835, "learning_rate": 1.514160583941606e-05, "loss": 0.6023, "step": 10914 }, { "epoch": 0.31867682695395755, "grad_norm": 0.6217006020593175, "learning_rate": 1.5140957015409573e-05, "loss": 0.5578, "step": 10915 }, { "epoch": 0.3187060231818049, "grad_norm": 0.655819574536157, "learning_rate": 1.5140308191403083e-05, "loss": 0.6556, "step": 10916 }, { "epoch": 0.31873521940965227, "grad_norm": 0.7553469102837024, "learning_rate": 1.5139659367396595e-05, "loss": 0.6803, "step": 10917 }, { "epoch": 0.31876441563749963, "grad_norm": 0.7167037770783546, "learning_rate": 1.5139010543390107e-05, "loss": 0.7367, "step": 10918 }, { "epoch": 0.318793611865347, "grad_norm": 0.6985357719222952, "learning_rate": 1.5138361719383619e-05, "loss": 0.6737, "step": 10919 }, { "epoch": 0.31882280809319435, "grad_norm": 0.67513723073576, "learning_rate": 1.513771289537713e-05, "loss": 0.6612, "step": 10920 }, { "epoch": 0.3188520043210417, "grad_norm": 0.633917484686429, "learning_rate": 1.5137064071370641e-05, "loss": 0.6366, "step": 10921 }, { "epoch": 0.3188812005488891, "grad_norm": 0.6254499340496498, "learning_rate": 1.5136415247364153e-05, "loss": 0.6022, "step": 10922 }, { "epoch": 0.31891039677673644, "grad_norm": 0.718889380916482, "learning_rate": 1.5135766423357665e-05, "loss": 0.6961, "step": 10923 }, { "epoch": 0.3189395930045838, "grad_norm": 0.700314984611493, "learning_rate": 1.5135117599351177e-05, "loss": 0.6128, "step": 10924 }, { "epoch": 0.31896878923243116, "grad_norm": 0.6433722954708244, "learning_rate": 1.5134468775344688e-05, "loss": 0.5577, "step": 10925 }, { "epoch": 0.3189979854602785, "grad_norm": 0.7147132519799189, "learning_rate": 1.51338199513382e-05, "loss": 0.6871, "step": 10926 }, { "epoch": 0.3190271816881259, "grad_norm": 0.6125750378139778, "learning_rate": 1.5133171127331712e-05, "loss": 0.5933, "step": 10927 }, { "epoch": 0.31905637791597324, "grad_norm": 0.6815380488286416, "learning_rate": 1.5132522303325224e-05, "loss": 0.6351, "step": 10928 }, { "epoch": 0.3190855741438206, "grad_norm": 0.6348344350834662, "learning_rate": 1.5131873479318734e-05, "loss": 0.5962, "step": 10929 }, { "epoch": 0.31911477037166797, "grad_norm": 0.6259531065599657, "learning_rate": 1.513122465531225e-05, "loss": 0.5843, "step": 10930 }, { "epoch": 0.3191439665995153, "grad_norm": 0.6622392433547886, "learning_rate": 1.513057583130576e-05, "loss": 0.6887, "step": 10931 }, { "epoch": 0.3191731628273627, "grad_norm": 0.6522758108336649, "learning_rate": 1.5129927007299272e-05, "loss": 0.6229, "step": 10932 }, { "epoch": 0.31920235905521005, "grad_norm": 0.6228859592608141, "learning_rate": 1.5129278183292784e-05, "loss": 0.556, "step": 10933 }, { "epoch": 0.3192315552830574, "grad_norm": 0.670423836550154, "learning_rate": 1.5128629359286296e-05, "loss": 0.6447, "step": 10934 }, { "epoch": 0.31926075151090477, "grad_norm": 0.6510470791178978, "learning_rate": 1.5127980535279806e-05, "loss": 0.635, "step": 10935 }, { "epoch": 0.31928994773875213, "grad_norm": 0.701174982659826, "learning_rate": 1.5127331711273318e-05, "loss": 0.639, "step": 10936 }, { "epoch": 0.3193191439665995, "grad_norm": 0.7377792841976459, "learning_rate": 1.512668288726683e-05, "loss": 0.7283, "step": 10937 }, { "epoch": 0.31934834019444686, "grad_norm": 0.5958367341599615, "learning_rate": 1.5126034063260342e-05, "loss": 0.5582, "step": 10938 }, { "epoch": 0.3193775364222942, "grad_norm": 0.6094532001257809, "learning_rate": 1.5125385239253853e-05, "loss": 0.5686, "step": 10939 }, { "epoch": 0.3194067326501416, "grad_norm": 0.5958895085321655, "learning_rate": 1.5124736415247365e-05, "loss": 0.534, "step": 10940 }, { "epoch": 0.31943592887798894, "grad_norm": 0.6126112694247948, "learning_rate": 1.5124087591240877e-05, "loss": 0.5233, "step": 10941 }, { "epoch": 0.3194651251058363, "grad_norm": 0.6546365154323281, "learning_rate": 1.5123438767234389e-05, "loss": 0.6187, "step": 10942 }, { "epoch": 0.31949432133368366, "grad_norm": 0.6960825397626188, "learning_rate": 1.5122789943227901e-05, "loss": 0.684, "step": 10943 }, { "epoch": 0.319523517561531, "grad_norm": 0.6564951769142467, "learning_rate": 1.5122141119221411e-05, "loss": 0.6038, "step": 10944 }, { "epoch": 0.31955271378937844, "grad_norm": 0.7050604810779256, "learning_rate": 1.5121492295214923e-05, "loss": 0.6868, "step": 10945 }, { "epoch": 0.3195819100172258, "grad_norm": 0.7000025146764373, "learning_rate": 1.5120843471208435e-05, "loss": 0.6436, "step": 10946 }, { "epoch": 0.31961110624507316, "grad_norm": 0.6671476894871375, "learning_rate": 1.5120194647201947e-05, "loss": 0.6376, "step": 10947 }, { "epoch": 0.3196403024729205, "grad_norm": 0.6883337274807794, "learning_rate": 1.5119545823195458e-05, "loss": 0.7008, "step": 10948 }, { "epoch": 0.3196694987007679, "grad_norm": 0.6741365089443596, "learning_rate": 1.5118896999188973e-05, "loss": 0.6314, "step": 10949 }, { "epoch": 0.31969869492861525, "grad_norm": 0.668798112528017, "learning_rate": 1.5118248175182483e-05, "loss": 0.6608, "step": 10950 }, { "epoch": 0.3197278911564626, "grad_norm": 0.6106265234693147, "learning_rate": 1.5117599351175995e-05, "loss": 0.5793, "step": 10951 }, { "epoch": 0.31975708738430997, "grad_norm": 0.6166517937313976, "learning_rate": 1.5116950527169507e-05, "loss": 0.5675, "step": 10952 }, { "epoch": 0.31978628361215733, "grad_norm": 0.6690619015951709, "learning_rate": 1.511630170316302e-05, "loss": 0.6843, "step": 10953 }, { "epoch": 0.3198154798400047, "grad_norm": 0.6711995463096436, "learning_rate": 1.511565287915653e-05, "loss": 0.6428, "step": 10954 }, { "epoch": 0.31984467606785205, "grad_norm": 0.596297893799272, "learning_rate": 1.5115004055150042e-05, "loss": 0.5536, "step": 10955 }, { "epoch": 0.3198738722956994, "grad_norm": 0.6757391416362449, "learning_rate": 1.5114355231143554e-05, "loss": 0.7016, "step": 10956 }, { "epoch": 0.3199030685235468, "grad_norm": 0.6302585649365292, "learning_rate": 1.5113706407137066e-05, "loss": 0.5991, "step": 10957 }, { "epoch": 0.31993226475139414, "grad_norm": 0.7176929470441281, "learning_rate": 1.5113057583130576e-05, "loss": 0.6626, "step": 10958 }, { "epoch": 0.3199614609792415, "grad_norm": 0.6669207307514966, "learning_rate": 1.5112408759124088e-05, "loss": 0.5852, "step": 10959 }, { "epoch": 0.31999065720708886, "grad_norm": 0.8044699936844975, "learning_rate": 1.51117599351176e-05, "loss": 0.6901, "step": 10960 }, { "epoch": 0.3200198534349362, "grad_norm": 0.701897366215024, "learning_rate": 1.5111111111111112e-05, "loss": 0.681, "step": 10961 }, { "epoch": 0.3200490496627836, "grad_norm": 0.6353329597471179, "learning_rate": 1.5110462287104624e-05, "loss": 0.6677, "step": 10962 }, { "epoch": 0.32007824589063094, "grad_norm": 0.6998629465709003, "learning_rate": 1.5109813463098135e-05, "loss": 0.7365, "step": 10963 }, { "epoch": 0.3201074421184783, "grad_norm": 0.7158633092813685, "learning_rate": 1.5109164639091647e-05, "loss": 0.7365, "step": 10964 }, { "epoch": 0.32013663834632566, "grad_norm": 0.6777047611643616, "learning_rate": 1.5108515815085159e-05, "loss": 0.614, "step": 10965 }, { "epoch": 0.320165834574173, "grad_norm": 0.6489282070604026, "learning_rate": 1.510786699107867e-05, "loss": 0.5985, "step": 10966 }, { "epoch": 0.3201950308020204, "grad_norm": 0.6808512467710528, "learning_rate": 1.5107218167072181e-05, "loss": 0.6349, "step": 10967 }, { "epoch": 0.32022422702986775, "grad_norm": 0.641009550009542, "learning_rate": 1.5106569343065696e-05, "loss": 0.6299, "step": 10968 }, { "epoch": 0.3202534232577151, "grad_norm": 0.7307766247385837, "learning_rate": 1.5105920519059207e-05, "loss": 0.7285, "step": 10969 }, { "epoch": 0.32028261948556247, "grad_norm": 0.6202724137646565, "learning_rate": 1.5105271695052719e-05, "loss": 0.6223, "step": 10970 }, { "epoch": 0.32031181571340983, "grad_norm": 0.6933684878663611, "learning_rate": 1.5104622871046231e-05, "loss": 0.5614, "step": 10971 }, { "epoch": 0.3203410119412572, "grad_norm": 0.6540404544633558, "learning_rate": 1.5103974047039743e-05, "loss": 0.5921, "step": 10972 }, { "epoch": 0.32037020816910455, "grad_norm": 0.6319564097879864, "learning_rate": 1.5103325223033253e-05, "loss": 0.5981, "step": 10973 }, { "epoch": 0.3203994043969519, "grad_norm": 0.6357629671016889, "learning_rate": 1.5102676399026765e-05, "loss": 0.5887, "step": 10974 }, { "epoch": 0.3204286006247993, "grad_norm": 0.7827305044474905, "learning_rate": 1.5102027575020277e-05, "loss": 0.6676, "step": 10975 }, { "epoch": 0.32045779685264664, "grad_norm": 0.6091567492668795, "learning_rate": 1.510137875101379e-05, "loss": 0.5724, "step": 10976 }, { "epoch": 0.320486993080494, "grad_norm": 0.6348204724924921, "learning_rate": 1.51007299270073e-05, "loss": 0.6372, "step": 10977 }, { "epoch": 0.32051618930834136, "grad_norm": 0.63174792242812, "learning_rate": 1.5100081103000812e-05, "loss": 0.6311, "step": 10978 }, { "epoch": 0.3205453855361887, "grad_norm": 0.7439157082425629, "learning_rate": 1.5099432278994324e-05, "loss": 0.5931, "step": 10979 }, { "epoch": 0.3205745817640361, "grad_norm": 0.7144404440086035, "learning_rate": 1.5098783454987836e-05, "loss": 0.6946, "step": 10980 }, { "epoch": 0.32060377799188344, "grad_norm": 0.6908752462252656, "learning_rate": 1.5098134630981348e-05, "loss": 0.6311, "step": 10981 }, { "epoch": 0.3206329742197308, "grad_norm": 0.666781797554842, "learning_rate": 1.5097485806974858e-05, "loss": 0.674, "step": 10982 }, { "epoch": 0.32066217044757817, "grad_norm": 0.6634846416432323, "learning_rate": 1.509683698296837e-05, "loss": 0.6256, "step": 10983 }, { "epoch": 0.3206913666754255, "grad_norm": 0.7311937716120681, "learning_rate": 1.5096188158961882e-05, "loss": 0.6969, "step": 10984 }, { "epoch": 0.3207205629032729, "grad_norm": 0.6591821802247025, "learning_rate": 1.5095539334955394e-05, "loss": 0.6294, "step": 10985 }, { "epoch": 0.32074975913112025, "grad_norm": 0.6554149194117256, "learning_rate": 1.5094890510948908e-05, "loss": 0.687, "step": 10986 }, { "epoch": 0.3207789553589676, "grad_norm": 0.6756602733102223, "learning_rate": 1.509424168694242e-05, "loss": 0.6719, "step": 10987 }, { "epoch": 0.32080815158681497, "grad_norm": 0.6439709721203178, "learning_rate": 1.509359286293593e-05, "loss": 0.5965, "step": 10988 }, { "epoch": 0.32083734781466233, "grad_norm": 0.6476883064504525, "learning_rate": 1.5092944038929442e-05, "loss": 0.6154, "step": 10989 }, { "epoch": 0.3208665440425097, "grad_norm": 0.6506578351473378, "learning_rate": 1.5092295214922954e-05, "loss": 0.6206, "step": 10990 }, { "epoch": 0.32089574027035705, "grad_norm": 0.600288410378719, "learning_rate": 1.5091646390916466e-05, "loss": 0.5519, "step": 10991 }, { "epoch": 0.3209249364982044, "grad_norm": 0.6490848598179072, "learning_rate": 1.5090997566909977e-05, "loss": 0.6077, "step": 10992 }, { "epoch": 0.3209541327260518, "grad_norm": 0.6369763560541907, "learning_rate": 1.5090348742903489e-05, "loss": 0.6155, "step": 10993 }, { "epoch": 0.32098332895389914, "grad_norm": 0.6440850073332949, "learning_rate": 1.5089699918897e-05, "loss": 0.6146, "step": 10994 }, { "epoch": 0.3210125251817465, "grad_norm": 0.6317621078688485, "learning_rate": 1.5089051094890513e-05, "loss": 0.6287, "step": 10995 }, { "epoch": 0.32104172140959386, "grad_norm": 0.6493210238216358, "learning_rate": 1.5088402270884023e-05, "loss": 0.6165, "step": 10996 }, { "epoch": 0.3210709176374412, "grad_norm": 0.6920928798256308, "learning_rate": 1.5087753446877535e-05, "loss": 0.6189, "step": 10997 }, { "epoch": 0.3211001138652886, "grad_norm": 0.6743528996704371, "learning_rate": 1.5087104622871047e-05, "loss": 0.6639, "step": 10998 }, { "epoch": 0.32112931009313594, "grad_norm": 0.648937793060606, "learning_rate": 1.508645579886456e-05, "loss": 0.5607, "step": 10999 }, { "epoch": 0.3211585063209833, "grad_norm": 0.6404228455540255, "learning_rate": 1.5085806974858071e-05, "loss": 0.6178, "step": 11000 }, { "epoch": 0.32118770254883067, "grad_norm": 0.678665722717613, "learning_rate": 1.5085158150851582e-05, "loss": 0.6365, "step": 11001 }, { "epoch": 0.32121689877667803, "grad_norm": 0.6527144801206551, "learning_rate": 1.5084509326845094e-05, "loss": 0.7083, "step": 11002 }, { "epoch": 0.3212460950045254, "grad_norm": 0.6334298519168503, "learning_rate": 1.5083860502838606e-05, "loss": 0.5979, "step": 11003 }, { "epoch": 0.32127529123237275, "grad_norm": 0.6330944867817848, "learning_rate": 1.5083211678832118e-05, "loss": 0.6371, "step": 11004 }, { "epoch": 0.32130448746022017, "grad_norm": 0.6782562937979342, "learning_rate": 1.5082562854825631e-05, "loss": 0.6276, "step": 11005 }, { "epoch": 0.32133368368806753, "grad_norm": 0.6392613609778397, "learning_rate": 1.5081914030819142e-05, "loss": 0.5869, "step": 11006 }, { "epoch": 0.3213628799159149, "grad_norm": 0.6379990058282801, "learning_rate": 1.5081265206812654e-05, "loss": 0.6522, "step": 11007 }, { "epoch": 0.32139207614376225, "grad_norm": 0.6325633985028792, "learning_rate": 1.5080616382806166e-05, "loss": 0.6025, "step": 11008 }, { "epoch": 0.3214212723716096, "grad_norm": 0.6137546092392013, "learning_rate": 1.5079967558799678e-05, "loss": 0.6061, "step": 11009 }, { "epoch": 0.321450468599457, "grad_norm": 0.6457983134609713, "learning_rate": 1.507931873479319e-05, "loss": 0.616, "step": 11010 }, { "epoch": 0.32147966482730433, "grad_norm": 0.6495533747699856, "learning_rate": 1.50786699107867e-05, "loss": 0.6337, "step": 11011 }, { "epoch": 0.3215088610551517, "grad_norm": 0.7144740889882779, "learning_rate": 1.5078021086780212e-05, "loss": 0.7354, "step": 11012 }, { "epoch": 0.32153805728299906, "grad_norm": 0.6471383304978515, "learning_rate": 1.5077372262773724e-05, "loss": 0.6302, "step": 11013 }, { "epoch": 0.3215672535108464, "grad_norm": 0.6500168442110543, "learning_rate": 1.5076723438767236e-05, "loss": 0.5993, "step": 11014 }, { "epoch": 0.3215964497386938, "grad_norm": 0.6374188365664075, "learning_rate": 1.5076074614760747e-05, "loss": 0.5835, "step": 11015 }, { "epoch": 0.32162564596654114, "grad_norm": 0.7388186728866414, "learning_rate": 1.5075425790754259e-05, "loss": 0.6488, "step": 11016 }, { "epoch": 0.3216548421943885, "grad_norm": 0.6928634423713426, "learning_rate": 1.507477696674777e-05, "loss": 0.6973, "step": 11017 }, { "epoch": 0.32168403842223586, "grad_norm": 0.6911213839702276, "learning_rate": 1.5074128142741283e-05, "loss": 0.7127, "step": 11018 }, { "epoch": 0.3217132346500832, "grad_norm": 0.6192263423958303, "learning_rate": 1.5073479318734795e-05, "loss": 0.6104, "step": 11019 }, { "epoch": 0.3217424308779306, "grad_norm": 0.7063827402313453, "learning_rate": 1.5072830494728305e-05, "loss": 0.6882, "step": 11020 }, { "epoch": 0.32177162710577795, "grad_norm": 0.6446024363551531, "learning_rate": 1.5072181670721817e-05, "loss": 0.6274, "step": 11021 }, { "epoch": 0.3218008233336253, "grad_norm": 0.6462418595872607, "learning_rate": 1.5071532846715329e-05, "loss": 0.5773, "step": 11022 }, { "epoch": 0.32183001956147267, "grad_norm": 0.6199929748733506, "learning_rate": 1.5070884022708841e-05, "loss": 0.5652, "step": 11023 }, { "epoch": 0.32185921578932003, "grad_norm": 0.6758386277709325, "learning_rate": 1.5070235198702355e-05, "loss": 0.6852, "step": 11024 }, { "epoch": 0.3218884120171674, "grad_norm": 0.6454771698953111, "learning_rate": 1.5069586374695865e-05, "loss": 0.5954, "step": 11025 }, { "epoch": 0.32191760824501475, "grad_norm": 0.65235819633053, "learning_rate": 1.5068937550689377e-05, "loss": 0.6141, "step": 11026 }, { "epoch": 0.3219468044728621, "grad_norm": 0.6869682074640631, "learning_rate": 1.506828872668289e-05, "loss": 0.6296, "step": 11027 }, { "epoch": 0.3219760007007095, "grad_norm": 0.6590731462662494, "learning_rate": 1.5067639902676401e-05, "loss": 0.6481, "step": 11028 }, { "epoch": 0.32200519692855684, "grad_norm": 0.7832299337904994, "learning_rate": 1.5066991078669913e-05, "loss": 0.7174, "step": 11029 }, { "epoch": 0.3220343931564042, "grad_norm": 0.6798667224111016, "learning_rate": 1.5066342254663424e-05, "loss": 0.6832, "step": 11030 }, { "epoch": 0.32206358938425156, "grad_norm": 0.6752790501130734, "learning_rate": 1.5065693430656936e-05, "loss": 0.6886, "step": 11031 }, { "epoch": 0.3220927856120989, "grad_norm": 0.6473409372841794, "learning_rate": 1.5065044606650448e-05, "loss": 0.6032, "step": 11032 }, { "epoch": 0.3221219818399463, "grad_norm": 0.664023820681429, "learning_rate": 1.506439578264396e-05, "loss": 0.6698, "step": 11033 }, { "epoch": 0.32215117806779364, "grad_norm": 0.6588547510738149, "learning_rate": 1.506374695863747e-05, "loss": 0.6007, "step": 11034 }, { "epoch": 0.322180374295641, "grad_norm": 0.6249604222546911, "learning_rate": 1.5063098134630982e-05, "loss": 0.5744, "step": 11035 }, { "epoch": 0.32220957052348836, "grad_norm": 0.6680309851285863, "learning_rate": 1.5062449310624494e-05, "loss": 0.6324, "step": 11036 }, { "epoch": 0.3222387667513357, "grad_norm": 0.6154586189075559, "learning_rate": 1.5061800486618006e-05, "loss": 0.5892, "step": 11037 }, { "epoch": 0.3222679629791831, "grad_norm": 0.6518645267443229, "learning_rate": 1.5061151662611518e-05, "loss": 0.5851, "step": 11038 }, { "epoch": 0.32229715920703045, "grad_norm": 0.6590031429362156, "learning_rate": 1.5060502838605028e-05, "loss": 0.5847, "step": 11039 }, { "epoch": 0.3223263554348778, "grad_norm": 0.6879024753545673, "learning_rate": 1.505985401459854e-05, "loss": 0.6893, "step": 11040 }, { "epoch": 0.32235555166272517, "grad_norm": 0.6857708212599676, "learning_rate": 1.5059205190592052e-05, "loss": 0.6577, "step": 11041 }, { "epoch": 0.32238474789057253, "grad_norm": 0.6553539813434728, "learning_rate": 1.5058556366585564e-05, "loss": 0.617, "step": 11042 }, { "epoch": 0.3224139441184199, "grad_norm": 0.7289307595423222, "learning_rate": 1.5057907542579078e-05, "loss": 0.7636, "step": 11043 }, { "epoch": 0.32244314034626725, "grad_norm": 0.7046785154604259, "learning_rate": 1.5057258718572589e-05, "loss": 0.6927, "step": 11044 }, { "epoch": 0.3224723365741146, "grad_norm": 0.6603432456469902, "learning_rate": 1.50566098945661e-05, "loss": 0.6669, "step": 11045 }, { "epoch": 0.322501532801962, "grad_norm": 0.6733095553009244, "learning_rate": 1.5055961070559613e-05, "loss": 0.6425, "step": 11046 }, { "epoch": 0.32253072902980934, "grad_norm": 0.6665090088580725, "learning_rate": 1.5055312246553125e-05, "loss": 0.5823, "step": 11047 }, { "epoch": 0.3225599252576567, "grad_norm": 0.6690265643852238, "learning_rate": 1.5054663422546637e-05, "loss": 0.6755, "step": 11048 }, { "epoch": 0.32258912148550406, "grad_norm": 0.6678994660843729, "learning_rate": 1.5054014598540147e-05, "loss": 0.6704, "step": 11049 }, { "epoch": 0.3226183177133514, "grad_norm": 0.691369585669674, "learning_rate": 1.5053365774533659e-05, "loss": 0.6514, "step": 11050 }, { "epoch": 0.3226475139411988, "grad_norm": 0.6185929615151827, "learning_rate": 1.5052716950527171e-05, "loss": 0.5735, "step": 11051 }, { "epoch": 0.32267671016904614, "grad_norm": 0.7583573825918988, "learning_rate": 1.5052068126520683e-05, "loss": 0.6723, "step": 11052 }, { "epoch": 0.3227059063968935, "grad_norm": 0.684293208885022, "learning_rate": 1.5051419302514193e-05, "loss": 0.6545, "step": 11053 }, { "epoch": 0.32273510262474087, "grad_norm": 0.6202946071004222, "learning_rate": 1.5050770478507705e-05, "loss": 0.5693, "step": 11054 }, { "epoch": 0.3227642988525882, "grad_norm": 0.6496417074606851, "learning_rate": 1.5050121654501217e-05, "loss": 0.6315, "step": 11055 }, { "epoch": 0.3227934950804356, "grad_norm": 0.6810546464857788, "learning_rate": 1.504947283049473e-05, "loss": 0.6453, "step": 11056 }, { "epoch": 0.32282269130828295, "grad_norm": 0.6724619117755213, "learning_rate": 1.5048824006488242e-05, "loss": 0.6751, "step": 11057 }, { "epoch": 0.3228518875361303, "grad_norm": 0.6089257661487829, "learning_rate": 1.5048175182481752e-05, "loss": 0.5385, "step": 11058 }, { "epoch": 0.32288108376397767, "grad_norm": 0.6357364978673398, "learning_rate": 1.5047526358475264e-05, "loss": 0.6144, "step": 11059 }, { "epoch": 0.32291027999182503, "grad_norm": 0.6221657182641442, "learning_rate": 1.5046877534468776e-05, "loss": 0.5171, "step": 11060 }, { "epoch": 0.3229394762196724, "grad_norm": 0.6235557723920881, "learning_rate": 1.5046228710462288e-05, "loss": 0.634, "step": 11061 }, { "epoch": 0.32296867244751976, "grad_norm": 0.6661008716244846, "learning_rate": 1.5045579886455802e-05, "loss": 0.6945, "step": 11062 }, { "epoch": 0.3229978686753671, "grad_norm": 0.7048602848711158, "learning_rate": 1.5044931062449312e-05, "loss": 0.6974, "step": 11063 }, { "epoch": 0.3230270649032145, "grad_norm": 0.7803004611840761, "learning_rate": 1.5044282238442824e-05, "loss": 0.7471, "step": 11064 }, { "epoch": 0.32305626113106184, "grad_norm": 0.6795258952036316, "learning_rate": 1.5043633414436336e-05, "loss": 0.6995, "step": 11065 }, { "epoch": 0.32308545735890926, "grad_norm": 0.6846108862601838, "learning_rate": 1.5042984590429848e-05, "loss": 0.6523, "step": 11066 }, { "epoch": 0.3231146535867566, "grad_norm": 0.6863723365807101, "learning_rate": 1.504233576642336e-05, "loss": 0.6693, "step": 11067 }, { "epoch": 0.323143849814604, "grad_norm": 0.6748718330749865, "learning_rate": 1.504168694241687e-05, "loss": 0.626, "step": 11068 }, { "epoch": 0.32317304604245134, "grad_norm": 0.7009592766833642, "learning_rate": 1.5041038118410382e-05, "loss": 0.7404, "step": 11069 }, { "epoch": 0.3232022422702987, "grad_norm": 0.7058122726965018, "learning_rate": 1.5040389294403894e-05, "loss": 0.6952, "step": 11070 }, { "epoch": 0.32323143849814606, "grad_norm": 0.6656978920009381, "learning_rate": 1.5039740470397407e-05, "loss": 0.6465, "step": 11071 }, { "epoch": 0.3232606347259934, "grad_norm": 0.5974443858759639, "learning_rate": 1.5039091646390917e-05, "loss": 0.549, "step": 11072 }, { "epoch": 0.3232898309538408, "grad_norm": 0.6516576689958983, "learning_rate": 1.5038442822384429e-05, "loss": 0.6467, "step": 11073 }, { "epoch": 0.32331902718168815, "grad_norm": 0.6829493334952065, "learning_rate": 1.5037793998377941e-05, "loss": 0.6799, "step": 11074 }, { "epoch": 0.3233482234095355, "grad_norm": 0.7316276748362802, "learning_rate": 1.5037145174371453e-05, "loss": 0.7672, "step": 11075 }, { "epoch": 0.32337741963738287, "grad_norm": 0.6724146145399694, "learning_rate": 1.5036496350364965e-05, "loss": 0.6436, "step": 11076 }, { "epoch": 0.32340661586523023, "grad_norm": 0.6715562421183037, "learning_rate": 1.5035847526358475e-05, "loss": 0.6991, "step": 11077 }, { "epoch": 0.3234358120930776, "grad_norm": 0.6404919828934715, "learning_rate": 1.5035198702351987e-05, "loss": 0.6372, "step": 11078 }, { "epoch": 0.32346500832092495, "grad_norm": 0.6495646801540065, "learning_rate": 1.50345498783455e-05, "loss": 0.6274, "step": 11079 }, { "epoch": 0.3234942045487723, "grad_norm": 0.6695759194008226, "learning_rate": 1.5033901054339011e-05, "loss": 0.65, "step": 11080 }, { "epoch": 0.3235234007766197, "grad_norm": 0.6333457468142093, "learning_rate": 1.5033252230332525e-05, "loss": 0.6155, "step": 11081 }, { "epoch": 0.32355259700446704, "grad_norm": 0.6660463528801022, "learning_rate": 1.5032603406326035e-05, "loss": 0.6704, "step": 11082 }, { "epoch": 0.3235817932323144, "grad_norm": 0.6518867676160992, "learning_rate": 1.5031954582319547e-05, "loss": 0.6037, "step": 11083 }, { "epoch": 0.32361098946016176, "grad_norm": 0.6463408496613655, "learning_rate": 1.503130575831306e-05, "loss": 0.6043, "step": 11084 }, { "epoch": 0.3236401856880091, "grad_norm": 0.6810982229641313, "learning_rate": 1.5030656934306571e-05, "loss": 0.6111, "step": 11085 }, { "epoch": 0.3236693819158565, "grad_norm": 0.6403385544617118, "learning_rate": 1.5030008110300084e-05, "loss": 0.5717, "step": 11086 }, { "epoch": 0.32369857814370384, "grad_norm": 0.6457555173865143, "learning_rate": 1.5029359286293594e-05, "loss": 0.6406, "step": 11087 }, { "epoch": 0.3237277743715512, "grad_norm": 0.6830815432770613, "learning_rate": 1.5028710462287106e-05, "loss": 0.6713, "step": 11088 }, { "epoch": 0.32375697059939856, "grad_norm": 0.6899587057278688, "learning_rate": 1.5028061638280618e-05, "loss": 0.6476, "step": 11089 }, { "epoch": 0.3237861668272459, "grad_norm": 0.6298207428747454, "learning_rate": 1.502741281427413e-05, "loss": 0.6161, "step": 11090 }, { "epoch": 0.3238153630550933, "grad_norm": 0.6685421410498719, "learning_rate": 1.502676399026764e-05, "loss": 0.6251, "step": 11091 }, { "epoch": 0.32384455928294065, "grad_norm": 0.6538459585796423, "learning_rate": 1.5026115166261152e-05, "loss": 0.5981, "step": 11092 }, { "epoch": 0.323873755510788, "grad_norm": 0.6511911516119225, "learning_rate": 1.5025466342254664e-05, "loss": 0.5905, "step": 11093 }, { "epoch": 0.32390295173863537, "grad_norm": 0.5600798423080576, "learning_rate": 1.5024817518248176e-05, "loss": 0.487, "step": 11094 }, { "epoch": 0.32393214796648273, "grad_norm": 0.6281885955147843, "learning_rate": 1.5024168694241688e-05, "loss": 0.5995, "step": 11095 }, { "epoch": 0.3239613441943301, "grad_norm": 0.6548490284888957, "learning_rate": 1.5023519870235199e-05, "loss": 0.6529, "step": 11096 }, { "epoch": 0.32399054042217745, "grad_norm": 0.6842932620189338, "learning_rate": 1.502287104622871e-05, "loss": 0.6955, "step": 11097 }, { "epoch": 0.3240197366500248, "grad_norm": 0.6736192234737849, "learning_rate": 1.5022222222222223e-05, "loss": 0.672, "step": 11098 }, { "epoch": 0.3240489328778722, "grad_norm": 0.6273656950418743, "learning_rate": 1.5021573398215735e-05, "loss": 0.6435, "step": 11099 }, { "epoch": 0.32407812910571954, "grad_norm": 0.6697570525133677, "learning_rate": 1.5020924574209249e-05, "loss": 0.5971, "step": 11100 }, { "epoch": 0.3241073253335669, "grad_norm": 0.6822356057704692, "learning_rate": 1.5020275750202759e-05, "loss": 0.6526, "step": 11101 }, { "epoch": 0.32413652156141426, "grad_norm": 0.6035910471392996, "learning_rate": 1.5019626926196271e-05, "loss": 0.5317, "step": 11102 }, { "epoch": 0.3241657177892616, "grad_norm": 0.7323412435092798, "learning_rate": 1.5018978102189783e-05, "loss": 0.7354, "step": 11103 }, { "epoch": 0.324194914017109, "grad_norm": 0.7187933691320859, "learning_rate": 1.5018329278183295e-05, "loss": 0.7602, "step": 11104 }, { "epoch": 0.32422411024495634, "grad_norm": 0.609747058379528, "learning_rate": 1.5017680454176807e-05, "loss": 0.5843, "step": 11105 }, { "epoch": 0.3242533064728037, "grad_norm": 0.6647915656769001, "learning_rate": 1.5017031630170317e-05, "loss": 0.6599, "step": 11106 }, { "epoch": 0.32428250270065107, "grad_norm": 0.5823054941982109, "learning_rate": 1.501638280616383e-05, "loss": 0.5099, "step": 11107 }, { "epoch": 0.3243116989284984, "grad_norm": 0.6568593977762412, "learning_rate": 1.5015733982157341e-05, "loss": 0.6098, "step": 11108 }, { "epoch": 0.3243408951563458, "grad_norm": 0.6962675916165212, "learning_rate": 1.5015085158150853e-05, "loss": 0.6715, "step": 11109 }, { "epoch": 0.32437009138419315, "grad_norm": 0.6405558076677487, "learning_rate": 1.5014436334144364e-05, "loss": 0.6165, "step": 11110 }, { "epoch": 0.3243992876120405, "grad_norm": 0.597763773823702, "learning_rate": 1.5013787510137876e-05, "loss": 0.5325, "step": 11111 }, { "epoch": 0.32442848383988787, "grad_norm": 0.7664303588053852, "learning_rate": 1.5013138686131388e-05, "loss": 0.7071, "step": 11112 }, { "epoch": 0.32445768006773523, "grad_norm": 0.7224576690558558, "learning_rate": 1.50124898621249e-05, "loss": 0.6804, "step": 11113 }, { "epoch": 0.3244868762955826, "grad_norm": 0.6570735708495966, "learning_rate": 1.5011841038118412e-05, "loss": 0.6506, "step": 11114 }, { "epoch": 0.32451607252342995, "grad_norm": 0.636817209921689, "learning_rate": 1.5011192214111922e-05, "loss": 0.5816, "step": 11115 }, { "epoch": 0.3245452687512773, "grad_norm": 0.6898942368931142, "learning_rate": 1.5010543390105434e-05, "loss": 0.6702, "step": 11116 }, { "epoch": 0.3245744649791247, "grad_norm": 0.638596030412785, "learning_rate": 1.5009894566098946e-05, "loss": 0.6162, "step": 11117 }, { "epoch": 0.32460366120697204, "grad_norm": 0.6309406514844405, "learning_rate": 1.5009245742092458e-05, "loss": 0.5767, "step": 11118 }, { "epoch": 0.3246328574348194, "grad_norm": 0.6408324967776723, "learning_rate": 1.5008596918085972e-05, "loss": 0.6032, "step": 11119 }, { "epoch": 0.32466205366266676, "grad_norm": 0.6548075660215509, "learning_rate": 1.5007948094079482e-05, "loss": 0.5882, "step": 11120 }, { "epoch": 0.3246912498905141, "grad_norm": 0.6334061521332022, "learning_rate": 1.5007299270072994e-05, "loss": 0.5955, "step": 11121 }, { "epoch": 0.3247204461183615, "grad_norm": 0.6755681283214067, "learning_rate": 1.5006650446066506e-05, "loss": 0.6896, "step": 11122 }, { "epoch": 0.32474964234620884, "grad_norm": 0.7059707758880064, "learning_rate": 1.5006001622060018e-05, "loss": 0.7132, "step": 11123 }, { "epoch": 0.3247788385740562, "grad_norm": 0.6875646152662443, "learning_rate": 1.500535279805353e-05, "loss": 0.6693, "step": 11124 }, { "epoch": 0.32480803480190357, "grad_norm": 0.7258629616006158, "learning_rate": 1.500470397404704e-05, "loss": 0.6495, "step": 11125 }, { "epoch": 0.324837231029751, "grad_norm": 0.6216623305165337, "learning_rate": 1.5004055150040553e-05, "loss": 0.5685, "step": 11126 }, { "epoch": 0.32486642725759834, "grad_norm": 0.6477777872102126, "learning_rate": 1.5003406326034065e-05, "loss": 0.6328, "step": 11127 }, { "epoch": 0.3248956234854457, "grad_norm": 0.6693155670856068, "learning_rate": 1.5002757502027577e-05, "loss": 0.6513, "step": 11128 }, { "epoch": 0.32492481971329307, "grad_norm": 0.6697992910796726, "learning_rate": 1.5002108678021087e-05, "loss": 0.6906, "step": 11129 }, { "epoch": 0.32495401594114043, "grad_norm": 0.6405520034318445, "learning_rate": 1.50014598540146e-05, "loss": 0.604, "step": 11130 }, { "epoch": 0.3249832121689878, "grad_norm": 0.7167839809045795, "learning_rate": 1.5000811030008111e-05, "loss": 0.6915, "step": 11131 }, { "epoch": 0.32501240839683515, "grad_norm": 0.7031689506232993, "learning_rate": 1.5000162206001623e-05, "loss": 0.6845, "step": 11132 }, { "epoch": 0.3250416046246825, "grad_norm": 0.6352776916879815, "learning_rate": 1.4999513381995135e-05, "loss": 0.5693, "step": 11133 }, { "epoch": 0.3250708008525299, "grad_norm": 0.6327444039124036, "learning_rate": 1.4998864557988646e-05, "loss": 0.5853, "step": 11134 }, { "epoch": 0.32509999708037723, "grad_norm": 0.8506526144054739, "learning_rate": 1.4998215733982158e-05, "loss": 0.7059, "step": 11135 }, { "epoch": 0.3251291933082246, "grad_norm": 0.611116759933881, "learning_rate": 1.499756690997567e-05, "loss": 0.5633, "step": 11136 }, { "epoch": 0.32515838953607196, "grad_norm": 0.6927034134498465, "learning_rate": 1.4996918085969183e-05, "loss": 0.6653, "step": 11137 }, { "epoch": 0.3251875857639193, "grad_norm": 0.6650832816054293, "learning_rate": 1.4996269261962695e-05, "loss": 0.5867, "step": 11138 }, { "epoch": 0.3252167819917667, "grad_norm": 0.6280404414906683, "learning_rate": 1.4995620437956206e-05, "loss": 0.5776, "step": 11139 }, { "epoch": 0.32524597821961404, "grad_norm": 0.6442908527560886, "learning_rate": 1.4994971613949718e-05, "loss": 0.6126, "step": 11140 }, { "epoch": 0.3252751744474614, "grad_norm": 0.66690393574079, "learning_rate": 1.499432278994323e-05, "loss": 0.6421, "step": 11141 }, { "epoch": 0.32530437067530876, "grad_norm": 0.6644999979119522, "learning_rate": 1.4993673965936742e-05, "loss": 0.6192, "step": 11142 }, { "epoch": 0.3253335669031561, "grad_norm": 0.6114689165329308, "learning_rate": 1.4993025141930254e-05, "loss": 0.5453, "step": 11143 }, { "epoch": 0.3253627631310035, "grad_norm": 0.6669483145330398, "learning_rate": 1.4992376317923764e-05, "loss": 0.5952, "step": 11144 }, { "epoch": 0.32539195935885085, "grad_norm": 0.6032304750595893, "learning_rate": 1.4991727493917276e-05, "loss": 0.5774, "step": 11145 }, { "epoch": 0.3254211555866982, "grad_norm": 0.6452309119117069, "learning_rate": 1.4991078669910788e-05, "loss": 0.6159, "step": 11146 }, { "epoch": 0.32545035181454557, "grad_norm": 0.6782766850305673, "learning_rate": 1.49904298459043e-05, "loss": 0.6488, "step": 11147 }, { "epoch": 0.32547954804239293, "grad_norm": 0.6689452144506416, "learning_rate": 1.498978102189781e-05, "loss": 0.6409, "step": 11148 }, { "epoch": 0.3255087442702403, "grad_norm": 0.6542158624921056, "learning_rate": 1.4989132197891323e-05, "loss": 0.6108, "step": 11149 }, { "epoch": 0.32553794049808765, "grad_norm": 0.7186089428777628, "learning_rate": 1.4988483373884835e-05, "loss": 0.7412, "step": 11150 }, { "epoch": 0.325567136725935, "grad_norm": 0.6627006254383517, "learning_rate": 1.4987834549878347e-05, "loss": 0.6004, "step": 11151 }, { "epoch": 0.3255963329537824, "grad_norm": 0.6258477001991152, "learning_rate": 1.4987185725871857e-05, "loss": 0.5874, "step": 11152 }, { "epoch": 0.32562552918162974, "grad_norm": 0.6754176761419786, "learning_rate": 1.4986536901865369e-05, "loss": 0.6734, "step": 11153 }, { "epoch": 0.3256547254094771, "grad_norm": 0.6320315869250621, "learning_rate": 1.4985888077858881e-05, "loss": 0.5832, "step": 11154 }, { "epoch": 0.32568392163732446, "grad_norm": 0.6541198788945036, "learning_rate": 1.4985239253852393e-05, "loss": 0.6467, "step": 11155 }, { "epoch": 0.3257131178651718, "grad_norm": 0.6188160321463927, "learning_rate": 1.4984590429845907e-05, "loss": 0.6048, "step": 11156 }, { "epoch": 0.3257423140930192, "grad_norm": 0.7367130710292197, "learning_rate": 1.4983941605839419e-05, "loss": 0.6651, "step": 11157 }, { "epoch": 0.32577151032086654, "grad_norm": 0.7319516038868181, "learning_rate": 1.4983292781832929e-05, "loss": 0.7123, "step": 11158 }, { "epoch": 0.3258007065487139, "grad_norm": 0.6555729773469741, "learning_rate": 1.4982643957826441e-05, "loss": 0.6257, "step": 11159 }, { "epoch": 0.32582990277656126, "grad_norm": 0.7023304354674079, "learning_rate": 1.4981995133819953e-05, "loss": 0.6733, "step": 11160 }, { "epoch": 0.3258590990044086, "grad_norm": 0.6648888792878253, "learning_rate": 1.4981346309813465e-05, "loss": 0.607, "step": 11161 }, { "epoch": 0.325888295232256, "grad_norm": 0.6163465579994586, "learning_rate": 1.4980697485806977e-05, "loss": 0.5874, "step": 11162 }, { "epoch": 0.32591749146010335, "grad_norm": 0.7016505415206341, "learning_rate": 1.4980048661800488e-05, "loss": 0.675, "step": 11163 }, { "epoch": 0.3259466876879507, "grad_norm": 0.6397133259164289, "learning_rate": 1.4979399837794e-05, "loss": 0.5853, "step": 11164 }, { "epoch": 0.32597588391579807, "grad_norm": 0.6481027864785062, "learning_rate": 1.4978751013787512e-05, "loss": 0.5918, "step": 11165 }, { "epoch": 0.32600508014364543, "grad_norm": 0.6214360545279004, "learning_rate": 1.4978102189781024e-05, "loss": 0.5748, "step": 11166 }, { "epoch": 0.3260342763714928, "grad_norm": 0.6264733946920747, "learning_rate": 1.4977453365774534e-05, "loss": 0.6309, "step": 11167 }, { "epoch": 0.32606347259934015, "grad_norm": 0.6383390545804454, "learning_rate": 1.4976804541768046e-05, "loss": 0.5765, "step": 11168 }, { "epoch": 0.3260926688271875, "grad_norm": 0.7173665339387658, "learning_rate": 1.4976155717761558e-05, "loss": 0.739, "step": 11169 }, { "epoch": 0.3261218650550349, "grad_norm": 0.6735779193296632, "learning_rate": 1.497550689375507e-05, "loss": 0.645, "step": 11170 }, { "epoch": 0.32615106128288224, "grad_norm": 0.7740463428905391, "learning_rate": 1.497485806974858e-05, "loss": 0.7406, "step": 11171 }, { "epoch": 0.3261802575107296, "grad_norm": 0.6305719907619438, "learning_rate": 1.4974209245742092e-05, "loss": 0.5895, "step": 11172 }, { "epoch": 0.32620945373857696, "grad_norm": 0.6641791533887059, "learning_rate": 1.4973560421735604e-05, "loss": 0.5753, "step": 11173 }, { "epoch": 0.3262386499664243, "grad_norm": 0.5897651214107662, "learning_rate": 1.4972911597729117e-05, "loss": 0.5516, "step": 11174 }, { "epoch": 0.3262678461942717, "grad_norm": 0.6491504562904524, "learning_rate": 1.497226277372263e-05, "loss": 0.5852, "step": 11175 }, { "epoch": 0.32629704242211904, "grad_norm": 0.6939265362251252, "learning_rate": 1.4971613949716142e-05, "loss": 0.6516, "step": 11176 }, { "epoch": 0.3263262386499664, "grad_norm": 0.635601369564845, "learning_rate": 1.4970965125709653e-05, "loss": 0.6225, "step": 11177 }, { "epoch": 0.32635543487781377, "grad_norm": 0.6602582386202187, "learning_rate": 1.4970316301703165e-05, "loss": 0.6131, "step": 11178 }, { "epoch": 0.3263846311056611, "grad_norm": 0.6759387651430557, "learning_rate": 1.4969667477696677e-05, "loss": 0.6152, "step": 11179 }, { "epoch": 0.3264138273335085, "grad_norm": 0.6408352662507798, "learning_rate": 1.4969018653690189e-05, "loss": 0.6356, "step": 11180 }, { "epoch": 0.32644302356135585, "grad_norm": 0.6705204012511914, "learning_rate": 1.49683698296837e-05, "loss": 0.6394, "step": 11181 }, { "epoch": 0.3264722197892032, "grad_norm": 0.6971987032042509, "learning_rate": 1.4967721005677211e-05, "loss": 0.7072, "step": 11182 }, { "epoch": 0.32650141601705057, "grad_norm": 0.6823440068449748, "learning_rate": 1.4967072181670723e-05, "loss": 0.6576, "step": 11183 }, { "epoch": 0.32653061224489793, "grad_norm": 0.6350787424652242, "learning_rate": 1.4966423357664235e-05, "loss": 0.6225, "step": 11184 }, { "epoch": 0.3265598084727453, "grad_norm": 0.6609198006134444, "learning_rate": 1.4965774533657747e-05, "loss": 0.6313, "step": 11185 }, { "epoch": 0.3265890047005927, "grad_norm": 0.6764847323097176, "learning_rate": 1.4965125709651257e-05, "loss": 0.6879, "step": 11186 }, { "epoch": 0.32661820092844007, "grad_norm": 0.6123831019028991, "learning_rate": 1.496447688564477e-05, "loss": 0.5719, "step": 11187 }, { "epoch": 0.32664739715628743, "grad_norm": 0.6541914088892212, "learning_rate": 1.4963828061638281e-05, "loss": 0.6293, "step": 11188 }, { "epoch": 0.3266765933841348, "grad_norm": 0.6783914206644702, "learning_rate": 1.4963179237631794e-05, "loss": 0.6366, "step": 11189 }, { "epoch": 0.32670578961198216, "grad_norm": 0.6608713494137588, "learning_rate": 1.4962530413625304e-05, "loss": 0.5889, "step": 11190 }, { "epoch": 0.3267349858398295, "grad_norm": 0.6855232851179083, "learning_rate": 1.4961881589618816e-05, "loss": 0.63, "step": 11191 }, { "epoch": 0.3267641820676769, "grad_norm": 0.6418323450605344, "learning_rate": 1.4961232765612328e-05, "loss": 0.594, "step": 11192 }, { "epoch": 0.32679337829552424, "grad_norm": 0.6527118188453911, "learning_rate": 1.496058394160584e-05, "loss": 0.6395, "step": 11193 }, { "epoch": 0.3268225745233716, "grad_norm": 0.6744010220077111, "learning_rate": 1.4959935117599354e-05, "loss": 0.6693, "step": 11194 }, { "epoch": 0.32685177075121896, "grad_norm": 0.6774776538045092, "learning_rate": 1.4959286293592866e-05, "loss": 0.6317, "step": 11195 }, { "epoch": 0.3268809669790663, "grad_norm": 0.6439935234225368, "learning_rate": 1.4958637469586376e-05, "loss": 0.6393, "step": 11196 }, { "epoch": 0.3269101632069137, "grad_norm": 0.638779053338544, "learning_rate": 1.4957988645579888e-05, "loss": 0.6105, "step": 11197 }, { "epoch": 0.32693935943476105, "grad_norm": 0.6534234519104094, "learning_rate": 1.49573398215734e-05, "loss": 0.5797, "step": 11198 }, { "epoch": 0.3269685556626084, "grad_norm": 0.6075403345766348, "learning_rate": 1.4956690997566912e-05, "loss": 0.5694, "step": 11199 }, { "epoch": 0.32699775189045577, "grad_norm": 0.661100396833844, "learning_rate": 1.4956042173560424e-05, "loss": 0.5859, "step": 11200 }, { "epoch": 0.32702694811830313, "grad_norm": 0.7802289528089813, "learning_rate": 1.4955393349553934e-05, "loss": 0.6062, "step": 11201 }, { "epoch": 0.3270561443461505, "grad_norm": 0.624568307093392, "learning_rate": 1.4954744525547446e-05, "loss": 0.6199, "step": 11202 }, { "epoch": 0.32708534057399785, "grad_norm": 0.6736214663997204, "learning_rate": 1.4954095701540959e-05, "loss": 0.662, "step": 11203 }, { "epoch": 0.3271145368018452, "grad_norm": 0.6847325319928234, "learning_rate": 1.495344687753447e-05, "loss": 0.5962, "step": 11204 }, { "epoch": 0.3271437330296926, "grad_norm": 0.7050186431420612, "learning_rate": 1.4952798053527981e-05, "loss": 0.6649, "step": 11205 }, { "epoch": 0.32717292925753994, "grad_norm": 0.6707282858787224, "learning_rate": 1.4952149229521493e-05, "loss": 0.6902, "step": 11206 }, { "epoch": 0.3272021254853873, "grad_norm": 0.6169269908720821, "learning_rate": 1.4951500405515005e-05, "loss": 0.5965, "step": 11207 }, { "epoch": 0.32723132171323466, "grad_norm": 0.6207542833939054, "learning_rate": 1.4950851581508517e-05, "loss": 0.5451, "step": 11208 }, { "epoch": 0.327260517941082, "grad_norm": 0.6724197996223445, "learning_rate": 1.4950202757502027e-05, "loss": 0.6343, "step": 11209 }, { "epoch": 0.3272897141689294, "grad_norm": 0.6422414476672461, "learning_rate": 1.494955393349554e-05, "loss": 0.6238, "step": 11210 }, { "epoch": 0.32731891039677674, "grad_norm": 0.7049660347299068, "learning_rate": 1.4948905109489051e-05, "loss": 0.6837, "step": 11211 }, { "epoch": 0.3273481066246241, "grad_norm": 0.6364089222074423, "learning_rate": 1.4948256285482563e-05, "loss": 0.6121, "step": 11212 }, { "epoch": 0.32737730285247146, "grad_norm": 0.7077375101320786, "learning_rate": 1.4947607461476077e-05, "loss": 0.6998, "step": 11213 }, { "epoch": 0.3274064990803188, "grad_norm": 0.6667886436947972, "learning_rate": 1.4946958637469589e-05, "loss": 0.6, "step": 11214 }, { "epoch": 0.3274356953081662, "grad_norm": 0.6351989488330632, "learning_rate": 1.49463098134631e-05, "loss": 0.5804, "step": 11215 }, { "epoch": 0.32746489153601355, "grad_norm": 0.7145256438151506, "learning_rate": 1.4945660989456611e-05, "loss": 0.6746, "step": 11216 }, { "epoch": 0.3274940877638609, "grad_norm": 0.6462219064422976, "learning_rate": 1.4945012165450124e-05, "loss": 0.6645, "step": 11217 }, { "epoch": 0.32752328399170827, "grad_norm": 0.6093768851704731, "learning_rate": 1.4944363341443636e-05, "loss": 0.5853, "step": 11218 }, { "epoch": 0.32755248021955563, "grad_norm": 0.6609547499785603, "learning_rate": 1.4943714517437148e-05, "loss": 0.668, "step": 11219 }, { "epoch": 0.327581676447403, "grad_norm": 0.6807563508398532, "learning_rate": 1.4943065693430658e-05, "loss": 0.6359, "step": 11220 }, { "epoch": 0.32761087267525035, "grad_norm": 0.6634577245751756, "learning_rate": 1.494241686942417e-05, "loss": 0.6563, "step": 11221 }, { "epoch": 0.3276400689030977, "grad_norm": 0.7065392599916205, "learning_rate": 1.4941768045417682e-05, "loss": 0.7315, "step": 11222 }, { "epoch": 0.3276692651309451, "grad_norm": 0.6590680040494135, "learning_rate": 1.4941119221411194e-05, "loss": 0.6352, "step": 11223 }, { "epoch": 0.32769846135879244, "grad_norm": 0.7754786219475511, "learning_rate": 1.4940470397404704e-05, "loss": 0.7236, "step": 11224 }, { "epoch": 0.3277276575866398, "grad_norm": 0.6162172233392367, "learning_rate": 1.4939821573398216e-05, "loss": 0.5619, "step": 11225 }, { "epoch": 0.32775685381448716, "grad_norm": 0.7003045437804047, "learning_rate": 1.4939172749391728e-05, "loss": 0.6557, "step": 11226 }, { "epoch": 0.3277860500423345, "grad_norm": 0.6390756008247428, "learning_rate": 1.493852392538524e-05, "loss": 0.6444, "step": 11227 }, { "epoch": 0.3278152462701819, "grad_norm": 0.728610001919987, "learning_rate": 1.493787510137875e-05, "loss": 0.6594, "step": 11228 }, { "epoch": 0.32784444249802924, "grad_norm": 0.6625296416945313, "learning_rate": 1.4937226277372263e-05, "loss": 0.6535, "step": 11229 }, { "epoch": 0.3278736387258766, "grad_norm": 0.5894206542921244, "learning_rate": 1.4936577453365775e-05, "loss": 0.5217, "step": 11230 }, { "epoch": 0.32790283495372397, "grad_norm": 28.41315414377554, "learning_rate": 1.4935928629359287e-05, "loss": 0.9503, "step": 11231 }, { "epoch": 0.3279320311815713, "grad_norm": 0.6461221902793084, "learning_rate": 1.49352798053528e-05, "loss": 0.5916, "step": 11232 }, { "epoch": 0.3279612274094187, "grad_norm": 0.6603432213449806, "learning_rate": 1.4934630981346313e-05, "loss": 0.6453, "step": 11233 }, { "epoch": 0.32799042363726605, "grad_norm": 0.6452701829666686, "learning_rate": 1.4933982157339823e-05, "loss": 0.556, "step": 11234 }, { "epoch": 0.3280196198651134, "grad_norm": 0.6579995291843197, "learning_rate": 1.4933333333333335e-05, "loss": 0.6089, "step": 11235 }, { "epoch": 0.32804881609296077, "grad_norm": 0.6856421464961776, "learning_rate": 1.4932684509326847e-05, "loss": 0.6479, "step": 11236 }, { "epoch": 0.32807801232080813, "grad_norm": 0.6570002024199735, "learning_rate": 1.4932035685320359e-05, "loss": 0.6066, "step": 11237 }, { "epoch": 0.3281072085486555, "grad_norm": 0.6517780476075555, "learning_rate": 1.4931386861313871e-05, "loss": 0.6332, "step": 11238 }, { "epoch": 0.32813640477650285, "grad_norm": 0.67555161937602, "learning_rate": 1.4930738037307381e-05, "loss": 0.6345, "step": 11239 }, { "epoch": 0.3281656010043502, "grad_norm": 0.7791492412730903, "learning_rate": 1.4930089213300893e-05, "loss": 0.7932, "step": 11240 }, { "epoch": 0.3281947972321976, "grad_norm": 0.6624549885162798, "learning_rate": 1.4929440389294405e-05, "loss": 0.6451, "step": 11241 }, { "epoch": 0.32822399346004494, "grad_norm": 0.6220743439086477, "learning_rate": 1.4928791565287917e-05, "loss": 0.5545, "step": 11242 }, { "epoch": 0.3282531896878923, "grad_norm": 0.6874566690104187, "learning_rate": 1.4928142741281428e-05, "loss": 0.7102, "step": 11243 }, { "epoch": 0.32828238591573966, "grad_norm": 0.6643952155535885, "learning_rate": 1.492749391727494e-05, "loss": 0.6336, "step": 11244 }, { "epoch": 0.328311582143587, "grad_norm": 0.6882624002811335, "learning_rate": 1.4926845093268452e-05, "loss": 0.6764, "step": 11245 }, { "epoch": 0.3283407783714344, "grad_norm": 0.6956135513958102, "learning_rate": 1.4926196269261964e-05, "loss": 0.6695, "step": 11246 }, { "epoch": 0.3283699745992818, "grad_norm": 0.6241958545643952, "learning_rate": 1.4925547445255474e-05, "loss": 0.575, "step": 11247 }, { "epoch": 0.32839917082712916, "grad_norm": 0.6274636342108274, "learning_rate": 1.4924898621248986e-05, "loss": 0.5572, "step": 11248 }, { "epoch": 0.3284283670549765, "grad_norm": 0.5962245203758059, "learning_rate": 1.4924249797242498e-05, "loss": 0.5494, "step": 11249 }, { "epoch": 0.3284575632828239, "grad_norm": 0.621850825110366, "learning_rate": 1.492360097323601e-05, "loss": 0.5673, "step": 11250 }, { "epoch": 0.32848675951067124, "grad_norm": 0.6510201462434265, "learning_rate": 1.4922952149229524e-05, "loss": 0.627, "step": 11251 }, { "epoch": 0.3285159557385186, "grad_norm": 0.6712599562150855, "learning_rate": 1.4922303325223036e-05, "loss": 0.6585, "step": 11252 }, { "epoch": 0.32854515196636597, "grad_norm": 0.5984574941577461, "learning_rate": 1.4921654501216546e-05, "loss": 0.5998, "step": 11253 }, { "epoch": 0.32857434819421333, "grad_norm": 0.6835412295529621, "learning_rate": 1.4921005677210058e-05, "loss": 0.6784, "step": 11254 }, { "epoch": 0.3286035444220607, "grad_norm": 0.6662406154874243, "learning_rate": 1.492035685320357e-05, "loss": 0.6443, "step": 11255 }, { "epoch": 0.32863274064990805, "grad_norm": 0.6867350829505693, "learning_rate": 1.4919708029197082e-05, "loss": 0.6511, "step": 11256 }, { "epoch": 0.3286619368777554, "grad_norm": 0.6217986374275465, "learning_rate": 1.4919059205190594e-05, "loss": 0.6133, "step": 11257 }, { "epoch": 0.3286911331056028, "grad_norm": 0.7169302211942689, "learning_rate": 1.4918410381184105e-05, "loss": 0.7133, "step": 11258 }, { "epoch": 0.32872032933345013, "grad_norm": 0.6337120825486792, "learning_rate": 1.4917761557177617e-05, "loss": 0.6035, "step": 11259 }, { "epoch": 0.3287495255612975, "grad_norm": 0.5907741857536747, "learning_rate": 1.4917112733171129e-05, "loss": 0.5232, "step": 11260 }, { "epoch": 0.32877872178914486, "grad_norm": 0.6501993128062649, "learning_rate": 1.4916463909164641e-05, "loss": 0.6362, "step": 11261 }, { "epoch": 0.3288079180169922, "grad_norm": 0.6840534633345132, "learning_rate": 1.4915815085158151e-05, "loss": 0.6134, "step": 11262 }, { "epoch": 0.3288371142448396, "grad_norm": 0.611144953742495, "learning_rate": 1.4915166261151663e-05, "loss": 0.5652, "step": 11263 }, { "epoch": 0.32886631047268694, "grad_norm": 0.6902864385582181, "learning_rate": 1.4914517437145175e-05, "loss": 0.6635, "step": 11264 }, { "epoch": 0.3288955067005343, "grad_norm": 0.6841248440253966, "learning_rate": 1.4913868613138687e-05, "loss": 0.692, "step": 11265 }, { "epoch": 0.32892470292838166, "grad_norm": 0.6720269090700058, "learning_rate": 1.4913219789132198e-05, "loss": 0.5968, "step": 11266 }, { "epoch": 0.328953899156229, "grad_norm": 0.6561320674260105, "learning_rate": 1.491257096512571e-05, "loss": 0.5797, "step": 11267 }, { "epoch": 0.3289830953840764, "grad_norm": 0.6459233443328831, "learning_rate": 1.4911922141119222e-05, "loss": 0.6281, "step": 11268 }, { "epoch": 0.32901229161192375, "grad_norm": 0.6782701733899603, "learning_rate": 1.4911273317112734e-05, "loss": 0.7483, "step": 11269 }, { "epoch": 0.3290414878397711, "grad_norm": 0.6901885890059295, "learning_rate": 1.4910624493106247e-05, "loss": 0.6417, "step": 11270 }, { "epoch": 0.32907068406761847, "grad_norm": 0.6499127163786732, "learning_rate": 1.490997566909976e-05, "loss": 0.6407, "step": 11271 }, { "epoch": 0.32909988029546583, "grad_norm": 0.7273566130441499, "learning_rate": 1.490932684509327e-05, "loss": 0.7696, "step": 11272 }, { "epoch": 0.3291290765233132, "grad_norm": 0.6984319414271162, "learning_rate": 1.4908678021086782e-05, "loss": 0.6278, "step": 11273 }, { "epoch": 0.32915827275116055, "grad_norm": 0.6425925999725227, "learning_rate": 1.4908029197080294e-05, "loss": 0.6188, "step": 11274 }, { "epoch": 0.3291874689790079, "grad_norm": 0.6207028832471034, "learning_rate": 1.4907380373073806e-05, "loss": 0.6293, "step": 11275 }, { "epoch": 0.3292166652068553, "grad_norm": 0.691756571675889, "learning_rate": 1.4906731549067316e-05, "loss": 0.7284, "step": 11276 }, { "epoch": 0.32924586143470264, "grad_norm": 0.6221415477695695, "learning_rate": 1.4906082725060828e-05, "loss": 0.6032, "step": 11277 }, { "epoch": 0.32927505766255, "grad_norm": 0.633705959830491, "learning_rate": 1.490543390105434e-05, "loss": 0.5567, "step": 11278 }, { "epoch": 0.32930425389039736, "grad_norm": 0.6878072218460054, "learning_rate": 1.4904785077047852e-05, "loss": 0.6805, "step": 11279 }, { "epoch": 0.3293334501182447, "grad_norm": 0.702931316658765, "learning_rate": 1.4904136253041364e-05, "loss": 0.6358, "step": 11280 }, { "epoch": 0.3293626463460921, "grad_norm": 0.7101063441390894, "learning_rate": 1.4903487429034875e-05, "loss": 0.5868, "step": 11281 }, { "epoch": 0.32939184257393944, "grad_norm": 0.7301147127705884, "learning_rate": 1.4902838605028387e-05, "loss": 0.7143, "step": 11282 }, { "epoch": 0.3294210388017868, "grad_norm": 0.618406156115431, "learning_rate": 1.4902189781021899e-05, "loss": 0.5885, "step": 11283 }, { "epoch": 0.32945023502963416, "grad_norm": 0.7856878103639844, "learning_rate": 1.490154095701541e-05, "loss": 0.7601, "step": 11284 }, { "epoch": 0.3294794312574815, "grad_norm": 0.6802677248176443, "learning_rate": 1.4900892133008921e-05, "loss": 0.6139, "step": 11285 }, { "epoch": 0.3295086274853289, "grad_norm": 0.6724850282596919, "learning_rate": 1.4900243309002433e-05, "loss": 0.6403, "step": 11286 }, { "epoch": 0.32953782371317625, "grad_norm": 0.6507394957047564, "learning_rate": 1.4899594484995945e-05, "loss": 0.591, "step": 11287 }, { "epoch": 0.3295670199410236, "grad_norm": 0.6321447821666861, "learning_rate": 1.4898945660989459e-05, "loss": 0.6261, "step": 11288 }, { "epoch": 0.32959621616887097, "grad_norm": 0.7030469109726243, "learning_rate": 1.489829683698297e-05, "loss": 0.6528, "step": 11289 }, { "epoch": 0.32962541239671833, "grad_norm": 0.6690836627996962, "learning_rate": 1.4897648012976483e-05, "loss": 0.6661, "step": 11290 }, { "epoch": 0.3296546086245657, "grad_norm": 0.6491532878978292, "learning_rate": 1.4896999188969993e-05, "loss": 0.5914, "step": 11291 }, { "epoch": 0.32968380485241305, "grad_norm": 0.7109877613428917, "learning_rate": 1.4896350364963505e-05, "loss": 0.6495, "step": 11292 }, { "epoch": 0.3297130010802604, "grad_norm": 0.6995248086763616, "learning_rate": 1.4895701540957017e-05, "loss": 0.7349, "step": 11293 }, { "epoch": 0.3297421973081078, "grad_norm": 0.6860974816768339, "learning_rate": 1.489505271695053e-05, "loss": 0.6712, "step": 11294 }, { "epoch": 0.32977139353595514, "grad_norm": 0.6695405361209761, "learning_rate": 1.489440389294404e-05, "loss": 0.6176, "step": 11295 }, { "epoch": 0.3298005897638025, "grad_norm": 0.6635188618001999, "learning_rate": 1.4893755068937552e-05, "loss": 0.6788, "step": 11296 }, { "epoch": 0.32982978599164986, "grad_norm": 0.6626302584727141, "learning_rate": 1.4893106244931064e-05, "loss": 0.6185, "step": 11297 }, { "epoch": 0.3298589822194972, "grad_norm": 0.6890305449530257, "learning_rate": 1.4892457420924576e-05, "loss": 0.6704, "step": 11298 }, { "epoch": 0.3298881784473446, "grad_norm": 0.7039851493701242, "learning_rate": 1.4891808596918088e-05, "loss": 0.7025, "step": 11299 }, { "epoch": 0.32991737467519194, "grad_norm": 0.643682247702766, "learning_rate": 1.4891159772911598e-05, "loss": 0.6034, "step": 11300 }, { "epoch": 0.3299465709030393, "grad_norm": 0.6623645299860865, "learning_rate": 1.489051094890511e-05, "loss": 0.6332, "step": 11301 }, { "epoch": 0.32997576713088667, "grad_norm": 0.6851400937405013, "learning_rate": 1.4889862124898622e-05, "loss": 0.7037, "step": 11302 }, { "epoch": 0.330004963358734, "grad_norm": 0.651304820043286, "learning_rate": 1.4889213300892134e-05, "loss": 0.5806, "step": 11303 }, { "epoch": 0.3300341595865814, "grad_norm": 0.6158842331975946, "learning_rate": 1.4888564476885644e-05, "loss": 0.5547, "step": 11304 }, { "epoch": 0.33006335581442875, "grad_norm": 0.6550595897992811, "learning_rate": 1.4887915652879156e-05, "loss": 0.6144, "step": 11305 }, { "epoch": 0.3300925520422761, "grad_norm": 0.6060281664141375, "learning_rate": 1.4887266828872669e-05, "loss": 0.5622, "step": 11306 }, { "epoch": 0.3301217482701235, "grad_norm": 0.7061979862775442, "learning_rate": 1.4886618004866182e-05, "loss": 0.7414, "step": 11307 }, { "epoch": 0.3301509444979709, "grad_norm": 0.6549555329073906, "learning_rate": 1.4885969180859694e-05, "loss": 0.6615, "step": 11308 }, { "epoch": 0.33018014072581825, "grad_norm": 0.6568331074283932, "learning_rate": 1.4885320356853206e-05, "loss": 0.6425, "step": 11309 }, { "epoch": 0.3302093369536656, "grad_norm": 0.6951116765642474, "learning_rate": 1.4884671532846717e-05, "loss": 0.7091, "step": 11310 }, { "epoch": 0.33023853318151297, "grad_norm": 0.733969124099132, "learning_rate": 1.4884022708840229e-05, "loss": 0.7467, "step": 11311 }, { "epoch": 0.33026772940936033, "grad_norm": 0.6348332226854523, "learning_rate": 1.488337388483374e-05, "loss": 0.5939, "step": 11312 }, { "epoch": 0.3302969256372077, "grad_norm": 0.6474088878531677, "learning_rate": 1.4882725060827253e-05, "loss": 0.6222, "step": 11313 }, { "epoch": 0.33032612186505506, "grad_norm": 0.6690253009226473, "learning_rate": 1.4882076236820763e-05, "loss": 0.6594, "step": 11314 }, { "epoch": 0.3303553180929024, "grad_norm": 0.6642309986587908, "learning_rate": 1.4881427412814275e-05, "loss": 0.6742, "step": 11315 }, { "epoch": 0.3303845143207498, "grad_norm": 0.6637089084237172, "learning_rate": 1.4880778588807787e-05, "loss": 0.6024, "step": 11316 }, { "epoch": 0.33041371054859714, "grad_norm": 0.6574715665909107, "learning_rate": 1.4880129764801299e-05, "loss": 0.584, "step": 11317 }, { "epoch": 0.3304429067764445, "grad_norm": 0.6477991961623499, "learning_rate": 1.4879480940794811e-05, "loss": 0.5699, "step": 11318 }, { "epoch": 0.33047210300429186, "grad_norm": 0.739512354752369, "learning_rate": 1.4878832116788321e-05, "loss": 0.6919, "step": 11319 }, { "epoch": 0.3305012992321392, "grad_norm": 0.728603447257709, "learning_rate": 1.4878183292781834e-05, "loss": 0.5944, "step": 11320 }, { "epoch": 0.3305304954599866, "grad_norm": 0.684728697058397, "learning_rate": 1.4877534468775346e-05, "loss": 0.6724, "step": 11321 }, { "epoch": 0.33055969168783395, "grad_norm": 0.6418651396661801, "learning_rate": 1.4876885644768858e-05, "loss": 0.6278, "step": 11322 }, { "epoch": 0.3305888879156813, "grad_norm": 0.6691648658810031, "learning_rate": 1.4876236820762368e-05, "loss": 0.6873, "step": 11323 }, { "epoch": 0.33061808414352867, "grad_norm": 0.6430923361949203, "learning_rate": 1.487558799675588e-05, "loss": 0.5675, "step": 11324 }, { "epoch": 0.33064728037137603, "grad_norm": 0.6226241240881379, "learning_rate": 1.4874939172749392e-05, "loss": 0.6058, "step": 11325 }, { "epoch": 0.3306764765992234, "grad_norm": 0.6779465177810224, "learning_rate": 1.4874290348742906e-05, "loss": 0.6671, "step": 11326 }, { "epoch": 0.33070567282707075, "grad_norm": 0.6323037438326944, "learning_rate": 1.4873641524736418e-05, "loss": 0.5985, "step": 11327 }, { "epoch": 0.3307348690549181, "grad_norm": 0.6232494585436602, "learning_rate": 1.487299270072993e-05, "loss": 0.5686, "step": 11328 }, { "epoch": 0.3307640652827655, "grad_norm": 0.6409537813469312, "learning_rate": 1.487234387672344e-05, "loss": 0.6142, "step": 11329 }, { "epoch": 0.33079326151061283, "grad_norm": 0.6857959213981593, "learning_rate": 1.4871695052716952e-05, "loss": 0.6485, "step": 11330 }, { "epoch": 0.3308224577384602, "grad_norm": 0.6833633620436391, "learning_rate": 1.4871046228710464e-05, "loss": 0.6511, "step": 11331 }, { "epoch": 0.33085165396630756, "grad_norm": 0.6273990775700315, "learning_rate": 1.4870397404703976e-05, "loss": 0.6002, "step": 11332 }, { "epoch": 0.3308808501941549, "grad_norm": 0.7252829196766852, "learning_rate": 1.4869748580697486e-05, "loss": 0.6556, "step": 11333 }, { "epoch": 0.3309100464220023, "grad_norm": 0.7273439960896317, "learning_rate": 1.4869099756690999e-05, "loss": 0.6872, "step": 11334 }, { "epoch": 0.33093924264984964, "grad_norm": 0.6653885375449365, "learning_rate": 1.486845093268451e-05, "loss": 0.6091, "step": 11335 }, { "epoch": 0.330968438877697, "grad_norm": 0.6722748441817524, "learning_rate": 1.4867802108678023e-05, "loss": 0.6647, "step": 11336 }, { "epoch": 0.33099763510554436, "grad_norm": 0.6595157688257485, "learning_rate": 1.4867153284671535e-05, "loss": 0.657, "step": 11337 }, { "epoch": 0.3310268313333917, "grad_norm": 0.6542046574213486, "learning_rate": 1.4866504460665045e-05, "loss": 0.6614, "step": 11338 }, { "epoch": 0.3310560275612391, "grad_norm": 0.6671881172921464, "learning_rate": 1.4865855636658557e-05, "loss": 0.639, "step": 11339 }, { "epoch": 0.33108522378908645, "grad_norm": 0.609494445749073, "learning_rate": 1.4865206812652069e-05, "loss": 0.5243, "step": 11340 }, { "epoch": 0.3311144200169338, "grad_norm": 0.7286816268300214, "learning_rate": 1.4864557988645581e-05, "loss": 0.644, "step": 11341 }, { "epoch": 0.33114361624478117, "grad_norm": 0.6791426249107603, "learning_rate": 1.4863909164639091e-05, "loss": 0.64, "step": 11342 }, { "epoch": 0.33117281247262853, "grad_norm": 0.6181691959596556, "learning_rate": 1.4863260340632603e-05, "loss": 0.6245, "step": 11343 }, { "epoch": 0.3312020087004759, "grad_norm": 0.5942677507535454, "learning_rate": 1.4862611516626115e-05, "loss": 0.6075, "step": 11344 }, { "epoch": 0.33123120492832325, "grad_norm": 0.6385130348020793, "learning_rate": 1.4861962692619629e-05, "loss": 0.6543, "step": 11345 }, { "epoch": 0.3312604011561706, "grad_norm": 0.6576317650180259, "learning_rate": 1.4861313868613141e-05, "loss": 0.6565, "step": 11346 }, { "epoch": 0.331289597384018, "grad_norm": 0.6449541111225332, "learning_rate": 1.4860665044606653e-05, "loss": 0.6326, "step": 11347 }, { "epoch": 0.33131879361186534, "grad_norm": 0.6329976170463655, "learning_rate": 1.4860016220600163e-05, "loss": 0.621, "step": 11348 }, { "epoch": 0.3313479898397127, "grad_norm": 0.6595380944798068, "learning_rate": 1.4859367396593676e-05, "loss": 0.6289, "step": 11349 }, { "epoch": 0.33137718606756006, "grad_norm": 0.6561413739178333, "learning_rate": 1.4858718572587188e-05, "loss": 0.6294, "step": 11350 }, { "epoch": 0.3314063822954074, "grad_norm": 0.7177742523989853, "learning_rate": 1.48580697485807e-05, "loss": 0.6913, "step": 11351 }, { "epoch": 0.3314355785232548, "grad_norm": 0.6491574603583998, "learning_rate": 1.485742092457421e-05, "loss": 0.6297, "step": 11352 }, { "epoch": 0.33146477475110214, "grad_norm": 0.6816297738142024, "learning_rate": 1.4856772100567722e-05, "loss": 0.6729, "step": 11353 }, { "epoch": 0.3314939709789495, "grad_norm": 0.6147949701752756, "learning_rate": 1.4856123276561234e-05, "loss": 0.5586, "step": 11354 }, { "epoch": 0.33152316720679686, "grad_norm": 0.5952035514653424, "learning_rate": 1.4855474452554746e-05, "loss": 0.5639, "step": 11355 }, { "epoch": 0.3315523634346442, "grad_norm": 0.6583110915164345, "learning_rate": 1.4854825628548258e-05, "loss": 0.6936, "step": 11356 }, { "epoch": 0.3315815596624916, "grad_norm": 0.7745212133251934, "learning_rate": 1.4854176804541768e-05, "loss": 0.6967, "step": 11357 }, { "epoch": 0.33161075589033895, "grad_norm": 0.7408437245552238, "learning_rate": 1.485352798053528e-05, "loss": 0.7173, "step": 11358 }, { "epoch": 0.3316399521181863, "grad_norm": 0.6986610953496238, "learning_rate": 1.4852879156528792e-05, "loss": 0.7349, "step": 11359 }, { "epoch": 0.33166914834603367, "grad_norm": 0.6829794818965468, "learning_rate": 1.4852230332522304e-05, "loss": 0.5997, "step": 11360 }, { "epoch": 0.33169834457388103, "grad_norm": 0.6554724969742275, "learning_rate": 1.4851581508515815e-05, "loss": 0.6401, "step": 11361 }, { "epoch": 0.3317275408017284, "grad_norm": 0.6633015243513022, "learning_rate": 1.4850932684509327e-05, "loss": 0.6616, "step": 11362 }, { "epoch": 0.33175673702957575, "grad_norm": 0.6575769820745002, "learning_rate": 1.4850283860502839e-05, "loss": 0.5705, "step": 11363 }, { "epoch": 0.3317859332574231, "grad_norm": 0.6789485095644778, "learning_rate": 1.4849635036496353e-05, "loss": 0.6313, "step": 11364 }, { "epoch": 0.3318151294852705, "grad_norm": 0.669874580533031, "learning_rate": 1.4848986212489865e-05, "loss": 0.67, "step": 11365 }, { "epoch": 0.33184432571311784, "grad_norm": 0.6918532789694776, "learning_rate": 1.4848337388483377e-05, "loss": 0.6861, "step": 11366 }, { "epoch": 0.33187352194096525, "grad_norm": 0.6794043006148258, "learning_rate": 1.4847688564476887e-05, "loss": 0.6819, "step": 11367 }, { "epoch": 0.3319027181688126, "grad_norm": 0.7205531087367604, "learning_rate": 1.4847039740470399e-05, "loss": 0.7278, "step": 11368 }, { "epoch": 0.33193191439666, "grad_norm": 0.6925161355017787, "learning_rate": 1.4846390916463911e-05, "loss": 0.7171, "step": 11369 }, { "epoch": 0.33196111062450734, "grad_norm": 0.6073664393834493, "learning_rate": 1.4845742092457423e-05, "loss": 0.5621, "step": 11370 }, { "epoch": 0.3319903068523547, "grad_norm": 0.7158760872938522, "learning_rate": 1.4845093268450933e-05, "loss": 0.6611, "step": 11371 }, { "epoch": 0.33201950308020206, "grad_norm": 0.6602346703875218, "learning_rate": 1.4844444444444445e-05, "loss": 0.6217, "step": 11372 }, { "epoch": 0.3320486993080494, "grad_norm": 0.6437037191011454, "learning_rate": 1.4843795620437957e-05, "loss": 0.577, "step": 11373 }, { "epoch": 0.3320778955358968, "grad_norm": 0.7489193427547187, "learning_rate": 1.484314679643147e-05, "loss": 0.6667, "step": 11374 }, { "epoch": 0.33210709176374414, "grad_norm": 0.7435791068696663, "learning_rate": 1.4842497972424981e-05, "loss": 0.7039, "step": 11375 }, { "epoch": 0.3321362879915915, "grad_norm": 0.6766879685336467, "learning_rate": 1.4841849148418492e-05, "loss": 0.6723, "step": 11376 }, { "epoch": 0.33216548421943887, "grad_norm": 0.6360246426775352, "learning_rate": 1.4841200324412004e-05, "loss": 0.5817, "step": 11377 }, { "epoch": 0.33219468044728623, "grad_norm": 0.8020993803659765, "learning_rate": 1.4840551500405516e-05, "loss": 0.743, "step": 11378 }, { "epoch": 0.3322238766751336, "grad_norm": 0.6309399124593075, "learning_rate": 1.4839902676399028e-05, "loss": 0.5881, "step": 11379 }, { "epoch": 0.33225307290298095, "grad_norm": 0.6522026331863698, "learning_rate": 1.4839253852392538e-05, "loss": 0.5865, "step": 11380 }, { "epoch": 0.3322822691308283, "grad_norm": 0.6350521142666841, "learning_rate": 1.483860502838605e-05, "loss": 0.6176, "step": 11381 }, { "epoch": 0.3323114653586757, "grad_norm": 0.6884608813590422, "learning_rate": 1.4837956204379562e-05, "loss": 0.648, "step": 11382 }, { "epoch": 0.33234066158652303, "grad_norm": 0.7374032353909391, "learning_rate": 1.4837307380373076e-05, "loss": 0.7063, "step": 11383 }, { "epoch": 0.3323698578143704, "grad_norm": 0.7201513030903645, "learning_rate": 1.4836658556366588e-05, "loss": 0.7305, "step": 11384 }, { "epoch": 0.33239905404221776, "grad_norm": 0.6354164460662534, "learning_rate": 1.48360097323601e-05, "loss": 0.5825, "step": 11385 }, { "epoch": 0.3324282502700651, "grad_norm": 0.6563057781764526, "learning_rate": 1.483536090835361e-05, "loss": 0.6565, "step": 11386 }, { "epoch": 0.3324574464979125, "grad_norm": 0.6293815089197461, "learning_rate": 1.4834712084347122e-05, "loss": 0.5719, "step": 11387 }, { "epoch": 0.33248664272575984, "grad_norm": 0.670002255919765, "learning_rate": 1.4834063260340634e-05, "loss": 0.6609, "step": 11388 }, { "epoch": 0.3325158389536072, "grad_norm": 0.6875004659914219, "learning_rate": 1.4833414436334146e-05, "loss": 0.6605, "step": 11389 }, { "epoch": 0.33254503518145456, "grad_norm": 0.9021546092055968, "learning_rate": 1.4832765612327657e-05, "loss": 0.6129, "step": 11390 }, { "epoch": 0.3325742314093019, "grad_norm": 0.6948215539151569, "learning_rate": 1.4832116788321169e-05, "loss": 0.6321, "step": 11391 }, { "epoch": 0.3326034276371493, "grad_norm": 0.6047768710539106, "learning_rate": 1.483146796431468e-05, "loss": 0.5484, "step": 11392 }, { "epoch": 0.33263262386499665, "grad_norm": 0.6678531687972771, "learning_rate": 1.4830819140308193e-05, "loss": 0.6113, "step": 11393 }, { "epoch": 0.332661820092844, "grad_norm": 0.6431754240494433, "learning_rate": 1.4830170316301705e-05, "loss": 0.5976, "step": 11394 }, { "epoch": 0.33269101632069137, "grad_norm": 0.6088112711812194, "learning_rate": 1.4829521492295215e-05, "loss": 0.5633, "step": 11395 }, { "epoch": 0.33272021254853873, "grad_norm": 0.6553878304046886, "learning_rate": 1.4828872668288727e-05, "loss": 0.6298, "step": 11396 }, { "epoch": 0.3327494087763861, "grad_norm": 0.7058922146292125, "learning_rate": 1.482822384428224e-05, "loss": 0.651, "step": 11397 }, { "epoch": 0.33277860500423345, "grad_norm": 0.6648709016724634, "learning_rate": 1.4827575020275751e-05, "loss": 0.6848, "step": 11398 }, { "epoch": 0.3328078012320808, "grad_norm": 0.7042657817723373, "learning_rate": 1.4826926196269262e-05, "loss": 0.6628, "step": 11399 }, { "epoch": 0.3328369974599282, "grad_norm": 0.6803803312728761, "learning_rate": 1.4826277372262774e-05, "loss": 0.6167, "step": 11400 }, { "epoch": 0.33286619368777554, "grad_norm": 0.704685321274272, "learning_rate": 1.4825628548256286e-05, "loss": 0.7358, "step": 11401 }, { "epoch": 0.3328953899156229, "grad_norm": 0.616210696472257, "learning_rate": 1.48249797242498e-05, "loss": 0.5852, "step": 11402 }, { "epoch": 0.33292458614347026, "grad_norm": 0.6653287209830284, "learning_rate": 1.4824330900243311e-05, "loss": 0.6444, "step": 11403 }, { "epoch": 0.3329537823713176, "grad_norm": 0.6202739021271663, "learning_rate": 1.4823682076236823e-05, "loss": 0.5655, "step": 11404 }, { "epoch": 0.332982978599165, "grad_norm": 0.6347198832008242, "learning_rate": 1.4823033252230334e-05, "loss": 0.6028, "step": 11405 }, { "epoch": 0.33301217482701234, "grad_norm": 0.6688281174493483, "learning_rate": 1.4822384428223846e-05, "loss": 0.6476, "step": 11406 }, { "epoch": 0.3330413710548597, "grad_norm": 0.6410650411645458, "learning_rate": 1.4821735604217358e-05, "loss": 0.6334, "step": 11407 }, { "epoch": 0.33307056728270706, "grad_norm": 0.7024400572293072, "learning_rate": 1.482108678021087e-05, "loss": 0.7313, "step": 11408 }, { "epoch": 0.3330997635105544, "grad_norm": 0.6892558069822822, "learning_rate": 1.482043795620438e-05, "loss": 0.6828, "step": 11409 }, { "epoch": 0.3331289597384018, "grad_norm": 0.6811419203955791, "learning_rate": 1.4819789132197892e-05, "loss": 0.6672, "step": 11410 }, { "epoch": 0.33315815596624915, "grad_norm": 0.6472083719581015, "learning_rate": 1.4819140308191404e-05, "loss": 0.5806, "step": 11411 }, { "epoch": 0.3331873521940965, "grad_norm": 0.6572388707149468, "learning_rate": 1.4818491484184916e-05, "loss": 0.6746, "step": 11412 }, { "epoch": 0.33321654842194387, "grad_norm": 0.673068872205504, "learning_rate": 1.4817842660178428e-05, "loss": 0.6737, "step": 11413 }, { "epoch": 0.33324574464979123, "grad_norm": 0.6647724620632807, "learning_rate": 1.4817193836171939e-05, "loss": 0.6297, "step": 11414 }, { "epoch": 0.3332749408776386, "grad_norm": 0.7755469343084207, "learning_rate": 1.481654501216545e-05, "loss": 0.615, "step": 11415 }, { "epoch": 0.33330413710548595, "grad_norm": 0.6640395219866304, "learning_rate": 1.4815896188158963e-05, "loss": 0.6389, "step": 11416 }, { "epoch": 0.3333333333333333, "grad_norm": 0.6498101059517389, "learning_rate": 1.4815247364152475e-05, "loss": 0.6315, "step": 11417 }, { "epoch": 0.3333625295611807, "grad_norm": 0.7014967121531994, "learning_rate": 1.4814598540145985e-05, "loss": 0.6901, "step": 11418 }, { "epoch": 0.33339172578902804, "grad_norm": 0.6282951903002668, "learning_rate": 1.4813949716139497e-05, "loss": 0.5766, "step": 11419 }, { "epoch": 0.3334209220168754, "grad_norm": 0.6920200729069304, "learning_rate": 1.4813300892133009e-05, "loss": 0.6578, "step": 11420 }, { "epoch": 0.33345011824472276, "grad_norm": 0.7311016793483419, "learning_rate": 1.4812652068126523e-05, "loss": 0.665, "step": 11421 }, { "epoch": 0.3334793144725701, "grad_norm": 0.5946267394933571, "learning_rate": 1.4812003244120035e-05, "loss": 0.5401, "step": 11422 }, { "epoch": 0.3335085107004175, "grad_norm": 0.6796789693962969, "learning_rate": 1.4811354420113547e-05, "loss": 0.6331, "step": 11423 }, { "epoch": 0.33353770692826484, "grad_norm": 0.6476547224579157, "learning_rate": 1.4810705596107057e-05, "loss": 0.5864, "step": 11424 }, { "epoch": 0.3335669031561122, "grad_norm": 0.6798016275604609, "learning_rate": 1.481005677210057e-05, "loss": 0.6488, "step": 11425 }, { "epoch": 0.33359609938395957, "grad_norm": 0.6767619085367138, "learning_rate": 1.4809407948094081e-05, "loss": 0.6782, "step": 11426 }, { "epoch": 0.3336252956118069, "grad_norm": 0.710294329007663, "learning_rate": 1.4808759124087593e-05, "loss": 0.6579, "step": 11427 }, { "epoch": 0.33365449183965434, "grad_norm": 0.6894986754704139, "learning_rate": 1.4808110300081104e-05, "loss": 0.6726, "step": 11428 }, { "epoch": 0.3336836880675017, "grad_norm": 0.7316468868385468, "learning_rate": 1.4807461476074616e-05, "loss": 0.7232, "step": 11429 }, { "epoch": 0.33371288429534907, "grad_norm": 0.6150435861174178, "learning_rate": 1.4806812652068128e-05, "loss": 0.5847, "step": 11430 }, { "epoch": 0.3337420805231964, "grad_norm": 0.7481802518622993, "learning_rate": 1.480616382806164e-05, "loss": 0.7464, "step": 11431 }, { "epoch": 0.3337712767510438, "grad_norm": 0.6228627723792789, "learning_rate": 1.4805515004055152e-05, "loss": 0.5937, "step": 11432 }, { "epoch": 0.33380047297889115, "grad_norm": 0.6705330009391925, "learning_rate": 1.4804866180048662e-05, "loss": 0.7096, "step": 11433 }, { "epoch": 0.3338296692067385, "grad_norm": 0.6298226670594347, "learning_rate": 1.4804217356042174e-05, "loss": 0.5954, "step": 11434 }, { "epoch": 0.33385886543458587, "grad_norm": 0.6042916678856669, "learning_rate": 1.4803568532035686e-05, "loss": 0.5532, "step": 11435 }, { "epoch": 0.33388806166243323, "grad_norm": 0.626309623757632, "learning_rate": 1.4802919708029198e-05, "loss": 0.604, "step": 11436 }, { "epoch": 0.3339172578902806, "grad_norm": 0.6785788939414602, "learning_rate": 1.4802270884022709e-05, "loss": 0.6625, "step": 11437 }, { "epoch": 0.33394645411812796, "grad_norm": 0.6330317081327722, "learning_rate": 1.480162206001622e-05, "loss": 0.6202, "step": 11438 }, { "epoch": 0.3339756503459753, "grad_norm": 0.721425587997687, "learning_rate": 1.4800973236009734e-05, "loss": 0.6784, "step": 11439 }, { "epoch": 0.3340048465738227, "grad_norm": 0.6856533393414378, "learning_rate": 1.4800324412003246e-05, "loss": 0.7056, "step": 11440 }, { "epoch": 0.33403404280167004, "grad_norm": 0.6612858368100263, "learning_rate": 1.4799675587996758e-05, "loss": 0.6624, "step": 11441 }, { "epoch": 0.3340632390295174, "grad_norm": 0.5760902455569353, "learning_rate": 1.479902676399027e-05, "loss": 0.4853, "step": 11442 }, { "epoch": 0.33409243525736476, "grad_norm": 0.6640586870523791, "learning_rate": 1.479837793998378e-05, "loss": 0.6645, "step": 11443 }, { "epoch": 0.3341216314852121, "grad_norm": 0.652326035561504, "learning_rate": 1.4797729115977293e-05, "loss": 0.6266, "step": 11444 }, { "epoch": 0.3341508277130595, "grad_norm": 0.7106296038420292, "learning_rate": 1.4797080291970805e-05, "loss": 0.6511, "step": 11445 }, { "epoch": 0.33418002394090685, "grad_norm": 0.6474222644720058, "learning_rate": 1.4796431467964317e-05, "loss": 0.6328, "step": 11446 }, { "epoch": 0.3342092201687542, "grad_norm": 0.6157475761219757, "learning_rate": 1.4795782643957827e-05, "loss": 0.5559, "step": 11447 }, { "epoch": 0.33423841639660157, "grad_norm": 0.6738624179088815, "learning_rate": 1.4795133819951339e-05, "loss": 0.6276, "step": 11448 }, { "epoch": 0.33426761262444893, "grad_norm": 0.669972499768883, "learning_rate": 1.4794484995944851e-05, "loss": 0.649, "step": 11449 }, { "epoch": 0.3342968088522963, "grad_norm": 0.726789453553942, "learning_rate": 1.4793836171938363e-05, "loss": 0.7014, "step": 11450 }, { "epoch": 0.33432600508014365, "grad_norm": 0.701566379426171, "learning_rate": 1.4793187347931875e-05, "loss": 0.7398, "step": 11451 }, { "epoch": 0.334355201307991, "grad_norm": 0.641766953699509, "learning_rate": 1.4792538523925386e-05, "loss": 0.6138, "step": 11452 }, { "epoch": 0.3343843975358384, "grad_norm": 0.6472168563632087, "learning_rate": 1.4791889699918898e-05, "loss": 0.6298, "step": 11453 }, { "epoch": 0.33441359376368573, "grad_norm": 0.6366233435987425, "learning_rate": 1.479124087591241e-05, "loss": 0.6116, "step": 11454 }, { "epoch": 0.3344427899915331, "grad_norm": 0.6526802387590152, "learning_rate": 1.4790592051905922e-05, "loss": 0.634, "step": 11455 }, { "epoch": 0.33447198621938046, "grad_norm": 0.6307230579681828, "learning_rate": 1.4789943227899432e-05, "loss": 0.5816, "step": 11456 }, { "epoch": 0.3345011824472278, "grad_norm": 0.6746788684882077, "learning_rate": 1.4789294403892944e-05, "loss": 0.6513, "step": 11457 }, { "epoch": 0.3345303786750752, "grad_norm": 0.6952926103329077, "learning_rate": 1.4788645579886458e-05, "loss": 0.6404, "step": 11458 }, { "epoch": 0.33455957490292254, "grad_norm": 0.6194923738630803, "learning_rate": 1.478799675587997e-05, "loss": 0.5543, "step": 11459 }, { "epoch": 0.3345887711307699, "grad_norm": 0.6092168687850235, "learning_rate": 1.4787347931873482e-05, "loss": 0.5431, "step": 11460 }, { "epoch": 0.33461796735861726, "grad_norm": 0.6580462439368574, "learning_rate": 1.4786699107866994e-05, "loss": 0.6198, "step": 11461 }, { "epoch": 0.3346471635864646, "grad_norm": 0.7061077092409792, "learning_rate": 1.4786050283860504e-05, "loss": 0.7254, "step": 11462 }, { "epoch": 0.334676359814312, "grad_norm": 0.656019344004952, "learning_rate": 1.4785401459854016e-05, "loss": 0.6228, "step": 11463 }, { "epoch": 0.33470555604215935, "grad_norm": 0.6593390108626708, "learning_rate": 1.4784752635847528e-05, "loss": 0.6022, "step": 11464 }, { "epoch": 0.3347347522700067, "grad_norm": 0.699557411328409, "learning_rate": 1.478410381184104e-05, "loss": 0.6401, "step": 11465 }, { "epoch": 0.33476394849785407, "grad_norm": 0.6255832036182221, "learning_rate": 1.478345498783455e-05, "loss": 0.5676, "step": 11466 }, { "epoch": 0.33479314472570143, "grad_norm": 0.6565845868123542, "learning_rate": 1.4782806163828063e-05, "loss": 0.6393, "step": 11467 }, { "epoch": 0.3348223409535488, "grad_norm": 0.6417820020718811, "learning_rate": 1.4782157339821575e-05, "loss": 0.5818, "step": 11468 }, { "epoch": 0.33485153718139615, "grad_norm": 0.6509567248372369, "learning_rate": 1.4781508515815087e-05, "loss": 0.6141, "step": 11469 }, { "epoch": 0.3348807334092435, "grad_norm": 0.6115113460119571, "learning_rate": 1.4780859691808599e-05, "loss": 0.5018, "step": 11470 }, { "epoch": 0.3349099296370909, "grad_norm": 0.7780202393633014, "learning_rate": 1.4780210867802109e-05, "loss": 0.6351, "step": 11471 }, { "epoch": 0.33493912586493824, "grad_norm": 0.6389549123455851, "learning_rate": 1.4779562043795621e-05, "loss": 0.6208, "step": 11472 }, { "epoch": 0.3349683220927856, "grad_norm": 0.6239421672085834, "learning_rate": 1.4778913219789133e-05, "loss": 0.5931, "step": 11473 }, { "epoch": 0.33499751832063296, "grad_norm": 0.6576947057110641, "learning_rate": 1.4778264395782645e-05, "loss": 0.6293, "step": 11474 }, { "epoch": 0.3350267145484803, "grad_norm": 0.8007388885170545, "learning_rate": 1.4777615571776155e-05, "loss": 0.7627, "step": 11475 }, { "epoch": 0.3350559107763277, "grad_norm": 0.7100334972962542, "learning_rate": 1.4776966747769667e-05, "loss": 0.7038, "step": 11476 }, { "epoch": 0.33508510700417504, "grad_norm": 0.6482405565270614, "learning_rate": 1.4776317923763181e-05, "loss": 0.597, "step": 11477 }, { "epoch": 0.3351143032320224, "grad_norm": 0.6534454724069372, "learning_rate": 1.4775669099756693e-05, "loss": 0.633, "step": 11478 }, { "epoch": 0.33514349945986976, "grad_norm": 0.6767309885094339, "learning_rate": 1.4775020275750205e-05, "loss": 0.6787, "step": 11479 }, { "epoch": 0.3351726956877171, "grad_norm": 0.6649724256055147, "learning_rate": 1.4774371451743717e-05, "loss": 0.649, "step": 11480 }, { "epoch": 0.3352018919155645, "grad_norm": 0.6507909489440649, "learning_rate": 1.4773722627737228e-05, "loss": 0.6293, "step": 11481 }, { "epoch": 0.33523108814341185, "grad_norm": 0.6474168740370763, "learning_rate": 1.477307380373074e-05, "loss": 0.5886, "step": 11482 }, { "epoch": 0.3352602843712592, "grad_norm": 0.7123988736320912, "learning_rate": 1.4772424979724252e-05, "loss": 0.7, "step": 11483 }, { "epoch": 0.33528948059910657, "grad_norm": 0.6764233060318778, "learning_rate": 1.4771776155717764e-05, "loss": 0.6708, "step": 11484 }, { "epoch": 0.33531867682695393, "grad_norm": 0.6844517384664072, "learning_rate": 1.4771127331711274e-05, "loss": 0.6169, "step": 11485 }, { "epoch": 0.3353478730548013, "grad_norm": 0.6730690084563382, "learning_rate": 1.4770478507704786e-05, "loss": 0.6201, "step": 11486 }, { "epoch": 0.33537706928264865, "grad_norm": 0.6144736432753778, "learning_rate": 1.4769829683698298e-05, "loss": 0.6118, "step": 11487 }, { "epoch": 0.33540626551049607, "grad_norm": 0.6560951303261594, "learning_rate": 1.476918085969181e-05, "loss": 0.6202, "step": 11488 }, { "epoch": 0.33543546173834343, "grad_norm": 0.7736053417513408, "learning_rate": 1.4768532035685322e-05, "loss": 0.6283, "step": 11489 }, { "epoch": 0.3354646579661908, "grad_norm": 0.6353301429274901, "learning_rate": 1.4767883211678832e-05, "loss": 0.6064, "step": 11490 }, { "epoch": 0.33549385419403815, "grad_norm": 0.6098164034609899, "learning_rate": 1.4767234387672344e-05, "loss": 0.5607, "step": 11491 }, { "epoch": 0.3355230504218855, "grad_norm": 0.6805311050484371, "learning_rate": 1.4766585563665856e-05, "loss": 0.6219, "step": 11492 }, { "epoch": 0.3355522466497329, "grad_norm": 0.628361663810313, "learning_rate": 1.4765936739659368e-05, "loss": 0.615, "step": 11493 }, { "epoch": 0.33558144287758024, "grad_norm": 0.6413166381559711, "learning_rate": 1.4765287915652879e-05, "loss": 0.6259, "step": 11494 }, { "epoch": 0.3356106391054276, "grad_norm": 0.6088099729709621, "learning_rate": 1.476463909164639e-05, "loss": 0.5582, "step": 11495 }, { "epoch": 0.33563983533327496, "grad_norm": 0.6540742168849416, "learning_rate": 1.4763990267639905e-05, "loss": 0.6473, "step": 11496 }, { "epoch": 0.3356690315611223, "grad_norm": 0.6723339008017936, "learning_rate": 1.4763341443633417e-05, "loss": 0.6632, "step": 11497 }, { "epoch": 0.3356982277889697, "grad_norm": 0.6929556096411869, "learning_rate": 1.4762692619626929e-05, "loss": 0.7288, "step": 11498 }, { "epoch": 0.33572742401681704, "grad_norm": 0.7104814257371367, "learning_rate": 1.476204379562044e-05, "loss": 0.6973, "step": 11499 }, { "epoch": 0.3357566202446644, "grad_norm": 0.6587110615846836, "learning_rate": 1.4761394971613951e-05, "loss": 0.6395, "step": 11500 }, { "epoch": 0.33578581647251177, "grad_norm": 0.6511402723891162, "learning_rate": 1.4760746147607463e-05, "loss": 0.6658, "step": 11501 }, { "epoch": 0.33581501270035913, "grad_norm": 0.6579988522478777, "learning_rate": 1.4760097323600975e-05, "loss": 0.6242, "step": 11502 }, { "epoch": 0.3358442089282065, "grad_norm": 0.6311444730951256, "learning_rate": 1.4759448499594487e-05, "loss": 0.6005, "step": 11503 }, { "epoch": 0.33587340515605385, "grad_norm": 0.7677733162240798, "learning_rate": 1.4758799675587997e-05, "loss": 0.6546, "step": 11504 }, { "epoch": 0.3359026013839012, "grad_norm": 0.6281748174118434, "learning_rate": 1.475815085158151e-05, "loss": 0.5819, "step": 11505 }, { "epoch": 0.3359317976117486, "grad_norm": 0.5910315503379457, "learning_rate": 1.4757502027575021e-05, "loss": 0.537, "step": 11506 }, { "epoch": 0.33596099383959593, "grad_norm": 0.6043803589196961, "learning_rate": 1.4756853203568533e-05, "loss": 0.5596, "step": 11507 }, { "epoch": 0.3359901900674433, "grad_norm": 0.6738103101341463, "learning_rate": 1.4756204379562045e-05, "loss": 0.6926, "step": 11508 }, { "epoch": 0.33601938629529066, "grad_norm": 0.6139169889479482, "learning_rate": 1.4755555555555556e-05, "loss": 0.5877, "step": 11509 }, { "epoch": 0.336048582523138, "grad_norm": 0.6394093469651867, "learning_rate": 1.4754906731549068e-05, "loss": 0.69, "step": 11510 }, { "epoch": 0.3360777787509854, "grad_norm": 0.6218779068342224, "learning_rate": 1.475425790754258e-05, "loss": 0.5911, "step": 11511 }, { "epoch": 0.33610697497883274, "grad_norm": 0.6945707370575503, "learning_rate": 1.4753609083536092e-05, "loss": 0.7185, "step": 11512 }, { "epoch": 0.3361361712066801, "grad_norm": 0.5774272957458133, "learning_rate": 1.4752960259529602e-05, "loss": 0.5159, "step": 11513 }, { "epoch": 0.33616536743452746, "grad_norm": 0.6602465937715959, "learning_rate": 1.4752311435523114e-05, "loss": 0.6543, "step": 11514 }, { "epoch": 0.3361945636623748, "grad_norm": 0.6576287022665017, "learning_rate": 1.4751662611516628e-05, "loss": 0.6107, "step": 11515 }, { "epoch": 0.3362237598902222, "grad_norm": 0.7024300369812367, "learning_rate": 1.475101378751014e-05, "loss": 0.6599, "step": 11516 }, { "epoch": 0.33625295611806955, "grad_norm": 0.6409433544851902, "learning_rate": 1.4750364963503652e-05, "loss": 0.648, "step": 11517 }, { "epoch": 0.3362821523459169, "grad_norm": 0.6322124674289974, "learning_rate": 1.4749716139497164e-05, "loss": 0.5803, "step": 11518 }, { "epoch": 0.33631134857376427, "grad_norm": 0.7591897339929218, "learning_rate": 1.4749067315490674e-05, "loss": 0.7219, "step": 11519 }, { "epoch": 0.33634054480161163, "grad_norm": 0.6422664357102201, "learning_rate": 1.4748418491484186e-05, "loss": 0.5955, "step": 11520 }, { "epoch": 0.336369741029459, "grad_norm": 1.2589505567357957, "learning_rate": 1.4747769667477698e-05, "loss": 0.5876, "step": 11521 }, { "epoch": 0.33639893725730635, "grad_norm": 0.665090479030631, "learning_rate": 1.474712084347121e-05, "loss": 0.6507, "step": 11522 }, { "epoch": 0.3364281334851537, "grad_norm": 0.681063785630846, "learning_rate": 1.474647201946472e-05, "loss": 0.732, "step": 11523 }, { "epoch": 0.3364573297130011, "grad_norm": 0.6431915671617786, "learning_rate": 1.4745823195458233e-05, "loss": 0.5834, "step": 11524 }, { "epoch": 0.33648652594084844, "grad_norm": 0.6708899117709668, "learning_rate": 1.4745174371451745e-05, "loss": 0.6646, "step": 11525 }, { "epoch": 0.3365157221686958, "grad_norm": 0.6738325310480134, "learning_rate": 1.4744525547445257e-05, "loss": 0.6123, "step": 11526 }, { "epoch": 0.33654491839654316, "grad_norm": 0.6700857226734567, "learning_rate": 1.4743876723438767e-05, "loss": 0.6718, "step": 11527 }, { "epoch": 0.3365741146243905, "grad_norm": 0.6423655871152706, "learning_rate": 1.474322789943228e-05, "loss": 0.6231, "step": 11528 }, { "epoch": 0.3366033108522379, "grad_norm": 0.6260783509251067, "learning_rate": 1.4742579075425791e-05, "loss": 0.5854, "step": 11529 }, { "epoch": 0.33663250708008524, "grad_norm": 0.6500172609101925, "learning_rate": 1.4741930251419303e-05, "loss": 0.615, "step": 11530 }, { "epoch": 0.3366617033079326, "grad_norm": 0.6268896968789383, "learning_rate": 1.4741281427412815e-05, "loss": 0.6019, "step": 11531 }, { "epoch": 0.33669089953577996, "grad_norm": 0.6570994375190614, "learning_rate": 1.4740632603406326e-05, "loss": 0.6073, "step": 11532 }, { "epoch": 0.3367200957636273, "grad_norm": 0.6150638338424065, "learning_rate": 1.4739983779399838e-05, "loss": 0.5565, "step": 11533 }, { "epoch": 0.3367492919914747, "grad_norm": 0.762932498145437, "learning_rate": 1.4739334955393351e-05, "loss": 0.6523, "step": 11534 }, { "epoch": 0.33677848821932205, "grad_norm": 0.6975424897045506, "learning_rate": 1.4738686131386863e-05, "loss": 0.6468, "step": 11535 }, { "epoch": 0.3368076844471694, "grad_norm": 0.6725451271995331, "learning_rate": 1.4738037307380375e-05, "loss": 0.6518, "step": 11536 }, { "epoch": 0.33683688067501677, "grad_norm": 0.6915711977481701, "learning_rate": 1.4737388483373888e-05, "loss": 0.6716, "step": 11537 }, { "epoch": 0.33686607690286413, "grad_norm": 0.668361514689177, "learning_rate": 1.4736739659367398e-05, "loss": 0.68, "step": 11538 }, { "epoch": 0.3368952731307115, "grad_norm": 0.6332467759412371, "learning_rate": 1.473609083536091e-05, "loss": 0.6053, "step": 11539 }, { "epoch": 0.33692446935855885, "grad_norm": 0.6555308827480006, "learning_rate": 1.4735442011354422e-05, "loss": 0.6574, "step": 11540 }, { "epoch": 0.3369536655864062, "grad_norm": 0.6566690932545004, "learning_rate": 1.4734793187347934e-05, "loss": 0.6085, "step": 11541 }, { "epoch": 0.3369828618142536, "grad_norm": 0.6312311541634944, "learning_rate": 1.4734144363341444e-05, "loss": 0.6042, "step": 11542 }, { "epoch": 0.33701205804210094, "grad_norm": 0.6469238123541131, "learning_rate": 1.4733495539334956e-05, "loss": 0.622, "step": 11543 }, { "epoch": 0.3370412542699483, "grad_norm": 0.6091418114261306, "learning_rate": 1.4732846715328468e-05, "loss": 0.5828, "step": 11544 }, { "epoch": 0.33707045049779566, "grad_norm": 0.6488266548110061, "learning_rate": 1.473219789132198e-05, "loss": 0.5972, "step": 11545 }, { "epoch": 0.337099646725643, "grad_norm": 0.6169456602364947, "learning_rate": 1.473154906731549e-05, "loss": 0.5673, "step": 11546 }, { "epoch": 0.3371288429534904, "grad_norm": 0.6563747860944557, "learning_rate": 1.4730900243309003e-05, "loss": 0.6097, "step": 11547 }, { "epoch": 0.3371580391813378, "grad_norm": 0.6540208895483646, "learning_rate": 1.4730251419302515e-05, "loss": 0.6119, "step": 11548 }, { "epoch": 0.33718723540918516, "grad_norm": 0.613779985890173, "learning_rate": 1.4729602595296027e-05, "loss": 0.5718, "step": 11549 }, { "epoch": 0.3372164316370325, "grad_norm": 0.637144211643795, "learning_rate": 1.4728953771289539e-05, "loss": 0.6307, "step": 11550 }, { "epoch": 0.3372456278648799, "grad_norm": 0.6285155210010821, "learning_rate": 1.4728304947283049e-05, "loss": 0.5758, "step": 11551 }, { "epoch": 0.33727482409272724, "grad_norm": 0.654798485390204, "learning_rate": 1.4727656123276561e-05, "loss": 0.6395, "step": 11552 }, { "epoch": 0.3373040203205746, "grad_norm": 0.6640847790215115, "learning_rate": 1.4727007299270075e-05, "loss": 0.6706, "step": 11553 }, { "epoch": 0.33733321654842197, "grad_norm": 0.6933313639152298, "learning_rate": 1.4726358475263587e-05, "loss": 0.6344, "step": 11554 }, { "epoch": 0.3373624127762693, "grad_norm": 0.6252888743743642, "learning_rate": 1.4725709651257099e-05, "loss": 0.5887, "step": 11555 }, { "epoch": 0.3373916090041167, "grad_norm": 0.6645484333820971, "learning_rate": 1.4725060827250611e-05, "loss": 0.6303, "step": 11556 }, { "epoch": 0.33742080523196405, "grad_norm": 0.6749454551939142, "learning_rate": 1.4724412003244121e-05, "loss": 0.703, "step": 11557 }, { "epoch": 0.3374500014598114, "grad_norm": 0.6662448832753121, "learning_rate": 1.4723763179237633e-05, "loss": 0.6246, "step": 11558 }, { "epoch": 0.33747919768765877, "grad_norm": 0.7035629197265791, "learning_rate": 1.4723114355231145e-05, "loss": 0.6887, "step": 11559 }, { "epoch": 0.33750839391550613, "grad_norm": 0.6888059323082982, "learning_rate": 1.4722465531224657e-05, "loss": 0.6556, "step": 11560 }, { "epoch": 0.3375375901433535, "grad_norm": 0.6149841140748116, "learning_rate": 1.4721816707218168e-05, "loss": 0.5517, "step": 11561 }, { "epoch": 0.33756678637120086, "grad_norm": 0.7066348410818931, "learning_rate": 1.472116788321168e-05, "loss": 0.6352, "step": 11562 }, { "epoch": 0.3375959825990482, "grad_norm": 0.6246964417300355, "learning_rate": 1.4720519059205192e-05, "loss": 0.571, "step": 11563 }, { "epoch": 0.3376251788268956, "grad_norm": 0.666680470913535, "learning_rate": 1.4719870235198704e-05, "loss": 0.6064, "step": 11564 }, { "epoch": 0.33765437505474294, "grad_norm": 0.6996165423059678, "learning_rate": 1.4719221411192214e-05, "loss": 0.7382, "step": 11565 }, { "epoch": 0.3376835712825903, "grad_norm": 0.6639947617236619, "learning_rate": 1.4718572587185726e-05, "loss": 0.6261, "step": 11566 }, { "epoch": 0.33771276751043766, "grad_norm": 0.6415430206626784, "learning_rate": 1.4717923763179238e-05, "loss": 0.6369, "step": 11567 }, { "epoch": 0.337741963738285, "grad_norm": 0.6330760528827997, "learning_rate": 1.471727493917275e-05, "loss": 0.6096, "step": 11568 }, { "epoch": 0.3377711599661324, "grad_norm": 0.6656747084611586, "learning_rate": 1.4716626115166262e-05, "loss": 0.6432, "step": 11569 }, { "epoch": 0.33780035619397975, "grad_norm": 0.6306129062193362, "learning_rate": 1.4715977291159773e-05, "loss": 0.5789, "step": 11570 }, { "epoch": 0.3378295524218271, "grad_norm": 0.6465507331752655, "learning_rate": 1.4715328467153285e-05, "loss": 0.6225, "step": 11571 }, { "epoch": 0.33785874864967447, "grad_norm": 0.6720078298190858, "learning_rate": 1.4714679643146798e-05, "loss": 0.7045, "step": 11572 }, { "epoch": 0.33788794487752183, "grad_norm": 0.6094584882022193, "learning_rate": 1.471403081914031e-05, "loss": 0.5758, "step": 11573 }, { "epoch": 0.3379171411053692, "grad_norm": 0.6473791027391672, "learning_rate": 1.4713381995133822e-05, "loss": 0.611, "step": 11574 }, { "epoch": 0.33794633733321655, "grad_norm": 0.6568364515480356, "learning_rate": 1.4712733171127334e-05, "loss": 0.6743, "step": 11575 }, { "epoch": 0.3379755335610639, "grad_norm": 0.6384334283878447, "learning_rate": 1.4712084347120845e-05, "loss": 0.6114, "step": 11576 }, { "epoch": 0.3380047297889113, "grad_norm": 0.7905786177581212, "learning_rate": 1.4711435523114357e-05, "loss": 0.6574, "step": 11577 }, { "epoch": 0.33803392601675863, "grad_norm": 0.633017835177432, "learning_rate": 1.4710786699107869e-05, "loss": 0.6175, "step": 11578 }, { "epoch": 0.338063122244606, "grad_norm": 0.6436820582083186, "learning_rate": 1.471013787510138e-05, "loss": 0.6338, "step": 11579 }, { "epoch": 0.33809231847245336, "grad_norm": 0.6413199209743161, "learning_rate": 1.4709489051094891e-05, "loss": 0.5556, "step": 11580 }, { "epoch": 0.3381215147003007, "grad_norm": 0.6722858660731434, "learning_rate": 1.4708840227088403e-05, "loss": 0.6692, "step": 11581 }, { "epoch": 0.3381507109281481, "grad_norm": 0.6682147876910655, "learning_rate": 1.4708191403081915e-05, "loss": 0.6557, "step": 11582 }, { "epoch": 0.33817990715599544, "grad_norm": 0.6328377593658517, "learning_rate": 1.4707542579075427e-05, "loss": 0.6156, "step": 11583 }, { "epoch": 0.3382091033838428, "grad_norm": 0.6703016092225331, "learning_rate": 1.4706893755068938e-05, "loss": 0.6288, "step": 11584 }, { "epoch": 0.33823829961169016, "grad_norm": 0.6703741312561082, "learning_rate": 1.470624493106245e-05, "loss": 0.6259, "step": 11585 }, { "epoch": 0.3382674958395375, "grad_norm": 0.6389591761973324, "learning_rate": 1.4705596107055962e-05, "loss": 0.6418, "step": 11586 }, { "epoch": 0.3382966920673849, "grad_norm": 0.6972679684609231, "learning_rate": 1.4704947283049474e-05, "loss": 0.6943, "step": 11587 }, { "epoch": 0.33832588829523225, "grad_norm": 0.6551933034172241, "learning_rate": 1.4704298459042986e-05, "loss": 0.6358, "step": 11588 }, { "epoch": 0.3383550845230796, "grad_norm": 0.6402485964083826, "learning_rate": 1.4703649635036496e-05, "loss": 0.6757, "step": 11589 }, { "epoch": 0.33838428075092697, "grad_norm": 0.6144595817570591, "learning_rate": 1.4703000811030008e-05, "loss": 0.5805, "step": 11590 }, { "epoch": 0.33841347697877433, "grad_norm": 0.6542945319519299, "learning_rate": 1.4702351987023522e-05, "loss": 0.6656, "step": 11591 }, { "epoch": 0.3384426732066217, "grad_norm": 0.6755228392325374, "learning_rate": 1.4701703163017034e-05, "loss": 0.6808, "step": 11592 }, { "epoch": 0.33847186943446905, "grad_norm": 0.6825597983940539, "learning_rate": 1.4701054339010546e-05, "loss": 0.6577, "step": 11593 }, { "epoch": 0.3385010656623164, "grad_norm": 0.6451294753661223, "learning_rate": 1.4700405515004058e-05, "loss": 0.6333, "step": 11594 }, { "epoch": 0.3385302618901638, "grad_norm": 0.6308695804427319, "learning_rate": 1.4699756690997568e-05, "loss": 0.5383, "step": 11595 }, { "epoch": 0.33855945811801114, "grad_norm": 0.7124422632702188, "learning_rate": 1.469910786699108e-05, "loss": 0.7143, "step": 11596 }, { "epoch": 0.3385886543458585, "grad_norm": 0.6560013866010814, "learning_rate": 1.4698459042984592e-05, "loss": 0.6629, "step": 11597 }, { "epoch": 0.33861785057370586, "grad_norm": 0.6581194746262438, "learning_rate": 1.4697810218978104e-05, "loss": 0.6075, "step": 11598 }, { "epoch": 0.3386470468015532, "grad_norm": 0.6787589635205612, "learning_rate": 1.4697161394971615e-05, "loss": 0.6558, "step": 11599 }, { "epoch": 0.3386762430294006, "grad_norm": 0.6525575935585994, "learning_rate": 1.4696512570965127e-05, "loss": 0.636, "step": 11600 }, { "epoch": 0.33870543925724794, "grad_norm": 0.6525564541683934, "learning_rate": 1.4695863746958639e-05, "loss": 0.6773, "step": 11601 }, { "epoch": 0.3387346354850953, "grad_norm": 0.6671870580205002, "learning_rate": 1.469521492295215e-05, "loss": 0.6818, "step": 11602 }, { "epoch": 0.33876383171294266, "grad_norm": 0.7131052381454767, "learning_rate": 1.4694566098945661e-05, "loss": 0.7254, "step": 11603 }, { "epoch": 0.33879302794079, "grad_norm": 0.618692242770534, "learning_rate": 1.4693917274939173e-05, "loss": 0.5887, "step": 11604 }, { "epoch": 0.3388222241686374, "grad_norm": 0.7134864379651072, "learning_rate": 1.4693268450932685e-05, "loss": 0.6908, "step": 11605 }, { "epoch": 0.33885142039648475, "grad_norm": 0.5985061076442022, "learning_rate": 1.4692619626926197e-05, "loss": 0.532, "step": 11606 }, { "epoch": 0.3388806166243321, "grad_norm": 0.6765145170040892, "learning_rate": 1.4691970802919709e-05, "loss": 0.6676, "step": 11607 }, { "epoch": 0.3389098128521795, "grad_norm": 0.6728107200276715, "learning_rate": 1.469132197891322e-05, "loss": 0.6715, "step": 11608 }, { "epoch": 0.3389390090800269, "grad_norm": 0.7146529489027401, "learning_rate": 1.4690673154906733e-05, "loss": 0.6531, "step": 11609 }, { "epoch": 0.33896820530787425, "grad_norm": 0.7124100486158078, "learning_rate": 1.4690024330900245e-05, "loss": 0.6816, "step": 11610 }, { "epoch": 0.3389974015357216, "grad_norm": 0.6307910322631045, "learning_rate": 1.4689375506893757e-05, "loss": 0.6211, "step": 11611 }, { "epoch": 0.33902659776356897, "grad_norm": 0.710861411373279, "learning_rate": 1.468872668288727e-05, "loss": 0.647, "step": 11612 }, { "epoch": 0.33905579399141633, "grad_norm": 0.6703704349325208, "learning_rate": 1.4688077858880781e-05, "loss": 0.6337, "step": 11613 }, { "epoch": 0.3390849902192637, "grad_norm": 0.6521057960353253, "learning_rate": 1.4687429034874292e-05, "loss": 0.594, "step": 11614 }, { "epoch": 0.33911418644711105, "grad_norm": 0.608211203974738, "learning_rate": 1.4686780210867804e-05, "loss": 0.5519, "step": 11615 }, { "epoch": 0.3391433826749584, "grad_norm": 0.7067751473835882, "learning_rate": 1.4686131386861316e-05, "loss": 0.6961, "step": 11616 }, { "epoch": 0.3391725789028058, "grad_norm": 0.6599028189072378, "learning_rate": 1.4685482562854828e-05, "loss": 0.725, "step": 11617 }, { "epoch": 0.33920177513065314, "grad_norm": 0.6346407572420011, "learning_rate": 1.4684833738848338e-05, "loss": 0.5726, "step": 11618 }, { "epoch": 0.3392309713585005, "grad_norm": 0.6719462918931371, "learning_rate": 1.468418491484185e-05, "loss": 0.6463, "step": 11619 }, { "epoch": 0.33926016758634786, "grad_norm": 0.6784002968587446, "learning_rate": 1.4683536090835362e-05, "loss": 0.7041, "step": 11620 }, { "epoch": 0.3392893638141952, "grad_norm": 0.6106057335000004, "learning_rate": 1.4682887266828874e-05, "loss": 0.5522, "step": 11621 }, { "epoch": 0.3393185600420426, "grad_norm": 0.6330983642971031, "learning_rate": 1.4682238442822384e-05, "loss": 0.577, "step": 11622 }, { "epoch": 0.33934775626988994, "grad_norm": 0.6724861975951183, "learning_rate": 1.4681589618815896e-05, "loss": 0.6278, "step": 11623 }, { "epoch": 0.3393769524977373, "grad_norm": 0.6380027160147576, "learning_rate": 1.4680940794809408e-05, "loss": 0.6322, "step": 11624 }, { "epoch": 0.33940614872558467, "grad_norm": 0.7163354249238956, "learning_rate": 1.468029197080292e-05, "loss": 0.7203, "step": 11625 }, { "epoch": 0.339435344953432, "grad_norm": 0.6332052531832267, "learning_rate": 1.4679643146796433e-05, "loss": 0.6254, "step": 11626 }, { "epoch": 0.3394645411812794, "grad_norm": 0.7312201074866782, "learning_rate": 1.4678994322789943e-05, "loss": 0.6711, "step": 11627 }, { "epoch": 0.33949373740912675, "grad_norm": 0.6023779262199166, "learning_rate": 1.4678345498783457e-05, "loss": 0.5965, "step": 11628 }, { "epoch": 0.3395229336369741, "grad_norm": 0.655908788500984, "learning_rate": 1.4677696674776969e-05, "loss": 0.6103, "step": 11629 }, { "epoch": 0.3395521298648215, "grad_norm": 0.5901559347315144, "learning_rate": 1.467704785077048e-05, "loss": 0.539, "step": 11630 }, { "epoch": 0.33958132609266883, "grad_norm": 0.6507658340849591, "learning_rate": 1.4676399026763993e-05, "loss": 0.6152, "step": 11631 }, { "epoch": 0.3396105223205162, "grad_norm": 0.6272009234003878, "learning_rate": 1.4675750202757505e-05, "loss": 0.5837, "step": 11632 }, { "epoch": 0.33963971854836356, "grad_norm": 0.6121328293080731, "learning_rate": 1.4675101378751015e-05, "loss": 0.5908, "step": 11633 }, { "epoch": 0.3396689147762109, "grad_norm": 0.7047290544856682, "learning_rate": 1.4674452554744527e-05, "loss": 0.7039, "step": 11634 }, { "epoch": 0.3396981110040583, "grad_norm": 0.7017816875332318, "learning_rate": 1.4673803730738039e-05, "loss": 0.7056, "step": 11635 }, { "epoch": 0.33972730723190564, "grad_norm": 0.6705100274281705, "learning_rate": 1.4673154906731551e-05, "loss": 0.6344, "step": 11636 }, { "epoch": 0.339756503459753, "grad_norm": 0.6139303604635432, "learning_rate": 1.4672506082725061e-05, "loss": 0.555, "step": 11637 }, { "epoch": 0.33978569968760036, "grad_norm": 0.6159038816377332, "learning_rate": 1.4671857258718573e-05, "loss": 0.53, "step": 11638 }, { "epoch": 0.3398148959154477, "grad_norm": 0.6550391810768433, "learning_rate": 1.4671208434712085e-05, "loss": 0.6399, "step": 11639 }, { "epoch": 0.3398440921432951, "grad_norm": 0.6432694660043707, "learning_rate": 1.4670559610705598e-05, "loss": 0.5916, "step": 11640 }, { "epoch": 0.33987328837114245, "grad_norm": 0.6732205796415575, "learning_rate": 1.4669910786699108e-05, "loss": 0.5984, "step": 11641 }, { "epoch": 0.3399024845989898, "grad_norm": 0.666303180122365, "learning_rate": 1.466926196269262e-05, "loss": 0.6336, "step": 11642 }, { "epoch": 0.33993168082683717, "grad_norm": 0.6880851789908792, "learning_rate": 1.4668613138686132e-05, "loss": 0.6212, "step": 11643 }, { "epoch": 0.33996087705468453, "grad_norm": 0.6720715150116539, "learning_rate": 1.4667964314679644e-05, "loss": 0.5834, "step": 11644 }, { "epoch": 0.3399900732825319, "grad_norm": 0.6188318097768265, "learning_rate": 1.4667315490673156e-05, "loss": 0.5683, "step": 11645 }, { "epoch": 0.34001926951037925, "grad_norm": 0.6515311853576065, "learning_rate": 1.4666666666666666e-05, "loss": 0.6242, "step": 11646 }, { "epoch": 0.3400484657382266, "grad_norm": 0.5999021014808202, "learning_rate": 1.466601784266018e-05, "loss": 0.562, "step": 11647 }, { "epoch": 0.340077661966074, "grad_norm": 0.6684745458434809, "learning_rate": 1.4665369018653692e-05, "loss": 0.6418, "step": 11648 }, { "epoch": 0.34010685819392134, "grad_norm": 0.6859523916231799, "learning_rate": 1.4664720194647204e-05, "loss": 0.6266, "step": 11649 }, { "epoch": 0.3401360544217687, "grad_norm": 0.6406680074528516, "learning_rate": 1.4664071370640716e-05, "loss": 0.5866, "step": 11650 }, { "epoch": 0.34016525064961606, "grad_norm": 0.6401792477595185, "learning_rate": 1.4663422546634226e-05, "loss": 0.6086, "step": 11651 }, { "epoch": 0.3401944468774634, "grad_norm": 0.6537730978851091, "learning_rate": 1.4662773722627738e-05, "loss": 0.5851, "step": 11652 }, { "epoch": 0.3402236431053108, "grad_norm": 0.6641133677590817, "learning_rate": 1.466212489862125e-05, "loss": 0.6622, "step": 11653 }, { "epoch": 0.34025283933315814, "grad_norm": 0.6773047638956159, "learning_rate": 1.4661476074614763e-05, "loss": 0.672, "step": 11654 }, { "epoch": 0.3402820355610055, "grad_norm": 0.6537837473563264, "learning_rate": 1.4660827250608275e-05, "loss": 0.6326, "step": 11655 }, { "epoch": 0.34031123178885286, "grad_norm": 0.6421944950155197, "learning_rate": 1.4660178426601785e-05, "loss": 0.6002, "step": 11656 }, { "epoch": 0.3403404280167002, "grad_norm": 0.6816544171114762, "learning_rate": 1.4659529602595297e-05, "loss": 0.6742, "step": 11657 }, { "epoch": 0.3403696242445476, "grad_norm": 0.5913467438462113, "learning_rate": 1.4658880778588809e-05, "loss": 0.546, "step": 11658 }, { "epoch": 0.34039882047239495, "grad_norm": 0.6005848288243045, "learning_rate": 1.4658231954582321e-05, "loss": 0.564, "step": 11659 }, { "epoch": 0.3404280167002423, "grad_norm": 0.6708903275076517, "learning_rate": 1.4657583130575831e-05, "loss": 0.6806, "step": 11660 }, { "epoch": 0.34045721292808967, "grad_norm": 0.6536881267063839, "learning_rate": 1.4656934306569343e-05, "loss": 0.6313, "step": 11661 }, { "epoch": 0.34048640915593703, "grad_norm": 0.6363925534711175, "learning_rate": 1.4656285482562855e-05, "loss": 0.6237, "step": 11662 }, { "epoch": 0.3405156053837844, "grad_norm": 0.6230859034206642, "learning_rate": 1.4655636658556367e-05, "loss": 0.5529, "step": 11663 }, { "epoch": 0.34054480161163175, "grad_norm": 0.6372059880436616, "learning_rate": 1.465498783454988e-05, "loss": 0.5894, "step": 11664 }, { "epoch": 0.3405739978394791, "grad_norm": 0.7222208995479905, "learning_rate": 1.465433901054339e-05, "loss": 0.6416, "step": 11665 }, { "epoch": 0.3406031940673265, "grad_norm": 0.7570718883660952, "learning_rate": 1.4653690186536903e-05, "loss": 0.7209, "step": 11666 }, { "epoch": 0.34063239029517384, "grad_norm": 0.6792391239870256, "learning_rate": 1.4653041362530415e-05, "loss": 0.6893, "step": 11667 }, { "epoch": 0.3406615865230212, "grad_norm": 0.6388038789676171, "learning_rate": 1.4652392538523927e-05, "loss": 0.5792, "step": 11668 }, { "epoch": 0.3406907827508686, "grad_norm": 0.6972617545793306, "learning_rate": 1.465174371451744e-05, "loss": 0.7265, "step": 11669 }, { "epoch": 0.340719978978716, "grad_norm": 0.6239026378264726, "learning_rate": 1.465109489051095e-05, "loss": 0.6251, "step": 11670 }, { "epoch": 0.34074917520656334, "grad_norm": 0.6148188609672386, "learning_rate": 1.4650446066504462e-05, "loss": 0.5751, "step": 11671 }, { "epoch": 0.3407783714344107, "grad_norm": 0.649967471317654, "learning_rate": 1.4649797242497974e-05, "loss": 0.6669, "step": 11672 }, { "epoch": 0.34080756766225806, "grad_norm": 0.6978462019746592, "learning_rate": 1.4649148418491486e-05, "loss": 0.6849, "step": 11673 }, { "epoch": 0.3408367638901054, "grad_norm": 0.6257679760917594, "learning_rate": 1.4648499594484998e-05, "loss": 0.5299, "step": 11674 }, { "epoch": 0.3408659601179528, "grad_norm": 0.6470151496532779, "learning_rate": 1.4647850770478508e-05, "loss": 0.6628, "step": 11675 }, { "epoch": 0.34089515634580014, "grad_norm": 0.6185634869813538, "learning_rate": 1.464720194647202e-05, "loss": 0.5785, "step": 11676 }, { "epoch": 0.3409243525736475, "grad_norm": 0.613561124437216, "learning_rate": 1.4646553122465532e-05, "loss": 0.5172, "step": 11677 }, { "epoch": 0.34095354880149487, "grad_norm": 0.6992408536390111, "learning_rate": 1.4645904298459044e-05, "loss": 0.6712, "step": 11678 }, { "epoch": 0.3409827450293422, "grad_norm": 0.7324226613324092, "learning_rate": 1.4645255474452555e-05, "loss": 0.7263, "step": 11679 }, { "epoch": 0.3410119412571896, "grad_norm": 0.6631122826335999, "learning_rate": 1.4644606650446067e-05, "loss": 0.5495, "step": 11680 }, { "epoch": 0.34104113748503695, "grad_norm": 0.7110201666561816, "learning_rate": 1.4643957826439579e-05, "loss": 0.6988, "step": 11681 }, { "epoch": 0.3410703337128843, "grad_norm": 0.584904227131961, "learning_rate": 1.464330900243309e-05, "loss": 0.4881, "step": 11682 }, { "epoch": 0.34109952994073167, "grad_norm": 0.6070670324817373, "learning_rate": 1.4642660178426603e-05, "loss": 0.586, "step": 11683 }, { "epoch": 0.34112872616857903, "grad_norm": 0.6899879844732515, "learning_rate": 1.4642011354420113e-05, "loss": 0.7023, "step": 11684 }, { "epoch": 0.3411579223964264, "grad_norm": 0.6795458269270476, "learning_rate": 1.4641362530413627e-05, "loss": 0.6256, "step": 11685 }, { "epoch": 0.34118711862427376, "grad_norm": 0.5961085402291242, "learning_rate": 1.4640713706407139e-05, "loss": 0.5361, "step": 11686 }, { "epoch": 0.3412163148521211, "grad_norm": 0.6847386609264166, "learning_rate": 1.4640064882400651e-05, "loss": 0.7078, "step": 11687 }, { "epoch": 0.3412455110799685, "grad_norm": 0.6882363912183894, "learning_rate": 1.4639416058394163e-05, "loss": 0.6718, "step": 11688 }, { "epoch": 0.34127470730781584, "grad_norm": 0.7253573479884188, "learning_rate": 1.4638767234387673e-05, "loss": 0.6439, "step": 11689 }, { "epoch": 0.3413039035356632, "grad_norm": 0.6779252770661415, "learning_rate": 1.4638118410381185e-05, "loss": 0.727, "step": 11690 }, { "epoch": 0.34133309976351056, "grad_norm": 0.6894624590558998, "learning_rate": 1.4637469586374697e-05, "loss": 0.6813, "step": 11691 }, { "epoch": 0.3413622959913579, "grad_norm": 0.7112824527896369, "learning_rate": 1.463682076236821e-05, "loss": 0.7291, "step": 11692 }, { "epoch": 0.3413914922192053, "grad_norm": 0.6767208719661871, "learning_rate": 1.4636171938361721e-05, "loss": 0.6483, "step": 11693 }, { "epoch": 0.34142068844705264, "grad_norm": 0.6827264167343483, "learning_rate": 1.4635523114355232e-05, "loss": 0.7005, "step": 11694 }, { "epoch": 0.3414498846749, "grad_norm": 0.847227213832243, "learning_rate": 1.4634874290348744e-05, "loss": 0.6414, "step": 11695 }, { "epoch": 0.34147908090274737, "grad_norm": 0.654080123336721, "learning_rate": 1.4634225466342256e-05, "loss": 0.634, "step": 11696 }, { "epoch": 0.34150827713059473, "grad_norm": 0.6177717353277383, "learning_rate": 1.4633576642335768e-05, "loss": 0.6011, "step": 11697 }, { "epoch": 0.3415374733584421, "grad_norm": 0.6740915038660834, "learning_rate": 1.4632927818329278e-05, "loss": 0.6837, "step": 11698 }, { "epoch": 0.34156666958628945, "grad_norm": 0.6676576973623427, "learning_rate": 1.463227899432279e-05, "loss": 0.6333, "step": 11699 }, { "epoch": 0.3415958658141368, "grad_norm": 0.6188217670911181, "learning_rate": 1.4631630170316302e-05, "loss": 0.5523, "step": 11700 }, { "epoch": 0.3416250620419842, "grad_norm": 0.6668153672255112, "learning_rate": 1.4630981346309814e-05, "loss": 0.69, "step": 11701 }, { "epoch": 0.34165425826983153, "grad_norm": 0.6956439354043283, "learning_rate": 1.4630332522303326e-05, "loss": 0.661, "step": 11702 }, { "epoch": 0.3416834544976789, "grad_norm": 0.6565171458313636, "learning_rate": 1.4629683698296837e-05, "loss": 0.6866, "step": 11703 }, { "epoch": 0.34171265072552626, "grad_norm": 0.674786763663811, "learning_rate": 1.462903487429035e-05, "loss": 0.696, "step": 11704 }, { "epoch": 0.3417418469533736, "grad_norm": 0.6456124804042782, "learning_rate": 1.4628386050283862e-05, "loss": 0.6594, "step": 11705 }, { "epoch": 0.341771043181221, "grad_norm": 0.638721140148245, "learning_rate": 1.4627737226277374e-05, "loss": 0.5891, "step": 11706 }, { "epoch": 0.34180023940906834, "grad_norm": 0.6430351196374687, "learning_rate": 1.4627088402270886e-05, "loss": 0.6156, "step": 11707 }, { "epoch": 0.3418294356369157, "grad_norm": 0.6723377354108231, "learning_rate": 1.4626439578264397e-05, "loss": 0.7168, "step": 11708 }, { "epoch": 0.34185863186476306, "grad_norm": 0.6933708796359179, "learning_rate": 1.4625790754257909e-05, "loss": 0.6756, "step": 11709 }, { "epoch": 0.3418878280926104, "grad_norm": 0.6830316135483905, "learning_rate": 1.462514193025142e-05, "loss": 0.71, "step": 11710 }, { "epoch": 0.3419170243204578, "grad_norm": 0.657871495439194, "learning_rate": 1.4624493106244933e-05, "loss": 0.6433, "step": 11711 }, { "epoch": 0.34194622054830515, "grad_norm": 0.6751351129083296, "learning_rate": 1.4623844282238445e-05, "loss": 0.7041, "step": 11712 }, { "epoch": 0.3419754167761525, "grad_norm": 0.6493617955705916, "learning_rate": 1.4623195458231955e-05, "loss": 0.6245, "step": 11713 }, { "epoch": 0.34200461300399987, "grad_norm": 0.6489774087992392, "learning_rate": 1.4622546634225467e-05, "loss": 0.5986, "step": 11714 }, { "epoch": 0.34203380923184723, "grad_norm": 0.6573802654420073, "learning_rate": 1.462189781021898e-05, "loss": 0.6763, "step": 11715 }, { "epoch": 0.3420630054596946, "grad_norm": 0.7322061793556184, "learning_rate": 1.4621248986212491e-05, "loss": 0.7402, "step": 11716 }, { "epoch": 0.34209220168754195, "grad_norm": 0.6712923279858226, "learning_rate": 1.4620600162206002e-05, "loss": 0.7196, "step": 11717 }, { "epoch": 0.3421213979153893, "grad_norm": 0.6767129722575901, "learning_rate": 1.4619951338199514e-05, "loss": 0.673, "step": 11718 }, { "epoch": 0.3421505941432367, "grad_norm": 0.6767614149559958, "learning_rate": 1.4619302514193026e-05, "loss": 0.6595, "step": 11719 }, { "epoch": 0.34217979037108404, "grad_norm": 0.6490514275857987, "learning_rate": 1.4618653690186538e-05, "loss": 0.6486, "step": 11720 }, { "epoch": 0.3422089865989314, "grad_norm": 0.6844698207189793, "learning_rate": 1.461800486618005e-05, "loss": 0.694, "step": 11721 }, { "epoch": 0.34223818282677876, "grad_norm": 0.668537719888005, "learning_rate": 1.461735604217356e-05, "loss": 0.6151, "step": 11722 }, { "epoch": 0.3422673790546261, "grad_norm": 0.6908089174929957, "learning_rate": 1.4616707218167074e-05, "loss": 0.6724, "step": 11723 }, { "epoch": 0.3422965752824735, "grad_norm": 0.6695961718783526, "learning_rate": 1.4616058394160586e-05, "loss": 0.6427, "step": 11724 }, { "epoch": 0.34232577151032084, "grad_norm": 0.7455976183858621, "learning_rate": 1.4615409570154098e-05, "loss": 0.6437, "step": 11725 }, { "epoch": 0.3423549677381682, "grad_norm": 0.6398329379036819, "learning_rate": 1.461476074614761e-05, "loss": 0.6098, "step": 11726 }, { "epoch": 0.34238416396601556, "grad_norm": 0.6533708067546184, "learning_rate": 1.461411192214112e-05, "loss": 0.6665, "step": 11727 }, { "epoch": 0.3424133601938629, "grad_norm": 0.6405380281206975, "learning_rate": 1.4613463098134632e-05, "loss": 0.6421, "step": 11728 }, { "epoch": 0.34244255642171034, "grad_norm": 0.6832437426882555, "learning_rate": 1.4612814274128144e-05, "loss": 0.623, "step": 11729 }, { "epoch": 0.3424717526495577, "grad_norm": 0.6922333864135652, "learning_rate": 1.4612165450121656e-05, "loss": 0.6746, "step": 11730 }, { "epoch": 0.34250094887740506, "grad_norm": 0.6321832433308077, "learning_rate": 1.4611516626115168e-05, "loss": 0.6218, "step": 11731 }, { "epoch": 0.3425301451052524, "grad_norm": 0.5954682295322005, "learning_rate": 1.4610867802108679e-05, "loss": 0.5323, "step": 11732 }, { "epoch": 0.3425593413330998, "grad_norm": 0.6290913085361644, "learning_rate": 1.461021897810219e-05, "loss": 0.5985, "step": 11733 }, { "epoch": 0.34258853756094715, "grad_norm": 0.6097042850099592, "learning_rate": 1.4609570154095703e-05, "loss": 0.5827, "step": 11734 }, { "epoch": 0.3426177337887945, "grad_norm": 0.767973941362686, "learning_rate": 1.4608921330089215e-05, "loss": 0.6511, "step": 11735 }, { "epoch": 0.34264693001664187, "grad_norm": 0.6571369720142399, "learning_rate": 1.4608272506082725e-05, "loss": 0.6582, "step": 11736 }, { "epoch": 0.34267612624448923, "grad_norm": 0.7030506301744706, "learning_rate": 1.4607623682076237e-05, "loss": 0.6907, "step": 11737 }, { "epoch": 0.3427053224723366, "grad_norm": 0.6133838149877686, "learning_rate": 1.4606974858069749e-05, "loss": 0.5915, "step": 11738 }, { "epoch": 0.34273451870018395, "grad_norm": 0.6075427429870363, "learning_rate": 1.4606326034063261e-05, "loss": 0.5391, "step": 11739 }, { "epoch": 0.3427637149280313, "grad_norm": 0.6275687421920757, "learning_rate": 1.4605677210056773e-05, "loss": 0.635, "step": 11740 }, { "epoch": 0.3427929111558787, "grad_norm": 0.6421719346116519, "learning_rate": 1.4605028386050283e-05, "loss": 0.6403, "step": 11741 }, { "epoch": 0.34282210738372604, "grad_norm": 0.703885510713835, "learning_rate": 1.4604379562043797e-05, "loss": 0.7159, "step": 11742 }, { "epoch": 0.3428513036115734, "grad_norm": 0.6404470458708499, "learning_rate": 1.460373073803731e-05, "loss": 0.5962, "step": 11743 }, { "epoch": 0.34288049983942076, "grad_norm": 0.6696026543470367, "learning_rate": 1.4603081914030821e-05, "loss": 0.6557, "step": 11744 }, { "epoch": 0.3429096960672681, "grad_norm": 0.6404743372664727, "learning_rate": 1.4602433090024333e-05, "loss": 0.6112, "step": 11745 }, { "epoch": 0.3429388922951155, "grad_norm": 0.6602890711071471, "learning_rate": 1.4601784266017844e-05, "loss": 0.6368, "step": 11746 }, { "epoch": 0.34296808852296284, "grad_norm": 0.6727977412603735, "learning_rate": 1.4601135442011356e-05, "loss": 0.6309, "step": 11747 }, { "epoch": 0.3429972847508102, "grad_norm": 0.6708888308442148, "learning_rate": 1.4600486618004868e-05, "loss": 0.6392, "step": 11748 }, { "epoch": 0.34302648097865757, "grad_norm": 0.6712713267224153, "learning_rate": 1.459983779399838e-05, "loss": 0.6373, "step": 11749 }, { "epoch": 0.3430556772065049, "grad_norm": 0.6888614437183257, "learning_rate": 1.4599188969991892e-05, "loss": 0.671, "step": 11750 }, { "epoch": 0.3430848734343523, "grad_norm": 0.6423352531740475, "learning_rate": 1.4598540145985402e-05, "loss": 0.6237, "step": 11751 }, { "epoch": 0.34311406966219965, "grad_norm": 0.6401195067148912, "learning_rate": 1.4597891321978914e-05, "loss": 0.5911, "step": 11752 }, { "epoch": 0.343143265890047, "grad_norm": 0.7546749778856593, "learning_rate": 1.4597242497972426e-05, "loss": 0.7641, "step": 11753 }, { "epoch": 0.3431724621178944, "grad_norm": 0.6540121767048583, "learning_rate": 1.4596593673965938e-05, "loss": 0.6028, "step": 11754 }, { "epoch": 0.34320165834574173, "grad_norm": 0.6988612999810585, "learning_rate": 1.4595944849959448e-05, "loss": 0.7107, "step": 11755 }, { "epoch": 0.3432308545735891, "grad_norm": 0.7415638068892678, "learning_rate": 1.459529602595296e-05, "loss": 0.7751, "step": 11756 }, { "epoch": 0.34326005080143646, "grad_norm": 0.6415207040305522, "learning_rate": 1.4594647201946473e-05, "loss": 0.6432, "step": 11757 }, { "epoch": 0.3432892470292838, "grad_norm": 0.641156134330627, "learning_rate": 1.4593998377939985e-05, "loss": 0.6419, "step": 11758 }, { "epoch": 0.3433184432571312, "grad_norm": 0.7265828086351784, "learning_rate": 1.4593349553933497e-05, "loss": 0.6838, "step": 11759 }, { "epoch": 0.34334763948497854, "grad_norm": 0.6692812852230262, "learning_rate": 1.459270072992701e-05, "loss": 0.6585, "step": 11760 }, { "epoch": 0.3433768357128259, "grad_norm": 0.668767255718233, "learning_rate": 1.459205190592052e-05, "loss": 0.6485, "step": 11761 }, { "epoch": 0.34340603194067326, "grad_norm": 0.6842112203902111, "learning_rate": 1.4591403081914033e-05, "loss": 0.6747, "step": 11762 }, { "epoch": 0.3434352281685206, "grad_norm": 0.6868142100890471, "learning_rate": 1.4590754257907545e-05, "loss": 0.6802, "step": 11763 }, { "epoch": 0.343464424396368, "grad_norm": 0.5996174051077503, "learning_rate": 1.4590105433901057e-05, "loss": 0.5279, "step": 11764 }, { "epoch": 0.34349362062421535, "grad_norm": 0.7085276575015409, "learning_rate": 1.4589456609894567e-05, "loss": 0.6924, "step": 11765 }, { "epoch": 0.3435228168520627, "grad_norm": 0.6361704562656788, "learning_rate": 1.4588807785888079e-05, "loss": 0.64, "step": 11766 }, { "epoch": 0.34355201307991007, "grad_norm": 0.6645676863647476, "learning_rate": 1.4588158961881591e-05, "loss": 0.6616, "step": 11767 }, { "epoch": 0.34358120930775743, "grad_norm": 0.6767495005948428, "learning_rate": 1.4587510137875103e-05, "loss": 0.6546, "step": 11768 }, { "epoch": 0.3436104055356048, "grad_norm": 0.6451090131427332, "learning_rate": 1.4586861313868615e-05, "loss": 0.6101, "step": 11769 }, { "epoch": 0.34363960176345215, "grad_norm": 0.638678973917555, "learning_rate": 1.4586212489862125e-05, "loss": 0.6061, "step": 11770 }, { "epoch": 0.3436687979912995, "grad_norm": 0.667913884658983, "learning_rate": 1.4585563665855637e-05, "loss": 0.6426, "step": 11771 }, { "epoch": 0.3436979942191469, "grad_norm": 0.6169342975069462, "learning_rate": 1.458491484184915e-05, "loss": 0.5852, "step": 11772 }, { "epoch": 0.34372719044699424, "grad_norm": 0.6559848228027103, "learning_rate": 1.4584266017842662e-05, "loss": 0.6102, "step": 11773 }, { "epoch": 0.3437563866748416, "grad_norm": 0.6674766882998076, "learning_rate": 1.4583617193836172e-05, "loss": 0.6372, "step": 11774 }, { "epoch": 0.34378558290268896, "grad_norm": 0.6866151432172259, "learning_rate": 1.4582968369829684e-05, "loss": 0.7078, "step": 11775 }, { "epoch": 0.3438147791305363, "grad_norm": 0.6383203386941906, "learning_rate": 1.4582319545823196e-05, "loss": 0.5711, "step": 11776 }, { "epoch": 0.3438439753583837, "grad_norm": 0.6647609182282779, "learning_rate": 1.4581670721816708e-05, "loss": 0.6688, "step": 11777 }, { "epoch": 0.34387317158623104, "grad_norm": 0.6731171078168166, "learning_rate": 1.458102189781022e-05, "loss": 0.5639, "step": 11778 }, { "epoch": 0.3439023678140784, "grad_norm": 0.6436840979278767, "learning_rate": 1.4580373073803734e-05, "loss": 0.5805, "step": 11779 }, { "epoch": 0.34393156404192576, "grad_norm": 0.67395051454681, "learning_rate": 1.4579724249797244e-05, "loss": 0.6441, "step": 11780 }, { "epoch": 0.3439607602697731, "grad_norm": 0.6816364917302973, "learning_rate": 1.4579075425790756e-05, "loss": 0.639, "step": 11781 }, { "epoch": 0.3439899564976205, "grad_norm": 0.6435834676835369, "learning_rate": 1.4578426601784268e-05, "loss": 0.604, "step": 11782 }, { "epoch": 0.34401915272546785, "grad_norm": 0.6541653359515087, "learning_rate": 1.457777777777778e-05, "loss": 0.6343, "step": 11783 }, { "epoch": 0.3440483489533152, "grad_norm": 0.6327272017209622, "learning_rate": 1.457712895377129e-05, "loss": 0.6156, "step": 11784 }, { "epoch": 0.34407754518116257, "grad_norm": 0.6685492157668154, "learning_rate": 1.4576480129764802e-05, "loss": 0.6796, "step": 11785 }, { "epoch": 0.34410674140900993, "grad_norm": 0.6949225780643709, "learning_rate": 1.4575831305758315e-05, "loss": 0.6832, "step": 11786 }, { "epoch": 0.3441359376368573, "grad_norm": 0.6861694173306774, "learning_rate": 1.4575182481751827e-05, "loss": 0.6654, "step": 11787 }, { "epoch": 0.34416513386470465, "grad_norm": 0.6538477214469016, "learning_rate": 1.4574533657745339e-05, "loss": 0.6635, "step": 11788 }, { "epoch": 0.34419433009255207, "grad_norm": 0.5805071309531311, "learning_rate": 1.4573884833738849e-05, "loss": 0.5657, "step": 11789 }, { "epoch": 0.34422352632039943, "grad_norm": 0.6438546889279015, "learning_rate": 1.4573236009732361e-05, "loss": 0.6048, "step": 11790 }, { "epoch": 0.3442527225482468, "grad_norm": 0.6686684212489644, "learning_rate": 1.4572587185725873e-05, "loss": 0.6641, "step": 11791 }, { "epoch": 0.34428191877609415, "grad_norm": 0.6389881173619019, "learning_rate": 1.4571938361719385e-05, "loss": 0.6048, "step": 11792 }, { "epoch": 0.3443111150039415, "grad_norm": 0.6138899398735366, "learning_rate": 1.4571289537712895e-05, "loss": 0.5749, "step": 11793 }, { "epoch": 0.3443403112317889, "grad_norm": 0.636018854945791, "learning_rate": 1.4570640713706407e-05, "loss": 0.6163, "step": 11794 }, { "epoch": 0.34436950745963624, "grad_norm": 0.6986069811645909, "learning_rate": 1.456999188969992e-05, "loss": 0.6058, "step": 11795 }, { "epoch": 0.3443987036874836, "grad_norm": 0.6576409477075693, "learning_rate": 1.4569343065693431e-05, "loss": 0.6174, "step": 11796 }, { "epoch": 0.34442789991533096, "grad_norm": 0.7070835340052452, "learning_rate": 1.4568694241686942e-05, "loss": 0.6716, "step": 11797 }, { "epoch": 0.3444570961431783, "grad_norm": 0.6250760160374985, "learning_rate": 1.4568045417680457e-05, "loss": 0.5733, "step": 11798 }, { "epoch": 0.3444862923710257, "grad_norm": 0.6512164617216702, "learning_rate": 1.4567396593673967e-05, "loss": 0.5857, "step": 11799 }, { "epoch": 0.34451548859887304, "grad_norm": 0.6701259143234476, "learning_rate": 1.456674776966748e-05, "loss": 0.6843, "step": 11800 }, { "epoch": 0.3445446848267204, "grad_norm": 0.7258142084739071, "learning_rate": 1.4566098945660992e-05, "loss": 0.6671, "step": 11801 }, { "epoch": 0.34457388105456777, "grad_norm": 0.7225602063670807, "learning_rate": 1.4565450121654504e-05, "loss": 0.742, "step": 11802 }, { "epoch": 0.3446030772824151, "grad_norm": 0.7875127232009196, "learning_rate": 1.4564801297648014e-05, "loss": 0.8414, "step": 11803 }, { "epoch": 0.3446322735102625, "grad_norm": 0.6607135388481636, "learning_rate": 1.4564152473641526e-05, "loss": 0.6374, "step": 11804 }, { "epoch": 0.34466146973810985, "grad_norm": 0.6874506320443967, "learning_rate": 1.4563503649635038e-05, "loss": 0.6676, "step": 11805 }, { "epoch": 0.3446906659659572, "grad_norm": 0.6662002793365861, "learning_rate": 1.456285482562855e-05, "loss": 0.664, "step": 11806 }, { "epoch": 0.34471986219380457, "grad_norm": 0.7021727688122665, "learning_rate": 1.4562206001622062e-05, "loss": 0.6834, "step": 11807 }, { "epoch": 0.34474905842165193, "grad_norm": 0.7403941394894283, "learning_rate": 1.4561557177615572e-05, "loss": 0.6849, "step": 11808 }, { "epoch": 0.3447782546494993, "grad_norm": 0.5799399162779536, "learning_rate": 1.4560908353609084e-05, "loss": 0.535, "step": 11809 }, { "epoch": 0.34480745087734666, "grad_norm": 0.6383822391802672, "learning_rate": 1.4560259529602596e-05, "loss": 0.5929, "step": 11810 }, { "epoch": 0.344836647105194, "grad_norm": 0.5943351490815748, "learning_rate": 1.4559610705596108e-05, "loss": 0.5321, "step": 11811 }, { "epoch": 0.3448658433330414, "grad_norm": 0.7584290212904065, "learning_rate": 1.4558961881589619e-05, "loss": 0.8141, "step": 11812 }, { "epoch": 0.34489503956088874, "grad_norm": 0.6906880918094409, "learning_rate": 1.455831305758313e-05, "loss": 0.6608, "step": 11813 }, { "epoch": 0.3449242357887361, "grad_norm": 0.618085022276222, "learning_rate": 1.4557664233576643e-05, "loss": 0.5897, "step": 11814 }, { "epoch": 0.34495343201658346, "grad_norm": 0.6660517469934043, "learning_rate": 1.4557015409570155e-05, "loss": 0.6625, "step": 11815 }, { "epoch": 0.3449826282444308, "grad_norm": 0.6383200857697257, "learning_rate": 1.4556366585563665e-05, "loss": 0.6291, "step": 11816 }, { "epoch": 0.3450118244722782, "grad_norm": 0.6235508777926454, "learning_rate": 1.455571776155718e-05, "loss": 0.5722, "step": 11817 }, { "epoch": 0.34504102070012554, "grad_norm": 0.6573436123838698, "learning_rate": 1.4555068937550691e-05, "loss": 0.633, "step": 11818 }, { "epoch": 0.3450702169279729, "grad_norm": 0.6959497990867066, "learning_rate": 1.4554420113544203e-05, "loss": 0.6215, "step": 11819 }, { "epoch": 0.34509941315582027, "grad_norm": 0.6003332059567704, "learning_rate": 1.4553771289537715e-05, "loss": 0.5716, "step": 11820 }, { "epoch": 0.34512860938366763, "grad_norm": 0.6468423174032705, "learning_rate": 1.4553122465531227e-05, "loss": 0.6249, "step": 11821 }, { "epoch": 0.345157805611515, "grad_norm": 0.634644740578963, "learning_rate": 1.4552473641524737e-05, "loss": 0.5949, "step": 11822 }, { "epoch": 0.34518700183936235, "grad_norm": 0.6443726249657233, "learning_rate": 1.455182481751825e-05, "loss": 0.5995, "step": 11823 }, { "epoch": 0.3452161980672097, "grad_norm": 0.7099023914906429, "learning_rate": 1.4551175993511761e-05, "loss": 0.7158, "step": 11824 }, { "epoch": 0.3452453942950571, "grad_norm": 0.6177890040863177, "learning_rate": 1.4550527169505273e-05, "loss": 0.6119, "step": 11825 }, { "epoch": 0.34527459052290443, "grad_norm": 0.633071727170291, "learning_rate": 1.4549878345498785e-05, "loss": 0.6365, "step": 11826 }, { "epoch": 0.3453037867507518, "grad_norm": 0.6670039070608461, "learning_rate": 1.4549229521492296e-05, "loss": 0.666, "step": 11827 }, { "epoch": 0.34533298297859916, "grad_norm": 0.7386832547190916, "learning_rate": 1.4548580697485808e-05, "loss": 0.718, "step": 11828 }, { "epoch": 0.3453621792064465, "grad_norm": 0.681915219521706, "learning_rate": 1.454793187347932e-05, "loss": 0.6968, "step": 11829 }, { "epoch": 0.3453913754342939, "grad_norm": 0.643849306142164, "learning_rate": 1.4547283049472832e-05, "loss": 0.6562, "step": 11830 }, { "epoch": 0.34542057166214124, "grad_norm": 0.6685485984617399, "learning_rate": 1.4546634225466342e-05, "loss": 0.6299, "step": 11831 }, { "epoch": 0.3454497678899886, "grad_norm": 0.7159487877252964, "learning_rate": 1.4545985401459854e-05, "loss": 0.7555, "step": 11832 }, { "epoch": 0.34547896411783596, "grad_norm": 0.7270185989576904, "learning_rate": 1.4545336577453366e-05, "loss": 0.616, "step": 11833 }, { "epoch": 0.3455081603456833, "grad_norm": 0.6372568534515594, "learning_rate": 1.4544687753446878e-05, "loss": 0.6037, "step": 11834 }, { "epoch": 0.3455373565735307, "grad_norm": 0.6645172200072571, "learning_rate": 1.4544038929440389e-05, "loss": 0.6042, "step": 11835 }, { "epoch": 0.34556655280137805, "grad_norm": 0.6563080458936826, "learning_rate": 1.4543390105433904e-05, "loss": 0.6242, "step": 11836 }, { "epoch": 0.3455957490292254, "grad_norm": 0.6519206324522154, "learning_rate": 1.4542741281427414e-05, "loss": 0.6314, "step": 11837 }, { "epoch": 0.34562494525707277, "grad_norm": 0.64032738684772, "learning_rate": 1.4542092457420926e-05, "loss": 0.627, "step": 11838 }, { "epoch": 0.34565414148492013, "grad_norm": 0.6498497095314707, "learning_rate": 1.4541443633414438e-05, "loss": 0.6432, "step": 11839 }, { "epoch": 0.3456833377127675, "grad_norm": 0.7618941405301998, "learning_rate": 1.454079480940795e-05, "loss": 0.6866, "step": 11840 }, { "epoch": 0.34571253394061485, "grad_norm": 0.6098347038362366, "learning_rate": 1.454014598540146e-05, "loss": 0.5535, "step": 11841 }, { "epoch": 0.3457417301684622, "grad_norm": 0.6446116458837975, "learning_rate": 1.4539497161394973e-05, "loss": 0.6239, "step": 11842 }, { "epoch": 0.3457709263963096, "grad_norm": 0.7646271351984443, "learning_rate": 1.4538848337388485e-05, "loss": 0.6492, "step": 11843 }, { "epoch": 0.34580012262415694, "grad_norm": 0.7085189661110508, "learning_rate": 1.4538199513381997e-05, "loss": 0.7287, "step": 11844 }, { "epoch": 0.3458293188520043, "grad_norm": 0.6799997188646779, "learning_rate": 1.4537550689375509e-05, "loss": 0.686, "step": 11845 }, { "epoch": 0.34585851507985166, "grad_norm": 0.7419136146030367, "learning_rate": 1.453690186536902e-05, "loss": 0.7016, "step": 11846 }, { "epoch": 0.345887711307699, "grad_norm": 0.6302108310239426, "learning_rate": 1.4536253041362531e-05, "loss": 0.563, "step": 11847 }, { "epoch": 0.3459169075355464, "grad_norm": 0.719700682494331, "learning_rate": 1.4535604217356043e-05, "loss": 0.7094, "step": 11848 }, { "epoch": 0.34594610376339374, "grad_norm": 0.7103491371338359, "learning_rate": 1.4534955393349555e-05, "loss": 0.742, "step": 11849 }, { "epoch": 0.34597529999124116, "grad_norm": 0.6503603792123128, "learning_rate": 1.4534306569343066e-05, "loss": 0.6261, "step": 11850 }, { "epoch": 0.3460044962190885, "grad_norm": 0.6259835107780054, "learning_rate": 1.4533657745336578e-05, "loss": 0.5504, "step": 11851 }, { "epoch": 0.3460336924469359, "grad_norm": 0.6199017627673414, "learning_rate": 1.453300892133009e-05, "loss": 0.5665, "step": 11852 }, { "epoch": 0.34606288867478324, "grad_norm": 0.7360229639928356, "learning_rate": 1.4532360097323602e-05, "loss": 0.6287, "step": 11853 }, { "epoch": 0.3460920849026306, "grad_norm": 0.6439046535564156, "learning_rate": 1.4531711273317112e-05, "loss": 0.6221, "step": 11854 }, { "epoch": 0.34612128113047796, "grad_norm": 0.6873151556344139, "learning_rate": 1.4531062449310627e-05, "loss": 0.6615, "step": 11855 }, { "epoch": 0.3461504773583253, "grad_norm": 0.6297463102966697, "learning_rate": 1.4530413625304138e-05, "loss": 0.5885, "step": 11856 }, { "epoch": 0.3461796735861727, "grad_norm": 0.6957343293412865, "learning_rate": 1.452976480129765e-05, "loss": 0.6406, "step": 11857 }, { "epoch": 0.34620886981402005, "grad_norm": 0.6248157170963794, "learning_rate": 1.4529115977291162e-05, "loss": 0.5895, "step": 11858 }, { "epoch": 0.3462380660418674, "grad_norm": 0.6102879470317566, "learning_rate": 1.4528467153284674e-05, "loss": 0.5779, "step": 11859 }, { "epoch": 0.34626726226971477, "grad_norm": 0.6344622922895925, "learning_rate": 1.4527818329278184e-05, "loss": 0.6051, "step": 11860 }, { "epoch": 0.34629645849756213, "grad_norm": 0.617485056213271, "learning_rate": 1.4527169505271696e-05, "loss": 0.5677, "step": 11861 }, { "epoch": 0.3463256547254095, "grad_norm": 0.6413143987502059, "learning_rate": 1.4526520681265208e-05, "loss": 0.5856, "step": 11862 }, { "epoch": 0.34635485095325685, "grad_norm": 0.7194986203179287, "learning_rate": 1.452587185725872e-05, "loss": 0.5813, "step": 11863 }, { "epoch": 0.3463840471811042, "grad_norm": 0.6787620544831771, "learning_rate": 1.4525223033252232e-05, "loss": 0.6653, "step": 11864 }, { "epoch": 0.3464132434089516, "grad_norm": 0.6456710564783008, "learning_rate": 1.4524574209245743e-05, "loss": 0.6448, "step": 11865 }, { "epoch": 0.34644243963679894, "grad_norm": 0.7176032942560184, "learning_rate": 1.4523925385239255e-05, "loss": 0.6039, "step": 11866 }, { "epoch": 0.3464716358646463, "grad_norm": 0.6626121844256212, "learning_rate": 1.4523276561232767e-05, "loss": 0.6116, "step": 11867 }, { "epoch": 0.34650083209249366, "grad_norm": 0.669457960364926, "learning_rate": 1.4522627737226279e-05, "loss": 0.6529, "step": 11868 }, { "epoch": 0.346530028320341, "grad_norm": 0.6430082201683034, "learning_rate": 1.4521978913219789e-05, "loss": 0.6067, "step": 11869 }, { "epoch": 0.3465592245481884, "grad_norm": 0.6686398312544161, "learning_rate": 1.4521330089213301e-05, "loss": 0.6608, "step": 11870 }, { "epoch": 0.34658842077603574, "grad_norm": 0.6305007410158008, "learning_rate": 1.4520681265206813e-05, "loss": 0.5922, "step": 11871 }, { "epoch": 0.3466176170038831, "grad_norm": 0.6356967045992624, "learning_rate": 1.4520032441200325e-05, "loss": 0.5808, "step": 11872 }, { "epoch": 0.34664681323173047, "grad_norm": 0.6339321426488604, "learning_rate": 1.4519383617193835e-05, "loss": 0.608, "step": 11873 }, { "epoch": 0.3466760094595778, "grad_norm": 0.6169587077642793, "learning_rate": 1.4518734793187351e-05, "loss": 0.5764, "step": 11874 }, { "epoch": 0.3467052056874252, "grad_norm": 0.6867110191101073, "learning_rate": 1.4518085969180861e-05, "loss": 0.6108, "step": 11875 }, { "epoch": 0.34673440191527255, "grad_norm": 0.6146088980544615, "learning_rate": 1.4517437145174373e-05, "loss": 0.5436, "step": 11876 }, { "epoch": 0.3467635981431199, "grad_norm": 0.6069781432054641, "learning_rate": 1.4516788321167885e-05, "loss": 0.5877, "step": 11877 }, { "epoch": 0.34679279437096727, "grad_norm": 0.7165034692049361, "learning_rate": 1.4516139497161397e-05, "loss": 0.7418, "step": 11878 }, { "epoch": 0.34682199059881463, "grad_norm": 0.6775432389775927, "learning_rate": 1.4515490673154908e-05, "loss": 0.6684, "step": 11879 }, { "epoch": 0.346851186826662, "grad_norm": 0.6721023817352502, "learning_rate": 1.451484184914842e-05, "loss": 0.6488, "step": 11880 }, { "epoch": 0.34688038305450936, "grad_norm": 0.684194669527129, "learning_rate": 1.4514193025141932e-05, "loss": 0.7339, "step": 11881 }, { "epoch": 0.3469095792823567, "grad_norm": 0.7300462231463204, "learning_rate": 1.4513544201135444e-05, "loss": 0.6415, "step": 11882 }, { "epoch": 0.3469387755102041, "grad_norm": 0.61562029484631, "learning_rate": 1.4512895377128956e-05, "loss": 0.5754, "step": 11883 }, { "epoch": 0.34696797173805144, "grad_norm": 0.6745749582675916, "learning_rate": 1.4512246553122466e-05, "loss": 0.7232, "step": 11884 }, { "epoch": 0.3469971679658988, "grad_norm": 0.6166769790108682, "learning_rate": 1.4511597729115978e-05, "loss": 0.5787, "step": 11885 }, { "epoch": 0.34702636419374616, "grad_norm": 0.6101921654292468, "learning_rate": 1.451094890510949e-05, "loss": 0.5742, "step": 11886 }, { "epoch": 0.3470555604215935, "grad_norm": 0.667130579496834, "learning_rate": 1.4510300081103002e-05, "loss": 0.6569, "step": 11887 }, { "epoch": 0.3470847566494409, "grad_norm": 0.6285295227590313, "learning_rate": 1.4509651257096512e-05, "loss": 0.587, "step": 11888 }, { "epoch": 0.34711395287728825, "grad_norm": 0.6426937622191512, "learning_rate": 1.4509002433090025e-05, "loss": 0.6294, "step": 11889 }, { "epoch": 0.3471431491051356, "grad_norm": 0.6774247356041201, "learning_rate": 1.4508353609083537e-05, "loss": 0.6202, "step": 11890 }, { "epoch": 0.34717234533298297, "grad_norm": 0.6625107167318479, "learning_rate": 1.4507704785077049e-05, "loss": 0.6614, "step": 11891 }, { "epoch": 0.34720154156083033, "grad_norm": 0.6424777156788714, "learning_rate": 1.4507055961070559e-05, "loss": 0.6316, "step": 11892 }, { "epoch": 0.3472307377886777, "grad_norm": 0.9402123039915167, "learning_rate": 1.4506407137064074e-05, "loss": 0.7318, "step": 11893 }, { "epoch": 0.34725993401652505, "grad_norm": 0.5986064305915468, "learning_rate": 1.4505758313057585e-05, "loss": 0.5413, "step": 11894 }, { "epoch": 0.3472891302443724, "grad_norm": 0.6434324593161893, "learning_rate": 1.4505109489051097e-05, "loss": 0.6575, "step": 11895 }, { "epoch": 0.3473183264722198, "grad_norm": 0.668838084615584, "learning_rate": 1.4504460665044609e-05, "loss": 0.6113, "step": 11896 }, { "epoch": 0.34734752270006714, "grad_norm": 0.6887277765945546, "learning_rate": 1.450381184103812e-05, "loss": 0.6791, "step": 11897 }, { "epoch": 0.3473767189279145, "grad_norm": 0.5695277654932726, "learning_rate": 1.4503163017031631e-05, "loss": 0.504, "step": 11898 }, { "epoch": 0.34740591515576186, "grad_norm": 0.6430447709120466, "learning_rate": 1.4502514193025143e-05, "loss": 0.6177, "step": 11899 }, { "epoch": 0.3474351113836092, "grad_norm": 0.6909427994640726, "learning_rate": 1.4501865369018655e-05, "loss": 0.6856, "step": 11900 }, { "epoch": 0.3474643076114566, "grad_norm": 0.6566431828952664, "learning_rate": 1.4501216545012167e-05, "loss": 0.6287, "step": 11901 }, { "epoch": 0.34749350383930394, "grad_norm": 0.6555806010174704, "learning_rate": 1.450056772100568e-05, "loss": 0.685, "step": 11902 }, { "epoch": 0.3475227000671513, "grad_norm": 0.5672817849437983, "learning_rate": 1.449991889699919e-05, "loss": 0.4755, "step": 11903 }, { "epoch": 0.34755189629499866, "grad_norm": 0.6303771791431548, "learning_rate": 1.4499270072992702e-05, "loss": 0.6514, "step": 11904 }, { "epoch": 0.347581092522846, "grad_norm": 0.6553560096971921, "learning_rate": 1.4498621248986214e-05, "loss": 0.654, "step": 11905 }, { "epoch": 0.3476102887506934, "grad_norm": 0.7031266618530413, "learning_rate": 1.4497972424979726e-05, "loss": 0.7081, "step": 11906 }, { "epoch": 0.34763948497854075, "grad_norm": 0.6980430777058078, "learning_rate": 1.4497323600973236e-05, "loss": 0.6623, "step": 11907 }, { "epoch": 0.3476686812063881, "grad_norm": 0.617441991346151, "learning_rate": 1.4496674776966748e-05, "loss": 0.5685, "step": 11908 }, { "epoch": 0.34769787743423547, "grad_norm": 0.6081235514657433, "learning_rate": 1.449602595296026e-05, "loss": 0.5651, "step": 11909 }, { "epoch": 0.3477270736620829, "grad_norm": 0.6179122788363243, "learning_rate": 1.4495377128953772e-05, "loss": 0.5993, "step": 11910 }, { "epoch": 0.34775626988993025, "grad_norm": 0.6469916432231294, "learning_rate": 1.4494728304947286e-05, "loss": 0.6114, "step": 11911 }, { "epoch": 0.3477854661177776, "grad_norm": 0.7276731279193088, "learning_rate": 1.4494079480940798e-05, "loss": 0.6694, "step": 11912 }, { "epoch": 0.34781466234562497, "grad_norm": 0.6409813580175538, "learning_rate": 1.4493430656934308e-05, "loss": 0.586, "step": 11913 }, { "epoch": 0.34784385857347233, "grad_norm": 0.6600583968239261, "learning_rate": 1.449278183292782e-05, "loss": 0.6358, "step": 11914 }, { "epoch": 0.3478730548013197, "grad_norm": 0.6524172839601362, "learning_rate": 1.4492133008921332e-05, "loss": 0.6285, "step": 11915 }, { "epoch": 0.34790225102916705, "grad_norm": 0.6651456933481831, "learning_rate": 1.4491484184914844e-05, "loss": 0.6312, "step": 11916 }, { "epoch": 0.3479314472570144, "grad_norm": 0.6467191874429387, "learning_rate": 1.4490835360908355e-05, "loss": 0.6155, "step": 11917 }, { "epoch": 0.3479606434848618, "grad_norm": 0.7308578485980558, "learning_rate": 1.4490186536901867e-05, "loss": 0.6378, "step": 11918 }, { "epoch": 0.34798983971270914, "grad_norm": 0.6243261424536392, "learning_rate": 1.4489537712895379e-05, "loss": 0.5816, "step": 11919 }, { "epoch": 0.3480190359405565, "grad_norm": 0.6317403542349839, "learning_rate": 1.448888888888889e-05, "loss": 0.5678, "step": 11920 }, { "epoch": 0.34804823216840386, "grad_norm": 0.7185364001616424, "learning_rate": 1.4488240064882401e-05, "loss": 0.65, "step": 11921 }, { "epoch": 0.3480774283962512, "grad_norm": 0.6533667272522825, "learning_rate": 1.4487591240875913e-05, "loss": 0.6562, "step": 11922 }, { "epoch": 0.3481066246240986, "grad_norm": 0.6216167738335729, "learning_rate": 1.4486942416869425e-05, "loss": 0.5804, "step": 11923 }, { "epoch": 0.34813582085194594, "grad_norm": 0.6703509252371987, "learning_rate": 1.4486293592862937e-05, "loss": 0.6557, "step": 11924 }, { "epoch": 0.3481650170797933, "grad_norm": 0.6381295186625411, "learning_rate": 1.4485644768856449e-05, "loss": 0.617, "step": 11925 }, { "epoch": 0.34819421330764067, "grad_norm": 0.612001299384203, "learning_rate": 1.448499594484996e-05, "loss": 0.5719, "step": 11926 }, { "epoch": 0.348223409535488, "grad_norm": 0.6674018511625982, "learning_rate": 1.4484347120843471e-05, "loss": 0.6431, "step": 11927 }, { "epoch": 0.3482526057633354, "grad_norm": 0.6194324955892558, "learning_rate": 1.4483698296836983e-05, "loss": 0.6007, "step": 11928 }, { "epoch": 0.34828180199118275, "grad_norm": 0.6120469312645557, "learning_rate": 1.4483049472830495e-05, "loss": 0.5791, "step": 11929 }, { "epoch": 0.3483109982190301, "grad_norm": 0.7087896116001355, "learning_rate": 1.448240064882401e-05, "loss": 0.6206, "step": 11930 }, { "epoch": 0.34834019444687747, "grad_norm": 0.701086792550942, "learning_rate": 1.4481751824817521e-05, "loss": 0.6821, "step": 11931 }, { "epoch": 0.34836939067472483, "grad_norm": 0.6396824133376596, "learning_rate": 1.4481103000811032e-05, "loss": 0.6168, "step": 11932 }, { "epoch": 0.3483985869025722, "grad_norm": 0.6911935030998997, "learning_rate": 1.4480454176804544e-05, "loss": 0.6707, "step": 11933 }, { "epoch": 0.34842778313041955, "grad_norm": 0.623999990224298, "learning_rate": 1.4479805352798056e-05, "loss": 0.6244, "step": 11934 }, { "epoch": 0.3484569793582669, "grad_norm": 0.6533810611536083, "learning_rate": 1.4479156528791568e-05, "loss": 0.6803, "step": 11935 }, { "epoch": 0.3484861755861143, "grad_norm": 0.6759610574527503, "learning_rate": 1.4478507704785078e-05, "loss": 0.6939, "step": 11936 }, { "epoch": 0.34851537181396164, "grad_norm": 0.6495587583066507, "learning_rate": 1.447785888077859e-05, "loss": 0.5783, "step": 11937 }, { "epoch": 0.348544568041809, "grad_norm": 0.6069121899754217, "learning_rate": 1.4477210056772102e-05, "loss": 0.5524, "step": 11938 }, { "epoch": 0.34857376426965636, "grad_norm": 0.6340404380082864, "learning_rate": 1.4476561232765614e-05, "loss": 0.5985, "step": 11939 }, { "epoch": 0.3486029604975037, "grad_norm": 0.5831848797549772, "learning_rate": 1.4475912408759124e-05, "loss": 0.5271, "step": 11940 }, { "epoch": 0.3486321567253511, "grad_norm": 0.6114201514357893, "learning_rate": 1.4475263584752636e-05, "loss": 0.558, "step": 11941 }, { "epoch": 0.34866135295319844, "grad_norm": 0.6972285202072197, "learning_rate": 1.4474614760746148e-05, "loss": 0.7041, "step": 11942 }, { "epoch": 0.3486905491810458, "grad_norm": 0.6735385488645215, "learning_rate": 1.447396593673966e-05, "loss": 0.6315, "step": 11943 }, { "epoch": 0.34871974540889317, "grad_norm": 0.6816239987735941, "learning_rate": 1.4473317112733172e-05, "loss": 0.6863, "step": 11944 }, { "epoch": 0.34874894163674053, "grad_norm": 0.6376307649625749, "learning_rate": 1.4472668288726683e-05, "loss": 0.629, "step": 11945 }, { "epoch": 0.3487781378645879, "grad_norm": 0.6571213854066351, "learning_rate": 1.4472019464720195e-05, "loss": 0.6613, "step": 11946 }, { "epoch": 0.34880733409243525, "grad_norm": 0.6278415485384805, "learning_rate": 1.4471370640713707e-05, "loss": 0.5771, "step": 11947 }, { "epoch": 0.3488365303202826, "grad_norm": 0.6619858463879306, "learning_rate": 1.4470721816707219e-05, "loss": 0.6483, "step": 11948 }, { "epoch": 0.34886572654813, "grad_norm": 0.6518771552334893, "learning_rate": 1.4470072992700733e-05, "loss": 0.6106, "step": 11949 }, { "epoch": 0.34889492277597733, "grad_norm": 0.6000254073007014, "learning_rate": 1.4469424168694245e-05, "loss": 0.5305, "step": 11950 }, { "epoch": 0.3489241190038247, "grad_norm": 0.6850505367859213, "learning_rate": 1.4468775344687755e-05, "loss": 0.6552, "step": 11951 }, { "epoch": 0.34895331523167206, "grad_norm": 0.658135063344568, "learning_rate": 1.4468126520681267e-05, "loss": 0.6436, "step": 11952 }, { "epoch": 0.3489825114595194, "grad_norm": 0.6442579976068239, "learning_rate": 1.4467477696674779e-05, "loss": 0.6305, "step": 11953 }, { "epoch": 0.3490117076873668, "grad_norm": 0.6777545408033199, "learning_rate": 1.4466828872668291e-05, "loss": 0.6484, "step": 11954 }, { "epoch": 0.34904090391521414, "grad_norm": 0.6700267761957396, "learning_rate": 1.4466180048661801e-05, "loss": 0.6336, "step": 11955 }, { "epoch": 0.3490701001430615, "grad_norm": 0.6357917707961939, "learning_rate": 1.4465531224655313e-05, "loss": 0.5704, "step": 11956 }, { "epoch": 0.34909929637090886, "grad_norm": 0.6785971005816767, "learning_rate": 1.4464882400648825e-05, "loss": 0.6711, "step": 11957 }, { "epoch": 0.3491284925987562, "grad_norm": 0.6627249913916476, "learning_rate": 1.4464233576642337e-05, "loss": 0.6019, "step": 11958 }, { "epoch": 0.3491576888266036, "grad_norm": 0.7184787513717792, "learning_rate": 1.4463584752635848e-05, "loss": 0.7466, "step": 11959 }, { "epoch": 0.34918688505445095, "grad_norm": 0.7177776653543148, "learning_rate": 1.446293592862936e-05, "loss": 0.6401, "step": 11960 }, { "epoch": 0.3492160812822983, "grad_norm": 0.5977973535052398, "learning_rate": 1.4462287104622872e-05, "loss": 0.5465, "step": 11961 }, { "epoch": 0.34924527751014567, "grad_norm": 0.6506353507678924, "learning_rate": 1.4461638280616384e-05, "loss": 0.6207, "step": 11962 }, { "epoch": 0.34927447373799303, "grad_norm": 0.6185729939022819, "learning_rate": 1.4460989456609896e-05, "loss": 0.5573, "step": 11963 }, { "epoch": 0.3493036699658404, "grad_norm": 0.6503449782752636, "learning_rate": 1.4460340632603406e-05, "loss": 0.6293, "step": 11964 }, { "epoch": 0.34933286619368775, "grad_norm": 0.6830129465120475, "learning_rate": 1.4459691808596918e-05, "loss": 0.6177, "step": 11965 }, { "epoch": 0.3493620624215351, "grad_norm": 0.7028978346522525, "learning_rate": 1.445904298459043e-05, "loss": 0.6478, "step": 11966 }, { "epoch": 0.3493912586493825, "grad_norm": 0.6936353504203115, "learning_rate": 1.4458394160583942e-05, "loss": 0.6618, "step": 11967 }, { "epoch": 0.34942045487722984, "grad_norm": 0.6871576803023975, "learning_rate": 1.4457745336577456e-05, "loss": 0.5978, "step": 11968 }, { "epoch": 0.3494496511050772, "grad_norm": 0.6094423687269624, "learning_rate": 1.4457096512570968e-05, "loss": 0.5758, "step": 11969 }, { "epoch": 0.3494788473329246, "grad_norm": 0.6539091766851304, "learning_rate": 1.4456447688564478e-05, "loss": 0.6141, "step": 11970 }, { "epoch": 0.349508043560772, "grad_norm": 0.668646060799108, "learning_rate": 1.445579886455799e-05, "loss": 0.6639, "step": 11971 }, { "epoch": 0.34953723978861934, "grad_norm": 0.7088316528027054, "learning_rate": 1.4455150040551502e-05, "loss": 0.6707, "step": 11972 }, { "epoch": 0.3495664360164667, "grad_norm": 0.888163659141531, "learning_rate": 1.4454501216545014e-05, "loss": 0.6512, "step": 11973 }, { "epoch": 0.34959563224431406, "grad_norm": 0.6882355228217357, "learning_rate": 1.4453852392538525e-05, "loss": 0.6406, "step": 11974 }, { "epoch": 0.3496248284721614, "grad_norm": 0.6950882851945521, "learning_rate": 1.4453203568532037e-05, "loss": 0.6468, "step": 11975 }, { "epoch": 0.3496540247000088, "grad_norm": 0.5954110954764267, "learning_rate": 1.4452554744525549e-05, "loss": 0.5108, "step": 11976 }, { "epoch": 0.34968322092785614, "grad_norm": 0.6642493597281947, "learning_rate": 1.4451905920519061e-05, "loss": 0.6096, "step": 11977 }, { "epoch": 0.3497124171557035, "grad_norm": 0.6575513387668196, "learning_rate": 1.4451257096512571e-05, "loss": 0.6458, "step": 11978 }, { "epoch": 0.34974161338355086, "grad_norm": 0.6667130964373671, "learning_rate": 1.4450608272506083e-05, "loss": 0.652, "step": 11979 }, { "epoch": 0.3497708096113982, "grad_norm": 0.6521665390137333, "learning_rate": 1.4449959448499595e-05, "loss": 0.6642, "step": 11980 }, { "epoch": 0.3498000058392456, "grad_norm": 0.6555178529165595, "learning_rate": 1.4449310624493107e-05, "loss": 0.6307, "step": 11981 }, { "epoch": 0.34982920206709295, "grad_norm": 0.7185118910194401, "learning_rate": 1.444866180048662e-05, "loss": 0.6848, "step": 11982 }, { "epoch": 0.3498583982949403, "grad_norm": 0.6514978842142898, "learning_rate": 1.444801297648013e-05, "loss": 0.577, "step": 11983 }, { "epoch": 0.34988759452278767, "grad_norm": 0.665835079490192, "learning_rate": 1.4447364152473642e-05, "loss": 0.6326, "step": 11984 }, { "epoch": 0.34991679075063503, "grad_norm": 0.676943140618692, "learning_rate": 1.4446715328467154e-05, "loss": 0.6334, "step": 11985 }, { "epoch": 0.3499459869784824, "grad_norm": 0.6522983097030833, "learning_rate": 1.4446066504460666e-05, "loss": 0.644, "step": 11986 }, { "epoch": 0.34997518320632975, "grad_norm": 0.60133906345874, "learning_rate": 1.444541768045418e-05, "loss": 0.5627, "step": 11987 }, { "epoch": 0.3500043794341771, "grad_norm": 0.6396508986429, "learning_rate": 1.4444768856447691e-05, "loss": 0.6438, "step": 11988 }, { "epoch": 0.3500335756620245, "grad_norm": 0.6341984691278844, "learning_rate": 1.4444120032441202e-05, "loss": 0.6159, "step": 11989 }, { "epoch": 0.35006277188987184, "grad_norm": 0.6118608203532743, "learning_rate": 1.4443471208434714e-05, "loss": 0.584, "step": 11990 }, { "epoch": 0.3500919681177192, "grad_norm": 0.7157699480549397, "learning_rate": 1.4442822384428226e-05, "loss": 0.6657, "step": 11991 }, { "epoch": 0.35012116434556656, "grad_norm": 0.6751625040515201, "learning_rate": 1.4442173560421738e-05, "loss": 0.7139, "step": 11992 }, { "epoch": 0.3501503605734139, "grad_norm": 0.6669466870812043, "learning_rate": 1.4441524736415248e-05, "loss": 0.625, "step": 11993 }, { "epoch": 0.3501795568012613, "grad_norm": 0.6540263481308045, "learning_rate": 1.444087591240876e-05, "loss": 0.6556, "step": 11994 }, { "epoch": 0.35020875302910864, "grad_norm": 0.6394886576427476, "learning_rate": 1.4440227088402272e-05, "loss": 0.6456, "step": 11995 }, { "epoch": 0.350237949256956, "grad_norm": 0.6295314950700388, "learning_rate": 1.4439578264395784e-05, "loss": 0.6036, "step": 11996 }, { "epoch": 0.35026714548480337, "grad_norm": 0.6706900968514025, "learning_rate": 1.4438929440389295e-05, "loss": 0.6423, "step": 11997 }, { "epoch": 0.3502963417126507, "grad_norm": 0.6226823064358268, "learning_rate": 1.4438280616382807e-05, "loss": 0.6052, "step": 11998 }, { "epoch": 0.3503255379404981, "grad_norm": 0.5997991986704673, "learning_rate": 1.4437631792376319e-05, "loss": 0.559, "step": 11999 }, { "epoch": 0.35035473416834545, "grad_norm": 0.6274736297760557, "learning_rate": 1.443698296836983e-05, "loss": 0.6037, "step": 12000 }, { "epoch": 0.3503839303961928, "grad_norm": 0.6629304309604885, "learning_rate": 1.4436334144363343e-05, "loss": 0.646, "step": 12001 }, { "epoch": 0.35041312662404017, "grad_norm": 0.6888270071401759, "learning_rate": 1.4435685320356853e-05, "loss": 0.7098, "step": 12002 }, { "epoch": 0.35044232285188753, "grad_norm": 0.6607169556745157, "learning_rate": 1.4435036496350365e-05, "loss": 0.6524, "step": 12003 }, { "epoch": 0.3504715190797349, "grad_norm": 0.6331060238728791, "learning_rate": 1.4434387672343877e-05, "loss": 0.634, "step": 12004 }, { "epoch": 0.35050071530758226, "grad_norm": 0.6470411885299074, "learning_rate": 1.443373884833739e-05, "loss": 0.6024, "step": 12005 }, { "epoch": 0.3505299115354296, "grad_norm": 0.6046957790690414, "learning_rate": 1.4433090024330903e-05, "loss": 0.5369, "step": 12006 }, { "epoch": 0.350559107763277, "grad_norm": 0.6546666895356358, "learning_rate": 1.4432441200324415e-05, "loss": 0.6127, "step": 12007 }, { "epoch": 0.35058830399112434, "grad_norm": 0.6858965136147435, "learning_rate": 1.4431792376317925e-05, "loss": 0.6691, "step": 12008 }, { "epoch": 0.3506175002189717, "grad_norm": 0.6456618355350173, "learning_rate": 1.4431143552311437e-05, "loss": 0.6329, "step": 12009 }, { "epoch": 0.35064669644681906, "grad_norm": 0.6826349337216141, "learning_rate": 1.443049472830495e-05, "loss": 0.6173, "step": 12010 }, { "epoch": 0.3506758926746664, "grad_norm": 0.6529325083344598, "learning_rate": 1.4429845904298461e-05, "loss": 0.6755, "step": 12011 }, { "epoch": 0.3507050889025138, "grad_norm": 0.6544594661633217, "learning_rate": 1.4429197080291972e-05, "loss": 0.6523, "step": 12012 }, { "epoch": 0.35073428513036115, "grad_norm": 0.5948151787633482, "learning_rate": 1.4428548256285484e-05, "loss": 0.5794, "step": 12013 }, { "epoch": 0.3507634813582085, "grad_norm": 0.6373951409814115, "learning_rate": 1.4427899432278996e-05, "loss": 0.6494, "step": 12014 }, { "epoch": 0.35079267758605587, "grad_norm": 0.6591637663559025, "learning_rate": 1.4427250608272508e-05, "loss": 0.6327, "step": 12015 }, { "epoch": 0.35082187381390323, "grad_norm": 0.6322811654025348, "learning_rate": 1.4426601784266018e-05, "loss": 0.6236, "step": 12016 }, { "epoch": 0.3508510700417506, "grad_norm": 0.7229257706809729, "learning_rate": 1.442595296025953e-05, "loss": 0.5853, "step": 12017 }, { "epoch": 0.35088026626959795, "grad_norm": 0.6275269294959345, "learning_rate": 1.4425304136253042e-05, "loss": 0.5895, "step": 12018 }, { "epoch": 0.3509094624974453, "grad_norm": 0.6500274826515016, "learning_rate": 1.4424655312246554e-05, "loss": 0.6152, "step": 12019 }, { "epoch": 0.3509386587252927, "grad_norm": 0.65107870808117, "learning_rate": 1.4424006488240066e-05, "loss": 0.5903, "step": 12020 }, { "epoch": 0.35096785495314003, "grad_norm": 0.7457894330810309, "learning_rate": 1.4423357664233577e-05, "loss": 0.7035, "step": 12021 }, { "epoch": 0.3509970511809874, "grad_norm": 0.6474447272197874, "learning_rate": 1.4422708840227089e-05, "loss": 0.6353, "step": 12022 }, { "epoch": 0.35102624740883476, "grad_norm": 0.6284055357167937, "learning_rate": 1.44220600162206e-05, "loss": 0.5605, "step": 12023 }, { "epoch": 0.3510554436366821, "grad_norm": 0.6964975560156874, "learning_rate": 1.4421411192214113e-05, "loss": 0.7248, "step": 12024 }, { "epoch": 0.3510846398645295, "grad_norm": 0.7093376486174082, "learning_rate": 1.4420762368207626e-05, "loss": 0.6734, "step": 12025 }, { "epoch": 0.35111383609237684, "grad_norm": 0.655167467354221, "learning_rate": 1.4420113544201137e-05, "loss": 0.667, "step": 12026 }, { "epoch": 0.3511430323202242, "grad_norm": 0.6765926564474226, "learning_rate": 1.4419464720194649e-05, "loss": 0.6208, "step": 12027 }, { "epoch": 0.35117222854807156, "grad_norm": 0.6644421521702701, "learning_rate": 1.441881589618816e-05, "loss": 0.6193, "step": 12028 }, { "epoch": 0.3512014247759189, "grad_norm": 0.7080696891221834, "learning_rate": 1.4418167072181673e-05, "loss": 0.7274, "step": 12029 }, { "epoch": 0.3512306210037663, "grad_norm": 0.6536866494565496, "learning_rate": 1.4417518248175185e-05, "loss": 0.6379, "step": 12030 }, { "epoch": 0.3512598172316137, "grad_norm": 0.6699350762603205, "learning_rate": 1.4416869424168695e-05, "loss": 0.6859, "step": 12031 }, { "epoch": 0.35128901345946106, "grad_norm": 0.6283079902560696, "learning_rate": 1.4416220600162207e-05, "loss": 0.6362, "step": 12032 }, { "epoch": 0.3513182096873084, "grad_norm": 0.6956043008404672, "learning_rate": 1.441557177615572e-05, "loss": 0.7218, "step": 12033 }, { "epoch": 0.3513474059151558, "grad_norm": 0.7064863852176873, "learning_rate": 1.4414922952149231e-05, "loss": 0.6888, "step": 12034 }, { "epoch": 0.35137660214300315, "grad_norm": 0.6035637353277785, "learning_rate": 1.4414274128142742e-05, "loss": 0.5443, "step": 12035 }, { "epoch": 0.3514057983708505, "grad_norm": 0.6862977444187249, "learning_rate": 1.4413625304136254e-05, "loss": 0.6301, "step": 12036 }, { "epoch": 0.35143499459869787, "grad_norm": 0.67994191009645, "learning_rate": 1.4412976480129766e-05, "loss": 0.6667, "step": 12037 }, { "epoch": 0.35146419082654523, "grad_norm": 0.6529003326851028, "learning_rate": 1.4412327656123278e-05, "loss": 0.6619, "step": 12038 }, { "epoch": 0.3514933870543926, "grad_norm": 0.6212192264141663, "learning_rate": 1.441167883211679e-05, "loss": 0.5711, "step": 12039 }, { "epoch": 0.35152258328223995, "grad_norm": 0.677564936934773, "learning_rate": 1.44110300081103e-05, "loss": 0.5817, "step": 12040 }, { "epoch": 0.3515517795100873, "grad_norm": 0.7338625243458919, "learning_rate": 1.4410381184103812e-05, "loss": 0.7128, "step": 12041 }, { "epoch": 0.3515809757379347, "grad_norm": 0.6544933382459874, "learning_rate": 1.4409732360097324e-05, "loss": 0.662, "step": 12042 }, { "epoch": 0.35161017196578204, "grad_norm": 0.6393756021208168, "learning_rate": 1.4409083536090836e-05, "loss": 0.6292, "step": 12043 }, { "epoch": 0.3516393681936294, "grad_norm": 0.7064292678726701, "learning_rate": 1.440843471208435e-05, "loss": 0.6609, "step": 12044 }, { "epoch": 0.35166856442147676, "grad_norm": 0.6741094100757824, "learning_rate": 1.440778588807786e-05, "loss": 0.654, "step": 12045 }, { "epoch": 0.3516977606493241, "grad_norm": 0.6699899147038303, "learning_rate": 1.4407137064071372e-05, "loss": 0.683, "step": 12046 }, { "epoch": 0.3517269568771715, "grad_norm": 0.6848198066676665, "learning_rate": 1.4406488240064884e-05, "loss": 0.6868, "step": 12047 }, { "epoch": 0.35175615310501884, "grad_norm": 0.6502090707592174, "learning_rate": 1.4405839416058396e-05, "loss": 0.6127, "step": 12048 }, { "epoch": 0.3517853493328662, "grad_norm": 0.6001869591049303, "learning_rate": 1.4405190592051908e-05, "loss": 0.5791, "step": 12049 }, { "epoch": 0.35181454556071357, "grad_norm": 0.6777071901740845, "learning_rate": 1.4404541768045419e-05, "loss": 0.6807, "step": 12050 }, { "epoch": 0.3518437417885609, "grad_norm": 0.6936026205923633, "learning_rate": 1.440389294403893e-05, "loss": 0.6765, "step": 12051 }, { "epoch": 0.3518729380164083, "grad_norm": 0.6433933519952052, "learning_rate": 1.4403244120032443e-05, "loss": 0.5485, "step": 12052 }, { "epoch": 0.35190213424425565, "grad_norm": 0.6245868602401384, "learning_rate": 1.4402595296025955e-05, "loss": 0.6127, "step": 12053 }, { "epoch": 0.351931330472103, "grad_norm": 0.6608160816539976, "learning_rate": 1.4401946472019465e-05, "loss": 0.5735, "step": 12054 }, { "epoch": 0.35196052669995037, "grad_norm": 0.633796159054675, "learning_rate": 1.4401297648012977e-05, "loss": 0.6323, "step": 12055 }, { "epoch": 0.35198972292779773, "grad_norm": 0.6396751201876402, "learning_rate": 1.4400648824006489e-05, "loss": 0.5817, "step": 12056 }, { "epoch": 0.3520189191556451, "grad_norm": 0.6399747274352705, "learning_rate": 1.4400000000000001e-05, "loss": 0.5651, "step": 12057 }, { "epoch": 0.35204811538349245, "grad_norm": 0.6802731760375537, "learning_rate": 1.4399351175993513e-05, "loss": 0.6426, "step": 12058 }, { "epoch": 0.3520773116113398, "grad_norm": 0.6420612331684574, "learning_rate": 1.4398702351987023e-05, "loss": 0.5983, "step": 12059 }, { "epoch": 0.3521065078391872, "grad_norm": 0.6064276790039222, "learning_rate": 1.4398053527980535e-05, "loss": 0.5329, "step": 12060 }, { "epoch": 0.35213570406703454, "grad_norm": 0.6767017749615966, "learning_rate": 1.4397404703974047e-05, "loss": 0.6183, "step": 12061 }, { "epoch": 0.3521649002948819, "grad_norm": 0.6827654496858727, "learning_rate": 1.4396755879967561e-05, "loss": 0.6021, "step": 12062 }, { "epoch": 0.35219409652272926, "grad_norm": 0.6625921974151175, "learning_rate": 1.4396107055961073e-05, "loss": 0.5838, "step": 12063 }, { "epoch": 0.3522232927505766, "grad_norm": 0.6380561267289153, "learning_rate": 1.4395458231954584e-05, "loss": 0.6769, "step": 12064 }, { "epoch": 0.352252488978424, "grad_norm": 0.6136668434330554, "learning_rate": 1.4394809407948096e-05, "loss": 0.5763, "step": 12065 }, { "epoch": 0.35228168520627134, "grad_norm": 0.7023762522026761, "learning_rate": 1.4394160583941608e-05, "loss": 0.7325, "step": 12066 }, { "epoch": 0.3523108814341187, "grad_norm": 0.626004695238592, "learning_rate": 1.439351175993512e-05, "loss": 0.632, "step": 12067 }, { "epoch": 0.35234007766196607, "grad_norm": 0.6468455725693687, "learning_rate": 1.4392862935928632e-05, "loss": 0.5982, "step": 12068 }, { "epoch": 0.35236927388981343, "grad_norm": 0.6485383705174886, "learning_rate": 1.4392214111922142e-05, "loss": 0.6564, "step": 12069 }, { "epoch": 0.3523984701176608, "grad_norm": 0.6110917007887318, "learning_rate": 1.4391565287915654e-05, "loss": 0.563, "step": 12070 }, { "epoch": 0.35242766634550815, "grad_norm": 0.6444052995597669, "learning_rate": 1.4390916463909166e-05, "loss": 0.6293, "step": 12071 }, { "epoch": 0.3524568625733555, "grad_norm": 0.596967516369801, "learning_rate": 1.4390267639902678e-05, "loss": 0.57, "step": 12072 }, { "epoch": 0.3524860588012029, "grad_norm": 0.6311310799175823, "learning_rate": 1.4389618815896188e-05, "loss": 0.5891, "step": 12073 }, { "epoch": 0.35251525502905023, "grad_norm": 0.6064710358582668, "learning_rate": 1.43889699918897e-05, "loss": 0.5636, "step": 12074 }, { "epoch": 0.3525444512568976, "grad_norm": 0.6165489631132813, "learning_rate": 1.4388321167883212e-05, "loss": 0.6024, "step": 12075 }, { "epoch": 0.35257364748474496, "grad_norm": 0.6441165312947356, "learning_rate": 1.4387672343876724e-05, "loss": 0.6298, "step": 12076 }, { "epoch": 0.3526028437125923, "grad_norm": 0.6922546560334359, "learning_rate": 1.4387023519870237e-05, "loss": 0.5831, "step": 12077 }, { "epoch": 0.3526320399404397, "grad_norm": 0.6175304970469294, "learning_rate": 1.4386374695863747e-05, "loss": 0.5916, "step": 12078 }, { "epoch": 0.35266123616828704, "grad_norm": 0.6624120779617894, "learning_rate": 1.4385725871857259e-05, "loss": 0.6571, "step": 12079 }, { "epoch": 0.3526904323961344, "grad_norm": 0.6718725541826591, "learning_rate": 1.4385077047850771e-05, "loss": 0.6849, "step": 12080 }, { "epoch": 0.35271962862398176, "grad_norm": 0.5870854304323674, "learning_rate": 1.4384428223844285e-05, "loss": 0.5365, "step": 12081 }, { "epoch": 0.3527488248518291, "grad_norm": 0.6699244761719826, "learning_rate": 1.4383779399837797e-05, "loss": 0.5906, "step": 12082 }, { "epoch": 0.3527780210796765, "grad_norm": 0.6222126358576896, "learning_rate": 1.4383130575831307e-05, "loss": 0.6325, "step": 12083 }, { "epoch": 0.35280721730752385, "grad_norm": 0.7093057139374765, "learning_rate": 1.4382481751824819e-05, "loss": 0.695, "step": 12084 }, { "epoch": 0.3528364135353712, "grad_norm": 0.6424933884102834, "learning_rate": 1.4381832927818331e-05, "loss": 0.6549, "step": 12085 }, { "epoch": 0.35286560976321857, "grad_norm": 0.598398877004501, "learning_rate": 1.4381184103811843e-05, "loss": 0.524, "step": 12086 }, { "epoch": 0.35289480599106593, "grad_norm": 0.6699764663135155, "learning_rate": 1.4380535279805355e-05, "loss": 0.6463, "step": 12087 }, { "epoch": 0.3529240022189133, "grad_norm": 0.6424533497147364, "learning_rate": 1.4379886455798865e-05, "loss": 0.6681, "step": 12088 }, { "epoch": 0.35295319844676065, "grad_norm": 0.7080492845861145, "learning_rate": 1.4379237631792377e-05, "loss": 0.6751, "step": 12089 }, { "epoch": 0.352982394674608, "grad_norm": 0.6933980482299424, "learning_rate": 1.437858880778589e-05, "loss": 0.6477, "step": 12090 }, { "epoch": 0.35301159090245543, "grad_norm": 0.6619486469760366, "learning_rate": 1.4377939983779401e-05, "loss": 0.5697, "step": 12091 }, { "epoch": 0.3530407871303028, "grad_norm": 0.807070984526862, "learning_rate": 1.4377291159772912e-05, "loss": 0.6985, "step": 12092 }, { "epoch": 0.35306998335815015, "grad_norm": 0.7188198387670399, "learning_rate": 1.4376642335766424e-05, "loss": 0.6915, "step": 12093 }, { "epoch": 0.3530991795859975, "grad_norm": 0.6566144720326851, "learning_rate": 1.4375993511759936e-05, "loss": 0.6442, "step": 12094 }, { "epoch": 0.3531283758138449, "grad_norm": 0.6681319958759029, "learning_rate": 1.4375344687753448e-05, "loss": 0.6579, "step": 12095 }, { "epoch": 0.35315757204169224, "grad_norm": 0.7076945399563112, "learning_rate": 1.437469586374696e-05, "loss": 0.6597, "step": 12096 }, { "epoch": 0.3531867682695396, "grad_norm": 0.643320141583759, "learning_rate": 1.437404703974047e-05, "loss": 0.6317, "step": 12097 }, { "epoch": 0.35321596449738696, "grad_norm": 0.6598865695036007, "learning_rate": 1.4373398215733982e-05, "loss": 0.6051, "step": 12098 }, { "epoch": 0.3532451607252343, "grad_norm": 0.582882456577749, "learning_rate": 1.4372749391727494e-05, "loss": 0.553, "step": 12099 }, { "epoch": 0.3532743569530817, "grad_norm": 0.6735392664023496, "learning_rate": 1.4372100567721008e-05, "loss": 0.6701, "step": 12100 }, { "epoch": 0.35330355318092904, "grad_norm": 0.6825589014237163, "learning_rate": 1.437145174371452e-05, "loss": 0.6475, "step": 12101 }, { "epoch": 0.3533327494087764, "grad_norm": 0.6887637313343624, "learning_rate": 1.437080291970803e-05, "loss": 0.6942, "step": 12102 }, { "epoch": 0.35336194563662376, "grad_norm": 0.6274581302510158, "learning_rate": 1.4370154095701542e-05, "loss": 0.5608, "step": 12103 }, { "epoch": 0.3533911418644711, "grad_norm": 0.607425598665072, "learning_rate": 1.4369505271695054e-05, "loss": 0.525, "step": 12104 }, { "epoch": 0.3534203380923185, "grad_norm": 0.6647183124410081, "learning_rate": 1.4368856447688566e-05, "loss": 0.6242, "step": 12105 }, { "epoch": 0.35344953432016585, "grad_norm": 0.6929115804048114, "learning_rate": 1.4368207623682079e-05, "loss": 0.6234, "step": 12106 }, { "epoch": 0.3534787305480132, "grad_norm": 0.7066161355189498, "learning_rate": 1.4367558799675589e-05, "loss": 0.7158, "step": 12107 }, { "epoch": 0.35350792677586057, "grad_norm": 0.7255696132640466, "learning_rate": 1.4366909975669101e-05, "loss": 0.7271, "step": 12108 }, { "epoch": 0.35353712300370793, "grad_norm": 0.6480890719121779, "learning_rate": 1.4366261151662613e-05, "loss": 0.6072, "step": 12109 }, { "epoch": 0.3535663192315553, "grad_norm": 0.68077723236031, "learning_rate": 1.4365612327656125e-05, "loss": 0.6026, "step": 12110 }, { "epoch": 0.35359551545940265, "grad_norm": 0.6905977804635715, "learning_rate": 1.4364963503649635e-05, "loss": 0.6667, "step": 12111 }, { "epoch": 0.35362471168725, "grad_norm": 0.6416229563519676, "learning_rate": 1.4364314679643147e-05, "loss": 0.6923, "step": 12112 }, { "epoch": 0.3536539079150974, "grad_norm": 0.6059188890245142, "learning_rate": 1.436366585563666e-05, "loss": 0.5561, "step": 12113 }, { "epoch": 0.35368310414294474, "grad_norm": 0.6203016859785434, "learning_rate": 1.4363017031630171e-05, "loss": 0.6045, "step": 12114 }, { "epoch": 0.3537123003707921, "grad_norm": 0.7183292138810615, "learning_rate": 1.4362368207623683e-05, "loss": 0.7052, "step": 12115 }, { "epoch": 0.35374149659863946, "grad_norm": 0.7023037159096804, "learning_rate": 1.4361719383617194e-05, "loss": 0.6355, "step": 12116 }, { "epoch": 0.3537706928264868, "grad_norm": 0.7021437249560983, "learning_rate": 1.4361070559610706e-05, "loss": 0.7293, "step": 12117 }, { "epoch": 0.3537998890543342, "grad_norm": 0.5822992375521908, "learning_rate": 1.4360421735604218e-05, "loss": 0.5108, "step": 12118 }, { "epoch": 0.35382908528218154, "grad_norm": 0.6221384222008249, "learning_rate": 1.4359772911597731e-05, "loss": 0.5653, "step": 12119 }, { "epoch": 0.3538582815100289, "grad_norm": 0.6117812238855476, "learning_rate": 1.4359124087591244e-05, "loss": 0.5548, "step": 12120 }, { "epoch": 0.35388747773787627, "grad_norm": 0.6031707438897604, "learning_rate": 1.4358475263584754e-05, "loss": 0.5446, "step": 12121 }, { "epoch": 0.3539166739657236, "grad_norm": 0.6851594584332418, "learning_rate": 1.4357826439578266e-05, "loss": 0.7003, "step": 12122 }, { "epoch": 0.353945870193571, "grad_norm": 0.5982370934427371, "learning_rate": 1.4357177615571778e-05, "loss": 0.5201, "step": 12123 }, { "epoch": 0.35397506642141835, "grad_norm": 0.647471284332671, "learning_rate": 1.435652879156529e-05, "loss": 0.6076, "step": 12124 }, { "epoch": 0.3540042626492657, "grad_norm": 0.7487566073745138, "learning_rate": 1.4355879967558802e-05, "loss": 0.7763, "step": 12125 }, { "epoch": 0.35403345887711307, "grad_norm": 0.6706767410791703, "learning_rate": 1.4355231143552312e-05, "loss": 0.6427, "step": 12126 }, { "epoch": 0.35406265510496043, "grad_norm": 0.6989748973615293, "learning_rate": 1.4354582319545824e-05, "loss": 0.6456, "step": 12127 }, { "epoch": 0.3540918513328078, "grad_norm": 0.6481043672411894, "learning_rate": 1.4353933495539336e-05, "loss": 0.6133, "step": 12128 }, { "epoch": 0.35412104756065516, "grad_norm": 0.6636267086199887, "learning_rate": 1.4353284671532848e-05, "loss": 0.6337, "step": 12129 }, { "epoch": 0.3541502437885025, "grad_norm": 0.6440420895241258, "learning_rate": 1.4352635847526359e-05, "loss": 0.5422, "step": 12130 }, { "epoch": 0.3541794400163499, "grad_norm": 0.6414456519157192, "learning_rate": 1.435198702351987e-05, "loss": 0.605, "step": 12131 }, { "epoch": 0.35420863624419724, "grad_norm": 0.6661041431525644, "learning_rate": 1.4351338199513383e-05, "loss": 0.6279, "step": 12132 }, { "epoch": 0.3542378324720446, "grad_norm": 0.6577362657161547, "learning_rate": 1.4350689375506895e-05, "loss": 0.6353, "step": 12133 }, { "epoch": 0.35426702869989196, "grad_norm": 0.6645993722107809, "learning_rate": 1.4350040551500407e-05, "loss": 0.637, "step": 12134 }, { "epoch": 0.3542962249277393, "grad_norm": 0.6752804311417259, "learning_rate": 1.4349391727493917e-05, "loss": 0.6547, "step": 12135 }, { "epoch": 0.3543254211555867, "grad_norm": 0.653055332254161, "learning_rate": 1.434874290348743e-05, "loss": 0.668, "step": 12136 }, { "epoch": 0.35435461738343405, "grad_norm": 0.6561259231286132, "learning_rate": 1.4348094079480941e-05, "loss": 0.6744, "step": 12137 }, { "epoch": 0.3543838136112814, "grad_norm": 0.6232513852875787, "learning_rate": 1.4347445255474455e-05, "loss": 0.5858, "step": 12138 }, { "epoch": 0.35441300983912877, "grad_norm": 0.6235366497531947, "learning_rate": 1.4346796431467967e-05, "loss": 0.6227, "step": 12139 }, { "epoch": 0.35444220606697613, "grad_norm": 0.7164916737802806, "learning_rate": 1.4346147607461477e-05, "loss": 0.7236, "step": 12140 }, { "epoch": 0.3544714022948235, "grad_norm": 0.668788291029376, "learning_rate": 1.434549878345499e-05, "loss": 0.6425, "step": 12141 }, { "epoch": 0.35450059852267085, "grad_norm": 0.7244166338771216, "learning_rate": 1.4344849959448501e-05, "loss": 0.7131, "step": 12142 }, { "epoch": 0.3545297947505182, "grad_norm": 0.6611365132286391, "learning_rate": 1.4344201135442013e-05, "loss": 0.6608, "step": 12143 }, { "epoch": 0.3545589909783656, "grad_norm": 0.5953489338940449, "learning_rate": 1.4343552311435525e-05, "loss": 0.5581, "step": 12144 }, { "epoch": 0.35458818720621293, "grad_norm": 0.6435379434997959, "learning_rate": 1.4342903487429036e-05, "loss": 0.5691, "step": 12145 }, { "epoch": 0.3546173834340603, "grad_norm": 0.6817248002479247, "learning_rate": 1.4342254663422548e-05, "loss": 0.5573, "step": 12146 }, { "epoch": 0.35464657966190766, "grad_norm": 0.6269934960972926, "learning_rate": 1.434160583941606e-05, "loss": 0.6083, "step": 12147 }, { "epoch": 0.354675775889755, "grad_norm": 0.6309572038165987, "learning_rate": 1.4340957015409572e-05, "loss": 0.6136, "step": 12148 }, { "epoch": 0.3547049721176024, "grad_norm": 0.6964628011402075, "learning_rate": 1.4340308191403082e-05, "loss": 0.6677, "step": 12149 }, { "epoch": 0.35473416834544974, "grad_norm": 0.7150087571476768, "learning_rate": 1.4339659367396594e-05, "loss": 0.6996, "step": 12150 }, { "epoch": 0.35476336457329716, "grad_norm": 0.7771620278033877, "learning_rate": 1.4339010543390106e-05, "loss": 0.7514, "step": 12151 }, { "epoch": 0.3547925608011445, "grad_norm": 0.6616948341353815, "learning_rate": 1.4338361719383618e-05, "loss": 0.6226, "step": 12152 }, { "epoch": 0.3548217570289919, "grad_norm": 0.6369707424754341, "learning_rate": 1.433771289537713e-05, "loss": 0.6025, "step": 12153 }, { "epoch": 0.35485095325683924, "grad_norm": 0.7682140274289846, "learning_rate": 1.433706407137064e-05, "loss": 0.6928, "step": 12154 }, { "epoch": 0.3548801494846866, "grad_norm": 0.6291411663959842, "learning_rate": 1.4336415247364153e-05, "loss": 0.5568, "step": 12155 }, { "epoch": 0.35490934571253396, "grad_norm": 0.6664657020807773, "learning_rate": 1.4335766423357665e-05, "loss": 0.6587, "step": 12156 }, { "epoch": 0.3549385419403813, "grad_norm": 0.6817875322157629, "learning_rate": 1.4335117599351178e-05, "loss": 0.6559, "step": 12157 }, { "epoch": 0.3549677381682287, "grad_norm": 0.6668140667705679, "learning_rate": 1.433446877534469e-05, "loss": 0.6515, "step": 12158 }, { "epoch": 0.35499693439607605, "grad_norm": 0.6189456046452648, "learning_rate": 1.43338199513382e-05, "loss": 0.546, "step": 12159 }, { "epoch": 0.3550261306239234, "grad_norm": 0.6909993195896525, "learning_rate": 1.4333171127331713e-05, "loss": 0.6731, "step": 12160 }, { "epoch": 0.35505532685177077, "grad_norm": 0.6741875794801998, "learning_rate": 1.4332522303325225e-05, "loss": 0.6392, "step": 12161 }, { "epoch": 0.35508452307961813, "grad_norm": 0.7006709301388545, "learning_rate": 1.4331873479318737e-05, "loss": 0.7188, "step": 12162 }, { "epoch": 0.3551137193074655, "grad_norm": 0.6664047505158288, "learning_rate": 1.4331224655312249e-05, "loss": 0.6785, "step": 12163 }, { "epoch": 0.35514291553531285, "grad_norm": 0.6945793613482988, "learning_rate": 1.433057583130576e-05, "loss": 0.6832, "step": 12164 }, { "epoch": 0.3551721117631602, "grad_norm": 0.6463749470567017, "learning_rate": 1.4329927007299271e-05, "loss": 0.6243, "step": 12165 }, { "epoch": 0.3552013079910076, "grad_norm": 0.6562419502591219, "learning_rate": 1.4329278183292783e-05, "loss": 0.6409, "step": 12166 }, { "epoch": 0.35523050421885494, "grad_norm": 0.6131437648725947, "learning_rate": 1.4328629359286295e-05, "loss": 0.5895, "step": 12167 }, { "epoch": 0.3552597004467023, "grad_norm": 0.7153133008131919, "learning_rate": 1.4327980535279806e-05, "loss": 0.6692, "step": 12168 }, { "epoch": 0.35528889667454966, "grad_norm": 0.6202867073190794, "learning_rate": 1.4327331711273318e-05, "loss": 0.573, "step": 12169 }, { "epoch": 0.355318092902397, "grad_norm": 0.6965532482813297, "learning_rate": 1.432668288726683e-05, "loss": 0.6765, "step": 12170 }, { "epoch": 0.3553472891302444, "grad_norm": 0.615210896216473, "learning_rate": 1.4326034063260342e-05, "loss": 0.5533, "step": 12171 }, { "epoch": 0.35537648535809174, "grad_norm": 0.6440856070354163, "learning_rate": 1.4325385239253852e-05, "loss": 0.6611, "step": 12172 }, { "epoch": 0.3554056815859391, "grad_norm": 0.6765165266196527, "learning_rate": 1.4324736415247364e-05, "loss": 0.6804, "step": 12173 }, { "epoch": 0.35543487781378647, "grad_norm": 0.6340008362946212, "learning_rate": 1.4324087591240876e-05, "loss": 0.6405, "step": 12174 }, { "epoch": 0.3554640740416338, "grad_norm": 0.6770567207969435, "learning_rate": 1.4323438767234388e-05, "loss": 0.6868, "step": 12175 }, { "epoch": 0.3554932702694812, "grad_norm": 0.6699304344188712, "learning_rate": 1.4322789943227902e-05, "loss": 0.7048, "step": 12176 }, { "epoch": 0.35552246649732855, "grad_norm": 0.6698062163231052, "learning_rate": 1.4322141119221414e-05, "loss": 0.667, "step": 12177 }, { "epoch": 0.3555516627251759, "grad_norm": 0.6509383892345879, "learning_rate": 1.4321492295214924e-05, "loss": 0.6794, "step": 12178 }, { "epoch": 0.35558085895302327, "grad_norm": 0.6586309186055571, "learning_rate": 1.4320843471208436e-05, "loss": 0.646, "step": 12179 }, { "epoch": 0.35561005518087063, "grad_norm": 0.6875998646213682, "learning_rate": 1.4320194647201948e-05, "loss": 0.6518, "step": 12180 }, { "epoch": 0.355639251408718, "grad_norm": 0.6686410118902275, "learning_rate": 1.431954582319546e-05, "loss": 0.6398, "step": 12181 }, { "epoch": 0.35566844763656535, "grad_norm": 0.6991252461191685, "learning_rate": 1.4318896999188972e-05, "loss": 0.6788, "step": 12182 }, { "epoch": 0.3556976438644127, "grad_norm": 0.6534249507850918, "learning_rate": 1.4318248175182483e-05, "loss": 0.6187, "step": 12183 }, { "epoch": 0.3557268400922601, "grad_norm": 0.6181685442135706, "learning_rate": 1.4317599351175995e-05, "loss": 0.6003, "step": 12184 }, { "epoch": 0.35575603632010744, "grad_norm": 0.6618963820826692, "learning_rate": 1.4316950527169507e-05, "loss": 0.5523, "step": 12185 }, { "epoch": 0.3557852325479548, "grad_norm": 0.658031484965512, "learning_rate": 1.4316301703163019e-05, "loss": 0.5997, "step": 12186 }, { "epoch": 0.35581442877580216, "grad_norm": 0.6167476215482094, "learning_rate": 1.4315652879156529e-05, "loss": 0.5953, "step": 12187 }, { "epoch": 0.3558436250036495, "grad_norm": 0.6221594069923276, "learning_rate": 1.4315004055150041e-05, "loss": 0.6033, "step": 12188 }, { "epoch": 0.3558728212314969, "grad_norm": 0.6736004072215849, "learning_rate": 1.4314355231143553e-05, "loss": 0.6735, "step": 12189 }, { "epoch": 0.35590201745934424, "grad_norm": 0.6979915740802487, "learning_rate": 1.4313706407137065e-05, "loss": 0.6718, "step": 12190 }, { "epoch": 0.3559312136871916, "grad_norm": 0.6144341779770425, "learning_rate": 1.4313057583130575e-05, "loss": 0.5635, "step": 12191 }, { "epoch": 0.35596040991503897, "grad_norm": 0.6394321455974712, "learning_rate": 1.4312408759124087e-05, "loss": 0.5923, "step": 12192 }, { "epoch": 0.35598960614288633, "grad_norm": 0.6667833275407601, "learning_rate": 1.43117599351176e-05, "loss": 0.6407, "step": 12193 }, { "epoch": 0.3560188023707337, "grad_norm": 0.6329608431132593, "learning_rate": 1.4311111111111111e-05, "loss": 0.6024, "step": 12194 }, { "epoch": 0.35604799859858105, "grad_norm": 0.6654073127620144, "learning_rate": 1.4310462287104625e-05, "loss": 0.6371, "step": 12195 }, { "epoch": 0.3560771948264284, "grad_norm": 0.6382172286681375, "learning_rate": 1.4309813463098137e-05, "loss": 0.6296, "step": 12196 }, { "epoch": 0.3561063910542758, "grad_norm": 0.6558389560186608, "learning_rate": 1.4309164639091648e-05, "loss": 0.5965, "step": 12197 }, { "epoch": 0.35613558728212313, "grad_norm": 0.6612822495877821, "learning_rate": 1.430851581508516e-05, "loss": 0.6643, "step": 12198 }, { "epoch": 0.3561647835099705, "grad_norm": 0.6831026775214311, "learning_rate": 1.4307866991078672e-05, "loss": 0.6777, "step": 12199 }, { "epoch": 0.35619397973781786, "grad_norm": 0.6624013071522112, "learning_rate": 1.4307218167072184e-05, "loss": 0.6274, "step": 12200 }, { "epoch": 0.3562231759656652, "grad_norm": 0.6754652788218961, "learning_rate": 1.4306569343065696e-05, "loss": 0.6763, "step": 12201 }, { "epoch": 0.3562523721935126, "grad_norm": 0.6895642407385756, "learning_rate": 1.4305920519059206e-05, "loss": 0.7352, "step": 12202 }, { "epoch": 0.35628156842135994, "grad_norm": 0.6711844693230261, "learning_rate": 1.4305271695052718e-05, "loss": 0.6737, "step": 12203 }, { "epoch": 0.3563107646492073, "grad_norm": 0.6883816743754966, "learning_rate": 1.430462287104623e-05, "loss": 0.651, "step": 12204 }, { "epoch": 0.35633996087705466, "grad_norm": 0.6923452457749835, "learning_rate": 1.4303974047039742e-05, "loss": 0.7392, "step": 12205 }, { "epoch": 0.356369157104902, "grad_norm": 0.6460946363178276, "learning_rate": 1.4303325223033252e-05, "loss": 0.6278, "step": 12206 }, { "epoch": 0.3563983533327494, "grad_norm": 0.7191178128533593, "learning_rate": 1.4302676399026764e-05, "loss": 0.6928, "step": 12207 }, { "epoch": 0.35642754956059675, "grad_norm": 0.6455574085988461, "learning_rate": 1.4302027575020276e-05, "loss": 0.5981, "step": 12208 }, { "epoch": 0.3564567457884441, "grad_norm": 0.6888901823351377, "learning_rate": 1.4301378751013789e-05, "loss": 0.6792, "step": 12209 }, { "epoch": 0.35648594201629147, "grad_norm": 0.6501063684539716, "learning_rate": 1.4300729927007299e-05, "loss": 0.588, "step": 12210 }, { "epoch": 0.3565151382441389, "grad_norm": 0.7255268865677569, "learning_rate": 1.4300081103000811e-05, "loss": 0.7005, "step": 12211 }, { "epoch": 0.35654433447198625, "grad_norm": 0.6935613454810018, "learning_rate": 1.4299432278994323e-05, "loss": 0.6627, "step": 12212 }, { "epoch": 0.3565735306998336, "grad_norm": 0.6663017548410232, "learning_rate": 1.4298783454987837e-05, "loss": 0.6122, "step": 12213 }, { "epoch": 0.35660272692768097, "grad_norm": 0.620644495595702, "learning_rate": 1.4298134630981349e-05, "loss": 0.5805, "step": 12214 }, { "epoch": 0.35663192315552833, "grad_norm": 0.6271859470870517, "learning_rate": 1.429748580697486e-05, "loss": 0.5996, "step": 12215 }, { "epoch": 0.3566611193833757, "grad_norm": 0.6739280021488948, "learning_rate": 1.4296836982968371e-05, "loss": 0.6799, "step": 12216 }, { "epoch": 0.35669031561122305, "grad_norm": 0.6467627632836814, "learning_rate": 1.4296188158961883e-05, "loss": 0.6207, "step": 12217 }, { "epoch": 0.3567195118390704, "grad_norm": 0.6878309346220017, "learning_rate": 1.4295539334955395e-05, "loss": 0.67, "step": 12218 }, { "epoch": 0.3567487080669178, "grad_norm": 0.6490222601672256, "learning_rate": 1.4294890510948907e-05, "loss": 0.5798, "step": 12219 }, { "epoch": 0.35677790429476514, "grad_norm": 0.6415871044771099, "learning_rate": 1.4294241686942419e-05, "loss": 0.6146, "step": 12220 }, { "epoch": 0.3568071005226125, "grad_norm": 0.6723794921365066, "learning_rate": 1.429359286293593e-05, "loss": 0.653, "step": 12221 }, { "epoch": 0.35683629675045986, "grad_norm": 0.6527399847556782, "learning_rate": 1.4292944038929441e-05, "loss": 0.6203, "step": 12222 }, { "epoch": 0.3568654929783072, "grad_norm": 0.6302839694772019, "learning_rate": 1.4292295214922954e-05, "loss": 0.6215, "step": 12223 }, { "epoch": 0.3568946892061546, "grad_norm": 0.6076066766217877, "learning_rate": 1.4291646390916466e-05, "loss": 0.5456, "step": 12224 }, { "epoch": 0.35692388543400194, "grad_norm": 0.6404616238630652, "learning_rate": 1.4290997566909976e-05, "loss": 0.5916, "step": 12225 }, { "epoch": 0.3569530816618493, "grad_norm": 0.6551503783918985, "learning_rate": 1.4290348742903488e-05, "loss": 0.6343, "step": 12226 }, { "epoch": 0.35698227788969666, "grad_norm": 0.6117339249341697, "learning_rate": 1.4289699918897e-05, "loss": 0.5587, "step": 12227 }, { "epoch": 0.357011474117544, "grad_norm": 0.623838992435264, "learning_rate": 1.4289051094890512e-05, "loss": 0.6006, "step": 12228 }, { "epoch": 0.3570406703453914, "grad_norm": 0.6833454263230981, "learning_rate": 1.4288402270884022e-05, "loss": 0.6452, "step": 12229 }, { "epoch": 0.35706986657323875, "grad_norm": 0.6104334469166733, "learning_rate": 1.4287753446877534e-05, "loss": 0.5886, "step": 12230 }, { "epoch": 0.3570990628010861, "grad_norm": 0.6494031209838522, "learning_rate": 1.4287104622871046e-05, "loss": 0.6026, "step": 12231 }, { "epoch": 0.35712825902893347, "grad_norm": 0.63746064424415, "learning_rate": 1.428645579886456e-05, "loss": 0.6074, "step": 12232 }, { "epoch": 0.35715745525678083, "grad_norm": 0.6174854332048288, "learning_rate": 1.4285806974858072e-05, "loss": 0.5536, "step": 12233 }, { "epoch": 0.3571866514846282, "grad_norm": 0.5976166302943486, "learning_rate": 1.4285158150851584e-05, "loss": 0.5404, "step": 12234 }, { "epoch": 0.35721584771247555, "grad_norm": 0.689959106418964, "learning_rate": 1.4284509326845094e-05, "loss": 0.7139, "step": 12235 }, { "epoch": 0.3572450439403229, "grad_norm": 0.6626097557898499, "learning_rate": 1.4283860502838606e-05, "loss": 0.6806, "step": 12236 }, { "epoch": 0.3572742401681703, "grad_norm": 0.6006415064487329, "learning_rate": 1.4283211678832119e-05, "loss": 0.5509, "step": 12237 }, { "epoch": 0.35730343639601764, "grad_norm": 0.6352566055096193, "learning_rate": 1.428256285482563e-05, "loss": 0.6651, "step": 12238 }, { "epoch": 0.357332632623865, "grad_norm": 0.7166780690265571, "learning_rate": 1.4281914030819143e-05, "loss": 0.7657, "step": 12239 }, { "epoch": 0.35736182885171236, "grad_norm": 0.6810493721766656, "learning_rate": 1.4281265206812653e-05, "loss": 0.6629, "step": 12240 }, { "epoch": 0.3573910250795597, "grad_norm": 0.6463729733041892, "learning_rate": 1.4280616382806165e-05, "loss": 0.6259, "step": 12241 }, { "epoch": 0.3574202213074071, "grad_norm": 0.5985894502397504, "learning_rate": 1.4279967558799677e-05, "loss": 0.5613, "step": 12242 }, { "epoch": 0.35744941753525444, "grad_norm": 0.5758005604287723, "learning_rate": 1.4279318734793189e-05, "loss": 0.5483, "step": 12243 }, { "epoch": 0.3574786137631018, "grad_norm": 0.6465094073948822, "learning_rate": 1.42786699107867e-05, "loss": 0.6353, "step": 12244 }, { "epoch": 0.35750780999094917, "grad_norm": 0.6311649785445593, "learning_rate": 1.4278021086780211e-05, "loss": 0.6041, "step": 12245 }, { "epoch": 0.3575370062187965, "grad_norm": 0.6549668517063147, "learning_rate": 1.4277372262773723e-05, "loss": 0.6284, "step": 12246 }, { "epoch": 0.3575662024466439, "grad_norm": 0.741063089941334, "learning_rate": 1.4276723438767235e-05, "loss": 0.6173, "step": 12247 }, { "epoch": 0.35759539867449125, "grad_norm": 0.6427779366897342, "learning_rate": 1.4276074614760746e-05, "loss": 0.6512, "step": 12248 }, { "epoch": 0.3576245949023386, "grad_norm": 0.6789454688033304, "learning_rate": 1.4275425790754258e-05, "loss": 0.6829, "step": 12249 }, { "epoch": 0.35765379113018597, "grad_norm": 0.722484187602753, "learning_rate": 1.427477696674777e-05, "loss": 0.5875, "step": 12250 }, { "epoch": 0.35768298735803333, "grad_norm": 0.6794054819555537, "learning_rate": 1.4274128142741283e-05, "loss": 0.6528, "step": 12251 }, { "epoch": 0.3577121835858807, "grad_norm": 0.6405123352697948, "learning_rate": 1.4273479318734796e-05, "loss": 0.6536, "step": 12252 }, { "epoch": 0.35774137981372806, "grad_norm": 0.6672161223199026, "learning_rate": 1.4272830494728308e-05, "loss": 0.6878, "step": 12253 }, { "epoch": 0.3577705760415754, "grad_norm": 0.6763411943096328, "learning_rate": 1.4272181670721818e-05, "loss": 0.6712, "step": 12254 }, { "epoch": 0.3577997722694228, "grad_norm": 0.6563275173128463, "learning_rate": 1.427153284671533e-05, "loss": 0.6316, "step": 12255 }, { "epoch": 0.35782896849727014, "grad_norm": 0.6459596305378262, "learning_rate": 1.4270884022708842e-05, "loss": 0.6055, "step": 12256 }, { "epoch": 0.3578581647251175, "grad_norm": 0.6171947941479223, "learning_rate": 1.4270235198702354e-05, "loss": 0.6254, "step": 12257 }, { "epoch": 0.35788736095296486, "grad_norm": 0.6430283397027441, "learning_rate": 1.4269586374695866e-05, "loss": 0.6488, "step": 12258 }, { "epoch": 0.3579165571808122, "grad_norm": 0.656938186145531, "learning_rate": 1.4268937550689376e-05, "loss": 0.6542, "step": 12259 }, { "epoch": 0.3579457534086596, "grad_norm": 0.5997649254917946, "learning_rate": 1.4268288726682888e-05, "loss": 0.5399, "step": 12260 }, { "epoch": 0.35797494963650694, "grad_norm": 0.6520423497735505, "learning_rate": 1.42676399026764e-05, "loss": 0.568, "step": 12261 }, { "epoch": 0.3580041458643543, "grad_norm": 0.6342024003229554, "learning_rate": 1.4266991078669912e-05, "loss": 0.5664, "step": 12262 }, { "epoch": 0.35803334209220167, "grad_norm": 0.6496151870784928, "learning_rate": 1.4266342254663423e-05, "loss": 0.648, "step": 12263 }, { "epoch": 0.35806253832004903, "grad_norm": 0.668173684815661, "learning_rate": 1.4265693430656935e-05, "loss": 0.6277, "step": 12264 }, { "epoch": 0.3580917345478964, "grad_norm": 0.6106151510854886, "learning_rate": 1.4265044606650447e-05, "loss": 0.5465, "step": 12265 }, { "epoch": 0.35812093077574375, "grad_norm": 0.6694958314624082, "learning_rate": 1.4264395782643959e-05, "loss": 0.6416, "step": 12266 }, { "epoch": 0.3581501270035911, "grad_norm": 0.6536406264767853, "learning_rate": 1.426374695863747e-05, "loss": 0.6305, "step": 12267 }, { "epoch": 0.3581793232314385, "grad_norm": 0.5997937454976782, "learning_rate": 1.4263098134630981e-05, "loss": 0.5776, "step": 12268 }, { "epoch": 0.35820851945928583, "grad_norm": 0.6363726352653485, "learning_rate": 1.4262449310624493e-05, "loss": 0.6162, "step": 12269 }, { "epoch": 0.3582377156871332, "grad_norm": 0.6639762740764906, "learning_rate": 1.4261800486618007e-05, "loss": 0.6145, "step": 12270 }, { "epoch": 0.35826691191498056, "grad_norm": 0.6996715625374531, "learning_rate": 1.4261151662611519e-05, "loss": 0.6241, "step": 12271 }, { "epoch": 0.358296108142828, "grad_norm": 0.7253992974279286, "learning_rate": 1.4260502838605031e-05, "loss": 0.682, "step": 12272 }, { "epoch": 0.35832530437067533, "grad_norm": 0.6670172519375249, "learning_rate": 1.4259854014598541e-05, "loss": 0.5936, "step": 12273 }, { "epoch": 0.3583545005985227, "grad_norm": 0.6481697178731487, "learning_rate": 1.4259205190592053e-05, "loss": 0.5597, "step": 12274 }, { "epoch": 0.35838369682637006, "grad_norm": 0.8270446133659253, "learning_rate": 1.4258556366585565e-05, "loss": 0.6392, "step": 12275 }, { "epoch": 0.3584128930542174, "grad_norm": 0.7181444839325625, "learning_rate": 1.4257907542579077e-05, "loss": 0.7011, "step": 12276 }, { "epoch": 0.3584420892820648, "grad_norm": 0.6236044033376826, "learning_rate": 1.425725871857259e-05, "loss": 0.6003, "step": 12277 }, { "epoch": 0.35847128550991214, "grad_norm": 0.7228504384797102, "learning_rate": 1.42566098945661e-05, "loss": 0.7218, "step": 12278 }, { "epoch": 0.3585004817377595, "grad_norm": 0.6927123560448275, "learning_rate": 1.4255961070559612e-05, "loss": 0.6906, "step": 12279 }, { "epoch": 0.35852967796560686, "grad_norm": 0.6853662396232596, "learning_rate": 1.4255312246553124e-05, "loss": 0.6665, "step": 12280 }, { "epoch": 0.3585588741934542, "grad_norm": 0.648043846814824, "learning_rate": 1.4254663422546636e-05, "loss": 0.6498, "step": 12281 }, { "epoch": 0.3585880704213016, "grad_norm": 0.6633470508819858, "learning_rate": 1.4254014598540146e-05, "loss": 0.6335, "step": 12282 }, { "epoch": 0.35861726664914895, "grad_norm": 0.6406728032719993, "learning_rate": 1.4253365774533658e-05, "loss": 0.5878, "step": 12283 }, { "epoch": 0.3586464628769963, "grad_norm": 0.6595707242129663, "learning_rate": 1.425271695052717e-05, "loss": 0.6578, "step": 12284 }, { "epoch": 0.35867565910484367, "grad_norm": 0.7115733596179509, "learning_rate": 1.4252068126520682e-05, "loss": 0.6657, "step": 12285 }, { "epoch": 0.35870485533269103, "grad_norm": 0.7100204347857241, "learning_rate": 1.4251419302514193e-05, "loss": 0.6908, "step": 12286 }, { "epoch": 0.3587340515605384, "grad_norm": 0.6746708618677114, "learning_rate": 1.4250770478507705e-05, "loss": 0.673, "step": 12287 }, { "epoch": 0.35876324778838575, "grad_norm": 0.6332531643965923, "learning_rate": 1.4250121654501217e-05, "loss": 0.6294, "step": 12288 }, { "epoch": 0.3587924440162331, "grad_norm": 0.6381124172608066, "learning_rate": 1.424947283049473e-05, "loss": 0.5914, "step": 12289 }, { "epoch": 0.3588216402440805, "grad_norm": 0.6588530348982992, "learning_rate": 1.4248824006488242e-05, "loss": 0.6287, "step": 12290 }, { "epoch": 0.35885083647192784, "grad_norm": 0.7315351166623196, "learning_rate": 1.4248175182481754e-05, "loss": 0.6324, "step": 12291 }, { "epoch": 0.3588800326997752, "grad_norm": 0.6600569426510198, "learning_rate": 1.4247526358475265e-05, "loss": 0.6509, "step": 12292 }, { "epoch": 0.35890922892762256, "grad_norm": 0.7067593004819074, "learning_rate": 1.4246877534468777e-05, "loss": 0.7004, "step": 12293 }, { "epoch": 0.3589384251554699, "grad_norm": 0.7206711643361979, "learning_rate": 1.4246228710462289e-05, "loss": 0.6813, "step": 12294 }, { "epoch": 0.3589676213833173, "grad_norm": 0.6277276649536511, "learning_rate": 1.42455798864558e-05, "loss": 0.6407, "step": 12295 }, { "epoch": 0.35899681761116464, "grad_norm": 0.6799476370164533, "learning_rate": 1.4244931062449311e-05, "loss": 0.6589, "step": 12296 }, { "epoch": 0.359026013839012, "grad_norm": 0.698842145491927, "learning_rate": 1.4244282238442823e-05, "loss": 0.6951, "step": 12297 }, { "epoch": 0.35905521006685936, "grad_norm": 0.6372322409544385, "learning_rate": 1.4243633414436335e-05, "loss": 0.6245, "step": 12298 }, { "epoch": 0.3590844062947067, "grad_norm": 0.621876136951613, "learning_rate": 1.4242984590429847e-05, "loss": 0.5943, "step": 12299 }, { "epoch": 0.3591136025225541, "grad_norm": 0.7181528023843873, "learning_rate": 1.424233576642336e-05, "loss": 0.6821, "step": 12300 }, { "epoch": 0.35914279875040145, "grad_norm": 0.6809359751804442, "learning_rate": 1.424168694241687e-05, "loss": 0.7057, "step": 12301 }, { "epoch": 0.3591719949782488, "grad_norm": 0.6203137062940295, "learning_rate": 1.4241038118410382e-05, "loss": 0.5864, "step": 12302 }, { "epoch": 0.35920119120609617, "grad_norm": 0.6517535032388077, "learning_rate": 1.4240389294403894e-05, "loss": 0.6503, "step": 12303 }, { "epoch": 0.35923038743394353, "grad_norm": 0.6701923735731203, "learning_rate": 1.4239740470397406e-05, "loss": 0.6722, "step": 12304 }, { "epoch": 0.3592595836617909, "grad_norm": 0.6562122804907143, "learning_rate": 1.4239091646390916e-05, "loss": 0.551, "step": 12305 }, { "epoch": 0.35928877988963825, "grad_norm": 0.6472901708376128, "learning_rate": 1.4238442822384428e-05, "loss": 0.5574, "step": 12306 }, { "epoch": 0.3593179761174856, "grad_norm": 0.6882303202188943, "learning_rate": 1.423779399837794e-05, "loss": 0.6266, "step": 12307 }, { "epoch": 0.359347172345333, "grad_norm": 0.6891554245016628, "learning_rate": 1.4237145174371454e-05, "loss": 0.69, "step": 12308 }, { "epoch": 0.35937636857318034, "grad_norm": 0.6118044773056649, "learning_rate": 1.4236496350364966e-05, "loss": 0.6124, "step": 12309 }, { "epoch": 0.3594055648010277, "grad_norm": 0.6177703841015832, "learning_rate": 1.4235847526358478e-05, "loss": 0.5778, "step": 12310 }, { "epoch": 0.35943476102887506, "grad_norm": 0.6958834493217494, "learning_rate": 1.4235198702351988e-05, "loss": 0.6872, "step": 12311 }, { "epoch": 0.3594639572567224, "grad_norm": 0.6423923647100922, "learning_rate": 1.42345498783455e-05, "loss": 0.5961, "step": 12312 }, { "epoch": 0.3594931534845698, "grad_norm": 0.6547257143025864, "learning_rate": 1.4233901054339012e-05, "loss": 0.6772, "step": 12313 }, { "epoch": 0.35952234971241714, "grad_norm": 0.6728693979414156, "learning_rate": 1.4233252230332524e-05, "loss": 0.6396, "step": 12314 }, { "epoch": 0.3595515459402645, "grad_norm": 0.6794967472771589, "learning_rate": 1.4232603406326035e-05, "loss": 0.6104, "step": 12315 }, { "epoch": 0.35958074216811187, "grad_norm": 0.7693227548800043, "learning_rate": 1.4231954582319547e-05, "loss": 0.6068, "step": 12316 }, { "epoch": 0.3596099383959592, "grad_norm": 0.6582763634602944, "learning_rate": 1.4231305758313059e-05, "loss": 0.6197, "step": 12317 }, { "epoch": 0.3596391346238066, "grad_norm": 0.7311667173403364, "learning_rate": 1.423065693430657e-05, "loss": 0.6454, "step": 12318 }, { "epoch": 0.35966833085165395, "grad_norm": 0.6434281740594072, "learning_rate": 1.4230008110300083e-05, "loss": 0.6393, "step": 12319 }, { "epoch": 0.3596975270795013, "grad_norm": 0.6831288694955212, "learning_rate": 1.4229359286293593e-05, "loss": 0.6771, "step": 12320 }, { "epoch": 0.3597267233073487, "grad_norm": 0.6962419222894005, "learning_rate": 1.4228710462287105e-05, "loss": 0.7338, "step": 12321 }, { "epoch": 0.35975591953519603, "grad_norm": 0.6531646246534056, "learning_rate": 1.4228061638280617e-05, "loss": 0.6681, "step": 12322 }, { "epoch": 0.3597851157630434, "grad_norm": 0.6598719478350217, "learning_rate": 1.4227412814274129e-05, "loss": 0.6063, "step": 12323 }, { "epoch": 0.35981431199089076, "grad_norm": 0.735317826575562, "learning_rate": 1.422676399026764e-05, "loss": 0.7091, "step": 12324 }, { "epoch": 0.3598435082187381, "grad_norm": 0.679930777441618, "learning_rate": 1.4226115166261151e-05, "loss": 0.6473, "step": 12325 }, { "epoch": 0.3598727044465855, "grad_norm": 0.6654998732897838, "learning_rate": 1.4225466342254664e-05, "loss": 0.643, "step": 12326 }, { "epoch": 0.35990190067443284, "grad_norm": 0.657973317230808, "learning_rate": 1.4224817518248177e-05, "loss": 0.6156, "step": 12327 }, { "epoch": 0.3599310969022802, "grad_norm": 0.5754456488389889, "learning_rate": 1.422416869424169e-05, "loss": 0.5539, "step": 12328 }, { "epoch": 0.35996029313012756, "grad_norm": 0.677421206974299, "learning_rate": 1.4223519870235201e-05, "loss": 0.6435, "step": 12329 }, { "epoch": 0.3599894893579749, "grad_norm": 0.6667253368315266, "learning_rate": 1.4222871046228712e-05, "loss": 0.6484, "step": 12330 }, { "epoch": 0.3600186855858223, "grad_norm": 0.6461326294969818, "learning_rate": 1.4222222222222224e-05, "loss": 0.6198, "step": 12331 }, { "epoch": 0.3600478818136697, "grad_norm": 0.6092668887506777, "learning_rate": 1.4221573398215736e-05, "loss": 0.5512, "step": 12332 }, { "epoch": 0.36007707804151706, "grad_norm": 0.6712187457839024, "learning_rate": 1.4220924574209248e-05, "loss": 0.6825, "step": 12333 }, { "epoch": 0.3601062742693644, "grad_norm": 0.7096470455472412, "learning_rate": 1.4220275750202758e-05, "loss": 0.6551, "step": 12334 }, { "epoch": 0.3601354704972118, "grad_norm": 0.6637599033436264, "learning_rate": 1.421962692619627e-05, "loss": 0.6027, "step": 12335 }, { "epoch": 0.36016466672505915, "grad_norm": 0.6827317273765787, "learning_rate": 1.4218978102189782e-05, "loss": 0.7139, "step": 12336 }, { "epoch": 0.3601938629529065, "grad_norm": 0.6306545974435421, "learning_rate": 1.4218329278183294e-05, "loss": 0.6109, "step": 12337 }, { "epoch": 0.36022305918075387, "grad_norm": 0.677640152673792, "learning_rate": 1.4217680454176806e-05, "loss": 0.6591, "step": 12338 }, { "epoch": 0.36025225540860123, "grad_norm": 0.6621739099406437, "learning_rate": 1.4217031630170316e-05, "loss": 0.6123, "step": 12339 }, { "epoch": 0.3602814516364486, "grad_norm": 0.6969808773845374, "learning_rate": 1.4216382806163829e-05, "loss": 0.6961, "step": 12340 }, { "epoch": 0.36031064786429595, "grad_norm": 0.6336315765845545, "learning_rate": 1.421573398215734e-05, "loss": 0.5862, "step": 12341 }, { "epoch": 0.3603398440921433, "grad_norm": 0.6951913392293675, "learning_rate": 1.4215085158150853e-05, "loss": 0.6885, "step": 12342 }, { "epoch": 0.3603690403199907, "grad_norm": 0.667032274785217, "learning_rate": 1.4214436334144363e-05, "loss": 0.6967, "step": 12343 }, { "epoch": 0.36039823654783804, "grad_norm": 0.6353725522879218, "learning_rate": 1.4213787510137875e-05, "loss": 0.6059, "step": 12344 }, { "epoch": 0.3604274327756854, "grad_norm": 0.6528743423017315, "learning_rate": 1.4213138686131387e-05, "loss": 0.581, "step": 12345 }, { "epoch": 0.36045662900353276, "grad_norm": 0.6936428452156004, "learning_rate": 1.42124898621249e-05, "loss": 0.6652, "step": 12346 }, { "epoch": 0.3604858252313801, "grad_norm": 0.6282892050972361, "learning_rate": 1.4211841038118413e-05, "loss": 0.6205, "step": 12347 }, { "epoch": 0.3605150214592275, "grad_norm": 0.6171238737666543, "learning_rate": 1.4211192214111925e-05, "loss": 0.5619, "step": 12348 }, { "epoch": 0.36054421768707484, "grad_norm": 0.6331312379309941, "learning_rate": 1.4210543390105435e-05, "loss": 0.6036, "step": 12349 }, { "epoch": 0.3605734139149222, "grad_norm": 0.6381756888445778, "learning_rate": 1.4209894566098947e-05, "loss": 0.5771, "step": 12350 }, { "epoch": 0.36060261014276956, "grad_norm": 0.6578655655562442, "learning_rate": 1.4209245742092459e-05, "loss": 0.6311, "step": 12351 }, { "epoch": 0.3606318063706169, "grad_norm": 0.7988276467354191, "learning_rate": 1.4208596918085971e-05, "loss": 0.6733, "step": 12352 }, { "epoch": 0.3606610025984643, "grad_norm": 0.6825005384939447, "learning_rate": 1.4207948094079481e-05, "loss": 0.6495, "step": 12353 }, { "epoch": 0.36069019882631165, "grad_norm": 0.6277954926205788, "learning_rate": 1.4207299270072993e-05, "loss": 0.6116, "step": 12354 }, { "epoch": 0.360719395054159, "grad_norm": 0.6636437130456907, "learning_rate": 1.4206650446066506e-05, "loss": 0.6564, "step": 12355 }, { "epoch": 0.36074859128200637, "grad_norm": 0.636427292342205, "learning_rate": 1.4206001622060018e-05, "loss": 0.593, "step": 12356 }, { "epoch": 0.36077778750985373, "grad_norm": 0.6261304243187639, "learning_rate": 1.420535279805353e-05, "loss": 0.5668, "step": 12357 }, { "epoch": 0.3608069837377011, "grad_norm": 0.658502213117451, "learning_rate": 1.420470397404704e-05, "loss": 0.6769, "step": 12358 }, { "epoch": 0.36083617996554845, "grad_norm": 0.6309672847835345, "learning_rate": 1.4204055150040552e-05, "loss": 0.5876, "step": 12359 }, { "epoch": 0.3608653761933958, "grad_norm": 0.6881514747634636, "learning_rate": 1.4203406326034064e-05, "loss": 0.6477, "step": 12360 }, { "epoch": 0.3608945724212432, "grad_norm": 0.6582373701809668, "learning_rate": 1.4202757502027576e-05, "loss": 0.6397, "step": 12361 }, { "epoch": 0.36092376864909054, "grad_norm": 0.5984059444632378, "learning_rate": 1.4202108678021086e-05, "loss": 0.5495, "step": 12362 }, { "epoch": 0.3609529648769379, "grad_norm": 0.6503672826229723, "learning_rate": 1.4201459854014598e-05, "loss": 0.6736, "step": 12363 }, { "epoch": 0.36098216110478526, "grad_norm": 0.6327040810163709, "learning_rate": 1.4200811030008112e-05, "loss": 0.6128, "step": 12364 }, { "epoch": 0.3610113573326326, "grad_norm": 0.6705874387414313, "learning_rate": 1.4200162206001624e-05, "loss": 0.6788, "step": 12365 }, { "epoch": 0.36104055356048, "grad_norm": 0.6676553495839588, "learning_rate": 1.4199513381995136e-05, "loss": 0.6064, "step": 12366 }, { "epoch": 0.36106974978832734, "grad_norm": 0.6365543690646487, "learning_rate": 1.4198864557988648e-05, "loss": 0.6284, "step": 12367 }, { "epoch": 0.3610989460161747, "grad_norm": 0.6515353367237956, "learning_rate": 1.4198215733982158e-05, "loss": 0.5889, "step": 12368 }, { "epoch": 0.36112814224402207, "grad_norm": 0.645433455819343, "learning_rate": 1.419756690997567e-05, "loss": 0.6182, "step": 12369 }, { "epoch": 0.3611573384718694, "grad_norm": 0.72698196602377, "learning_rate": 1.4196918085969183e-05, "loss": 0.6867, "step": 12370 }, { "epoch": 0.3611865346997168, "grad_norm": 0.6187512779500388, "learning_rate": 1.4196269261962695e-05, "loss": 0.5887, "step": 12371 }, { "epoch": 0.36121573092756415, "grad_norm": 0.6973560704176943, "learning_rate": 1.4195620437956205e-05, "loss": 0.7128, "step": 12372 }, { "epoch": 0.3612449271554115, "grad_norm": 0.710231505114445, "learning_rate": 1.4194971613949717e-05, "loss": 0.6647, "step": 12373 }, { "epoch": 0.36127412338325887, "grad_norm": 0.6743885354353606, "learning_rate": 1.4194322789943229e-05, "loss": 0.6632, "step": 12374 }, { "epoch": 0.36130331961110623, "grad_norm": 0.6673366905492739, "learning_rate": 1.4193673965936741e-05, "loss": 0.6394, "step": 12375 }, { "epoch": 0.3613325158389536, "grad_norm": 0.6933431314663725, "learning_rate": 1.4193025141930253e-05, "loss": 0.6854, "step": 12376 }, { "epoch": 0.36136171206680096, "grad_norm": 0.6519647170854367, "learning_rate": 1.4192376317923763e-05, "loss": 0.5973, "step": 12377 }, { "epoch": 0.3613909082946483, "grad_norm": 0.7453868393735561, "learning_rate": 1.4191727493917275e-05, "loss": 0.6975, "step": 12378 }, { "epoch": 0.3614201045224957, "grad_norm": 0.6086809980774354, "learning_rate": 1.4191078669910787e-05, "loss": 0.5823, "step": 12379 }, { "epoch": 0.36144930075034304, "grad_norm": 0.6959195308521073, "learning_rate": 1.41904298459043e-05, "loss": 0.6278, "step": 12380 }, { "epoch": 0.3614784969781904, "grad_norm": 0.5930313450008915, "learning_rate": 1.418978102189781e-05, "loss": 0.544, "step": 12381 }, { "epoch": 0.36150769320603776, "grad_norm": 0.6830348196450936, "learning_rate": 1.4189132197891322e-05, "loss": 0.6761, "step": 12382 }, { "epoch": 0.3615368894338851, "grad_norm": 0.6363245335408344, "learning_rate": 1.4188483373884836e-05, "loss": 0.5713, "step": 12383 }, { "epoch": 0.3615660856617325, "grad_norm": 0.6215458289202886, "learning_rate": 1.4187834549878348e-05, "loss": 0.5802, "step": 12384 }, { "epoch": 0.36159528188957984, "grad_norm": 0.6923075935024158, "learning_rate": 1.418718572587186e-05, "loss": 0.7077, "step": 12385 }, { "epoch": 0.3616244781174272, "grad_norm": 0.6675174033988241, "learning_rate": 1.4186536901865372e-05, "loss": 0.6339, "step": 12386 }, { "epoch": 0.36165367434527457, "grad_norm": 0.7790800837392345, "learning_rate": 1.4185888077858882e-05, "loss": 0.5537, "step": 12387 }, { "epoch": 0.36168287057312193, "grad_norm": 0.6309787846585022, "learning_rate": 1.4185239253852394e-05, "loss": 0.622, "step": 12388 }, { "epoch": 0.3617120668009693, "grad_norm": 0.6747575916668239, "learning_rate": 1.4184590429845906e-05, "loss": 0.661, "step": 12389 }, { "epoch": 0.36174126302881665, "grad_norm": 0.6778266260770031, "learning_rate": 1.4183941605839418e-05, "loss": 0.6391, "step": 12390 }, { "epoch": 0.361770459256664, "grad_norm": 0.6495755465982193, "learning_rate": 1.4183292781832928e-05, "loss": 0.6434, "step": 12391 }, { "epoch": 0.36179965548451143, "grad_norm": 0.6570649971200104, "learning_rate": 1.418264395782644e-05, "loss": 0.6311, "step": 12392 }, { "epoch": 0.3618288517123588, "grad_norm": 0.6668608947458159, "learning_rate": 1.4181995133819952e-05, "loss": 0.6226, "step": 12393 }, { "epoch": 0.36185804794020615, "grad_norm": 0.7141874219914784, "learning_rate": 1.4181346309813464e-05, "loss": 0.7314, "step": 12394 }, { "epoch": 0.3618872441680535, "grad_norm": 0.7254974303453429, "learning_rate": 1.4180697485806976e-05, "loss": 0.6403, "step": 12395 }, { "epoch": 0.3619164403959009, "grad_norm": 0.6723664224735992, "learning_rate": 1.4180048661800487e-05, "loss": 0.6635, "step": 12396 }, { "epoch": 0.36194563662374823, "grad_norm": 0.6464666890022577, "learning_rate": 1.4179399837793999e-05, "loss": 0.6299, "step": 12397 }, { "epoch": 0.3619748328515956, "grad_norm": 0.6318119996887661, "learning_rate": 1.417875101378751e-05, "loss": 0.6079, "step": 12398 }, { "epoch": 0.36200402907944296, "grad_norm": 0.6581043831484865, "learning_rate": 1.4178102189781023e-05, "loss": 0.6763, "step": 12399 }, { "epoch": 0.3620332253072903, "grad_norm": 0.6671078452896434, "learning_rate": 1.4177453365774533e-05, "loss": 0.6524, "step": 12400 }, { "epoch": 0.3620624215351377, "grad_norm": 0.6088968786451421, "learning_rate": 1.4176804541768045e-05, "loss": 0.5681, "step": 12401 }, { "epoch": 0.36209161776298504, "grad_norm": 0.6597659975288891, "learning_rate": 1.4176155717761559e-05, "loss": 0.6157, "step": 12402 }, { "epoch": 0.3621208139908324, "grad_norm": 0.6584469532900862, "learning_rate": 1.4175506893755071e-05, "loss": 0.6593, "step": 12403 }, { "epoch": 0.36215001021867976, "grad_norm": 0.6933955147004803, "learning_rate": 1.4174858069748583e-05, "loss": 0.6145, "step": 12404 }, { "epoch": 0.3621792064465271, "grad_norm": 0.6861566588745107, "learning_rate": 1.4174209245742095e-05, "loss": 0.6687, "step": 12405 }, { "epoch": 0.3622084026743745, "grad_norm": 0.6520627176058461, "learning_rate": 1.4173560421735605e-05, "loss": 0.6202, "step": 12406 }, { "epoch": 0.36223759890222185, "grad_norm": 0.6898520364663282, "learning_rate": 1.4172911597729117e-05, "loss": 0.6829, "step": 12407 }, { "epoch": 0.3622667951300692, "grad_norm": 0.6379266970617069, "learning_rate": 1.417226277372263e-05, "loss": 0.5864, "step": 12408 }, { "epoch": 0.36229599135791657, "grad_norm": 0.7282497162130186, "learning_rate": 1.4171613949716141e-05, "loss": 0.7175, "step": 12409 }, { "epoch": 0.36232518758576393, "grad_norm": 0.6620734051852236, "learning_rate": 1.4170965125709652e-05, "loss": 0.6266, "step": 12410 }, { "epoch": 0.3623543838136113, "grad_norm": 0.6037072784004547, "learning_rate": 1.4170316301703164e-05, "loss": 0.5753, "step": 12411 }, { "epoch": 0.36238358004145865, "grad_norm": 0.7317935499903728, "learning_rate": 1.4169667477696676e-05, "loss": 0.7074, "step": 12412 }, { "epoch": 0.362412776269306, "grad_norm": 0.6679122220366484, "learning_rate": 1.4169018653690188e-05, "loss": 0.6474, "step": 12413 }, { "epoch": 0.3624419724971534, "grad_norm": 0.6250279633936459, "learning_rate": 1.41683698296837e-05, "loss": 0.6237, "step": 12414 }, { "epoch": 0.36247116872500074, "grad_norm": 0.7217221582174617, "learning_rate": 1.416772100567721e-05, "loss": 0.6996, "step": 12415 }, { "epoch": 0.3625003649528481, "grad_norm": 0.5807719285917232, "learning_rate": 1.4167072181670722e-05, "loss": 0.6018, "step": 12416 }, { "epoch": 0.36252956118069546, "grad_norm": 0.6405949780247743, "learning_rate": 1.4166423357664234e-05, "loss": 0.6221, "step": 12417 }, { "epoch": 0.3625587574085428, "grad_norm": 0.5989942053265482, "learning_rate": 1.4165774533657746e-05, "loss": 0.5478, "step": 12418 }, { "epoch": 0.3625879536363902, "grad_norm": 0.6621026722759945, "learning_rate": 1.4165125709651257e-05, "loss": 0.6742, "step": 12419 }, { "epoch": 0.36261714986423754, "grad_norm": 0.6927737431090306, "learning_rate": 1.4164476885644769e-05, "loss": 0.6222, "step": 12420 }, { "epoch": 0.3626463460920849, "grad_norm": 0.6861526030452928, "learning_rate": 1.4163828061638282e-05, "loss": 0.577, "step": 12421 }, { "epoch": 0.36267554231993226, "grad_norm": 0.6320437694861036, "learning_rate": 1.4163179237631794e-05, "loss": 0.6049, "step": 12422 }, { "epoch": 0.3627047385477796, "grad_norm": 0.7182577365400968, "learning_rate": 1.4162530413625306e-05, "loss": 0.6496, "step": 12423 }, { "epoch": 0.362733934775627, "grad_norm": 0.6443670362979569, "learning_rate": 1.4161881589618818e-05, "loss": 0.5958, "step": 12424 }, { "epoch": 0.36276313100347435, "grad_norm": 0.5997897698332227, "learning_rate": 1.4161232765612329e-05, "loss": 0.5437, "step": 12425 }, { "epoch": 0.3627923272313217, "grad_norm": 0.6297514581771644, "learning_rate": 1.416058394160584e-05, "loss": 0.597, "step": 12426 }, { "epoch": 0.36282152345916907, "grad_norm": 0.6220615834932397, "learning_rate": 1.4159935117599353e-05, "loss": 0.5828, "step": 12427 }, { "epoch": 0.36285071968701643, "grad_norm": 0.676096457129619, "learning_rate": 1.4159286293592865e-05, "loss": 0.6296, "step": 12428 }, { "epoch": 0.3628799159148638, "grad_norm": 0.6901262345196836, "learning_rate": 1.4158637469586375e-05, "loss": 0.604, "step": 12429 }, { "epoch": 0.36290911214271115, "grad_norm": 0.6804008137436671, "learning_rate": 1.4157988645579887e-05, "loss": 0.6822, "step": 12430 }, { "epoch": 0.3629383083705585, "grad_norm": 0.6805665863231225, "learning_rate": 1.41573398215734e-05, "loss": 0.6692, "step": 12431 }, { "epoch": 0.3629675045984059, "grad_norm": 0.7094108329239704, "learning_rate": 1.4156690997566911e-05, "loss": 0.611, "step": 12432 }, { "epoch": 0.36299670082625324, "grad_norm": 0.5824157338168741, "learning_rate": 1.4156042173560423e-05, "loss": 0.5164, "step": 12433 }, { "epoch": 0.3630258970541006, "grad_norm": 0.6932728017549482, "learning_rate": 1.4155393349553934e-05, "loss": 0.617, "step": 12434 }, { "epoch": 0.36305509328194796, "grad_norm": 0.6583876162912757, "learning_rate": 1.4154744525547446e-05, "loss": 0.6328, "step": 12435 }, { "epoch": 0.3630842895097953, "grad_norm": 0.7273803937877529, "learning_rate": 1.4154095701540958e-05, "loss": 0.725, "step": 12436 }, { "epoch": 0.3631134857376427, "grad_norm": 0.6317415436629463, "learning_rate": 1.415344687753447e-05, "loss": 0.58, "step": 12437 }, { "epoch": 0.36314268196549004, "grad_norm": 1.05696417199811, "learning_rate": 1.415279805352798e-05, "loss": 0.646, "step": 12438 }, { "epoch": 0.3631718781933374, "grad_norm": 0.7045267158681449, "learning_rate": 1.4152149229521492e-05, "loss": 0.6766, "step": 12439 }, { "epoch": 0.36320107442118477, "grad_norm": 0.6650797612213956, "learning_rate": 1.4151500405515006e-05, "loss": 0.6902, "step": 12440 }, { "epoch": 0.3632302706490321, "grad_norm": 0.691694354731827, "learning_rate": 1.4150851581508518e-05, "loss": 0.7448, "step": 12441 }, { "epoch": 0.3632594668768795, "grad_norm": 0.6468274345321325, "learning_rate": 1.415020275750203e-05, "loss": 0.6338, "step": 12442 }, { "epoch": 0.36328866310472685, "grad_norm": 0.6497772549925886, "learning_rate": 1.4149553933495542e-05, "loss": 0.625, "step": 12443 }, { "epoch": 0.3633178593325742, "grad_norm": 0.6704941121397401, "learning_rate": 1.4148905109489052e-05, "loss": 0.6236, "step": 12444 }, { "epoch": 0.3633470555604216, "grad_norm": 0.6337385232110699, "learning_rate": 1.4148256285482564e-05, "loss": 0.5936, "step": 12445 }, { "epoch": 0.36337625178826893, "grad_norm": 0.6669545186464715, "learning_rate": 1.4147607461476076e-05, "loss": 0.6684, "step": 12446 }, { "epoch": 0.3634054480161163, "grad_norm": 0.6825844722768045, "learning_rate": 1.4146958637469588e-05, "loss": 0.6665, "step": 12447 }, { "epoch": 0.36343464424396366, "grad_norm": 0.6670104283980839, "learning_rate": 1.4146309813463099e-05, "loss": 0.6605, "step": 12448 }, { "epoch": 0.363463840471811, "grad_norm": 0.6936527470009929, "learning_rate": 1.414566098945661e-05, "loss": 0.6498, "step": 12449 }, { "epoch": 0.3634930366996584, "grad_norm": 0.6701501820501952, "learning_rate": 1.4145012165450123e-05, "loss": 0.6669, "step": 12450 }, { "epoch": 0.36352223292750574, "grad_norm": 0.6642478809103113, "learning_rate": 1.4144363341443635e-05, "loss": 0.6494, "step": 12451 }, { "epoch": 0.3635514291553531, "grad_norm": 0.6458166388583256, "learning_rate": 1.4143714517437147e-05, "loss": 0.6032, "step": 12452 }, { "epoch": 0.3635806253832005, "grad_norm": 0.681697920429062, "learning_rate": 1.4143065693430657e-05, "loss": 0.6206, "step": 12453 }, { "epoch": 0.3636098216110479, "grad_norm": 0.6097683430972964, "learning_rate": 1.4142416869424169e-05, "loss": 0.5565, "step": 12454 }, { "epoch": 0.36363901783889524, "grad_norm": 0.6231265536066165, "learning_rate": 1.4141768045417681e-05, "loss": 0.5893, "step": 12455 }, { "epoch": 0.3636682140667426, "grad_norm": 0.6722589691130298, "learning_rate": 1.4141119221411193e-05, "loss": 0.6543, "step": 12456 }, { "epoch": 0.36369741029458996, "grad_norm": 0.6530047222955392, "learning_rate": 1.4140470397404704e-05, "loss": 0.6551, "step": 12457 }, { "epoch": 0.3637266065224373, "grad_norm": 0.7577374408037976, "learning_rate": 1.4139821573398216e-05, "loss": 0.6175, "step": 12458 }, { "epoch": 0.3637558027502847, "grad_norm": 0.6300811118177777, "learning_rate": 1.413917274939173e-05, "loss": 0.5648, "step": 12459 }, { "epoch": 0.36378499897813205, "grad_norm": 0.6518440049317825, "learning_rate": 1.4138523925385241e-05, "loss": 0.5979, "step": 12460 }, { "epoch": 0.3638141952059794, "grad_norm": 0.6471246774198915, "learning_rate": 1.4137875101378753e-05, "loss": 0.6265, "step": 12461 }, { "epoch": 0.36384339143382677, "grad_norm": 0.6602815821274077, "learning_rate": 1.4137226277372265e-05, "loss": 0.608, "step": 12462 }, { "epoch": 0.36387258766167413, "grad_norm": 0.6742850271477454, "learning_rate": 1.4136577453365776e-05, "loss": 0.6299, "step": 12463 }, { "epoch": 0.3639017838895215, "grad_norm": 0.6366749370694609, "learning_rate": 1.4135928629359288e-05, "loss": 0.5748, "step": 12464 }, { "epoch": 0.36393098011736885, "grad_norm": 0.6375035796450097, "learning_rate": 1.41352798053528e-05, "loss": 0.608, "step": 12465 }, { "epoch": 0.3639601763452162, "grad_norm": 0.6510300199802391, "learning_rate": 1.4134630981346312e-05, "loss": 0.6177, "step": 12466 }, { "epoch": 0.3639893725730636, "grad_norm": 0.7052445861955123, "learning_rate": 1.4133982157339822e-05, "loss": 0.7514, "step": 12467 }, { "epoch": 0.36401856880091094, "grad_norm": 0.6993278420358011, "learning_rate": 1.4133333333333334e-05, "loss": 0.7091, "step": 12468 }, { "epoch": 0.3640477650287583, "grad_norm": 0.6250987473639099, "learning_rate": 1.4132684509326846e-05, "loss": 0.5518, "step": 12469 }, { "epoch": 0.36407696125660566, "grad_norm": 0.638628376910241, "learning_rate": 1.4132035685320358e-05, "loss": 0.6072, "step": 12470 }, { "epoch": 0.364106157484453, "grad_norm": 0.6559108284445072, "learning_rate": 1.413138686131387e-05, "loss": 0.6313, "step": 12471 }, { "epoch": 0.3641353537123004, "grad_norm": 0.6281686177595673, "learning_rate": 1.413073803730738e-05, "loss": 0.5378, "step": 12472 }, { "epoch": 0.36416454994014774, "grad_norm": 0.6431868448329808, "learning_rate": 1.4130089213300893e-05, "loss": 0.6002, "step": 12473 }, { "epoch": 0.3641937461679951, "grad_norm": 0.6323117892323773, "learning_rate": 1.4129440389294405e-05, "loss": 0.5985, "step": 12474 }, { "epoch": 0.36422294239584246, "grad_norm": 0.6558174449282603, "learning_rate": 1.4128791565287917e-05, "loss": 0.6556, "step": 12475 }, { "epoch": 0.3642521386236898, "grad_norm": 0.577663393522825, "learning_rate": 1.4128142741281427e-05, "loss": 0.5174, "step": 12476 }, { "epoch": 0.3642813348515372, "grad_norm": 0.7221364456521148, "learning_rate": 1.4127493917274939e-05, "loss": 0.6539, "step": 12477 }, { "epoch": 0.36431053107938455, "grad_norm": 0.7137622974549599, "learning_rate": 1.4126845093268453e-05, "loss": 0.6609, "step": 12478 }, { "epoch": 0.3643397273072319, "grad_norm": 0.6844084580298996, "learning_rate": 1.4126196269261965e-05, "loss": 0.6694, "step": 12479 }, { "epoch": 0.36436892353507927, "grad_norm": 0.6583774911595935, "learning_rate": 1.4125547445255477e-05, "loss": 0.6357, "step": 12480 }, { "epoch": 0.36439811976292663, "grad_norm": 0.5904890322157771, "learning_rate": 1.4124898621248989e-05, "loss": 0.5181, "step": 12481 }, { "epoch": 0.364427315990774, "grad_norm": 0.6600940763473637, "learning_rate": 1.4124249797242499e-05, "loss": 0.6575, "step": 12482 }, { "epoch": 0.36445651221862135, "grad_norm": 0.7007742358844933, "learning_rate": 1.4123600973236011e-05, "loss": 0.6852, "step": 12483 }, { "epoch": 0.3644857084464687, "grad_norm": 0.6588942433208997, "learning_rate": 1.4122952149229523e-05, "loss": 0.6675, "step": 12484 }, { "epoch": 0.3645149046743161, "grad_norm": 0.611424861852409, "learning_rate": 1.4122303325223035e-05, "loss": 0.5739, "step": 12485 }, { "epoch": 0.36454410090216344, "grad_norm": 0.7254307445339364, "learning_rate": 1.4121654501216546e-05, "loss": 0.6972, "step": 12486 }, { "epoch": 0.3645732971300108, "grad_norm": 0.6056835396653519, "learning_rate": 1.4121005677210058e-05, "loss": 0.5469, "step": 12487 }, { "epoch": 0.36460249335785816, "grad_norm": 0.6620653923895307, "learning_rate": 1.412035685320357e-05, "loss": 0.6462, "step": 12488 }, { "epoch": 0.3646316895857055, "grad_norm": 0.6512047336701642, "learning_rate": 1.4119708029197082e-05, "loss": 0.6303, "step": 12489 }, { "epoch": 0.3646608858135529, "grad_norm": 0.703236260696693, "learning_rate": 1.4119059205190594e-05, "loss": 0.6433, "step": 12490 }, { "epoch": 0.36469008204140024, "grad_norm": 0.6912735585338872, "learning_rate": 1.4118410381184104e-05, "loss": 0.6763, "step": 12491 }, { "epoch": 0.3647192782692476, "grad_norm": 0.6359039258349418, "learning_rate": 1.4117761557177616e-05, "loss": 0.6247, "step": 12492 }, { "epoch": 0.36474847449709497, "grad_norm": 0.6745203796287773, "learning_rate": 1.4117112733171128e-05, "loss": 0.6991, "step": 12493 }, { "epoch": 0.3647776707249423, "grad_norm": 0.6760354127238718, "learning_rate": 1.411646390916464e-05, "loss": 0.6527, "step": 12494 }, { "epoch": 0.3648068669527897, "grad_norm": 0.8235215238374547, "learning_rate": 1.411581508515815e-05, "loss": 0.7028, "step": 12495 }, { "epoch": 0.36483606318063705, "grad_norm": 0.623991514677334, "learning_rate": 1.4115166261151662e-05, "loss": 0.5952, "step": 12496 }, { "epoch": 0.3648652594084844, "grad_norm": 0.6541766591549951, "learning_rate": 1.4114517437145176e-05, "loss": 0.6395, "step": 12497 }, { "epoch": 0.36489445563633177, "grad_norm": 0.6752924623493357, "learning_rate": 1.4113868613138688e-05, "loss": 0.6575, "step": 12498 }, { "epoch": 0.36492365186417913, "grad_norm": 0.6906107527947228, "learning_rate": 1.41132197891322e-05, "loss": 0.7238, "step": 12499 }, { "epoch": 0.3649528480920265, "grad_norm": 0.6303409913333794, "learning_rate": 1.4112570965125712e-05, "loss": 0.6239, "step": 12500 }, { "epoch": 0.36498204431987386, "grad_norm": 0.5946715445323046, "learning_rate": 1.4111922141119223e-05, "loss": 0.5291, "step": 12501 }, { "epoch": 0.3650112405477212, "grad_norm": 0.7519207525791831, "learning_rate": 1.4111273317112735e-05, "loss": 0.7565, "step": 12502 }, { "epoch": 0.3650404367755686, "grad_norm": 0.6348092214927962, "learning_rate": 1.4110624493106247e-05, "loss": 0.6081, "step": 12503 }, { "epoch": 0.36506963300341594, "grad_norm": 0.6250472113638238, "learning_rate": 1.4109975669099759e-05, "loss": 0.5962, "step": 12504 }, { "epoch": 0.3650988292312633, "grad_norm": 0.6508775455988602, "learning_rate": 1.4109326845093269e-05, "loss": 0.6737, "step": 12505 }, { "epoch": 0.36512802545911066, "grad_norm": 0.6293506387333154, "learning_rate": 1.4108678021086781e-05, "loss": 0.6398, "step": 12506 }, { "epoch": 0.365157221686958, "grad_norm": 0.6115712760074441, "learning_rate": 1.4108029197080293e-05, "loss": 0.5337, "step": 12507 }, { "epoch": 0.3651864179148054, "grad_norm": 0.7015884218885146, "learning_rate": 1.4107380373073805e-05, "loss": 0.6251, "step": 12508 }, { "epoch": 0.36521561414265274, "grad_norm": 0.6510789423071563, "learning_rate": 1.4106731549067317e-05, "loss": 0.6151, "step": 12509 }, { "epoch": 0.3652448103705001, "grad_norm": 0.731892411547809, "learning_rate": 1.4106082725060827e-05, "loss": 0.6669, "step": 12510 }, { "epoch": 0.36527400659834747, "grad_norm": 0.6480273498639911, "learning_rate": 1.410543390105434e-05, "loss": 0.5955, "step": 12511 }, { "epoch": 0.36530320282619483, "grad_norm": 0.6683558630797907, "learning_rate": 1.4104785077047851e-05, "loss": 0.6405, "step": 12512 }, { "epoch": 0.36533239905404225, "grad_norm": 0.6451102108246514, "learning_rate": 1.4104136253041363e-05, "loss": 0.6167, "step": 12513 }, { "epoch": 0.3653615952818896, "grad_norm": 0.6850777565463039, "learning_rate": 1.4103487429034874e-05, "loss": 0.6482, "step": 12514 }, { "epoch": 0.36539079150973697, "grad_norm": 0.6995172951810767, "learning_rate": 1.4102838605028388e-05, "loss": 0.6756, "step": 12515 }, { "epoch": 0.36541998773758433, "grad_norm": 0.6142009746328416, "learning_rate": 1.41021897810219e-05, "loss": 0.5633, "step": 12516 }, { "epoch": 0.3654491839654317, "grad_norm": 0.7097275598583209, "learning_rate": 1.4101540957015412e-05, "loss": 0.7243, "step": 12517 }, { "epoch": 0.36547838019327905, "grad_norm": 0.6321811971785262, "learning_rate": 1.4100892133008924e-05, "loss": 0.6, "step": 12518 }, { "epoch": 0.3655075764211264, "grad_norm": 0.6426093232921487, "learning_rate": 1.4100243309002436e-05, "loss": 0.6081, "step": 12519 }, { "epoch": 0.3655367726489738, "grad_norm": 0.6631299134562263, "learning_rate": 1.4099594484995946e-05, "loss": 0.6438, "step": 12520 }, { "epoch": 0.36556596887682113, "grad_norm": 0.5774145818013026, "learning_rate": 1.4098945660989458e-05, "loss": 0.5456, "step": 12521 }, { "epoch": 0.3655951651046685, "grad_norm": 0.7082295364443457, "learning_rate": 1.409829683698297e-05, "loss": 0.6731, "step": 12522 }, { "epoch": 0.36562436133251586, "grad_norm": 0.6905426626212214, "learning_rate": 1.4097648012976482e-05, "loss": 0.605, "step": 12523 }, { "epoch": 0.3656535575603632, "grad_norm": 0.6515323914565758, "learning_rate": 1.4096999188969992e-05, "loss": 0.5872, "step": 12524 }, { "epoch": 0.3656827537882106, "grad_norm": 0.6903668155581635, "learning_rate": 1.4096350364963504e-05, "loss": 0.6537, "step": 12525 }, { "epoch": 0.36571195001605794, "grad_norm": 0.7256357242763961, "learning_rate": 1.4095701540957016e-05, "loss": 0.672, "step": 12526 }, { "epoch": 0.3657411462439053, "grad_norm": 0.6778520709020929, "learning_rate": 1.4095052716950528e-05, "loss": 0.6653, "step": 12527 }, { "epoch": 0.36577034247175266, "grad_norm": 0.6824404581949977, "learning_rate": 1.409440389294404e-05, "loss": 0.7074, "step": 12528 }, { "epoch": 0.3657995386996, "grad_norm": 0.702583627733169, "learning_rate": 1.409375506893755e-05, "loss": 0.7113, "step": 12529 }, { "epoch": 0.3658287349274474, "grad_norm": 0.6941874772305785, "learning_rate": 1.4093106244931063e-05, "loss": 0.6824, "step": 12530 }, { "epoch": 0.36585793115529475, "grad_norm": 0.6193897050592359, "learning_rate": 1.4092457420924575e-05, "loss": 0.6156, "step": 12531 }, { "epoch": 0.3658871273831421, "grad_norm": 0.6328091815677018, "learning_rate": 1.4091808596918087e-05, "loss": 0.623, "step": 12532 }, { "epoch": 0.36591632361098947, "grad_norm": 0.6945767745020216, "learning_rate": 1.4091159772911597e-05, "loss": 0.73, "step": 12533 }, { "epoch": 0.36594551983883683, "grad_norm": 0.6593447729652498, "learning_rate": 1.4090510948905111e-05, "loss": 0.6083, "step": 12534 }, { "epoch": 0.3659747160666842, "grad_norm": 0.6779554722708734, "learning_rate": 1.4089862124898623e-05, "loss": 0.6125, "step": 12535 }, { "epoch": 0.36600391229453155, "grad_norm": 0.7032225135879215, "learning_rate": 1.4089213300892135e-05, "loss": 0.7238, "step": 12536 }, { "epoch": 0.3660331085223789, "grad_norm": 0.6207300981117255, "learning_rate": 1.4088564476885647e-05, "loss": 0.6054, "step": 12537 }, { "epoch": 0.3660623047502263, "grad_norm": 0.6982996855476852, "learning_rate": 1.4087915652879159e-05, "loss": 0.7042, "step": 12538 }, { "epoch": 0.36609150097807364, "grad_norm": 0.7400516789114128, "learning_rate": 1.408726682887267e-05, "loss": 0.7323, "step": 12539 }, { "epoch": 0.366120697205921, "grad_norm": 0.6971668955754472, "learning_rate": 1.4086618004866181e-05, "loss": 0.7283, "step": 12540 }, { "epoch": 0.36614989343376836, "grad_norm": 0.6073418715984282, "learning_rate": 1.4085969180859693e-05, "loss": 0.5699, "step": 12541 }, { "epoch": 0.3661790896616157, "grad_norm": 0.6555628326790918, "learning_rate": 1.4085320356853205e-05, "loss": 0.6211, "step": 12542 }, { "epoch": 0.3662082858894631, "grad_norm": 0.660695708630063, "learning_rate": 1.4084671532846716e-05, "loss": 0.6862, "step": 12543 }, { "epoch": 0.36623748211731044, "grad_norm": 0.6360345383344946, "learning_rate": 1.4084022708840228e-05, "loss": 0.6049, "step": 12544 }, { "epoch": 0.3662666783451578, "grad_norm": 0.6870458722637746, "learning_rate": 1.408337388483374e-05, "loss": 0.568, "step": 12545 }, { "epoch": 0.36629587457300516, "grad_norm": 0.6183818097157343, "learning_rate": 1.4082725060827252e-05, "loss": 0.5823, "step": 12546 }, { "epoch": 0.3663250708008525, "grad_norm": 0.6169083857440113, "learning_rate": 1.4082076236820764e-05, "loss": 0.5464, "step": 12547 }, { "epoch": 0.3663542670286999, "grad_norm": 0.6470581376953115, "learning_rate": 1.4081427412814274e-05, "loss": 0.6222, "step": 12548 }, { "epoch": 0.36638346325654725, "grad_norm": 0.7219541591643771, "learning_rate": 1.4080778588807786e-05, "loss": 0.6838, "step": 12549 }, { "epoch": 0.3664126594843946, "grad_norm": 0.6783047418050325, "learning_rate": 1.4080129764801298e-05, "loss": 0.6273, "step": 12550 }, { "epoch": 0.36644185571224197, "grad_norm": 0.5990171939709535, "learning_rate": 1.407948094079481e-05, "loss": 0.5499, "step": 12551 }, { "epoch": 0.36647105194008933, "grad_norm": 0.6738297778483785, "learning_rate": 1.407883211678832e-05, "loss": 0.6666, "step": 12552 }, { "epoch": 0.3665002481679367, "grad_norm": 0.6486847363960839, "learning_rate": 1.4078183292781834e-05, "loss": 0.5946, "step": 12553 }, { "epoch": 0.36652944439578405, "grad_norm": 0.6618714245934008, "learning_rate": 1.4077534468775346e-05, "loss": 0.6498, "step": 12554 }, { "epoch": 0.3665586406236314, "grad_norm": 0.6947886465283806, "learning_rate": 1.4076885644768858e-05, "loss": 0.5996, "step": 12555 }, { "epoch": 0.3665878368514788, "grad_norm": 0.6983634045647213, "learning_rate": 1.407623682076237e-05, "loss": 0.6646, "step": 12556 }, { "epoch": 0.36661703307932614, "grad_norm": 0.6253327905527964, "learning_rate": 1.4075587996755883e-05, "loss": 0.5806, "step": 12557 }, { "epoch": 0.3666462293071735, "grad_norm": 0.6012315840921949, "learning_rate": 1.4074939172749393e-05, "loss": 0.5517, "step": 12558 }, { "epoch": 0.36667542553502086, "grad_norm": 0.6526192754749508, "learning_rate": 1.4074290348742905e-05, "loss": 0.634, "step": 12559 }, { "epoch": 0.3667046217628682, "grad_norm": 0.637138806922816, "learning_rate": 1.4073641524736417e-05, "loss": 0.6456, "step": 12560 }, { "epoch": 0.3667338179907156, "grad_norm": 0.7003895293008976, "learning_rate": 1.4072992700729929e-05, "loss": 0.6672, "step": 12561 }, { "epoch": 0.36676301421856294, "grad_norm": 0.6377915419026732, "learning_rate": 1.407234387672344e-05, "loss": 0.6219, "step": 12562 }, { "epoch": 0.3667922104464103, "grad_norm": 0.7093354366519244, "learning_rate": 1.4071695052716951e-05, "loss": 0.6838, "step": 12563 }, { "epoch": 0.36682140667425767, "grad_norm": 0.6086679904044576, "learning_rate": 1.4071046228710463e-05, "loss": 0.5022, "step": 12564 }, { "epoch": 0.366850602902105, "grad_norm": 0.6606192869930919, "learning_rate": 1.4070397404703975e-05, "loss": 0.6298, "step": 12565 }, { "epoch": 0.3668797991299524, "grad_norm": 0.6673904647512016, "learning_rate": 1.4069748580697486e-05, "loss": 0.6549, "step": 12566 }, { "epoch": 0.36690899535779975, "grad_norm": 0.7193831153031855, "learning_rate": 1.4069099756690998e-05, "loss": 0.6847, "step": 12567 }, { "epoch": 0.3669381915856471, "grad_norm": 0.6652147783560601, "learning_rate": 1.406845093268451e-05, "loss": 0.6552, "step": 12568 }, { "epoch": 0.36696738781349447, "grad_norm": 0.6341063064303067, "learning_rate": 1.4067802108678022e-05, "loss": 0.5859, "step": 12569 }, { "epoch": 0.36699658404134183, "grad_norm": 0.6298937169768549, "learning_rate": 1.4067153284671534e-05, "loss": 0.5646, "step": 12570 }, { "epoch": 0.3670257802691892, "grad_norm": 0.6150301492918715, "learning_rate": 1.4066504460665044e-05, "loss": 0.5653, "step": 12571 }, { "epoch": 0.36705497649703656, "grad_norm": 0.6376203556818107, "learning_rate": 1.4065855636658558e-05, "loss": 0.5954, "step": 12572 }, { "epoch": 0.367084172724884, "grad_norm": 0.6474133349488634, "learning_rate": 1.406520681265207e-05, "loss": 0.6138, "step": 12573 }, { "epoch": 0.36711336895273133, "grad_norm": 0.6862467773081042, "learning_rate": 1.4064557988645582e-05, "loss": 0.65, "step": 12574 }, { "epoch": 0.3671425651805787, "grad_norm": 0.679976597122632, "learning_rate": 1.4063909164639094e-05, "loss": 0.6818, "step": 12575 }, { "epoch": 0.36717176140842606, "grad_norm": 0.6383555279812576, "learning_rate": 1.4063260340632606e-05, "loss": 0.614, "step": 12576 }, { "epoch": 0.3672009576362734, "grad_norm": 0.683694442753823, "learning_rate": 1.4062611516626116e-05, "loss": 0.671, "step": 12577 }, { "epoch": 0.3672301538641208, "grad_norm": 0.6562550383920649, "learning_rate": 1.4061962692619628e-05, "loss": 0.5972, "step": 12578 }, { "epoch": 0.36725935009196814, "grad_norm": 0.6245360820900477, "learning_rate": 1.406131386861314e-05, "loss": 0.6117, "step": 12579 }, { "epoch": 0.3672885463198155, "grad_norm": 0.7032717355322774, "learning_rate": 1.4060665044606652e-05, "loss": 0.7219, "step": 12580 }, { "epoch": 0.36731774254766286, "grad_norm": 0.6481388387780666, "learning_rate": 1.4060016220600163e-05, "loss": 0.5841, "step": 12581 }, { "epoch": 0.3673469387755102, "grad_norm": 0.6168821420007348, "learning_rate": 1.4059367396593675e-05, "loss": 0.6033, "step": 12582 }, { "epoch": 0.3673761350033576, "grad_norm": 0.6861501060776186, "learning_rate": 1.4058718572587187e-05, "loss": 0.6444, "step": 12583 }, { "epoch": 0.36740533123120495, "grad_norm": 0.6436060371668582, "learning_rate": 1.4058069748580699e-05, "loss": 0.6633, "step": 12584 }, { "epoch": 0.3674345274590523, "grad_norm": 0.7088821107508461, "learning_rate": 1.4057420924574209e-05, "loss": 0.7152, "step": 12585 }, { "epoch": 0.36746372368689967, "grad_norm": 0.6840021294666954, "learning_rate": 1.4056772100567721e-05, "loss": 0.6938, "step": 12586 }, { "epoch": 0.36749291991474703, "grad_norm": 0.6584466725672471, "learning_rate": 1.4056123276561233e-05, "loss": 0.6356, "step": 12587 }, { "epoch": 0.3675221161425944, "grad_norm": 0.6419088726757189, "learning_rate": 1.4055474452554745e-05, "loss": 0.584, "step": 12588 }, { "epoch": 0.36755131237044175, "grad_norm": 0.6563435633821822, "learning_rate": 1.4054825628548257e-05, "loss": 0.6186, "step": 12589 }, { "epoch": 0.3675805085982891, "grad_norm": 0.6613757244870527, "learning_rate": 1.4054176804541768e-05, "loss": 0.6567, "step": 12590 }, { "epoch": 0.3676097048261365, "grad_norm": 0.7274237033155004, "learning_rate": 1.4053527980535281e-05, "loss": 0.6848, "step": 12591 }, { "epoch": 0.36763890105398384, "grad_norm": 0.7157580169016791, "learning_rate": 1.4052879156528793e-05, "loss": 0.6882, "step": 12592 }, { "epoch": 0.3676680972818312, "grad_norm": 0.6468561406638291, "learning_rate": 1.4052230332522305e-05, "loss": 0.6106, "step": 12593 }, { "epoch": 0.36769729350967856, "grad_norm": 0.6291751087888358, "learning_rate": 1.4051581508515817e-05, "loss": 0.6067, "step": 12594 }, { "epoch": 0.3677264897375259, "grad_norm": 0.6697936674670099, "learning_rate": 1.405093268450933e-05, "loss": 0.6825, "step": 12595 }, { "epoch": 0.3677556859653733, "grad_norm": 0.7114576614497002, "learning_rate": 1.405028386050284e-05, "loss": 0.7212, "step": 12596 }, { "epoch": 0.36778488219322064, "grad_norm": 0.7003076931008556, "learning_rate": 1.4049635036496352e-05, "loss": 0.6892, "step": 12597 }, { "epoch": 0.367814078421068, "grad_norm": 0.667834235437611, "learning_rate": 1.4048986212489864e-05, "loss": 0.6416, "step": 12598 }, { "epoch": 0.36784327464891536, "grad_norm": 0.6950280639736941, "learning_rate": 1.4048337388483376e-05, "loss": 0.6886, "step": 12599 }, { "epoch": 0.3678724708767627, "grad_norm": 0.6851600087633345, "learning_rate": 1.4047688564476886e-05, "loss": 0.7206, "step": 12600 }, { "epoch": 0.3679016671046101, "grad_norm": 0.6449485663818495, "learning_rate": 1.4047039740470398e-05, "loss": 0.6165, "step": 12601 }, { "epoch": 0.36793086333245745, "grad_norm": 0.6499443409869785, "learning_rate": 1.404639091646391e-05, "loss": 0.6472, "step": 12602 }, { "epoch": 0.3679600595603048, "grad_norm": 0.6471221101582663, "learning_rate": 1.4045742092457422e-05, "loss": 0.5781, "step": 12603 }, { "epoch": 0.36798925578815217, "grad_norm": 0.6392811423104712, "learning_rate": 1.4045093268450933e-05, "loss": 0.5961, "step": 12604 }, { "epoch": 0.36801845201599953, "grad_norm": 0.7754316245470181, "learning_rate": 1.4044444444444445e-05, "loss": 0.7061, "step": 12605 }, { "epoch": 0.3680476482438469, "grad_norm": 0.7162613506482222, "learning_rate": 1.4043795620437957e-05, "loss": 0.6445, "step": 12606 }, { "epoch": 0.36807684447169425, "grad_norm": 0.6715883441055353, "learning_rate": 1.4043146796431469e-05, "loss": 0.6045, "step": 12607 }, { "epoch": 0.3681060406995416, "grad_norm": 0.7023032060609186, "learning_rate": 1.404249797242498e-05, "loss": 0.6594, "step": 12608 }, { "epoch": 0.368135236927389, "grad_norm": 0.6407790897660921, "learning_rate": 1.4041849148418491e-05, "loss": 0.5094, "step": 12609 }, { "epoch": 0.36816443315523634, "grad_norm": 0.677766260252356, "learning_rate": 1.4041200324412005e-05, "loss": 0.6936, "step": 12610 }, { "epoch": 0.3681936293830837, "grad_norm": 0.6757602717232623, "learning_rate": 1.4040551500405517e-05, "loss": 0.615, "step": 12611 }, { "epoch": 0.36822282561093106, "grad_norm": 0.6815330996549596, "learning_rate": 1.4039902676399029e-05, "loss": 0.6988, "step": 12612 }, { "epoch": 0.3682520218387784, "grad_norm": 0.6527983483034586, "learning_rate": 1.403925385239254e-05, "loss": 0.6334, "step": 12613 }, { "epoch": 0.3682812180666258, "grad_norm": 0.6179731976038516, "learning_rate": 1.4038605028386053e-05, "loss": 0.5854, "step": 12614 }, { "epoch": 0.36831041429447314, "grad_norm": 0.6674969028430002, "learning_rate": 1.4037956204379563e-05, "loss": 0.636, "step": 12615 }, { "epoch": 0.3683396105223205, "grad_norm": 0.7141814860359396, "learning_rate": 1.4037307380373075e-05, "loss": 0.6942, "step": 12616 }, { "epoch": 0.36836880675016787, "grad_norm": 0.5897938358023802, "learning_rate": 1.4036658556366587e-05, "loss": 0.5149, "step": 12617 }, { "epoch": 0.3683980029780152, "grad_norm": 0.6583137144683624, "learning_rate": 1.40360097323601e-05, "loss": 0.6552, "step": 12618 }, { "epoch": 0.3684271992058626, "grad_norm": 1.069395235788291, "learning_rate": 1.403536090835361e-05, "loss": 0.5924, "step": 12619 }, { "epoch": 0.36845639543370995, "grad_norm": 0.6438588430376365, "learning_rate": 1.4034712084347122e-05, "loss": 0.627, "step": 12620 }, { "epoch": 0.3684855916615573, "grad_norm": 0.6518181533486092, "learning_rate": 1.4034063260340634e-05, "loss": 0.6406, "step": 12621 }, { "epoch": 0.36851478788940467, "grad_norm": 0.7665203709820279, "learning_rate": 1.4033414436334146e-05, "loss": 0.7982, "step": 12622 }, { "epoch": 0.36854398411725203, "grad_norm": 0.619754854753998, "learning_rate": 1.4032765612327656e-05, "loss": 0.544, "step": 12623 }, { "epoch": 0.3685731803450994, "grad_norm": 0.6396852449471572, "learning_rate": 1.4032116788321168e-05, "loss": 0.6243, "step": 12624 }, { "epoch": 0.36860237657294675, "grad_norm": 0.635066396558466, "learning_rate": 1.403146796431468e-05, "loss": 0.5998, "step": 12625 }, { "epoch": 0.3686315728007941, "grad_norm": 0.6499660133344108, "learning_rate": 1.4030819140308192e-05, "loss": 0.6474, "step": 12626 }, { "epoch": 0.3686607690286415, "grad_norm": 0.6407790294745197, "learning_rate": 1.4030170316301704e-05, "loss": 0.6145, "step": 12627 }, { "epoch": 0.36868996525648884, "grad_norm": 0.6298013349458613, "learning_rate": 1.4029521492295214e-05, "loss": 0.6144, "step": 12628 }, { "epoch": 0.3687191614843362, "grad_norm": 0.6566195593055897, "learning_rate": 1.4028872668288728e-05, "loss": 0.685, "step": 12629 }, { "epoch": 0.36874835771218356, "grad_norm": 0.6258520578563999, "learning_rate": 1.402822384428224e-05, "loss": 0.6134, "step": 12630 }, { "epoch": 0.3687775539400309, "grad_norm": 0.6264682095569087, "learning_rate": 1.4027575020275752e-05, "loss": 0.5913, "step": 12631 }, { "epoch": 0.3688067501678783, "grad_norm": 0.6098060539330553, "learning_rate": 1.4026926196269264e-05, "loss": 0.5679, "step": 12632 }, { "epoch": 0.36883594639572564, "grad_norm": 0.659869153347388, "learning_rate": 1.4026277372262776e-05, "loss": 0.6618, "step": 12633 }, { "epoch": 0.36886514262357306, "grad_norm": 0.6590906454746048, "learning_rate": 1.4025628548256287e-05, "loss": 0.6227, "step": 12634 }, { "epoch": 0.3688943388514204, "grad_norm": 0.6805374671463419, "learning_rate": 1.4024979724249799e-05, "loss": 0.7108, "step": 12635 }, { "epoch": 0.3689235350792678, "grad_norm": 0.6288000993640788, "learning_rate": 1.402433090024331e-05, "loss": 0.5754, "step": 12636 }, { "epoch": 0.36895273130711514, "grad_norm": 0.6821828536859734, "learning_rate": 1.4023682076236823e-05, "loss": 0.6591, "step": 12637 }, { "epoch": 0.3689819275349625, "grad_norm": 0.6162551122758845, "learning_rate": 1.4023033252230333e-05, "loss": 0.5571, "step": 12638 }, { "epoch": 0.36901112376280987, "grad_norm": 0.7099563798951274, "learning_rate": 1.4022384428223845e-05, "loss": 0.7433, "step": 12639 }, { "epoch": 0.36904031999065723, "grad_norm": 0.6806049405017498, "learning_rate": 1.4021735604217357e-05, "loss": 0.6683, "step": 12640 }, { "epoch": 0.3690695162185046, "grad_norm": 0.7441059506109735, "learning_rate": 1.4021086780210869e-05, "loss": 0.8077, "step": 12641 }, { "epoch": 0.36909871244635195, "grad_norm": 0.6911923985665026, "learning_rate": 1.402043795620438e-05, "loss": 0.6933, "step": 12642 }, { "epoch": 0.3691279086741993, "grad_norm": 0.6746581554238267, "learning_rate": 1.4019789132197891e-05, "loss": 0.667, "step": 12643 }, { "epoch": 0.3691571049020467, "grad_norm": 0.7185135616333258, "learning_rate": 1.4019140308191403e-05, "loss": 0.6837, "step": 12644 }, { "epoch": 0.36918630112989403, "grad_norm": 0.6800745092978111, "learning_rate": 1.4018491484184915e-05, "loss": 0.6684, "step": 12645 }, { "epoch": 0.3692154973577414, "grad_norm": 0.6356525016829431, "learning_rate": 1.4017842660178428e-05, "loss": 0.5761, "step": 12646 }, { "epoch": 0.36924469358558876, "grad_norm": 0.6886409557352812, "learning_rate": 1.4017193836171938e-05, "loss": 0.6893, "step": 12647 }, { "epoch": 0.3692738898134361, "grad_norm": 0.6499157260976193, "learning_rate": 1.4016545012165452e-05, "loss": 0.603, "step": 12648 }, { "epoch": 0.3693030860412835, "grad_norm": 0.6506579604167021, "learning_rate": 1.4015896188158964e-05, "loss": 0.6375, "step": 12649 }, { "epoch": 0.36933228226913084, "grad_norm": 0.665441462684544, "learning_rate": 1.4015247364152476e-05, "loss": 0.6061, "step": 12650 }, { "epoch": 0.3693614784969782, "grad_norm": 0.6624895281254585, "learning_rate": 1.4014598540145988e-05, "loss": 0.6371, "step": 12651 }, { "epoch": 0.36939067472482556, "grad_norm": 0.6520298829832605, "learning_rate": 1.40139497161395e-05, "loss": 0.5857, "step": 12652 }, { "epoch": 0.3694198709526729, "grad_norm": 0.6826692133696369, "learning_rate": 1.401330089213301e-05, "loss": 0.6508, "step": 12653 }, { "epoch": 0.3694490671805203, "grad_norm": 0.5741893620843451, "learning_rate": 1.4012652068126522e-05, "loss": 0.4824, "step": 12654 }, { "epoch": 0.36947826340836765, "grad_norm": 0.6603803091028743, "learning_rate": 1.4012003244120034e-05, "loss": 0.6705, "step": 12655 }, { "epoch": 0.369507459636215, "grad_norm": 0.6753421609068475, "learning_rate": 1.4011354420113546e-05, "loss": 0.6579, "step": 12656 }, { "epoch": 0.36953665586406237, "grad_norm": 0.72378677323699, "learning_rate": 1.4010705596107056e-05, "loss": 0.7727, "step": 12657 }, { "epoch": 0.36956585209190973, "grad_norm": 0.619148595754017, "learning_rate": 1.4010056772100568e-05, "loss": 0.5768, "step": 12658 }, { "epoch": 0.3695950483197571, "grad_norm": 0.6992837346732256, "learning_rate": 1.400940794809408e-05, "loss": 0.7044, "step": 12659 }, { "epoch": 0.36962424454760445, "grad_norm": 0.7227038571340338, "learning_rate": 1.4008759124087593e-05, "loss": 0.7772, "step": 12660 }, { "epoch": 0.3696534407754518, "grad_norm": 0.6082410046563582, "learning_rate": 1.4008110300081103e-05, "loss": 0.5816, "step": 12661 }, { "epoch": 0.3696826370032992, "grad_norm": 0.6117330684971996, "learning_rate": 1.4007461476074615e-05, "loss": 0.5705, "step": 12662 }, { "epoch": 0.36971183323114654, "grad_norm": 0.6473266169327169, "learning_rate": 1.4006812652068127e-05, "loss": 0.565, "step": 12663 }, { "epoch": 0.3697410294589939, "grad_norm": 0.6723594605370167, "learning_rate": 1.4006163828061639e-05, "loss": 0.6612, "step": 12664 }, { "epoch": 0.36977022568684126, "grad_norm": 0.6298762120419915, "learning_rate": 1.4005515004055151e-05, "loss": 0.6058, "step": 12665 }, { "epoch": 0.3697994219146886, "grad_norm": 0.7309072286799919, "learning_rate": 1.4004866180048665e-05, "loss": 0.5743, "step": 12666 }, { "epoch": 0.369828618142536, "grad_norm": 0.6512979264986006, "learning_rate": 1.4004217356042175e-05, "loss": 0.5994, "step": 12667 }, { "epoch": 0.36985781437038334, "grad_norm": 0.6625966260230781, "learning_rate": 1.4003568532035687e-05, "loss": 0.6335, "step": 12668 }, { "epoch": 0.3698870105982307, "grad_norm": 0.6654464083944419, "learning_rate": 1.4002919708029199e-05, "loss": 0.6554, "step": 12669 }, { "epoch": 0.36991620682607806, "grad_norm": 0.687211812932522, "learning_rate": 1.4002270884022711e-05, "loss": 0.6117, "step": 12670 }, { "epoch": 0.3699454030539254, "grad_norm": 0.6516681740781554, "learning_rate": 1.4001622060016221e-05, "loss": 0.6006, "step": 12671 }, { "epoch": 0.3699745992817728, "grad_norm": 0.6408653243732754, "learning_rate": 1.4000973236009733e-05, "loss": 0.6307, "step": 12672 }, { "epoch": 0.37000379550962015, "grad_norm": 0.6326779204338233, "learning_rate": 1.4000324412003245e-05, "loss": 0.5139, "step": 12673 }, { "epoch": 0.3700329917374675, "grad_norm": 0.6250727049634438, "learning_rate": 1.3999675587996757e-05, "loss": 0.5915, "step": 12674 }, { "epoch": 0.37006218796531487, "grad_norm": 0.6678879416185995, "learning_rate": 1.399902676399027e-05, "loss": 0.6473, "step": 12675 }, { "epoch": 0.37009138419316223, "grad_norm": 0.6842498918136404, "learning_rate": 1.399837793998378e-05, "loss": 0.6978, "step": 12676 }, { "epoch": 0.3701205804210096, "grad_norm": 0.6130188342464112, "learning_rate": 1.3997729115977292e-05, "loss": 0.5634, "step": 12677 }, { "epoch": 0.37014977664885695, "grad_norm": 0.7061242801980321, "learning_rate": 1.3997080291970804e-05, "loss": 0.6274, "step": 12678 }, { "epoch": 0.3701789728767043, "grad_norm": 0.6484658656963426, "learning_rate": 1.3996431467964316e-05, "loss": 0.6153, "step": 12679 }, { "epoch": 0.3702081691045517, "grad_norm": 0.6163839542140669, "learning_rate": 1.3995782643957826e-05, "loss": 0.57, "step": 12680 }, { "epoch": 0.37023736533239904, "grad_norm": 0.6833226026687981, "learning_rate": 1.3995133819951338e-05, "loss": 0.6351, "step": 12681 }, { "epoch": 0.3702665615602464, "grad_norm": 0.6523206766329889, "learning_rate": 1.399448499594485e-05, "loss": 0.6262, "step": 12682 }, { "epoch": 0.37029575778809376, "grad_norm": 0.6923964356996614, "learning_rate": 1.3993836171938362e-05, "loss": 0.6529, "step": 12683 }, { "epoch": 0.3703249540159411, "grad_norm": 0.6542686609721917, "learning_rate": 1.3993187347931874e-05, "loss": 0.6735, "step": 12684 }, { "epoch": 0.3703541502437885, "grad_norm": 0.6577060689037907, "learning_rate": 1.3992538523925388e-05, "loss": 0.6298, "step": 12685 }, { "epoch": 0.37038334647163584, "grad_norm": 0.6772978851269597, "learning_rate": 1.3991889699918898e-05, "loss": 0.6591, "step": 12686 }, { "epoch": 0.3704125426994832, "grad_norm": 0.6867991908781956, "learning_rate": 1.399124087591241e-05, "loss": 0.7579, "step": 12687 }, { "epoch": 0.37044173892733057, "grad_norm": 0.6354222309493212, "learning_rate": 1.3990592051905922e-05, "loss": 0.5859, "step": 12688 }, { "epoch": 0.3704709351551779, "grad_norm": 0.6518797725509574, "learning_rate": 1.3989943227899435e-05, "loss": 0.6446, "step": 12689 }, { "epoch": 0.3705001313830253, "grad_norm": 0.6944550049886955, "learning_rate": 1.3989294403892945e-05, "loss": 0.6902, "step": 12690 }, { "epoch": 0.37052932761087265, "grad_norm": 0.6513147881176166, "learning_rate": 1.3988645579886457e-05, "loss": 0.6482, "step": 12691 }, { "epoch": 0.37055852383872, "grad_norm": 0.6851002377410124, "learning_rate": 1.3987996755879969e-05, "loss": 0.6883, "step": 12692 }, { "epoch": 0.37058772006656737, "grad_norm": 0.6605199967771185, "learning_rate": 1.3987347931873481e-05, "loss": 0.6608, "step": 12693 }, { "epoch": 0.3706169162944148, "grad_norm": 0.649673821559072, "learning_rate": 1.3986699107866993e-05, "loss": 0.6284, "step": 12694 }, { "epoch": 0.37064611252226215, "grad_norm": 0.6323768200898707, "learning_rate": 1.3986050283860503e-05, "loss": 0.5473, "step": 12695 }, { "epoch": 0.3706753087501095, "grad_norm": 0.7032133447199284, "learning_rate": 1.3985401459854015e-05, "loss": 0.6146, "step": 12696 }, { "epoch": 0.3707045049779569, "grad_norm": 0.6670530294235649, "learning_rate": 1.3984752635847527e-05, "loss": 0.6383, "step": 12697 }, { "epoch": 0.37073370120580423, "grad_norm": 0.6704292335785599, "learning_rate": 1.398410381184104e-05, "loss": 0.6809, "step": 12698 }, { "epoch": 0.3707628974336516, "grad_norm": 0.6021004253631165, "learning_rate": 1.398345498783455e-05, "loss": 0.5929, "step": 12699 }, { "epoch": 0.37079209366149896, "grad_norm": 0.6846038111782998, "learning_rate": 1.3982806163828062e-05, "loss": 0.68, "step": 12700 }, { "epoch": 0.3708212898893463, "grad_norm": 0.6470741300515322, "learning_rate": 1.3982157339821574e-05, "loss": 0.6088, "step": 12701 }, { "epoch": 0.3708504861171937, "grad_norm": 0.7238321258455825, "learning_rate": 1.3981508515815086e-05, "loss": 0.6647, "step": 12702 }, { "epoch": 0.37087968234504104, "grad_norm": 0.6228060453136142, "learning_rate": 1.3980859691808598e-05, "loss": 0.5785, "step": 12703 }, { "epoch": 0.3709088785728884, "grad_norm": 0.6596972025867914, "learning_rate": 1.3980210867802112e-05, "loss": 0.6538, "step": 12704 }, { "epoch": 0.37093807480073576, "grad_norm": 0.6437866597873279, "learning_rate": 1.3979562043795622e-05, "loss": 0.6119, "step": 12705 }, { "epoch": 0.3709672710285831, "grad_norm": 0.6514823423703405, "learning_rate": 1.3978913219789134e-05, "loss": 0.644, "step": 12706 }, { "epoch": 0.3709964672564305, "grad_norm": 0.5863293567169985, "learning_rate": 1.3978264395782646e-05, "loss": 0.5361, "step": 12707 }, { "epoch": 0.37102566348427785, "grad_norm": 0.6687629396011382, "learning_rate": 1.3977615571776158e-05, "loss": 0.6753, "step": 12708 }, { "epoch": 0.3710548597121252, "grad_norm": 0.6417260233684473, "learning_rate": 1.3976966747769668e-05, "loss": 0.63, "step": 12709 }, { "epoch": 0.37108405593997257, "grad_norm": 0.6519813611840242, "learning_rate": 1.397631792376318e-05, "loss": 0.6698, "step": 12710 }, { "epoch": 0.37111325216781993, "grad_norm": 0.6976128472906732, "learning_rate": 1.3975669099756692e-05, "loss": 0.6641, "step": 12711 }, { "epoch": 0.3711424483956673, "grad_norm": 0.798948240557889, "learning_rate": 1.3975020275750204e-05, "loss": 0.6735, "step": 12712 }, { "epoch": 0.37117164462351465, "grad_norm": 0.6728433432999009, "learning_rate": 1.3974371451743716e-05, "loss": 0.714, "step": 12713 }, { "epoch": 0.371200840851362, "grad_norm": 0.6832555293989946, "learning_rate": 1.3973722627737227e-05, "loss": 0.6694, "step": 12714 }, { "epoch": 0.3712300370792094, "grad_norm": 0.642814918362865, "learning_rate": 1.3973073803730739e-05, "loss": 0.6194, "step": 12715 }, { "epoch": 0.37125923330705674, "grad_norm": 0.6401958041175878, "learning_rate": 1.397242497972425e-05, "loss": 0.5773, "step": 12716 }, { "epoch": 0.3712884295349041, "grad_norm": 0.6175404327509105, "learning_rate": 1.3971776155717763e-05, "loss": 0.5963, "step": 12717 }, { "epoch": 0.37131762576275146, "grad_norm": 0.6987351527299254, "learning_rate": 1.3971127331711273e-05, "loss": 0.6654, "step": 12718 }, { "epoch": 0.3713468219905988, "grad_norm": 0.6827301366732093, "learning_rate": 1.3970478507704785e-05, "loss": 0.6886, "step": 12719 }, { "epoch": 0.3713760182184462, "grad_norm": 0.726451629000109, "learning_rate": 1.3969829683698297e-05, "loss": 0.7537, "step": 12720 }, { "epoch": 0.37140521444629354, "grad_norm": 0.6037663141249301, "learning_rate": 1.396918085969181e-05, "loss": 0.5457, "step": 12721 }, { "epoch": 0.3714344106741409, "grad_norm": 0.6780236547525951, "learning_rate": 1.3968532035685321e-05, "loss": 0.7013, "step": 12722 }, { "epoch": 0.37146360690198826, "grad_norm": 0.6203095818963296, "learning_rate": 1.3967883211678835e-05, "loss": 0.5883, "step": 12723 }, { "epoch": 0.3714928031298356, "grad_norm": 0.6315969520882875, "learning_rate": 1.3967234387672345e-05, "loss": 0.5967, "step": 12724 }, { "epoch": 0.371521999357683, "grad_norm": 0.6209421426966673, "learning_rate": 1.3966585563665857e-05, "loss": 0.624, "step": 12725 }, { "epoch": 0.37155119558553035, "grad_norm": 0.650800997897109, "learning_rate": 1.396593673965937e-05, "loss": 0.6561, "step": 12726 }, { "epoch": 0.3715803918133777, "grad_norm": 0.6195462095145151, "learning_rate": 1.3965287915652881e-05, "loss": 0.5513, "step": 12727 }, { "epoch": 0.37160958804122507, "grad_norm": 0.6525610102914626, "learning_rate": 1.3964639091646392e-05, "loss": 0.604, "step": 12728 }, { "epoch": 0.37163878426907243, "grad_norm": 0.5709377160467709, "learning_rate": 1.3963990267639904e-05, "loss": 0.4628, "step": 12729 }, { "epoch": 0.3716679804969198, "grad_norm": 0.6057611833425132, "learning_rate": 1.3963341443633416e-05, "loss": 0.5574, "step": 12730 }, { "epoch": 0.37169717672476715, "grad_norm": 0.6783513089177419, "learning_rate": 1.3962692619626928e-05, "loss": 0.6999, "step": 12731 }, { "epoch": 0.3717263729526145, "grad_norm": 1.1342359869904763, "learning_rate": 1.396204379562044e-05, "loss": 0.7345, "step": 12732 }, { "epoch": 0.3717555691804619, "grad_norm": 0.6318927879172328, "learning_rate": 1.396139497161395e-05, "loss": 0.5748, "step": 12733 }, { "epoch": 0.37178476540830924, "grad_norm": 0.704973693883939, "learning_rate": 1.3960746147607462e-05, "loss": 0.7435, "step": 12734 }, { "epoch": 0.3718139616361566, "grad_norm": 0.6091166139911126, "learning_rate": 1.3960097323600974e-05, "loss": 0.6073, "step": 12735 }, { "epoch": 0.37184315786400396, "grad_norm": 0.6409833195486317, "learning_rate": 1.3959448499594486e-05, "loss": 0.5814, "step": 12736 }, { "epoch": 0.3718723540918513, "grad_norm": 0.6369571505802362, "learning_rate": 1.3958799675587997e-05, "loss": 0.6152, "step": 12737 }, { "epoch": 0.3719015503196987, "grad_norm": 0.6870921731282398, "learning_rate": 1.3958150851581509e-05, "loss": 0.6927, "step": 12738 }, { "epoch": 0.37193074654754604, "grad_norm": 0.6594600279373771, "learning_rate": 1.395750202757502e-05, "loss": 0.5959, "step": 12739 }, { "epoch": 0.3719599427753934, "grad_norm": 0.6250162242418696, "learning_rate": 1.3956853203568533e-05, "loss": 0.5961, "step": 12740 }, { "epoch": 0.37198913900324077, "grad_norm": 0.6101945452269588, "learning_rate": 1.3956204379562045e-05, "loss": 0.5397, "step": 12741 }, { "epoch": 0.3720183352310881, "grad_norm": 0.6822781089377793, "learning_rate": 1.3955555555555558e-05, "loss": 0.6885, "step": 12742 }, { "epoch": 0.3720475314589355, "grad_norm": 0.6418774169639494, "learning_rate": 1.3954906731549069e-05, "loss": 0.6195, "step": 12743 }, { "epoch": 0.37207672768678285, "grad_norm": 0.7201120789655332, "learning_rate": 1.395425790754258e-05, "loss": 0.777, "step": 12744 }, { "epoch": 0.3721059239146302, "grad_norm": 0.6810019516256653, "learning_rate": 1.3953609083536093e-05, "loss": 0.6454, "step": 12745 }, { "epoch": 0.37213512014247757, "grad_norm": 0.669122019041507, "learning_rate": 1.3952960259529605e-05, "loss": 0.6414, "step": 12746 }, { "epoch": 0.37216431637032493, "grad_norm": 0.6966938542575634, "learning_rate": 1.3952311435523115e-05, "loss": 0.6613, "step": 12747 }, { "epoch": 0.3721935125981723, "grad_norm": 0.6630373545959876, "learning_rate": 1.3951662611516627e-05, "loss": 0.6649, "step": 12748 }, { "epoch": 0.37222270882601965, "grad_norm": 0.6488271823441357, "learning_rate": 1.395101378751014e-05, "loss": 0.6115, "step": 12749 }, { "epoch": 0.372251905053867, "grad_norm": 0.6893853944092148, "learning_rate": 1.3950364963503651e-05, "loss": 0.7177, "step": 12750 }, { "epoch": 0.3722811012817144, "grad_norm": 0.6549437654699871, "learning_rate": 1.3949716139497163e-05, "loss": 0.6382, "step": 12751 }, { "epoch": 0.37231029750956174, "grad_norm": 0.7003244548363868, "learning_rate": 1.3949067315490674e-05, "loss": 0.6832, "step": 12752 }, { "epoch": 0.3723394937374091, "grad_norm": 0.702462525406382, "learning_rate": 1.3948418491484186e-05, "loss": 0.6891, "step": 12753 }, { "epoch": 0.3723686899652565, "grad_norm": 0.6077137086388419, "learning_rate": 1.3947769667477698e-05, "loss": 0.5512, "step": 12754 }, { "epoch": 0.3723978861931039, "grad_norm": 0.631324857066771, "learning_rate": 1.394712084347121e-05, "loss": 0.5832, "step": 12755 }, { "epoch": 0.37242708242095124, "grad_norm": 0.6117479679535666, "learning_rate": 1.394647201946472e-05, "loss": 0.5624, "step": 12756 }, { "epoch": 0.3724562786487986, "grad_norm": 0.6743677639538082, "learning_rate": 1.3945823195458232e-05, "loss": 0.6687, "step": 12757 }, { "epoch": 0.37248547487664596, "grad_norm": 0.587888144905751, "learning_rate": 1.3945174371451744e-05, "loss": 0.4979, "step": 12758 }, { "epoch": 0.3725146711044933, "grad_norm": 0.5929594995898209, "learning_rate": 1.3944525547445256e-05, "loss": 0.5652, "step": 12759 }, { "epoch": 0.3725438673323407, "grad_norm": 0.6890879510698493, "learning_rate": 1.3943876723438768e-05, "loss": 0.6535, "step": 12760 }, { "epoch": 0.37257306356018804, "grad_norm": 0.6193083037618218, "learning_rate": 1.3943227899432282e-05, "loss": 0.5844, "step": 12761 }, { "epoch": 0.3726022597880354, "grad_norm": 0.6540867625335097, "learning_rate": 1.3942579075425792e-05, "loss": 0.6373, "step": 12762 }, { "epoch": 0.37263145601588277, "grad_norm": 0.6875538443730433, "learning_rate": 1.3941930251419304e-05, "loss": 0.6807, "step": 12763 }, { "epoch": 0.37266065224373013, "grad_norm": 0.640825058196858, "learning_rate": 1.3941281427412816e-05, "loss": 0.611, "step": 12764 }, { "epoch": 0.3726898484715775, "grad_norm": 0.6482787422030503, "learning_rate": 1.3940632603406328e-05, "loss": 0.641, "step": 12765 }, { "epoch": 0.37271904469942485, "grad_norm": 0.6481049861482672, "learning_rate": 1.3939983779399839e-05, "loss": 0.6608, "step": 12766 }, { "epoch": 0.3727482409272722, "grad_norm": 0.6014775422037415, "learning_rate": 1.393933495539335e-05, "loss": 0.5662, "step": 12767 }, { "epoch": 0.3727774371551196, "grad_norm": 0.6734675885024083, "learning_rate": 1.3938686131386863e-05, "loss": 0.6677, "step": 12768 }, { "epoch": 0.37280663338296693, "grad_norm": 0.7107727768591295, "learning_rate": 1.3938037307380375e-05, "loss": 0.6899, "step": 12769 }, { "epoch": 0.3728358296108143, "grad_norm": 0.7224223484890843, "learning_rate": 1.3937388483373887e-05, "loss": 0.6375, "step": 12770 }, { "epoch": 0.37286502583866166, "grad_norm": 0.6657324307542607, "learning_rate": 1.3936739659367397e-05, "loss": 0.6671, "step": 12771 }, { "epoch": 0.372894222066509, "grad_norm": 0.658687738197626, "learning_rate": 1.3936090835360909e-05, "loss": 0.6442, "step": 12772 }, { "epoch": 0.3729234182943564, "grad_norm": 0.6404433942266555, "learning_rate": 1.3935442011354421e-05, "loss": 0.596, "step": 12773 }, { "epoch": 0.37295261452220374, "grad_norm": 0.7491882741904692, "learning_rate": 1.3934793187347933e-05, "loss": 0.7698, "step": 12774 }, { "epoch": 0.3729818107500511, "grad_norm": 0.6368394630364497, "learning_rate": 1.3934144363341443e-05, "loss": 0.5934, "step": 12775 }, { "epoch": 0.37301100697789846, "grad_norm": 0.6896606598414905, "learning_rate": 1.3933495539334955e-05, "loss": 0.6623, "step": 12776 }, { "epoch": 0.3730402032057458, "grad_norm": 0.6794400146077176, "learning_rate": 1.3932846715328467e-05, "loss": 0.6087, "step": 12777 }, { "epoch": 0.3730693994335932, "grad_norm": 0.5893037629177863, "learning_rate": 1.393219789132198e-05, "loss": 0.5026, "step": 12778 }, { "epoch": 0.37309859566144055, "grad_norm": 0.6666234283470186, "learning_rate": 1.3931549067315492e-05, "loss": 0.6904, "step": 12779 }, { "epoch": 0.3731277918892879, "grad_norm": 0.6404549632336665, "learning_rate": 1.3930900243309005e-05, "loss": 0.6694, "step": 12780 }, { "epoch": 0.37315698811713527, "grad_norm": 0.607160667931031, "learning_rate": 1.3930251419302516e-05, "loss": 0.5429, "step": 12781 }, { "epoch": 0.37318618434498263, "grad_norm": 0.7234219357615157, "learning_rate": 1.3929602595296028e-05, "loss": 0.6988, "step": 12782 }, { "epoch": 0.37321538057283, "grad_norm": 0.6471800987849349, "learning_rate": 1.392895377128954e-05, "loss": 0.6472, "step": 12783 }, { "epoch": 0.37324457680067735, "grad_norm": 0.6698624020858546, "learning_rate": 1.3928304947283052e-05, "loss": 0.7025, "step": 12784 }, { "epoch": 0.3732737730285247, "grad_norm": 0.6422226094135018, "learning_rate": 1.3927656123276562e-05, "loss": 0.61, "step": 12785 }, { "epoch": 0.3733029692563721, "grad_norm": 0.5941215685183425, "learning_rate": 1.3927007299270074e-05, "loss": 0.535, "step": 12786 }, { "epoch": 0.37333216548421944, "grad_norm": 0.6269242491896299, "learning_rate": 1.3926358475263586e-05, "loss": 0.5963, "step": 12787 }, { "epoch": 0.3733613617120668, "grad_norm": 0.5659198866472616, "learning_rate": 1.3925709651257098e-05, "loss": 0.4869, "step": 12788 }, { "epoch": 0.37339055793991416, "grad_norm": 0.6623051764660881, "learning_rate": 1.392506082725061e-05, "loss": 0.6267, "step": 12789 }, { "epoch": 0.3734197541677615, "grad_norm": 0.6725661991135561, "learning_rate": 1.392441200324412e-05, "loss": 0.6791, "step": 12790 }, { "epoch": 0.3734489503956089, "grad_norm": 0.7095480277212513, "learning_rate": 1.3923763179237632e-05, "loss": 0.661, "step": 12791 }, { "epoch": 0.37347814662345624, "grad_norm": 0.5703768346548702, "learning_rate": 1.3923114355231145e-05, "loss": 0.5184, "step": 12792 }, { "epoch": 0.3735073428513036, "grad_norm": 0.6902204680173678, "learning_rate": 1.3922465531224657e-05, "loss": 0.6846, "step": 12793 }, { "epoch": 0.37353653907915096, "grad_norm": 0.6663441837078727, "learning_rate": 1.3921816707218167e-05, "loss": 0.6094, "step": 12794 }, { "epoch": 0.3735657353069983, "grad_norm": 0.6846437261725407, "learning_rate": 1.3921167883211679e-05, "loss": 0.6356, "step": 12795 }, { "epoch": 0.3735949315348457, "grad_norm": 0.6732662148053062, "learning_rate": 1.3920519059205191e-05, "loss": 0.6504, "step": 12796 }, { "epoch": 0.37362412776269305, "grad_norm": 0.7323301886603406, "learning_rate": 1.3919870235198703e-05, "loss": 0.7593, "step": 12797 }, { "epoch": 0.3736533239905404, "grad_norm": 0.6762393913800241, "learning_rate": 1.3919221411192215e-05, "loss": 0.7011, "step": 12798 }, { "epoch": 0.37368252021838777, "grad_norm": 0.6147115004219866, "learning_rate": 1.3918572587185729e-05, "loss": 0.6037, "step": 12799 }, { "epoch": 0.37371171644623513, "grad_norm": 0.6331312205781751, "learning_rate": 1.3917923763179239e-05, "loss": 0.6014, "step": 12800 }, { "epoch": 0.3737409126740825, "grad_norm": 0.6193569835684549, "learning_rate": 1.3917274939172751e-05, "loss": 0.5711, "step": 12801 }, { "epoch": 0.37377010890192985, "grad_norm": 0.691202446806804, "learning_rate": 1.3916626115166263e-05, "loss": 0.7048, "step": 12802 }, { "epoch": 0.3737993051297772, "grad_norm": 0.6496747289746938, "learning_rate": 1.3915977291159775e-05, "loss": 0.6146, "step": 12803 }, { "epoch": 0.3738285013576246, "grad_norm": 0.6175275088050434, "learning_rate": 1.3915328467153285e-05, "loss": 0.6035, "step": 12804 }, { "epoch": 0.37385769758547194, "grad_norm": 0.6576193014680202, "learning_rate": 1.3914679643146797e-05, "loss": 0.5899, "step": 12805 }, { "epoch": 0.3738868938133193, "grad_norm": 0.6290549865467503, "learning_rate": 1.391403081914031e-05, "loss": 0.5861, "step": 12806 }, { "epoch": 0.37391609004116666, "grad_norm": 0.6477389052986251, "learning_rate": 1.3913381995133822e-05, "loss": 0.6141, "step": 12807 }, { "epoch": 0.373945286269014, "grad_norm": 0.7263711280457026, "learning_rate": 1.3912733171127334e-05, "loss": 0.7071, "step": 12808 }, { "epoch": 0.3739744824968614, "grad_norm": 0.6344423262417406, "learning_rate": 1.3912084347120844e-05, "loss": 0.6037, "step": 12809 }, { "epoch": 0.37400367872470874, "grad_norm": 0.6514747537641278, "learning_rate": 1.3911435523114356e-05, "loss": 0.6045, "step": 12810 }, { "epoch": 0.3740328749525561, "grad_norm": 0.65075764250603, "learning_rate": 1.3910786699107868e-05, "loss": 0.6315, "step": 12811 }, { "epoch": 0.37406207118040347, "grad_norm": 0.6419705797692254, "learning_rate": 1.391013787510138e-05, "loss": 0.5977, "step": 12812 }, { "epoch": 0.3740912674082508, "grad_norm": 0.6838555104129866, "learning_rate": 1.390948905109489e-05, "loss": 0.7215, "step": 12813 }, { "epoch": 0.37412046363609824, "grad_norm": 0.7840757909774794, "learning_rate": 1.3908840227088402e-05, "loss": 0.7806, "step": 12814 }, { "epoch": 0.3741496598639456, "grad_norm": 0.6329915775683943, "learning_rate": 1.3908191403081914e-05, "loss": 0.5954, "step": 12815 }, { "epoch": 0.37417885609179297, "grad_norm": 0.6364401969772507, "learning_rate": 1.3907542579075426e-05, "loss": 0.6231, "step": 12816 }, { "epoch": 0.3742080523196403, "grad_norm": 0.6554008555585304, "learning_rate": 1.390689375506894e-05, "loss": 0.5715, "step": 12817 }, { "epoch": 0.3742372485474877, "grad_norm": 0.6912943943140152, "learning_rate": 1.3906244931062452e-05, "loss": 0.6601, "step": 12818 }, { "epoch": 0.37426644477533505, "grad_norm": 0.6867809661785104, "learning_rate": 1.3905596107055962e-05, "loss": 0.6735, "step": 12819 }, { "epoch": 0.3742956410031824, "grad_norm": 0.6539240780953445, "learning_rate": 1.3904947283049475e-05, "loss": 0.6339, "step": 12820 }, { "epoch": 0.37432483723102977, "grad_norm": 0.6777231347876477, "learning_rate": 1.3904298459042987e-05, "loss": 0.6341, "step": 12821 }, { "epoch": 0.37435403345887713, "grad_norm": 0.6417757490398649, "learning_rate": 1.3903649635036499e-05, "loss": 0.6144, "step": 12822 }, { "epoch": 0.3743832296867245, "grad_norm": 0.6676597212883112, "learning_rate": 1.3903000811030009e-05, "loss": 0.6375, "step": 12823 }, { "epoch": 0.37441242591457186, "grad_norm": 0.6370537112041776, "learning_rate": 1.3902351987023521e-05, "loss": 0.5728, "step": 12824 }, { "epoch": 0.3744416221424192, "grad_norm": 0.6064807988550961, "learning_rate": 1.3901703163017033e-05, "loss": 0.5647, "step": 12825 }, { "epoch": 0.3744708183702666, "grad_norm": 0.7003504454414762, "learning_rate": 1.3901054339010545e-05, "loss": 0.6512, "step": 12826 }, { "epoch": 0.37450001459811394, "grad_norm": 0.6980981829698262, "learning_rate": 1.3900405515004057e-05, "loss": 0.7536, "step": 12827 }, { "epoch": 0.3745292108259613, "grad_norm": 0.612591945616802, "learning_rate": 1.3899756690997567e-05, "loss": 0.5625, "step": 12828 }, { "epoch": 0.37455840705380866, "grad_norm": 0.6405184005161283, "learning_rate": 1.389910786699108e-05, "loss": 0.5938, "step": 12829 }, { "epoch": 0.374587603281656, "grad_norm": 0.6511963061184872, "learning_rate": 1.3898459042984591e-05, "loss": 0.6406, "step": 12830 }, { "epoch": 0.3746167995095034, "grad_norm": 0.6077448027215578, "learning_rate": 1.3897810218978103e-05, "loss": 0.5645, "step": 12831 }, { "epoch": 0.37464599573735075, "grad_norm": 0.7542534341730112, "learning_rate": 1.3897161394971614e-05, "loss": 0.6445, "step": 12832 }, { "epoch": 0.3746751919651981, "grad_norm": 0.6698955862658081, "learning_rate": 1.3896512570965126e-05, "loss": 0.582, "step": 12833 }, { "epoch": 0.37470438819304547, "grad_norm": 0.631511423137762, "learning_rate": 1.3895863746958638e-05, "loss": 0.6177, "step": 12834 }, { "epoch": 0.37473358442089283, "grad_norm": 0.6548846233016847, "learning_rate": 1.389521492295215e-05, "loss": 0.5996, "step": 12835 }, { "epoch": 0.3747627806487402, "grad_norm": 0.6198699850999084, "learning_rate": 1.3894566098945664e-05, "loss": 0.6023, "step": 12836 }, { "epoch": 0.37479197687658755, "grad_norm": 0.6442877316358162, "learning_rate": 1.3893917274939176e-05, "loss": 0.6612, "step": 12837 }, { "epoch": 0.3748211731044349, "grad_norm": 0.6858227416971271, "learning_rate": 1.3893268450932686e-05, "loss": 0.6393, "step": 12838 }, { "epoch": 0.3748503693322823, "grad_norm": 0.7440209412343541, "learning_rate": 1.3892619626926198e-05, "loss": 0.7031, "step": 12839 }, { "epoch": 0.37487956556012964, "grad_norm": 0.680720070826247, "learning_rate": 1.389197080291971e-05, "loss": 0.6512, "step": 12840 }, { "epoch": 0.374908761787977, "grad_norm": 0.6692749531386227, "learning_rate": 1.3891321978913222e-05, "loss": 0.663, "step": 12841 }, { "epoch": 0.37493795801582436, "grad_norm": 0.7061511419775579, "learning_rate": 1.3890673154906732e-05, "loss": 0.6555, "step": 12842 }, { "epoch": 0.3749671542436717, "grad_norm": 0.7082709111262996, "learning_rate": 1.3890024330900244e-05, "loss": 0.6323, "step": 12843 }, { "epoch": 0.3749963504715191, "grad_norm": 0.6692796791133349, "learning_rate": 1.3889375506893756e-05, "loss": 0.6678, "step": 12844 }, { "epoch": 0.37502554669936644, "grad_norm": 0.6325874446221639, "learning_rate": 1.3888726682887268e-05, "loss": 0.625, "step": 12845 }, { "epoch": 0.3750547429272138, "grad_norm": 0.6429124410161535, "learning_rate": 1.388807785888078e-05, "loss": 0.5966, "step": 12846 }, { "epoch": 0.37508393915506116, "grad_norm": 0.5683842922034299, "learning_rate": 1.388742903487429e-05, "loss": 0.5251, "step": 12847 }, { "epoch": 0.3751131353829085, "grad_norm": 0.6399969216935677, "learning_rate": 1.3886780210867803e-05, "loss": 0.5925, "step": 12848 }, { "epoch": 0.3751423316107559, "grad_norm": 0.689049399938632, "learning_rate": 1.3886131386861315e-05, "loss": 0.6931, "step": 12849 }, { "epoch": 0.37517152783860325, "grad_norm": 0.7082682961043253, "learning_rate": 1.3885482562854827e-05, "loss": 0.6875, "step": 12850 }, { "epoch": 0.3752007240664506, "grad_norm": 0.6339934345197773, "learning_rate": 1.3884833738848337e-05, "loss": 0.6285, "step": 12851 }, { "epoch": 0.37522992029429797, "grad_norm": 0.6541957347778268, "learning_rate": 1.388418491484185e-05, "loss": 0.6559, "step": 12852 }, { "epoch": 0.37525911652214533, "grad_norm": 0.6530147835082254, "learning_rate": 1.3883536090835361e-05, "loss": 0.6018, "step": 12853 }, { "epoch": 0.3752883127499927, "grad_norm": 0.6748618203130892, "learning_rate": 1.3882887266828873e-05, "loss": 0.6155, "step": 12854 }, { "epoch": 0.37531750897784005, "grad_norm": 0.7270629485542899, "learning_rate": 1.3882238442822387e-05, "loss": 0.6129, "step": 12855 }, { "epoch": 0.3753467052056874, "grad_norm": 0.6739552487167333, "learning_rate": 1.3881589618815899e-05, "loss": 0.6613, "step": 12856 }, { "epoch": 0.3753759014335348, "grad_norm": 0.6269626774565829, "learning_rate": 1.388094079480941e-05, "loss": 0.5947, "step": 12857 }, { "epoch": 0.37540509766138214, "grad_norm": 0.6921200726221687, "learning_rate": 1.3880291970802921e-05, "loss": 0.7123, "step": 12858 }, { "epoch": 0.3754342938892295, "grad_norm": 0.6668801103781512, "learning_rate": 1.3879643146796433e-05, "loss": 0.701, "step": 12859 }, { "epoch": 0.37546349011707686, "grad_norm": 0.7444435688795994, "learning_rate": 1.3878994322789945e-05, "loss": 0.7245, "step": 12860 }, { "epoch": 0.3754926863449242, "grad_norm": 0.6270228785145311, "learning_rate": 1.3878345498783456e-05, "loss": 0.6572, "step": 12861 }, { "epoch": 0.3755218825727716, "grad_norm": 0.6600114427288087, "learning_rate": 1.3877696674776968e-05, "loss": 0.6712, "step": 12862 }, { "epoch": 0.37555107880061894, "grad_norm": 0.647023077622445, "learning_rate": 1.387704785077048e-05, "loss": 0.6403, "step": 12863 }, { "epoch": 0.3755802750284663, "grad_norm": 0.5840638078708797, "learning_rate": 1.3876399026763992e-05, "loss": 0.4807, "step": 12864 }, { "epoch": 0.37560947125631367, "grad_norm": 0.6061423080301795, "learning_rate": 1.3875750202757504e-05, "loss": 0.5905, "step": 12865 }, { "epoch": 0.375638667484161, "grad_norm": 0.6649490903807052, "learning_rate": 1.3875101378751014e-05, "loss": 0.6479, "step": 12866 }, { "epoch": 0.3756678637120084, "grad_norm": 0.6426930042366806, "learning_rate": 1.3874452554744526e-05, "loss": 0.63, "step": 12867 }, { "epoch": 0.37569705993985575, "grad_norm": 0.6298918035835653, "learning_rate": 1.3873803730738038e-05, "loss": 0.5861, "step": 12868 }, { "epoch": 0.3757262561677031, "grad_norm": 0.624843471576048, "learning_rate": 1.387315490673155e-05, "loss": 0.6239, "step": 12869 }, { "epoch": 0.37575545239555047, "grad_norm": 0.6361741452464058, "learning_rate": 1.387250608272506e-05, "loss": 0.6176, "step": 12870 }, { "epoch": 0.37578464862339783, "grad_norm": 0.594865243270663, "learning_rate": 1.3871857258718573e-05, "loss": 0.5846, "step": 12871 }, { "epoch": 0.3758138448512452, "grad_norm": 0.664978278803036, "learning_rate": 1.3871208434712085e-05, "loss": 0.6806, "step": 12872 }, { "epoch": 0.37584304107909255, "grad_norm": 0.6620612403754187, "learning_rate": 1.3870559610705597e-05, "loss": 0.6444, "step": 12873 }, { "epoch": 0.3758722373069399, "grad_norm": 0.6517302496022775, "learning_rate": 1.386991078669911e-05, "loss": 0.6354, "step": 12874 }, { "epoch": 0.37590143353478733, "grad_norm": 0.6277811601238472, "learning_rate": 1.3869261962692622e-05, "loss": 0.5932, "step": 12875 }, { "epoch": 0.3759306297626347, "grad_norm": 0.6323928036521647, "learning_rate": 1.3868613138686133e-05, "loss": 0.6639, "step": 12876 }, { "epoch": 0.37595982599048205, "grad_norm": 0.6650623330315266, "learning_rate": 1.3867964314679645e-05, "loss": 0.7061, "step": 12877 }, { "epoch": 0.3759890222183294, "grad_norm": 0.664507953166667, "learning_rate": 1.3867315490673157e-05, "loss": 0.6533, "step": 12878 }, { "epoch": 0.3760182184461768, "grad_norm": 0.6492967571306759, "learning_rate": 1.3866666666666669e-05, "loss": 0.6466, "step": 12879 }, { "epoch": 0.37604741467402414, "grad_norm": 0.704026399575698, "learning_rate": 1.386601784266018e-05, "loss": 0.6971, "step": 12880 }, { "epoch": 0.3760766109018715, "grad_norm": 0.6663640644143634, "learning_rate": 1.3865369018653691e-05, "loss": 0.671, "step": 12881 }, { "epoch": 0.37610580712971886, "grad_norm": 0.6898528170278866, "learning_rate": 1.3864720194647203e-05, "loss": 0.641, "step": 12882 }, { "epoch": 0.3761350033575662, "grad_norm": 0.6165556220953629, "learning_rate": 1.3864071370640715e-05, "loss": 0.51, "step": 12883 }, { "epoch": 0.3761641995854136, "grad_norm": 0.6703438859267197, "learning_rate": 1.3863422546634227e-05, "loss": 0.6823, "step": 12884 }, { "epoch": 0.37619339581326094, "grad_norm": 0.6367060629903862, "learning_rate": 1.3862773722627738e-05, "loss": 0.6046, "step": 12885 }, { "epoch": 0.3762225920411083, "grad_norm": 0.7237243443491423, "learning_rate": 1.386212489862125e-05, "loss": 0.7118, "step": 12886 }, { "epoch": 0.37625178826895567, "grad_norm": 0.5842563777006199, "learning_rate": 1.3861476074614762e-05, "loss": 0.5387, "step": 12887 }, { "epoch": 0.37628098449680303, "grad_norm": 0.6983756196452278, "learning_rate": 1.3860827250608274e-05, "loss": 0.6555, "step": 12888 }, { "epoch": 0.3763101807246504, "grad_norm": 0.6661187459263112, "learning_rate": 1.3860178426601784e-05, "loss": 0.6317, "step": 12889 }, { "epoch": 0.37633937695249775, "grad_norm": 0.6279765750689283, "learning_rate": 1.3859529602595296e-05, "loss": 0.5338, "step": 12890 }, { "epoch": 0.3763685731803451, "grad_norm": 0.6553409372213628, "learning_rate": 1.3858880778588808e-05, "loss": 0.6403, "step": 12891 }, { "epoch": 0.3763977694081925, "grad_norm": 0.6333322452040447, "learning_rate": 1.385823195458232e-05, "loss": 0.5741, "step": 12892 }, { "epoch": 0.37642696563603983, "grad_norm": 0.6463658009852334, "learning_rate": 1.3857583130575834e-05, "loss": 0.6528, "step": 12893 }, { "epoch": 0.3764561618638872, "grad_norm": 0.6875181452374527, "learning_rate": 1.3856934306569346e-05, "loss": 0.6223, "step": 12894 }, { "epoch": 0.37648535809173456, "grad_norm": 0.6736057867714215, "learning_rate": 1.3856285482562856e-05, "loss": 0.6658, "step": 12895 }, { "epoch": 0.3765145543195819, "grad_norm": 0.7077695770553769, "learning_rate": 1.3855636658556368e-05, "loss": 0.6383, "step": 12896 }, { "epoch": 0.3765437505474293, "grad_norm": 0.6721793016419321, "learning_rate": 1.385498783454988e-05, "loss": 0.6311, "step": 12897 }, { "epoch": 0.37657294677527664, "grad_norm": 0.6802483227307258, "learning_rate": 1.3854339010543392e-05, "loss": 0.6836, "step": 12898 }, { "epoch": 0.376602143003124, "grad_norm": 0.6850405786440854, "learning_rate": 1.3853690186536903e-05, "loss": 0.6579, "step": 12899 }, { "epoch": 0.37663133923097136, "grad_norm": 0.6240371614745486, "learning_rate": 1.3853041362530415e-05, "loss": 0.5595, "step": 12900 }, { "epoch": 0.3766605354588187, "grad_norm": 0.6223183099432101, "learning_rate": 1.3852392538523927e-05, "loss": 0.6374, "step": 12901 }, { "epoch": 0.3766897316866661, "grad_norm": 0.6865817026242335, "learning_rate": 1.3851743714517439e-05, "loss": 0.6277, "step": 12902 }, { "epoch": 0.37671892791451345, "grad_norm": 0.6461098758499857, "learning_rate": 1.385109489051095e-05, "loss": 0.5984, "step": 12903 }, { "epoch": 0.3767481241423608, "grad_norm": 0.6180027371528033, "learning_rate": 1.3850446066504461e-05, "loss": 0.5741, "step": 12904 }, { "epoch": 0.37677732037020817, "grad_norm": 0.6280158917077467, "learning_rate": 1.3849797242497973e-05, "loss": 0.6149, "step": 12905 }, { "epoch": 0.37680651659805553, "grad_norm": 0.7254973868659353, "learning_rate": 1.3849148418491485e-05, "loss": 0.6794, "step": 12906 }, { "epoch": 0.3768357128259029, "grad_norm": 0.6248726068388861, "learning_rate": 1.3848499594484997e-05, "loss": 0.6037, "step": 12907 }, { "epoch": 0.37686490905375025, "grad_norm": 0.60996470422138, "learning_rate": 1.3847850770478507e-05, "loss": 0.5928, "step": 12908 }, { "epoch": 0.3768941052815976, "grad_norm": 0.6548227133993891, "learning_rate": 1.384720194647202e-05, "loss": 0.6425, "step": 12909 }, { "epoch": 0.376923301509445, "grad_norm": 0.6242952346749968, "learning_rate": 1.3846553122465532e-05, "loss": 0.5377, "step": 12910 }, { "epoch": 0.37695249773729234, "grad_norm": 0.7021168225744546, "learning_rate": 1.3845904298459044e-05, "loss": 0.7649, "step": 12911 }, { "epoch": 0.3769816939651397, "grad_norm": 0.645114218748757, "learning_rate": 1.3845255474452557e-05, "loss": 0.609, "step": 12912 }, { "epoch": 0.37701089019298706, "grad_norm": 0.7049294344650493, "learning_rate": 1.384460665044607e-05, "loss": 0.7117, "step": 12913 }, { "epoch": 0.3770400864208344, "grad_norm": 0.6441524217678818, "learning_rate": 1.384395782643958e-05, "loss": 0.6271, "step": 12914 }, { "epoch": 0.3770692826486818, "grad_norm": 0.8289359559890588, "learning_rate": 1.3843309002433092e-05, "loss": 0.6818, "step": 12915 }, { "epoch": 0.37709847887652914, "grad_norm": 0.6552706686522631, "learning_rate": 1.3842660178426604e-05, "loss": 0.6256, "step": 12916 }, { "epoch": 0.3771276751043765, "grad_norm": 0.6122350413348352, "learning_rate": 1.3842011354420116e-05, "loss": 0.5632, "step": 12917 }, { "epoch": 0.37715687133222386, "grad_norm": 0.6106751192705279, "learning_rate": 1.3841362530413626e-05, "loss": 0.5848, "step": 12918 }, { "epoch": 0.3771860675600712, "grad_norm": 0.6517084010376505, "learning_rate": 1.3840713706407138e-05, "loss": 0.6529, "step": 12919 }, { "epoch": 0.3772152637879186, "grad_norm": 0.7068409826777542, "learning_rate": 1.384006488240065e-05, "loss": 0.7265, "step": 12920 }, { "epoch": 0.37724446001576595, "grad_norm": 0.6203064415782094, "learning_rate": 1.3839416058394162e-05, "loss": 0.5798, "step": 12921 }, { "epoch": 0.3772736562436133, "grad_norm": 0.6182621049266995, "learning_rate": 1.3838767234387674e-05, "loss": 0.5832, "step": 12922 }, { "epoch": 0.37730285247146067, "grad_norm": 0.6275860256657504, "learning_rate": 1.3838118410381185e-05, "loss": 0.6028, "step": 12923 }, { "epoch": 0.37733204869930803, "grad_norm": 0.7181692922111649, "learning_rate": 1.3837469586374697e-05, "loss": 0.7006, "step": 12924 }, { "epoch": 0.3773612449271554, "grad_norm": 0.6340876063530647, "learning_rate": 1.3836820762368209e-05, "loss": 0.5952, "step": 12925 }, { "epoch": 0.37739044115500275, "grad_norm": 0.6233864321227822, "learning_rate": 1.383617193836172e-05, "loss": 0.5881, "step": 12926 }, { "epoch": 0.3774196373828501, "grad_norm": 0.9702752507296613, "learning_rate": 1.3835523114355231e-05, "loss": 0.7465, "step": 12927 }, { "epoch": 0.3774488336106975, "grad_norm": 0.6613492603730917, "learning_rate": 1.3834874290348743e-05, "loss": 0.6353, "step": 12928 }, { "epoch": 0.37747802983854484, "grad_norm": 0.6785097012887623, "learning_rate": 1.3834225466342255e-05, "loss": 0.6141, "step": 12929 }, { "epoch": 0.3775072260663922, "grad_norm": 0.620522521627654, "learning_rate": 1.3833576642335767e-05, "loss": 0.6153, "step": 12930 }, { "epoch": 0.37753642229423956, "grad_norm": 0.6853474072250851, "learning_rate": 1.383292781832928e-05, "loss": 0.6971, "step": 12931 }, { "epoch": 0.3775656185220869, "grad_norm": 0.6347042578568325, "learning_rate": 1.3832278994322793e-05, "loss": 0.5733, "step": 12932 }, { "epoch": 0.3775948147499343, "grad_norm": 0.650662199343292, "learning_rate": 1.3831630170316303e-05, "loss": 0.64, "step": 12933 }, { "epoch": 0.37762401097778164, "grad_norm": 0.6021054143015838, "learning_rate": 1.3830981346309815e-05, "loss": 0.5999, "step": 12934 }, { "epoch": 0.37765320720562906, "grad_norm": 0.5891412906506972, "learning_rate": 1.3830332522303327e-05, "loss": 0.5271, "step": 12935 }, { "epoch": 0.3776824034334764, "grad_norm": 0.6514414511442834, "learning_rate": 1.382968369829684e-05, "loss": 0.5922, "step": 12936 }, { "epoch": 0.3777115996613238, "grad_norm": 0.5957749275626589, "learning_rate": 1.382903487429035e-05, "loss": 0.543, "step": 12937 }, { "epoch": 0.37774079588917114, "grad_norm": 0.6208367089879048, "learning_rate": 1.3828386050283862e-05, "loss": 0.5274, "step": 12938 }, { "epoch": 0.3777699921170185, "grad_norm": 0.6333063127912013, "learning_rate": 1.3827737226277374e-05, "loss": 0.6327, "step": 12939 }, { "epoch": 0.37779918834486587, "grad_norm": 0.6750995928482549, "learning_rate": 1.3827088402270886e-05, "loss": 0.697, "step": 12940 }, { "epoch": 0.3778283845727132, "grad_norm": 0.6943538969088636, "learning_rate": 1.3826439578264396e-05, "loss": 0.6392, "step": 12941 }, { "epoch": 0.3778575808005606, "grad_norm": 0.6427858981760198, "learning_rate": 1.3825790754257908e-05, "loss": 0.607, "step": 12942 }, { "epoch": 0.37788677702840795, "grad_norm": 0.7042528185146727, "learning_rate": 1.382514193025142e-05, "loss": 0.6951, "step": 12943 }, { "epoch": 0.3779159732562553, "grad_norm": 0.6465823623792254, "learning_rate": 1.3824493106244932e-05, "loss": 0.6124, "step": 12944 }, { "epoch": 0.37794516948410267, "grad_norm": 0.6361562272487595, "learning_rate": 1.3823844282238444e-05, "loss": 0.637, "step": 12945 }, { "epoch": 0.37797436571195003, "grad_norm": 0.7528238224576732, "learning_rate": 1.3823195458231954e-05, "loss": 0.7363, "step": 12946 }, { "epoch": 0.3780035619397974, "grad_norm": 0.6907528837634751, "learning_rate": 1.3822546634225466e-05, "loss": 0.6332, "step": 12947 }, { "epoch": 0.37803275816764476, "grad_norm": 0.6166963639817026, "learning_rate": 1.3821897810218978e-05, "loss": 0.5521, "step": 12948 }, { "epoch": 0.3780619543954921, "grad_norm": 0.6729593131485309, "learning_rate": 1.382124898621249e-05, "loss": 0.6426, "step": 12949 }, { "epoch": 0.3780911506233395, "grad_norm": 0.6996229389719878, "learning_rate": 1.3820600162206004e-05, "loss": 0.7325, "step": 12950 }, { "epoch": 0.37812034685118684, "grad_norm": 0.765851617834309, "learning_rate": 1.3819951338199516e-05, "loss": 0.7266, "step": 12951 }, { "epoch": 0.3781495430790342, "grad_norm": 0.6759134819294484, "learning_rate": 1.3819302514193027e-05, "loss": 0.6328, "step": 12952 }, { "epoch": 0.37817873930688156, "grad_norm": 0.6231063387128412, "learning_rate": 1.3818653690186539e-05, "loss": 0.6056, "step": 12953 }, { "epoch": 0.3782079355347289, "grad_norm": 0.7113866148990432, "learning_rate": 1.381800486618005e-05, "loss": 0.6678, "step": 12954 }, { "epoch": 0.3782371317625763, "grad_norm": 0.6767609697323489, "learning_rate": 1.3817356042173563e-05, "loss": 0.6745, "step": 12955 }, { "epoch": 0.37826632799042365, "grad_norm": 0.6051478118555719, "learning_rate": 1.3816707218167073e-05, "loss": 0.5776, "step": 12956 }, { "epoch": 0.378295524218271, "grad_norm": 0.7040079995516026, "learning_rate": 1.3816058394160585e-05, "loss": 0.7316, "step": 12957 }, { "epoch": 0.37832472044611837, "grad_norm": 0.6829403195458561, "learning_rate": 1.3815409570154097e-05, "loss": 0.6697, "step": 12958 }, { "epoch": 0.37835391667396573, "grad_norm": 0.6699698921162377, "learning_rate": 1.3814760746147609e-05, "loss": 0.6669, "step": 12959 }, { "epoch": 0.3783831129018131, "grad_norm": 0.7128747771565224, "learning_rate": 1.381411192214112e-05, "loss": 0.6748, "step": 12960 }, { "epoch": 0.37841230912966045, "grad_norm": 1.144180865207713, "learning_rate": 1.3813463098134631e-05, "loss": 0.6019, "step": 12961 }, { "epoch": 0.3784415053575078, "grad_norm": 0.6583219451548248, "learning_rate": 1.3812814274128143e-05, "loss": 0.692, "step": 12962 }, { "epoch": 0.3784707015853552, "grad_norm": 0.6657027974282358, "learning_rate": 1.3812165450121655e-05, "loss": 0.6403, "step": 12963 }, { "epoch": 0.37849989781320253, "grad_norm": 0.6265140960416584, "learning_rate": 1.3811516626115167e-05, "loss": 0.5741, "step": 12964 }, { "epoch": 0.3785290940410499, "grad_norm": 0.6464087193993012, "learning_rate": 1.3810867802108678e-05, "loss": 0.6479, "step": 12965 }, { "epoch": 0.37855829026889726, "grad_norm": 0.5899328084233489, "learning_rate": 1.381021897810219e-05, "loss": 0.5537, "step": 12966 }, { "epoch": 0.3785874864967446, "grad_norm": 0.694168080597095, "learning_rate": 1.3809570154095702e-05, "loss": 0.6691, "step": 12967 }, { "epoch": 0.378616682724592, "grad_norm": 0.6634638564840589, "learning_rate": 1.3808921330089216e-05, "loss": 0.6302, "step": 12968 }, { "epoch": 0.37864587895243934, "grad_norm": 0.6289945741473358, "learning_rate": 1.3808272506082728e-05, "loss": 0.6029, "step": 12969 }, { "epoch": 0.3786750751802867, "grad_norm": 0.6374687595632057, "learning_rate": 1.380762368207624e-05, "loss": 0.6286, "step": 12970 }, { "epoch": 0.37870427140813406, "grad_norm": 0.6603291604314998, "learning_rate": 1.380697485806975e-05, "loss": 0.6257, "step": 12971 }, { "epoch": 0.3787334676359814, "grad_norm": 0.683756368808917, "learning_rate": 1.3806326034063262e-05, "loss": 0.6984, "step": 12972 }, { "epoch": 0.3787626638638288, "grad_norm": 0.6678295392050608, "learning_rate": 1.3805677210056774e-05, "loss": 0.6164, "step": 12973 }, { "epoch": 0.37879186009167615, "grad_norm": 0.6339893282850801, "learning_rate": 1.3805028386050286e-05, "loss": 0.6019, "step": 12974 }, { "epoch": 0.3788210563195235, "grad_norm": 0.6589376519867937, "learning_rate": 1.3804379562043796e-05, "loss": 0.6632, "step": 12975 }, { "epoch": 0.37885025254737087, "grad_norm": 0.6279057475474344, "learning_rate": 1.3803730738037308e-05, "loss": 0.6188, "step": 12976 }, { "epoch": 0.37887944877521823, "grad_norm": 0.6240996974516261, "learning_rate": 1.380308191403082e-05, "loss": 0.6075, "step": 12977 }, { "epoch": 0.3789086450030656, "grad_norm": 0.6446335769809445, "learning_rate": 1.3802433090024332e-05, "loss": 0.6622, "step": 12978 }, { "epoch": 0.37893784123091295, "grad_norm": 0.6405752324760493, "learning_rate": 1.3801784266017843e-05, "loss": 0.6178, "step": 12979 }, { "epoch": 0.3789670374587603, "grad_norm": 0.6716614572558522, "learning_rate": 1.3801135442011355e-05, "loss": 0.6379, "step": 12980 }, { "epoch": 0.3789962336866077, "grad_norm": 0.6912657215374359, "learning_rate": 1.3800486618004867e-05, "loss": 0.6577, "step": 12981 }, { "epoch": 0.37902542991445504, "grad_norm": 0.7282506257303777, "learning_rate": 1.3799837793998379e-05, "loss": 0.7244, "step": 12982 }, { "epoch": 0.3790546261423024, "grad_norm": 0.6292135025850392, "learning_rate": 1.3799188969991891e-05, "loss": 0.5952, "step": 12983 }, { "epoch": 0.37908382237014976, "grad_norm": 0.6607297602335604, "learning_rate": 1.3798540145985401e-05, "loss": 0.6613, "step": 12984 }, { "epoch": 0.3791130185979971, "grad_norm": 0.7567523652702219, "learning_rate": 1.3797891321978913e-05, "loss": 0.7107, "step": 12985 }, { "epoch": 0.3791422148258445, "grad_norm": 0.66944743364057, "learning_rate": 1.3797242497972425e-05, "loss": 0.6566, "step": 12986 }, { "epoch": 0.37917141105369184, "grad_norm": 0.6939793752912474, "learning_rate": 1.3796593673965939e-05, "loss": 0.7248, "step": 12987 }, { "epoch": 0.3792006072815392, "grad_norm": 0.71860627648916, "learning_rate": 1.3795944849959451e-05, "loss": 0.6982, "step": 12988 }, { "epoch": 0.37922980350938656, "grad_norm": 0.654350834496237, "learning_rate": 1.3795296025952963e-05, "loss": 0.5976, "step": 12989 }, { "epoch": 0.3792589997372339, "grad_norm": 0.5908926004544407, "learning_rate": 1.3794647201946473e-05, "loss": 0.5833, "step": 12990 }, { "epoch": 0.3792881959650813, "grad_norm": 0.6978003230535265, "learning_rate": 1.3793998377939985e-05, "loss": 0.6645, "step": 12991 }, { "epoch": 0.37931739219292865, "grad_norm": 0.6269499456246104, "learning_rate": 1.3793349553933497e-05, "loss": 0.5539, "step": 12992 }, { "epoch": 0.379346588420776, "grad_norm": 0.6542895017232488, "learning_rate": 1.379270072992701e-05, "loss": 0.6378, "step": 12993 }, { "epoch": 0.37937578464862337, "grad_norm": 0.6881052554384649, "learning_rate": 1.379205190592052e-05, "loss": 0.6669, "step": 12994 }, { "epoch": 0.3794049808764708, "grad_norm": 0.6482008170579421, "learning_rate": 1.3791403081914032e-05, "loss": 0.6532, "step": 12995 }, { "epoch": 0.37943417710431815, "grad_norm": 0.6771526132258943, "learning_rate": 1.3790754257907544e-05, "loss": 0.6521, "step": 12996 }, { "epoch": 0.3794633733321655, "grad_norm": 0.585438924284632, "learning_rate": 1.3790105433901056e-05, "loss": 0.533, "step": 12997 }, { "epoch": 0.37949256956001287, "grad_norm": 0.6586187664477217, "learning_rate": 1.3789456609894566e-05, "loss": 0.6147, "step": 12998 }, { "epoch": 0.37952176578786023, "grad_norm": 0.701541171290669, "learning_rate": 1.3788807785888078e-05, "loss": 0.7315, "step": 12999 }, { "epoch": 0.3795509620157076, "grad_norm": 0.6706226020653305, "learning_rate": 1.378815896188159e-05, "loss": 0.693, "step": 13000 }, { "epoch": 0.37958015824355495, "grad_norm": 0.6322866831980352, "learning_rate": 1.3787510137875102e-05, "loss": 0.5771, "step": 13001 }, { "epoch": 0.3796093544714023, "grad_norm": 0.7312860004143639, "learning_rate": 1.3786861313868614e-05, "loss": 0.6694, "step": 13002 }, { "epoch": 0.3796385506992497, "grad_norm": 0.7064681809502907, "learning_rate": 1.3786212489862125e-05, "loss": 0.6768, "step": 13003 }, { "epoch": 0.37966774692709704, "grad_norm": 0.6324402689828105, "learning_rate": 1.3785563665855637e-05, "loss": 0.5792, "step": 13004 }, { "epoch": 0.3796969431549444, "grad_norm": 0.7230560664223812, "learning_rate": 1.3784914841849149e-05, "loss": 0.6777, "step": 13005 }, { "epoch": 0.37972613938279176, "grad_norm": 0.6870233837674549, "learning_rate": 1.3784266017842662e-05, "loss": 0.6597, "step": 13006 }, { "epoch": 0.3797553356106391, "grad_norm": 0.6913502073660835, "learning_rate": 1.3783617193836174e-05, "loss": 0.7125, "step": 13007 }, { "epoch": 0.3797845318384865, "grad_norm": 0.7068210706809686, "learning_rate": 1.3782968369829686e-05, "loss": 0.6648, "step": 13008 }, { "epoch": 0.37981372806633384, "grad_norm": 0.6116650141426825, "learning_rate": 1.3782319545823197e-05, "loss": 0.5439, "step": 13009 }, { "epoch": 0.3798429242941812, "grad_norm": 0.6043734116454894, "learning_rate": 1.3781670721816709e-05, "loss": 0.553, "step": 13010 }, { "epoch": 0.37987212052202857, "grad_norm": 0.6237134428650086, "learning_rate": 1.3781021897810221e-05, "loss": 0.5815, "step": 13011 }, { "epoch": 0.37990131674987593, "grad_norm": 0.6473723893340144, "learning_rate": 1.3780373073803733e-05, "loss": 0.6257, "step": 13012 }, { "epoch": 0.3799305129777233, "grad_norm": 0.6631601770913912, "learning_rate": 1.3779724249797243e-05, "loss": 0.6387, "step": 13013 }, { "epoch": 0.37995970920557065, "grad_norm": 0.8754086187932956, "learning_rate": 1.3779075425790755e-05, "loss": 0.749, "step": 13014 }, { "epoch": 0.379988905433418, "grad_norm": 0.6841858684569438, "learning_rate": 1.3778426601784267e-05, "loss": 0.6116, "step": 13015 }, { "epoch": 0.3800181016612654, "grad_norm": 0.6418290761520591, "learning_rate": 1.377777777777778e-05, "loss": 0.607, "step": 13016 }, { "epoch": 0.38004729788911273, "grad_norm": 0.6329715952993815, "learning_rate": 1.377712895377129e-05, "loss": 0.5821, "step": 13017 }, { "epoch": 0.3800764941169601, "grad_norm": 0.6680421632204747, "learning_rate": 1.3776480129764802e-05, "loss": 0.6524, "step": 13018 }, { "epoch": 0.38010569034480746, "grad_norm": 0.735290203382599, "learning_rate": 1.3775831305758314e-05, "loss": 0.6988, "step": 13019 }, { "epoch": 0.3801348865726548, "grad_norm": 0.6962408481623337, "learning_rate": 1.3775182481751826e-05, "loss": 0.6422, "step": 13020 }, { "epoch": 0.3801640828005022, "grad_norm": 0.6190247663635194, "learning_rate": 1.3774533657745338e-05, "loss": 0.6169, "step": 13021 }, { "epoch": 0.38019327902834954, "grad_norm": 0.7002733669907424, "learning_rate": 1.3773884833738848e-05, "loss": 0.7171, "step": 13022 }, { "epoch": 0.3802224752561969, "grad_norm": 0.669654446152345, "learning_rate": 1.377323600973236e-05, "loss": 0.7047, "step": 13023 }, { "epoch": 0.38025167148404426, "grad_norm": 0.6064820034051593, "learning_rate": 1.3772587185725872e-05, "loss": 0.5867, "step": 13024 }, { "epoch": 0.3802808677118916, "grad_norm": 0.64127140415644, "learning_rate": 1.3771938361719386e-05, "loss": 0.6493, "step": 13025 }, { "epoch": 0.380310063939739, "grad_norm": 0.6580311571528105, "learning_rate": 1.3771289537712898e-05, "loss": 0.6629, "step": 13026 }, { "epoch": 0.38033926016758635, "grad_norm": 0.6454186670450384, "learning_rate": 1.377064071370641e-05, "loss": 0.638, "step": 13027 }, { "epoch": 0.3803684563954337, "grad_norm": 0.6735451121542255, "learning_rate": 1.376999188969992e-05, "loss": 0.6363, "step": 13028 }, { "epoch": 0.38039765262328107, "grad_norm": 0.657189800692856, "learning_rate": 1.3769343065693432e-05, "loss": 0.6262, "step": 13029 }, { "epoch": 0.38042684885112843, "grad_norm": 0.6167135067095414, "learning_rate": 1.3768694241686944e-05, "loss": 0.5664, "step": 13030 }, { "epoch": 0.3804560450789758, "grad_norm": 0.6169473658020923, "learning_rate": 1.3768045417680456e-05, "loss": 0.6325, "step": 13031 }, { "epoch": 0.38048524130682315, "grad_norm": 0.6607664688136438, "learning_rate": 1.3767396593673967e-05, "loss": 0.6064, "step": 13032 }, { "epoch": 0.3805144375346705, "grad_norm": 0.6501966633188181, "learning_rate": 1.3766747769667479e-05, "loss": 0.5996, "step": 13033 }, { "epoch": 0.3805436337625179, "grad_norm": 0.7233814570746626, "learning_rate": 1.376609894566099e-05, "loss": 0.7125, "step": 13034 }, { "epoch": 0.38057282999036524, "grad_norm": 0.6867852972338663, "learning_rate": 1.3765450121654503e-05, "loss": 0.6232, "step": 13035 }, { "epoch": 0.3806020262182126, "grad_norm": 0.6282464140958971, "learning_rate": 1.3764801297648013e-05, "loss": 0.6239, "step": 13036 }, { "epoch": 0.38063122244605996, "grad_norm": 0.6407889234658437, "learning_rate": 1.3764152473641525e-05, "loss": 0.6312, "step": 13037 }, { "epoch": 0.3806604186739073, "grad_norm": 0.6677991612810019, "learning_rate": 1.3763503649635037e-05, "loss": 0.6667, "step": 13038 }, { "epoch": 0.3806896149017547, "grad_norm": 0.5933222554418334, "learning_rate": 1.376285482562855e-05, "loss": 0.5512, "step": 13039 }, { "epoch": 0.38071881112960204, "grad_norm": 0.6298205559053206, "learning_rate": 1.3762206001622061e-05, "loss": 0.6126, "step": 13040 }, { "epoch": 0.3807480073574494, "grad_norm": 0.6429539417382759, "learning_rate": 1.3761557177615572e-05, "loss": 0.6147, "step": 13041 }, { "epoch": 0.38077720358529676, "grad_norm": 0.667775247228417, "learning_rate": 1.3760908353609084e-05, "loss": 0.6565, "step": 13042 }, { "epoch": 0.3808063998131441, "grad_norm": 0.6948157728757065, "learning_rate": 1.3760259529602596e-05, "loss": 0.6792, "step": 13043 }, { "epoch": 0.3808355960409915, "grad_norm": 0.7619511329605234, "learning_rate": 1.375961070559611e-05, "loss": 0.7192, "step": 13044 }, { "epoch": 0.38086479226883885, "grad_norm": 0.70783771983581, "learning_rate": 1.3758961881589621e-05, "loss": 0.7004, "step": 13045 }, { "epoch": 0.3808939884966862, "grad_norm": 0.6677124678738035, "learning_rate": 1.3758313057583133e-05, "loss": 0.6586, "step": 13046 }, { "epoch": 0.38092318472453357, "grad_norm": 0.666391695678474, "learning_rate": 1.3757664233576644e-05, "loss": 0.6732, "step": 13047 }, { "epoch": 0.38095238095238093, "grad_norm": 0.6618480459427475, "learning_rate": 1.3757015409570156e-05, "loss": 0.6263, "step": 13048 }, { "epoch": 0.3809815771802283, "grad_norm": 0.6137117890390988, "learning_rate": 1.3756366585563668e-05, "loss": 0.5484, "step": 13049 }, { "epoch": 0.38101077340807565, "grad_norm": 0.6677459292648998, "learning_rate": 1.375571776155718e-05, "loss": 0.6743, "step": 13050 }, { "epoch": 0.381039969635923, "grad_norm": 0.611089198985684, "learning_rate": 1.375506893755069e-05, "loss": 0.5399, "step": 13051 }, { "epoch": 0.3810691658637704, "grad_norm": 0.651386699912385, "learning_rate": 1.3754420113544202e-05, "loss": 0.6113, "step": 13052 }, { "epoch": 0.38109836209161774, "grad_norm": 0.6811006032483412, "learning_rate": 1.3753771289537714e-05, "loss": 0.677, "step": 13053 }, { "epoch": 0.3811275583194651, "grad_norm": 0.7152472531561332, "learning_rate": 1.3753122465531226e-05, "loss": 0.6629, "step": 13054 }, { "epoch": 0.38115675454731246, "grad_norm": 0.6983255738599946, "learning_rate": 1.3752473641524737e-05, "loss": 0.6154, "step": 13055 }, { "epoch": 0.3811859507751599, "grad_norm": 0.7434821001345385, "learning_rate": 1.3751824817518249e-05, "loss": 0.6329, "step": 13056 }, { "epoch": 0.38121514700300724, "grad_norm": 0.6211544784145026, "learning_rate": 1.375117599351176e-05, "loss": 0.5829, "step": 13057 }, { "epoch": 0.3812443432308546, "grad_norm": 0.6537682029154558, "learning_rate": 1.3750527169505273e-05, "loss": 0.6195, "step": 13058 }, { "epoch": 0.38127353945870196, "grad_norm": 0.6614065557271867, "learning_rate": 1.3749878345498785e-05, "loss": 0.6413, "step": 13059 }, { "epoch": 0.3813027356865493, "grad_norm": 0.7175654150395475, "learning_rate": 1.3749229521492295e-05, "loss": 0.6849, "step": 13060 }, { "epoch": 0.3813319319143967, "grad_norm": 0.7294742773580858, "learning_rate": 1.3748580697485807e-05, "loss": 0.6192, "step": 13061 }, { "epoch": 0.38136112814224404, "grad_norm": 0.7040982621427448, "learning_rate": 1.3747931873479319e-05, "loss": 0.6539, "step": 13062 }, { "epoch": 0.3813903243700914, "grad_norm": 0.6093294435434166, "learning_rate": 1.3747283049472833e-05, "loss": 0.5514, "step": 13063 }, { "epoch": 0.38141952059793877, "grad_norm": 0.6945718214671762, "learning_rate": 1.3746634225466345e-05, "loss": 0.6919, "step": 13064 }, { "epoch": 0.3814487168257861, "grad_norm": 0.6772026531374089, "learning_rate": 1.3745985401459855e-05, "loss": 0.6734, "step": 13065 }, { "epoch": 0.3814779130536335, "grad_norm": 0.6517690066246034, "learning_rate": 1.3745336577453367e-05, "loss": 0.5938, "step": 13066 }, { "epoch": 0.38150710928148085, "grad_norm": 0.679484153526949, "learning_rate": 1.374468775344688e-05, "loss": 0.6305, "step": 13067 }, { "epoch": 0.3815363055093282, "grad_norm": 0.6609616417908222, "learning_rate": 1.3744038929440391e-05, "loss": 0.6522, "step": 13068 }, { "epoch": 0.38156550173717557, "grad_norm": 0.642258055139843, "learning_rate": 1.3743390105433903e-05, "loss": 0.6358, "step": 13069 }, { "epoch": 0.38159469796502293, "grad_norm": 0.6442743428485996, "learning_rate": 1.3742741281427414e-05, "loss": 0.6204, "step": 13070 }, { "epoch": 0.3816238941928703, "grad_norm": 0.6227646961354436, "learning_rate": 1.3742092457420926e-05, "loss": 0.575, "step": 13071 }, { "epoch": 0.38165309042071766, "grad_norm": 0.726883771976292, "learning_rate": 1.3741443633414438e-05, "loss": 0.6672, "step": 13072 }, { "epoch": 0.381682286648565, "grad_norm": 0.6518162235650404, "learning_rate": 1.374079480940795e-05, "loss": 0.6658, "step": 13073 }, { "epoch": 0.3817114828764124, "grad_norm": 0.6135071114309546, "learning_rate": 1.374014598540146e-05, "loss": 0.5807, "step": 13074 }, { "epoch": 0.38174067910425974, "grad_norm": 0.6349575313063134, "learning_rate": 1.3739497161394972e-05, "loss": 0.5654, "step": 13075 }, { "epoch": 0.3817698753321071, "grad_norm": 0.6394893863251836, "learning_rate": 1.3738848337388484e-05, "loss": 0.6503, "step": 13076 }, { "epoch": 0.38179907155995446, "grad_norm": 0.6320036209531676, "learning_rate": 1.3738199513381996e-05, "loss": 0.6029, "step": 13077 }, { "epoch": 0.3818282677878018, "grad_norm": 0.659084007406508, "learning_rate": 1.3737550689375508e-05, "loss": 0.6474, "step": 13078 }, { "epoch": 0.3818574640156492, "grad_norm": 0.6843674451182873, "learning_rate": 1.3736901865369018e-05, "loss": 0.6239, "step": 13079 }, { "epoch": 0.38188666024349655, "grad_norm": 0.7158581623842986, "learning_rate": 1.373625304136253e-05, "loss": 0.7302, "step": 13080 }, { "epoch": 0.3819158564713439, "grad_norm": 0.5768849172951351, "learning_rate": 1.3735604217356042e-05, "loss": 0.4957, "step": 13081 }, { "epoch": 0.38194505269919127, "grad_norm": 0.6319291587080467, "learning_rate": 1.3734955393349556e-05, "loss": 0.5752, "step": 13082 }, { "epoch": 0.38197424892703863, "grad_norm": 0.6219424066394327, "learning_rate": 1.3734306569343068e-05, "loss": 0.6018, "step": 13083 }, { "epoch": 0.382003445154886, "grad_norm": 0.6212276591737161, "learning_rate": 1.3733657745336579e-05, "loss": 0.5726, "step": 13084 }, { "epoch": 0.38203264138273335, "grad_norm": 0.6683325649587748, "learning_rate": 1.373300892133009e-05, "loss": 0.6116, "step": 13085 }, { "epoch": 0.3820618376105807, "grad_norm": 0.7301925973509393, "learning_rate": 1.3732360097323603e-05, "loss": 0.7441, "step": 13086 }, { "epoch": 0.3820910338384281, "grad_norm": 0.6515553820572192, "learning_rate": 1.3731711273317115e-05, "loss": 0.5867, "step": 13087 }, { "epoch": 0.38212023006627543, "grad_norm": 0.6542509542433323, "learning_rate": 1.3731062449310627e-05, "loss": 0.651, "step": 13088 }, { "epoch": 0.3821494262941228, "grad_norm": 0.6672043190127647, "learning_rate": 1.3730413625304137e-05, "loss": 0.6423, "step": 13089 }, { "epoch": 0.38217862252197016, "grad_norm": 0.6477845923012725, "learning_rate": 1.3729764801297649e-05, "loss": 0.6182, "step": 13090 }, { "epoch": 0.3822078187498175, "grad_norm": 0.6319135912708843, "learning_rate": 1.3729115977291161e-05, "loss": 0.616, "step": 13091 }, { "epoch": 0.3822370149776649, "grad_norm": 0.6772345045442341, "learning_rate": 1.3728467153284673e-05, "loss": 0.6813, "step": 13092 }, { "epoch": 0.38226621120551224, "grad_norm": 0.6390746610253641, "learning_rate": 1.3727818329278183e-05, "loss": 0.6635, "step": 13093 }, { "epoch": 0.3822954074333596, "grad_norm": 0.7475772975721771, "learning_rate": 1.3727169505271695e-05, "loss": 0.7283, "step": 13094 }, { "epoch": 0.38232460366120696, "grad_norm": 0.6413429916567653, "learning_rate": 1.3726520681265207e-05, "loss": 0.6469, "step": 13095 }, { "epoch": 0.3823537998890543, "grad_norm": 0.6447896312921686, "learning_rate": 1.372587185725872e-05, "loss": 0.5949, "step": 13096 }, { "epoch": 0.3823829961169017, "grad_norm": 0.6554039629647226, "learning_rate": 1.3725223033252231e-05, "loss": 0.628, "step": 13097 }, { "epoch": 0.38241219234474905, "grad_norm": 0.5867533530322849, "learning_rate": 1.3724574209245742e-05, "loss": 0.5561, "step": 13098 }, { "epoch": 0.3824413885725964, "grad_norm": 0.6571273468124018, "learning_rate": 1.3723925385239254e-05, "loss": 0.6713, "step": 13099 }, { "epoch": 0.38247058480044377, "grad_norm": 0.5924259475077629, "learning_rate": 1.3723276561232766e-05, "loss": 0.5482, "step": 13100 }, { "epoch": 0.38249978102829113, "grad_norm": 0.5614816442057912, "learning_rate": 1.372262773722628e-05, "loss": 0.4944, "step": 13101 }, { "epoch": 0.3825289772561385, "grad_norm": 0.626642098223491, "learning_rate": 1.3721978913219792e-05, "loss": 0.5785, "step": 13102 }, { "epoch": 0.38255817348398585, "grad_norm": 0.636881365097867, "learning_rate": 1.3721330089213302e-05, "loss": 0.562, "step": 13103 }, { "epoch": 0.3825873697118332, "grad_norm": 0.714328253645375, "learning_rate": 1.3720681265206814e-05, "loss": 0.7046, "step": 13104 }, { "epoch": 0.3826165659396806, "grad_norm": 0.6468245152326023, "learning_rate": 1.3720032441200326e-05, "loss": 0.6607, "step": 13105 }, { "epoch": 0.38264576216752794, "grad_norm": 0.6264946903266642, "learning_rate": 1.3719383617193838e-05, "loss": 0.551, "step": 13106 }, { "epoch": 0.3826749583953753, "grad_norm": 0.661630459714693, "learning_rate": 1.371873479318735e-05, "loss": 0.6997, "step": 13107 }, { "epoch": 0.38270415462322266, "grad_norm": 0.6269470179484502, "learning_rate": 1.371808596918086e-05, "loss": 0.6408, "step": 13108 }, { "epoch": 0.38273335085107, "grad_norm": 0.6437268767464094, "learning_rate": 1.3717437145174372e-05, "loss": 0.6443, "step": 13109 }, { "epoch": 0.3827625470789174, "grad_norm": 0.5928925769509968, "learning_rate": 1.3716788321167884e-05, "loss": 0.5595, "step": 13110 }, { "epoch": 0.38279174330676474, "grad_norm": 0.5253821436369124, "learning_rate": 1.3716139497161396e-05, "loss": 0.4455, "step": 13111 }, { "epoch": 0.3828209395346121, "grad_norm": 0.6620434491956162, "learning_rate": 1.3715490673154907e-05, "loss": 0.6444, "step": 13112 }, { "epoch": 0.38285013576245946, "grad_norm": 0.662569266327521, "learning_rate": 1.3714841849148419e-05, "loss": 0.6217, "step": 13113 }, { "epoch": 0.3828793319903068, "grad_norm": 0.6684782286516147, "learning_rate": 1.3714193025141931e-05, "loss": 0.6426, "step": 13114 }, { "epoch": 0.3829085282181542, "grad_norm": 0.7239581337388332, "learning_rate": 1.3713544201135443e-05, "loss": 0.7705, "step": 13115 }, { "epoch": 0.3829377244460016, "grad_norm": 0.6515676390385945, "learning_rate": 1.3712895377128955e-05, "loss": 0.659, "step": 13116 }, { "epoch": 0.38296692067384897, "grad_norm": 0.6314512999665395, "learning_rate": 1.3712246553122465e-05, "loss": 0.606, "step": 13117 }, { "epoch": 0.3829961169016963, "grad_norm": 0.6271229018302565, "learning_rate": 1.3711597729115977e-05, "loss": 0.618, "step": 13118 }, { "epoch": 0.3830253131295437, "grad_norm": 0.5920683555260272, "learning_rate": 1.3710948905109491e-05, "loss": 0.5332, "step": 13119 }, { "epoch": 0.38305450935739105, "grad_norm": 0.6351605363279272, "learning_rate": 1.3710300081103003e-05, "loss": 0.5689, "step": 13120 }, { "epoch": 0.3830837055852384, "grad_norm": 0.6471920517329653, "learning_rate": 1.3709651257096515e-05, "loss": 0.6284, "step": 13121 }, { "epoch": 0.38311290181308577, "grad_norm": 0.6790095368809287, "learning_rate": 1.3709002433090025e-05, "loss": 0.6583, "step": 13122 }, { "epoch": 0.38314209804093313, "grad_norm": 0.6682355617300623, "learning_rate": 1.3708353609083537e-05, "loss": 0.6809, "step": 13123 }, { "epoch": 0.3831712942687805, "grad_norm": 0.6866008791991169, "learning_rate": 1.370770478507705e-05, "loss": 0.673, "step": 13124 }, { "epoch": 0.38320049049662785, "grad_norm": 0.660995086448702, "learning_rate": 1.3707055961070561e-05, "loss": 0.6619, "step": 13125 }, { "epoch": 0.3832296867244752, "grad_norm": 0.6718692496459325, "learning_rate": 1.3706407137064074e-05, "loss": 0.6671, "step": 13126 }, { "epoch": 0.3832588829523226, "grad_norm": 0.6454560067820568, "learning_rate": 1.3705758313057584e-05, "loss": 0.6167, "step": 13127 }, { "epoch": 0.38328807918016994, "grad_norm": 0.6284242921843426, "learning_rate": 1.3705109489051096e-05, "loss": 0.5757, "step": 13128 }, { "epoch": 0.3833172754080173, "grad_norm": 0.7150037667102912, "learning_rate": 1.3704460665044608e-05, "loss": 0.7184, "step": 13129 }, { "epoch": 0.38334647163586466, "grad_norm": 0.6564730560645502, "learning_rate": 1.370381184103812e-05, "loss": 0.6396, "step": 13130 }, { "epoch": 0.383375667863712, "grad_norm": 0.6349802616769895, "learning_rate": 1.370316301703163e-05, "loss": 0.581, "step": 13131 }, { "epoch": 0.3834048640915594, "grad_norm": 0.641108063323409, "learning_rate": 1.3702514193025142e-05, "loss": 0.604, "step": 13132 }, { "epoch": 0.38343406031940674, "grad_norm": 0.6609931967564211, "learning_rate": 1.3701865369018654e-05, "loss": 0.5734, "step": 13133 }, { "epoch": 0.3834632565472541, "grad_norm": 0.6074158996343646, "learning_rate": 1.3701216545012166e-05, "loss": 0.5726, "step": 13134 }, { "epoch": 0.38349245277510147, "grad_norm": 0.6908855546502052, "learning_rate": 1.3700567721005678e-05, "loss": 0.7127, "step": 13135 }, { "epoch": 0.38352164900294883, "grad_norm": 0.6173580407484979, "learning_rate": 1.3699918896999189e-05, "loss": 0.5566, "step": 13136 }, { "epoch": 0.3835508452307962, "grad_norm": 0.6669354032569099, "learning_rate": 1.36992700729927e-05, "loss": 0.6553, "step": 13137 }, { "epoch": 0.38358004145864355, "grad_norm": 0.6688994617383938, "learning_rate": 1.3698621248986214e-05, "loss": 0.6585, "step": 13138 }, { "epoch": 0.3836092376864909, "grad_norm": 0.6258558076922152, "learning_rate": 1.3697972424979726e-05, "loss": 0.5839, "step": 13139 }, { "epoch": 0.3836384339143383, "grad_norm": 0.6433289415576696, "learning_rate": 1.3697323600973239e-05, "loss": 0.6306, "step": 13140 }, { "epoch": 0.38366763014218563, "grad_norm": 0.618837008354985, "learning_rate": 1.3696674776966749e-05, "loss": 0.5869, "step": 13141 }, { "epoch": 0.383696826370033, "grad_norm": 0.6433675407952383, "learning_rate": 1.3696025952960261e-05, "loss": 0.6073, "step": 13142 }, { "epoch": 0.38372602259788036, "grad_norm": 0.7366807156924987, "learning_rate": 1.3695377128953773e-05, "loss": 0.7219, "step": 13143 }, { "epoch": 0.3837552188257277, "grad_norm": 0.5723602566149699, "learning_rate": 1.3694728304947285e-05, "loss": 0.4857, "step": 13144 }, { "epoch": 0.3837844150535751, "grad_norm": 0.6651136188377731, "learning_rate": 1.3694079480940797e-05, "loss": 0.6611, "step": 13145 }, { "epoch": 0.38381361128142244, "grad_norm": 0.658510920624694, "learning_rate": 1.3693430656934307e-05, "loss": 0.6127, "step": 13146 }, { "epoch": 0.3838428075092698, "grad_norm": 0.6731270002745583, "learning_rate": 1.369278183292782e-05, "loss": 0.6126, "step": 13147 }, { "epoch": 0.38387200373711716, "grad_norm": 0.6373212672483357, "learning_rate": 1.3692133008921331e-05, "loss": 0.5841, "step": 13148 }, { "epoch": 0.3839011999649645, "grad_norm": 0.6478318548151746, "learning_rate": 1.3691484184914843e-05, "loss": 0.6344, "step": 13149 }, { "epoch": 0.3839303961928119, "grad_norm": 0.6280196945613522, "learning_rate": 1.3690835360908354e-05, "loss": 0.5918, "step": 13150 }, { "epoch": 0.38395959242065925, "grad_norm": 0.6326758625851572, "learning_rate": 1.3690186536901866e-05, "loss": 0.614, "step": 13151 }, { "epoch": 0.3839887886485066, "grad_norm": 0.6545904518267652, "learning_rate": 1.3689537712895378e-05, "loss": 0.6937, "step": 13152 }, { "epoch": 0.38401798487635397, "grad_norm": 0.7241809933134604, "learning_rate": 1.368888888888889e-05, "loss": 0.7243, "step": 13153 }, { "epoch": 0.38404718110420133, "grad_norm": 0.6091600127460411, "learning_rate": 1.3688240064882402e-05, "loss": 0.5833, "step": 13154 }, { "epoch": 0.3840763773320487, "grad_norm": 0.6155616010668509, "learning_rate": 1.3687591240875912e-05, "loss": 0.5643, "step": 13155 }, { "epoch": 0.38410557355989605, "grad_norm": 0.6315930230940529, "learning_rate": 1.3686942416869424e-05, "loss": 0.6106, "step": 13156 }, { "epoch": 0.3841347697877434, "grad_norm": 0.6613380525169079, "learning_rate": 1.3686293592862938e-05, "loss": 0.6338, "step": 13157 }, { "epoch": 0.3841639660155908, "grad_norm": 0.6617776999611756, "learning_rate": 1.368564476885645e-05, "loss": 0.6067, "step": 13158 }, { "epoch": 0.38419316224343814, "grad_norm": 0.6338506379872899, "learning_rate": 1.3684995944849962e-05, "loss": 0.5832, "step": 13159 }, { "epoch": 0.3842223584712855, "grad_norm": 0.6648225287400439, "learning_rate": 1.3684347120843472e-05, "loss": 0.6331, "step": 13160 }, { "epoch": 0.38425155469913286, "grad_norm": 0.6820413986216, "learning_rate": 1.3683698296836984e-05, "loss": 0.7145, "step": 13161 }, { "epoch": 0.3842807509269802, "grad_norm": 0.669371495375912, "learning_rate": 1.3683049472830496e-05, "loss": 0.6096, "step": 13162 }, { "epoch": 0.3843099471548276, "grad_norm": 0.6757690437088524, "learning_rate": 1.3682400648824008e-05, "loss": 0.6261, "step": 13163 }, { "epoch": 0.38433914338267494, "grad_norm": 0.6765812993342438, "learning_rate": 1.368175182481752e-05, "loss": 0.6675, "step": 13164 }, { "epoch": 0.3843683396105223, "grad_norm": 0.6668322626075796, "learning_rate": 1.368110300081103e-05, "loss": 0.633, "step": 13165 }, { "epoch": 0.38439753583836966, "grad_norm": 0.6727140991117717, "learning_rate": 1.3680454176804543e-05, "loss": 0.6384, "step": 13166 }, { "epoch": 0.384426732066217, "grad_norm": 0.6716938277489931, "learning_rate": 1.3679805352798055e-05, "loss": 0.6743, "step": 13167 }, { "epoch": 0.3844559282940644, "grad_norm": 0.6487529195110093, "learning_rate": 1.3679156528791567e-05, "loss": 0.6381, "step": 13168 }, { "epoch": 0.38448512452191175, "grad_norm": 0.9953211137413631, "learning_rate": 1.3678507704785077e-05, "loss": 0.6309, "step": 13169 }, { "epoch": 0.3845143207497591, "grad_norm": 0.6561055057767013, "learning_rate": 1.367785888077859e-05, "loss": 0.6607, "step": 13170 }, { "epoch": 0.38454351697760647, "grad_norm": 0.6294777663448136, "learning_rate": 1.3677210056772101e-05, "loss": 0.5814, "step": 13171 }, { "epoch": 0.38457271320545383, "grad_norm": 0.734401632246199, "learning_rate": 1.3676561232765613e-05, "loss": 0.6485, "step": 13172 }, { "epoch": 0.3846019094333012, "grad_norm": 0.6257693005493491, "learning_rate": 1.3675912408759125e-05, "loss": 0.5831, "step": 13173 }, { "epoch": 0.38463110566114855, "grad_norm": 0.615599047266708, "learning_rate": 1.3675263584752636e-05, "loss": 0.5993, "step": 13174 }, { "epoch": 0.3846603018889959, "grad_norm": 0.6075823539656168, "learning_rate": 1.3674614760746148e-05, "loss": 0.5922, "step": 13175 }, { "epoch": 0.38468949811684333, "grad_norm": 0.6007351516066464, "learning_rate": 1.3673965936739661e-05, "loss": 0.5356, "step": 13176 }, { "epoch": 0.3847186943446907, "grad_norm": 0.6720445748432804, "learning_rate": 1.3673317112733173e-05, "loss": 0.6187, "step": 13177 }, { "epoch": 0.38474789057253805, "grad_norm": 0.6142563717668499, "learning_rate": 1.3672668288726685e-05, "loss": 0.5418, "step": 13178 }, { "epoch": 0.3847770868003854, "grad_norm": 0.6758972604805477, "learning_rate": 1.3672019464720196e-05, "loss": 0.6485, "step": 13179 }, { "epoch": 0.3848062830282328, "grad_norm": 0.6210630191571531, "learning_rate": 1.3671370640713708e-05, "loss": 0.6018, "step": 13180 }, { "epoch": 0.38483547925608014, "grad_norm": 0.6703352387648711, "learning_rate": 1.367072181670722e-05, "loss": 0.6428, "step": 13181 }, { "epoch": 0.3848646754839275, "grad_norm": 0.6191588934222723, "learning_rate": 1.3670072992700732e-05, "loss": 0.5817, "step": 13182 }, { "epoch": 0.38489387171177486, "grad_norm": 0.6096084258827409, "learning_rate": 1.3669424168694244e-05, "loss": 0.5421, "step": 13183 }, { "epoch": 0.3849230679396222, "grad_norm": 0.7240294047078402, "learning_rate": 1.3668775344687754e-05, "loss": 0.7032, "step": 13184 }, { "epoch": 0.3849522641674696, "grad_norm": 0.6177215785255018, "learning_rate": 1.3668126520681266e-05, "loss": 0.55, "step": 13185 }, { "epoch": 0.38498146039531694, "grad_norm": 0.644398142619765, "learning_rate": 1.3667477696674778e-05, "loss": 0.596, "step": 13186 }, { "epoch": 0.3850106566231643, "grad_norm": 0.8400583494088311, "learning_rate": 1.366682887266829e-05, "loss": 0.6353, "step": 13187 }, { "epoch": 0.38503985285101167, "grad_norm": 0.6282627140157004, "learning_rate": 1.36661800486618e-05, "loss": 0.5914, "step": 13188 }, { "epoch": 0.385069049078859, "grad_norm": 0.7022898928062385, "learning_rate": 1.3665531224655313e-05, "loss": 0.6448, "step": 13189 }, { "epoch": 0.3850982453067064, "grad_norm": 0.6035820451773563, "learning_rate": 1.3664882400648825e-05, "loss": 0.5672, "step": 13190 }, { "epoch": 0.38512744153455375, "grad_norm": 0.6561819271351631, "learning_rate": 1.3664233576642337e-05, "loss": 0.6285, "step": 13191 }, { "epoch": 0.3851566377624011, "grad_norm": 0.653818705269125, "learning_rate": 1.3663584752635849e-05, "loss": 0.6762, "step": 13192 }, { "epoch": 0.38518583399024847, "grad_norm": 0.6698085453853965, "learning_rate": 1.3662935928629359e-05, "loss": 0.6835, "step": 13193 }, { "epoch": 0.38521503021809583, "grad_norm": 0.6731864478546054, "learning_rate": 1.3662287104622871e-05, "loss": 0.6409, "step": 13194 }, { "epoch": 0.3852442264459432, "grad_norm": 0.6243211000059793, "learning_rate": 1.3661638280616385e-05, "loss": 0.6064, "step": 13195 }, { "epoch": 0.38527342267379056, "grad_norm": 0.690753211210986, "learning_rate": 1.3660989456609897e-05, "loss": 0.6833, "step": 13196 }, { "epoch": 0.3853026189016379, "grad_norm": 0.6055583352212125, "learning_rate": 1.3660340632603409e-05, "loss": 0.5389, "step": 13197 }, { "epoch": 0.3853318151294853, "grad_norm": 0.6564870895702075, "learning_rate": 1.3659691808596919e-05, "loss": 0.606, "step": 13198 }, { "epoch": 0.38536101135733264, "grad_norm": 0.7262119771213223, "learning_rate": 1.3659042984590431e-05, "loss": 0.7273, "step": 13199 }, { "epoch": 0.38539020758518, "grad_norm": 0.6338954340718359, "learning_rate": 1.3658394160583943e-05, "loss": 0.6006, "step": 13200 }, { "epoch": 0.38541940381302736, "grad_norm": 0.6533474040625115, "learning_rate": 1.3657745336577455e-05, "loss": 0.6569, "step": 13201 }, { "epoch": 0.3854486000408747, "grad_norm": 0.6722748541970618, "learning_rate": 1.3657096512570967e-05, "loss": 0.646, "step": 13202 }, { "epoch": 0.3854777962687221, "grad_norm": 0.6502137038296314, "learning_rate": 1.3656447688564478e-05, "loss": 0.5913, "step": 13203 }, { "epoch": 0.38550699249656945, "grad_norm": 0.6372762920659919, "learning_rate": 1.365579886455799e-05, "loss": 0.6305, "step": 13204 }, { "epoch": 0.3855361887244168, "grad_norm": 0.6601423374467315, "learning_rate": 1.3655150040551502e-05, "loss": 0.6173, "step": 13205 }, { "epoch": 0.38556538495226417, "grad_norm": 0.6081441881605734, "learning_rate": 1.3654501216545014e-05, "loss": 0.5611, "step": 13206 }, { "epoch": 0.38559458118011153, "grad_norm": 0.6316402519888187, "learning_rate": 1.3653852392538524e-05, "loss": 0.6079, "step": 13207 }, { "epoch": 0.3856237774079589, "grad_norm": 0.6858716749030954, "learning_rate": 1.3653203568532036e-05, "loss": 0.5615, "step": 13208 }, { "epoch": 0.38565297363580625, "grad_norm": 0.6471867480953675, "learning_rate": 1.3652554744525548e-05, "loss": 0.5854, "step": 13209 }, { "epoch": 0.3856821698636536, "grad_norm": 0.6482125857262412, "learning_rate": 1.365190592051906e-05, "loss": 0.6137, "step": 13210 }, { "epoch": 0.385711366091501, "grad_norm": 0.6449117646161718, "learning_rate": 1.365125709651257e-05, "loss": 0.6351, "step": 13211 }, { "epoch": 0.38574056231934833, "grad_norm": 0.6781807097944162, "learning_rate": 1.3650608272506082e-05, "loss": 0.6724, "step": 13212 }, { "epoch": 0.3857697585471957, "grad_norm": 0.7012010677933809, "learning_rate": 1.3649959448499594e-05, "loss": 0.6983, "step": 13213 }, { "epoch": 0.38579895477504306, "grad_norm": 0.8937768663328048, "learning_rate": 1.3649310624493108e-05, "loss": 0.7197, "step": 13214 }, { "epoch": 0.3858281510028904, "grad_norm": 0.6497812901118994, "learning_rate": 1.364866180048662e-05, "loss": 0.6401, "step": 13215 }, { "epoch": 0.3858573472307378, "grad_norm": 0.6385484780408326, "learning_rate": 1.3648012976480132e-05, "loss": 0.6413, "step": 13216 }, { "epoch": 0.38588654345858514, "grad_norm": 0.6060350937851919, "learning_rate": 1.3647364152473643e-05, "loss": 0.5457, "step": 13217 }, { "epoch": 0.3859157396864325, "grad_norm": 0.6415209019330373, "learning_rate": 1.3646715328467155e-05, "loss": 0.595, "step": 13218 }, { "epoch": 0.38594493591427986, "grad_norm": 0.6888059163651875, "learning_rate": 1.3646066504460667e-05, "loss": 0.7208, "step": 13219 }, { "epoch": 0.3859741321421272, "grad_norm": 0.6495657270638863, "learning_rate": 1.3645417680454179e-05, "loss": 0.6363, "step": 13220 }, { "epoch": 0.3860033283699746, "grad_norm": 0.5881449906184102, "learning_rate": 1.364476885644769e-05, "loss": 0.5143, "step": 13221 }, { "epoch": 0.38603252459782195, "grad_norm": 0.6681845662784356, "learning_rate": 1.3644120032441201e-05, "loss": 0.7113, "step": 13222 }, { "epoch": 0.3860617208256693, "grad_norm": 0.6627159662734601, "learning_rate": 1.3643471208434713e-05, "loss": 0.6637, "step": 13223 }, { "epoch": 0.38609091705351667, "grad_norm": 0.7042529817250485, "learning_rate": 1.3642822384428225e-05, "loss": 0.6799, "step": 13224 }, { "epoch": 0.38612011328136403, "grad_norm": 0.6168917455518869, "learning_rate": 1.3642173560421737e-05, "loss": 0.5703, "step": 13225 }, { "epoch": 0.3861493095092114, "grad_norm": 0.6541001103186814, "learning_rate": 1.3641524736415247e-05, "loss": 0.68, "step": 13226 }, { "epoch": 0.38617850573705875, "grad_norm": 0.6802085533559564, "learning_rate": 1.364087591240876e-05, "loss": 0.6253, "step": 13227 }, { "epoch": 0.3862077019649061, "grad_norm": 0.6201225085318661, "learning_rate": 1.3640227088402271e-05, "loss": 0.5731, "step": 13228 }, { "epoch": 0.3862368981927535, "grad_norm": 0.6540279885931777, "learning_rate": 1.3639578264395784e-05, "loss": 0.6648, "step": 13229 }, { "epoch": 0.38626609442060084, "grad_norm": 0.6237382727226742, "learning_rate": 1.3638929440389294e-05, "loss": 0.5625, "step": 13230 }, { "epoch": 0.3862952906484482, "grad_norm": 0.6117967549801332, "learning_rate": 1.3638280616382806e-05, "loss": 0.5877, "step": 13231 }, { "epoch": 0.38632448687629556, "grad_norm": 0.7100396212465718, "learning_rate": 1.3637631792376318e-05, "loss": 0.6123, "step": 13232 }, { "epoch": 0.3863536831041429, "grad_norm": 0.6646947254501514, "learning_rate": 1.3636982968369832e-05, "loss": 0.6436, "step": 13233 }, { "epoch": 0.3863828793319903, "grad_norm": 0.6749721134696419, "learning_rate": 1.3636334144363344e-05, "loss": 0.6522, "step": 13234 }, { "epoch": 0.38641207555983764, "grad_norm": 0.6291802256812287, "learning_rate": 1.3635685320356856e-05, "loss": 0.631, "step": 13235 }, { "epoch": 0.386441271787685, "grad_norm": 0.6177023152657576, "learning_rate": 1.3635036496350366e-05, "loss": 0.5679, "step": 13236 }, { "epoch": 0.3864704680155324, "grad_norm": 0.6289041755971847, "learning_rate": 1.3634387672343878e-05, "loss": 0.5693, "step": 13237 }, { "epoch": 0.3864996642433798, "grad_norm": 0.6471967576237666, "learning_rate": 1.363373884833739e-05, "loss": 0.6327, "step": 13238 }, { "epoch": 0.38652886047122714, "grad_norm": 0.5946862850978428, "learning_rate": 1.3633090024330902e-05, "loss": 0.5449, "step": 13239 }, { "epoch": 0.3865580566990745, "grad_norm": 0.6783305295665067, "learning_rate": 1.3632441200324414e-05, "loss": 0.6607, "step": 13240 }, { "epoch": 0.38658725292692186, "grad_norm": 0.6651613543265353, "learning_rate": 1.3631792376317924e-05, "loss": 0.6273, "step": 13241 }, { "epoch": 0.3866164491547692, "grad_norm": 0.7641762905931114, "learning_rate": 1.3631143552311436e-05, "loss": 0.6958, "step": 13242 }, { "epoch": 0.3866456453826166, "grad_norm": 0.7318143400657376, "learning_rate": 1.3630494728304949e-05, "loss": 0.6798, "step": 13243 }, { "epoch": 0.38667484161046395, "grad_norm": 0.6566205782159371, "learning_rate": 1.362984590429846e-05, "loss": 0.6098, "step": 13244 }, { "epoch": 0.3867040378383113, "grad_norm": 0.6896374844179709, "learning_rate": 1.3629197080291971e-05, "loss": 0.6216, "step": 13245 }, { "epoch": 0.38673323406615867, "grad_norm": 0.6498729734051137, "learning_rate": 1.3628548256285483e-05, "loss": 0.597, "step": 13246 }, { "epoch": 0.38676243029400603, "grad_norm": 0.7194398947105676, "learning_rate": 1.3627899432278995e-05, "loss": 0.7699, "step": 13247 }, { "epoch": 0.3867916265218534, "grad_norm": 0.6914709856312499, "learning_rate": 1.3627250608272507e-05, "loss": 0.6644, "step": 13248 }, { "epoch": 0.38682082274970075, "grad_norm": 0.6688422273908733, "learning_rate": 1.3626601784266017e-05, "loss": 0.5685, "step": 13249 }, { "epoch": 0.3868500189775481, "grad_norm": 0.6385236221154458, "learning_rate": 1.362595296025953e-05, "loss": 0.5798, "step": 13250 }, { "epoch": 0.3868792152053955, "grad_norm": 0.6572038772318389, "learning_rate": 1.3625304136253041e-05, "loss": 0.6514, "step": 13251 }, { "epoch": 0.38690841143324284, "grad_norm": 0.6864664841313949, "learning_rate": 1.3624655312246555e-05, "loss": 0.6586, "step": 13252 }, { "epoch": 0.3869376076610902, "grad_norm": 0.6717658494542585, "learning_rate": 1.3624006488240067e-05, "loss": 0.6189, "step": 13253 }, { "epoch": 0.38696680388893756, "grad_norm": 0.6068419081602344, "learning_rate": 1.3623357664233579e-05, "loss": 0.6064, "step": 13254 }, { "epoch": 0.3869960001167849, "grad_norm": 0.6208373986670119, "learning_rate": 1.362270884022709e-05, "loss": 0.5901, "step": 13255 }, { "epoch": 0.3870251963446323, "grad_norm": 0.6476615445575024, "learning_rate": 1.3622060016220601e-05, "loss": 0.5985, "step": 13256 }, { "epoch": 0.38705439257247964, "grad_norm": 0.732319073814679, "learning_rate": 1.3621411192214113e-05, "loss": 0.7206, "step": 13257 }, { "epoch": 0.387083588800327, "grad_norm": 0.6580802667499757, "learning_rate": 1.3620762368207626e-05, "loss": 0.6383, "step": 13258 }, { "epoch": 0.38711278502817437, "grad_norm": 0.6597773058108201, "learning_rate": 1.3620113544201138e-05, "loss": 0.6162, "step": 13259 }, { "epoch": 0.3871419812560217, "grad_norm": 0.6783110236791811, "learning_rate": 1.3619464720194648e-05, "loss": 0.6719, "step": 13260 }, { "epoch": 0.3871711774838691, "grad_norm": 0.655554509850638, "learning_rate": 1.361881589618816e-05, "loss": 0.6142, "step": 13261 }, { "epoch": 0.38720037371171645, "grad_norm": 0.6982646739204134, "learning_rate": 1.3618167072181672e-05, "loss": 0.623, "step": 13262 }, { "epoch": 0.3872295699395638, "grad_norm": 0.7458960598175756, "learning_rate": 1.3617518248175184e-05, "loss": 0.7064, "step": 13263 }, { "epoch": 0.3872587661674112, "grad_norm": 0.6670796716173392, "learning_rate": 1.3616869424168694e-05, "loss": 0.5812, "step": 13264 }, { "epoch": 0.38728796239525853, "grad_norm": 0.6613407863577635, "learning_rate": 1.3616220600162206e-05, "loss": 0.6614, "step": 13265 }, { "epoch": 0.3873171586231059, "grad_norm": 0.6368476750356086, "learning_rate": 1.3615571776155718e-05, "loss": 0.6087, "step": 13266 }, { "epoch": 0.38734635485095326, "grad_norm": 0.6359911327212835, "learning_rate": 1.361492295214923e-05, "loss": 0.5277, "step": 13267 }, { "epoch": 0.3873755510788006, "grad_norm": 0.7047831017641788, "learning_rate": 1.361427412814274e-05, "loss": 0.6802, "step": 13268 }, { "epoch": 0.387404747306648, "grad_norm": 0.6575234270784018, "learning_rate": 1.3613625304136253e-05, "loss": 0.5972, "step": 13269 }, { "epoch": 0.38743394353449534, "grad_norm": 0.6185944929436056, "learning_rate": 1.3612976480129766e-05, "loss": 0.593, "step": 13270 }, { "epoch": 0.3874631397623427, "grad_norm": 0.6950164565068675, "learning_rate": 1.3612327656123278e-05, "loss": 0.7146, "step": 13271 }, { "epoch": 0.38749233599019006, "grad_norm": 0.6206696502815998, "learning_rate": 1.361167883211679e-05, "loss": 0.5665, "step": 13272 }, { "epoch": 0.3875215322180374, "grad_norm": 0.6133217939638052, "learning_rate": 1.3611030008110303e-05, "loss": 0.5554, "step": 13273 }, { "epoch": 0.3875507284458848, "grad_norm": 0.6869769666984948, "learning_rate": 1.3610381184103813e-05, "loss": 0.6395, "step": 13274 }, { "epoch": 0.38757992467373215, "grad_norm": 0.7307456263442313, "learning_rate": 1.3609732360097325e-05, "loss": 0.6944, "step": 13275 }, { "epoch": 0.3876091209015795, "grad_norm": 0.6312847353351345, "learning_rate": 1.3609083536090837e-05, "loss": 0.5554, "step": 13276 }, { "epoch": 0.38763831712942687, "grad_norm": 0.6448443968780194, "learning_rate": 1.3608434712084349e-05, "loss": 0.5754, "step": 13277 }, { "epoch": 0.38766751335727423, "grad_norm": 0.6383190978150557, "learning_rate": 1.3607785888077861e-05, "loss": 0.6335, "step": 13278 }, { "epoch": 0.3876967095851216, "grad_norm": 0.7322833782861828, "learning_rate": 1.3607137064071371e-05, "loss": 0.7316, "step": 13279 }, { "epoch": 0.38772590581296895, "grad_norm": 0.7699250867372802, "learning_rate": 1.3606488240064883e-05, "loss": 0.7271, "step": 13280 }, { "epoch": 0.3877551020408163, "grad_norm": 0.676073377061993, "learning_rate": 1.3605839416058395e-05, "loss": 0.7142, "step": 13281 }, { "epoch": 0.3877842982686637, "grad_norm": 0.5966234156267397, "learning_rate": 1.3605190592051907e-05, "loss": 0.5607, "step": 13282 }, { "epoch": 0.38781349449651104, "grad_norm": 0.6593409208007391, "learning_rate": 1.3604541768045418e-05, "loss": 0.6768, "step": 13283 }, { "epoch": 0.3878426907243584, "grad_norm": 0.6965888594454758, "learning_rate": 1.360389294403893e-05, "loss": 0.6996, "step": 13284 }, { "epoch": 0.38787188695220576, "grad_norm": 0.6289875232869426, "learning_rate": 1.3603244120032442e-05, "loss": 0.5756, "step": 13285 }, { "epoch": 0.3879010831800531, "grad_norm": 0.5923385083005787, "learning_rate": 1.3602595296025954e-05, "loss": 0.5552, "step": 13286 }, { "epoch": 0.3879302794079005, "grad_norm": 0.6442631137640861, "learning_rate": 1.3601946472019464e-05, "loss": 0.6319, "step": 13287 }, { "epoch": 0.38795947563574784, "grad_norm": 0.6289981323438971, "learning_rate": 1.3601297648012976e-05, "loss": 0.5879, "step": 13288 }, { "epoch": 0.3879886718635952, "grad_norm": 0.6587485807513167, "learning_rate": 1.360064882400649e-05, "loss": 0.6232, "step": 13289 }, { "epoch": 0.38801786809144256, "grad_norm": 0.5943372511554506, "learning_rate": 1.3600000000000002e-05, "loss": 0.5122, "step": 13290 }, { "epoch": 0.3880470643192899, "grad_norm": 0.6795068615867966, "learning_rate": 1.3599351175993514e-05, "loss": 0.6914, "step": 13291 }, { "epoch": 0.3880762605471373, "grad_norm": 0.6632849627794484, "learning_rate": 1.3598702351987026e-05, "loss": 0.6235, "step": 13292 }, { "epoch": 0.38810545677498465, "grad_norm": 0.6541478566131201, "learning_rate": 1.3598053527980536e-05, "loss": 0.6437, "step": 13293 }, { "epoch": 0.388134653002832, "grad_norm": 0.6650634930304896, "learning_rate": 1.3597404703974048e-05, "loss": 0.6342, "step": 13294 }, { "epoch": 0.38816384923067937, "grad_norm": 0.6894068229924859, "learning_rate": 1.359675587996756e-05, "loss": 0.7055, "step": 13295 }, { "epoch": 0.38819304545852673, "grad_norm": 0.6573528164423614, "learning_rate": 1.3596107055961072e-05, "loss": 0.6399, "step": 13296 }, { "epoch": 0.38822224168637415, "grad_norm": 0.6547766721297641, "learning_rate": 1.3595458231954584e-05, "loss": 0.6248, "step": 13297 }, { "epoch": 0.3882514379142215, "grad_norm": 0.6326643668447777, "learning_rate": 1.3594809407948095e-05, "loss": 0.6016, "step": 13298 }, { "epoch": 0.38828063414206887, "grad_norm": 0.658376161140343, "learning_rate": 1.3594160583941607e-05, "loss": 0.5612, "step": 13299 }, { "epoch": 0.38830983036991623, "grad_norm": 0.7718936762761467, "learning_rate": 1.3593511759935119e-05, "loss": 0.6147, "step": 13300 }, { "epoch": 0.3883390265977636, "grad_norm": 0.6448304406504306, "learning_rate": 1.359286293592863e-05, "loss": 0.5982, "step": 13301 }, { "epoch": 0.38836822282561095, "grad_norm": 0.6244825476102248, "learning_rate": 1.3592214111922141e-05, "loss": 0.549, "step": 13302 }, { "epoch": 0.3883974190534583, "grad_norm": 0.607164121343616, "learning_rate": 1.3591565287915653e-05, "loss": 0.5375, "step": 13303 }, { "epoch": 0.3884266152813057, "grad_norm": 0.6534782967032305, "learning_rate": 1.3590916463909165e-05, "loss": 0.631, "step": 13304 }, { "epoch": 0.38845581150915304, "grad_norm": 0.6031803179796261, "learning_rate": 1.3590267639902677e-05, "loss": 0.5183, "step": 13305 }, { "epoch": 0.3884850077370004, "grad_norm": 0.6783767739091225, "learning_rate": 1.3589618815896188e-05, "loss": 0.6407, "step": 13306 }, { "epoch": 0.38851420396484776, "grad_norm": 0.8600306319787235, "learning_rate": 1.35889699918897e-05, "loss": 0.7019, "step": 13307 }, { "epoch": 0.3885434001926951, "grad_norm": 0.6641127979870737, "learning_rate": 1.3588321167883213e-05, "loss": 0.5977, "step": 13308 }, { "epoch": 0.3885725964205425, "grad_norm": 0.6288561636160337, "learning_rate": 1.3587672343876725e-05, "loss": 0.5966, "step": 13309 }, { "epoch": 0.38860179264838984, "grad_norm": 0.6359294722144219, "learning_rate": 1.3587023519870237e-05, "loss": 0.6538, "step": 13310 }, { "epoch": 0.3886309888762372, "grad_norm": 0.600578786863541, "learning_rate": 1.358637469586375e-05, "loss": 0.536, "step": 13311 }, { "epoch": 0.38866018510408457, "grad_norm": 0.6758251406911628, "learning_rate": 1.358572587185726e-05, "loss": 0.699, "step": 13312 }, { "epoch": 0.3886893813319319, "grad_norm": 0.6882158164686956, "learning_rate": 1.3585077047850772e-05, "loss": 0.694, "step": 13313 }, { "epoch": 0.3887185775597793, "grad_norm": 0.6041690659232707, "learning_rate": 1.3584428223844284e-05, "loss": 0.5525, "step": 13314 }, { "epoch": 0.38874777378762665, "grad_norm": 0.6666453509081136, "learning_rate": 1.3583779399837796e-05, "loss": 0.6881, "step": 13315 }, { "epoch": 0.388776970015474, "grad_norm": 0.6706757660364897, "learning_rate": 1.3583130575831306e-05, "loss": 0.6413, "step": 13316 }, { "epoch": 0.38880616624332137, "grad_norm": 0.6420525036831619, "learning_rate": 1.3582481751824818e-05, "loss": 0.6654, "step": 13317 }, { "epoch": 0.38883536247116873, "grad_norm": 0.6565624805161576, "learning_rate": 1.358183292781833e-05, "loss": 0.6516, "step": 13318 }, { "epoch": 0.3888645586990161, "grad_norm": 0.6583665966483568, "learning_rate": 1.3581184103811842e-05, "loss": 0.6499, "step": 13319 }, { "epoch": 0.38889375492686346, "grad_norm": 0.6453026895679222, "learning_rate": 1.3580535279805354e-05, "loss": 0.6279, "step": 13320 }, { "epoch": 0.3889229511547108, "grad_norm": 0.6504304557739686, "learning_rate": 1.3579886455798865e-05, "loss": 0.6188, "step": 13321 }, { "epoch": 0.3889521473825582, "grad_norm": 0.6502175453572897, "learning_rate": 1.3579237631792377e-05, "loss": 0.6082, "step": 13322 }, { "epoch": 0.38898134361040554, "grad_norm": 0.6696544692997092, "learning_rate": 1.3578588807785889e-05, "loss": 0.6449, "step": 13323 }, { "epoch": 0.3890105398382529, "grad_norm": 0.6793589277779564, "learning_rate": 1.35779399837794e-05, "loss": 0.6715, "step": 13324 }, { "epoch": 0.38903973606610026, "grad_norm": 0.7000663599992587, "learning_rate": 1.3577291159772911e-05, "loss": 0.6849, "step": 13325 }, { "epoch": 0.3890689322939476, "grad_norm": 0.621486384704671, "learning_rate": 1.3576642335766423e-05, "loss": 0.5915, "step": 13326 }, { "epoch": 0.389098128521795, "grad_norm": 0.6255386623240611, "learning_rate": 1.3575993511759937e-05, "loss": 0.5509, "step": 13327 }, { "epoch": 0.38912732474964234, "grad_norm": 0.6611362104627618, "learning_rate": 1.3575344687753449e-05, "loss": 0.5927, "step": 13328 }, { "epoch": 0.3891565209774897, "grad_norm": 0.6537405218936417, "learning_rate": 1.357469586374696e-05, "loss": 0.6564, "step": 13329 }, { "epoch": 0.38918571720533707, "grad_norm": 0.7177896517236033, "learning_rate": 1.3574047039740473e-05, "loss": 0.7308, "step": 13330 }, { "epoch": 0.38921491343318443, "grad_norm": 0.6479947652328187, "learning_rate": 1.3573398215733983e-05, "loss": 0.6244, "step": 13331 }, { "epoch": 0.3892441096610318, "grad_norm": 0.6378397304581486, "learning_rate": 1.3572749391727495e-05, "loss": 0.6695, "step": 13332 }, { "epoch": 0.38927330588887915, "grad_norm": 0.6585032162316198, "learning_rate": 1.3572100567721007e-05, "loss": 0.6563, "step": 13333 }, { "epoch": 0.3893025021167265, "grad_norm": 0.6450766575477205, "learning_rate": 1.357145174371452e-05, "loss": 0.5992, "step": 13334 }, { "epoch": 0.3893316983445739, "grad_norm": 0.6317167770704335, "learning_rate": 1.357080291970803e-05, "loss": 0.6265, "step": 13335 }, { "epoch": 0.38936089457242123, "grad_norm": 0.7053388694733029, "learning_rate": 1.3570154095701542e-05, "loss": 0.7325, "step": 13336 }, { "epoch": 0.3893900908002686, "grad_norm": 0.6686988495096434, "learning_rate": 1.3569505271695054e-05, "loss": 0.6538, "step": 13337 }, { "epoch": 0.38941928702811596, "grad_norm": 0.6770693618200418, "learning_rate": 1.3568856447688566e-05, "loss": 0.6483, "step": 13338 }, { "epoch": 0.3894484832559633, "grad_norm": 0.6968697733222067, "learning_rate": 1.3568207623682078e-05, "loss": 0.7117, "step": 13339 }, { "epoch": 0.3894776794838107, "grad_norm": 0.6650508563986887, "learning_rate": 1.3567558799675588e-05, "loss": 0.6699, "step": 13340 }, { "epoch": 0.38950687571165804, "grad_norm": 0.6256622295279003, "learning_rate": 1.35669099756691e-05, "loss": 0.6246, "step": 13341 }, { "epoch": 0.3895360719395054, "grad_norm": 0.6168108092760756, "learning_rate": 1.3566261151662612e-05, "loss": 0.5531, "step": 13342 }, { "epoch": 0.38956526816735276, "grad_norm": 0.6474874102969465, "learning_rate": 1.3565612327656124e-05, "loss": 0.639, "step": 13343 }, { "epoch": 0.3895944643952001, "grad_norm": 0.638473617071337, "learning_rate": 1.3564963503649634e-05, "loss": 0.6469, "step": 13344 }, { "epoch": 0.3896236606230475, "grad_norm": 0.6276658272498028, "learning_rate": 1.3564314679643146e-05, "loss": 0.5993, "step": 13345 }, { "epoch": 0.38965285685089485, "grad_norm": 0.6190021088282299, "learning_rate": 1.356366585563666e-05, "loss": 0.6259, "step": 13346 }, { "epoch": 0.3896820530787422, "grad_norm": 0.6380545511865421, "learning_rate": 1.3563017031630172e-05, "loss": 0.6019, "step": 13347 }, { "epoch": 0.38971124930658957, "grad_norm": 0.6390019257735098, "learning_rate": 1.3562368207623684e-05, "loss": 0.6293, "step": 13348 }, { "epoch": 0.38974044553443693, "grad_norm": 0.6104797121921862, "learning_rate": 1.3561719383617196e-05, "loss": 0.6024, "step": 13349 }, { "epoch": 0.3897696417622843, "grad_norm": 0.643101108410188, "learning_rate": 1.3561070559610707e-05, "loss": 0.6114, "step": 13350 }, { "epoch": 0.38979883799013165, "grad_norm": 0.6356843402760363, "learning_rate": 1.3560421735604219e-05, "loss": 0.6281, "step": 13351 }, { "epoch": 0.389828034217979, "grad_norm": 0.6253771654090295, "learning_rate": 1.355977291159773e-05, "loss": 0.6144, "step": 13352 }, { "epoch": 0.3898572304458264, "grad_norm": 0.632015760995237, "learning_rate": 1.3559124087591243e-05, "loss": 0.5855, "step": 13353 }, { "epoch": 0.38988642667367374, "grad_norm": 0.6211389100515322, "learning_rate": 1.3558475263584753e-05, "loss": 0.5604, "step": 13354 }, { "epoch": 0.3899156229015211, "grad_norm": 0.6406251274815061, "learning_rate": 1.3557826439578265e-05, "loss": 0.6254, "step": 13355 }, { "epoch": 0.38994481912936846, "grad_norm": 0.6341165713468419, "learning_rate": 1.3557177615571777e-05, "loss": 0.6492, "step": 13356 }, { "epoch": 0.3899740153572159, "grad_norm": 0.6052863807790222, "learning_rate": 1.3556528791565289e-05, "loss": 0.5761, "step": 13357 }, { "epoch": 0.39000321158506324, "grad_norm": 0.6241119606328109, "learning_rate": 1.3555879967558801e-05, "loss": 0.5784, "step": 13358 }, { "epoch": 0.3900324078129106, "grad_norm": 0.7071425905890097, "learning_rate": 1.3555231143552311e-05, "loss": 0.6362, "step": 13359 }, { "epoch": 0.39006160404075796, "grad_norm": 0.6875537734480314, "learning_rate": 1.3554582319545824e-05, "loss": 0.5969, "step": 13360 }, { "epoch": 0.3900908002686053, "grad_norm": 0.6688000886636282, "learning_rate": 1.3553933495539336e-05, "loss": 0.6544, "step": 13361 }, { "epoch": 0.3901199964964527, "grad_norm": 0.6436938420226881, "learning_rate": 1.3553284671532848e-05, "loss": 0.6029, "step": 13362 }, { "epoch": 0.39014919272430004, "grad_norm": 0.6547700826940313, "learning_rate": 1.3552635847526358e-05, "loss": 0.6197, "step": 13363 }, { "epoch": 0.3901783889521474, "grad_norm": 0.6380025692801157, "learning_rate": 1.355198702351987e-05, "loss": 0.6044, "step": 13364 }, { "epoch": 0.39020758517999476, "grad_norm": 0.7188043293938744, "learning_rate": 1.3551338199513384e-05, "loss": 0.7012, "step": 13365 }, { "epoch": 0.3902367814078421, "grad_norm": 0.6766378915652062, "learning_rate": 1.3550689375506896e-05, "loss": 0.6071, "step": 13366 }, { "epoch": 0.3902659776356895, "grad_norm": 0.6335859347621067, "learning_rate": 1.3550040551500408e-05, "loss": 0.62, "step": 13367 }, { "epoch": 0.39029517386353685, "grad_norm": 0.636518439029343, "learning_rate": 1.354939172749392e-05, "loss": 0.6135, "step": 13368 }, { "epoch": 0.3903243700913842, "grad_norm": 0.6360304781063243, "learning_rate": 1.354874290348743e-05, "loss": 0.5785, "step": 13369 }, { "epoch": 0.39035356631923157, "grad_norm": 0.7171687190577849, "learning_rate": 1.3548094079480942e-05, "loss": 0.6831, "step": 13370 }, { "epoch": 0.39038276254707893, "grad_norm": 0.6207451824407709, "learning_rate": 1.3547445255474454e-05, "loss": 0.5801, "step": 13371 }, { "epoch": 0.3904119587749263, "grad_norm": 0.6854859974112262, "learning_rate": 1.3546796431467966e-05, "loss": 0.6566, "step": 13372 }, { "epoch": 0.39044115500277365, "grad_norm": 0.7147302247836611, "learning_rate": 1.3546147607461476e-05, "loss": 0.7238, "step": 13373 }, { "epoch": 0.390470351230621, "grad_norm": 0.6426159469840538, "learning_rate": 1.3545498783454988e-05, "loss": 0.6078, "step": 13374 }, { "epoch": 0.3904995474584684, "grad_norm": 0.646970593310934, "learning_rate": 1.35448499594485e-05, "loss": 0.641, "step": 13375 }, { "epoch": 0.39052874368631574, "grad_norm": 0.6439365721825038, "learning_rate": 1.3544201135442013e-05, "loss": 0.6155, "step": 13376 }, { "epoch": 0.3905579399141631, "grad_norm": 0.6735309868957785, "learning_rate": 1.3543552311435525e-05, "loss": 0.6868, "step": 13377 }, { "epoch": 0.39058713614201046, "grad_norm": 0.6581311905583865, "learning_rate": 1.3542903487429035e-05, "loss": 0.6091, "step": 13378 }, { "epoch": 0.3906163323698578, "grad_norm": 0.6328071747875315, "learning_rate": 1.3542254663422547e-05, "loss": 0.5192, "step": 13379 }, { "epoch": 0.3906455285977052, "grad_norm": 0.6801522907826275, "learning_rate": 1.3541605839416059e-05, "loss": 0.6661, "step": 13380 }, { "epoch": 0.39067472482555254, "grad_norm": 0.6819600807732872, "learning_rate": 1.3540957015409571e-05, "loss": 0.6331, "step": 13381 }, { "epoch": 0.3907039210533999, "grad_norm": 0.6480807124487941, "learning_rate": 1.3540308191403081e-05, "loss": 0.6295, "step": 13382 }, { "epoch": 0.39073311728124727, "grad_norm": 0.7004617091390463, "learning_rate": 1.3539659367396593e-05, "loss": 0.714, "step": 13383 }, { "epoch": 0.3907623135090946, "grad_norm": 0.6512458486716612, "learning_rate": 1.3539010543390107e-05, "loss": 0.677, "step": 13384 }, { "epoch": 0.390791509736942, "grad_norm": 0.6574584236156926, "learning_rate": 1.3538361719383619e-05, "loss": 0.6004, "step": 13385 }, { "epoch": 0.39082070596478935, "grad_norm": 0.6659626143994284, "learning_rate": 1.3537712895377131e-05, "loss": 0.6723, "step": 13386 }, { "epoch": 0.3908499021926367, "grad_norm": 0.6666701290765794, "learning_rate": 1.3537064071370643e-05, "loss": 0.6635, "step": 13387 }, { "epoch": 0.3908790984204841, "grad_norm": 0.6768552092379062, "learning_rate": 1.3536415247364153e-05, "loss": 0.6822, "step": 13388 }, { "epoch": 0.39090829464833143, "grad_norm": 0.6901793067860006, "learning_rate": 1.3535766423357666e-05, "loss": 0.6973, "step": 13389 }, { "epoch": 0.3909374908761788, "grad_norm": 0.6343786751726327, "learning_rate": 1.3535117599351178e-05, "loss": 0.6146, "step": 13390 }, { "epoch": 0.39096668710402616, "grad_norm": 0.6515393675460042, "learning_rate": 1.353446877534469e-05, "loss": 0.622, "step": 13391 }, { "epoch": 0.3909958833318735, "grad_norm": 0.6951596342433966, "learning_rate": 1.35338199513382e-05, "loss": 0.7458, "step": 13392 }, { "epoch": 0.3910250795597209, "grad_norm": 0.6070078286511102, "learning_rate": 1.3533171127331712e-05, "loss": 0.5449, "step": 13393 }, { "epoch": 0.39105427578756824, "grad_norm": 0.6370711235127526, "learning_rate": 1.3532522303325224e-05, "loss": 0.6111, "step": 13394 }, { "epoch": 0.3910834720154156, "grad_norm": 0.6391617420228216, "learning_rate": 1.3531873479318736e-05, "loss": 0.5981, "step": 13395 }, { "epoch": 0.39111266824326296, "grad_norm": 0.6777145119390905, "learning_rate": 1.3531224655312248e-05, "loss": 0.5412, "step": 13396 }, { "epoch": 0.3911418644711103, "grad_norm": 0.6022678674940354, "learning_rate": 1.3530575831305758e-05, "loss": 0.5749, "step": 13397 }, { "epoch": 0.3911710606989577, "grad_norm": 0.5760729580370135, "learning_rate": 1.352992700729927e-05, "loss": 0.5313, "step": 13398 }, { "epoch": 0.39120025692680505, "grad_norm": 0.6363991841913182, "learning_rate": 1.3529278183292782e-05, "loss": 0.5942, "step": 13399 }, { "epoch": 0.3912294531546524, "grad_norm": 0.640313147439723, "learning_rate": 1.3528629359286294e-05, "loss": 0.6674, "step": 13400 }, { "epoch": 0.39125864938249977, "grad_norm": 0.6484477722979091, "learning_rate": 1.3527980535279805e-05, "loss": 0.6364, "step": 13401 }, { "epoch": 0.39128784561034713, "grad_norm": 0.673740856942544, "learning_rate": 1.3527331711273317e-05, "loss": 0.6788, "step": 13402 }, { "epoch": 0.3913170418381945, "grad_norm": 0.6455749866016316, "learning_rate": 1.352668288726683e-05, "loss": 0.656, "step": 13403 }, { "epoch": 0.39134623806604185, "grad_norm": 0.6443591829247935, "learning_rate": 1.3526034063260343e-05, "loss": 0.5741, "step": 13404 }, { "epoch": 0.3913754342938892, "grad_norm": 0.7024784134379808, "learning_rate": 1.3525385239253855e-05, "loss": 0.6574, "step": 13405 }, { "epoch": 0.3914046305217366, "grad_norm": 0.6244961405544002, "learning_rate": 1.3524736415247367e-05, "loss": 0.5819, "step": 13406 }, { "epoch": 0.39143382674958394, "grad_norm": 0.7235914799408221, "learning_rate": 1.3524087591240877e-05, "loss": 0.722, "step": 13407 }, { "epoch": 0.3914630229774313, "grad_norm": 0.6134506113713057, "learning_rate": 1.3523438767234389e-05, "loss": 0.5412, "step": 13408 }, { "epoch": 0.39149221920527866, "grad_norm": 0.6134881120770422, "learning_rate": 1.3522789943227901e-05, "loss": 0.597, "step": 13409 }, { "epoch": 0.391521415433126, "grad_norm": 0.6368343791067229, "learning_rate": 1.3522141119221413e-05, "loss": 0.6138, "step": 13410 }, { "epoch": 0.3915506116609734, "grad_norm": 0.6488668499238897, "learning_rate": 1.3521492295214923e-05, "loss": 0.6507, "step": 13411 }, { "epoch": 0.39157980788882074, "grad_norm": 0.7694529439625111, "learning_rate": 1.3520843471208435e-05, "loss": 0.7332, "step": 13412 }, { "epoch": 0.3916090041166681, "grad_norm": 0.6226994099060394, "learning_rate": 1.3520194647201947e-05, "loss": 0.5679, "step": 13413 }, { "epoch": 0.39163820034451546, "grad_norm": 0.6935267926222667, "learning_rate": 1.351954582319546e-05, "loss": 0.6806, "step": 13414 }, { "epoch": 0.3916673965723628, "grad_norm": 0.580448808818094, "learning_rate": 1.3518896999188971e-05, "loss": 0.5129, "step": 13415 }, { "epoch": 0.3916965928002102, "grad_norm": 0.7830285232164228, "learning_rate": 1.3518248175182482e-05, "loss": 0.6342, "step": 13416 }, { "epoch": 0.3917257890280576, "grad_norm": 0.70004531624053, "learning_rate": 1.3517599351175994e-05, "loss": 0.728, "step": 13417 }, { "epoch": 0.39175498525590496, "grad_norm": 0.620285324444813, "learning_rate": 1.3516950527169506e-05, "loss": 0.6136, "step": 13418 }, { "epoch": 0.3917841814837523, "grad_norm": 0.7020778661441891, "learning_rate": 1.3516301703163018e-05, "loss": 0.618, "step": 13419 }, { "epoch": 0.3918133777115997, "grad_norm": 1.0872570568316287, "learning_rate": 1.3515652879156528e-05, "loss": 0.677, "step": 13420 }, { "epoch": 0.39184257393944705, "grad_norm": 0.6664751743483047, "learning_rate": 1.351500405515004e-05, "loss": 0.7032, "step": 13421 }, { "epoch": 0.3918717701672944, "grad_norm": 0.6436834236798371, "learning_rate": 1.3514355231143554e-05, "loss": 0.6034, "step": 13422 }, { "epoch": 0.39190096639514177, "grad_norm": 0.6893605358908083, "learning_rate": 1.3513706407137066e-05, "loss": 0.5883, "step": 13423 }, { "epoch": 0.39193016262298913, "grad_norm": 0.6411001165240665, "learning_rate": 1.3513057583130578e-05, "loss": 0.5613, "step": 13424 }, { "epoch": 0.3919593588508365, "grad_norm": 0.658550581955475, "learning_rate": 1.351240875912409e-05, "loss": 0.6524, "step": 13425 }, { "epoch": 0.39198855507868385, "grad_norm": 0.6633273763771452, "learning_rate": 1.35117599351176e-05, "loss": 0.6332, "step": 13426 }, { "epoch": 0.3920177513065312, "grad_norm": 0.647740886164658, "learning_rate": 1.3511111111111112e-05, "loss": 0.6412, "step": 13427 }, { "epoch": 0.3920469475343786, "grad_norm": 0.6524165000670348, "learning_rate": 1.3510462287104624e-05, "loss": 0.6071, "step": 13428 }, { "epoch": 0.39207614376222594, "grad_norm": 0.6129470951604307, "learning_rate": 1.3509813463098136e-05, "loss": 0.5359, "step": 13429 }, { "epoch": 0.3921053399900733, "grad_norm": 0.6175735557605958, "learning_rate": 1.3509164639091647e-05, "loss": 0.5776, "step": 13430 }, { "epoch": 0.39213453621792066, "grad_norm": 0.6553520709819886, "learning_rate": 1.3508515815085159e-05, "loss": 0.6627, "step": 13431 }, { "epoch": 0.392163732445768, "grad_norm": 0.6483350585945313, "learning_rate": 1.350786699107867e-05, "loss": 0.6283, "step": 13432 }, { "epoch": 0.3921929286736154, "grad_norm": 0.6753410819586522, "learning_rate": 1.3507218167072183e-05, "loss": 0.6344, "step": 13433 }, { "epoch": 0.39222212490146274, "grad_norm": 0.6998790777667478, "learning_rate": 1.3506569343065695e-05, "loss": 0.7271, "step": 13434 }, { "epoch": 0.3922513211293101, "grad_norm": 0.5923722903977471, "learning_rate": 1.3505920519059205e-05, "loss": 0.5389, "step": 13435 }, { "epoch": 0.39228051735715747, "grad_norm": 0.6855616841493556, "learning_rate": 1.3505271695052717e-05, "loss": 0.6791, "step": 13436 }, { "epoch": 0.3923097135850048, "grad_norm": 0.6837928985836612, "learning_rate": 1.350462287104623e-05, "loss": 0.6889, "step": 13437 }, { "epoch": 0.3923389098128522, "grad_norm": 0.6936634075919947, "learning_rate": 1.3503974047039741e-05, "loss": 0.6678, "step": 13438 }, { "epoch": 0.39236810604069955, "grad_norm": 0.6237454122071295, "learning_rate": 1.3503325223033252e-05, "loss": 0.6004, "step": 13439 }, { "epoch": 0.3923973022685469, "grad_norm": 0.6672426605647569, "learning_rate": 1.3502676399026765e-05, "loss": 0.6541, "step": 13440 }, { "epoch": 0.39242649849639427, "grad_norm": 0.6375175822239494, "learning_rate": 1.3502027575020277e-05, "loss": 0.5997, "step": 13441 }, { "epoch": 0.39245569472424163, "grad_norm": 0.6730647463108306, "learning_rate": 1.350137875101379e-05, "loss": 0.6164, "step": 13442 }, { "epoch": 0.392484890952089, "grad_norm": 0.6704824845460031, "learning_rate": 1.3500729927007301e-05, "loss": 0.6707, "step": 13443 }, { "epoch": 0.39251408717993636, "grad_norm": 0.6712350996178962, "learning_rate": 1.3500081103000813e-05, "loss": 0.6398, "step": 13444 }, { "epoch": 0.3925432834077837, "grad_norm": 0.6364598995423492, "learning_rate": 1.3499432278994324e-05, "loss": 0.5428, "step": 13445 }, { "epoch": 0.3925724796356311, "grad_norm": 0.6354682548460655, "learning_rate": 1.3498783454987836e-05, "loss": 0.6181, "step": 13446 }, { "epoch": 0.39260167586347844, "grad_norm": 0.6564049880120476, "learning_rate": 1.3498134630981348e-05, "loss": 0.6047, "step": 13447 }, { "epoch": 0.3926308720913258, "grad_norm": 0.6617052173589867, "learning_rate": 1.349748580697486e-05, "loss": 0.6772, "step": 13448 }, { "epoch": 0.39266006831917316, "grad_norm": 0.6997691864853826, "learning_rate": 1.349683698296837e-05, "loss": 0.6833, "step": 13449 }, { "epoch": 0.3926892645470205, "grad_norm": 0.5799631476421622, "learning_rate": 1.3496188158961882e-05, "loss": 0.5152, "step": 13450 }, { "epoch": 0.3927184607748679, "grad_norm": 0.6685800678846162, "learning_rate": 1.3495539334955394e-05, "loss": 0.6315, "step": 13451 }, { "epoch": 0.39274765700271524, "grad_norm": 0.6733116537437334, "learning_rate": 1.3494890510948906e-05, "loss": 0.6778, "step": 13452 }, { "epoch": 0.3927768532305626, "grad_norm": 0.6491793279583818, "learning_rate": 1.3494241686942418e-05, "loss": 0.5931, "step": 13453 }, { "epoch": 0.39280604945840997, "grad_norm": 0.6344656969398713, "learning_rate": 1.3493592862935929e-05, "loss": 0.5831, "step": 13454 }, { "epoch": 0.39283524568625733, "grad_norm": 0.7585656504255073, "learning_rate": 1.349294403892944e-05, "loss": 0.7194, "step": 13455 }, { "epoch": 0.3928644419141047, "grad_norm": 0.6565415411316484, "learning_rate": 1.3492295214922953e-05, "loss": 0.6061, "step": 13456 }, { "epoch": 0.39289363814195205, "grad_norm": 0.5621685400336408, "learning_rate": 1.3491646390916465e-05, "loss": 0.4803, "step": 13457 }, { "epoch": 0.3929228343697994, "grad_norm": 0.6895500175528254, "learning_rate": 1.3490997566909975e-05, "loss": 0.59, "step": 13458 }, { "epoch": 0.3929520305976468, "grad_norm": 0.6467173158728177, "learning_rate": 1.3490348742903489e-05, "loss": 0.6173, "step": 13459 }, { "epoch": 0.39298122682549413, "grad_norm": 0.6756650874896578, "learning_rate": 1.3489699918897e-05, "loss": 0.6614, "step": 13460 }, { "epoch": 0.3930104230533415, "grad_norm": 0.6112955584867963, "learning_rate": 1.3489051094890513e-05, "loss": 0.567, "step": 13461 }, { "epoch": 0.39303961928118886, "grad_norm": 0.6596446217463543, "learning_rate": 1.3488402270884025e-05, "loss": 0.598, "step": 13462 }, { "epoch": 0.3930688155090362, "grad_norm": 0.7273854424991859, "learning_rate": 1.3487753446877537e-05, "loss": 0.7428, "step": 13463 }, { "epoch": 0.3930980117368836, "grad_norm": 0.6199516454307018, "learning_rate": 1.3487104622871047e-05, "loss": 0.5914, "step": 13464 }, { "epoch": 0.39312720796473094, "grad_norm": 0.6363642485207694, "learning_rate": 1.348645579886456e-05, "loss": 0.6136, "step": 13465 }, { "epoch": 0.3931564041925783, "grad_norm": 0.6590852565174323, "learning_rate": 1.3485806974858071e-05, "loss": 0.6935, "step": 13466 }, { "epoch": 0.39318560042042566, "grad_norm": 0.6747196259096102, "learning_rate": 1.3485158150851583e-05, "loss": 0.6652, "step": 13467 }, { "epoch": 0.393214796648273, "grad_norm": 0.6840308139165918, "learning_rate": 1.3484509326845094e-05, "loss": 0.6234, "step": 13468 }, { "epoch": 0.3932439928761204, "grad_norm": 0.6166784127131398, "learning_rate": 1.3483860502838606e-05, "loss": 0.6116, "step": 13469 }, { "epoch": 0.39327318910396775, "grad_norm": 0.5843987477743426, "learning_rate": 1.3483211678832118e-05, "loss": 0.5257, "step": 13470 }, { "epoch": 0.3933023853318151, "grad_norm": 0.6549502411643827, "learning_rate": 1.348256285482563e-05, "loss": 0.6525, "step": 13471 }, { "epoch": 0.39333158155966247, "grad_norm": 0.689501273132256, "learning_rate": 1.3481914030819142e-05, "loss": 0.7137, "step": 13472 }, { "epoch": 0.39336077778750983, "grad_norm": 0.6664885373576395, "learning_rate": 1.3481265206812652e-05, "loss": 0.6661, "step": 13473 }, { "epoch": 0.3933899740153572, "grad_norm": 0.6161640037598471, "learning_rate": 1.3480616382806164e-05, "loss": 0.5534, "step": 13474 }, { "epoch": 0.39341917024320455, "grad_norm": 0.7227697214402853, "learning_rate": 1.3479967558799676e-05, "loss": 0.6534, "step": 13475 }, { "epoch": 0.3934483664710519, "grad_norm": 0.6551726563100448, "learning_rate": 1.3479318734793188e-05, "loss": 0.6293, "step": 13476 }, { "epoch": 0.3934775626988993, "grad_norm": 0.6545710748288289, "learning_rate": 1.3478669910786698e-05, "loss": 0.6001, "step": 13477 }, { "epoch": 0.3935067589267467, "grad_norm": 0.6571631554234517, "learning_rate": 1.3478021086780212e-05, "loss": 0.633, "step": 13478 }, { "epoch": 0.39353595515459405, "grad_norm": 0.6189009621158269, "learning_rate": 1.3477372262773724e-05, "loss": 0.574, "step": 13479 }, { "epoch": 0.3935651513824414, "grad_norm": 0.6994252716617767, "learning_rate": 1.3476723438767236e-05, "loss": 0.7268, "step": 13480 }, { "epoch": 0.3935943476102888, "grad_norm": 0.6713846080549691, "learning_rate": 1.3476074614760748e-05, "loss": 0.6318, "step": 13481 }, { "epoch": 0.39362354383813614, "grad_norm": 0.6202844270864509, "learning_rate": 1.347542579075426e-05, "loss": 0.5664, "step": 13482 }, { "epoch": 0.3936527400659835, "grad_norm": 0.6349745210130134, "learning_rate": 1.347477696674777e-05, "loss": 0.5646, "step": 13483 }, { "epoch": 0.39368193629383086, "grad_norm": 0.7119171882879627, "learning_rate": 1.3474128142741283e-05, "loss": 0.6823, "step": 13484 }, { "epoch": 0.3937111325216782, "grad_norm": 0.624180082732333, "learning_rate": 1.3473479318734795e-05, "loss": 0.5897, "step": 13485 }, { "epoch": 0.3937403287495256, "grad_norm": 0.6748007081724624, "learning_rate": 1.3472830494728307e-05, "loss": 0.6686, "step": 13486 }, { "epoch": 0.39376952497737294, "grad_norm": 0.6819638529455346, "learning_rate": 1.3472181670721817e-05, "loss": 0.6158, "step": 13487 }, { "epoch": 0.3937987212052203, "grad_norm": 0.7291149217983871, "learning_rate": 1.3471532846715329e-05, "loss": 0.7054, "step": 13488 }, { "epoch": 0.39382791743306766, "grad_norm": 0.6234063632265952, "learning_rate": 1.3470884022708841e-05, "loss": 0.5737, "step": 13489 }, { "epoch": 0.393857113660915, "grad_norm": 0.6687806126979855, "learning_rate": 1.3470235198702353e-05, "loss": 0.6754, "step": 13490 }, { "epoch": 0.3938863098887624, "grad_norm": 0.6605873698953834, "learning_rate": 1.3469586374695865e-05, "loss": 0.5895, "step": 13491 }, { "epoch": 0.39391550611660975, "grad_norm": 0.668687933410736, "learning_rate": 1.3468937550689376e-05, "loss": 0.6332, "step": 13492 }, { "epoch": 0.3939447023444571, "grad_norm": 0.6539536195005283, "learning_rate": 1.3468288726682888e-05, "loss": 0.5875, "step": 13493 }, { "epoch": 0.39397389857230447, "grad_norm": 0.634457668135658, "learning_rate": 1.34676399026764e-05, "loss": 0.6018, "step": 13494 }, { "epoch": 0.39400309480015183, "grad_norm": 0.6134540919978788, "learning_rate": 1.3466991078669912e-05, "loss": 0.5799, "step": 13495 }, { "epoch": 0.3940322910279992, "grad_norm": 0.6615279742667222, "learning_rate": 1.3466342254663422e-05, "loss": 0.6216, "step": 13496 }, { "epoch": 0.39406148725584655, "grad_norm": 0.6846398205950676, "learning_rate": 1.3465693430656936e-05, "loss": 0.7166, "step": 13497 }, { "epoch": 0.3940906834836939, "grad_norm": 0.6314156643483962, "learning_rate": 1.3465044606650448e-05, "loss": 0.5741, "step": 13498 }, { "epoch": 0.3941198797115413, "grad_norm": 0.6151235857781189, "learning_rate": 1.346439578264396e-05, "loss": 0.5575, "step": 13499 }, { "epoch": 0.39414907593938864, "grad_norm": 0.742209968925305, "learning_rate": 1.3463746958637472e-05, "loss": 0.6271, "step": 13500 }, { "epoch": 0.394178272167236, "grad_norm": 0.5718850296733587, "learning_rate": 1.3463098134630984e-05, "loss": 0.5172, "step": 13501 }, { "epoch": 0.39420746839508336, "grad_norm": 0.6863714296604585, "learning_rate": 1.3462449310624494e-05, "loss": 0.7077, "step": 13502 }, { "epoch": 0.3942366646229307, "grad_norm": 0.6400807018474812, "learning_rate": 1.3461800486618006e-05, "loss": 0.5995, "step": 13503 }, { "epoch": 0.3942658608507781, "grad_norm": 0.6550751377814804, "learning_rate": 1.3461151662611518e-05, "loss": 0.6398, "step": 13504 }, { "epoch": 0.39429505707862544, "grad_norm": 0.6904000977727968, "learning_rate": 1.346050283860503e-05, "loss": 0.7186, "step": 13505 }, { "epoch": 0.3943242533064728, "grad_norm": 0.6505912553145415, "learning_rate": 1.345985401459854e-05, "loss": 0.6361, "step": 13506 }, { "epoch": 0.39435344953432017, "grad_norm": 0.6048007625165863, "learning_rate": 1.3459205190592053e-05, "loss": 0.5598, "step": 13507 }, { "epoch": 0.3943826457621675, "grad_norm": 0.5645636068224738, "learning_rate": 1.3458556366585565e-05, "loss": 0.4873, "step": 13508 }, { "epoch": 0.3944118419900149, "grad_norm": 0.6276013834566666, "learning_rate": 1.3457907542579077e-05, "loss": 0.6064, "step": 13509 }, { "epoch": 0.39444103821786225, "grad_norm": 0.6447242275400817, "learning_rate": 1.3457258718572589e-05, "loss": 0.6613, "step": 13510 }, { "epoch": 0.3944702344457096, "grad_norm": 0.601709137069431, "learning_rate": 1.3456609894566099e-05, "loss": 0.5614, "step": 13511 }, { "epoch": 0.39449943067355697, "grad_norm": 0.6332117412448389, "learning_rate": 1.3455961070559611e-05, "loss": 0.5555, "step": 13512 }, { "epoch": 0.39452862690140433, "grad_norm": 0.6332803631840315, "learning_rate": 1.3455312246553123e-05, "loss": 0.6423, "step": 13513 }, { "epoch": 0.3945578231292517, "grad_norm": 0.6879180680039318, "learning_rate": 1.3454663422546635e-05, "loss": 0.7198, "step": 13514 }, { "epoch": 0.39458701935709906, "grad_norm": 0.6380895410370317, "learning_rate": 1.3454014598540145e-05, "loss": 0.6066, "step": 13515 }, { "epoch": 0.3946162155849464, "grad_norm": 0.6290461052283376, "learning_rate": 1.3453365774533659e-05, "loss": 0.5942, "step": 13516 }, { "epoch": 0.3946454118127938, "grad_norm": 0.6793716057605762, "learning_rate": 1.3452716950527171e-05, "loss": 0.6995, "step": 13517 }, { "epoch": 0.39467460804064114, "grad_norm": 0.7294179887620116, "learning_rate": 1.3452068126520683e-05, "loss": 0.7228, "step": 13518 }, { "epoch": 0.3947038042684885, "grad_norm": 0.7156522301265675, "learning_rate": 1.3451419302514195e-05, "loss": 0.7133, "step": 13519 }, { "epoch": 0.39473300049633586, "grad_norm": 0.645153398937899, "learning_rate": 1.3450770478507707e-05, "loss": 0.5886, "step": 13520 }, { "epoch": 0.3947621967241832, "grad_norm": 0.6205247393349042, "learning_rate": 1.3450121654501218e-05, "loss": 0.6156, "step": 13521 }, { "epoch": 0.3947913929520306, "grad_norm": 0.7016609879578417, "learning_rate": 1.344947283049473e-05, "loss": 0.6873, "step": 13522 }, { "epoch": 0.39482058917987795, "grad_norm": 0.6216712686453304, "learning_rate": 1.3448824006488242e-05, "loss": 0.5839, "step": 13523 }, { "epoch": 0.3948497854077253, "grad_norm": 0.5978995430089648, "learning_rate": 1.3448175182481754e-05, "loss": 0.5559, "step": 13524 }, { "epoch": 0.39487898163557267, "grad_norm": 0.6361202698506799, "learning_rate": 1.3447526358475264e-05, "loss": 0.5876, "step": 13525 }, { "epoch": 0.39490817786342003, "grad_norm": 0.6314859482854862, "learning_rate": 1.3446877534468776e-05, "loss": 0.5474, "step": 13526 }, { "epoch": 0.3949373740912674, "grad_norm": 0.7136522887821316, "learning_rate": 1.3446228710462288e-05, "loss": 0.6841, "step": 13527 }, { "epoch": 0.39496657031911475, "grad_norm": 0.6337496605758789, "learning_rate": 1.34455798864558e-05, "loss": 0.6071, "step": 13528 }, { "epoch": 0.3949957665469621, "grad_norm": 0.6082689772981421, "learning_rate": 1.3444931062449312e-05, "loss": 0.5515, "step": 13529 }, { "epoch": 0.3950249627748095, "grad_norm": 0.6520750796891528, "learning_rate": 1.3444282238442822e-05, "loss": 0.6895, "step": 13530 }, { "epoch": 0.39505415900265684, "grad_norm": 0.6443612670145311, "learning_rate": 1.3443633414436334e-05, "loss": 0.6298, "step": 13531 }, { "epoch": 0.3950833552305042, "grad_norm": 0.6287032217786829, "learning_rate": 1.3442984590429846e-05, "loss": 0.5871, "step": 13532 }, { "epoch": 0.39511255145835156, "grad_norm": 0.6387395841650643, "learning_rate": 1.3442335766423358e-05, "loss": 0.573, "step": 13533 }, { "epoch": 0.3951417476861989, "grad_norm": 0.7014843222551237, "learning_rate": 1.3441686942416869e-05, "loss": 0.6644, "step": 13534 }, { "epoch": 0.3951709439140463, "grad_norm": 0.6148661836446908, "learning_rate": 1.3441038118410383e-05, "loss": 0.5446, "step": 13535 }, { "epoch": 0.39520014014189364, "grad_norm": 0.6418820930369956, "learning_rate": 1.3440389294403895e-05, "loss": 0.6384, "step": 13536 }, { "epoch": 0.395229336369741, "grad_norm": 0.6339715026861525, "learning_rate": 1.3439740470397407e-05, "loss": 0.5951, "step": 13537 }, { "epoch": 0.3952585325975884, "grad_norm": 0.7014761993671484, "learning_rate": 1.3439091646390919e-05, "loss": 0.6688, "step": 13538 }, { "epoch": 0.3952877288254358, "grad_norm": 0.652657221822953, "learning_rate": 1.343844282238443e-05, "loss": 0.6449, "step": 13539 }, { "epoch": 0.39531692505328314, "grad_norm": 0.6574161329909354, "learning_rate": 1.3437793998377941e-05, "loss": 0.6205, "step": 13540 }, { "epoch": 0.3953461212811305, "grad_norm": 0.6374859872243495, "learning_rate": 1.3437145174371453e-05, "loss": 0.6139, "step": 13541 }, { "epoch": 0.39537531750897786, "grad_norm": 0.6851251677511375, "learning_rate": 1.3436496350364965e-05, "loss": 0.7196, "step": 13542 }, { "epoch": 0.3954045137368252, "grad_norm": 0.6086257520860482, "learning_rate": 1.3435847526358477e-05, "loss": 0.5357, "step": 13543 }, { "epoch": 0.3954337099646726, "grad_norm": 0.6463583525658579, "learning_rate": 1.3435198702351987e-05, "loss": 0.6662, "step": 13544 }, { "epoch": 0.39546290619251995, "grad_norm": 0.6222588784778201, "learning_rate": 1.34345498783455e-05, "loss": 0.6115, "step": 13545 }, { "epoch": 0.3954921024203673, "grad_norm": 0.619309846340728, "learning_rate": 1.3433901054339011e-05, "loss": 0.5965, "step": 13546 }, { "epoch": 0.39552129864821467, "grad_norm": 0.6342306844059606, "learning_rate": 1.3433252230332523e-05, "loss": 0.5976, "step": 13547 }, { "epoch": 0.39555049487606203, "grad_norm": 0.757042127393381, "learning_rate": 1.3432603406326035e-05, "loss": 0.7305, "step": 13548 }, { "epoch": 0.3955796911039094, "grad_norm": 0.5950302176164435, "learning_rate": 1.3431954582319546e-05, "loss": 0.5505, "step": 13549 }, { "epoch": 0.39560888733175675, "grad_norm": 0.5724075530338546, "learning_rate": 1.3431305758313058e-05, "loss": 0.5527, "step": 13550 }, { "epoch": 0.3956380835596041, "grad_norm": 0.6015246222963049, "learning_rate": 1.343065693430657e-05, "loss": 0.5431, "step": 13551 }, { "epoch": 0.3956672797874515, "grad_norm": 0.6194430387500548, "learning_rate": 1.3430008110300082e-05, "loss": 0.5617, "step": 13552 }, { "epoch": 0.39569647601529884, "grad_norm": 0.7281788467939935, "learning_rate": 1.3429359286293592e-05, "loss": 0.7466, "step": 13553 }, { "epoch": 0.3957256722431462, "grad_norm": 0.6522908658843736, "learning_rate": 1.3428710462287106e-05, "loss": 0.5505, "step": 13554 }, { "epoch": 0.39575486847099356, "grad_norm": 0.6615404188171977, "learning_rate": 1.3428061638280618e-05, "loss": 0.6515, "step": 13555 }, { "epoch": 0.3957840646988409, "grad_norm": 0.5914566721387852, "learning_rate": 1.342741281427413e-05, "loss": 0.5437, "step": 13556 }, { "epoch": 0.3958132609266883, "grad_norm": 0.674824504716867, "learning_rate": 1.3426763990267642e-05, "loss": 0.6454, "step": 13557 }, { "epoch": 0.39584245715453564, "grad_norm": 0.6338122179354113, "learning_rate": 1.3426115166261154e-05, "loss": 0.5938, "step": 13558 }, { "epoch": 0.395871653382383, "grad_norm": 0.6202269862798264, "learning_rate": 1.3425466342254664e-05, "loss": 0.5637, "step": 13559 }, { "epoch": 0.39590084961023037, "grad_norm": 0.6259175439660689, "learning_rate": 1.3424817518248176e-05, "loss": 0.5817, "step": 13560 }, { "epoch": 0.3959300458380777, "grad_norm": 0.6232243196020987, "learning_rate": 1.3424168694241688e-05, "loss": 0.5888, "step": 13561 }, { "epoch": 0.3959592420659251, "grad_norm": 0.6616761872007697, "learning_rate": 1.34235198702352e-05, "loss": 0.5365, "step": 13562 }, { "epoch": 0.39598843829377245, "grad_norm": 0.6777412531818859, "learning_rate": 1.342287104622871e-05, "loss": 0.6536, "step": 13563 }, { "epoch": 0.3960176345216198, "grad_norm": 0.6884812591753418, "learning_rate": 1.3422222222222223e-05, "loss": 0.7144, "step": 13564 }, { "epoch": 0.39604683074946717, "grad_norm": 0.6808640525661177, "learning_rate": 1.3421573398215735e-05, "loss": 0.5718, "step": 13565 }, { "epoch": 0.39607602697731453, "grad_norm": 0.656466400141341, "learning_rate": 1.3420924574209247e-05, "loss": 0.6401, "step": 13566 }, { "epoch": 0.3961052232051619, "grad_norm": 0.6206196275069233, "learning_rate": 1.3420275750202759e-05, "loss": 0.5951, "step": 13567 }, { "epoch": 0.39613441943300925, "grad_norm": 0.6443739526169608, "learning_rate": 1.341962692619627e-05, "loss": 0.6501, "step": 13568 }, { "epoch": 0.3961636156608566, "grad_norm": 0.6700425396337859, "learning_rate": 1.3418978102189781e-05, "loss": 0.6377, "step": 13569 }, { "epoch": 0.396192811888704, "grad_norm": 0.6496511190590679, "learning_rate": 1.3418329278183293e-05, "loss": 0.6218, "step": 13570 }, { "epoch": 0.39622200811655134, "grad_norm": 0.6370832351358983, "learning_rate": 1.3417680454176805e-05, "loss": 0.6072, "step": 13571 }, { "epoch": 0.3962512043443987, "grad_norm": 0.7179371928044956, "learning_rate": 1.3417031630170316e-05, "loss": 0.7338, "step": 13572 }, { "epoch": 0.39628040057224606, "grad_norm": 0.6069485237860631, "learning_rate": 1.341638280616383e-05, "loss": 0.5605, "step": 13573 }, { "epoch": 0.3963095968000934, "grad_norm": 0.7310481703144601, "learning_rate": 1.3415733982157341e-05, "loss": 0.6977, "step": 13574 }, { "epoch": 0.3963387930279408, "grad_norm": 0.631295084740036, "learning_rate": 1.3415085158150853e-05, "loss": 0.6303, "step": 13575 }, { "epoch": 0.39636798925578814, "grad_norm": 0.6836064435918683, "learning_rate": 1.3414436334144365e-05, "loss": 0.6589, "step": 13576 }, { "epoch": 0.3963971854836355, "grad_norm": 0.6121919964628786, "learning_rate": 1.3413787510137877e-05, "loss": 0.5721, "step": 13577 }, { "epoch": 0.39642638171148287, "grad_norm": 0.6962572419820636, "learning_rate": 1.3413138686131388e-05, "loss": 0.6991, "step": 13578 }, { "epoch": 0.39645557793933023, "grad_norm": 0.6143137342045133, "learning_rate": 1.34124898621249e-05, "loss": 0.548, "step": 13579 }, { "epoch": 0.3964847741671776, "grad_norm": 0.6663801046522063, "learning_rate": 1.3411841038118412e-05, "loss": 0.6584, "step": 13580 }, { "epoch": 0.39651397039502495, "grad_norm": 0.6495959242472411, "learning_rate": 1.3411192214111924e-05, "loss": 0.6192, "step": 13581 }, { "epoch": 0.3965431666228723, "grad_norm": 0.6421473915150796, "learning_rate": 1.3410543390105434e-05, "loss": 0.5693, "step": 13582 }, { "epoch": 0.3965723628507197, "grad_norm": 0.6172358885162427, "learning_rate": 1.3409894566098946e-05, "loss": 0.5799, "step": 13583 }, { "epoch": 0.39660155907856703, "grad_norm": 0.7017497814908809, "learning_rate": 1.3409245742092458e-05, "loss": 0.6545, "step": 13584 }, { "epoch": 0.3966307553064144, "grad_norm": 0.6853692210145058, "learning_rate": 1.340859691808597e-05, "loss": 0.6856, "step": 13585 }, { "epoch": 0.39665995153426176, "grad_norm": 0.8241075679841016, "learning_rate": 1.340794809407948e-05, "loss": 0.6417, "step": 13586 }, { "epoch": 0.3966891477621091, "grad_norm": 0.6302263311624869, "learning_rate": 1.3407299270072993e-05, "loss": 0.5928, "step": 13587 }, { "epoch": 0.3967183439899565, "grad_norm": 0.6584449019363783, "learning_rate": 1.3406650446066505e-05, "loss": 0.6497, "step": 13588 }, { "epoch": 0.39674754021780384, "grad_norm": 0.6438042871809728, "learning_rate": 1.3406001622060017e-05, "loss": 0.6333, "step": 13589 }, { "epoch": 0.3967767364456512, "grad_norm": 0.6123706134879952, "learning_rate": 1.3405352798053529e-05, "loss": 0.5624, "step": 13590 }, { "epoch": 0.39680593267349856, "grad_norm": 0.6029241098987137, "learning_rate": 1.3404703974047042e-05, "loss": 0.5811, "step": 13591 }, { "epoch": 0.3968351289013459, "grad_norm": 0.6785456327482409, "learning_rate": 1.3404055150040553e-05, "loss": 0.7348, "step": 13592 }, { "epoch": 0.3968643251291933, "grad_norm": 0.6724296371333355, "learning_rate": 1.3403406326034065e-05, "loss": 0.6418, "step": 13593 }, { "epoch": 0.39689352135704065, "grad_norm": 0.6279548753411771, "learning_rate": 1.3402757502027577e-05, "loss": 0.5938, "step": 13594 }, { "epoch": 0.396922717584888, "grad_norm": 0.6732307962400929, "learning_rate": 1.3402108678021089e-05, "loss": 0.6813, "step": 13595 }, { "epoch": 0.39695191381273537, "grad_norm": 0.604702904869838, "learning_rate": 1.3401459854014601e-05, "loss": 0.5715, "step": 13596 }, { "epoch": 0.39698111004058273, "grad_norm": 0.6560164074488635, "learning_rate": 1.3400811030008111e-05, "loss": 0.6243, "step": 13597 }, { "epoch": 0.39701030626843015, "grad_norm": 0.6550270429313624, "learning_rate": 1.3400162206001623e-05, "loss": 0.6439, "step": 13598 }, { "epoch": 0.3970395024962775, "grad_norm": 0.6043505886207657, "learning_rate": 1.3399513381995135e-05, "loss": 0.5664, "step": 13599 }, { "epoch": 0.39706869872412487, "grad_norm": 0.6707147115572932, "learning_rate": 1.3398864557988647e-05, "loss": 0.6537, "step": 13600 }, { "epoch": 0.39709789495197223, "grad_norm": 0.6875408811134339, "learning_rate": 1.3398215733982158e-05, "loss": 0.7143, "step": 13601 }, { "epoch": 0.3971270911798196, "grad_norm": 0.6069327302823728, "learning_rate": 1.339756690997567e-05, "loss": 0.5818, "step": 13602 }, { "epoch": 0.39715628740766695, "grad_norm": 0.6267721096780268, "learning_rate": 1.3396918085969182e-05, "loss": 0.5768, "step": 13603 }, { "epoch": 0.3971854836355143, "grad_norm": 0.6684528648220972, "learning_rate": 1.3396269261962694e-05, "loss": 0.6505, "step": 13604 }, { "epoch": 0.3972146798633617, "grad_norm": 0.6202565998370432, "learning_rate": 1.3395620437956204e-05, "loss": 0.5685, "step": 13605 }, { "epoch": 0.39724387609120904, "grad_norm": 0.7843553136551875, "learning_rate": 1.3394971613949716e-05, "loss": 0.6709, "step": 13606 }, { "epoch": 0.3972730723190564, "grad_norm": 0.6881045841544564, "learning_rate": 1.3394322789943228e-05, "loss": 0.7127, "step": 13607 }, { "epoch": 0.39730226854690376, "grad_norm": 0.6695432258037836, "learning_rate": 1.339367396593674e-05, "loss": 0.6755, "step": 13608 }, { "epoch": 0.3973314647747511, "grad_norm": 0.6403939604955972, "learning_rate": 1.3393025141930252e-05, "loss": 0.5905, "step": 13609 }, { "epoch": 0.3973606610025985, "grad_norm": 0.6377376180392154, "learning_rate": 1.3392376317923766e-05, "loss": 0.6035, "step": 13610 }, { "epoch": 0.39738985723044584, "grad_norm": 0.6910986140833185, "learning_rate": 1.3391727493917276e-05, "loss": 0.7286, "step": 13611 }, { "epoch": 0.3974190534582932, "grad_norm": 0.650803512956411, "learning_rate": 1.3391078669910788e-05, "loss": 0.6115, "step": 13612 }, { "epoch": 0.39744824968614056, "grad_norm": 0.5952702525212145, "learning_rate": 1.33904298459043e-05, "loss": 0.5634, "step": 13613 }, { "epoch": 0.3974774459139879, "grad_norm": 0.6183914614887358, "learning_rate": 1.3389781021897812e-05, "loss": 0.5582, "step": 13614 }, { "epoch": 0.3975066421418353, "grad_norm": 0.6839642842626811, "learning_rate": 1.3389132197891324e-05, "loss": 0.7014, "step": 13615 }, { "epoch": 0.39753583836968265, "grad_norm": 0.6321029335904433, "learning_rate": 1.3388483373884835e-05, "loss": 0.6062, "step": 13616 }, { "epoch": 0.39756503459753, "grad_norm": 0.6218718398695612, "learning_rate": 1.3387834549878347e-05, "loss": 0.5523, "step": 13617 }, { "epoch": 0.39759423082537737, "grad_norm": 0.6736782851174925, "learning_rate": 1.3387185725871859e-05, "loss": 0.7067, "step": 13618 }, { "epoch": 0.39762342705322473, "grad_norm": 0.6721963272979716, "learning_rate": 1.338653690186537e-05, "loss": 0.6479, "step": 13619 }, { "epoch": 0.3976526232810721, "grad_norm": 0.6173019562477811, "learning_rate": 1.3385888077858881e-05, "loss": 0.5464, "step": 13620 }, { "epoch": 0.39768181950891945, "grad_norm": 0.6051631898185754, "learning_rate": 1.3385239253852393e-05, "loss": 0.5313, "step": 13621 }, { "epoch": 0.3977110157367668, "grad_norm": 0.6106655425118092, "learning_rate": 1.3384590429845905e-05, "loss": 0.59, "step": 13622 }, { "epoch": 0.3977402119646142, "grad_norm": 0.6843399442422061, "learning_rate": 1.3383941605839417e-05, "loss": 0.5685, "step": 13623 }, { "epoch": 0.39776940819246154, "grad_norm": 0.6681461533005952, "learning_rate": 1.3383292781832928e-05, "loss": 0.6467, "step": 13624 }, { "epoch": 0.3977986044203089, "grad_norm": 0.6666394918232512, "learning_rate": 1.338264395782644e-05, "loss": 0.6174, "step": 13625 }, { "epoch": 0.39782780064815626, "grad_norm": 0.6371786507685082, "learning_rate": 1.3381995133819952e-05, "loss": 0.6284, "step": 13626 }, { "epoch": 0.3978569968760036, "grad_norm": 0.6244941037552978, "learning_rate": 1.3381346309813464e-05, "loss": 0.6191, "step": 13627 }, { "epoch": 0.397886193103851, "grad_norm": 0.6264394268853448, "learning_rate": 1.3380697485806976e-05, "loss": 0.5757, "step": 13628 }, { "epoch": 0.39791538933169834, "grad_norm": 0.7015370023081963, "learning_rate": 1.338004866180049e-05, "loss": 0.6828, "step": 13629 }, { "epoch": 0.3979445855595457, "grad_norm": 0.7656578083117556, "learning_rate": 1.3379399837794e-05, "loss": 0.6253, "step": 13630 }, { "epoch": 0.39797378178739307, "grad_norm": 0.6626090370884499, "learning_rate": 1.3378751013787512e-05, "loss": 0.5951, "step": 13631 }, { "epoch": 0.3980029780152404, "grad_norm": 0.6317841223264307, "learning_rate": 1.3378102189781024e-05, "loss": 0.6167, "step": 13632 }, { "epoch": 0.3980321742430878, "grad_norm": 0.7450234742230025, "learning_rate": 1.3377453365774536e-05, "loss": 0.688, "step": 13633 }, { "epoch": 0.39806137047093515, "grad_norm": 0.6180418243759794, "learning_rate": 1.3376804541768048e-05, "loss": 0.6072, "step": 13634 }, { "epoch": 0.3980905666987825, "grad_norm": 0.628469595310685, "learning_rate": 1.3376155717761558e-05, "loss": 0.5985, "step": 13635 }, { "epoch": 0.39811976292662987, "grad_norm": 0.65600025316935, "learning_rate": 1.337550689375507e-05, "loss": 0.6793, "step": 13636 }, { "epoch": 0.39814895915447723, "grad_norm": 0.6169330363105094, "learning_rate": 1.3374858069748582e-05, "loss": 0.5663, "step": 13637 }, { "epoch": 0.3981781553823246, "grad_norm": 0.647945666838494, "learning_rate": 1.3374209245742094e-05, "loss": 0.6493, "step": 13638 }, { "epoch": 0.39820735161017196, "grad_norm": 0.6713007726120499, "learning_rate": 1.3373560421735605e-05, "loss": 0.6398, "step": 13639 }, { "epoch": 0.3982365478380193, "grad_norm": 0.6650982309624887, "learning_rate": 1.3372911597729117e-05, "loss": 0.6534, "step": 13640 }, { "epoch": 0.3982657440658667, "grad_norm": 0.6872127040900321, "learning_rate": 1.3372262773722629e-05, "loss": 0.7167, "step": 13641 }, { "epoch": 0.39829494029371404, "grad_norm": 0.6533241196468829, "learning_rate": 1.337161394971614e-05, "loss": 0.5792, "step": 13642 }, { "epoch": 0.3983241365215614, "grad_norm": 0.7074324632441488, "learning_rate": 1.3370965125709651e-05, "loss": 0.7134, "step": 13643 }, { "epoch": 0.39835333274940876, "grad_norm": 0.6636577886350689, "learning_rate": 1.3370316301703163e-05, "loss": 0.6377, "step": 13644 }, { "epoch": 0.3983825289772561, "grad_norm": 0.6502077346299466, "learning_rate": 1.3369667477696675e-05, "loss": 0.6636, "step": 13645 }, { "epoch": 0.3984117252051035, "grad_norm": 0.595663362190761, "learning_rate": 1.3369018653690187e-05, "loss": 0.5667, "step": 13646 }, { "epoch": 0.39844092143295085, "grad_norm": 0.7278426983972195, "learning_rate": 1.3368369829683699e-05, "loss": 0.6488, "step": 13647 }, { "epoch": 0.3984701176607982, "grad_norm": 0.7152178031170704, "learning_rate": 1.3367721005677213e-05, "loss": 0.7371, "step": 13648 }, { "epoch": 0.39849931388864557, "grad_norm": 0.642100215240746, "learning_rate": 1.3367072181670723e-05, "loss": 0.6372, "step": 13649 }, { "epoch": 0.39852851011649293, "grad_norm": 0.6443913567476276, "learning_rate": 1.3366423357664235e-05, "loss": 0.6299, "step": 13650 }, { "epoch": 0.3985577063443403, "grad_norm": 0.6381568189343617, "learning_rate": 1.3365774533657747e-05, "loss": 0.5863, "step": 13651 }, { "epoch": 0.39858690257218765, "grad_norm": 0.64494434255423, "learning_rate": 1.336512570965126e-05, "loss": 0.6016, "step": 13652 }, { "epoch": 0.398616098800035, "grad_norm": 0.6701011945706088, "learning_rate": 1.3364476885644771e-05, "loss": 0.6665, "step": 13653 }, { "epoch": 0.3986452950278824, "grad_norm": 0.7503556230649964, "learning_rate": 1.3363828061638282e-05, "loss": 0.722, "step": 13654 }, { "epoch": 0.39867449125572973, "grad_norm": 0.5912975379578618, "learning_rate": 1.3363179237631794e-05, "loss": 0.5476, "step": 13655 }, { "epoch": 0.3987036874835771, "grad_norm": 0.6758346264324504, "learning_rate": 1.3362530413625306e-05, "loss": 0.6818, "step": 13656 }, { "epoch": 0.39873288371142446, "grad_norm": 0.6770785541883731, "learning_rate": 1.3361881589618818e-05, "loss": 0.6932, "step": 13657 }, { "epoch": 0.3987620799392718, "grad_norm": 0.6861570843368, "learning_rate": 1.3361232765612328e-05, "loss": 0.5998, "step": 13658 }, { "epoch": 0.39879127616711924, "grad_norm": 0.5984999550820445, "learning_rate": 1.336058394160584e-05, "loss": 0.5284, "step": 13659 }, { "epoch": 0.3988204723949666, "grad_norm": 0.6286631812937615, "learning_rate": 1.3359935117599352e-05, "loss": 0.5766, "step": 13660 }, { "epoch": 0.39884966862281396, "grad_norm": 0.6705767145428218, "learning_rate": 1.3359286293592864e-05, "loss": 0.6604, "step": 13661 }, { "epoch": 0.3988788648506613, "grad_norm": 0.6645312583436244, "learning_rate": 1.3358637469586374e-05, "loss": 0.684, "step": 13662 }, { "epoch": 0.3989080610785087, "grad_norm": 0.6464500201100377, "learning_rate": 1.3357988645579886e-05, "loss": 0.6097, "step": 13663 }, { "epoch": 0.39893725730635604, "grad_norm": 0.7334734387169822, "learning_rate": 1.3357339821573398e-05, "loss": 0.7022, "step": 13664 }, { "epoch": 0.3989664535342034, "grad_norm": 0.6803657174164637, "learning_rate": 1.335669099756691e-05, "loss": 0.6573, "step": 13665 }, { "epoch": 0.39899564976205076, "grad_norm": 0.6128932595964266, "learning_rate": 1.3356042173560423e-05, "loss": 0.5951, "step": 13666 }, { "epoch": 0.3990248459898981, "grad_norm": 0.6459485886891646, "learning_rate": 1.3355393349553936e-05, "loss": 0.6271, "step": 13667 }, { "epoch": 0.3990540422177455, "grad_norm": 0.6213348631563378, "learning_rate": 1.3354744525547447e-05, "loss": 0.5563, "step": 13668 }, { "epoch": 0.39908323844559285, "grad_norm": 0.6703289306576871, "learning_rate": 1.3354095701540959e-05, "loss": 0.6034, "step": 13669 }, { "epoch": 0.3991124346734402, "grad_norm": 0.6801100145547434, "learning_rate": 1.335344687753447e-05, "loss": 0.6609, "step": 13670 }, { "epoch": 0.39914163090128757, "grad_norm": 0.6480934020664613, "learning_rate": 1.3352798053527983e-05, "loss": 0.6324, "step": 13671 }, { "epoch": 0.39917082712913493, "grad_norm": 0.6820664037586439, "learning_rate": 1.3352149229521495e-05, "loss": 0.6709, "step": 13672 }, { "epoch": 0.3992000233569823, "grad_norm": 0.6044781523454341, "learning_rate": 1.3351500405515005e-05, "loss": 0.5397, "step": 13673 }, { "epoch": 0.39922921958482965, "grad_norm": 0.6841456030055892, "learning_rate": 1.3350851581508517e-05, "loss": 0.72, "step": 13674 }, { "epoch": 0.399258415812677, "grad_norm": 0.6327272510681523, "learning_rate": 1.3350202757502029e-05, "loss": 0.5982, "step": 13675 }, { "epoch": 0.3992876120405244, "grad_norm": 0.6558994939832745, "learning_rate": 1.3349553933495541e-05, "loss": 0.6409, "step": 13676 }, { "epoch": 0.39931680826837174, "grad_norm": 0.6822600151725317, "learning_rate": 1.3348905109489051e-05, "loss": 0.6739, "step": 13677 }, { "epoch": 0.3993460044962191, "grad_norm": 0.6277619584168158, "learning_rate": 1.3348256285482563e-05, "loss": 0.5477, "step": 13678 }, { "epoch": 0.39937520072406646, "grad_norm": 0.7156665671151855, "learning_rate": 1.3347607461476075e-05, "loss": 0.6555, "step": 13679 }, { "epoch": 0.3994043969519138, "grad_norm": 0.6740231077779044, "learning_rate": 1.3346958637469587e-05, "loss": 0.6519, "step": 13680 }, { "epoch": 0.3994335931797612, "grad_norm": 0.6510366188174905, "learning_rate": 1.3346309813463098e-05, "loss": 0.6474, "step": 13681 }, { "epoch": 0.39946278940760854, "grad_norm": 0.6542563843575225, "learning_rate": 1.334566098945661e-05, "loss": 0.6981, "step": 13682 }, { "epoch": 0.3994919856354559, "grad_norm": 0.6510707162919572, "learning_rate": 1.3345012165450122e-05, "loss": 0.6132, "step": 13683 }, { "epoch": 0.39952118186330327, "grad_norm": 0.7051775459594646, "learning_rate": 1.3344363341443634e-05, "loss": 0.7428, "step": 13684 }, { "epoch": 0.3995503780911506, "grad_norm": 0.688165081884064, "learning_rate": 1.3343714517437146e-05, "loss": 0.6268, "step": 13685 }, { "epoch": 0.399579574318998, "grad_norm": 0.6428174924252299, "learning_rate": 1.334306569343066e-05, "loss": 0.6349, "step": 13686 }, { "epoch": 0.39960877054684535, "grad_norm": 0.6331253550646216, "learning_rate": 1.334241686942417e-05, "loss": 0.6192, "step": 13687 }, { "epoch": 0.3996379667746927, "grad_norm": 0.6484025445453662, "learning_rate": 1.3341768045417682e-05, "loss": 0.6435, "step": 13688 }, { "epoch": 0.39966716300254007, "grad_norm": 0.6182023646784107, "learning_rate": 1.3341119221411194e-05, "loss": 0.599, "step": 13689 }, { "epoch": 0.39969635923038743, "grad_norm": 0.6481886776556743, "learning_rate": 1.3340470397404706e-05, "loss": 0.6002, "step": 13690 }, { "epoch": 0.3997255554582348, "grad_norm": 0.6465993690360896, "learning_rate": 1.3339821573398218e-05, "loss": 0.6501, "step": 13691 }, { "epoch": 0.39975475168608215, "grad_norm": 0.6427764970093031, "learning_rate": 1.3339172749391728e-05, "loss": 0.6099, "step": 13692 }, { "epoch": 0.3997839479139295, "grad_norm": 0.6454263424514513, "learning_rate": 1.333852392538524e-05, "loss": 0.5928, "step": 13693 }, { "epoch": 0.3998131441417769, "grad_norm": 0.6046792773145194, "learning_rate": 1.3337875101378752e-05, "loss": 0.57, "step": 13694 }, { "epoch": 0.39984234036962424, "grad_norm": 0.67398836169309, "learning_rate": 1.3337226277372265e-05, "loss": 0.6426, "step": 13695 }, { "epoch": 0.3998715365974716, "grad_norm": 0.6857451828058007, "learning_rate": 1.3336577453365775e-05, "loss": 0.6216, "step": 13696 }, { "epoch": 0.39990073282531896, "grad_norm": 0.6185093098053449, "learning_rate": 1.3335928629359287e-05, "loss": 0.5694, "step": 13697 }, { "epoch": 0.3999299290531663, "grad_norm": 0.6101036635914574, "learning_rate": 1.3335279805352799e-05, "loss": 0.5758, "step": 13698 }, { "epoch": 0.3999591252810137, "grad_norm": 0.6116442041071005, "learning_rate": 1.3334630981346311e-05, "loss": 0.6098, "step": 13699 }, { "epoch": 0.39998832150886104, "grad_norm": 0.6593695653945795, "learning_rate": 1.3333982157339821e-05, "loss": 0.6396, "step": 13700 }, { "epoch": 0.4000175177367084, "grad_norm": 0.6522861955139897, "learning_rate": 1.3333333333333333e-05, "loss": 0.6628, "step": 13701 }, { "epoch": 0.40004671396455577, "grad_norm": 0.7042143204767254, "learning_rate": 1.3332684509326845e-05, "loss": 0.6364, "step": 13702 }, { "epoch": 0.40007591019240313, "grad_norm": 0.6637328789434656, "learning_rate": 1.3332035685320357e-05, "loss": 0.6011, "step": 13703 }, { "epoch": 0.4001051064202505, "grad_norm": 0.6664968878861125, "learning_rate": 1.333138686131387e-05, "loss": 0.5771, "step": 13704 }, { "epoch": 0.40013430264809785, "grad_norm": 0.610900160838713, "learning_rate": 1.3330738037307383e-05, "loss": 0.5647, "step": 13705 }, { "epoch": 0.4001634988759452, "grad_norm": 0.6690515349523914, "learning_rate": 1.3330089213300893e-05, "loss": 0.631, "step": 13706 }, { "epoch": 0.4001926951037926, "grad_norm": 0.6545599451306594, "learning_rate": 1.3329440389294405e-05, "loss": 0.632, "step": 13707 }, { "epoch": 0.40022189133163993, "grad_norm": 0.6528014737127594, "learning_rate": 1.3328791565287917e-05, "loss": 0.6327, "step": 13708 }, { "epoch": 0.4002510875594873, "grad_norm": 0.6590544110047211, "learning_rate": 1.332814274128143e-05, "loss": 0.6414, "step": 13709 }, { "epoch": 0.40028028378733466, "grad_norm": 0.6839728996267826, "learning_rate": 1.332749391727494e-05, "loss": 0.7211, "step": 13710 }, { "epoch": 0.400309480015182, "grad_norm": 0.6378202264681405, "learning_rate": 1.3326845093268452e-05, "loss": 0.5791, "step": 13711 }, { "epoch": 0.4003386762430294, "grad_norm": 0.6574178020484714, "learning_rate": 1.3326196269261964e-05, "loss": 0.5828, "step": 13712 }, { "epoch": 0.40036787247087674, "grad_norm": 0.6811277461022647, "learning_rate": 1.3325547445255476e-05, "loss": 0.6642, "step": 13713 }, { "epoch": 0.4003970686987241, "grad_norm": 0.6312233355042363, "learning_rate": 1.3324898621248988e-05, "loss": 0.6104, "step": 13714 }, { "epoch": 0.40042626492657146, "grad_norm": 0.6401976435638745, "learning_rate": 1.3324249797242498e-05, "loss": 0.5953, "step": 13715 }, { "epoch": 0.4004554611544188, "grad_norm": 0.6577523312077884, "learning_rate": 1.332360097323601e-05, "loss": 0.6235, "step": 13716 }, { "epoch": 0.4004846573822662, "grad_norm": 0.6472195713097942, "learning_rate": 1.3322952149229522e-05, "loss": 0.5701, "step": 13717 }, { "epoch": 0.40051385361011355, "grad_norm": 0.5741739545753849, "learning_rate": 1.3322303325223034e-05, "loss": 0.4838, "step": 13718 }, { "epoch": 0.40054304983796096, "grad_norm": 0.6718859759318522, "learning_rate": 1.3321654501216545e-05, "loss": 0.6773, "step": 13719 }, { "epoch": 0.4005722460658083, "grad_norm": 0.5875374960633761, "learning_rate": 1.3321005677210057e-05, "loss": 0.4991, "step": 13720 }, { "epoch": 0.4006014422936557, "grad_norm": 0.7193640654717285, "learning_rate": 1.3320356853203569e-05, "loss": 0.755, "step": 13721 }, { "epoch": 0.40063063852150305, "grad_norm": 0.6564159756589044, "learning_rate": 1.331970802919708e-05, "loss": 0.6153, "step": 13722 }, { "epoch": 0.4006598347493504, "grad_norm": 0.6362908459332952, "learning_rate": 1.3319059205190593e-05, "loss": 0.6102, "step": 13723 }, { "epoch": 0.40068903097719777, "grad_norm": 0.640072757813265, "learning_rate": 1.3318410381184107e-05, "loss": 0.6361, "step": 13724 }, { "epoch": 0.40071822720504513, "grad_norm": 0.6689211233489666, "learning_rate": 1.3317761557177617e-05, "loss": 0.6705, "step": 13725 }, { "epoch": 0.4007474234328925, "grad_norm": 0.6238517211433201, "learning_rate": 1.3317112733171129e-05, "loss": 0.5581, "step": 13726 }, { "epoch": 0.40077661966073985, "grad_norm": 0.6266030787181889, "learning_rate": 1.3316463909164641e-05, "loss": 0.6215, "step": 13727 }, { "epoch": 0.4008058158885872, "grad_norm": 0.6638072311853308, "learning_rate": 1.3315815085158153e-05, "loss": 0.6632, "step": 13728 }, { "epoch": 0.4008350121164346, "grad_norm": 0.7386109475587826, "learning_rate": 1.3315166261151663e-05, "loss": 0.7021, "step": 13729 }, { "epoch": 0.40086420834428194, "grad_norm": 0.6520182623787939, "learning_rate": 1.3314517437145175e-05, "loss": 0.6327, "step": 13730 }, { "epoch": 0.4008934045721293, "grad_norm": 0.6740033798098668, "learning_rate": 1.3313868613138687e-05, "loss": 0.6457, "step": 13731 }, { "epoch": 0.40092260079997666, "grad_norm": 0.6447152382581823, "learning_rate": 1.33132197891322e-05, "loss": 0.6129, "step": 13732 }, { "epoch": 0.400951797027824, "grad_norm": 0.579985706723616, "learning_rate": 1.3312570965125711e-05, "loss": 0.5188, "step": 13733 }, { "epoch": 0.4009809932556714, "grad_norm": 0.6668852884596802, "learning_rate": 1.3311922141119222e-05, "loss": 0.6404, "step": 13734 }, { "epoch": 0.40101018948351874, "grad_norm": 0.6369391626746733, "learning_rate": 1.3311273317112734e-05, "loss": 0.6014, "step": 13735 }, { "epoch": 0.4010393857113661, "grad_norm": 0.6596502083215083, "learning_rate": 1.3310624493106246e-05, "loss": 0.6552, "step": 13736 }, { "epoch": 0.40106858193921346, "grad_norm": 0.6361523322721104, "learning_rate": 1.3309975669099758e-05, "loss": 0.6065, "step": 13737 }, { "epoch": 0.4010977781670608, "grad_norm": 0.6749440783895229, "learning_rate": 1.3309326845093268e-05, "loss": 0.631, "step": 13738 }, { "epoch": 0.4011269743949082, "grad_norm": 0.6392531129368201, "learning_rate": 1.330867802108678e-05, "loss": 0.5508, "step": 13739 }, { "epoch": 0.40115617062275555, "grad_norm": 0.616125163704791, "learning_rate": 1.3308029197080292e-05, "loss": 0.5609, "step": 13740 }, { "epoch": 0.4011853668506029, "grad_norm": 0.6141606979683897, "learning_rate": 1.3307380373073804e-05, "loss": 0.5465, "step": 13741 }, { "epoch": 0.40121456307845027, "grad_norm": 0.6611272283351357, "learning_rate": 1.3306731549067318e-05, "loss": 0.6475, "step": 13742 }, { "epoch": 0.40124375930629763, "grad_norm": 0.5924775619696592, "learning_rate": 1.330608272506083e-05, "loss": 0.5354, "step": 13743 }, { "epoch": 0.401272955534145, "grad_norm": 0.6387565645966932, "learning_rate": 1.330543390105434e-05, "loss": 0.5857, "step": 13744 }, { "epoch": 0.40130215176199235, "grad_norm": 0.7392550882856269, "learning_rate": 1.3304785077047852e-05, "loss": 0.666, "step": 13745 }, { "epoch": 0.4013313479898397, "grad_norm": 0.6886932292905844, "learning_rate": 1.3304136253041364e-05, "loss": 0.6994, "step": 13746 }, { "epoch": 0.4013605442176871, "grad_norm": 0.6776974031822266, "learning_rate": 1.3303487429034876e-05, "loss": 0.6978, "step": 13747 }, { "epoch": 0.40138974044553444, "grad_norm": 0.6193579304457878, "learning_rate": 1.3302838605028387e-05, "loss": 0.6029, "step": 13748 }, { "epoch": 0.4014189366733818, "grad_norm": 0.7356869154758812, "learning_rate": 1.3302189781021899e-05, "loss": 0.772, "step": 13749 }, { "epoch": 0.40144813290122916, "grad_norm": 0.7111149340390419, "learning_rate": 1.330154095701541e-05, "loss": 0.7033, "step": 13750 }, { "epoch": 0.4014773291290765, "grad_norm": 0.6846412081700427, "learning_rate": 1.3300892133008923e-05, "loss": 0.6642, "step": 13751 }, { "epoch": 0.4015065253569239, "grad_norm": 0.6049362345235204, "learning_rate": 1.3300243309002435e-05, "loss": 0.5697, "step": 13752 }, { "epoch": 0.40153572158477124, "grad_norm": 0.6708001543748588, "learning_rate": 1.3299594484995945e-05, "loss": 0.6652, "step": 13753 }, { "epoch": 0.4015649178126186, "grad_norm": 0.6137133093834325, "learning_rate": 1.3298945660989457e-05, "loss": 0.5919, "step": 13754 }, { "epoch": 0.40159411404046597, "grad_norm": 0.6608007276595894, "learning_rate": 1.329829683698297e-05, "loss": 0.6627, "step": 13755 }, { "epoch": 0.4016233102683133, "grad_norm": 0.5847327868410491, "learning_rate": 1.3297648012976481e-05, "loss": 0.5459, "step": 13756 }, { "epoch": 0.4016525064961607, "grad_norm": 0.5705909946043111, "learning_rate": 1.3296999188969992e-05, "loss": 0.4895, "step": 13757 }, { "epoch": 0.40168170272400805, "grad_norm": 0.6355054921205202, "learning_rate": 1.3296350364963504e-05, "loss": 0.5854, "step": 13758 }, { "epoch": 0.4017108989518554, "grad_norm": 0.6320566888688898, "learning_rate": 1.3295701540957016e-05, "loss": 0.6152, "step": 13759 }, { "epoch": 0.40174009517970277, "grad_norm": 0.6240579711973552, "learning_rate": 1.3295052716950528e-05, "loss": 0.5986, "step": 13760 }, { "epoch": 0.40176929140755013, "grad_norm": 0.6147950045269215, "learning_rate": 1.3294403892944041e-05, "loss": 0.5464, "step": 13761 }, { "epoch": 0.4017984876353975, "grad_norm": 0.6297484285571958, "learning_rate": 1.3293755068937553e-05, "loss": 0.604, "step": 13762 }, { "epoch": 0.40182768386324486, "grad_norm": 0.6564966356788196, "learning_rate": 1.3293106244931064e-05, "loss": 0.5401, "step": 13763 }, { "epoch": 0.4018568800910922, "grad_norm": 0.7726820374373842, "learning_rate": 1.3292457420924576e-05, "loss": 0.6944, "step": 13764 }, { "epoch": 0.4018860763189396, "grad_norm": 0.6442579193011121, "learning_rate": 1.3291808596918088e-05, "loss": 0.6262, "step": 13765 }, { "epoch": 0.40191527254678694, "grad_norm": 0.6578112992017974, "learning_rate": 1.32911597729116e-05, "loss": 0.6305, "step": 13766 }, { "epoch": 0.4019444687746343, "grad_norm": 0.7074240319280052, "learning_rate": 1.329051094890511e-05, "loss": 0.721, "step": 13767 }, { "epoch": 0.40197366500248166, "grad_norm": 0.6410695368111254, "learning_rate": 1.3289862124898622e-05, "loss": 0.614, "step": 13768 }, { "epoch": 0.402002861230329, "grad_norm": 0.6419835858025225, "learning_rate": 1.3289213300892134e-05, "loss": 0.5724, "step": 13769 }, { "epoch": 0.4020320574581764, "grad_norm": 0.592909234928487, "learning_rate": 1.3288564476885646e-05, "loss": 0.5318, "step": 13770 }, { "epoch": 0.40206125368602375, "grad_norm": 0.6541679837862014, "learning_rate": 1.3287915652879158e-05, "loss": 0.6441, "step": 13771 }, { "epoch": 0.4020904499138711, "grad_norm": 0.6868004802627874, "learning_rate": 1.3287266828872669e-05, "loss": 0.615, "step": 13772 }, { "epoch": 0.40211964614171847, "grad_norm": 0.6573815579776164, "learning_rate": 1.328661800486618e-05, "loss": 0.6397, "step": 13773 }, { "epoch": 0.40214884236956583, "grad_norm": 0.638988239164802, "learning_rate": 1.3285969180859693e-05, "loss": 0.6092, "step": 13774 }, { "epoch": 0.4021780385974132, "grad_norm": 0.6465208082024132, "learning_rate": 1.3285320356853205e-05, "loss": 0.6656, "step": 13775 }, { "epoch": 0.40220723482526055, "grad_norm": 0.7285783569525768, "learning_rate": 1.3284671532846715e-05, "loss": 0.721, "step": 13776 }, { "epoch": 0.4022364310531079, "grad_norm": 0.6593310054236075, "learning_rate": 1.3284022708840227e-05, "loss": 0.6037, "step": 13777 }, { "epoch": 0.4022656272809553, "grad_norm": 0.6680724839511367, "learning_rate": 1.3283373884833739e-05, "loss": 0.6503, "step": 13778 }, { "epoch": 0.4022948235088027, "grad_norm": 0.6722961354249236, "learning_rate": 1.3282725060827251e-05, "loss": 0.6694, "step": 13779 }, { "epoch": 0.40232401973665005, "grad_norm": 0.6104914248490588, "learning_rate": 1.3282076236820765e-05, "loss": 0.558, "step": 13780 }, { "epoch": 0.4023532159644974, "grad_norm": 0.6672478554172914, "learning_rate": 1.3281427412814277e-05, "loss": 0.6615, "step": 13781 }, { "epoch": 0.4023824121923448, "grad_norm": 0.6612662851561388, "learning_rate": 1.3280778588807787e-05, "loss": 0.6506, "step": 13782 }, { "epoch": 0.40241160842019214, "grad_norm": 0.6190062613195777, "learning_rate": 1.32801297648013e-05, "loss": 0.5941, "step": 13783 }, { "epoch": 0.4024408046480395, "grad_norm": 0.6983382786789709, "learning_rate": 1.3279480940794811e-05, "loss": 0.6393, "step": 13784 }, { "epoch": 0.40247000087588686, "grad_norm": 0.6184724931328124, "learning_rate": 1.3278832116788323e-05, "loss": 0.6003, "step": 13785 }, { "epoch": 0.4024991971037342, "grad_norm": 0.6212254578128588, "learning_rate": 1.3278183292781834e-05, "loss": 0.5654, "step": 13786 }, { "epoch": 0.4025283933315816, "grad_norm": 0.6243194698523017, "learning_rate": 1.3277534468775346e-05, "loss": 0.5498, "step": 13787 }, { "epoch": 0.40255758955942894, "grad_norm": 0.6305876527262672, "learning_rate": 1.3276885644768858e-05, "loss": 0.62, "step": 13788 }, { "epoch": 0.4025867857872763, "grad_norm": 0.6222586746312396, "learning_rate": 1.327623682076237e-05, "loss": 0.5986, "step": 13789 }, { "epoch": 0.40261598201512366, "grad_norm": 0.6604134349480345, "learning_rate": 1.3275587996755882e-05, "loss": 0.6794, "step": 13790 }, { "epoch": 0.402645178242971, "grad_norm": 0.6236916630480528, "learning_rate": 1.3274939172749392e-05, "loss": 0.6006, "step": 13791 }, { "epoch": 0.4026743744708184, "grad_norm": 0.612173364746389, "learning_rate": 1.3274290348742904e-05, "loss": 0.5204, "step": 13792 }, { "epoch": 0.40270357069866575, "grad_norm": 0.6040527286455887, "learning_rate": 1.3273641524736416e-05, "loss": 0.5853, "step": 13793 }, { "epoch": 0.4027327669265131, "grad_norm": 0.6457424435058179, "learning_rate": 1.3272992700729928e-05, "loss": 0.5775, "step": 13794 }, { "epoch": 0.40276196315436047, "grad_norm": 0.6933338695114678, "learning_rate": 1.3272343876723438e-05, "loss": 0.5955, "step": 13795 }, { "epoch": 0.40279115938220783, "grad_norm": 0.6175670730948035, "learning_rate": 1.327169505271695e-05, "loss": 0.5708, "step": 13796 }, { "epoch": 0.4028203556100552, "grad_norm": 0.7231835633999187, "learning_rate": 1.3271046228710462e-05, "loss": 0.6422, "step": 13797 }, { "epoch": 0.40284955183790255, "grad_norm": 0.6908827800474182, "learning_rate": 1.3270397404703975e-05, "loss": 0.7154, "step": 13798 }, { "epoch": 0.4028787480657499, "grad_norm": 0.7016624480324171, "learning_rate": 1.3269748580697488e-05, "loss": 0.6912, "step": 13799 }, { "epoch": 0.4029079442935973, "grad_norm": 0.6243670174788432, "learning_rate": 1.3269099756691e-05, "loss": 0.5845, "step": 13800 }, { "epoch": 0.40293714052144464, "grad_norm": 0.6321394784373189, "learning_rate": 1.326845093268451e-05, "loss": 0.5579, "step": 13801 }, { "epoch": 0.402966336749292, "grad_norm": 0.6634903487356502, "learning_rate": 1.3267802108678023e-05, "loss": 0.6496, "step": 13802 }, { "epoch": 0.40299553297713936, "grad_norm": 0.643762556000972, "learning_rate": 1.3267153284671535e-05, "loss": 0.6709, "step": 13803 }, { "epoch": 0.4030247292049867, "grad_norm": 0.6429845375099701, "learning_rate": 1.3266504460665047e-05, "loss": 0.6163, "step": 13804 }, { "epoch": 0.4030539254328341, "grad_norm": 0.6737053284386143, "learning_rate": 1.3265855636658557e-05, "loss": 0.6836, "step": 13805 }, { "epoch": 0.40308312166068144, "grad_norm": 0.6437998209362542, "learning_rate": 1.3265206812652069e-05, "loss": 0.6156, "step": 13806 }, { "epoch": 0.4031123178885288, "grad_norm": 0.8032323787452346, "learning_rate": 1.3264557988645581e-05, "loss": 0.669, "step": 13807 }, { "epoch": 0.40314151411637617, "grad_norm": 0.6694226677562857, "learning_rate": 1.3263909164639093e-05, "loss": 0.6629, "step": 13808 }, { "epoch": 0.4031707103442235, "grad_norm": 0.6163010848524056, "learning_rate": 1.3263260340632605e-05, "loss": 0.6087, "step": 13809 }, { "epoch": 0.4031999065720709, "grad_norm": 0.6623195951691889, "learning_rate": 1.3262611516626115e-05, "loss": 0.6837, "step": 13810 }, { "epoch": 0.40322910279991825, "grad_norm": 0.6469262700448879, "learning_rate": 1.3261962692619627e-05, "loss": 0.6648, "step": 13811 }, { "epoch": 0.4032582990277656, "grad_norm": 0.5958591066115904, "learning_rate": 1.326131386861314e-05, "loss": 0.5815, "step": 13812 }, { "epoch": 0.40328749525561297, "grad_norm": 0.6406803748759566, "learning_rate": 1.3260665044606652e-05, "loss": 0.6177, "step": 13813 }, { "epoch": 0.40331669148346033, "grad_norm": 0.6998355470073101, "learning_rate": 1.3260016220600162e-05, "loss": 0.6803, "step": 13814 }, { "epoch": 0.4033458877113077, "grad_norm": 0.6575933984158, "learning_rate": 1.3259367396593674e-05, "loss": 0.6168, "step": 13815 }, { "epoch": 0.40337508393915505, "grad_norm": 0.6067854413431475, "learning_rate": 1.3258718572587186e-05, "loss": 0.5745, "step": 13816 }, { "epoch": 0.4034042801670024, "grad_norm": 0.640254403227306, "learning_rate": 1.3258069748580698e-05, "loss": 0.58, "step": 13817 }, { "epoch": 0.4034334763948498, "grad_norm": 0.6962969018528703, "learning_rate": 1.3257420924574212e-05, "loss": 0.6907, "step": 13818 }, { "epoch": 0.40346267262269714, "grad_norm": 0.6942640310109431, "learning_rate": 1.3256772100567724e-05, "loss": 0.6954, "step": 13819 }, { "epoch": 0.4034918688505445, "grad_norm": 0.6391026202329869, "learning_rate": 1.3256123276561234e-05, "loss": 0.6107, "step": 13820 }, { "epoch": 0.40352106507839186, "grad_norm": 0.6907256728188859, "learning_rate": 1.3255474452554746e-05, "loss": 0.675, "step": 13821 }, { "epoch": 0.4035502613062392, "grad_norm": 0.6591635455740315, "learning_rate": 1.3254825628548258e-05, "loss": 0.6254, "step": 13822 }, { "epoch": 0.4035794575340866, "grad_norm": 0.6468132385538439, "learning_rate": 1.325417680454177e-05, "loss": 0.5811, "step": 13823 }, { "epoch": 0.40360865376193394, "grad_norm": 0.6121294441185505, "learning_rate": 1.325352798053528e-05, "loss": 0.5488, "step": 13824 }, { "epoch": 0.4036378499897813, "grad_norm": 0.6424406904819041, "learning_rate": 1.3252879156528792e-05, "loss": 0.6425, "step": 13825 }, { "epoch": 0.40366704621762867, "grad_norm": 0.6024543337044623, "learning_rate": 1.3252230332522305e-05, "loss": 0.5881, "step": 13826 }, { "epoch": 0.40369624244547603, "grad_norm": 0.7103269735584655, "learning_rate": 1.3251581508515817e-05, "loss": 0.65, "step": 13827 }, { "epoch": 0.4037254386733234, "grad_norm": 0.6649903783757851, "learning_rate": 1.3250932684509329e-05, "loss": 0.6292, "step": 13828 }, { "epoch": 0.40375463490117075, "grad_norm": 0.6899102939515844, "learning_rate": 1.3250283860502839e-05, "loss": 0.7169, "step": 13829 }, { "epoch": 0.4037838311290181, "grad_norm": 0.6836401823692604, "learning_rate": 1.3249635036496351e-05, "loss": 0.6679, "step": 13830 }, { "epoch": 0.4038130273568655, "grad_norm": 0.616726329454562, "learning_rate": 1.3248986212489863e-05, "loss": 0.5998, "step": 13831 }, { "epoch": 0.40384222358471283, "grad_norm": 0.6402598782909453, "learning_rate": 1.3248337388483375e-05, "loss": 0.5926, "step": 13832 }, { "epoch": 0.4038714198125602, "grad_norm": 0.6578625292311002, "learning_rate": 1.3247688564476885e-05, "loss": 0.6366, "step": 13833 }, { "epoch": 0.40390061604040756, "grad_norm": 0.6153545925542515, "learning_rate": 1.3247039740470397e-05, "loss": 0.5487, "step": 13834 }, { "epoch": 0.4039298122682549, "grad_norm": 0.6640250626306149, "learning_rate": 1.324639091646391e-05, "loss": 0.6828, "step": 13835 }, { "epoch": 0.4039590084961023, "grad_norm": 0.6347739717506369, "learning_rate": 1.3245742092457421e-05, "loss": 0.5911, "step": 13836 }, { "epoch": 0.40398820472394964, "grad_norm": 0.713754201266752, "learning_rate": 1.3245093268450935e-05, "loss": 0.6524, "step": 13837 }, { "epoch": 0.404017400951797, "grad_norm": 0.6495741741631207, "learning_rate": 1.3244444444444447e-05, "loss": 0.6521, "step": 13838 }, { "epoch": 0.40404659717964436, "grad_norm": 1.0285832484889135, "learning_rate": 1.3243795620437957e-05, "loss": 0.5983, "step": 13839 }, { "epoch": 0.4040757934074918, "grad_norm": 0.6992273085152358, "learning_rate": 1.324314679643147e-05, "loss": 0.6853, "step": 13840 }, { "epoch": 0.40410498963533914, "grad_norm": 0.6213264301178236, "learning_rate": 1.3242497972424982e-05, "loss": 0.5721, "step": 13841 }, { "epoch": 0.4041341858631865, "grad_norm": 0.5982402473971563, "learning_rate": 1.3241849148418494e-05, "loss": 0.577, "step": 13842 }, { "epoch": 0.40416338209103386, "grad_norm": 0.6708228069645381, "learning_rate": 1.3241200324412004e-05, "loss": 0.6241, "step": 13843 }, { "epoch": 0.4041925783188812, "grad_norm": 0.6718500742256344, "learning_rate": 1.3240551500405516e-05, "loss": 0.6378, "step": 13844 }, { "epoch": 0.4042217745467286, "grad_norm": 0.6523709923999176, "learning_rate": 1.3239902676399028e-05, "loss": 0.6664, "step": 13845 }, { "epoch": 0.40425097077457595, "grad_norm": 0.6684177776573582, "learning_rate": 1.323925385239254e-05, "loss": 0.6416, "step": 13846 }, { "epoch": 0.4042801670024233, "grad_norm": 0.6454271614478567, "learning_rate": 1.3238605028386052e-05, "loss": 0.6412, "step": 13847 }, { "epoch": 0.40430936323027067, "grad_norm": 0.6253705628341534, "learning_rate": 1.3237956204379562e-05, "loss": 0.6198, "step": 13848 }, { "epoch": 0.40433855945811803, "grad_norm": 0.6614182550390114, "learning_rate": 1.3237307380373074e-05, "loss": 0.5684, "step": 13849 }, { "epoch": 0.4043677556859654, "grad_norm": 0.6700274434379919, "learning_rate": 1.3236658556366586e-05, "loss": 0.6649, "step": 13850 }, { "epoch": 0.40439695191381275, "grad_norm": 0.6179254657384472, "learning_rate": 1.3236009732360098e-05, "loss": 0.547, "step": 13851 }, { "epoch": 0.4044261481416601, "grad_norm": 0.7885364524903472, "learning_rate": 1.3235360908353609e-05, "loss": 0.6864, "step": 13852 }, { "epoch": 0.4044553443695075, "grad_norm": 0.6411331263902256, "learning_rate": 1.323471208434712e-05, "loss": 0.5799, "step": 13853 }, { "epoch": 0.40448454059735484, "grad_norm": 0.6259427293647569, "learning_rate": 1.3234063260340633e-05, "loss": 0.5171, "step": 13854 }, { "epoch": 0.4045137368252022, "grad_norm": 0.6570726769564212, "learning_rate": 1.3233414436334145e-05, "loss": 0.6364, "step": 13855 }, { "epoch": 0.40454293305304956, "grad_norm": 0.6067444138605319, "learning_rate": 1.3232765612327659e-05, "loss": 0.5637, "step": 13856 }, { "epoch": 0.4045721292808969, "grad_norm": 0.6898603025476646, "learning_rate": 1.323211678832117e-05, "loss": 0.6771, "step": 13857 }, { "epoch": 0.4046013255087443, "grad_norm": 0.6556743514867674, "learning_rate": 1.3231467964314681e-05, "loss": 0.65, "step": 13858 }, { "epoch": 0.40463052173659164, "grad_norm": 0.6449236052216246, "learning_rate": 1.3230819140308193e-05, "loss": 0.5953, "step": 13859 }, { "epoch": 0.404659717964439, "grad_norm": 0.7001302186375628, "learning_rate": 1.3230170316301705e-05, "loss": 0.7028, "step": 13860 }, { "epoch": 0.40468891419228636, "grad_norm": 0.6578299533171926, "learning_rate": 1.3229521492295217e-05, "loss": 0.6224, "step": 13861 }, { "epoch": 0.4047181104201337, "grad_norm": 0.6617255942088818, "learning_rate": 1.3228872668288727e-05, "loss": 0.6004, "step": 13862 }, { "epoch": 0.4047473066479811, "grad_norm": 0.6126818187198363, "learning_rate": 1.322822384428224e-05, "loss": 0.5831, "step": 13863 }, { "epoch": 0.40477650287582845, "grad_norm": 0.6376001961502245, "learning_rate": 1.3227575020275751e-05, "loss": 0.6054, "step": 13864 }, { "epoch": 0.4048056991036758, "grad_norm": 0.6607612295971658, "learning_rate": 1.3226926196269263e-05, "loss": 0.6699, "step": 13865 }, { "epoch": 0.40483489533152317, "grad_norm": 0.5864103827246878, "learning_rate": 1.3226277372262775e-05, "loss": 0.5575, "step": 13866 }, { "epoch": 0.40486409155937053, "grad_norm": 0.6723627639975905, "learning_rate": 1.3225628548256286e-05, "loss": 0.6365, "step": 13867 }, { "epoch": 0.4048932877872179, "grad_norm": 0.6868729774261899, "learning_rate": 1.3224979724249798e-05, "loss": 0.6946, "step": 13868 }, { "epoch": 0.40492248401506525, "grad_norm": 0.6047172744581086, "learning_rate": 1.322433090024331e-05, "loss": 0.5875, "step": 13869 }, { "epoch": 0.4049516802429126, "grad_norm": 0.6454281197034664, "learning_rate": 1.3223682076236822e-05, "loss": 0.6194, "step": 13870 }, { "epoch": 0.40498087647076, "grad_norm": 0.6696154924866025, "learning_rate": 1.3223033252230332e-05, "loss": 0.6408, "step": 13871 }, { "epoch": 0.40501007269860734, "grad_norm": 0.5998440912238735, "learning_rate": 1.3222384428223844e-05, "loss": 0.6003, "step": 13872 }, { "epoch": 0.4050392689264547, "grad_norm": 0.6165259541049336, "learning_rate": 1.3221735604217356e-05, "loss": 0.6008, "step": 13873 }, { "epoch": 0.40506846515430206, "grad_norm": 0.6557019698025072, "learning_rate": 1.3221086780210868e-05, "loss": 0.654, "step": 13874 }, { "epoch": 0.4050976613821494, "grad_norm": 0.6938101892948237, "learning_rate": 1.3220437956204382e-05, "loss": 0.6581, "step": 13875 }, { "epoch": 0.4051268576099968, "grad_norm": 0.6429322293098974, "learning_rate": 1.3219789132197894e-05, "loss": 0.6325, "step": 13876 }, { "epoch": 0.40515605383784414, "grad_norm": 0.7265871477053927, "learning_rate": 1.3219140308191404e-05, "loss": 0.6046, "step": 13877 }, { "epoch": 0.4051852500656915, "grad_norm": 0.6781169048467734, "learning_rate": 1.3218491484184916e-05, "loss": 0.6715, "step": 13878 }, { "epoch": 0.40521444629353887, "grad_norm": 0.6666009023283423, "learning_rate": 1.3217842660178428e-05, "loss": 0.6127, "step": 13879 }, { "epoch": 0.4052436425213862, "grad_norm": 0.6655033465198639, "learning_rate": 1.321719383617194e-05, "loss": 0.6587, "step": 13880 }, { "epoch": 0.4052728387492336, "grad_norm": 0.6320779824068149, "learning_rate": 1.321654501216545e-05, "loss": 0.5375, "step": 13881 }, { "epoch": 0.40530203497708095, "grad_norm": 0.6763997283420861, "learning_rate": 1.3215896188158963e-05, "loss": 0.6684, "step": 13882 }, { "epoch": 0.4053312312049283, "grad_norm": 0.6842824471042094, "learning_rate": 1.3215247364152475e-05, "loss": 0.6715, "step": 13883 }, { "epoch": 0.40536042743277567, "grad_norm": 0.6154154545978175, "learning_rate": 1.3214598540145987e-05, "loss": 0.5839, "step": 13884 }, { "epoch": 0.40538962366062303, "grad_norm": 0.6888483382574567, "learning_rate": 1.3213949716139499e-05, "loss": 0.6893, "step": 13885 }, { "epoch": 0.4054188198884704, "grad_norm": 0.6480995417504777, "learning_rate": 1.321330089213301e-05, "loss": 0.5968, "step": 13886 }, { "epoch": 0.40544801611631776, "grad_norm": 0.6847645423482676, "learning_rate": 1.3212652068126521e-05, "loss": 0.6549, "step": 13887 }, { "epoch": 0.4054772123441651, "grad_norm": 0.6296355047985487, "learning_rate": 1.3212003244120033e-05, "loss": 0.6261, "step": 13888 }, { "epoch": 0.4055064085720125, "grad_norm": 0.5788439705177648, "learning_rate": 1.3211354420113545e-05, "loss": 0.5183, "step": 13889 }, { "epoch": 0.40553560479985984, "grad_norm": 0.6377631201740673, "learning_rate": 1.3210705596107056e-05, "loss": 0.6146, "step": 13890 }, { "epoch": 0.4055648010277072, "grad_norm": 0.6718543740306901, "learning_rate": 1.3210056772100568e-05, "loss": 0.6648, "step": 13891 }, { "epoch": 0.40559399725555456, "grad_norm": 0.6402352003882509, "learning_rate": 1.320940794809408e-05, "loss": 0.6193, "step": 13892 }, { "epoch": 0.4056231934834019, "grad_norm": 0.6848340301847511, "learning_rate": 1.3208759124087593e-05, "loss": 0.6484, "step": 13893 }, { "epoch": 0.4056523897112493, "grad_norm": 0.726268191512537, "learning_rate": 1.3208110300081105e-05, "loss": 0.8137, "step": 13894 }, { "epoch": 0.40568158593909664, "grad_norm": 0.575949133227225, "learning_rate": 1.3207461476074617e-05, "loss": 0.5008, "step": 13895 }, { "epoch": 0.405710782166944, "grad_norm": 0.6480438129993674, "learning_rate": 1.3206812652068128e-05, "loss": 0.6218, "step": 13896 }, { "epoch": 0.40573997839479137, "grad_norm": 0.6177560871129858, "learning_rate": 1.320616382806164e-05, "loss": 0.577, "step": 13897 }, { "epoch": 0.40576917462263873, "grad_norm": 0.6512083303188594, "learning_rate": 1.3205515004055152e-05, "loss": 0.6537, "step": 13898 }, { "epoch": 0.4057983708504861, "grad_norm": 0.6405602800408546, "learning_rate": 1.3204866180048664e-05, "loss": 0.6285, "step": 13899 }, { "epoch": 0.4058275670783335, "grad_norm": 0.6305800809488104, "learning_rate": 1.3204217356042174e-05, "loss": 0.6156, "step": 13900 }, { "epoch": 0.40585676330618087, "grad_norm": 0.6174119374778254, "learning_rate": 1.3203568532035686e-05, "loss": 0.5557, "step": 13901 }, { "epoch": 0.40588595953402823, "grad_norm": 0.6508774825164139, "learning_rate": 1.3202919708029198e-05, "loss": 0.6441, "step": 13902 }, { "epoch": 0.4059151557618756, "grad_norm": 0.6835287406196354, "learning_rate": 1.320227088402271e-05, "loss": 0.6617, "step": 13903 }, { "epoch": 0.40594435198972295, "grad_norm": 0.6391148368073425, "learning_rate": 1.3201622060016222e-05, "loss": 0.6351, "step": 13904 }, { "epoch": 0.4059735482175703, "grad_norm": 0.6561119649716783, "learning_rate": 1.3200973236009733e-05, "loss": 0.6217, "step": 13905 }, { "epoch": 0.4060027444454177, "grad_norm": 0.6204311852030246, "learning_rate": 1.3200324412003245e-05, "loss": 0.5533, "step": 13906 }, { "epoch": 0.40603194067326503, "grad_norm": 0.7043151715932301, "learning_rate": 1.3199675587996757e-05, "loss": 0.6971, "step": 13907 }, { "epoch": 0.4060611369011124, "grad_norm": 0.6576039969186307, "learning_rate": 1.3199026763990269e-05, "loss": 0.6037, "step": 13908 }, { "epoch": 0.40609033312895976, "grad_norm": 0.6458317938220763, "learning_rate": 1.3198377939983779e-05, "loss": 0.6483, "step": 13909 }, { "epoch": 0.4061195293568071, "grad_norm": 0.6218979257404038, "learning_rate": 1.3197729115977291e-05, "loss": 0.5823, "step": 13910 }, { "epoch": 0.4061487255846545, "grad_norm": 0.6506226767206476, "learning_rate": 1.3197080291970803e-05, "loss": 0.6175, "step": 13911 }, { "epoch": 0.40617792181250184, "grad_norm": 0.6638997561991303, "learning_rate": 1.3196431467964317e-05, "loss": 0.6056, "step": 13912 }, { "epoch": 0.4062071180403492, "grad_norm": 0.6243879973139401, "learning_rate": 1.3195782643957829e-05, "loss": 0.5496, "step": 13913 }, { "epoch": 0.40623631426819656, "grad_norm": 0.6769264454166143, "learning_rate": 1.3195133819951341e-05, "loss": 0.6837, "step": 13914 }, { "epoch": 0.4062655104960439, "grad_norm": 0.6657313924847359, "learning_rate": 1.3194484995944851e-05, "loss": 0.6883, "step": 13915 }, { "epoch": 0.4062947067238913, "grad_norm": 0.7190630680271728, "learning_rate": 1.3193836171938363e-05, "loss": 0.6603, "step": 13916 }, { "epoch": 0.40632390295173865, "grad_norm": 0.6647864346357234, "learning_rate": 1.3193187347931875e-05, "loss": 0.6516, "step": 13917 }, { "epoch": 0.406353099179586, "grad_norm": 0.6867033471487789, "learning_rate": 1.3192538523925387e-05, "loss": 0.6829, "step": 13918 }, { "epoch": 0.40638229540743337, "grad_norm": 0.6277341867342443, "learning_rate": 1.3191889699918898e-05, "loss": 0.6061, "step": 13919 }, { "epoch": 0.40641149163528073, "grad_norm": 0.7152132601560932, "learning_rate": 1.319124087591241e-05, "loss": 0.6536, "step": 13920 }, { "epoch": 0.4064406878631281, "grad_norm": 0.686304137406408, "learning_rate": 1.3190592051905922e-05, "loss": 0.6783, "step": 13921 }, { "epoch": 0.40646988409097545, "grad_norm": 0.7095366343694838, "learning_rate": 1.3189943227899434e-05, "loss": 0.6686, "step": 13922 }, { "epoch": 0.4064990803188228, "grad_norm": 0.5745236927215218, "learning_rate": 1.3189294403892946e-05, "loss": 0.4999, "step": 13923 }, { "epoch": 0.4065282765466702, "grad_norm": 0.6441756413163696, "learning_rate": 1.3188645579886456e-05, "loss": 0.6475, "step": 13924 }, { "epoch": 0.40655747277451754, "grad_norm": 0.6770519519713517, "learning_rate": 1.3187996755879968e-05, "loss": 0.7286, "step": 13925 }, { "epoch": 0.4065866690023649, "grad_norm": 0.6604496524359387, "learning_rate": 1.318734793187348e-05, "loss": 0.7004, "step": 13926 }, { "epoch": 0.40661586523021226, "grad_norm": 0.6162652652370713, "learning_rate": 1.3186699107866992e-05, "loss": 0.5548, "step": 13927 }, { "epoch": 0.4066450614580596, "grad_norm": 0.5945377310654061, "learning_rate": 1.3186050283860502e-05, "loss": 0.5237, "step": 13928 }, { "epoch": 0.406674257685907, "grad_norm": 0.7761439841586977, "learning_rate": 1.3185401459854015e-05, "loss": 0.694, "step": 13929 }, { "epoch": 0.40670345391375434, "grad_norm": 0.6160137600007783, "learning_rate": 1.3184752635847527e-05, "loss": 0.5966, "step": 13930 }, { "epoch": 0.4067326501416017, "grad_norm": 0.6610627380084044, "learning_rate": 1.318410381184104e-05, "loss": 0.6395, "step": 13931 }, { "epoch": 0.40676184636944906, "grad_norm": 0.625673293167093, "learning_rate": 1.3183454987834552e-05, "loss": 0.5951, "step": 13932 }, { "epoch": 0.4067910425972964, "grad_norm": 0.6225907609375825, "learning_rate": 1.3182806163828064e-05, "loss": 0.6276, "step": 13933 }, { "epoch": 0.4068202388251438, "grad_norm": 0.7319796038949132, "learning_rate": 1.3182157339821575e-05, "loss": 0.7339, "step": 13934 }, { "epoch": 0.40684943505299115, "grad_norm": 0.6128339597385286, "learning_rate": 1.3181508515815087e-05, "loss": 0.5714, "step": 13935 }, { "epoch": 0.4068786312808385, "grad_norm": 0.6307556792619948, "learning_rate": 1.3180859691808599e-05, "loss": 0.583, "step": 13936 }, { "epoch": 0.40690782750868587, "grad_norm": 0.6596797956704675, "learning_rate": 1.318021086780211e-05, "loss": 0.6338, "step": 13937 }, { "epoch": 0.40693702373653323, "grad_norm": 0.6430938097000023, "learning_rate": 1.3179562043795621e-05, "loss": 0.6219, "step": 13938 }, { "epoch": 0.4069662199643806, "grad_norm": 0.6705583293095029, "learning_rate": 1.3178913219789133e-05, "loss": 0.6366, "step": 13939 }, { "epoch": 0.40699541619222795, "grad_norm": 0.8237295285832286, "learning_rate": 1.3178264395782645e-05, "loss": 0.6626, "step": 13940 }, { "epoch": 0.4070246124200753, "grad_norm": 0.6469319032592746, "learning_rate": 1.3177615571776157e-05, "loss": 0.6599, "step": 13941 }, { "epoch": 0.4070538086479227, "grad_norm": 0.659124975049089, "learning_rate": 1.317696674776967e-05, "loss": 0.6532, "step": 13942 }, { "epoch": 0.40708300487577004, "grad_norm": 0.7117530036494129, "learning_rate": 1.317631792376318e-05, "loss": 0.6663, "step": 13943 }, { "epoch": 0.4071122011036174, "grad_norm": 0.6738415512683548, "learning_rate": 1.3175669099756692e-05, "loss": 0.6996, "step": 13944 }, { "epoch": 0.40714139733146476, "grad_norm": 0.6675633925260145, "learning_rate": 1.3175020275750204e-05, "loss": 0.6217, "step": 13945 }, { "epoch": 0.4071705935593121, "grad_norm": 0.6317209226824156, "learning_rate": 1.3174371451743716e-05, "loss": 0.5923, "step": 13946 }, { "epoch": 0.4071997897871595, "grad_norm": 0.6633057434602552, "learning_rate": 1.3173722627737226e-05, "loss": 0.6236, "step": 13947 }, { "epoch": 0.40722898601500684, "grad_norm": 0.6089076208376282, "learning_rate": 1.3173073803730738e-05, "loss": 0.5528, "step": 13948 }, { "epoch": 0.4072581822428542, "grad_norm": 0.6198206378237123, "learning_rate": 1.317242497972425e-05, "loss": 0.624, "step": 13949 }, { "epoch": 0.40728737847070157, "grad_norm": 0.6617600618298627, "learning_rate": 1.3171776155717764e-05, "loss": 0.598, "step": 13950 }, { "epoch": 0.4073165746985489, "grad_norm": 0.6716482991719781, "learning_rate": 1.3171127331711276e-05, "loss": 0.6454, "step": 13951 }, { "epoch": 0.4073457709263963, "grad_norm": 0.6631375447663075, "learning_rate": 1.3170478507704788e-05, "loss": 0.6651, "step": 13952 }, { "epoch": 0.40737496715424365, "grad_norm": 0.6921739431998696, "learning_rate": 1.3169829683698298e-05, "loss": 0.6215, "step": 13953 }, { "epoch": 0.407404163382091, "grad_norm": 0.6280429245819988, "learning_rate": 1.316918085969181e-05, "loss": 0.5872, "step": 13954 }, { "epoch": 0.4074333596099384, "grad_norm": 0.6231460925722695, "learning_rate": 1.3168532035685322e-05, "loss": 0.5621, "step": 13955 }, { "epoch": 0.40746255583778573, "grad_norm": 0.5930196808995875, "learning_rate": 1.3167883211678834e-05, "loss": 0.5191, "step": 13956 }, { "epoch": 0.4074917520656331, "grad_norm": 0.6745624626494648, "learning_rate": 1.3167234387672344e-05, "loss": 0.6636, "step": 13957 }, { "epoch": 0.40752094829348046, "grad_norm": 0.6225494692186498, "learning_rate": 1.3166585563665857e-05, "loss": 0.6195, "step": 13958 }, { "epoch": 0.4075501445213278, "grad_norm": 0.6829970767795488, "learning_rate": 1.3165936739659369e-05, "loss": 0.6569, "step": 13959 }, { "epoch": 0.40757934074917523, "grad_norm": 0.730122950786835, "learning_rate": 1.316528791565288e-05, "loss": 0.6125, "step": 13960 }, { "epoch": 0.4076085369770226, "grad_norm": 0.6506578675505992, "learning_rate": 1.3164639091646391e-05, "loss": 0.5788, "step": 13961 }, { "epoch": 0.40763773320486996, "grad_norm": 0.5806273926937463, "learning_rate": 1.3163990267639903e-05, "loss": 0.5384, "step": 13962 }, { "epoch": 0.4076669294327173, "grad_norm": 0.6252305616563041, "learning_rate": 1.3163341443633415e-05, "loss": 0.5909, "step": 13963 }, { "epoch": 0.4076961256605647, "grad_norm": 0.6978326608324201, "learning_rate": 1.3162692619626927e-05, "loss": 0.6527, "step": 13964 }, { "epoch": 0.40772532188841204, "grad_norm": 0.6526786871967892, "learning_rate": 1.3162043795620439e-05, "loss": 0.6182, "step": 13965 }, { "epoch": 0.4077545181162594, "grad_norm": 0.6330129867243793, "learning_rate": 1.316139497161395e-05, "loss": 0.6189, "step": 13966 }, { "epoch": 0.40778371434410676, "grad_norm": 0.6349966429343278, "learning_rate": 1.3160746147607461e-05, "loss": 0.5856, "step": 13967 }, { "epoch": 0.4078129105719541, "grad_norm": 0.6245169865587041, "learning_rate": 1.3160097323600973e-05, "loss": 0.6197, "step": 13968 }, { "epoch": 0.4078421067998015, "grad_norm": 0.6132927707229877, "learning_rate": 1.3159448499594487e-05, "loss": 0.5752, "step": 13969 }, { "epoch": 0.40787130302764885, "grad_norm": 0.6704796953285693, "learning_rate": 1.3158799675588e-05, "loss": 0.6765, "step": 13970 }, { "epoch": 0.4079004992554962, "grad_norm": 0.6681541137541819, "learning_rate": 1.3158150851581511e-05, "loss": 0.6354, "step": 13971 }, { "epoch": 0.40792969548334357, "grad_norm": 0.6299878278919602, "learning_rate": 1.3157502027575022e-05, "loss": 0.5739, "step": 13972 }, { "epoch": 0.40795889171119093, "grad_norm": 0.6327189093589581, "learning_rate": 1.3156853203568534e-05, "loss": 0.6355, "step": 13973 }, { "epoch": 0.4079880879390383, "grad_norm": 0.622361114851024, "learning_rate": 1.3156204379562046e-05, "loss": 0.6022, "step": 13974 }, { "epoch": 0.40801728416688565, "grad_norm": 0.6678881403336698, "learning_rate": 1.3155555555555558e-05, "loss": 0.5721, "step": 13975 }, { "epoch": 0.408046480394733, "grad_norm": 0.6351963217601669, "learning_rate": 1.3154906731549068e-05, "loss": 0.6194, "step": 13976 }, { "epoch": 0.4080756766225804, "grad_norm": 0.6506166295090855, "learning_rate": 1.315425790754258e-05, "loss": 0.6453, "step": 13977 }, { "epoch": 0.40810487285042774, "grad_norm": 0.6883614307688164, "learning_rate": 1.3153609083536092e-05, "loss": 0.679, "step": 13978 }, { "epoch": 0.4081340690782751, "grad_norm": 0.6344067487462446, "learning_rate": 1.3152960259529604e-05, "loss": 0.6054, "step": 13979 }, { "epoch": 0.40816326530612246, "grad_norm": 0.691012564377532, "learning_rate": 1.3152311435523114e-05, "loss": 0.7052, "step": 13980 }, { "epoch": 0.4081924615339698, "grad_norm": 0.6250811573507691, "learning_rate": 1.3151662611516626e-05, "loss": 0.6225, "step": 13981 }, { "epoch": 0.4082216577618172, "grad_norm": 0.6391104151496024, "learning_rate": 1.3151013787510138e-05, "loss": 0.6027, "step": 13982 }, { "epoch": 0.40825085398966454, "grad_norm": 0.7074038550125108, "learning_rate": 1.315036496350365e-05, "loss": 0.7036, "step": 13983 }, { "epoch": 0.4082800502175119, "grad_norm": 0.7350742605445731, "learning_rate": 1.3149716139497162e-05, "loss": 0.6927, "step": 13984 }, { "epoch": 0.40830924644535926, "grad_norm": 0.6559815150367706, "learning_rate": 1.3149067315490673e-05, "loss": 0.6558, "step": 13985 }, { "epoch": 0.4083384426732066, "grad_norm": 0.641149693630646, "learning_rate": 1.3148418491484185e-05, "loss": 0.6571, "step": 13986 }, { "epoch": 0.408367638901054, "grad_norm": 0.7162217402268195, "learning_rate": 1.3147769667477697e-05, "loss": 0.6571, "step": 13987 }, { "epoch": 0.40839683512890135, "grad_norm": 0.7190551648820782, "learning_rate": 1.314712084347121e-05, "loss": 0.7586, "step": 13988 }, { "epoch": 0.4084260313567487, "grad_norm": 0.6920206713746925, "learning_rate": 1.3146472019464723e-05, "loss": 0.6449, "step": 13989 }, { "epoch": 0.40845522758459607, "grad_norm": 0.6213088306807377, "learning_rate": 1.3145823195458235e-05, "loss": 0.6116, "step": 13990 }, { "epoch": 0.40848442381244343, "grad_norm": 0.6870302164886507, "learning_rate": 1.3145174371451745e-05, "loss": 0.6801, "step": 13991 }, { "epoch": 0.4085136200402908, "grad_norm": 0.7157880185854005, "learning_rate": 1.3144525547445257e-05, "loss": 0.7186, "step": 13992 }, { "epoch": 0.40854281626813815, "grad_norm": 0.6419306947952755, "learning_rate": 1.3143876723438769e-05, "loss": 0.6021, "step": 13993 }, { "epoch": 0.4085720124959855, "grad_norm": 0.6849826229420147, "learning_rate": 1.3143227899432281e-05, "loss": 0.6472, "step": 13994 }, { "epoch": 0.4086012087238329, "grad_norm": 0.6595922401344404, "learning_rate": 1.3142579075425791e-05, "loss": 0.6321, "step": 13995 }, { "epoch": 0.40863040495168024, "grad_norm": 0.6035776115762825, "learning_rate": 1.3141930251419303e-05, "loss": 0.5607, "step": 13996 }, { "epoch": 0.4086596011795276, "grad_norm": 0.6619812541689485, "learning_rate": 1.3141281427412815e-05, "loss": 0.6509, "step": 13997 }, { "epoch": 0.40868879740737496, "grad_norm": 0.6247076370353842, "learning_rate": 1.3140632603406327e-05, "loss": 0.6093, "step": 13998 }, { "epoch": 0.4087179936352223, "grad_norm": 0.6421270234817469, "learning_rate": 1.3139983779399838e-05, "loss": 0.6175, "step": 13999 }, { "epoch": 0.4087471898630697, "grad_norm": 0.5960609190170791, "learning_rate": 1.313933495539335e-05, "loss": 0.5234, "step": 14000 }, { "epoch": 0.40877638609091704, "grad_norm": 0.7528939437170659, "learning_rate": 1.3138686131386862e-05, "loss": 0.6942, "step": 14001 }, { "epoch": 0.4088055823187644, "grad_norm": 0.6260444907541268, "learning_rate": 1.3138037307380374e-05, "loss": 0.6011, "step": 14002 }, { "epoch": 0.40883477854661177, "grad_norm": 0.6460732026216127, "learning_rate": 1.3137388483373886e-05, "loss": 0.6524, "step": 14003 }, { "epoch": 0.4088639747744591, "grad_norm": 0.6589152701762885, "learning_rate": 1.3136739659367396e-05, "loss": 0.6493, "step": 14004 }, { "epoch": 0.4088931710023065, "grad_norm": 0.6353374983880036, "learning_rate": 1.3136090835360908e-05, "loss": 0.5851, "step": 14005 }, { "epoch": 0.40892236723015385, "grad_norm": 0.6563452847821976, "learning_rate": 1.313544201135442e-05, "loss": 0.578, "step": 14006 }, { "epoch": 0.4089515634580012, "grad_norm": 0.645147222729298, "learning_rate": 1.3134793187347934e-05, "loss": 0.6129, "step": 14007 }, { "epoch": 0.40898075968584857, "grad_norm": 0.6579662663355881, "learning_rate": 1.3134144363341446e-05, "loss": 0.6063, "step": 14008 }, { "epoch": 0.40900995591369593, "grad_norm": 0.6105860232405159, "learning_rate": 1.3133495539334958e-05, "loss": 0.532, "step": 14009 }, { "epoch": 0.4090391521415433, "grad_norm": 0.6435363034876959, "learning_rate": 1.3132846715328468e-05, "loss": 0.6446, "step": 14010 }, { "epoch": 0.40906834836939066, "grad_norm": 0.6674189768026081, "learning_rate": 1.313219789132198e-05, "loss": 0.6491, "step": 14011 }, { "epoch": 0.409097544597238, "grad_norm": 0.6860490896636566, "learning_rate": 1.3131549067315492e-05, "loss": 0.6619, "step": 14012 }, { "epoch": 0.4091267408250854, "grad_norm": 0.6443769985528991, "learning_rate": 1.3130900243309004e-05, "loss": 0.6409, "step": 14013 }, { "epoch": 0.40915593705293274, "grad_norm": 0.6435785707415459, "learning_rate": 1.3130251419302515e-05, "loss": 0.6009, "step": 14014 }, { "epoch": 0.4091851332807801, "grad_norm": 0.6307562289997056, "learning_rate": 1.3129602595296027e-05, "loss": 0.5781, "step": 14015 }, { "epoch": 0.40921432950862746, "grad_norm": 0.6496022441578186, "learning_rate": 1.3128953771289539e-05, "loss": 0.5963, "step": 14016 }, { "epoch": 0.4092435257364748, "grad_norm": 0.663641358236587, "learning_rate": 1.3128304947283051e-05, "loss": 0.6838, "step": 14017 }, { "epoch": 0.4092727219643222, "grad_norm": 0.692944979679129, "learning_rate": 1.3127656123276561e-05, "loss": 0.7365, "step": 14018 }, { "epoch": 0.40930191819216954, "grad_norm": 0.7201629597115806, "learning_rate": 1.3127007299270073e-05, "loss": 0.684, "step": 14019 }, { "epoch": 0.40933111442001696, "grad_norm": 0.5853880984682511, "learning_rate": 1.3126358475263585e-05, "loss": 0.5163, "step": 14020 }, { "epoch": 0.4093603106478643, "grad_norm": 0.6733889677726588, "learning_rate": 1.3125709651257097e-05, "loss": 0.6504, "step": 14021 }, { "epoch": 0.4093895068757117, "grad_norm": 0.6998524089550174, "learning_rate": 1.312506082725061e-05, "loss": 0.674, "step": 14022 }, { "epoch": 0.40941870310355905, "grad_norm": 0.641971689592351, "learning_rate": 1.312441200324412e-05, "loss": 0.6245, "step": 14023 }, { "epoch": 0.4094478993314064, "grad_norm": 0.648409926725214, "learning_rate": 1.3123763179237632e-05, "loss": 0.6407, "step": 14024 }, { "epoch": 0.40947709555925377, "grad_norm": 0.6076823117006226, "learning_rate": 1.3123114355231144e-05, "loss": 0.5452, "step": 14025 }, { "epoch": 0.40950629178710113, "grad_norm": 0.6953221231444565, "learning_rate": 1.3122465531224657e-05, "loss": 0.6915, "step": 14026 }, { "epoch": 0.4095354880149485, "grad_norm": 0.6158910057123461, "learning_rate": 1.312181670721817e-05, "loss": 0.5719, "step": 14027 }, { "epoch": 0.40956468424279585, "grad_norm": 0.5571401218525863, "learning_rate": 1.3121167883211681e-05, "loss": 0.4987, "step": 14028 }, { "epoch": 0.4095938804706432, "grad_norm": 0.6414738514229946, "learning_rate": 1.3120519059205192e-05, "loss": 0.6682, "step": 14029 }, { "epoch": 0.4096230766984906, "grad_norm": 0.5825434985378527, "learning_rate": 1.3119870235198704e-05, "loss": 0.5567, "step": 14030 }, { "epoch": 0.40965227292633793, "grad_norm": 0.6853771059038609, "learning_rate": 1.3119221411192216e-05, "loss": 0.6385, "step": 14031 }, { "epoch": 0.4096814691541853, "grad_norm": 0.6468761232730388, "learning_rate": 1.3118572587185728e-05, "loss": 0.6312, "step": 14032 }, { "epoch": 0.40971066538203266, "grad_norm": 0.6435884781462748, "learning_rate": 1.3117923763179238e-05, "loss": 0.605, "step": 14033 }, { "epoch": 0.40973986160988, "grad_norm": 0.6087884249083213, "learning_rate": 1.311727493917275e-05, "loss": 0.6031, "step": 14034 }, { "epoch": 0.4097690578377274, "grad_norm": 0.6746495868071637, "learning_rate": 1.3116626115166262e-05, "loss": 0.6137, "step": 14035 }, { "epoch": 0.40979825406557474, "grad_norm": 0.656710698713472, "learning_rate": 1.3115977291159774e-05, "loss": 0.6582, "step": 14036 }, { "epoch": 0.4098274502934221, "grad_norm": 0.6580975905515541, "learning_rate": 1.3115328467153285e-05, "loss": 0.6607, "step": 14037 }, { "epoch": 0.40985664652126946, "grad_norm": 0.6345121361768965, "learning_rate": 1.3114679643146797e-05, "loss": 0.632, "step": 14038 }, { "epoch": 0.4098858427491168, "grad_norm": 0.6328754253863073, "learning_rate": 1.3114030819140309e-05, "loss": 0.6211, "step": 14039 }, { "epoch": 0.4099150389769642, "grad_norm": 0.6306280160434524, "learning_rate": 1.311338199513382e-05, "loss": 0.5339, "step": 14040 }, { "epoch": 0.40994423520481155, "grad_norm": 0.5598511334277964, "learning_rate": 1.3112733171127333e-05, "loss": 0.4776, "step": 14041 }, { "epoch": 0.4099734314326589, "grad_norm": 0.6158358243828344, "learning_rate": 1.3112084347120843e-05, "loss": 0.5687, "step": 14042 }, { "epoch": 0.41000262766050627, "grad_norm": 0.7296880702564469, "learning_rate": 1.3111435523114355e-05, "loss": 0.6576, "step": 14043 }, { "epoch": 0.41003182388835363, "grad_norm": 0.6610095709832339, "learning_rate": 1.3110786699107869e-05, "loss": 0.6543, "step": 14044 }, { "epoch": 0.410061020116201, "grad_norm": 0.6771224995447921, "learning_rate": 1.3110137875101381e-05, "loss": 0.6903, "step": 14045 }, { "epoch": 0.41009021634404835, "grad_norm": 0.6408304084672337, "learning_rate": 1.3109489051094893e-05, "loss": 0.5889, "step": 14046 }, { "epoch": 0.4101194125718957, "grad_norm": 0.653063690713797, "learning_rate": 1.3108840227088405e-05, "loss": 0.6553, "step": 14047 }, { "epoch": 0.4101486087997431, "grad_norm": 0.6226241691690825, "learning_rate": 1.3108191403081915e-05, "loss": 0.5564, "step": 14048 }, { "epoch": 0.41017780502759044, "grad_norm": 0.6443198695325747, "learning_rate": 1.3107542579075427e-05, "loss": 0.6608, "step": 14049 }, { "epoch": 0.4102070012554378, "grad_norm": 0.6135608881193799, "learning_rate": 1.310689375506894e-05, "loss": 0.541, "step": 14050 }, { "epoch": 0.41023619748328516, "grad_norm": 0.6465678376017832, "learning_rate": 1.3106244931062451e-05, "loss": 0.6052, "step": 14051 }, { "epoch": 0.4102653937111325, "grad_norm": 0.6783458135300049, "learning_rate": 1.3105596107055962e-05, "loss": 0.6533, "step": 14052 }, { "epoch": 0.4102945899389799, "grad_norm": 0.6712090481921347, "learning_rate": 1.3104947283049474e-05, "loss": 0.6639, "step": 14053 }, { "epoch": 0.41032378616682724, "grad_norm": 0.6415813645772633, "learning_rate": 1.3104298459042986e-05, "loss": 0.6348, "step": 14054 }, { "epoch": 0.4103529823946746, "grad_norm": 0.6239828122678399, "learning_rate": 1.3103649635036498e-05, "loss": 0.5849, "step": 14055 }, { "epoch": 0.41038217862252196, "grad_norm": 0.6343680985189492, "learning_rate": 1.3103000811030008e-05, "loss": 0.5862, "step": 14056 }, { "epoch": 0.4104113748503693, "grad_norm": 0.7073195715626706, "learning_rate": 1.310235198702352e-05, "loss": 0.7425, "step": 14057 }, { "epoch": 0.4104405710782167, "grad_norm": 0.641538832628894, "learning_rate": 1.3101703163017032e-05, "loss": 0.5828, "step": 14058 }, { "epoch": 0.41046976730606405, "grad_norm": 0.6827329640555934, "learning_rate": 1.3101054339010544e-05, "loss": 0.642, "step": 14059 }, { "epoch": 0.4104989635339114, "grad_norm": 0.6398443335910027, "learning_rate": 1.3100405515004056e-05, "loss": 0.5868, "step": 14060 }, { "epoch": 0.41052815976175877, "grad_norm": 0.6532660848035907, "learning_rate": 1.3099756690997567e-05, "loss": 0.6399, "step": 14061 }, { "epoch": 0.41055735598960613, "grad_norm": 0.6237329738533898, "learning_rate": 1.3099107866991079e-05, "loss": 0.6086, "step": 14062 }, { "epoch": 0.4105865522174535, "grad_norm": 0.6764670154794425, "learning_rate": 1.3098459042984592e-05, "loss": 0.652, "step": 14063 }, { "epoch": 0.41061574844530085, "grad_norm": 0.6561856358304803, "learning_rate": 1.3097810218978104e-05, "loss": 0.6673, "step": 14064 }, { "epoch": 0.4106449446731482, "grad_norm": 0.6485311049393673, "learning_rate": 1.3097161394971616e-05, "loss": 0.5716, "step": 14065 }, { "epoch": 0.4106741409009956, "grad_norm": 0.6627429803063003, "learning_rate": 1.3096512570965128e-05, "loss": 0.6157, "step": 14066 }, { "epoch": 0.41070333712884294, "grad_norm": 0.5917358174075219, "learning_rate": 1.3095863746958639e-05, "loss": 0.5341, "step": 14067 }, { "epoch": 0.4107325333566903, "grad_norm": 0.6541367605396963, "learning_rate": 1.309521492295215e-05, "loss": 0.6305, "step": 14068 }, { "epoch": 0.41076172958453766, "grad_norm": 0.661441069593963, "learning_rate": 1.3094566098945663e-05, "loss": 0.6259, "step": 14069 }, { "epoch": 0.410790925812385, "grad_norm": 0.6511340279760254, "learning_rate": 1.3093917274939175e-05, "loss": 0.6477, "step": 14070 }, { "epoch": 0.4108201220402324, "grad_norm": 0.6934547435083015, "learning_rate": 1.3093268450932685e-05, "loss": 0.6946, "step": 14071 }, { "epoch": 0.41084931826807974, "grad_norm": 0.6099746185988859, "learning_rate": 1.3092619626926197e-05, "loss": 0.5976, "step": 14072 }, { "epoch": 0.4108785144959271, "grad_norm": 0.647645173522671, "learning_rate": 1.309197080291971e-05, "loss": 0.6259, "step": 14073 }, { "epoch": 0.41090771072377447, "grad_norm": 0.6473834553913398, "learning_rate": 1.3091321978913221e-05, "loss": 0.6506, "step": 14074 }, { "epoch": 0.4109369069516218, "grad_norm": 0.6349784745914271, "learning_rate": 1.3090673154906732e-05, "loss": 0.5866, "step": 14075 }, { "epoch": 0.4109661031794692, "grad_norm": 0.6235351058547904, "learning_rate": 1.3090024330900244e-05, "loss": 0.5914, "step": 14076 }, { "epoch": 0.41099529940731655, "grad_norm": 0.6664246396124609, "learning_rate": 1.3089375506893756e-05, "loss": 0.6215, "step": 14077 }, { "epoch": 0.4110244956351639, "grad_norm": 0.691207891778908, "learning_rate": 1.3088726682887268e-05, "loss": 0.6694, "step": 14078 }, { "epoch": 0.4110536918630113, "grad_norm": 0.6348683249230319, "learning_rate": 1.308807785888078e-05, "loss": 0.5964, "step": 14079 }, { "epoch": 0.41108288809085863, "grad_norm": 0.6143346290104791, "learning_rate": 1.308742903487429e-05, "loss": 0.5572, "step": 14080 }, { "epoch": 0.41111208431870605, "grad_norm": 0.675629922616511, "learning_rate": 1.3086780210867802e-05, "loss": 0.6394, "step": 14081 }, { "epoch": 0.4111412805465534, "grad_norm": 0.6282634919262081, "learning_rate": 1.3086131386861316e-05, "loss": 0.5983, "step": 14082 }, { "epoch": 0.4111704767744008, "grad_norm": 0.676791541704333, "learning_rate": 1.3085482562854828e-05, "loss": 0.6327, "step": 14083 }, { "epoch": 0.41119967300224813, "grad_norm": 0.6958233089378448, "learning_rate": 1.308483373884834e-05, "loss": 0.6698, "step": 14084 }, { "epoch": 0.4112288692300955, "grad_norm": 0.6181282901234079, "learning_rate": 1.308418491484185e-05, "loss": 0.5691, "step": 14085 }, { "epoch": 0.41125806545794286, "grad_norm": 0.6450821455170925, "learning_rate": 1.3083536090835362e-05, "loss": 0.6147, "step": 14086 }, { "epoch": 0.4112872616857902, "grad_norm": 0.6707933611676323, "learning_rate": 1.3082887266828874e-05, "loss": 0.6701, "step": 14087 }, { "epoch": 0.4113164579136376, "grad_norm": 0.6360702224823206, "learning_rate": 1.3082238442822386e-05, "loss": 0.6117, "step": 14088 }, { "epoch": 0.41134565414148494, "grad_norm": 0.6634045613151323, "learning_rate": 1.3081589618815898e-05, "loss": 0.6491, "step": 14089 }, { "epoch": 0.4113748503693323, "grad_norm": 0.661055360908639, "learning_rate": 1.3080940794809409e-05, "loss": 0.6555, "step": 14090 }, { "epoch": 0.41140404659717966, "grad_norm": 0.6967700682509131, "learning_rate": 1.308029197080292e-05, "loss": 0.7428, "step": 14091 }, { "epoch": 0.411433242825027, "grad_norm": 0.6234439806779944, "learning_rate": 1.3079643146796433e-05, "loss": 0.5483, "step": 14092 }, { "epoch": 0.4114624390528744, "grad_norm": 0.6772487351736916, "learning_rate": 1.3078994322789945e-05, "loss": 0.64, "step": 14093 }, { "epoch": 0.41149163528072175, "grad_norm": 0.6400605717064815, "learning_rate": 1.3078345498783455e-05, "loss": 0.606, "step": 14094 }, { "epoch": 0.4115208315085691, "grad_norm": 0.6348319307618602, "learning_rate": 1.3077696674776967e-05, "loss": 0.5982, "step": 14095 }, { "epoch": 0.41155002773641647, "grad_norm": 0.6290105049494742, "learning_rate": 1.3077047850770479e-05, "loss": 0.5641, "step": 14096 }, { "epoch": 0.41157922396426383, "grad_norm": 0.6338251399429202, "learning_rate": 1.3076399026763991e-05, "loss": 0.6292, "step": 14097 }, { "epoch": 0.4116084201921112, "grad_norm": 0.7415312017891023, "learning_rate": 1.3075750202757503e-05, "loss": 0.7428, "step": 14098 }, { "epoch": 0.41163761641995855, "grad_norm": 0.6815920563959946, "learning_rate": 1.3075101378751013e-05, "loss": 0.6357, "step": 14099 }, { "epoch": 0.4116668126478059, "grad_norm": 0.6262000755209196, "learning_rate": 1.3074452554744525e-05, "loss": 0.5697, "step": 14100 }, { "epoch": 0.4116960088756533, "grad_norm": 0.6395162677932507, "learning_rate": 1.3073803730738039e-05, "loss": 0.5813, "step": 14101 }, { "epoch": 0.41172520510350064, "grad_norm": 0.6195901822141459, "learning_rate": 1.3073154906731551e-05, "loss": 0.6324, "step": 14102 }, { "epoch": 0.411754401331348, "grad_norm": 0.63667322978887, "learning_rate": 1.3072506082725063e-05, "loss": 0.6053, "step": 14103 }, { "epoch": 0.41178359755919536, "grad_norm": 0.6740576815850611, "learning_rate": 1.3071857258718574e-05, "loss": 0.7085, "step": 14104 }, { "epoch": 0.4118127937870427, "grad_norm": 0.672891738963282, "learning_rate": 1.3071208434712086e-05, "loss": 0.703, "step": 14105 }, { "epoch": 0.4118419900148901, "grad_norm": 0.6298477078040743, "learning_rate": 1.3070559610705598e-05, "loss": 0.5917, "step": 14106 }, { "epoch": 0.41187118624273744, "grad_norm": 0.6094147267038327, "learning_rate": 1.306991078669911e-05, "loss": 0.5893, "step": 14107 }, { "epoch": 0.4119003824705848, "grad_norm": 0.6473058316736898, "learning_rate": 1.3069261962692622e-05, "loss": 0.6082, "step": 14108 }, { "epoch": 0.41192957869843216, "grad_norm": 0.6734038824178465, "learning_rate": 1.3068613138686132e-05, "loss": 0.6205, "step": 14109 }, { "epoch": 0.4119587749262795, "grad_norm": 0.6596843468641385, "learning_rate": 1.3067964314679644e-05, "loss": 0.6604, "step": 14110 }, { "epoch": 0.4119879711541269, "grad_norm": 0.6259727177267913, "learning_rate": 1.3067315490673156e-05, "loss": 0.5585, "step": 14111 }, { "epoch": 0.41201716738197425, "grad_norm": 0.7182750906161586, "learning_rate": 1.3066666666666668e-05, "loss": 0.5608, "step": 14112 }, { "epoch": 0.4120463636098216, "grad_norm": 0.6651267466376775, "learning_rate": 1.3066017842660178e-05, "loss": 0.6705, "step": 14113 }, { "epoch": 0.41207555983766897, "grad_norm": 0.6301656673084003, "learning_rate": 1.306536901865369e-05, "loss": 0.6087, "step": 14114 }, { "epoch": 0.41210475606551633, "grad_norm": 0.6421250054419837, "learning_rate": 1.3064720194647202e-05, "loss": 0.5938, "step": 14115 }, { "epoch": 0.4121339522933637, "grad_norm": 0.6508717685729651, "learning_rate": 1.3064071370640714e-05, "loss": 0.6311, "step": 14116 }, { "epoch": 0.41216314852121105, "grad_norm": 0.732577928175999, "learning_rate": 1.3063422546634226e-05, "loss": 0.6643, "step": 14117 }, { "epoch": 0.4121923447490584, "grad_norm": 0.6354563878502727, "learning_rate": 1.3062773722627737e-05, "loss": 0.5513, "step": 14118 }, { "epoch": 0.4122215409769058, "grad_norm": 0.6398114963395839, "learning_rate": 1.3062124898621249e-05, "loss": 0.6505, "step": 14119 }, { "epoch": 0.41225073720475314, "grad_norm": 0.6839019254159053, "learning_rate": 1.3061476074614763e-05, "loss": 0.6976, "step": 14120 }, { "epoch": 0.4122799334326005, "grad_norm": 0.6749873289366202, "learning_rate": 1.3060827250608275e-05, "loss": 0.7048, "step": 14121 }, { "epoch": 0.41230912966044786, "grad_norm": 0.5702146346399439, "learning_rate": 1.3060178426601787e-05, "loss": 0.5442, "step": 14122 }, { "epoch": 0.4123383258882952, "grad_norm": 0.7233334264781098, "learning_rate": 1.3059529602595297e-05, "loss": 0.642, "step": 14123 }, { "epoch": 0.4123675221161426, "grad_norm": 0.6938982446135463, "learning_rate": 1.3058880778588809e-05, "loss": 0.6828, "step": 14124 }, { "epoch": 0.41239671834398994, "grad_norm": 0.6634422357896008, "learning_rate": 1.3058231954582321e-05, "loss": 0.6233, "step": 14125 }, { "epoch": 0.4124259145718373, "grad_norm": 0.6183326148549514, "learning_rate": 1.3057583130575833e-05, "loss": 0.6056, "step": 14126 }, { "epoch": 0.41245511079968467, "grad_norm": 0.6248426710046413, "learning_rate": 1.3056934306569345e-05, "loss": 0.557, "step": 14127 }, { "epoch": 0.412484307027532, "grad_norm": 0.6343104334751589, "learning_rate": 1.3056285482562855e-05, "loss": 0.6226, "step": 14128 }, { "epoch": 0.4125135032553794, "grad_norm": 0.6312179877515327, "learning_rate": 1.3055636658556367e-05, "loss": 0.6184, "step": 14129 }, { "epoch": 0.41254269948322675, "grad_norm": 0.638440684882465, "learning_rate": 1.305498783454988e-05, "loss": 0.6536, "step": 14130 }, { "epoch": 0.4125718957110741, "grad_norm": 0.645128079470749, "learning_rate": 1.3054339010543391e-05, "loss": 0.6399, "step": 14131 }, { "epoch": 0.41260109193892147, "grad_norm": 0.6435740445414886, "learning_rate": 1.3053690186536902e-05, "loss": 0.6038, "step": 14132 }, { "epoch": 0.41263028816676883, "grad_norm": 0.7163300037662512, "learning_rate": 1.3053041362530414e-05, "loss": 0.7174, "step": 14133 }, { "epoch": 0.4126594843946162, "grad_norm": 0.6836611100340595, "learning_rate": 1.3052392538523926e-05, "loss": 0.6476, "step": 14134 }, { "epoch": 0.41268868062246356, "grad_norm": 0.6127962415263655, "learning_rate": 1.3051743714517438e-05, "loss": 0.5569, "step": 14135 }, { "epoch": 0.4127178768503109, "grad_norm": 0.6694752560736675, "learning_rate": 1.305109489051095e-05, "loss": 0.6695, "step": 14136 }, { "epoch": 0.4127470730781583, "grad_norm": 0.6897201015960598, "learning_rate": 1.305044606650446e-05, "loss": 0.6883, "step": 14137 }, { "epoch": 0.41277626930600564, "grad_norm": 0.6826162564738268, "learning_rate": 1.3049797242497972e-05, "loss": 0.685, "step": 14138 }, { "epoch": 0.412805465533853, "grad_norm": 0.6171459323559001, "learning_rate": 1.3049148418491486e-05, "loss": 0.5647, "step": 14139 }, { "epoch": 0.41283466176170036, "grad_norm": 0.6102677376257618, "learning_rate": 1.3048499594484998e-05, "loss": 0.5778, "step": 14140 }, { "epoch": 0.4128638579895478, "grad_norm": 0.6435305253823679, "learning_rate": 1.304785077047851e-05, "loss": 0.6265, "step": 14141 }, { "epoch": 0.41289305421739514, "grad_norm": 0.668131459097548, "learning_rate": 1.304720194647202e-05, "loss": 0.6602, "step": 14142 }, { "epoch": 0.4129222504452425, "grad_norm": 0.6155608329939921, "learning_rate": 1.3046553122465532e-05, "loss": 0.5848, "step": 14143 }, { "epoch": 0.41295144667308986, "grad_norm": 0.6420171731796964, "learning_rate": 1.3045904298459044e-05, "loss": 0.6044, "step": 14144 }, { "epoch": 0.4129806429009372, "grad_norm": 0.6068499591801426, "learning_rate": 1.3045255474452556e-05, "loss": 0.5809, "step": 14145 }, { "epoch": 0.4130098391287846, "grad_norm": 0.6656953973966754, "learning_rate": 1.3044606650446069e-05, "loss": 0.6516, "step": 14146 }, { "epoch": 0.41303903535663195, "grad_norm": 0.6765862938800139, "learning_rate": 1.3043957826439579e-05, "loss": 0.698, "step": 14147 }, { "epoch": 0.4130682315844793, "grad_norm": 0.709019219620161, "learning_rate": 1.3043309002433091e-05, "loss": 0.621, "step": 14148 }, { "epoch": 0.41309742781232667, "grad_norm": 0.6920538797138744, "learning_rate": 1.3042660178426603e-05, "loss": 0.6769, "step": 14149 }, { "epoch": 0.41312662404017403, "grad_norm": 0.6433547590275875, "learning_rate": 1.3042011354420115e-05, "loss": 0.6236, "step": 14150 }, { "epoch": 0.4131558202680214, "grad_norm": 0.6832357510802872, "learning_rate": 1.3041362530413625e-05, "loss": 0.5389, "step": 14151 }, { "epoch": 0.41318501649586875, "grad_norm": 0.6326135994239225, "learning_rate": 1.3040713706407137e-05, "loss": 0.5848, "step": 14152 }, { "epoch": 0.4132142127237161, "grad_norm": 0.6304748986914283, "learning_rate": 1.304006488240065e-05, "loss": 0.6, "step": 14153 }, { "epoch": 0.4132434089515635, "grad_norm": 0.659794328649478, "learning_rate": 1.3039416058394161e-05, "loss": 0.6603, "step": 14154 }, { "epoch": 0.41327260517941083, "grad_norm": 0.6505859065531183, "learning_rate": 1.3038767234387673e-05, "loss": 0.6138, "step": 14155 }, { "epoch": 0.4133018014072582, "grad_norm": 0.689585149699794, "learning_rate": 1.3038118410381184e-05, "loss": 0.6912, "step": 14156 }, { "epoch": 0.41333099763510556, "grad_norm": 0.6803585095689307, "learning_rate": 1.3037469586374696e-05, "loss": 0.6691, "step": 14157 }, { "epoch": 0.4133601938629529, "grad_norm": 0.6666540415287256, "learning_rate": 1.303682076236821e-05, "loss": 0.6073, "step": 14158 }, { "epoch": 0.4133893900908003, "grad_norm": 0.6232540037175189, "learning_rate": 1.3036171938361721e-05, "loss": 0.5715, "step": 14159 }, { "epoch": 0.41341858631864764, "grad_norm": 0.6692893207328819, "learning_rate": 1.3035523114355233e-05, "loss": 0.6963, "step": 14160 }, { "epoch": 0.413447782546495, "grad_norm": 0.6273900854952519, "learning_rate": 1.3034874290348744e-05, "loss": 0.5234, "step": 14161 }, { "epoch": 0.41347697877434236, "grad_norm": 0.7085327554769895, "learning_rate": 1.3034225466342256e-05, "loss": 0.7446, "step": 14162 }, { "epoch": 0.4135061750021897, "grad_norm": 0.6329962146329446, "learning_rate": 1.3033576642335768e-05, "loss": 0.5916, "step": 14163 }, { "epoch": 0.4135353712300371, "grad_norm": 0.630751035003515, "learning_rate": 1.303292781832928e-05, "loss": 0.5979, "step": 14164 }, { "epoch": 0.41356456745788445, "grad_norm": 0.7231287205981293, "learning_rate": 1.3032278994322792e-05, "loss": 0.7697, "step": 14165 }, { "epoch": 0.4135937636857318, "grad_norm": 0.6660769245465543, "learning_rate": 1.3031630170316302e-05, "loss": 0.6601, "step": 14166 }, { "epoch": 0.41362295991357917, "grad_norm": 0.6380655998420462, "learning_rate": 1.3030981346309814e-05, "loss": 0.595, "step": 14167 }, { "epoch": 0.41365215614142653, "grad_norm": 0.639174162564835, "learning_rate": 1.3030332522303326e-05, "loss": 0.5943, "step": 14168 }, { "epoch": 0.4136813523692739, "grad_norm": 0.6638251906719616, "learning_rate": 1.3029683698296838e-05, "loss": 0.704, "step": 14169 }, { "epoch": 0.41371054859712125, "grad_norm": 0.6720797525339104, "learning_rate": 1.3029034874290349e-05, "loss": 0.6589, "step": 14170 }, { "epoch": 0.4137397448249686, "grad_norm": 0.6957280484032689, "learning_rate": 1.302838605028386e-05, "loss": 0.6769, "step": 14171 }, { "epoch": 0.413768941052816, "grad_norm": 0.6970845975821587, "learning_rate": 1.3027737226277373e-05, "loss": 0.6243, "step": 14172 }, { "epoch": 0.41379813728066334, "grad_norm": 0.6317248113616917, "learning_rate": 1.3027088402270885e-05, "loss": 0.5906, "step": 14173 }, { "epoch": 0.4138273335085107, "grad_norm": 0.6144131827426013, "learning_rate": 1.3026439578264397e-05, "loss": 0.6035, "step": 14174 }, { "epoch": 0.41385652973635806, "grad_norm": 0.6281014978076801, "learning_rate": 1.3025790754257907e-05, "loss": 0.5978, "step": 14175 }, { "epoch": 0.4138857259642054, "grad_norm": 0.6284244354362838, "learning_rate": 1.302514193025142e-05, "loss": 0.5903, "step": 14176 }, { "epoch": 0.4139149221920528, "grad_norm": 0.6568661711377225, "learning_rate": 1.3024493106244933e-05, "loss": 0.5897, "step": 14177 }, { "epoch": 0.41394411841990014, "grad_norm": 0.7071627474647955, "learning_rate": 1.3023844282238445e-05, "loss": 0.7085, "step": 14178 }, { "epoch": 0.4139733146477475, "grad_norm": 0.7210946459970887, "learning_rate": 1.3023195458231957e-05, "loss": 0.6664, "step": 14179 }, { "epoch": 0.41400251087559486, "grad_norm": 0.7356759136406383, "learning_rate": 1.3022546634225467e-05, "loss": 0.6799, "step": 14180 }, { "epoch": 0.4140317071034422, "grad_norm": 0.6326754937085968, "learning_rate": 1.302189781021898e-05, "loss": 0.5743, "step": 14181 }, { "epoch": 0.4140609033312896, "grad_norm": 0.6005907748641047, "learning_rate": 1.3021248986212491e-05, "loss": 0.5694, "step": 14182 }, { "epoch": 0.41409009955913695, "grad_norm": 0.6056961911450593, "learning_rate": 1.3020600162206003e-05, "loss": 0.5809, "step": 14183 }, { "epoch": 0.4141192957869843, "grad_norm": 0.6204429623335108, "learning_rate": 1.3019951338199515e-05, "loss": 0.556, "step": 14184 }, { "epoch": 0.41414849201483167, "grad_norm": 0.6893109775733007, "learning_rate": 1.3019302514193026e-05, "loss": 0.7528, "step": 14185 }, { "epoch": 0.41417768824267903, "grad_norm": 0.6057723530943926, "learning_rate": 1.3018653690186538e-05, "loss": 0.5265, "step": 14186 }, { "epoch": 0.4142068844705264, "grad_norm": 0.6573720541028423, "learning_rate": 1.301800486618005e-05, "loss": 0.5916, "step": 14187 }, { "epoch": 0.41423608069837375, "grad_norm": 0.6813621099326902, "learning_rate": 1.3017356042173562e-05, "loss": 0.6575, "step": 14188 }, { "epoch": 0.4142652769262211, "grad_norm": 0.6586086947668257, "learning_rate": 1.3016707218167072e-05, "loss": 0.6147, "step": 14189 }, { "epoch": 0.4142944731540685, "grad_norm": 0.6515603894290285, "learning_rate": 1.3016058394160584e-05, "loss": 0.6145, "step": 14190 }, { "epoch": 0.41432366938191584, "grad_norm": 0.5998540250748036, "learning_rate": 1.3015409570154096e-05, "loss": 0.5455, "step": 14191 }, { "epoch": 0.4143528656097632, "grad_norm": 0.7104669876740246, "learning_rate": 1.3014760746147608e-05, "loss": 0.7246, "step": 14192 }, { "epoch": 0.41438206183761056, "grad_norm": 0.6969372606448851, "learning_rate": 1.301411192214112e-05, "loss": 0.68, "step": 14193 }, { "epoch": 0.4144112580654579, "grad_norm": 0.6053621567547485, "learning_rate": 1.301346309813463e-05, "loss": 0.5317, "step": 14194 }, { "epoch": 0.4144404542933053, "grad_norm": 0.6984867888654328, "learning_rate": 1.3012814274128144e-05, "loss": 0.7212, "step": 14195 }, { "epoch": 0.41446965052115264, "grad_norm": 0.614979692967735, "learning_rate": 1.3012165450121656e-05, "loss": 0.5536, "step": 14196 }, { "epoch": 0.414498846749, "grad_norm": 0.6616920772377167, "learning_rate": 1.3011516626115168e-05, "loss": 0.63, "step": 14197 }, { "epoch": 0.41452804297684737, "grad_norm": 0.701129866562278, "learning_rate": 1.301086780210868e-05, "loss": 0.7067, "step": 14198 }, { "epoch": 0.4145572392046947, "grad_norm": 0.6342095899416085, "learning_rate": 1.301021897810219e-05, "loss": 0.568, "step": 14199 }, { "epoch": 0.4145864354325421, "grad_norm": 0.6674030379312527, "learning_rate": 1.3009570154095703e-05, "loss": 0.6649, "step": 14200 }, { "epoch": 0.4146156316603895, "grad_norm": 0.702481418758171, "learning_rate": 1.3008921330089215e-05, "loss": 0.6974, "step": 14201 }, { "epoch": 0.41464482788823687, "grad_norm": 0.6232856134726777, "learning_rate": 1.3008272506082727e-05, "loss": 0.6517, "step": 14202 }, { "epoch": 0.4146740241160842, "grad_norm": 0.6200423680110365, "learning_rate": 1.3007623682076239e-05, "loss": 0.5765, "step": 14203 }, { "epoch": 0.4147032203439316, "grad_norm": 0.6725771752094477, "learning_rate": 1.3006974858069749e-05, "loss": 0.6294, "step": 14204 }, { "epoch": 0.41473241657177895, "grad_norm": 0.623145628220127, "learning_rate": 1.3006326034063261e-05, "loss": 0.5862, "step": 14205 }, { "epoch": 0.4147616127996263, "grad_norm": 0.70117747053945, "learning_rate": 1.3005677210056773e-05, "loss": 0.739, "step": 14206 }, { "epoch": 0.4147908090274737, "grad_norm": 0.6854888194034362, "learning_rate": 1.3005028386050285e-05, "loss": 0.6585, "step": 14207 }, { "epoch": 0.41482000525532103, "grad_norm": 0.5942586343571722, "learning_rate": 1.3004379562043796e-05, "loss": 0.5289, "step": 14208 }, { "epoch": 0.4148492014831684, "grad_norm": 0.657417734361316, "learning_rate": 1.3003730738037308e-05, "loss": 0.6063, "step": 14209 }, { "epoch": 0.41487839771101576, "grad_norm": 0.6608996952552012, "learning_rate": 1.300308191403082e-05, "loss": 0.6668, "step": 14210 }, { "epoch": 0.4149075939388631, "grad_norm": 0.6670869038658349, "learning_rate": 1.3002433090024332e-05, "loss": 0.6567, "step": 14211 }, { "epoch": 0.4149367901667105, "grad_norm": 0.6271826297963835, "learning_rate": 1.3001784266017844e-05, "loss": 0.604, "step": 14212 }, { "epoch": 0.41496598639455784, "grad_norm": 0.6883459296546572, "learning_rate": 1.3001135442011354e-05, "loss": 0.6718, "step": 14213 }, { "epoch": 0.4149951826224052, "grad_norm": 0.6416546315145754, "learning_rate": 1.3000486618004868e-05, "loss": 0.5471, "step": 14214 }, { "epoch": 0.41502437885025256, "grad_norm": 0.6806770497455525, "learning_rate": 1.299983779399838e-05, "loss": 0.7001, "step": 14215 }, { "epoch": 0.4150535750780999, "grad_norm": 0.6411383467283849, "learning_rate": 1.2999188969991892e-05, "loss": 0.6124, "step": 14216 }, { "epoch": 0.4150827713059473, "grad_norm": 0.6905593926416437, "learning_rate": 1.2998540145985404e-05, "loss": 0.6924, "step": 14217 }, { "epoch": 0.41511196753379465, "grad_norm": 0.6431428004336278, "learning_rate": 1.2997891321978914e-05, "loss": 0.6267, "step": 14218 }, { "epoch": 0.415141163761642, "grad_norm": 0.6360051087797366, "learning_rate": 1.2997242497972426e-05, "loss": 0.5888, "step": 14219 }, { "epoch": 0.41517035998948937, "grad_norm": 0.593731757719598, "learning_rate": 1.2996593673965938e-05, "loss": 0.5236, "step": 14220 }, { "epoch": 0.41519955621733673, "grad_norm": 0.6528882181908202, "learning_rate": 1.299594484995945e-05, "loss": 0.6981, "step": 14221 }, { "epoch": 0.4152287524451841, "grad_norm": 0.6535743358353658, "learning_rate": 1.2995296025952962e-05, "loss": 0.6542, "step": 14222 }, { "epoch": 0.41525794867303145, "grad_norm": 0.6694281744864357, "learning_rate": 1.2994647201946473e-05, "loss": 0.6651, "step": 14223 }, { "epoch": 0.4152871449008788, "grad_norm": 0.592473956223792, "learning_rate": 1.2993998377939985e-05, "loss": 0.5547, "step": 14224 }, { "epoch": 0.4153163411287262, "grad_norm": 0.6210337096507792, "learning_rate": 1.2993349553933497e-05, "loss": 0.6011, "step": 14225 }, { "epoch": 0.41534553735657354, "grad_norm": 0.6506654717142138, "learning_rate": 1.2992700729927009e-05, "loss": 0.6352, "step": 14226 }, { "epoch": 0.4153747335844209, "grad_norm": 0.60488083304528, "learning_rate": 1.2992051905920519e-05, "loss": 0.5606, "step": 14227 }, { "epoch": 0.41540392981226826, "grad_norm": 0.7040762807119653, "learning_rate": 1.2991403081914031e-05, "loss": 0.7033, "step": 14228 }, { "epoch": 0.4154331260401156, "grad_norm": 0.6166589498043227, "learning_rate": 1.2990754257907543e-05, "loss": 0.567, "step": 14229 }, { "epoch": 0.415462322267963, "grad_norm": 0.7035797954712625, "learning_rate": 1.2990105433901055e-05, "loss": 0.7472, "step": 14230 }, { "epoch": 0.41549151849581034, "grad_norm": 0.6786773716714912, "learning_rate": 1.2989456609894565e-05, "loss": 0.6715, "step": 14231 }, { "epoch": 0.4155207147236577, "grad_norm": 0.6704333069981322, "learning_rate": 1.2988807785888077e-05, "loss": 0.6292, "step": 14232 }, { "epoch": 0.41554991095150506, "grad_norm": 0.6672224107059743, "learning_rate": 1.2988158961881591e-05, "loss": 0.641, "step": 14233 }, { "epoch": 0.4155791071793524, "grad_norm": 0.6428465977238731, "learning_rate": 1.2987510137875103e-05, "loss": 0.6599, "step": 14234 }, { "epoch": 0.4156083034071998, "grad_norm": 0.6560509752870491, "learning_rate": 1.2986861313868615e-05, "loss": 0.6441, "step": 14235 }, { "epoch": 0.41563749963504715, "grad_norm": 0.6482752602778065, "learning_rate": 1.2986212489862127e-05, "loss": 0.6403, "step": 14236 }, { "epoch": 0.4156666958628945, "grad_norm": 0.6351991157118831, "learning_rate": 1.2985563665855638e-05, "loss": 0.596, "step": 14237 }, { "epoch": 0.41569589209074187, "grad_norm": 0.7304589484587009, "learning_rate": 1.298491484184915e-05, "loss": 0.6799, "step": 14238 }, { "epoch": 0.41572508831858923, "grad_norm": 0.6550546553745336, "learning_rate": 1.2984266017842662e-05, "loss": 0.6101, "step": 14239 }, { "epoch": 0.4157542845464366, "grad_norm": 0.7214573418482854, "learning_rate": 1.2983617193836174e-05, "loss": 0.6974, "step": 14240 }, { "epoch": 0.41578348077428395, "grad_norm": 0.6973481042443629, "learning_rate": 1.2982968369829686e-05, "loss": 0.5947, "step": 14241 }, { "epoch": 0.4158126770021313, "grad_norm": 0.6955264885197049, "learning_rate": 1.2982319545823196e-05, "loss": 0.6843, "step": 14242 }, { "epoch": 0.4158418732299787, "grad_norm": 0.6645215166427625, "learning_rate": 1.2981670721816708e-05, "loss": 0.6663, "step": 14243 }, { "epoch": 0.41587106945782604, "grad_norm": 0.7035201207600018, "learning_rate": 1.298102189781022e-05, "loss": 0.7159, "step": 14244 }, { "epoch": 0.4159002656856734, "grad_norm": 0.6612950794553563, "learning_rate": 1.2980373073803732e-05, "loss": 0.6797, "step": 14245 }, { "epoch": 0.41592946191352076, "grad_norm": 0.6966877376703806, "learning_rate": 1.2979724249797242e-05, "loss": 0.6239, "step": 14246 }, { "epoch": 0.4159586581413681, "grad_norm": 0.6194215899387976, "learning_rate": 1.2979075425790754e-05, "loss": 0.5139, "step": 14247 }, { "epoch": 0.4159878543692155, "grad_norm": 0.6433349756470784, "learning_rate": 1.2978426601784266e-05, "loss": 0.5981, "step": 14248 }, { "epoch": 0.41601705059706284, "grad_norm": 0.6551507089991969, "learning_rate": 1.2977777777777779e-05, "loss": 0.6019, "step": 14249 }, { "epoch": 0.4160462468249102, "grad_norm": 0.6406156879781398, "learning_rate": 1.2977128953771289e-05, "loss": 0.6391, "step": 14250 }, { "epoch": 0.41607544305275757, "grad_norm": 0.611262195944299, "learning_rate": 1.2976480129764801e-05, "loss": 0.589, "step": 14251 }, { "epoch": 0.4161046392806049, "grad_norm": 0.6440527044138724, "learning_rate": 1.2975831305758315e-05, "loss": 0.589, "step": 14252 }, { "epoch": 0.4161338355084523, "grad_norm": 0.6307873884146646, "learning_rate": 1.2975182481751827e-05, "loss": 0.5752, "step": 14253 }, { "epoch": 0.41616303173629965, "grad_norm": 0.6487068923457274, "learning_rate": 1.2974533657745339e-05, "loss": 0.6256, "step": 14254 }, { "epoch": 0.416192227964147, "grad_norm": 0.6143517795051103, "learning_rate": 1.297388483373885e-05, "loss": 0.5768, "step": 14255 }, { "epoch": 0.41622142419199437, "grad_norm": 0.6437203538277728, "learning_rate": 1.2973236009732361e-05, "loss": 0.6198, "step": 14256 }, { "epoch": 0.41625062041984173, "grad_norm": 0.6155628324082189, "learning_rate": 1.2972587185725873e-05, "loss": 0.6139, "step": 14257 }, { "epoch": 0.4162798166476891, "grad_norm": 0.6742242780959918, "learning_rate": 1.2971938361719385e-05, "loss": 0.5838, "step": 14258 }, { "epoch": 0.41630901287553645, "grad_norm": 0.651955796111741, "learning_rate": 1.2971289537712897e-05, "loss": 0.5871, "step": 14259 }, { "epoch": 0.4163382091033838, "grad_norm": 0.6991682044401412, "learning_rate": 1.2970640713706409e-05, "loss": 0.6867, "step": 14260 }, { "epoch": 0.4163674053312312, "grad_norm": 0.6635871788005671, "learning_rate": 1.296999188969992e-05, "loss": 0.644, "step": 14261 }, { "epoch": 0.4163966015590786, "grad_norm": 0.604652450229725, "learning_rate": 1.2969343065693431e-05, "loss": 0.5849, "step": 14262 }, { "epoch": 0.41642579778692596, "grad_norm": 0.6563209888026094, "learning_rate": 1.2968694241686944e-05, "loss": 0.6429, "step": 14263 }, { "epoch": 0.4164549940147733, "grad_norm": 0.6113381619694168, "learning_rate": 1.2968045417680456e-05, "loss": 0.5814, "step": 14264 }, { "epoch": 0.4164841902426207, "grad_norm": 0.6309562132595928, "learning_rate": 1.2967396593673966e-05, "loss": 0.5808, "step": 14265 }, { "epoch": 0.41651338647046804, "grad_norm": 0.6358141474374023, "learning_rate": 1.2966747769667478e-05, "loss": 0.5852, "step": 14266 }, { "epoch": 0.4165425826983154, "grad_norm": 0.6516585035560588, "learning_rate": 1.296609894566099e-05, "loss": 0.6593, "step": 14267 }, { "epoch": 0.41657177892616276, "grad_norm": 0.6774431965055735, "learning_rate": 1.2965450121654502e-05, "loss": 0.6268, "step": 14268 }, { "epoch": 0.4166009751540101, "grad_norm": 0.6414025717720054, "learning_rate": 1.2964801297648012e-05, "loss": 0.5995, "step": 14269 }, { "epoch": 0.4166301713818575, "grad_norm": 0.6143610627610183, "learning_rate": 1.2964152473641524e-05, "loss": 0.5929, "step": 14270 }, { "epoch": 0.41665936760970484, "grad_norm": 0.787188172201525, "learning_rate": 1.2963503649635038e-05, "loss": 0.7415, "step": 14271 }, { "epoch": 0.4166885638375522, "grad_norm": 0.6728147845970399, "learning_rate": 1.296285482562855e-05, "loss": 0.6896, "step": 14272 }, { "epoch": 0.41671776006539957, "grad_norm": 0.705915265821498, "learning_rate": 1.2962206001622062e-05, "loss": 0.6728, "step": 14273 }, { "epoch": 0.41674695629324693, "grad_norm": 0.5903595307326924, "learning_rate": 1.2961557177615574e-05, "loss": 0.5147, "step": 14274 }, { "epoch": 0.4167761525210943, "grad_norm": 0.6011659369303455, "learning_rate": 1.2960908353609084e-05, "loss": 0.5476, "step": 14275 }, { "epoch": 0.41680534874894165, "grad_norm": 0.6495904203084792, "learning_rate": 1.2960259529602596e-05, "loss": 0.6622, "step": 14276 }, { "epoch": 0.416834544976789, "grad_norm": 0.6143803426536342, "learning_rate": 1.2959610705596108e-05, "loss": 0.6058, "step": 14277 }, { "epoch": 0.4168637412046364, "grad_norm": 0.6120306239881222, "learning_rate": 1.295896188158962e-05, "loss": 0.5913, "step": 14278 }, { "epoch": 0.41689293743248373, "grad_norm": 0.7196041277833858, "learning_rate": 1.2958313057583133e-05, "loss": 0.6753, "step": 14279 }, { "epoch": 0.4169221336603311, "grad_norm": 0.6692578312527728, "learning_rate": 1.2957664233576643e-05, "loss": 0.6754, "step": 14280 }, { "epoch": 0.41695132988817846, "grad_norm": 0.6815983235061142, "learning_rate": 1.2957015409570155e-05, "loss": 0.6495, "step": 14281 }, { "epoch": 0.4169805261160258, "grad_norm": 0.62482365616145, "learning_rate": 1.2956366585563667e-05, "loss": 0.5795, "step": 14282 }, { "epoch": 0.4170097223438732, "grad_norm": 0.6985791392148614, "learning_rate": 1.2955717761557179e-05, "loss": 0.6752, "step": 14283 }, { "epoch": 0.41703891857172054, "grad_norm": 0.6178190892088066, "learning_rate": 1.295506893755069e-05, "loss": 0.5763, "step": 14284 }, { "epoch": 0.4170681147995679, "grad_norm": 0.6708736305896873, "learning_rate": 1.2954420113544201e-05, "loss": 0.6593, "step": 14285 }, { "epoch": 0.41709731102741526, "grad_norm": 0.6286030536187176, "learning_rate": 1.2953771289537713e-05, "loss": 0.6282, "step": 14286 }, { "epoch": 0.4171265072552626, "grad_norm": 0.6433246469704152, "learning_rate": 1.2953122465531225e-05, "loss": 0.5789, "step": 14287 }, { "epoch": 0.41715570348311, "grad_norm": 0.6236453766347576, "learning_rate": 1.2952473641524736e-05, "loss": 0.6003, "step": 14288 }, { "epoch": 0.41718489971095735, "grad_norm": 0.68229828390515, "learning_rate": 1.2951824817518248e-05, "loss": 0.6605, "step": 14289 }, { "epoch": 0.4172140959388047, "grad_norm": 0.6351814388912248, "learning_rate": 1.2951175993511761e-05, "loss": 0.5951, "step": 14290 }, { "epoch": 0.41724329216665207, "grad_norm": 0.6634256962113699, "learning_rate": 1.2950527169505273e-05, "loss": 0.6806, "step": 14291 }, { "epoch": 0.41727248839449943, "grad_norm": 0.6150466947197876, "learning_rate": 1.2949878345498786e-05, "loss": 0.5647, "step": 14292 }, { "epoch": 0.4173016846223468, "grad_norm": 0.6500275882363808, "learning_rate": 1.2949229521492298e-05, "loss": 0.6812, "step": 14293 }, { "epoch": 0.41733088085019415, "grad_norm": 0.6756613412397562, "learning_rate": 1.2948580697485808e-05, "loss": 0.6682, "step": 14294 }, { "epoch": 0.4173600770780415, "grad_norm": 0.6536761833606246, "learning_rate": 1.294793187347932e-05, "loss": 0.6398, "step": 14295 }, { "epoch": 0.4173892733058889, "grad_norm": 0.6836778017769597, "learning_rate": 1.2947283049472832e-05, "loss": 0.6688, "step": 14296 }, { "epoch": 0.41741846953373624, "grad_norm": 0.6424532617687911, "learning_rate": 1.2946634225466344e-05, "loss": 0.6029, "step": 14297 }, { "epoch": 0.4174476657615836, "grad_norm": 0.6998228691901601, "learning_rate": 1.2945985401459856e-05, "loss": 0.7415, "step": 14298 }, { "epoch": 0.41747686198943096, "grad_norm": 0.6429881621744034, "learning_rate": 1.2945336577453366e-05, "loss": 0.5827, "step": 14299 }, { "epoch": 0.4175060582172783, "grad_norm": 0.6681534293130377, "learning_rate": 1.2944687753446878e-05, "loss": 0.6499, "step": 14300 }, { "epoch": 0.4175352544451257, "grad_norm": 0.6596111414333385, "learning_rate": 1.294403892944039e-05, "loss": 0.6544, "step": 14301 }, { "epoch": 0.41756445067297304, "grad_norm": 0.6545220941280361, "learning_rate": 1.2943390105433902e-05, "loss": 0.6291, "step": 14302 }, { "epoch": 0.4175936469008204, "grad_norm": 0.6620041953010658, "learning_rate": 1.2942741281427413e-05, "loss": 0.6421, "step": 14303 }, { "epoch": 0.41762284312866776, "grad_norm": 0.7074928711449247, "learning_rate": 1.2942092457420925e-05, "loss": 0.7515, "step": 14304 }, { "epoch": 0.4176520393565151, "grad_norm": 0.6300479316875911, "learning_rate": 1.2941443633414437e-05, "loss": 0.5887, "step": 14305 }, { "epoch": 0.4176812355843625, "grad_norm": 0.6472353704808818, "learning_rate": 1.2940794809407949e-05, "loss": 0.5843, "step": 14306 }, { "epoch": 0.41771043181220985, "grad_norm": 0.7273911655065853, "learning_rate": 1.2940145985401459e-05, "loss": 0.7076, "step": 14307 }, { "epoch": 0.4177396280400572, "grad_norm": 0.597719442817163, "learning_rate": 1.2939497161394971e-05, "loss": 0.5542, "step": 14308 }, { "epoch": 0.41776882426790457, "grad_norm": 0.6035748094162262, "learning_rate": 1.2938848337388485e-05, "loss": 0.5562, "step": 14309 }, { "epoch": 0.41779802049575193, "grad_norm": 0.6572036975581597, "learning_rate": 1.2938199513381997e-05, "loss": 0.6461, "step": 14310 }, { "epoch": 0.4178272167235993, "grad_norm": 0.7775030593624991, "learning_rate": 1.2937550689375509e-05, "loss": 0.7786, "step": 14311 }, { "epoch": 0.41785641295144665, "grad_norm": 0.6529282969904102, "learning_rate": 1.2936901865369021e-05, "loss": 0.5683, "step": 14312 }, { "epoch": 0.417885609179294, "grad_norm": 0.6428658618103028, "learning_rate": 1.2936253041362531e-05, "loss": 0.6006, "step": 14313 }, { "epoch": 0.4179148054071414, "grad_norm": 0.6302753191327223, "learning_rate": 1.2935604217356043e-05, "loss": 0.5816, "step": 14314 }, { "epoch": 0.41794400163498874, "grad_norm": 0.6215834891121642, "learning_rate": 1.2934955393349555e-05, "loss": 0.5967, "step": 14315 }, { "epoch": 0.4179731978628361, "grad_norm": 0.6765229701644466, "learning_rate": 1.2934306569343067e-05, "loss": 0.7065, "step": 14316 }, { "epoch": 0.41800239409068346, "grad_norm": 0.7082938923000016, "learning_rate": 1.293365774533658e-05, "loss": 0.7135, "step": 14317 }, { "epoch": 0.4180315903185308, "grad_norm": 0.610670470057899, "learning_rate": 1.293300892133009e-05, "loss": 0.551, "step": 14318 }, { "epoch": 0.4180607865463782, "grad_norm": 0.6535099924816973, "learning_rate": 1.2932360097323602e-05, "loss": 0.6449, "step": 14319 }, { "epoch": 0.41808998277422554, "grad_norm": 0.704536421169846, "learning_rate": 1.2931711273317114e-05, "loss": 0.7063, "step": 14320 }, { "epoch": 0.4181191790020729, "grad_norm": 0.6736293305242221, "learning_rate": 1.2931062449310626e-05, "loss": 0.6425, "step": 14321 }, { "epoch": 0.4181483752299203, "grad_norm": 0.6525669499925907, "learning_rate": 1.2930413625304136e-05, "loss": 0.6408, "step": 14322 }, { "epoch": 0.4181775714577677, "grad_norm": 0.6067811978418949, "learning_rate": 1.2929764801297648e-05, "loss": 0.5155, "step": 14323 }, { "epoch": 0.41820676768561504, "grad_norm": 0.6618938733587005, "learning_rate": 1.292911597729116e-05, "loss": 0.6366, "step": 14324 }, { "epoch": 0.4182359639134624, "grad_norm": 0.658362541259741, "learning_rate": 1.2928467153284672e-05, "loss": 0.6493, "step": 14325 }, { "epoch": 0.41826516014130977, "grad_norm": 0.6810272647560454, "learning_rate": 1.2927818329278183e-05, "loss": 0.6819, "step": 14326 }, { "epoch": 0.4182943563691571, "grad_norm": 0.6390092786487779, "learning_rate": 1.2927169505271695e-05, "loss": 0.6165, "step": 14327 }, { "epoch": 0.4183235525970045, "grad_norm": 0.6381653392273356, "learning_rate": 1.2926520681265208e-05, "loss": 0.6291, "step": 14328 }, { "epoch": 0.41835274882485185, "grad_norm": 0.6818499684129422, "learning_rate": 1.292587185725872e-05, "loss": 0.7056, "step": 14329 }, { "epoch": 0.4183819450526992, "grad_norm": 0.6687883201508098, "learning_rate": 1.2925223033252232e-05, "loss": 0.6686, "step": 14330 }, { "epoch": 0.4184111412805466, "grad_norm": 0.6685447790313624, "learning_rate": 1.2924574209245744e-05, "loss": 0.7049, "step": 14331 }, { "epoch": 0.41844033750839393, "grad_norm": 0.6491383586416232, "learning_rate": 1.2923925385239255e-05, "loss": 0.5913, "step": 14332 }, { "epoch": 0.4184695337362413, "grad_norm": 0.6158141891582379, "learning_rate": 1.2923276561232767e-05, "loss": 0.5945, "step": 14333 }, { "epoch": 0.41849872996408866, "grad_norm": 0.6589852460084124, "learning_rate": 1.2922627737226279e-05, "loss": 0.6471, "step": 14334 }, { "epoch": 0.418527926191936, "grad_norm": 0.6602088558726551, "learning_rate": 1.292197891321979e-05, "loss": 0.5835, "step": 14335 }, { "epoch": 0.4185571224197834, "grad_norm": 0.6461971748269617, "learning_rate": 1.2921330089213303e-05, "loss": 0.6681, "step": 14336 }, { "epoch": 0.41858631864763074, "grad_norm": 0.5933457200923369, "learning_rate": 1.2920681265206813e-05, "loss": 0.5342, "step": 14337 }, { "epoch": 0.4186155148754781, "grad_norm": 0.6573146178121952, "learning_rate": 1.2920032441200325e-05, "loss": 0.6533, "step": 14338 }, { "epoch": 0.41864471110332546, "grad_norm": 0.655126338677415, "learning_rate": 1.2919383617193837e-05, "loss": 0.6333, "step": 14339 }, { "epoch": 0.4186739073311728, "grad_norm": 0.6340276092559556, "learning_rate": 1.291873479318735e-05, "loss": 0.6414, "step": 14340 }, { "epoch": 0.4187031035590202, "grad_norm": 0.6439183746876318, "learning_rate": 1.291808596918086e-05, "loss": 0.6976, "step": 14341 }, { "epoch": 0.41873229978686755, "grad_norm": 0.6875479817829343, "learning_rate": 1.2917437145174372e-05, "loss": 0.7264, "step": 14342 }, { "epoch": 0.4187614960147149, "grad_norm": 0.6567158644709941, "learning_rate": 1.2916788321167884e-05, "loss": 0.5523, "step": 14343 }, { "epoch": 0.41879069224256227, "grad_norm": 0.7111372118415407, "learning_rate": 1.2916139497161396e-05, "loss": 0.7006, "step": 14344 }, { "epoch": 0.41881988847040963, "grad_norm": 0.6581679531173764, "learning_rate": 1.2915490673154906e-05, "loss": 0.6406, "step": 14345 }, { "epoch": 0.418849084698257, "grad_norm": 0.6756417035370212, "learning_rate": 1.2914841849148421e-05, "loss": 0.6686, "step": 14346 }, { "epoch": 0.41887828092610435, "grad_norm": 0.6299119090295052, "learning_rate": 1.2914193025141932e-05, "loss": 0.6176, "step": 14347 }, { "epoch": 0.4189074771539517, "grad_norm": 0.6512696209287913, "learning_rate": 1.2913544201135444e-05, "loss": 0.6521, "step": 14348 }, { "epoch": 0.4189366733817991, "grad_norm": 0.6842740737193612, "learning_rate": 1.2912895377128956e-05, "loss": 0.6898, "step": 14349 }, { "epoch": 0.41896586960964644, "grad_norm": 0.9481723033185305, "learning_rate": 1.2912246553122468e-05, "loss": 0.6339, "step": 14350 }, { "epoch": 0.4189950658374938, "grad_norm": 0.674246207215271, "learning_rate": 1.2911597729115978e-05, "loss": 0.6435, "step": 14351 }, { "epoch": 0.41902426206534116, "grad_norm": 0.6774409034801215, "learning_rate": 1.291094890510949e-05, "loss": 0.6681, "step": 14352 }, { "epoch": 0.4190534582931885, "grad_norm": 0.674206032611204, "learning_rate": 1.2910300081103002e-05, "loss": 0.696, "step": 14353 }, { "epoch": 0.4190826545210359, "grad_norm": 0.7474857112728656, "learning_rate": 1.2909651257096514e-05, "loss": 0.7325, "step": 14354 }, { "epoch": 0.41911185074888324, "grad_norm": 0.6536752103440444, "learning_rate": 1.2909002433090025e-05, "loss": 0.6261, "step": 14355 }, { "epoch": 0.4191410469767306, "grad_norm": 0.6931938314790459, "learning_rate": 1.2908353609083537e-05, "loss": 0.6928, "step": 14356 }, { "epoch": 0.41917024320457796, "grad_norm": 0.678177696901348, "learning_rate": 1.2907704785077049e-05, "loss": 0.666, "step": 14357 }, { "epoch": 0.4191994394324253, "grad_norm": 0.6602599885209118, "learning_rate": 1.290705596107056e-05, "loss": 0.6882, "step": 14358 }, { "epoch": 0.4192286356602727, "grad_norm": 0.6603279915416428, "learning_rate": 1.2906407137064073e-05, "loss": 0.6314, "step": 14359 }, { "epoch": 0.41925783188812005, "grad_norm": 0.649054432266562, "learning_rate": 1.2905758313057583e-05, "loss": 0.6099, "step": 14360 }, { "epoch": 0.4192870281159674, "grad_norm": 0.6642759762680649, "learning_rate": 1.2905109489051095e-05, "loss": 0.6518, "step": 14361 }, { "epoch": 0.41931622434381477, "grad_norm": 0.6461527354240877, "learning_rate": 1.2904460665044607e-05, "loss": 0.6184, "step": 14362 }, { "epoch": 0.41934542057166213, "grad_norm": 0.5890122438524736, "learning_rate": 1.2903811841038119e-05, "loss": 0.558, "step": 14363 }, { "epoch": 0.4193746167995095, "grad_norm": 0.6340577470814529, "learning_rate": 1.290316301703163e-05, "loss": 0.6402, "step": 14364 }, { "epoch": 0.41940381302735685, "grad_norm": 0.7138946377990245, "learning_rate": 1.2902514193025145e-05, "loss": 0.6595, "step": 14365 }, { "epoch": 0.4194330092552042, "grad_norm": 0.6455715806722291, "learning_rate": 1.2901865369018655e-05, "loss": 0.6212, "step": 14366 }, { "epoch": 0.4194622054830516, "grad_norm": 0.621982051359646, "learning_rate": 1.2901216545012167e-05, "loss": 0.6174, "step": 14367 }, { "epoch": 0.41949140171089894, "grad_norm": 0.6836463052582155, "learning_rate": 1.290056772100568e-05, "loss": 0.7207, "step": 14368 }, { "epoch": 0.4195205979387463, "grad_norm": 0.6326599372945861, "learning_rate": 1.2899918896999191e-05, "loss": 0.588, "step": 14369 }, { "epoch": 0.41954979416659366, "grad_norm": 0.5966362545374757, "learning_rate": 1.2899270072992702e-05, "loss": 0.5674, "step": 14370 }, { "epoch": 0.419578990394441, "grad_norm": 0.6549840393521273, "learning_rate": 1.2898621248986214e-05, "loss": 0.6046, "step": 14371 }, { "epoch": 0.4196081866222884, "grad_norm": 0.6688842239816624, "learning_rate": 1.2897972424979726e-05, "loss": 0.6838, "step": 14372 }, { "epoch": 0.41963738285013574, "grad_norm": 0.6363749935965769, "learning_rate": 1.2897323600973238e-05, "loss": 0.6052, "step": 14373 }, { "epoch": 0.4196665790779831, "grad_norm": 0.6695442632220968, "learning_rate": 1.2896674776966748e-05, "loss": 0.6039, "step": 14374 }, { "epoch": 0.41969577530583047, "grad_norm": 0.7271691079153162, "learning_rate": 1.289602595296026e-05, "loss": 0.7274, "step": 14375 }, { "epoch": 0.4197249715336778, "grad_norm": 0.680948495418338, "learning_rate": 1.2895377128953772e-05, "loss": 0.694, "step": 14376 }, { "epoch": 0.4197541677615252, "grad_norm": 0.6198187060733437, "learning_rate": 1.2894728304947284e-05, "loss": 0.551, "step": 14377 }, { "epoch": 0.41978336398937255, "grad_norm": 0.6797362780037054, "learning_rate": 1.2894079480940796e-05, "loss": 0.668, "step": 14378 }, { "epoch": 0.4198125602172199, "grad_norm": 0.6398983572448604, "learning_rate": 1.2893430656934306e-05, "loss": 0.6311, "step": 14379 }, { "epoch": 0.41984175644506727, "grad_norm": 0.5950060247467261, "learning_rate": 1.2892781832927818e-05, "loss": 0.5192, "step": 14380 }, { "epoch": 0.41987095267291463, "grad_norm": 0.6620562466605086, "learning_rate": 1.289213300892133e-05, "loss": 0.6674, "step": 14381 }, { "epoch": 0.41990014890076205, "grad_norm": 0.6531754807843647, "learning_rate": 1.2891484184914843e-05, "loss": 0.6556, "step": 14382 }, { "epoch": 0.4199293451286094, "grad_norm": 0.6697867889962797, "learning_rate": 1.2890835360908353e-05, "loss": 0.6816, "step": 14383 }, { "epoch": 0.41995854135645677, "grad_norm": 0.6622686796251533, "learning_rate": 1.2890186536901868e-05, "loss": 0.6658, "step": 14384 }, { "epoch": 0.41998773758430413, "grad_norm": 0.6388256539893453, "learning_rate": 1.2889537712895379e-05, "loss": 0.6416, "step": 14385 }, { "epoch": 0.4200169338121515, "grad_norm": 0.6916005992003844, "learning_rate": 1.288888888888889e-05, "loss": 0.7192, "step": 14386 }, { "epoch": 0.42004613003999886, "grad_norm": 0.5739825007263419, "learning_rate": 1.2888240064882403e-05, "loss": 0.5161, "step": 14387 }, { "epoch": 0.4200753262678462, "grad_norm": 0.6148681711480497, "learning_rate": 1.2887591240875915e-05, "loss": 0.5617, "step": 14388 }, { "epoch": 0.4201045224956936, "grad_norm": 0.6682793686312738, "learning_rate": 1.2886942416869425e-05, "loss": 0.6876, "step": 14389 }, { "epoch": 0.42013371872354094, "grad_norm": 0.6548386351988131, "learning_rate": 1.2886293592862937e-05, "loss": 0.5748, "step": 14390 }, { "epoch": 0.4201629149513883, "grad_norm": 0.5909511886466386, "learning_rate": 1.2885644768856449e-05, "loss": 0.5359, "step": 14391 }, { "epoch": 0.42019211117923566, "grad_norm": 0.6403701284406165, "learning_rate": 1.2884995944849961e-05, "loss": 0.6059, "step": 14392 }, { "epoch": 0.420221307407083, "grad_norm": 0.6675998172090242, "learning_rate": 1.2884347120843471e-05, "loss": 0.6878, "step": 14393 }, { "epoch": 0.4202505036349304, "grad_norm": 0.6218546805112892, "learning_rate": 1.2883698296836983e-05, "loss": 0.6063, "step": 14394 }, { "epoch": 0.42027969986277774, "grad_norm": 0.5975302260554178, "learning_rate": 1.2883049472830496e-05, "loss": 0.5202, "step": 14395 }, { "epoch": 0.4203088960906251, "grad_norm": 0.6955361593471641, "learning_rate": 1.2882400648824008e-05, "loss": 0.6456, "step": 14396 }, { "epoch": 0.42033809231847247, "grad_norm": 0.6405114631431141, "learning_rate": 1.288175182481752e-05, "loss": 0.6169, "step": 14397 }, { "epoch": 0.42036728854631983, "grad_norm": 0.6801508386378043, "learning_rate": 1.288110300081103e-05, "loss": 0.6635, "step": 14398 }, { "epoch": 0.4203964847741672, "grad_norm": 0.626484930500284, "learning_rate": 1.2880454176804542e-05, "loss": 0.6026, "step": 14399 }, { "epoch": 0.42042568100201455, "grad_norm": 0.6215809579306526, "learning_rate": 1.2879805352798054e-05, "loss": 0.6, "step": 14400 }, { "epoch": 0.4204548772298619, "grad_norm": 0.6353977427120454, "learning_rate": 1.2879156528791566e-05, "loss": 0.5325, "step": 14401 }, { "epoch": 0.4204840734577093, "grad_norm": 0.6171951678118052, "learning_rate": 1.2878507704785076e-05, "loss": 0.5408, "step": 14402 }, { "epoch": 0.42051326968555663, "grad_norm": 0.6710470414522135, "learning_rate": 1.2877858880778592e-05, "loss": 0.6504, "step": 14403 }, { "epoch": 0.420542465913404, "grad_norm": 0.6524729685508867, "learning_rate": 1.2877210056772102e-05, "loss": 0.6078, "step": 14404 }, { "epoch": 0.42057166214125136, "grad_norm": 0.6182477496183056, "learning_rate": 1.2876561232765614e-05, "loss": 0.5994, "step": 14405 }, { "epoch": 0.4206008583690987, "grad_norm": 0.5807891750037647, "learning_rate": 1.2875912408759126e-05, "loss": 0.5198, "step": 14406 }, { "epoch": 0.4206300545969461, "grad_norm": 0.6154115806913129, "learning_rate": 1.2875263584752638e-05, "loss": 0.6229, "step": 14407 }, { "epoch": 0.42065925082479344, "grad_norm": 0.6525391819455046, "learning_rate": 1.2874614760746148e-05, "loss": 0.6102, "step": 14408 }, { "epoch": 0.4206884470526408, "grad_norm": 0.6576084882174408, "learning_rate": 1.287396593673966e-05, "loss": 0.6203, "step": 14409 }, { "epoch": 0.42071764328048816, "grad_norm": 0.6468390011183793, "learning_rate": 1.2873317112733173e-05, "loss": 0.616, "step": 14410 }, { "epoch": 0.4207468395083355, "grad_norm": 0.6443049250156613, "learning_rate": 1.2872668288726685e-05, "loss": 0.6507, "step": 14411 }, { "epoch": 0.4207760357361829, "grad_norm": 0.6260083415697749, "learning_rate": 1.2872019464720195e-05, "loss": 0.6066, "step": 14412 }, { "epoch": 0.42080523196403025, "grad_norm": 0.612721477769761, "learning_rate": 1.2871370640713707e-05, "loss": 0.5797, "step": 14413 }, { "epoch": 0.4208344281918776, "grad_norm": 0.6819553474955447, "learning_rate": 1.2870721816707219e-05, "loss": 0.6592, "step": 14414 }, { "epoch": 0.42086362441972497, "grad_norm": 0.6427536708507052, "learning_rate": 1.2870072992700731e-05, "loss": 0.6102, "step": 14415 }, { "epoch": 0.42089282064757233, "grad_norm": 0.6314389131791364, "learning_rate": 1.2869424168694243e-05, "loss": 0.6157, "step": 14416 }, { "epoch": 0.4209220168754197, "grad_norm": 0.655482481363262, "learning_rate": 1.2868775344687753e-05, "loss": 0.6154, "step": 14417 }, { "epoch": 0.42095121310326705, "grad_norm": 0.6443472006177413, "learning_rate": 1.2868126520681265e-05, "loss": 0.6106, "step": 14418 }, { "epoch": 0.4209804093311144, "grad_norm": 0.6543227293629165, "learning_rate": 1.2867477696674777e-05, "loss": 0.6502, "step": 14419 }, { "epoch": 0.4210096055589618, "grad_norm": 0.6601482655406692, "learning_rate": 1.286682887266829e-05, "loss": 0.6225, "step": 14420 }, { "epoch": 0.42103880178680914, "grad_norm": 0.6759216656230392, "learning_rate": 1.28661800486618e-05, "loss": 0.6091, "step": 14421 }, { "epoch": 0.4210679980146565, "grad_norm": 0.671161597870289, "learning_rate": 1.2865531224655315e-05, "loss": 0.612, "step": 14422 }, { "epoch": 0.42109719424250386, "grad_norm": 0.6195671620587264, "learning_rate": 1.2864882400648826e-05, "loss": 0.5768, "step": 14423 }, { "epoch": 0.4211263904703512, "grad_norm": 0.6259671277417262, "learning_rate": 1.2864233576642338e-05, "loss": 0.5912, "step": 14424 }, { "epoch": 0.4211555866981986, "grad_norm": 0.6452625948933638, "learning_rate": 1.286358475263585e-05, "loss": 0.5574, "step": 14425 }, { "epoch": 0.42118478292604594, "grad_norm": 0.6969192946424903, "learning_rate": 1.2862935928629362e-05, "loss": 0.6792, "step": 14426 }, { "epoch": 0.4212139791538933, "grad_norm": 0.6583359452300445, "learning_rate": 1.2862287104622872e-05, "loss": 0.583, "step": 14427 }, { "epoch": 0.42124317538174066, "grad_norm": 0.6439292046258535, "learning_rate": 1.2861638280616384e-05, "loss": 0.6053, "step": 14428 }, { "epoch": 0.421272371609588, "grad_norm": 0.6442320582586215, "learning_rate": 1.2860989456609896e-05, "loss": 0.6486, "step": 14429 }, { "epoch": 0.4213015678374354, "grad_norm": 0.6137771563037406, "learning_rate": 1.2860340632603408e-05, "loss": 0.5486, "step": 14430 }, { "epoch": 0.42133076406528275, "grad_norm": 0.6847729513463044, "learning_rate": 1.2859691808596918e-05, "loss": 0.6336, "step": 14431 }, { "epoch": 0.4213599602931301, "grad_norm": 0.6566886653579113, "learning_rate": 1.285904298459043e-05, "loss": 0.6625, "step": 14432 }, { "epoch": 0.42138915652097747, "grad_norm": 0.651804890361917, "learning_rate": 1.2858394160583942e-05, "loss": 0.6921, "step": 14433 }, { "epoch": 0.42141835274882483, "grad_norm": 0.6512913573844722, "learning_rate": 1.2857745336577454e-05, "loss": 0.6473, "step": 14434 }, { "epoch": 0.4214475489766722, "grad_norm": 0.6618651999579166, "learning_rate": 1.2857096512570966e-05, "loss": 0.6609, "step": 14435 }, { "epoch": 0.42147674520451955, "grad_norm": 0.6467473109961948, "learning_rate": 1.2856447688564477e-05, "loss": 0.6507, "step": 14436 }, { "epoch": 0.4215059414323669, "grad_norm": 0.6779547441016313, "learning_rate": 1.2855798864557989e-05, "loss": 0.5627, "step": 14437 }, { "epoch": 0.4215351376602143, "grad_norm": 0.6778359972994749, "learning_rate": 1.28551500405515e-05, "loss": 0.706, "step": 14438 }, { "epoch": 0.42156433388806164, "grad_norm": 0.6281448220555591, "learning_rate": 1.2854501216545013e-05, "loss": 0.5878, "step": 14439 }, { "epoch": 0.421593530115909, "grad_norm": 0.6420048277897886, "learning_rate": 1.2853852392538523e-05, "loss": 0.6525, "step": 14440 }, { "epoch": 0.42162272634375636, "grad_norm": 0.6296659681096807, "learning_rate": 1.2853203568532039e-05, "loss": 0.5863, "step": 14441 }, { "epoch": 0.4216519225716037, "grad_norm": 0.8757190633195988, "learning_rate": 1.2852554744525549e-05, "loss": 0.5663, "step": 14442 }, { "epoch": 0.42168111879945114, "grad_norm": 0.6162974079259947, "learning_rate": 1.2851905920519061e-05, "loss": 0.5781, "step": 14443 }, { "epoch": 0.4217103150272985, "grad_norm": 0.6359966581860745, "learning_rate": 1.2851257096512573e-05, "loss": 0.6292, "step": 14444 }, { "epoch": 0.42173951125514586, "grad_norm": 0.6555358978445733, "learning_rate": 1.2850608272506085e-05, "loss": 0.6643, "step": 14445 }, { "epoch": 0.4217687074829932, "grad_norm": 0.6805708901867703, "learning_rate": 1.2849959448499595e-05, "loss": 0.6861, "step": 14446 }, { "epoch": 0.4217979037108406, "grad_norm": 0.6149325826316812, "learning_rate": 1.2849310624493107e-05, "loss": 0.5972, "step": 14447 }, { "epoch": 0.42182709993868794, "grad_norm": 0.5933573368705742, "learning_rate": 1.284866180048662e-05, "loss": 0.5231, "step": 14448 }, { "epoch": 0.4218562961665353, "grad_norm": 0.6342199329910773, "learning_rate": 1.2848012976480131e-05, "loss": 0.6068, "step": 14449 }, { "epoch": 0.42188549239438267, "grad_norm": 0.6468302316295961, "learning_rate": 1.2847364152473642e-05, "loss": 0.6425, "step": 14450 }, { "epoch": 0.42191468862223, "grad_norm": 0.692489650296493, "learning_rate": 1.2846715328467154e-05, "loss": 0.7255, "step": 14451 }, { "epoch": 0.4219438848500774, "grad_norm": 0.6559742592003269, "learning_rate": 1.2846066504460666e-05, "loss": 0.6163, "step": 14452 }, { "epoch": 0.42197308107792475, "grad_norm": 0.7313522368188803, "learning_rate": 1.2845417680454178e-05, "loss": 0.7185, "step": 14453 }, { "epoch": 0.4220022773057721, "grad_norm": 0.6457890606425019, "learning_rate": 1.284476885644769e-05, "loss": 0.6231, "step": 14454 }, { "epoch": 0.4220314735336195, "grad_norm": 0.7008191049712831, "learning_rate": 1.28441200324412e-05, "loss": 0.6438, "step": 14455 }, { "epoch": 0.42206066976146683, "grad_norm": 0.6610104605638246, "learning_rate": 1.2843471208434712e-05, "loss": 0.6203, "step": 14456 }, { "epoch": 0.4220898659893142, "grad_norm": 0.7230833366843525, "learning_rate": 1.2842822384428224e-05, "loss": 0.6209, "step": 14457 }, { "epoch": 0.42211906221716156, "grad_norm": 0.6382421038724476, "learning_rate": 1.2842173560421736e-05, "loss": 0.6138, "step": 14458 }, { "epoch": 0.4221482584450089, "grad_norm": 0.6463074025456828, "learning_rate": 1.2841524736415247e-05, "loss": 0.5709, "step": 14459 }, { "epoch": 0.4221774546728563, "grad_norm": 0.747099280251424, "learning_rate": 1.2840875912408762e-05, "loss": 0.675, "step": 14460 }, { "epoch": 0.42220665090070364, "grad_norm": 0.6299125889714029, "learning_rate": 1.2840227088402272e-05, "loss": 0.5877, "step": 14461 }, { "epoch": 0.422235847128551, "grad_norm": 0.6536543632702468, "learning_rate": 1.2839578264395784e-05, "loss": 0.6492, "step": 14462 }, { "epoch": 0.42226504335639836, "grad_norm": 0.6590012207374008, "learning_rate": 1.2838929440389296e-05, "loss": 0.6143, "step": 14463 }, { "epoch": 0.4222942395842457, "grad_norm": 0.666812706269015, "learning_rate": 1.2838280616382808e-05, "loss": 0.6331, "step": 14464 }, { "epoch": 0.4223234358120931, "grad_norm": 0.6886928271804619, "learning_rate": 1.2837631792376319e-05, "loss": 0.6735, "step": 14465 }, { "epoch": 0.42235263203994045, "grad_norm": 0.698862268209537, "learning_rate": 1.283698296836983e-05, "loss": 0.6848, "step": 14466 }, { "epoch": 0.4223818282677878, "grad_norm": 0.6532305707567971, "learning_rate": 1.2836334144363343e-05, "loss": 0.6297, "step": 14467 }, { "epoch": 0.42241102449563517, "grad_norm": 0.6859542177811286, "learning_rate": 1.2835685320356855e-05, "loss": 0.6448, "step": 14468 }, { "epoch": 0.42244022072348253, "grad_norm": 0.7106442043722079, "learning_rate": 1.2835036496350365e-05, "loss": 0.6885, "step": 14469 }, { "epoch": 0.4224694169513299, "grad_norm": 0.6943261759408724, "learning_rate": 1.2834387672343877e-05, "loss": 0.674, "step": 14470 }, { "epoch": 0.42249861317917725, "grad_norm": 0.6473474251859511, "learning_rate": 1.283373884833739e-05, "loss": 0.6109, "step": 14471 }, { "epoch": 0.4225278094070246, "grad_norm": 0.6214939723550335, "learning_rate": 1.2833090024330901e-05, "loss": 0.5434, "step": 14472 }, { "epoch": 0.422557005634872, "grad_norm": 0.664123430871361, "learning_rate": 1.2832441200324413e-05, "loss": 0.6961, "step": 14473 }, { "epoch": 0.42258620186271934, "grad_norm": 0.6724310392184698, "learning_rate": 1.2831792376317924e-05, "loss": 0.5708, "step": 14474 }, { "epoch": 0.4226153980905667, "grad_norm": 0.7147831074672244, "learning_rate": 1.2831143552311436e-05, "loss": 0.6286, "step": 14475 }, { "epoch": 0.42264459431841406, "grad_norm": 0.6960566418972358, "learning_rate": 1.2830494728304948e-05, "loss": 0.6117, "step": 14476 }, { "epoch": 0.4226737905462614, "grad_norm": 0.6417992001416845, "learning_rate": 1.282984590429846e-05, "loss": 0.6041, "step": 14477 }, { "epoch": 0.4227029867741088, "grad_norm": 0.6770804892001698, "learning_rate": 1.282919708029197e-05, "loss": 0.6714, "step": 14478 }, { "epoch": 0.42273218300195614, "grad_norm": 0.6463968209562629, "learning_rate": 1.2828548256285484e-05, "loss": 0.6696, "step": 14479 }, { "epoch": 0.4227613792298035, "grad_norm": 0.6004075841843537, "learning_rate": 1.2827899432278996e-05, "loss": 0.5398, "step": 14480 }, { "epoch": 0.42279057545765086, "grad_norm": 0.6616940963379885, "learning_rate": 1.2827250608272508e-05, "loss": 0.6588, "step": 14481 }, { "epoch": 0.4228197716854982, "grad_norm": 0.6351972093447228, "learning_rate": 1.282660178426602e-05, "loss": 0.5934, "step": 14482 }, { "epoch": 0.4228489679133456, "grad_norm": 0.6630033245880318, "learning_rate": 1.2825952960259532e-05, "loss": 0.7062, "step": 14483 }, { "epoch": 0.42287816414119295, "grad_norm": 0.6261339069193971, "learning_rate": 1.2825304136253042e-05, "loss": 0.6092, "step": 14484 }, { "epoch": 0.4229073603690403, "grad_norm": 0.7042527353346419, "learning_rate": 1.2824655312246554e-05, "loss": 0.7099, "step": 14485 }, { "epoch": 0.42293655659688767, "grad_norm": 0.6872308700021325, "learning_rate": 1.2824006488240066e-05, "loss": 0.6648, "step": 14486 }, { "epoch": 0.42296575282473503, "grad_norm": 0.6455424106366943, "learning_rate": 1.2823357664233578e-05, "loss": 0.6148, "step": 14487 }, { "epoch": 0.4229949490525824, "grad_norm": 0.6503116839369282, "learning_rate": 1.2822708840227089e-05, "loss": 0.6163, "step": 14488 }, { "epoch": 0.42302414528042975, "grad_norm": 0.6676866170781778, "learning_rate": 1.28220600162206e-05, "loss": 0.6356, "step": 14489 }, { "epoch": 0.4230533415082771, "grad_norm": 0.629817320573921, "learning_rate": 1.2821411192214113e-05, "loss": 0.6159, "step": 14490 }, { "epoch": 0.4230825377361245, "grad_norm": 0.6644421551110952, "learning_rate": 1.2820762368207625e-05, "loss": 0.6401, "step": 14491 }, { "epoch": 0.42311173396397184, "grad_norm": 0.6100490980585237, "learning_rate": 1.2820113544201137e-05, "loss": 0.557, "step": 14492 }, { "epoch": 0.4231409301918192, "grad_norm": 0.6717805025246001, "learning_rate": 1.2819464720194647e-05, "loss": 0.6624, "step": 14493 }, { "epoch": 0.42317012641966656, "grad_norm": 0.6353855230378963, "learning_rate": 1.2818815896188159e-05, "loss": 0.6173, "step": 14494 }, { "epoch": 0.4231993226475139, "grad_norm": 0.6008746600275998, "learning_rate": 1.2818167072181671e-05, "loss": 0.5336, "step": 14495 }, { "epoch": 0.4232285188753613, "grad_norm": 0.6764937803408062, "learning_rate": 1.2817518248175183e-05, "loss": 0.6757, "step": 14496 }, { "epoch": 0.42325771510320864, "grad_norm": 0.6822234776022942, "learning_rate": 1.2816869424168697e-05, "loss": 0.6422, "step": 14497 }, { "epoch": 0.423286911331056, "grad_norm": 0.649250639674342, "learning_rate": 1.2816220600162207e-05, "loss": 0.6404, "step": 14498 }, { "epoch": 0.42331610755890337, "grad_norm": 0.682334981582414, "learning_rate": 1.281557177615572e-05, "loss": 0.6741, "step": 14499 }, { "epoch": 0.4233453037867507, "grad_norm": 0.6692492062230895, "learning_rate": 1.2814922952149231e-05, "loss": 0.6718, "step": 14500 }, { "epoch": 0.4233745000145981, "grad_norm": 0.6173521638046975, "learning_rate": 1.2814274128142743e-05, "loss": 0.5365, "step": 14501 }, { "epoch": 0.42340369624244545, "grad_norm": 0.6737445359604701, "learning_rate": 1.2813625304136255e-05, "loss": 0.6571, "step": 14502 }, { "epoch": 0.42343289247029287, "grad_norm": 0.6144963095582946, "learning_rate": 1.2812976480129766e-05, "loss": 0.5699, "step": 14503 }, { "epoch": 0.4234620886981402, "grad_norm": 0.6590934909010882, "learning_rate": 1.2812327656123278e-05, "loss": 0.6295, "step": 14504 }, { "epoch": 0.4234912849259876, "grad_norm": 0.6639436726794641, "learning_rate": 1.281167883211679e-05, "loss": 0.6516, "step": 14505 }, { "epoch": 0.42352048115383495, "grad_norm": 0.6187933873789837, "learning_rate": 1.2811030008110302e-05, "loss": 0.6041, "step": 14506 }, { "epoch": 0.4235496773816823, "grad_norm": 0.608241448644314, "learning_rate": 1.2810381184103812e-05, "loss": 0.5463, "step": 14507 }, { "epoch": 0.42357887360952967, "grad_norm": 0.7168121148921065, "learning_rate": 1.2809732360097324e-05, "loss": 0.7161, "step": 14508 }, { "epoch": 0.42360806983737703, "grad_norm": 0.6724696843047258, "learning_rate": 1.2809083536090836e-05, "loss": 0.6751, "step": 14509 }, { "epoch": 0.4236372660652244, "grad_norm": 0.6489957156222751, "learning_rate": 1.2808434712084348e-05, "loss": 0.5562, "step": 14510 }, { "epoch": 0.42366646229307175, "grad_norm": 0.68338469389255, "learning_rate": 1.280778588807786e-05, "loss": 0.6805, "step": 14511 }, { "epoch": 0.4236956585209191, "grad_norm": 0.6221087678269013, "learning_rate": 1.280713706407137e-05, "loss": 0.6239, "step": 14512 }, { "epoch": 0.4237248547487665, "grad_norm": 0.6313313603433833, "learning_rate": 1.2806488240064883e-05, "loss": 0.6175, "step": 14513 }, { "epoch": 0.42375405097661384, "grad_norm": 0.664062953036309, "learning_rate": 1.2805839416058395e-05, "loss": 0.6133, "step": 14514 }, { "epoch": 0.4237832472044612, "grad_norm": 0.6909279738733904, "learning_rate": 1.2805190592051907e-05, "loss": 0.6887, "step": 14515 }, { "epoch": 0.42381244343230856, "grad_norm": 0.6136202586946464, "learning_rate": 1.280454176804542e-05, "loss": 0.573, "step": 14516 }, { "epoch": 0.4238416396601559, "grad_norm": 0.6169388304614092, "learning_rate": 1.280389294403893e-05, "loss": 0.5961, "step": 14517 }, { "epoch": 0.4238708358880033, "grad_norm": 0.6695601670007136, "learning_rate": 1.2803244120032443e-05, "loss": 0.712, "step": 14518 }, { "epoch": 0.42390003211585064, "grad_norm": 0.648334971041972, "learning_rate": 1.2802595296025955e-05, "loss": 0.6333, "step": 14519 }, { "epoch": 0.423929228343698, "grad_norm": 0.6727841094374868, "learning_rate": 1.2801946472019467e-05, "loss": 0.6367, "step": 14520 }, { "epoch": 0.42395842457154537, "grad_norm": 0.6576135235797079, "learning_rate": 1.2801297648012979e-05, "loss": 0.6516, "step": 14521 }, { "epoch": 0.42398762079939273, "grad_norm": 0.6411766898541276, "learning_rate": 1.2800648824006489e-05, "loss": 0.6104, "step": 14522 }, { "epoch": 0.4240168170272401, "grad_norm": 0.6295399388521771, "learning_rate": 1.2800000000000001e-05, "loss": 0.634, "step": 14523 }, { "epoch": 0.42404601325508745, "grad_norm": 0.6401663723864048, "learning_rate": 1.2799351175993513e-05, "loss": 0.618, "step": 14524 }, { "epoch": 0.4240752094829348, "grad_norm": 0.6814056598661588, "learning_rate": 1.2798702351987025e-05, "loss": 0.64, "step": 14525 }, { "epoch": 0.4241044057107822, "grad_norm": 0.6329650225051839, "learning_rate": 1.2798053527980536e-05, "loss": 0.6065, "step": 14526 }, { "epoch": 0.42413360193862953, "grad_norm": 0.6928424019774396, "learning_rate": 1.2797404703974048e-05, "loss": 0.6985, "step": 14527 }, { "epoch": 0.4241627981664769, "grad_norm": 0.6790336968973573, "learning_rate": 1.279675587996756e-05, "loss": 0.6382, "step": 14528 }, { "epoch": 0.42419199439432426, "grad_norm": 0.5893696791967808, "learning_rate": 1.2796107055961072e-05, "loss": 0.5415, "step": 14529 }, { "epoch": 0.4242211906221716, "grad_norm": 0.6005663359378995, "learning_rate": 1.2795458231954584e-05, "loss": 0.5464, "step": 14530 }, { "epoch": 0.424250386850019, "grad_norm": 0.73157080294017, "learning_rate": 1.2794809407948094e-05, "loss": 0.677, "step": 14531 }, { "epoch": 0.42427958307786634, "grad_norm": 0.6946087836851853, "learning_rate": 1.2794160583941606e-05, "loss": 0.6622, "step": 14532 }, { "epoch": 0.4243087793057137, "grad_norm": 0.6408743115698294, "learning_rate": 1.2793511759935118e-05, "loss": 0.5903, "step": 14533 }, { "epoch": 0.42433797553356106, "grad_norm": 0.5991298909833711, "learning_rate": 1.279286293592863e-05, "loss": 0.5489, "step": 14534 }, { "epoch": 0.4243671717614084, "grad_norm": 0.661610819423076, "learning_rate": 1.2792214111922144e-05, "loss": 0.642, "step": 14535 }, { "epoch": 0.4243963679892558, "grad_norm": 0.6470763385699702, "learning_rate": 1.2791565287915654e-05, "loss": 0.6522, "step": 14536 }, { "epoch": 0.42442556421710315, "grad_norm": 0.6655067013546537, "learning_rate": 1.2790916463909166e-05, "loss": 0.6397, "step": 14537 }, { "epoch": 0.4244547604449505, "grad_norm": 0.6071750110881126, "learning_rate": 1.2790267639902678e-05, "loss": 0.5698, "step": 14538 }, { "epoch": 0.42448395667279787, "grad_norm": 0.6678087042048558, "learning_rate": 1.278961881589619e-05, "loss": 0.6363, "step": 14539 }, { "epoch": 0.42451315290064523, "grad_norm": 0.7001639354704291, "learning_rate": 1.2788969991889702e-05, "loss": 0.6858, "step": 14540 }, { "epoch": 0.4245423491284926, "grad_norm": 0.629457409115328, "learning_rate": 1.2788321167883213e-05, "loss": 0.574, "step": 14541 }, { "epoch": 0.42457154535633995, "grad_norm": 0.6589245913608764, "learning_rate": 1.2787672343876725e-05, "loss": 0.6696, "step": 14542 }, { "epoch": 0.4246007415841873, "grad_norm": 0.6477531745530913, "learning_rate": 1.2787023519870237e-05, "loss": 0.6157, "step": 14543 }, { "epoch": 0.4246299378120347, "grad_norm": 0.6466993110632258, "learning_rate": 1.2786374695863749e-05, "loss": 0.5765, "step": 14544 }, { "epoch": 0.42465913403988204, "grad_norm": 0.7294040469826255, "learning_rate": 1.2785725871857259e-05, "loss": 0.7013, "step": 14545 }, { "epoch": 0.4246883302677294, "grad_norm": 0.6765800399589557, "learning_rate": 1.2785077047850771e-05, "loss": 0.688, "step": 14546 }, { "epoch": 0.42471752649557676, "grad_norm": 0.6484750558522422, "learning_rate": 1.2784428223844283e-05, "loss": 0.6034, "step": 14547 }, { "epoch": 0.4247467227234241, "grad_norm": 0.645291355111113, "learning_rate": 1.2783779399837795e-05, "loss": 0.649, "step": 14548 }, { "epoch": 0.4247759189512715, "grad_norm": 0.6319202721996123, "learning_rate": 1.2783130575831307e-05, "loss": 0.5754, "step": 14549 }, { "epoch": 0.42480511517911884, "grad_norm": 0.6487004321140187, "learning_rate": 1.2782481751824817e-05, "loss": 0.6161, "step": 14550 }, { "epoch": 0.4248343114069662, "grad_norm": 0.6228286443969923, "learning_rate": 1.278183292781833e-05, "loss": 0.6121, "step": 14551 }, { "epoch": 0.42486350763481356, "grad_norm": 0.6503032899835675, "learning_rate": 1.2781184103811841e-05, "loss": 0.6699, "step": 14552 }, { "epoch": 0.4248927038626609, "grad_norm": 0.6878937828433085, "learning_rate": 1.2780535279805353e-05, "loss": 0.695, "step": 14553 }, { "epoch": 0.4249219000905083, "grad_norm": 0.6870335047258731, "learning_rate": 1.2779886455798867e-05, "loss": 0.6931, "step": 14554 }, { "epoch": 0.42495109631835565, "grad_norm": 0.6487255489140771, "learning_rate": 1.2779237631792378e-05, "loss": 0.5594, "step": 14555 }, { "epoch": 0.424980292546203, "grad_norm": 0.6668819932831019, "learning_rate": 1.277858880778589e-05, "loss": 0.6007, "step": 14556 }, { "epoch": 0.42500948877405037, "grad_norm": 0.7382297155867487, "learning_rate": 1.2777939983779402e-05, "loss": 0.7025, "step": 14557 }, { "epoch": 0.42503868500189773, "grad_norm": 0.6671493747914944, "learning_rate": 1.2777291159772914e-05, "loss": 0.6206, "step": 14558 }, { "epoch": 0.4250678812297451, "grad_norm": 0.6399403228919829, "learning_rate": 1.2776642335766426e-05, "loss": 0.5961, "step": 14559 }, { "epoch": 0.42509707745759245, "grad_norm": 0.5930720032456379, "learning_rate": 1.2775993511759936e-05, "loss": 0.5352, "step": 14560 }, { "epoch": 0.4251262736854398, "grad_norm": 0.599237202976536, "learning_rate": 1.2775344687753448e-05, "loss": 0.5638, "step": 14561 }, { "epoch": 0.4251554699132872, "grad_norm": 0.7004691122502403, "learning_rate": 1.277469586374696e-05, "loss": 0.6605, "step": 14562 }, { "epoch": 0.4251846661411346, "grad_norm": 0.6359107778131963, "learning_rate": 1.2774047039740472e-05, "loss": 0.5701, "step": 14563 }, { "epoch": 0.42521386236898195, "grad_norm": 0.6184654547071124, "learning_rate": 1.2773398215733982e-05, "loss": 0.5892, "step": 14564 }, { "epoch": 0.4252430585968293, "grad_norm": 0.6681356819750914, "learning_rate": 1.2772749391727494e-05, "loss": 0.6593, "step": 14565 }, { "epoch": 0.4252722548246767, "grad_norm": 0.6783907616837336, "learning_rate": 1.2772100567721006e-05, "loss": 0.6336, "step": 14566 }, { "epoch": 0.42530145105252404, "grad_norm": 0.6641941138554589, "learning_rate": 1.2771451743714518e-05, "loss": 0.6479, "step": 14567 }, { "epoch": 0.4253306472803714, "grad_norm": 0.6857819480744807, "learning_rate": 1.277080291970803e-05, "loss": 0.7016, "step": 14568 }, { "epoch": 0.42535984350821876, "grad_norm": 0.6629140434304346, "learning_rate": 1.277015409570154e-05, "loss": 0.6291, "step": 14569 }, { "epoch": 0.4253890397360661, "grad_norm": 0.6598153621130641, "learning_rate": 1.2769505271695053e-05, "loss": 0.5858, "step": 14570 }, { "epoch": 0.4254182359639135, "grad_norm": 0.6754564667389319, "learning_rate": 1.2768856447688565e-05, "loss": 0.6732, "step": 14571 }, { "epoch": 0.42544743219176084, "grad_norm": 0.5890966979792768, "learning_rate": 1.2768207623682077e-05, "loss": 0.5659, "step": 14572 }, { "epoch": 0.4254766284196082, "grad_norm": 0.6681814260796703, "learning_rate": 1.276755879967559e-05, "loss": 0.6245, "step": 14573 }, { "epoch": 0.42550582464745557, "grad_norm": 0.6179295328025005, "learning_rate": 1.2766909975669101e-05, "loss": 0.5988, "step": 14574 }, { "epoch": 0.4255350208753029, "grad_norm": 0.6377050885528829, "learning_rate": 1.2766261151662613e-05, "loss": 0.5961, "step": 14575 }, { "epoch": 0.4255642171031503, "grad_norm": 0.6275124556148406, "learning_rate": 1.2765612327656125e-05, "loss": 0.6012, "step": 14576 }, { "epoch": 0.42559341333099765, "grad_norm": 0.688193410798841, "learning_rate": 1.2764963503649637e-05, "loss": 0.708, "step": 14577 }, { "epoch": 0.425622609558845, "grad_norm": 0.6980604627513053, "learning_rate": 1.2764314679643149e-05, "loss": 0.6771, "step": 14578 }, { "epoch": 0.42565180578669237, "grad_norm": 0.6889061015438861, "learning_rate": 1.276366585563666e-05, "loss": 0.6642, "step": 14579 }, { "epoch": 0.42568100201453973, "grad_norm": 0.6408893830750335, "learning_rate": 1.2763017031630171e-05, "loss": 0.594, "step": 14580 }, { "epoch": 0.4257101982423871, "grad_norm": 0.6167339588039455, "learning_rate": 1.2762368207623683e-05, "loss": 0.5549, "step": 14581 }, { "epoch": 0.42573939447023446, "grad_norm": 0.6379957785072355, "learning_rate": 1.2761719383617195e-05, "loss": 0.5442, "step": 14582 }, { "epoch": 0.4257685906980818, "grad_norm": 0.6912488773517044, "learning_rate": 1.2761070559610706e-05, "loss": 0.6681, "step": 14583 }, { "epoch": 0.4257977869259292, "grad_norm": 0.6482158684552761, "learning_rate": 1.2760421735604218e-05, "loss": 0.5887, "step": 14584 }, { "epoch": 0.42582698315377654, "grad_norm": 0.6775104197401374, "learning_rate": 1.275977291159773e-05, "loss": 0.6657, "step": 14585 }, { "epoch": 0.4258561793816239, "grad_norm": 0.7267162476205591, "learning_rate": 1.2759124087591242e-05, "loss": 0.7397, "step": 14586 }, { "epoch": 0.42588537560947126, "grad_norm": 0.6403866888428629, "learning_rate": 1.2758475263584754e-05, "loss": 0.6166, "step": 14587 }, { "epoch": 0.4259145718373186, "grad_norm": 0.6441725950911944, "learning_rate": 1.2757826439578264e-05, "loss": 0.6234, "step": 14588 }, { "epoch": 0.425943768065166, "grad_norm": 0.6501870236211353, "learning_rate": 1.2757177615571776e-05, "loss": 0.6132, "step": 14589 }, { "epoch": 0.42597296429301335, "grad_norm": 0.6438781194579217, "learning_rate": 1.2756528791565288e-05, "loss": 0.5944, "step": 14590 }, { "epoch": 0.4260021605208607, "grad_norm": 0.6858544329853934, "learning_rate": 1.27558799675588e-05, "loss": 0.6829, "step": 14591 }, { "epoch": 0.42603135674870807, "grad_norm": 0.6965830917163223, "learning_rate": 1.2755231143552314e-05, "loss": 0.6767, "step": 14592 }, { "epoch": 0.42606055297655543, "grad_norm": 0.6314481219145522, "learning_rate": 1.2754582319545824e-05, "loss": 0.6188, "step": 14593 }, { "epoch": 0.4260897492044028, "grad_norm": 0.7072607894424147, "learning_rate": 1.2753933495539336e-05, "loss": 0.6349, "step": 14594 }, { "epoch": 0.42611894543225015, "grad_norm": 0.649047216355277, "learning_rate": 1.2753284671532848e-05, "loss": 0.6413, "step": 14595 }, { "epoch": 0.4261481416600975, "grad_norm": 0.6579437704819542, "learning_rate": 1.275263584752636e-05, "loss": 0.6054, "step": 14596 }, { "epoch": 0.4261773378879449, "grad_norm": 0.6884355504877213, "learning_rate": 1.2751987023519872e-05, "loss": 0.6428, "step": 14597 }, { "epoch": 0.42620653411579223, "grad_norm": 0.6732280056832781, "learning_rate": 1.2751338199513383e-05, "loss": 0.6702, "step": 14598 }, { "epoch": 0.4262357303436396, "grad_norm": 0.6643810932138948, "learning_rate": 1.2750689375506895e-05, "loss": 0.6724, "step": 14599 }, { "epoch": 0.42626492657148696, "grad_norm": 0.6190301579882806, "learning_rate": 1.2750040551500407e-05, "loss": 0.6074, "step": 14600 }, { "epoch": 0.4262941227993343, "grad_norm": 0.615407938494169, "learning_rate": 1.2749391727493919e-05, "loss": 0.5411, "step": 14601 }, { "epoch": 0.4263233190271817, "grad_norm": 0.6008458876648305, "learning_rate": 1.274874290348743e-05, "loss": 0.5685, "step": 14602 }, { "epoch": 0.42635251525502904, "grad_norm": 0.6955423205112305, "learning_rate": 1.2748094079480941e-05, "loss": 0.6297, "step": 14603 }, { "epoch": 0.4263817114828764, "grad_norm": 0.628662382970253, "learning_rate": 1.2747445255474453e-05, "loss": 0.62, "step": 14604 }, { "epoch": 0.42641090771072376, "grad_norm": 0.6081930500214552, "learning_rate": 1.2746796431467965e-05, "loss": 0.5586, "step": 14605 }, { "epoch": 0.4264401039385711, "grad_norm": 0.6786555680532181, "learning_rate": 1.2746147607461476e-05, "loss": 0.6697, "step": 14606 }, { "epoch": 0.4264693001664185, "grad_norm": 0.6561312916643958, "learning_rate": 1.2745498783454988e-05, "loss": 0.5925, "step": 14607 }, { "epoch": 0.42649849639426585, "grad_norm": 0.7572415790941386, "learning_rate": 1.27448499594485e-05, "loss": 0.67, "step": 14608 }, { "epoch": 0.4265276926221132, "grad_norm": 0.8895659567253601, "learning_rate": 1.2744201135442012e-05, "loss": 0.5981, "step": 14609 }, { "epoch": 0.42655688884996057, "grad_norm": 0.6981197187181153, "learning_rate": 1.2743552311435524e-05, "loss": 0.6943, "step": 14610 }, { "epoch": 0.42658608507780793, "grad_norm": 0.6565956585137731, "learning_rate": 1.2742903487429037e-05, "loss": 0.6449, "step": 14611 }, { "epoch": 0.4266152813056553, "grad_norm": 0.6175993647100193, "learning_rate": 1.2742254663422548e-05, "loss": 0.5502, "step": 14612 }, { "epoch": 0.42664447753350265, "grad_norm": 0.5880151186912741, "learning_rate": 1.274160583941606e-05, "loss": 0.5374, "step": 14613 }, { "epoch": 0.42667367376135, "grad_norm": 0.6641819185177454, "learning_rate": 1.2740957015409572e-05, "loss": 0.6383, "step": 14614 }, { "epoch": 0.4267028699891974, "grad_norm": 0.7181035749414627, "learning_rate": 1.2740308191403084e-05, "loss": 0.7419, "step": 14615 }, { "epoch": 0.42673206621704474, "grad_norm": 0.6497028271925108, "learning_rate": 1.2739659367396596e-05, "loss": 0.6662, "step": 14616 }, { "epoch": 0.4267612624448921, "grad_norm": 0.6324362605013575, "learning_rate": 1.2739010543390106e-05, "loss": 0.5951, "step": 14617 }, { "epoch": 0.42679045867273946, "grad_norm": 0.6117868308989062, "learning_rate": 1.2738361719383618e-05, "loss": 0.5296, "step": 14618 }, { "epoch": 0.4268196549005868, "grad_norm": 0.6305083028196301, "learning_rate": 1.273771289537713e-05, "loss": 0.5953, "step": 14619 }, { "epoch": 0.4268488511284342, "grad_norm": 0.6874983627599894, "learning_rate": 1.2737064071370642e-05, "loss": 0.6505, "step": 14620 }, { "epoch": 0.42687804735628154, "grad_norm": 0.6650034296201975, "learning_rate": 1.2736415247364153e-05, "loss": 0.6415, "step": 14621 }, { "epoch": 0.4269072435841289, "grad_norm": 0.6550788631601244, "learning_rate": 1.2735766423357665e-05, "loss": 0.622, "step": 14622 }, { "epoch": 0.42693643981197626, "grad_norm": 0.6495445110839345, "learning_rate": 1.2735117599351177e-05, "loss": 0.5943, "step": 14623 }, { "epoch": 0.4269656360398237, "grad_norm": 0.6311892477466197, "learning_rate": 1.2734468775344689e-05, "loss": 0.6305, "step": 14624 }, { "epoch": 0.42699483226767104, "grad_norm": 0.6479766187079471, "learning_rate": 1.2733819951338199e-05, "loss": 0.5625, "step": 14625 }, { "epoch": 0.4270240284955184, "grad_norm": 0.6496003763308009, "learning_rate": 1.2733171127331711e-05, "loss": 0.603, "step": 14626 }, { "epoch": 0.42705322472336577, "grad_norm": 0.7214379496553626, "learning_rate": 1.2732522303325223e-05, "loss": 0.6472, "step": 14627 }, { "epoch": 0.4270824209512131, "grad_norm": 0.6983728464947847, "learning_rate": 1.2731873479318735e-05, "loss": 0.6804, "step": 14628 }, { "epoch": 0.4271116171790605, "grad_norm": 0.6743639085635953, "learning_rate": 1.2731224655312247e-05, "loss": 0.6103, "step": 14629 }, { "epoch": 0.42714081340690785, "grad_norm": 0.6534339494794279, "learning_rate": 1.2730575831305761e-05, "loss": 0.6108, "step": 14630 }, { "epoch": 0.4271700096347552, "grad_norm": 0.6524339827058749, "learning_rate": 1.2729927007299271e-05, "loss": 0.5823, "step": 14631 }, { "epoch": 0.42719920586260257, "grad_norm": 0.7231653740769753, "learning_rate": 1.2729278183292783e-05, "loss": 0.6773, "step": 14632 }, { "epoch": 0.42722840209044993, "grad_norm": 0.6401772645779056, "learning_rate": 1.2728629359286295e-05, "loss": 0.6038, "step": 14633 }, { "epoch": 0.4272575983182973, "grad_norm": 0.6274011880928468, "learning_rate": 1.2727980535279807e-05, "loss": 0.6216, "step": 14634 }, { "epoch": 0.42728679454614465, "grad_norm": 0.628826761080664, "learning_rate": 1.272733171127332e-05, "loss": 0.5542, "step": 14635 }, { "epoch": 0.427315990773992, "grad_norm": 0.7629385439797677, "learning_rate": 1.272668288726683e-05, "loss": 0.8334, "step": 14636 }, { "epoch": 0.4273451870018394, "grad_norm": 0.7112687356585871, "learning_rate": 1.2726034063260342e-05, "loss": 0.6636, "step": 14637 }, { "epoch": 0.42737438322968674, "grad_norm": 0.663739212862639, "learning_rate": 1.2725385239253854e-05, "loss": 0.655, "step": 14638 }, { "epoch": 0.4274035794575341, "grad_norm": 0.6074691398461749, "learning_rate": 1.2724736415247366e-05, "loss": 0.588, "step": 14639 }, { "epoch": 0.42743277568538146, "grad_norm": 0.7166264933381079, "learning_rate": 1.2724087591240876e-05, "loss": 0.6594, "step": 14640 }, { "epoch": 0.4274619719132288, "grad_norm": 0.6506901355206205, "learning_rate": 1.2723438767234388e-05, "loss": 0.6165, "step": 14641 }, { "epoch": 0.4274911681410762, "grad_norm": 0.6939769023672269, "learning_rate": 1.27227899432279e-05, "loss": 0.6698, "step": 14642 }, { "epoch": 0.42752036436892354, "grad_norm": 0.6838730931509546, "learning_rate": 1.2722141119221412e-05, "loss": 0.6468, "step": 14643 }, { "epoch": 0.4275495605967709, "grad_norm": 0.6406331907158885, "learning_rate": 1.2721492295214923e-05, "loss": 0.6294, "step": 14644 }, { "epoch": 0.42757875682461827, "grad_norm": 0.6370366384171339, "learning_rate": 1.2720843471208435e-05, "loss": 0.5829, "step": 14645 }, { "epoch": 0.42760795305246563, "grad_norm": 0.6881751117504665, "learning_rate": 1.2720194647201947e-05, "loss": 0.6318, "step": 14646 }, { "epoch": 0.427637149280313, "grad_norm": 0.634321664889355, "learning_rate": 1.2719545823195459e-05, "loss": 0.6179, "step": 14647 }, { "epoch": 0.42766634550816035, "grad_norm": 0.6770039654533391, "learning_rate": 1.2718896999188972e-05, "loss": 0.6396, "step": 14648 }, { "epoch": 0.4276955417360077, "grad_norm": 0.6231541760249746, "learning_rate": 1.2718248175182484e-05, "loss": 0.5909, "step": 14649 }, { "epoch": 0.4277247379638551, "grad_norm": 0.6374258068012136, "learning_rate": 1.2717599351175995e-05, "loss": 0.6051, "step": 14650 }, { "epoch": 0.42775393419170243, "grad_norm": 0.6352804145916574, "learning_rate": 1.2716950527169507e-05, "loss": 0.5909, "step": 14651 }, { "epoch": 0.4277831304195498, "grad_norm": 0.6501524034226014, "learning_rate": 1.2716301703163019e-05, "loss": 0.6222, "step": 14652 }, { "epoch": 0.42781232664739716, "grad_norm": 0.5861253468116195, "learning_rate": 1.271565287915653e-05, "loss": 0.5322, "step": 14653 }, { "epoch": 0.4278415228752445, "grad_norm": 0.6507201999616581, "learning_rate": 1.2715004055150043e-05, "loss": 0.6109, "step": 14654 }, { "epoch": 0.4278707191030919, "grad_norm": 0.6176111064998682, "learning_rate": 1.2714355231143553e-05, "loss": 0.563, "step": 14655 }, { "epoch": 0.42789991533093924, "grad_norm": 0.6314914727677539, "learning_rate": 1.2713706407137065e-05, "loss": 0.6236, "step": 14656 }, { "epoch": 0.4279291115587866, "grad_norm": 0.7236273901509845, "learning_rate": 1.2713057583130577e-05, "loss": 0.7636, "step": 14657 }, { "epoch": 0.42795830778663396, "grad_norm": 0.6428805272321828, "learning_rate": 1.271240875912409e-05, "loss": 0.606, "step": 14658 }, { "epoch": 0.4279875040144813, "grad_norm": 0.653676448472596, "learning_rate": 1.27117599351176e-05, "loss": 0.6378, "step": 14659 }, { "epoch": 0.4280167002423287, "grad_norm": 0.6312938724307005, "learning_rate": 1.2711111111111112e-05, "loss": 0.5668, "step": 14660 }, { "epoch": 0.42804589647017605, "grad_norm": 0.6017554837954144, "learning_rate": 1.2710462287104624e-05, "loss": 0.5695, "step": 14661 }, { "epoch": 0.4280750926980234, "grad_norm": 0.6443269319219558, "learning_rate": 1.2709813463098136e-05, "loss": 0.6199, "step": 14662 }, { "epoch": 0.42810428892587077, "grad_norm": 0.6046578073832035, "learning_rate": 1.2709164639091646e-05, "loss": 0.57, "step": 14663 }, { "epoch": 0.42813348515371813, "grad_norm": 0.7006241519837375, "learning_rate": 1.2708515815085158e-05, "loss": 0.6626, "step": 14664 }, { "epoch": 0.4281626813815655, "grad_norm": 0.6382987389837855, "learning_rate": 1.270786699107867e-05, "loss": 0.6371, "step": 14665 }, { "epoch": 0.42819187760941285, "grad_norm": 0.6396862990715652, "learning_rate": 1.2707218167072182e-05, "loss": 0.6332, "step": 14666 }, { "epoch": 0.4282210738372602, "grad_norm": 0.6605311239799699, "learning_rate": 1.2706569343065696e-05, "loss": 0.7008, "step": 14667 }, { "epoch": 0.4282502700651076, "grad_norm": 0.664960110147186, "learning_rate": 1.2705920519059208e-05, "loss": 0.6647, "step": 14668 }, { "epoch": 0.42827946629295494, "grad_norm": 0.6492937357226848, "learning_rate": 1.2705271695052718e-05, "loss": 0.6475, "step": 14669 }, { "epoch": 0.4283086625208023, "grad_norm": 0.6766918899785475, "learning_rate": 1.270462287104623e-05, "loss": 0.689, "step": 14670 }, { "epoch": 0.42833785874864966, "grad_norm": 0.6142461165364078, "learning_rate": 1.2703974047039742e-05, "loss": 0.5464, "step": 14671 }, { "epoch": 0.428367054976497, "grad_norm": 0.6498593855862531, "learning_rate": 1.2703325223033254e-05, "loss": 0.659, "step": 14672 }, { "epoch": 0.4283962512043444, "grad_norm": 0.6690307847911935, "learning_rate": 1.2702676399026766e-05, "loss": 0.635, "step": 14673 }, { "epoch": 0.42842544743219174, "grad_norm": 0.6265016673494834, "learning_rate": 1.2702027575020277e-05, "loss": 0.6185, "step": 14674 }, { "epoch": 0.4284546436600391, "grad_norm": 0.6108961694983842, "learning_rate": 1.2701378751013789e-05, "loss": 0.5453, "step": 14675 }, { "epoch": 0.42848383988788646, "grad_norm": 0.6462906625949162, "learning_rate": 1.27007299270073e-05, "loss": 0.6342, "step": 14676 }, { "epoch": 0.4285130361157338, "grad_norm": 0.6405252983942291, "learning_rate": 1.2700081103000813e-05, "loss": 0.5872, "step": 14677 }, { "epoch": 0.4285422323435812, "grad_norm": 0.6991426542809317, "learning_rate": 1.2699432278994323e-05, "loss": 0.6977, "step": 14678 }, { "epoch": 0.42857142857142855, "grad_norm": 0.6452810589813263, "learning_rate": 1.2698783454987835e-05, "loss": 0.5987, "step": 14679 }, { "epoch": 0.4286006247992759, "grad_norm": 0.7319875068866952, "learning_rate": 1.2698134630981347e-05, "loss": 0.7798, "step": 14680 }, { "epoch": 0.42862982102712327, "grad_norm": 0.6462379209828788, "learning_rate": 1.2697485806974859e-05, "loss": 0.6368, "step": 14681 }, { "epoch": 0.42865901725497063, "grad_norm": 0.5979679559509548, "learning_rate": 1.269683698296837e-05, "loss": 0.5603, "step": 14682 }, { "epoch": 0.428688213482818, "grad_norm": 0.7014730801730756, "learning_rate": 1.2696188158961881e-05, "loss": 0.6579, "step": 14683 }, { "epoch": 0.4287174097106654, "grad_norm": 0.5928580246957816, "learning_rate": 1.2695539334955393e-05, "loss": 0.5404, "step": 14684 }, { "epoch": 0.42874660593851277, "grad_norm": 0.6904062710474578, "learning_rate": 1.2694890510948905e-05, "loss": 0.6896, "step": 14685 }, { "epoch": 0.42877580216636013, "grad_norm": 0.615156580620568, "learning_rate": 1.269424168694242e-05, "loss": 0.6059, "step": 14686 }, { "epoch": 0.4288049983942075, "grad_norm": 0.619039950860309, "learning_rate": 1.2693592862935931e-05, "loss": 0.554, "step": 14687 }, { "epoch": 0.42883419462205485, "grad_norm": 0.7121245997854847, "learning_rate": 1.2692944038929442e-05, "loss": 0.6854, "step": 14688 }, { "epoch": 0.4288633908499022, "grad_norm": 0.6553587743807275, "learning_rate": 1.2692295214922954e-05, "loss": 0.6491, "step": 14689 }, { "epoch": 0.4288925870777496, "grad_norm": 0.6916844096715471, "learning_rate": 1.2691646390916466e-05, "loss": 0.6753, "step": 14690 }, { "epoch": 0.42892178330559694, "grad_norm": 0.6694267830023815, "learning_rate": 1.2690997566909978e-05, "loss": 0.6707, "step": 14691 }, { "epoch": 0.4289509795334443, "grad_norm": 0.6105109801131647, "learning_rate": 1.269034874290349e-05, "loss": 0.5298, "step": 14692 }, { "epoch": 0.42898017576129166, "grad_norm": 0.6659565798648457, "learning_rate": 1.2689699918897e-05, "loss": 0.6221, "step": 14693 }, { "epoch": 0.429009371989139, "grad_norm": 0.67715062399803, "learning_rate": 1.2689051094890512e-05, "loss": 0.6774, "step": 14694 }, { "epoch": 0.4290385682169864, "grad_norm": 0.6730165064291103, "learning_rate": 1.2688402270884024e-05, "loss": 0.6282, "step": 14695 }, { "epoch": 0.42906776444483374, "grad_norm": 0.6387317627010558, "learning_rate": 1.2687753446877536e-05, "loss": 0.6304, "step": 14696 }, { "epoch": 0.4290969606726811, "grad_norm": 0.5973573918271583, "learning_rate": 1.2687104622871046e-05, "loss": 0.5674, "step": 14697 }, { "epoch": 0.42912615690052847, "grad_norm": 0.6892720329398885, "learning_rate": 1.2686455798864558e-05, "loss": 0.7111, "step": 14698 }, { "epoch": 0.4291553531283758, "grad_norm": 0.6677481929603316, "learning_rate": 1.268580697485807e-05, "loss": 0.6206, "step": 14699 }, { "epoch": 0.4291845493562232, "grad_norm": 0.6417672604990653, "learning_rate": 1.2685158150851582e-05, "loss": 0.6089, "step": 14700 }, { "epoch": 0.42921374558407055, "grad_norm": 0.6721629786527833, "learning_rate": 1.2684509326845093e-05, "loss": 0.6446, "step": 14701 }, { "epoch": 0.4292429418119179, "grad_norm": 0.6306436613506458, "learning_rate": 1.2683860502838605e-05, "loss": 0.5787, "step": 14702 }, { "epoch": 0.42927213803976527, "grad_norm": 0.5762925730461501, "learning_rate": 1.2683211678832117e-05, "loss": 0.4872, "step": 14703 }, { "epoch": 0.42930133426761263, "grad_norm": 0.6213534028187878, "learning_rate": 1.2682562854825629e-05, "loss": 0.5698, "step": 14704 }, { "epoch": 0.42933053049546, "grad_norm": 0.7030355134044596, "learning_rate": 1.2681914030819143e-05, "loss": 0.6174, "step": 14705 }, { "epoch": 0.42935972672330736, "grad_norm": 0.5951475873895291, "learning_rate": 1.2681265206812655e-05, "loss": 0.5521, "step": 14706 }, { "epoch": 0.4293889229511547, "grad_norm": 0.6056224737631299, "learning_rate": 1.2680616382806165e-05, "loss": 0.581, "step": 14707 }, { "epoch": 0.4294181191790021, "grad_norm": 0.6458636249758258, "learning_rate": 1.2679967558799677e-05, "loss": 0.6323, "step": 14708 }, { "epoch": 0.42944731540684944, "grad_norm": 0.6489039932160823, "learning_rate": 1.2679318734793189e-05, "loss": 0.6389, "step": 14709 }, { "epoch": 0.4294765116346968, "grad_norm": 0.6813103107341251, "learning_rate": 1.2678669910786701e-05, "loss": 0.6765, "step": 14710 }, { "epoch": 0.42950570786254416, "grad_norm": 0.7077540441415942, "learning_rate": 1.2678021086780213e-05, "loss": 0.7156, "step": 14711 }, { "epoch": 0.4295349040903915, "grad_norm": 0.738028302736316, "learning_rate": 1.2677372262773723e-05, "loss": 0.6908, "step": 14712 }, { "epoch": 0.4295641003182389, "grad_norm": 0.6006378658193516, "learning_rate": 1.2676723438767235e-05, "loss": 0.5504, "step": 14713 }, { "epoch": 0.42959329654608625, "grad_norm": 0.6038502183019487, "learning_rate": 1.2676074614760747e-05, "loss": 0.5792, "step": 14714 }, { "epoch": 0.4296224927739336, "grad_norm": 0.6283668831539141, "learning_rate": 1.267542579075426e-05, "loss": 0.5786, "step": 14715 }, { "epoch": 0.42965168900178097, "grad_norm": 0.6996574789375372, "learning_rate": 1.267477696674777e-05, "loss": 0.7082, "step": 14716 }, { "epoch": 0.42968088522962833, "grad_norm": 0.673886220516343, "learning_rate": 1.2674128142741282e-05, "loss": 0.6693, "step": 14717 }, { "epoch": 0.4297100814574757, "grad_norm": 0.7323512854765535, "learning_rate": 1.2673479318734794e-05, "loss": 0.7385, "step": 14718 }, { "epoch": 0.42973927768532305, "grad_norm": 0.6988830930084993, "learning_rate": 1.2672830494728306e-05, "loss": 0.6922, "step": 14719 }, { "epoch": 0.4297684739131704, "grad_norm": 0.651227528964944, "learning_rate": 1.2672181670721816e-05, "loss": 0.6154, "step": 14720 }, { "epoch": 0.4297976701410178, "grad_norm": 0.6665310766533776, "learning_rate": 1.2671532846715328e-05, "loss": 0.6572, "step": 14721 }, { "epoch": 0.42982686636886513, "grad_norm": 0.6685406564998498, "learning_rate": 1.267088402270884e-05, "loss": 0.68, "step": 14722 }, { "epoch": 0.4298560625967125, "grad_norm": 0.5771499599541433, "learning_rate": 1.2670235198702352e-05, "loss": 0.5179, "step": 14723 }, { "epoch": 0.42988525882455986, "grad_norm": 0.7030970941537554, "learning_rate": 1.2669586374695866e-05, "loss": 0.7152, "step": 14724 }, { "epoch": 0.4299144550524072, "grad_norm": 0.64656322044358, "learning_rate": 1.2668937550689378e-05, "loss": 0.662, "step": 14725 }, { "epoch": 0.4299436512802546, "grad_norm": 0.6580395796851652, "learning_rate": 1.2668288726682888e-05, "loss": 0.6577, "step": 14726 }, { "epoch": 0.42997284750810194, "grad_norm": 0.6627974316350338, "learning_rate": 1.26676399026764e-05, "loss": 0.6669, "step": 14727 }, { "epoch": 0.4300020437359493, "grad_norm": 0.6609924078932672, "learning_rate": 1.2666991078669912e-05, "loss": 0.5917, "step": 14728 }, { "epoch": 0.43003123996379666, "grad_norm": 0.6086637618793601, "learning_rate": 1.2666342254663425e-05, "loss": 0.5811, "step": 14729 }, { "epoch": 0.430060436191644, "grad_norm": 0.7124001505412563, "learning_rate": 1.2665693430656935e-05, "loss": 0.6736, "step": 14730 }, { "epoch": 0.4300896324194914, "grad_norm": 0.6100954899525336, "learning_rate": 1.2665044606650447e-05, "loss": 0.5387, "step": 14731 }, { "epoch": 0.43011882864733875, "grad_norm": 0.6832124757509329, "learning_rate": 1.2664395782643959e-05, "loss": 0.6975, "step": 14732 }, { "epoch": 0.4301480248751861, "grad_norm": 0.6275728229654987, "learning_rate": 1.2663746958637471e-05, "loss": 0.5791, "step": 14733 }, { "epoch": 0.43017722110303347, "grad_norm": 0.6286238335368173, "learning_rate": 1.2663098134630983e-05, "loss": 0.659, "step": 14734 }, { "epoch": 0.43020641733088083, "grad_norm": 0.6330894900105817, "learning_rate": 1.2662449310624493e-05, "loss": 0.5971, "step": 14735 }, { "epoch": 0.4302356135587282, "grad_norm": 0.6899960241259584, "learning_rate": 1.2661800486618005e-05, "loss": 0.6587, "step": 14736 }, { "epoch": 0.43026480978657555, "grad_norm": 0.6699460097885266, "learning_rate": 1.2661151662611517e-05, "loss": 0.646, "step": 14737 }, { "epoch": 0.4302940060144229, "grad_norm": 0.6736799545618318, "learning_rate": 1.266050283860503e-05, "loss": 0.6312, "step": 14738 }, { "epoch": 0.4303232022422703, "grad_norm": 0.6693404065535141, "learning_rate": 1.265985401459854e-05, "loss": 0.6298, "step": 14739 }, { "epoch": 0.43035239847011764, "grad_norm": 0.5892817220923062, "learning_rate": 1.2659205190592052e-05, "loss": 0.5686, "step": 14740 }, { "epoch": 0.430381594697965, "grad_norm": 0.636210296352141, "learning_rate": 1.2658556366585564e-05, "loss": 0.6121, "step": 14741 }, { "epoch": 0.43041079092581236, "grad_norm": 0.6855094841004965, "learning_rate": 1.2657907542579076e-05, "loss": 0.6462, "step": 14742 }, { "epoch": 0.4304399871536597, "grad_norm": 0.6567367553116868, "learning_rate": 1.265725871857259e-05, "loss": 0.6158, "step": 14743 }, { "epoch": 0.43046918338150714, "grad_norm": 0.6260768376687155, "learning_rate": 1.2656609894566102e-05, "loss": 0.6061, "step": 14744 }, { "epoch": 0.4304983796093545, "grad_norm": 0.6489542794916077, "learning_rate": 1.2655961070559612e-05, "loss": 0.6068, "step": 14745 }, { "epoch": 0.43052757583720186, "grad_norm": 0.6232870572740294, "learning_rate": 1.2655312246553124e-05, "loss": 0.5827, "step": 14746 }, { "epoch": 0.4305567720650492, "grad_norm": 0.6114662679134342, "learning_rate": 1.2654663422546636e-05, "loss": 0.6064, "step": 14747 }, { "epoch": 0.4305859682928966, "grad_norm": 0.6144978892028845, "learning_rate": 1.2654014598540148e-05, "loss": 0.5783, "step": 14748 }, { "epoch": 0.43061516452074394, "grad_norm": 0.6195890633321951, "learning_rate": 1.2653365774533658e-05, "loss": 0.5821, "step": 14749 }, { "epoch": 0.4306443607485913, "grad_norm": 0.6306718964072794, "learning_rate": 1.265271695052717e-05, "loss": 0.5992, "step": 14750 }, { "epoch": 0.43067355697643867, "grad_norm": 0.7409698149594824, "learning_rate": 1.2652068126520682e-05, "loss": 0.59, "step": 14751 }, { "epoch": 0.430702753204286, "grad_norm": 0.6296653343068881, "learning_rate": 1.2651419302514194e-05, "loss": 0.622, "step": 14752 }, { "epoch": 0.4307319494321334, "grad_norm": 0.6404614596994751, "learning_rate": 1.2650770478507706e-05, "loss": 0.6453, "step": 14753 }, { "epoch": 0.43076114565998075, "grad_norm": 0.591728062530346, "learning_rate": 1.2650121654501217e-05, "loss": 0.5095, "step": 14754 }, { "epoch": 0.4307903418878281, "grad_norm": 0.7398840831419741, "learning_rate": 1.2649472830494729e-05, "loss": 0.7209, "step": 14755 }, { "epoch": 0.43081953811567547, "grad_norm": 0.647387260373017, "learning_rate": 1.264882400648824e-05, "loss": 0.6177, "step": 14756 }, { "epoch": 0.43084873434352283, "grad_norm": 0.6153442471911473, "learning_rate": 1.2648175182481753e-05, "loss": 0.5347, "step": 14757 }, { "epoch": 0.4308779305713702, "grad_norm": 0.6287526952006476, "learning_rate": 1.2647526358475263e-05, "loss": 0.6215, "step": 14758 }, { "epoch": 0.43090712679921755, "grad_norm": 0.6474655691304866, "learning_rate": 1.2646877534468775e-05, "loss": 0.5913, "step": 14759 }, { "epoch": 0.4309363230270649, "grad_norm": 0.6277852446822145, "learning_rate": 1.2646228710462287e-05, "loss": 0.5742, "step": 14760 }, { "epoch": 0.4309655192549123, "grad_norm": 0.7087811421147003, "learning_rate": 1.26455798864558e-05, "loss": 0.6364, "step": 14761 }, { "epoch": 0.43099471548275964, "grad_norm": 0.6710144087204105, "learning_rate": 1.2644931062449313e-05, "loss": 0.6543, "step": 14762 }, { "epoch": 0.431023911710607, "grad_norm": 0.6900916535133349, "learning_rate": 1.2644282238442825e-05, "loss": 0.7388, "step": 14763 }, { "epoch": 0.43105310793845436, "grad_norm": 0.703427556045648, "learning_rate": 1.2643633414436335e-05, "loss": 0.6506, "step": 14764 }, { "epoch": 0.4310823041663017, "grad_norm": 0.6538109369924497, "learning_rate": 1.2642984590429847e-05, "loss": 0.6083, "step": 14765 }, { "epoch": 0.4311115003941491, "grad_norm": 0.6567536040799579, "learning_rate": 1.264233576642336e-05, "loss": 0.6257, "step": 14766 }, { "epoch": 0.43114069662199644, "grad_norm": 0.608150089874089, "learning_rate": 1.2641686942416871e-05, "loss": 0.5605, "step": 14767 }, { "epoch": 0.4311698928498438, "grad_norm": 0.6504603603500956, "learning_rate": 1.2641038118410382e-05, "loss": 0.629, "step": 14768 }, { "epoch": 0.43119908907769117, "grad_norm": 0.7017995236253176, "learning_rate": 1.2640389294403894e-05, "loss": 0.7064, "step": 14769 }, { "epoch": 0.43122828530553853, "grad_norm": 0.583442419014269, "learning_rate": 1.2639740470397406e-05, "loss": 0.5377, "step": 14770 }, { "epoch": 0.4312574815333859, "grad_norm": 0.6586477035933958, "learning_rate": 1.2639091646390918e-05, "loss": 0.6449, "step": 14771 }, { "epoch": 0.43128667776123325, "grad_norm": 0.6686175170186107, "learning_rate": 1.263844282238443e-05, "loss": 0.6964, "step": 14772 }, { "epoch": 0.4313158739890806, "grad_norm": 0.603564376561432, "learning_rate": 1.263779399837794e-05, "loss": 0.5667, "step": 14773 }, { "epoch": 0.431345070216928, "grad_norm": 0.727077697359227, "learning_rate": 1.2637145174371452e-05, "loss": 0.746, "step": 14774 }, { "epoch": 0.43137426644477533, "grad_norm": 0.701735136597398, "learning_rate": 1.2636496350364964e-05, "loss": 0.4739, "step": 14775 }, { "epoch": 0.4314034626726227, "grad_norm": 0.6298403428291081, "learning_rate": 1.2635847526358476e-05, "loss": 0.6528, "step": 14776 }, { "epoch": 0.43143265890047006, "grad_norm": 0.64825989195539, "learning_rate": 1.2635198702351987e-05, "loss": 0.6273, "step": 14777 }, { "epoch": 0.4314618551283174, "grad_norm": 0.6285426700190448, "learning_rate": 1.2634549878345499e-05, "loss": 0.5989, "step": 14778 }, { "epoch": 0.4314910513561648, "grad_norm": 0.6848867884421823, "learning_rate": 1.263390105433901e-05, "loss": 0.6559, "step": 14779 }, { "epoch": 0.43152024758401214, "grad_norm": 0.6160270468143952, "learning_rate": 1.2633252230332523e-05, "loss": 0.62, "step": 14780 }, { "epoch": 0.4315494438118595, "grad_norm": 0.6905190293577436, "learning_rate": 1.2632603406326036e-05, "loss": 0.725, "step": 14781 }, { "epoch": 0.43157864003970686, "grad_norm": 0.662260618614338, "learning_rate": 1.2631954582319548e-05, "loss": 0.6822, "step": 14782 }, { "epoch": 0.4316078362675542, "grad_norm": 0.6762247933381876, "learning_rate": 1.2631305758313059e-05, "loss": 0.6985, "step": 14783 }, { "epoch": 0.4316370324954016, "grad_norm": 1.6285325285924652, "learning_rate": 1.263065693430657e-05, "loss": 0.6579, "step": 14784 }, { "epoch": 0.43166622872324895, "grad_norm": 0.5947946256004425, "learning_rate": 1.2630008110300083e-05, "loss": 0.5132, "step": 14785 }, { "epoch": 0.4316954249510963, "grad_norm": 0.6495205027552375, "learning_rate": 1.2629359286293595e-05, "loss": 0.6105, "step": 14786 }, { "epoch": 0.43172462117894367, "grad_norm": 0.6175758635695808, "learning_rate": 1.2628710462287105e-05, "loss": 0.5587, "step": 14787 }, { "epoch": 0.43175381740679103, "grad_norm": 0.6366217868710484, "learning_rate": 1.2628061638280617e-05, "loss": 0.6222, "step": 14788 }, { "epoch": 0.4317830136346384, "grad_norm": 0.7462726555431884, "learning_rate": 1.262741281427413e-05, "loss": 0.7031, "step": 14789 }, { "epoch": 0.43181220986248575, "grad_norm": 0.6423735963857387, "learning_rate": 1.2626763990267641e-05, "loss": 0.6055, "step": 14790 }, { "epoch": 0.4318414060903331, "grad_norm": 0.6878144369310116, "learning_rate": 1.2626115166261153e-05, "loss": 0.7479, "step": 14791 }, { "epoch": 0.4318706023181805, "grad_norm": 0.6467108150555054, "learning_rate": 1.2625466342254664e-05, "loss": 0.6361, "step": 14792 }, { "epoch": 0.43189979854602784, "grad_norm": 0.622487485966823, "learning_rate": 1.2624817518248176e-05, "loss": 0.5558, "step": 14793 }, { "epoch": 0.4319289947738752, "grad_norm": 0.6465690057029151, "learning_rate": 1.2624168694241688e-05, "loss": 0.6446, "step": 14794 }, { "epoch": 0.43195819100172256, "grad_norm": 0.6721808989427508, "learning_rate": 1.26235198702352e-05, "loss": 0.6656, "step": 14795 }, { "epoch": 0.4319873872295699, "grad_norm": 0.674411033848572, "learning_rate": 1.262287104622871e-05, "loss": 0.7248, "step": 14796 }, { "epoch": 0.4320165834574173, "grad_norm": 0.6973756153423023, "learning_rate": 1.2622222222222222e-05, "loss": 0.6417, "step": 14797 }, { "epoch": 0.43204577968526464, "grad_norm": 0.645173470003133, "learning_rate": 1.2621573398215734e-05, "loss": 0.6152, "step": 14798 }, { "epoch": 0.432074975913112, "grad_norm": 0.6277400097202164, "learning_rate": 1.2620924574209248e-05, "loss": 0.6126, "step": 14799 }, { "epoch": 0.43210417214095936, "grad_norm": 0.68226810636064, "learning_rate": 1.262027575020276e-05, "loss": 0.6988, "step": 14800 }, { "epoch": 0.4321333683688067, "grad_norm": 0.637226396500923, "learning_rate": 1.2619626926196272e-05, "loss": 0.6318, "step": 14801 }, { "epoch": 0.4321625645966541, "grad_norm": 0.6441248603512056, "learning_rate": 1.2618978102189782e-05, "loss": 0.6287, "step": 14802 }, { "epoch": 0.43219176082450145, "grad_norm": 0.8161438912186038, "learning_rate": 1.2618329278183294e-05, "loss": 0.5552, "step": 14803 }, { "epoch": 0.43222095705234886, "grad_norm": 0.6851331651346015, "learning_rate": 1.2617680454176806e-05, "loss": 0.6723, "step": 14804 }, { "epoch": 0.4322501532801962, "grad_norm": 0.7180135188530429, "learning_rate": 1.2617031630170318e-05, "loss": 0.7381, "step": 14805 }, { "epoch": 0.4322793495080436, "grad_norm": 0.6411174660301272, "learning_rate": 1.2616382806163829e-05, "loss": 0.5742, "step": 14806 }, { "epoch": 0.43230854573589095, "grad_norm": 0.6519382953297908, "learning_rate": 1.261573398215734e-05, "loss": 0.635, "step": 14807 }, { "epoch": 0.4323377419637383, "grad_norm": 0.633084094016843, "learning_rate": 1.2615085158150853e-05, "loss": 0.6079, "step": 14808 }, { "epoch": 0.43236693819158567, "grad_norm": 0.6307704391507435, "learning_rate": 1.2614436334144365e-05, "loss": 0.5739, "step": 14809 }, { "epoch": 0.43239613441943303, "grad_norm": 0.6901287028044466, "learning_rate": 1.2613787510137877e-05, "loss": 0.6462, "step": 14810 }, { "epoch": 0.4324253306472804, "grad_norm": 0.6710017850940305, "learning_rate": 1.2613138686131387e-05, "loss": 0.6484, "step": 14811 }, { "epoch": 0.43245452687512775, "grad_norm": 0.6532769965716587, "learning_rate": 1.2612489862124899e-05, "loss": 0.5395, "step": 14812 }, { "epoch": 0.4324837231029751, "grad_norm": 0.6536206772484537, "learning_rate": 1.2611841038118411e-05, "loss": 0.6493, "step": 14813 }, { "epoch": 0.4325129193308225, "grad_norm": 0.6300768742451024, "learning_rate": 1.2611192214111923e-05, "loss": 0.6181, "step": 14814 }, { "epoch": 0.43254211555866984, "grad_norm": 0.69494694770699, "learning_rate": 1.2610543390105433e-05, "loss": 0.6648, "step": 14815 }, { "epoch": 0.4325713117865172, "grad_norm": 0.655014734554676, "learning_rate": 1.2609894566098945e-05, "loss": 0.5652, "step": 14816 }, { "epoch": 0.43260050801436456, "grad_norm": 0.6875531268245867, "learning_rate": 1.2609245742092457e-05, "loss": 0.6471, "step": 14817 }, { "epoch": 0.4326297042422119, "grad_norm": 0.6847457370266217, "learning_rate": 1.2608596918085971e-05, "loss": 0.6864, "step": 14818 }, { "epoch": 0.4326589004700593, "grad_norm": 0.5752525984883146, "learning_rate": 1.2607948094079483e-05, "loss": 0.5295, "step": 14819 }, { "epoch": 0.43268809669790664, "grad_norm": 0.6847660976789921, "learning_rate": 1.2607299270072995e-05, "loss": 0.626, "step": 14820 }, { "epoch": 0.432717292925754, "grad_norm": 0.6111653308860588, "learning_rate": 1.2606650446066506e-05, "loss": 0.6244, "step": 14821 }, { "epoch": 0.43274648915360137, "grad_norm": 0.6094824238832898, "learning_rate": 1.2606001622060018e-05, "loss": 0.6024, "step": 14822 }, { "epoch": 0.4327756853814487, "grad_norm": 0.6461715987905422, "learning_rate": 1.260535279805353e-05, "loss": 0.6304, "step": 14823 }, { "epoch": 0.4328048816092961, "grad_norm": 0.6587139869769945, "learning_rate": 1.2604703974047042e-05, "loss": 0.689, "step": 14824 }, { "epoch": 0.43283407783714345, "grad_norm": 0.6353601784901851, "learning_rate": 1.2604055150040552e-05, "loss": 0.6415, "step": 14825 }, { "epoch": 0.4328632740649908, "grad_norm": 0.6272127136081859, "learning_rate": 1.2603406326034064e-05, "loss": 0.5673, "step": 14826 }, { "epoch": 0.43289247029283817, "grad_norm": 0.6500694425323863, "learning_rate": 1.2602757502027576e-05, "loss": 0.5923, "step": 14827 }, { "epoch": 0.43292166652068553, "grad_norm": 0.5984785769251163, "learning_rate": 1.2602108678021088e-05, "loss": 0.5302, "step": 14828 }, { "epoch": 0.4329508627485329, "grad_norm": 0.6172291913237817, "learning_rate": 1.26014598540146e-05, "loss": 0.5974, "step": 14829 }, { "epoch": 0.43298005897638026, "grad_norm": 0.6423700898940548, "learning_rate": 1.260081103000811e-05, "loss": 0.6407, "step": 14830 }, { "epoch": 0.4330092552042276, "grad_norm": 0.6438197835600635, "learning_rate": 1.2600162206001622e-05, "loss": 0.6586, "step": 14831 }, { "epoch": 0.433038451432075, "grad_norm": 0.6558047836448885, "learning_rate": 1.2599513381995135e-05, "loss": 0.6175, "step": 14832 }, { "epoch": 0.43306764765992234, "grad_norm": 0.69260196673506, "learning_rate": 1.2598864557988647e-05, "loss": 0.6918, "step": 14833 }, { "epoch": 0.4330968438877697, "grad_norm": 0.6537498814816382, "learning_rate": 1.2598215733982157e-05, "loss": 0.6156, "step": 14834 }, { "epoch": 0.43312604011561706, "grad_norm": 0.6703420468980797, "learning_rate": 1.2597566909975669e-05, "loss": 0.6458, "step": 14835 }, { "epoch": 0.4331552363434644, "grad_norm": 0.64352928791968, "learning_rate": 1.2596918085969181e-05, "loss": 0.5903, "step": 14836 }, { "epoch": 0.4331844325713118, "grad_norm": 0.5915897473659512, "learning_rate": 1.2596269261962695e-05, "loss": 0.5579, "step": 14837 }, { "epoch": 0.43321362879915915, "grad_norm": 0.5974943038812345, "learning_rate": 1.2595620437956207e-05, "loss": 0.5413, "step": 14838 }, { "epoch": 0.4332428250270065, "grad_norm": 0.5997351122333374, "learning_rate": 1.2594971613949719e-05, "loss": 0.5547, "step": 14839 }, { "epoch": 0.43327202125485387, "grad_norm": 0.6720802288692467, "learning_rate": 1.2594322789943229e-05, "loss": 0.6749, "step": 14840 }, { "epoch": 0.43330121748270123, "grad_norm": 0.6169807705473788, "learning_rate": 1.2593673965936741e-05, "loss": 0.5585, "step": 14841 }, { "epoch": 0.4333304137105486, "grad_norm": 0.6761084818867562, "learning_rate": 1.2593025141930253e-05, "loss": 0.6605, "step": 14842 }, { "epoch": 0.43335960993839595, "grad_norm": 0.6336574526097205, "learning_rate": 1.2592376317923765e-05, "loss": 0.6103, "step": 14843 }, { "epoch": 0.4333888061662433, "grad_norm": 0.7143545423657404, "learning_rate": 1.2591727493917275e-05, "loss": 0.6936, "step": 14844 }, { "epoch": 0.4334180023940907, "grad_norm": 0.6724746627261928, "learning_rate": 1.2591078669910787e-05, "loss": 0.5816, "step": 14845 }, { "epoch": 0.43344719862193803, "grad_norm": 0.6876602116204935, "learning_rate": 1.25904298459043e-05, "loss": 0.6396, "step": 14846 }, { "epoch": 0.4334763948497854, "grad_norm": 0.6630246339045298, "learning_rate": 1.2589781021897812e-05, "loss": 0.625, "step": 14847 }, { "epoch": 0.43350559107763276, "grad_norm": 0.5990224371296826, "learning_rate": 1.2589132197891324e-05, "loss": 0.5517, "step": 14848 }, { "epoch": 0.4335347873054801, "grad_norm": 0.6988491150374754, "learning_rate": 1.2588483373884834e-05, "loss": 0.6355, "step": 14849 }, { "epoch": 0.4335639835333275, "grad_norm": 0.6533298449842475, "learning_rate": 1.2587834549878346e-05, "loss": 0.6127, "step": 14850 }, { "epoch": 0.43359317976117484, "grad_norm": 0.603846655382675, "learning_rate": 1.2587185725871858e-05, "loss": 0.6073, "step": 14851 }, { "epoch": 0.4336223759890222, "grad_norm": 0.6840428907433631, "learning_rate": 1.258653690186537e-05, "loss": 0.6558, "step": 14852 }, { "epoch": 0.43365157221686956, "grad_norm": 0.6579757770194825, "learning_rate": 1.258588807785888e-05, "loss": 0.6834, "step": 14853 }, { "epoch": 0.4336807684447169, "grad_norm": 0.6191563341802954, "learning_rate": 1.2585239253852392e-05, "loss": 0.609, "step": 14854 }, { "epoch": 0.4337099646725643, "grad_norm": 0.6121727897881604, "learning_rate": 1.2584590429845904e-05, "loss": 0.557, "step": 14855 }, { "epoch": 0.43373916090041165, "grad_norm": 0.6401869297327909, "learning_rate": 1.2583941605839418e-05, "loss": 0.6143, "step": 14856 }, { "epoch": 0.433768357128259, "grad_norm": 0.6285427398504513, "learning_rate": 1.258329278183293e-05, "loss": 0.6233, "step": 14857 }, { "epoch": 0.43379755335610637, "grad_norm": 0.6803598972694542, "learning_rate": 1.2582643957826442e-05, "loss": 0.7369, "step": 14858 }, { "epoch": 0.43382674958395373, "grad_norm": 0.6653244913090457, "learning_rate": 1.2581995133819952e-05, "loss": 0.6578, "step": 14859 }, { "epoch": 0.4338559458118011, "grad_norm": 0.6104379386801959, "learning_rate": 1.2581346309813464e-05, "loss": 0.5681, "step": 14860 }, { "epoch": 0.43388514203964845, "grad_norm": 0.6375700065134164, "learning_rate": 1.2580697485806977e-05, "loss": 0.5739, "step": 14861 }, { "epoch": 0.4339143382674958, "grad_norm": 0.6293016374354541, "learning_rate": 1.2580048661800489e-05, "loss": 0.5812, "step": 14862 }, { "epoch": 0.4339435344953432, "grad_norm": 0.6729954067367429, "learning_rate": 1.2579399837793999e-05, "loss": 0.6801, "step": 14863 }, { "epoch": 0.43397273072319054, "grad_norm": 0.6238018902851302, "learning_rate": 1.2578751013787511e-05, "loss": 0.5901, "step": 14864 }, { "epoch": 0.43400192695103795, "grad_norm": 0.5969155787718903, "learning_rate": 1.2578102189781023e-05, "loss": 0.5124, "step": 14865 }, { "epoch": 0.4340311231788853, "grad_norm": 0.6820904243077084, "learning_rate": 1.2577453365774535e-05, "loss": 0.7203, "step": 14866 }, { "epoch": 0.4340603194067327, "grad_norm": 0.666283726626392, "learning_rate": 1.2576804541768047e-05, "loss": 0.6652, "step": 14867 }, { "epoch": 0.43408951563458004, "grad_norm": 0.6426888144352708, "learning_rate": 1.2576155717761557e-05, "loss": 0.607, "step": 14868 }, { "epoch": 0.4341187118624274, "grad_norm": 0.6159874881307151, "learning_rate": 1.257550689375507e-05, "loss": 0.5473, "step": 14869 }, { "epoch": 0.43414790809027476, "grad_norm": 0.7745634133335831, "learning_rate": 1.2574858069748581e-05, "loss": 0.6818, "step": 14870 }, { "epoch": 0.4341771043181221, "grad_norm": 0.6309336533729701, "learning_rate": 1.2574209245742093e-05, "loss": 0.5982, "step": 14871 }, { "epoch": 0.4342063005459695, "grad_norm": 0.5990281269347486, "learning_rate": 1.2573560421735604e-05, "loss": 0.5687, "step": 14872 }, { "epoch": 0.43423549677381684, "grad_norm": 0.6903361845216158, "learning_rate": 1.2572911597729116e-05, "loss": 0.7048, "step": 14873 }, { "epoch": 0.4342646930016642, "grad_norm": 0.6637059026621486, "learning_rate": 1.2572262773722628e-05, "loss": 0.6721, "step": 14874 }, { "epoch": 0.43429388922951156, "grad_norm": 0.6481883963144124, "learning_rate": 1.2571613949716142e-05, "loss": 0.5906, "step": 14875 }, { "epoch": 0.4343230854573589, "grad_norm": 0.6970534268542513, "learning_rate": 1.2570965125709654e-05, "loss": 0.6631, "step": 14876 }, { "epoch": 0.4343522816852063, "grad_norm": 0.6739176046039453, "learning_rate": 1.2570316301703166e-05, "loss": 0.6062, "step": 14877 }, { "epoch": 0.43438147791305365, "grad_norm": 0.6233804551999331, "learning_rate": 1.2569667477696676e-05, "loss": 0.561, "step": 14878 }, { "epoch": 0.434410674140901, "grad_norm": 0.6375437749092525, "learning_rate": 1.2569018653690188e-05, "loss": 0.6256, "step": 14879 }, { "epoch": 0.43443987036874837, "grad_norm": 0.5949505188977833, "learning_rate": 1.25683698296837e-05, "loss": 0.5423, "step": 14880 }, { "epoch": 0.43446906659659573, "grad_norm": 0.6721461738354946, "learning_rate": 1.2567721005677212e-05, "loss": 0.5722, "step": 14881 }, { "epoch": 0.4344982628244431, "grad_norm": 0.6533450627628768, "learning_rate": 1.2567072181670722e-05, "loss": 0.6336, "step": 14882 }, { "epoch": 0.43452745905229045, "grad_norm": 0.6148645286535607, "learning_rate": 1.2566423357664234e-05, "loss": 0.5639, "step": 14883 }, { "epoch": 0.4345566552801378, "grad_norm": 0.6709171801914378, "learning_rate": 1.2565774533657746e-05, "loss": 0.6395, "step": 14884 }, { "epoch": 0.4345858515079852, "grad_norm": 0.5860595110856756, "learning_rate": 1.2565125709651258e-05, "loss": 0.5133, "step": 14885 }, { "epoch": 0.43461504773583254, "grad_norm": 0.6686577686161316, "learning_rate": 1.256447688564477e-05, "loss": 0.6808, "step": 14886 }, { "epoch": 0.4346442439636799, "grad_norm": 0.5887755126894448, "learning_rate": 1.256382806163828e-05, "loss": 0.4929, "step": 14887 }, { "epoch": 0.43467344019152726, "grad_norm": 0.6355622697485042, "learning_rate": 1.2563179237631793e-05, "loss": 0.6338, "step": 14888 }, { "epoch": 0.4347026364193746, "grad_norm": 0.6616384296845943, "learning_rate": 1.2562530413625305e-05, "loss": 0.6779, "step": 14889 }, { "epoch": 0.434731832647222, "grad_norm": 0.6747882824354875, "learning_rate": 1.2561881589618817e-05, "loss": 0.6459, "step": 14890 }, { "epoch": 0.43476102887506934, "grad_norm": 0.6344303471249862, "learning_rate": 1.2561232765612327e-05, "loss": 0.594, "step": 14891 }, { "epoch": 0.4347902251029167, "grad_norm": 0.6724497823819822, "learning_rate": 1.256058394160584e-05, "loss": 0.6552, "step": 14892 }, { "epoch": 0.43481942133076407, "grad_norm": 0.7276652708263858, "learning_rate": 1.2559935117599351e-05, "loss": 0.7067, "step": 14893 }, { "epoch": 0.4348486175586114, "grad_norm": 0.6067756875115744, "learning_rate": 1.2559286293592865e-05, "loss": 0.5437, "step": 14894 }, { "epoch": 0.4348778137864588, "grad_norm": 0.6709341585167529, "learning_rate": 1.2558637469586377e-05, "loss": 0.6815, "step": 14895 }, { "epoch": 0.43490701001430615, "grad_norm": 0.6314237156618377, "learning_rate": 1.2557988645579889e-05, "loss": 0.5521, "step": 14896 }, { "epoch": 0.4349362062421535, "grad_norm": 0.7047374995413088, "learning_rate": 1.25573398215734e-05, "loss": 0.6636, "step": 14897 }, { "epoch": 0.4349654024700009, "grad_norm": 0.6240874653883959, "learning_rate": 1.2556690997566911e-05, "loss": 0.5693, "step": 14898 }, { "epoch": 0.43499459869784823, "grad_norm": 0.613963644265924, "learning_rate": 1.2556042173560423e-05, "loss": 0.5614, "step": 14899 }, { "epoch": 0.4350237949256956, "grad_norm": 0.6081454154977965, "learning_rate": 1.2555393349553935e-05, "loss": 0.5138, "step": 14900 }, { "epoch": 0.43505299115354296, "grad_norm": 0.6658941077447457, "learning_rate": 1.2554744525547446e-05, "loss": 0.6217, "step": 14901 }, { "epoch": 0.4350821873813903, "grad_norm": 0.7091796109755198, "learning_rate": 1.2554095701540958e-05, "loss": 0.6844, "step": 14902 }, { "epoch": 0.4351113836092377, "grad_norm": 0.6501436869988237, "learning_rate": 1.255344687753447e-05, "loss": 0.6695, "step": 14903 }, { "epoch": 0.43514057983708504, "grad_norm": 0.6585879438928498, "learning_rate": 1.2552798053527982e-05, "loss": 0.5338, "step": 14904 }, { "epoch": 0.4351697760649324, "grad_norm": 0.7295062530912042, "learning_rate": 1.2552149229521494e-05, "loss": 0.6567, "step": 14905 }, { "epoch": 0.43519897229277976, "grad_norm": 0.6712397226524106, "learning_rate": 1.2551500405515004e-05, "loss": 0.7061, "step": 14906 }, { "epoch": 0.4352281685206271, "grad_norm": 0.7392704268785325, "learning_rate": 1.2550851581508516e-05, "loss": 0.664, "step": 14907 }, { "epoch": 0.4352573647484745, "grad_norm": 0.6728931182326725, "learning_rate": 1.2550202757502028e-05, "loss": 0.6699, "step": 14908 }, { "epoch": 0.43528656097632185, "grad_norm": 0.6482008931316507, "learning_rate": 1.254955393349554e-05, "loss": 0.6186, "step": 14909 }, { "epoch": 0.4353157572041692, "grad_norm": 0.6704241852412567, "learning_rate": 1.254890510948905e-05, "loss": 0.6839, "step": 14910 }, { "epoch": 0.43534495343201657, "grad_norm": 0.6238485330347872, "learning_rate": 1.2548256285482563e-05, "loss": 0.6178, "step": 14911 }, { "epoch": 0.43537414965986393, "grad_norm": 0.6273786761350799, "learning_rate": 1.2547607461476075e-05, "loss": 0.597, "step": 14912 }, { "epoch": 0.4354033458877113, "grad_norm": 0.633959043318257, "learning_rate": 1.2546958637469588e-05, "loss": 0.5537, "step": 14913 }, { "epoch": 0.43543254211555865, "grad_norm": 0.6035537724477272, "learning_rate": 1.25463098134631e-05, "loss": 0.55, "step": 14914 }, { "epoch": 0.435461738343406, "grad_norm": 0.6328652163231585, "learning_rate": 1.2545660989456612e-05, "loss": 0.5622, "step": 14915 }, { "epoch": 0.4354909345712534, "grad_norm": 0.6618797966761334, "learning_rate": 1.2545012165450123e-05, "loss": 0.6671, "step": 14916 }, { "epoch": 0.43552013079910074, "grad_norm": 0.6088096972287216, "learning_rate": 1.2544363341443635e-05, "loss": 0.5483, "step": 14917 }, { "epoch": 0.4355493270269481, "grad_norm": 0.5954649341873651, "learning_rate": 1.2543714517437147e-05, "loss": 0.5243, "step": 14918 }, { "epoch": 0.43557852325479546, "grad_norm": 0.6516173627784475, "learning_rate": 1.2543065693430659e-05, "loss": 0.6043, "step": 14919 }, { "epoch": 0.4356077194826428, "grad_norm": 0.7776165223378823, "learning_rate": 1.254241686942417e-05, "loss": 0.5799, "step": 14920 }, { "epoch": 0.4356369157104902, "grad_norm": 0.6527813396461031, "learning_rate": 1.2541768045417681e-05, "loss": 0.6438, "step": 14921 }, { "epoch": 0.43566611193833754, "grad_norm": 0.6986830011777804, "learning_rate": 1.2541119221411193e-05, "loss": 0.6944, "step": 14922 }, { "epoch": 0.4356953081661849, "grad_norm": 0.676509019581964, "learning_rate": 1.2540470397404705e-05, "loss": 0.6682, "step": 14923 }, { "epoch": 0.43572450439403226, "grad_norm": 0.7139937712910521, "learning_rate": 1.2539821573398217e-05, "loss": 0.6269, "step": 14924 }, { "epoch": 0.4357537006218797, "grad_norm": 0.6399345063323642, "learning_rate": 1.2539172749391728e-05, "loss": 0.6143, "step": 14925 }, { "epoch": 0.43578289684972704, "grad_norm": 0.6085783859320922, "learning_rate": 1.253852392538524e-05, "loss": 0.5602, "step": 14926 }, { "epoch": 0.4358120930775744, "grad_norm": 0.6563058148630102, "learning_rate": 1.2537875101378752e-05, "loss": 0.6176, "step": 14927 }, { "epoch": 0.43584128930542176, "grad_norm": 0.671536648041214, "learning_rate": 1.2537226277372264e-05, "loss": 0.6898, "step": 14928 }, { "epoch": 0.4358704855332691, "grad_norm": 0.6586478338860746, "learning_rate": 1.2536577453365774e-05, "loss": 0.6295, "step": 14929 }, { "epoch": 0.4358996817611165, "grad_norm": 0.5962021938300859, "learning_rate": 1.2535928629359286e-05, "loss": 0.5017, "step": 14930 }, { "epoch": 0.43592887798896385, "grad_norm": 0.6552238002886369, "learning_rate": 1.2535279805352798e-05, "loss": 0.6309, "step": 14931 }, { "epoch": 0.4359580742168112, "grad_norm": 0.6548041859627592, "learning_rate": 1.2534630981346312e-05, "loss": 0.5711, "step": 14932 }, { "epoch": 0.43598727044465857, "grad_norm": 0.6261794545276362, "learning_rate": 1.2533982157339824e-05, "loss": 0.5801, "step": 14933 }, { "epoch": 0.43601646667250593, "grad_norm": 0.7119995443539884, "learning_rate": 1.2533333333333336e-05, "loss": 0.6929, "step": 14934 }, { "epoch": 0.4360456629003533, "grad_norm": 0.6411165627715558, "learning_rate": 1.2532684509326846e-05, "loss": 0.6154, "step": 14935 }, { "epoch": 0.43607485912820065, "grad_norm": 0.6532233544503816, "learning_rate": 1.2532035685320358e-05, "loss": 0.5811, "step": 14936 }, { "epoch": 0.436104055356048, "grad_norm": 0.614224390793831, "learning_rate": 1.253138686131387e-05, "loss": 0.5633, "step": 14937 }, { "epoch": 0.4361332515838954, "grad_norm": 0.5861304505483542, "learning_rate": 1.2530738037307382e-05, "loss": 0.5099, "step": 14938 }, { "epoch": 0.43616244781174274, "grad_norm": 0.6466540188037359, "learning_rate": 1.2530089213300893e-05, "loss": 0.6182, "step": 14939 }, { "epoch": 0.4361916440395901, "grad_norm": 0.6285035672541321, "learning_rate": 1.2529440389294405e-05, "loss": 0.5613, "step": 14940 }, { "epoch": 0.43622084026743746, "grad_norm": 0.6209107218665721, "learning_rate": 1.2528791565287917e-05, "loss": 0.5491, "step": 14941 }, { "epoch": 0.4362500364952848, "grad_norm": 0.6100804196184402, "learning_rate": 1.2528142741281429e-05, "loss": 0.5451, "step": 14942 }, { "epoch": 0.4362792327231322, "grad_norm": 0.6662833722976221, "learning_rate": 1.252749391727494e-05, "loss": 0.6554, "step": 14943 }, { "epoch": 0.43630842895097954, "grad_norm": 0.6544432865257828, "learning_rate": 1.2526845093268451e-05, "loss": 0.5971, "step": 14944 }, { "epoch": 0.4363376251788269, "grad_norm": 0.6787349833676617, "learning_rate": 1.2526196269261963e-05, "loss": 0.6774, "step": 14945 }, { "epoch": 0.43636682140667427, "grad_norm": 0.63557462278042, "learning_rate": 1.2525547445255475e-05, "loss": 0.5807, "step": 14946 }, { "epoch": 0.4363960176345216, "grad_norm": 0.6862534771191637, "learning_rate": 1.2524898621248987e-05, "loss": 0.6773, "step": 14947 }, { "epoch": 0.436425213862369, "grad_norm": 0.6500757183943601, "learning_rate": 1.2524249797242497e-05, "loss": 0.6431, "step": 14948 }, { "epoch": 0.43645441009021635, "grad_norm": 0.6526691131230955, "learning_rate": 1.252360097323601e-05, "loss": 0.6135, "step": 14949 }, { "epoch": 0.4364836063180637, "grad_norm": 0.6010584473514197, "learning_rate": 1.2522952149229523e-05, "loss": 0.5729, "step": 14950 }, { "epoch": 0.43651280254591107, "grad_norm": 0.6719916074899074, "learning_rate": 1.2522303325223035e-05, "loss": 0.7043, "step": 14951 }, { "epoch": 0.43654199877375843, "grad_norm": 0.6516536494650007, "learning_rate": 1.2521654501216547e-05, "loss": 0.5941, "step": 14952 }, { "epoch": 0.4365711950016058, "grad_norm": 0.6374037605274093, "learning_rate": 1.252100567721006e-05, "loss": 0.6164, "step": 14953 }, { "epoch": 0.43660039122945316, "grad_norm": 0.6579057535417785, "learning_rate": 1.252035685320357e-05, "loss": 0.6175, "step": 14954 }, { "epoch": 0.4366295874573005, "grad_norm": 0.6207008560868221, "learning_rate": 1.2519708029197082e-05, "loss": 0.5672, "step": 14955 }, { "epoch": 0.4366587836851479, "grad_norm": 0.6333721025897842, "learning_rate": 1.2519059205190594e-05, "loss": 0.6227, "step": 14956 }, { "epoch": 0.43668797991299524, "grad_norm": 0.68882417966624, "learning_rate": 1.2518410381184106e-05, "loss": 0.6937, "step": 14957 }, { "epoch": 0.4367171761408426, "grad_norm": 0.6950414747466132, "learning_rate": 1.2517761557177616e-05, "loss": 0.6943, "step": 14958 }, { "epoch": 0.43674637236868996, "grad_norm": 0.6397624874586114, "learning_rate": 1.2517112733171128e-05, "loss": 0.5783, "step": 14959 }, { "epoch": 0.4367755685965373, "grad_norm": 0.5952138946067795, "learning_rate": 1.251646390916464e-05, "loss": 0.5518, "step": 14960 }, { "epoch": 0.4368047648243847, "grad_norm": 0.6320958464139576, "learning_rate": 1.2515815085158152e-05, "loss": 0.6417, "step": 14961 }, { "epoch": 0.43683396105223204, "grad_norm": 0.6864709752505931, "learning_rate": 1.2515166261151664e-05, "loss": 0.6585, "step": 14962 }, { "epoch": 0.4368631572800794, "grad_norm": 0.6158030374070519, "learning_rate": 1.2514517437145174e-05, "loss": 0.5611, "step": 14963 }, { "epoch": 0.43689235350792677, "grad_norm": 0.634443117732427, "learning_rate": 1.2513868613138687e-05, "loss": 0.568, "step": 14964 }, { "epoch": 0.43692154973577413, "grad_norm": 0.6218020604066063, "learning_rate": 1.2513219789132199e-05, "loss": 0.5835, "step": 14965 }, { "epoch": 0.4369507459636215, "grad_norm": 0.6066493558062345, "learning_rate": 1.251257096512571e-05, "loss": 0.6222, "step": 14966 }, { "epoch": 0.43697994219146885, "grad_norm": 0.6800460388555002, "learning_rate": 1.2511922141119221e-05, "loss": 0.6774, "step": 14967 }, { "epoch": 0.4370091384193162, "grad_norm": 0.7166778400980649, "learning_rate": 1.2511273317112733e-05, "loss": 0.7449, "step": 14968 }, { "epoch": 0.4370383346471636, "grad_norm": 0.7143210081052492, "learning_rate": 1.2510624493106247e-05, "loss": 0.7156, "step": 14969 }, { "epoch": 0.43706753087501093, "grad_norm": 0.6671416438982621, "learning_rate": 1.2509975669099759e-05, "loss": 0.6446, "step": 14970 }, { "epoch": 0.4370967271028583, "grad_norm": 0.6681910202151469, "learning_rate": 1.250932684509327e-05, "loss": 0.6259, "step": 14971 }, { "epoch": 0.43712592333070566, "grad_norm": 0.639069896023, "learning_rate": 1.2508678021086783e-05, "loss": 0.619, "step": 14972 }, { "epoch": 0.437155119558553, "grad_norm": 0.7150500898659004, "learning_rate": 1.2508029197080293e-05, "loss": 0.6438, "step": 14973 }, { "epoch": 0.4371843157864004, "grad_norm": 0.682213738697113, "learning_rate": 1.2507380373073805e-05, "loss": 0.6032, "step": 14974 }, { "epoch": 0.43721351201424774, "grad_norm": 0.6797979601756454, "learning_rate": 1.2506731549067317e-05, "loss": 0.6448, "step": 14975 }, { "epoch": 0.4372427082420951, "grad_norm": 0.6348432811860174, "learning_rate": 1.250608272506083e-05, "loss": 0.6492, "step": 14976 }, { "epoch": 0.43727190446994246, "grad_norm": 0.6158165859715835, "learning_rate": 1.250543390105434e-05, "loss": 0.59, "step": 14977 }, { "epoch": 0.4373011006977898, "grad_norm": 0.6534894929505464, "learning_rate": 1.2504785077047852e-05, "loss": 0.615, "step": 14978 }, { "epoch": 0.4373302969256372, "grad_norm": 0.647232696558207, "learning_rate": 1.2504136253041364e-05, "loss": 0.5675, "step": 14979 }, { "epoch": 0.43735949315348455, "grad_norm": 0.642237348912874, "learning_rate": 1.2503487429034876e-05, "loss": 0.5982, "step": 14980 }, { "epoch": 0.4373886893813319, "grad_norm": 0.6758566909031576, "learning_rate": 1.2502838605028388e-05, "loss": 0.6379, "step": 14981 }, { "epoch": 0.43741788560917927, "grad_norm": 0.6860140780221484, "learning_rate": 1.2502189781021898e-05, "loss": 0.5814, "step": 14982 }, { "epoch": 0.43744708183702663, "grad_norm": 0.6977494611717823, "learning_rate": 1.250154095701541e-05, "loss": 0.7373, "step": 14983 }, { "epoch": 0.437476278064874, "grad_norm": 0.6516636721461408, "learning_rate": 1.2500892133008922e-05, "loss": 0.6197, "step": 14984 }, { "epoch": 0.4375054742927214, "grad_norm": 0.62924284204176, "learning_rate": 1.2500243309002434e-05, "loss": 0.6031, "step": 14985 }, { "epoch": 0.43753467052056877, "grad_norm": 0.6723836652321312, "learning_rate": 1.2499594484995944e-05, "loss": 0.5752, "step": 14986 }, { "epoch": 0.43756386674841613, "grad_norm": 0.677992478892478, "learning_rate": 1.2498945660989456e-05, "loss": 0.6944, "step": 14987 }, { "epoch": 0.4375930629762635, "grad_norm": 0.7705353008032404, "learning_rate": 1.249829683698297e-05, "loss": 0.7246, "step": 14988 }, { "epoch": 0.43762225920411085, "grad_norm": 0.7003618511108548, "learning_rate": 1.2497648012976482e-05, "loss": 0.6067, "step": 14989 }, { "epoch": 0.4376514554319582, "grad_norm": 0.7625511315212175, "learning_rate": 1.2496999188969994e-05, "loss": 0.6545, "step": 14990 }, { "epoch": 0.4376806516598056, "grad_norm": 0.630481793392229, "learning_rate": 1.2496350364963506e-05, "loss": 0.5939, "step": 14991 }, { "epoch": 0.43770984788765294, "grad_norm": 0.6222785164580743, "learning_rate": 1.2495701540957017e-05, "loss": 0.5878, "step": 14992 }, { "epoch": 0.4377390441155003, "grad_norm": 0.6425714431718959, "learning_rate": 1.2495052716950529e-05, "loss": 0.5912, "step": 14993 }, { "epoch": 0.43776824034334766, "grad_norm": 0.6428268629299237, "learning_rate": 1.249440389294404e-05, "loss": 0.5861, "step": 14994 }, { "epoch": 0.437797436571195, "grad_norm": 0.6299249671054303, "learning_rate": 1.2493755068937553e-05, "loss": 0.6042, "step": 14995 }, { "epoch": 0.4378266327990424, "grad_norm": 0.6692564395450505, "learning_rate": 1.2493106244931063e-05, "loss": 0.6951, "step": 14996 }, { "epoch": 0.43785582902688974, "grad_norm": 0.6608734127241971, "learning_rate": 1.2492457420924575e-05, "loss": 0.5881, "step": 14997 }, { "epoch": 0.4378850252547371, "grad_norm": 0.6383774359904804, "learning_rate": 1.2491808596918087e-05, "loss": 0.6002, "step": 14998 }, { "epoch": 0.43791422148258446, "grad_norm": 0.5952689379379451, "learning_rate": 1.2491159772911599e-05, "loss": 0.5289, "step": 14999 }, { "epoch": 0.4379434177104318, "grad_norm": 0.6828766059844903, "learning_rate": 1.249051094890511e-05, "loss": 0.656, "step": 15000 }, { "epoch": 0.4379726139382792, "grad_norm": 0.6642459294758027, "learning_rate": 1.2489862124898621e-05, "loss": 0.6495, "step": 15001 }, { "epoch": 0.43800181016612655, "grad_norm": 0.6374473636354706, "learning_rate": 1.2489213300892133e-05, "loss": 0.6194, "step": 15002 }, { "epoch": 0.4380310063939739, "grad_norm": 0.8081608968035358, "learning_rate": 1.2488564476885645e-05, "loss": 0.6035, "step": 15003 }, { "epoch": 0.43806020262182127, "grad_norm": 0.6411353491424664, "learning_rate": 1.2487915652879157e-05, "loss": 0.6366, "step": 15004 }, { "epoch": 0.43808939884966863, "grad_norm": 0.7256115571232065, "learning_rate": 1.2487266828872668e-05, "loss": 0.6299, "step": 15005 }, { "epoch": 0.438118595077516, "grad_norm": 0.6646471384977983, "learning_rate": 1.248661800486618e-05, "loss": 0.7005, "step": 15006 }, { "epoch": 0.43814779130536335, "grad_norm": 0.6705228165082873, "learning_rate": 1.2485969180859694e-05, "loss": 0.756, "step": 15007 }, { "epoch": 0.4381769875332107, "grad_norm": 0.6815564940480929, "learning_rate": 1.2485320356853206e-05, "loss": 0.7042, "step": 15008 }, { "epoch": 0.4382061837610581, "grad_norm": 0.6933860663406383, "learning_rate": 1.2484671532846718e-05, "loss": 0.6692, "step": 15009 }, { "epoch": 0.43823537998890544, "grad_norm": 0.666182377285192, "learning_rate": 1.248402270884023e-05, "loss": 0.6529, "step": 15010 }, { "epoch": 0.4382645762167528, "grad_norm": 0.6015715799451945, "learning_rate": 1.248337388483374e-05, "loss": 0.5517, "step": 15011 }, { "epoch": 0.43829377244460016, "grad_norm": 0.5356775460681892, "learning_rate": 1.2482725060827252e-05, "loss": 0.4515, "step": 15012 }, { "epoch": 0.4383229686724475, "grad_norm": 0.6358193258457406, "learning_rate": 1.2482076236820764e-05, "loss": 0.5796, "step": 15013 }, { "epoch": 0.4383521649002949, "grad_norm": 0.648796284076958, "learning_rate": 1.2481427412814276e-05, "loss": 0.6113, "step": 15014 }, { "epoch": 0.43838136112814224, "grad_norm": 0.6790795737085037, "learning_rate": 1.2480778588807786e-05, "loss": 0.7203, "step": 15015 }, { "epoch": 0.4384105573559896, "grad_norm": 0.617850059909073, "learning_rate": 1.2480129764801298e-05, "loss": 0.5688, "step": 15016 }, { "epoch": 0.43843975358383697, "grad_norm": 0.5754035233805799, "learning_rate": 1.247948094079481e-05, "loss": 0.5008, "step": 15017 }, { "epoch": 0.4384689498116843, "grad_norm": 0.6910188717601198, "learning_rate": 1.2478832116788322e-05, "loss": 0.696, "step": 15018 }, { "epoch": 0.4384981460395317, "grad_norm": 0.6481620376633656, "learning_rate": 1.2478183292781833e-05, "loss": 0.6521, "step": 15019 }, { "epoch": 0.43852734226737905, "grad_norm": 0.6433372409016898, "learning_rate": 1.2477534468775345e-05, "loss": 0.6298, "step": 15020 }, { "epoch": 0.4385565384952264, "grad_norm": 0.622168882088271, "learning_rate": 1.2476885644768857e-05, "loss": 0.5733, "step": 15021 }, { "epoch": 0.4385857347230738, "grad_norm": 0.6299211632186232, "learning_rate": 1.2476236820762369e-05, "loss": 0.6284, "step": 15022 }, { "epoch": 0.43861493095092113, "grad_norm": 0.6490467937084539, "learning_rate": 1.2475587996755881e-05, "loss": 0.6129, "step": 15023 }, { "epoch": 0.4386441271787685, "grad_norm": 0.6312731088954129, "learning_rate": 1.2474939172749391e-05, "loss": 0.5738, "step": 15024 }, { "epoch": 0.43867332340661586, "grad_norm": 0.6616587618026308, "learning_rate": 1.2474290348742903e-05, "loss": 0.6395, "step": 15025 }, { "epoch": 0.4387025196344632, "grad_norm": 0.6437071792659467, "learning_rate": 1.2473641524736417e-05, "loss": 0.5951, "step": 15026 }, { "epoch": 0.4387317158623106, "grad_norm": 0.6217675017305457, "learning_rate": 1.2472992700729929e-05, "loss": 0.5915, "step": 15027 }, { "epoch": 0.43876091209015794, "grad_norm": 0.6401575373983689, "learning_rate": 1.2472343876723441e-05, "loss": 0.6112, "step": 15028 }, { "epoch": 0.4387901083180053, "grad_norm": 0.6513721036241319, "learning_rate": 1.2471695052716953e-05, "loss": 0.6031, "step": 15029 }, { "epoch": 0.43881930454585266, "grad_norm": 0.5427199147947722, "learning_rate": 1.2471046228710463e-05, "loss": 0.4328, "step": 15030 }, { "epoch": 0.4388485007737, "grad_norm": 0.672228441943698, "learning_rate": 1.2470397404703975e-05, "loss": 0.6373, "step": 15031 }, { "epoch": 0.4388776970015474, "grad_norm": 0.6577071155805989, "learning_rate": 1.2469748580697487e-05, "loss": 0.6223, "step": 15032 }, { "epoch": 0.43890689322939475, "grad_norm": 0.6465621691698453, "learning_rate": 1.2469099756691e-05, "loss": 0.6072, "step": 15033 }, { "epoch": 0.4389360894572421, "grad_norm": 0.632222673819828, "learning_rate": 1.246845093268451e-05, "loss": 0.5972, "step": 15034 }, { "epoch": 0.43896528568508947, "grad_norm": 0.6444080132557677, "learning_rate": 1.2467802108678022e-05, "loss": 0.6302, "step": 15035 }, { "epoch": 0.43899448191293683, "grad_norm": 0.6478953101342619, "learning_rate": 1.2467153284671534e-05, "loss": 0.5961, "step": 15036 }, { "epoch": 0.4390236781407842, "grad_norm": 0.673849267750113, "learning_rate": 1.2466504460665046e-05, "loss": 0.6832, "step": 15037 }, { "epoch": 0.43905287436863155, "grad_norm": 0.6313809932650851, "learning_rate": 1.2465855636658556e-05, "loss": 0.6206, "step": 15038 }, { "epoch": 0.4390820705964789, "grad_norm": 0.6248865078536486, "learning_rate": 1.2465206812652068e-05, "loss": 0.6322, "step": 15039 }, { "epoch": 0.4391112668243263, "grad_norm": 0.6553766958625202, "learning_rate": 1.246455798864558e-05, "loss": 0.6271, "step": 15040 }, { "epoch": 0.43914046305217364, "grad_norm": 0.6639293148329849, "learning_rate": 1.2463909164639092e-05, "loss": 0.6705, "step": 15041 }, { "epoch": 0.439169659280021, "grad_norm": 0.593154686119065, "learning_rate": 1.2463260340632604e-05, "loss": 0.5304, "step": 15042 }, { "epoch": 0.43919885550786836, "grad_norm": 0.632122212173829, "learning_rate": 1.2462611516626115e-05, "loss": 0.6214, "step": 15043 }, { "epoch": 0.4392280517357157, "grad_norm": 0.6654193864428504, "learning_rate": 1.2461962692619627e-05, "loss": 0.6241, "step": 15044 }, { "epoch": 0.4392572479635631, "grad_norm": 0.6838208229188864, "learning_rate": 1.246131386861314e-05, "loss": 0.7048, "step": 15045 }, { "epoch": 0.4392864441914105, "grad_norm": 0.6877826944320505, "learning_rate": 1.2460665044606652e-05, "loss": 0.679, "step": 15046 }, { "epoch": 0.43931564041925786, "grad_norm": 0.6503972852018467, "learning_rate": 1.2460016220600164e-05, "loss": 0.6277, "step": 15047 }, { "epoch": 0.4393448366471052, "grad_norm": 0.7251285621491839, "learning_rate": 1.2459367396593676e-05, "loss": 0.7361, "step": 15048 }, { "epoch": 0.4393740328749526, "grad_norm": 0.680933742384806, "learning_rate": 1.2458718572587187e-05, "loss": 0.6721, "step": 15049 }, { "epoch": 0.43940322910279994, "grad_norm": 0.6770059145605608, "learning_rate": 1.2458069748580699e-05, "loss": 0.6607, "step": 15050 }, { "epoch": 0.4394324253306473, "grad_norm": 0.6352782612405328, "learning_rate": 1.2457420924574211e-05, "loss": 0.5549, "step": 15051 }, { "epoch": 0.43946162155849466, "grad_norm": 0.6719516959321012, "learning_rate": 1.2456772100567723e-05, "loss": 0.6422, "step": 15052 }, { "epoch": 0.439490817786342, "grad_norm": 0.8396466005964559, "learning_rate": 1.2456123276561233e-05, "loss": 0.6645, "step": 15053 }, { "epoch": 0.4395200140141894, "grad_norm": 0.6972353906231901, "learning_rate": 1.2455474452554745e-05, "loss": 0.5398, "step": 15054 }, { "epoch": 0.43954921024203675, "grad_norm": 0.6657503383024315, "learning_rate": 1.2454825628548257e-05, "loss": 0.6874, "step": 15055 }, { "epoch": 0.4395784064698841, "grad_norm": 0.6912915334330539, "learning_rate": 1.245417680454177e-05, "loss": 0.6589, "step": 15056 }, { "epoch": 0.43960760269773147, "grad_norm": 0.6220344186300213, "learning_rate": 1.245352798053528e-05, "loss": 0.5991, "step": 15057 }, { "epoch": 0.43963679892557883, "grad_norm": 0.6253110942905294, "learning_rate": 1.2452879156528792e-05, "loss": 0.5405, "step": 15058 }, { "epoch": 0.4396659951534262, "grad_norm": 0.6631653611727535, "learning_rate": 1.2452230332522304e-05, "loss": 0.66, "step": 15059 }, { "epoch": 0.43969519138127355, "grad_norm": 0.6559336386841992, "learning_rate": 1.2451581508515816e-05, "loss": 0.5992, "step": 15060 }, { "epoch": 0.4397243876091209, "grad_norm": 0.6217113083415049, "learning_rate": 1.2450932684509328e-05, "loss": 0.5932, "step": 15061 }, { "epoch": 0.4397535838369683, "grad_norm": 0.6236249465319218, "learning_rate": 1.2450283860502838e-05, "loss": 0.5839, "step": 15062 }, { "epoch": 0.43978278006481564, "grad_norm": 0.6464206976739905, "learning_rate": 1.244963503649635e-05, "loss": 0.6145, "step": 15063 }, { "epoch": 0.439811976292663, "grad_norm": 0.6537673032277477, "learning_rate": 1.2448986212489864e-05, "loss": 0.5969, "step": 15064 }, { "epoch": 0.43984117252051036, "grad_norm": 0.63387399439706, "learning_rate": 1.2448337388483376e-05, "loss": 0.5912, "step": 15065 }, { "epoch": 0.4398703687483577, "grad_norm": 0.6376570554776966, "learning_rate": 1.2447688564476888e-05, "loss": 0.6323, "step": 15066 }, { "epoch": 0.4398995649762051, "grad_norm": 0.5708987558522631, "learning_rate": 1.24470397404704e-05, "loss": 0.4848, "step": 15067 }, { "epoch": 0.43992876120405244, "grad_norm": 0.6509844036340124, "learning_rate": 1.244639091646391e-05, "loss": 0.6411, "step": 15068 }, { "epoch": 0.4399579574318998, "grad_norm": 0.7647669456587678, "learning_rate": 1.2445742092457422e-05, "loss": 0.7519, "step": 15069 }, { "epoch": 0.43998715365974717, "grad_norm": 0.703580545539075, "learning_rate": 1.2445093268450934e-05, "loss": 0.6493, "step": 15070 }, { "epoch": 0.4400163498875945, "grad_norm": 0.6050791121309156, "learning_rate": 1.2444444444444446e-05, "loss": 0.5582, "step": 15071 }, { "epoch": 0.4400455461154419, "grad_norm": 0.6571477193741853, "learning_rate": 1.2443795620437957e-05, "loss": 0.6718, "step": 15072 }, { "epoch": 0.44007474234328925, "grad_norm": 0.6388483669232149, "learning_rate": 1.2443146796431469e-05, "loss": 0.623, "step": 15073 }, { "epoch": 0.4401039385711366, "grad_norm": 0.6100877017108136, "learning_rate": 1.244249797242498e-05, "loss": 0.5541, "step": 15074 }, { "epoch": 0.44013313479898397, "grad_norm": 0.6151650853365547, "learning_rate": 1.2441849148418493e-05, "loss": 0.5518, "step": 15075 }, { "epoch": 0.44016233102683133, "grad_norm": 0.7280387690738217, "learning_rate": 1.2441200324412003e-05, "loss": 0.6967, "step": 15076 }, { "epoch": 0.4401915272546787, "grad_norm": 0.6905432309038062, "learning_rate": 1.2440551500405515e-05, "loss": 0.6788, "step": 15077 }, { "epoch": 0.44022072348252606, "grad_norm": 0.5847794411516465, "learning_rate": 1.2439902676399027e-05, "loss": 0.5078, "step": 15078 }, { "epoch": 0.4402499197103734, "grad_norm": 0.9241756016438224, "learning_rate": 1.243925385239254e-05, "loss": 0.7433, "step": 15079 }, { "epoch": 0.4402791159382208, "grad_norm": 0.6809878664984099, "learning_rate": 1.2438605028386051e-05, "loss": 0.6434, "step": 15080 }, { "epoch": 0.44030831216606814, "grad_norm": 0.6682694580259185, "learning_rate": 1.2437956204379562e-05, "loss": 0.7038, "step": 15081 }, { "epoch": 0.4403375083939155, "grad_norm": 0.6329197969619385, "learning_rate": 1.2437307380373074e-05, "loss": 0.6287, "step": 15082 }, { "epoch": 0.44036670462176286, "grad_norm": 0.6452071767068692, "learning_rate": 1.2436658556366587e-05, "loss": 0.6384, "step": 15083 }, { "epoch": 0.4403959008496102, "grad_norm": 0.6662448848571505, "learning_rate": 1.24360097323601e-05, "loss": 0.6798, "step": 15084 }, { "epoch": 0.4404250970774576, "grad_norm": 0.6370790498411024, "learning_rate": 1.2435360908353611e-05, "loss": 0.6122, "step": 15085 }, { "epoch": 0.44045429330530494, "grad_norm": 0.6346710672781075, "learning_rate": 1.2434712084347123e-05, "loss": 0.6086, "step": 15086 }, { "epoch": 0.4404834895331523, "grad_norm": 0.6711006157536996, "learning_rate": 1.2434063260340634e-05, "loss": 0.6709, "step": 15087 }, { "epoch": 0.44051268576099967, "grad_norm": 0.6220192194797056, "learning_rate": 1.2433414436334146e-05, "loss": 0.5847, "step": 15088 }, { "epoch": 0.44054188198884703, "grad_norm": 0.6381688460042021, "learning_rate": 1.2432765612327658e-05, "loss": 0.6301, "step": 15089 }, { "epoch": 0.4405710782166944, "grad_norm": 0.6653820070965659, "learning_rate": 1.243211678832117e-05, "loss": 0.6256, "step": 15090 }, { "epoch": 0.44060027444454175, "grad_norm": 0.6670980137961866, "learning_rate": 1.243146796431468e-05, "loss": 0.6659, "step": 15091 }, { "epoch": 0.4406294706723891, "grad_norm": 0.6906681442890973, "learning_rate": 1.2430819140308192e-05, "loss": 0.6961, "step": 15092 }, { "epoch": 0.4406586669002365, "grad_norm": 0.6588992849241487, "learning_rate": 1.2430170316301704e-05, "loss": 0.6294, "step": 15093 }, { "epoch": 0.44068786312808383, "grad_norm": 0.663984003709741, "learning_rate": 1.2429521492295216e-05, "loss": 0.6304, "step": 15094 }, { "epoch": 0.4407170593559312, "grad_norm": 0.651257621774728, "learning_rate": 1.2428872668288727e-05, "loss": 0.6684, "step": 15095 }, { "epoch": 0.44074625558377856, "grad_norm": 0.7892605247410235, "learning_rate": 1.2428223844282239e-05, "loss": 0.7308, "step": 15096 }, { "epoch": 0.4407754518116259, "grad_norm": 0.5952866223774032, "learning_rate": 1.242757502027575e-05, "loss": 0.5637, "step": 15097 }, { "epoch": 0.4408046480394733, "grad_norm": 0.647600642646884, "learning_rate": 1.2426926196269263e-05, "loss": 0.6319, "step": 15098 }, { "epoch": 0.44083384426732064, "grad_norm": 0.6692838223928803, "learning_rate": 1.2426277372262775e-05, "loss": 0.6309, "step": 15099 }, { "epoch": 0.440863040495168, "grad_norm": 0.6042281327646153, "learning_rate": 1.2425628548256285e-05, "loss": 0.5469, "step": 15100 }, { "epoch": 0.44089223672301536, "grad_norm": 0.6048799092747406, "learning_rate": 1.2424979724249799e-05, "loss": 0.5524, "step": 15101 }, { "epoch": 0.4409214329508627, "grad_norm": 0.6544267776048408, "learning_rate": 1.242433090024331e-05, "loss": 0.6099, "step": 15102 }, { "epoch": 0.4409506291787101, "grad_norm": 0.6934019157698275, "learning_rate": 1.2423682076236823e-05, "loss": 0.7784, "step": 15103 }, { "epoch": 0.44097982540655745, "grad_norm": 0.6746076816636277, "learning_rate": 1.2423033252230335e-05, "loss": 0.6388, "step": 15104 }, { "epoch": 0.4410090216344048, "grad_norm": 0.6132668173593093, "learning_rate": 1.2422384428223847e-05, "loss": 0.5542, "step": 15105 }, { "epoch": 0.4410382178622522, "grad_norm": 0.615131817321308, "learning_rate": 1.2421735604217357e-05, "loss": 0.601, "step": 15106 }, { "epoch": 0.4410674140900996, "grad_norm": 0.6576719907273252, "learning_rate": 1.2421086780210869e-05, "loss": 0.6195, "step": 15107 }, { "epoch": 0.44109661031794695, "grad_norm": 0.6486562939080956, "learning_rate": 1.2420437956204381e-05, "loss": 0.6561, "step": 15108 }, { "epoch": 0.4411258065457943, "grad_norm": 0.7166291884840355, "learning_rate": 1.2419789132197893e-05, "loss": 0.638, "step": 15109 }, { "epoch": 0.44115500277364167, "grad_norm": 0.591835616805777, "learning_rate": 1.2419140308191404e-05, "loss": 0.5522, "step": 15110 }, { "epoch": 0.44118419900148903, "grad_norm": 0.6532290068440094, "learning_rate": 1.2418491484184916e-05, "loss": 0.5826, "step": 15111 }, { "epoch": 0.4412133952293364, "grad_norm": 0.6363355473062375, "learning_rate": 1.2417842660178428e-05, "loss": 0.6396, "step": 15112 }, { "epoch": 0.44124259145718375, "grad_norm": 0.6253666250417581, "learning_rate": 1.241719383617194e-05, "loss": 0.5785, "step": 15113 }, { "epoch": 0.4412717876850311, "grad_norm": 0.6335008541596495, "learning_rate": 1.241654501216545e-05, "loss": 0.5843, "step": 15114 }, { "epoch": 0.4413009839128785, "grad_norm": 0.6399618165220633, "learning_rate": 1.2415896188158962e-05, "loss": 0.5931, "step": 15115 }, { "epoch": 0.44133018014072584, "grad_norm": 0.6547752897209792, "learning_rate": 1.2415247364152474e-05, "loss": 0.6375, "step": 15116 }, { "epoch": 0.4413593763685732, "grad_norm": 0.6788588694335485, "learning_rate": 1.2414598540145986e-05, "loss": 0.6487, "step": 15117 }, { "epoch": 0.44138857259642056, "grad_norm": 0.6309158141173048, "learning_rate": 1.2413949716139498e-05, "loss": 0.5694, "step": 15118 }, { "epoch": 0.4414177688242679, "grad_norm": 0.610248791012515, "learning_rate": 1.2413300892133008e-05, "loss": 0.545, "step": 15119 }, { "epoch": 0.4414469650521153, "grad_norm": 0.6301557700350725, "learning_rate": 1.2412652068126522e-05, "loss": 0.5861, "step": 15120 }, { "epoch": 0.44147616127996264, "grad_norm": 0.6636771377894274, "learning_rate": 1.2412003244120034e-05, "loss": 0.6733, "step": 15121 }, { "epoch": 0.44150535750781, "grad_norm": 0.6523859532540831, "learning_rate": 1.2411354420113546e-05, "loss": 0.6138, "step": 15122 }, { "epoch": 0.44153455373565736, "grad_norm": 0.6787249947711185, "learning_rate": 1.2410705596107058e-05, "loss": 0.6352, "step": 15123 }, { "epoch": 0.4415637499635047, "grad_norm": 0.8189473554830108, "learning_rate": 1.2410056772100569e-05, "loss": 0.7587, "step": 15124 }, { "epoch": 0.4415929461913521, "grad_norm": 0.6186671885300943, "learning_rate": 1.240940794809408e-05, "loss": 0.5641, "step": 15125 }, { "epoch": 0.44162214241919945, "grad_norm": 0.607731170087328, "learning_rate": 1.2408759124087593e-05, "loss": 0.5671, "step": 15126 }, { "epoch": 0.4416513386470468, "grad_norm": 0.6268883970439002, "learning_rate": 1.2408110300081105e-05, "loss": 0.5907, "step": 15127 }, { "epoch": 0.44168053487489417, "grad_norm": 0.7034106972774908, "learning_rate": 1.2407461476074617e-05, "loss": 0.6514, "step": 15128 }, { "epoch": 0.44170973110274153, "grad_norm": 0.6020921083678415, "learning_rate": 1.2406812652068127e-05, "loss": 0.5284, "step": 15129 }, { "epoch": 0.4417389273305889, "grad_norm": 0.6281015278197046, "learning_rate": 1.2406163828061639e-05, "loss": 0.6117, "step": 15130 }, { "epoch": 0.44176812355843625, "grad_norm": 0.6355850328340392, "learning_rate": 1.2405515004055151e-05, "loss": 0.606, "step": 15131 }, { "epoch": 0.4417973197862836, "grad_norm": 0.6631930322157776, "learning_rate": 1.2404866180048663e-05, "loss": 0.6282, "step": 15132 }, { "epoch": 0.441826516014131, "grad_norm": 0.6111245202670408, "learning_rate": 1.2404217356042173e-05, "loss": 0.5432, "step": 15133 }, { "epoch": 0.44185571224197834, "grad_norm": 0.6383423410762786, "learning_rate": 1.2403568532035685e-05, "loss": 0.5961, "step": 15134 }, { "epoch": 0.4418849084698257, "grad_norm": 0.6416268713851825, "learning_rate": 1.2402919708029197e-05, "loss": 0.5919, "step": 15135 }, { "epoch": 0.44191410469767306, "grad_norm": 0.6529325560723612, "learning_rate": 1.240227088402271e-05, "loss": 0.6264, "step": 15136 }, { "epoch": 0.4419433009255204, "grad_norm": 0.6972328335363528, "learning_rate": 1.2401622060016221e-05, "loss": 0.6802, "step": 15137 }, { "epoch": 0.4419724971533678, "grad_norm": 0.6259746194387106, "learning_rate": 1.2400973236009732e-05, "loss": 0.6028, "step": 15138 }, { "epoch": 0.44200169338121514, "grad_norm": 0.6515309140876958, "learning_rate": 1.2400324412003246e-05, "loss": 0.6489, "step": 15139 }, { "epoch": 0.4420308896090625, "grad_norm": 0.6819164388432312, "learning_rate": 1.2399675587996758e-05, "loss": 0.6844, "step": 15140 }, { "epoch": 0.44206008583690987, "grad_norm": 0.7183398168027522, "learning_rate": 1.239902676399027e-05, "loss": 0.6522, "step": 15141 }, { "epoch": 0.4420892820647572, "grad_norm": 0.6243356143113836, "learning_rate": 1.2398377939983782e-05, "loss": 0.6096, "step": 15142 }, { "epoch": 0.4421184782926046, "grad_norm": 0.6245761606467345, "learning_rate": 1.2397729115977292e-05, "loss": 0.6083, "step": 15143 }, { "epoch": 0.44214767452045195, "grad_norm": 0.6649949106601607, "learning_rate": 1.2397080291970804e-05, "loss": 0.6157, "step": 15144 }, { "epoch": 0.4421768707482993, "grad_norm": 0.6995577947562882, "learning_rate": 1.2396431467964316e-05, "loss": 0.6891, "step": 15145 }, { "epoch": 0.44220606697614667, "grad_norm": 0.6242551290340399, "learning_rate": 1.2395782643957828e-05, "loss": 0.5631, "step": 15146 }, { "epoch": 0.44223526320399403, "grad_norm": 0.6280284608795528, "learning_rate": 1.239513381995134e-05, "loss": 0.6016, "step": 15147 }, { "epoch": 0.4422644594318414, "grad_norm": 0.6818795464042511, "learning_rate": 1.239448499594485e-05, "loss": 0.7357, "step": 15148 }, { "epoch": 0.44229365565968876, "grad_norm": 0.676232411494993, "learning_rate": 1.2393836171938362e-05, "loss": 0.6881, "step": 15149 }, { "epoch": 0.4423228518875361, "grad_norm": 0.6313835608206066, "learning_rate": 1.2393187347931874e-05, "loss": 0.5967, "step": 15150 }, { "epoch": 0.4423520481153835, "grad_norm": 0.7308692336454499, "learning_rate": 1.2392538523925386e-05, "loss": 0.5457, "step": 15151 }, { "epoch": 0.44238124434323084, "grad_norm": 0.6589792523259829, "learning_rate": 1.2391889699918897e-05, "loss": 0.6262, "step": 15152 }, { "epoch": 0.4424104405710782, "grad_norm": 0.6918663626585574, "learning_rate": 1.2391240875912409e-05, "loss": 0.6263, "step": 15153 }, { "epoch": 0.44243963679892556, "grad_norm": 0.6330188869646733, "learning_rate": 1.2390592051905921e-05, "loss": 0.6124, "step": 15154 }, { "epoch": 0.4424688330267729, "grad_norm": 0.6250730215803627, "learning_rate": 1.2389943227899433e-05, "loss": 0.6187, "step": 15155 }, { "epoch": 0.4424980292546203, "grad_norm": 0.6300683560340623, "learning_rate": 1.2389294403892945e-05, "loss": 0.599, "step": 15156 }, { "epoch": 0.44252722548246765, "grad_norm": 0.6420973317301683, "learning_rate": 1.2388645579886455e-05, "loss": 0.6125, "step": 15157 }, { "epoch": 0.442556421710315, "grad_norm": 0.635953268718421, "learning_rate": 1.2387996755879969e-05, "loss": 0.5921, "step": 15158 }, { "epoch": 0.44258561793816237, "grad_norm": 0.634859052219494, "learning_rate": 1.2387347931873481e-05, "loss": 0.5839, "step": 15159 }, { "epoch": 0.44261481416600973, "grad_norm": 0.629942836446978, "learning_rate": 1.2386699107866993e-05, "loss": 0.5872, "step": 15160 }, { "epoch": 0.4426440103938571, "grad_norm": 0.6829588630844662, "learning_rate": 1.2386050283860505e-05, "loss": 0.7324, "step": 15161 }, { "epoch": 0.44267320662170445, "grad_norm": 0.6376498547574787, "learning_rate": 1.2385401459854015e-05, "loss": 0.6155, "step": 15162 }, { "epoch": 0.4427024028495518, "grad_norm": 0.6327893442948767, "learning_rate": 1.2384752635847527e-05, "loss": 0.6059, "step": 15163 }, { "epoch": 0.4427315990773992, "grad_norm": 0.682968543606273, "learning_rate": 1.238410381184104e-05, "loss": 0.6895, "step": 15164 }, { "epoch": 0.44276079530524654, "grad_norm": 0.6818963634872347, "learning_rate": 1.2383454987834551e-05, "loss": 0.6978, "step": 15165 }, { "epoch": 0.44278999153309395, "grad_norm": 0.7647399387606667, "learning_rate": 1.2382806163828064e-05, "loss": 0.6644, "step": 15166 }, { "epoch": 0.4428191877609413, "grad_norm": 0.6278561207264288, "learning_rate": 1.2382157339821574e-05, "loss": 0.6055, "step": 15167 }, { "epoch": 0.4428483839887887, "grad_norm": 0.6809914947164111, "learning_rate": 1.2381508515815086e-05, "loss": 0.7076, "step": 15168 }, { "epoch": 0.44287758021663604, "grad_norm": 0.6562063748744353, "learning_rate": 1.2380859691808598e-05, "loss": 0.6062, "step": 15169 }, { "epoch": 0.4429067764444834, "grad_norm": 0.663100777399666, "learning_rate": 1.238021086780211e-05, "loss": 0.5895, "step": 15170 }, { "epoch": 0.44293597267233076, "grad_norm": 0.608811551954313, "learning_rate": 1.237956204379562e-05, "loss": 0.5877, "step": 15171 }, { "epoch": 0.4429651689001781, "grad_norm": 0.6335244377189257, "learning_rate": 1.2378913219789132e-05, "loss": 0.6048, "step": 15172 }, { "epoch": 0.4429943651280255, "grad_norm": 0.6412805083866149, "learning_rate": 1.2378264395782644e-05, "loss": 0.5723, "step": 15173 }, { "epoch": 0.44302356135587284, "grad_norm": 0.6756636055639209, "learning_rate": 1.2377615571776156e-05, "loss": 0.6383, "step": 15174 }, { "epoch": 0.4430527575837202, "grad_norm": 0.6262588097061453, "learning_rate": 1.2376966747769668e-05, "loss": 0.6106, "step": 15175 }, { "epoch": 0.44308195381156756, "grad_norm": 0.6225885147264716, "learning_rate": 1.2376317923763179e-05, "loss": 0.5283, "step": 15176 }, { "epoch": 0.4431111500394149, "grad_norm": 0.6619169984354197, "learning_rate": 1.2375669099756692e-05, "loss": 0.7031, "step": 15177 }, { "epoch": 0.4431403462672623, "grad_norm": 0.6884050370093513, "learning_rate": 1.2375020275750204e-05, "loss": 0.6507, "step": 15178 }, { "epoch": 0.44316954249510965, "grad_norm": 0.6759769741396003, "learning_rate": 1.2374371451743716e-05, "loss": 0.6223, "step": 15179 }, { "epoch": 0.443198738722957, "grad_norm": 0.5988302102214033, "learning_rate": 1.2373722627737228e-05, "loss": 0.4755, "step": 15180 }, { "epoch": 0.44322793495080437, "grad_norm": 0.642701785791561, "learning_rate": 1.2373073803730739e-05, "loss": 0.621, "step": 15181 }, { "epoch": 0.44325713117865173, "grad_norm": 0.6176676025846847, "learning_rate": 1.2372424979724251e-05, "loss": 0.5824, "step": 15182 }, { "epoch": 0.4432863274064991, "grad_norm": 0.6471823433764988, "learning_rate": 1.2371776155717763e-05, "loss": 0.5995, "step": 15183 }, { "epoch": 0.44331552363434645, "grad_norm": 0.6762908640368264, "learning_rate": 1.2371127331711275e-05, "loss": 0.6629, "step": 15184 }, { "epoch": 0.4433447198621938, "grad_norm": 0.6557088835999213, "learning_rate": 1.2370478507704787e-05, "loss": 0.6419, "step": 15185 }, { "epoch": 0.4433739160900412, "grad_norm": 0.6397434602399408, "learning_rate": 1.2369829683698297e-05, "loss": 0.6389, "step": 15186 }, { "epoch": 0.44340311231788854, "grad_norm": 0.7076126275238515, "learning_rate": 1.236918085969181e-05, "loss": 0.6431, "step": 15187 }, { "epoch": 0.4434323085457359, "grad_norm": 0.7144985395105845, "learning_rate": 1.2368532035685321e-05, "loss": 0.7352, "step": 15188 }, { "epoch": 0.44346150477358326, "grad_norm": 0.636521888349401, "learning_rate": 1.2367883211678833e-05, "loss": 0.6315, "step": 15189 }, { "epoch": 0.4434907010014306, "grad_norm": 0.6537127643948007, "learning_rate": 1.2367234387672344e-05, "loss": 0.6359, "step": 15190 }, { "epoch": 0.443519897229278, "grad_norm": 0.666045321933699, "learning_rate": 1.2366585563665856e-05, "loss": 0.6411, "step": 15191 }, { "epoch": 0.44354909345712534, "grad_norm": 0.6531575731328451, "learning_rate": 1.2365936739659368e-05, "loss": 0.6758, "step": 15192 }, { "epoch": 0.4435782896849727, "grad_norm": 0.6575090062852018, "learning_rate": 1.236528791565288e-05, "loss": 0.6173, "step": 15193 }, { "epoch": 0.44360748591282007, "grad_norm": 0.6671237148813579, "learning_rate": 1.2364639091646392e-05, "loss": 0.6377, "step": 15194 }, { "epoch": 0.4436366821406674, "grad_norm": 0.6520510535514169, "learning_rate": 1.2363990267639902e-05, "loss": 0.5947, "step": 15195 }, { "epoch": 0.4436658783685148, "grad_norm": 0.6491749865055587, "learning_rate": 1.2363341443633416e-05, "loss": 0.5984, "step": 15196 }, { "epoch": 0.44369507459636215, "grad_norm": 0.5984637532402705, "learning_rate": 1.2362692619626928e-05, "loss": 0.5785, "step": 15197 }, { "epoch": 0.4437242708242095, "grad_norm": 0.6489829481283649, "learning_rate": 1.236204379562044e-05, "loss": 0.6127, "step": 15198 }, { "epoch": 0.44375346705205687, "grad_norm": 0.6201157773820032, "learning_rate": 1.2361394971613952e-05, "loss": 0.5981, "step": 15199 }, { "epoch": 0.44378266327990423, "grad_norm": 0.6700045822015114, "learning_rate": 1.2360746147607462e-05, "loss": 0.6065, "step": 15200 }, { "epoch": 0.4438118595077516, "grad_norm": 0.6177679652682171, "learning_rate": 1.2360097323600974e-05, "loss": 0.5518, "step": 15201 }, { "epoch": 0.44384105573559895, "grad_norm": 0.6916257004677553, "learning_rate": 1.2359448499594486e-05, "loss": 0.6954, "step": 15202 }, { "epoch": 0.4438702519634463, "grad_norm": 0.684585306661423, "learning_rate": 1.2358799675587998e-05, "loss": 0.6532, "step": 15203 }, { "epoch": 0.4438994481912937, "grad_norm": 0.6014678034988604, "learning_rate": 1.235815085158151e-05, "loss": 0.5387, "step": 15204 }, { "epoch": 0.44392864441914104, "grad_norm": 0.621684714280722, "learning_rate": 1.235750202757502e-05, "loss": 0.5904, "step": 15205 }, { "epoch": 0.4439578406469884, "grad_norm": 0.678667542254544, "learning_rate": 1.2356853203568533e-05, "loss": 0.6762, "step": 15206 }, { "epoch": 0.44398703687483576, "grad_norm": 0.6317807088619846, "learning_rate": 1.2356204379562045e-05, "loss": 0.5888, "step": 15207 }, { "epoch": 0.4440162331026831, "grad_norm": 0.6015578692198053, "learning_rate": 1.2355555555555557e-05, "loss": 0.5788, "step": 15208 }, { "epoch": 0.4440454293305305, "grad_norm": 0.6329928626868868, "learning_rate": 1.2354906731549067e-05, "loss": 0.6036, "step": 15209 }, { "epoch": 0.44407462555837784, "grad_norm": 0.6293275559511355, "learning_rate": 1.2354257907542579e-05, "loss": 0.5373, "step": 15210 }, { "epoch": 0.4441038217862252, "grad_norm": 0.6894039258843272, "learning_rate": 1.2353609083536091e-05, "loss": 0.6846, "step": 15211 }, { "epoch": 0.44413301801407257, "grad_norm": 0.652109042473756, "learning_rate": 1.2352960259529603e-05, "loss": 0.6054, "step": 15212 }, { "epoch": 0.44416221424191993, "grad_norm": 0.6084070706136027, "learning_rate": 1.2352311435523115e-05, "loss": 0.5845, "step": 15213 }, { "epoch": 0.4441914104697673, "grad_norm": 0.711665257064042, "learning_rate": 1.2351662611516626e-05, "loss": 0.7342, "step": 15214 }, { "epoch": 0.44422060669761465, "grad_norm": 0.6558026353388863, "learning_rate": 1.235101378751014e-05, "loss": 0.646, "step": 15215 }, { "epoch": 0.444249802925462, "grad_norm": 0.6432017473714237, "learning_rate": 1.2350364963503651e-05, "loss": 0.5841, "step": 15216 }, { "epoch": 0.4442789991533094, "grad_norm": 0.665762816909394, "learning_rate": 1.2349716139497163e-05, "loss": 0.649, "step": 15217 }, { "epoch": 0.44430819538115673, "grad_norm": 0.5894096665215653, "learning_rate": 1.2349067315490675e-05, "loss": 0.5381, "step": 15218 }, { "epoch": 0.4443373916090041, "grad_norm": 0.683203255214996, "learning_rate": 1.2348418491484186e-05, "loss": 0.6722, "step": 15219 }, { "epoch": 0.44436658783685146, "grad_norm": 0.67336067773077, "learning_rate": 1.2347769667477698e-05, "loss": 0.5855, "step": 15220 }, { "epoch": 0.4443957840646988, "grad_norm": 0.6163318306004301, "learning_rate": 1.234712084347121e-05, "loss": 0.5924, "step": 15221 }, { "epoch": 0.4444249802925462, "grad_norm": 0.6054012916739309, "learning_rate": 1.2346472019464722e-05, "loss": 0.5643, "step": 15222 }, { "epoch": 0.44445417652039354, "grad_norm": 0.5551623172529359, "learning_rate": 1.2345823195458234e-05, "loss": 0.4717, "step": 15223 }, { "epoch": 0.4444833727482409, "grad_norm": 0.6802556091451795, "learning_rate": 1.2345174371451744e-05, "loss": 0.66, "step": 15224 }, { "epoch": 0.44451256897608826, "grad_norm": 0.7209688952934835, "learning_rate": 1.2344525547445256e-05, "loss": 0.6752, "step": 15225 }, { "epoch": 0.4445417652039356, "grad_norm": 0.6190718585334436, "learning_rate": 1.2343876723438768e-05, "loss": 0.598, "step": 15226 }, { "epoch": 0.44457096143178304, "grad_norm": 0.6801625001594237, "learning_rate": 1.234322789943228e-05, "loss": 0.6753, "step": 15227 }, { "epoch": 0.4446001576596304, "grad_norm": 0.66880613857001, "learning_rate": 1.234257907542579e-05, "loss": 0.6304, "step": 15228 }, { "epoch": 0.44462935388747776, "grad_norm": 0.6312072320030494, "learning_rate": 1.2341930251419303e-05, "loss": 0.6306, "step": 15229 }, { "epoch": 0.4446585501153251, "grad_norm": 0.7357248011200975, "learning_rate": 1.2341281427412815e-05, "loss": 0.7831, "step": 15230 }, { "epoch": 0.4446877463431725, "grad_norm": 0.5830140370670128, "learning_rate": 1.2340632603406327e-05, "loss": 0.5356, "step": 15231 }, { "epoch": 0.44471694257101985, "grad_norm": 0.6406036857705958, "learning_rate": 1.2339983779399839e-05, "loss": 0.6008, "step": 15232 }, { "epoch": 0.4447461387988672, "grad_norm": 0.6510478160286755, "learning_rate": 1.2339334955393349e-05, "loss": 0.6221, "step": 15233 }, { "epoch": 0.44477533502671457, "grad_norm": 0.5993033616565093, "learning_rate": 1.2338686131386863e-05, "loss": 0.5393, "step": 15234 }, { "epoch": 0.44480453125456193, "grad_norm": 0.6224515904498515, "learning_rate": 1.2338037307380375e-05, "loss": 0.5605, "step": 15235 }, { "epoch": 0.4448337274824093, "grad_norm": 0.6933879340822705, "learning_rate": 1.2337388483373887e-05, "loss": 0.594, "step": 15236 }, { "epoch": 0.44486292371025665, "grad_norm": 0.6020797861728393, "learning_rate": 1.2336739659367399e-05, "loss": 0.5791, "step": 15237 }, { "epoch": 0.444892119938104, "grad_norm": 0.6471976040112182, "learning_rate": 1.2336090835360909e-05, "loss": 0.5843, "step": 15238 }, { "epoch": 0.4449213161659514, "grad_norm": 0.7037482306690518, "learning_rate": 1.2335442011354421e-05, "loss": 0.5725, "step": 15239 }, { "epoch": 0.44495051239379874, "grad_norm": 0.6587841394925444, "learning_rate": 1.2334793187347933e-05, "loss": 0.6202, "step": 15240 }, { "epoch": 0.4449797086216461, "grad_norm": 0.6323130011039791, "learning_rate": 1.2334144363341445e-05, "loss": 0.6314, "step": 15241 }, { "epoch": 0.44500890484949346, "grad_norm": 0.7222123341201995, "learning_rate": 1.2333495539334957e-05, "loss": 0.7183, "step": 15242 }, { "epoch": 0.4450381010773408, "grad_norm": 0.6479598542454466, "learning_rate": 1.2332846715328468e-05, "loss": 0.6013, "step": 15243 }, { "epoch": 0.4450672973051882, "grad_norm": 0.7150234098601661, "learning_rate": 1.233219789132198e-05, "loss": 0.7037, "step": 15244 }, { "epoch": 0.44509649353303554, "grad_norm": 0.6419067625734025, "learning_rate": 1.2331549067315492e-05, "loss": 0.5947, "step": 15245 }, { "epoch": 0.4451256897608829, "grad_norm": 0.6590955632940677, "learning_rate": 1.2330900243309004e-05, "loss": 0.6666, "step": 15246 }, { "epoch": 0.44515488598873026, "grad_norm": 0.6605377180596348, "learning_rate": 1.2330251419302514e-05, "loss": 0.5839, "step": 15247 }, { "epoch": 0.4451840822165776, "grad_norm": 0.6319990823379675, "learning_rate": 1.2329602595296026e-05, "loss": 0.5912, "step": 15248 }, { "epoch": 0.445213278444425, "grad_norm": 0.6919226064598134, "learning_rate": 1.2328953771289538e-05, "loss": 0.6716, "step": 15249 }, { "epoch": 0.44524247467227235, "grad_norm": 0.6372197337195442, "learning_rate": 1.232830494728305e-05, "loss": 0.6007, "step": 15250 }, { "epoch": 0.4452716709001197, "grad_norm": 0.6943269373554183, "learning_rate": 1.2327656123276562e-05, "loss": 0.6608, "step": 15251 }, { "epoch": 0.44530086712796707, "grad_norm": 0.6626596338788683, "learning_rate": 1.2327007299270072e-05, "loss": 0.6138, "step": 15252 }, { "epoch": 0.44533006335581443, "grad_norm": 0.673369106997458, "learning_rate": 1.2326358475263586e-05, "loss": 0.6542, "step": 15253 }, { "epoch": 0.4453592595836618, "grad_norm": 0.628563207152883, "learning_rate": 1.2325709651257098e-05, "loss": 0.5804, "step": 15254 }, { "epoch": 0.44538845581150915, "grad_norm": 0.6224539092616451, "learning_rate": 1.232506082725061e-05, "loss": 0.577, "step": 15255 }, { "epoch": 0.4454176520393565, "grad_norm": 0.6365151704741873, "learning_rate": 1.2324412003244122e-05, "loss": 0.597, "step": 15256 }, { "epoch": 0.4454468482672039, "grad_norm": 0.6555370714379531, "learning_rate": 1.2323763179237633e-05, "loss": 0.6226, "step": 15257 }, { "epoch": 0.44547604449505124, "grad_norm": 0.6545453477512397, "learning_rate": 1.2323114355231145e-05, "loss": 0.6229, "step": 15258 }, { "epoch": 0.4455052407228986, "grad_norm": 0.7000299608158379, "learning_rate": 1.2322465531224657e-05, "loss": 0.7175, "step": 15259 }, { "epoch": 0.44553443695074596, "grad_norm": 0.6382985997631931, "learning_rate": 1.2321816707218169e-05, "loss": 0.6279, "step": 15260 }, { "epoch": 0.4455636331785933, "grad_norm": 0.6494220098828627, "learning_rate": 1.232116788321168e-05, "loss": 0.6229, "step": 15261 }, { "epoch": 0.4455928294064407, "grad_norm": 0.652374516181142, "learning_rate": 1.2320519059205191e-05, "loss": 0.6387, "step": 15262 }, { "epoch": 0.44562202563428804, "grad_norm": 0.6596192370539095, "learning_rate": 1.2319870235198703e-05, "loss": 0.6806, "step": 15263 }, { "epoch": 0.4456512218621354, "grad_norm": 0.6495860307653427, "learning_rate": 1.2319221411192215e-05, "loss": 0.6023, "step": 15264 }, { "epoch": 0.44568041808998277, "grad_norm": 0.5716424719551053, "learning_rate": 1.2318572587185727e-05, "loss": 0.5031, "step": 15265 }, { "epoch": 0.4457096143178301, "grad_norm": 0.612429003836339, "learning_rate": 1.2317923763179237e-05, "loss": 0.5677, "step": 15266 }, { "epoch": 0.4457388105456775, "grad_norm": 0.6226512284479991, "learning_rate": 1.231727493917275e-05, "loss": 0.5832, "step": 15267 }, { "epoch": 0.44576800677352485, "grad_norm": 0.6329578635659123, "learning_rate": 1.2316626115166261e-05, "loss": 0.6006, "step": 15268 }, { "epoch": 0.4457972030013722, "grad_norm": 0.6298671811563358, "learning_rate": 1.2315977291159774e-05, "loss": 0.6125, "step": 15269 }, { "epoch": 0.44582639922921957, "grad_norm": 0.6484490005194071, "learning_rate": 1.2315328467153284e-05, "loss": 0.6416, "step": 15270 }, { "epoch": 0.44585559545706693, "grad_norm": 0.6571939970010833, "learning_rate": 1.23146796431468e-05, "loss": 0.6461, "step": 15271 }, { "epoch": 0.4458847916849143, "grad_norm": 0.6302659587657138, "learning_rate": 1.231403081914031e-05, "loss": 0.5747, "step": 15272 }, { "epoch": 0.44591398791276166, "grad_norm": 0.6372505063048585, "learning_rate": 1.2313381995133822e-05, "loss": 0.6229, "step": 15273 }, { "epoch": 0.445943184140609, "grad_norm": 0.6433912227670553, "learning_rate": 1.2312733171127334e-05, "loss": 0.5832, "step": 15274 }, { "epoch": 0.4459723803684564, "grad_norm": 0.6137139485498994, "learning_rate": 1.2312084347120846e-05, "loss": 0.567, "step": 15275 }, { "epoch": 0.44600157659630374, "grad_norm": 0.6426031004575031, "learning_rate": 1.2311435523114356e-05, "loss": 0.6139, "step": 15276 }, { "epoch": 0.4460307728241511, "grad_norm": 0.6942302056948014, "learning_rate": 1.2310786699107868e-05, "loss": 0.6612, "step": 15277 }, { "epoch": 0.44605996905199846, "grad_norm": 0.6242298636274981, "learning_rate": 1.231013787510138e-05, "loss": 0.5855, "step": 15278 }, { "epoch": 0.4460891652798458, "grad_norm": 0.6948712188388053, "learning_rate": 1.2309489051094892e-05, "loss": 0.6365, "step": 15279 }, { "epoch": 0.4461183615076932, "grad_norm": 0.6969628290917306, "learning_rate": 1.2308840227088404e-05, "loss": 0.5968, "step": 15280 }, { "epoch": 0.44614755773554055, "grad_norm": 0.6933450724578119, "learning_rate": 1.2308191403081914e-05, "loss": 0.6178, "step": 15281 }, { "epoch": 0.4461767539633879, "grad_norm": 0.7302428949797826, "learning_rate": 1.2307542579075426e-05, "loss": 0.7672, "step": 15282 }, { "epoch": 0.44620595019123527, "grad_norm": 0.6368466801129012, "learning_rate": 1.2306893755068938e-05, "loss": 0.6127, "step": 15283 }, { "epoch": 0.44623514641908263, "grad_norm": 0.674524574184316, "learning_rate": 1.230624493106245e-05, "loss": 0.7111, "step": 15284 }, { "epoch": 0.44626434264693, "grad_norm": 0.6341707005051365, "learning_rate": 1.2305596107055961e-05, "loss": 0.6326, "step": 15285 }, { "epoch": 0.44629353887477735, "grad_norm": 0.6419926888439142, "learning_rate": 1.2304947283049473e-05, "loss": 0.5861, "step": 15286 }, { "epoch": 0.44632273510262477, "grad_norm": 0.6618365327727422, "learning_rate": 1.2304298459042985e-05, "loss": 0.6693, "step": 15287 }, { "epoch": 0.44635193133047213, "grad_norm": 0.6542325572407217, "learning_rate": 1.2303649635036497e-05, "loss": 0.6157, "step": 15288 }, { "epoch": 0.4463811275583195, "grad_norm": 0.6674896908540913, "learning_rate": 1.2303000811030007e-05, "loss": 0.637, "step": 15289 }, { "epoch": 0.44641032378616685, "grad_norm": 0.6104699939192115, "learning_rate": 1.2302351987023523e-05, "loss": 0.5854, "step": 15290 }, { "epoch": 0.4464395200140142, "grad_norm": 0.6407031039562406, "learning_rate": 1.2301703163017033e-05, "loss": 0.5659, "step": 15291 }, { "epoch": 0.4464687162418616, "grad_norm": 0.61445322201222, "learning_rate": 1.2301054339010545e-05, "loss": 0.5829, "step": 15292 }, { "epoch": 0.44649791246970894, "grad_norm": 0.6500076393742106, "learning_rate": 1.2300405515004057e-05, "loss": 0.6687, "step": 15293 }, { "epoch": 0.4465271086975563, "grad_norm": 0.6835936208168996, "learning_rate": 1.2299756690997569e-05, "loss": 0.6938, "step": 15294 }, { "epoch": 0.44655630492540366, "grad_norm": 0.652021830089911, "learning_rate": 1.229910786699108e-05, "loss": 0.6407, "step": 15295 }, { "epoch": 0.446585501153251, "grad_norm": 0.6620958222056503, "learning_rate": 1.2298459042984591e-05, "loss": 0.6391, "step": 15296 }, { "epoch": 0.4466146973810984, "grad_norm": 0.7044706752376014, "learning_rate": 1.2297810218978103e-05, "loss": 0.6878, "step": 15297 }, { "epoch": 0.44664389360894574, "grad_norm": 0.6316021291408281, "learning_rate": 1.2297161394971616e-05, "loss": 0.6024, "step": 15298 }, { "epoch": 0.4466730898367931, "grad_norm": 0.6269009814015943, "learning_rate": 1.2296512570965128e-05, "loss": 0.6066, "step": 15299 }, { "epoch": 0.44670228606464046, "grad_norm": 0.614672528665794, "learning_rate": 1.2295863746958638e-05, "loss": 0.5654, "step": 15300 }, { "epoch": 0.4467314822924878, "grad_norm": 0.6074640225374568, "learning_rate": 1.229521492295215e-05, "loss": 0.5746, "step": 15301 }, { "epoch": 0.4467606785203352, "grad_norm": 0.6299006623373202, "learning_rate": 1.2294566098945662e-05, "loss": 0.5275, "step": 15302 }, { "epoch": 0.44678987474818255, "grad_norm": 0.6561145491365117, "learning_rate": 1.2293917274939174e-05, "loss": 0.6197, "step": 15303 }, { "epoch": 0.4468190709760299, "grad_norm": 0.623170645701874, "learning_rate": 1.2293268450932684e-05, "loss": 0.5888, "step": 15304 }, { "epoch": 0.44684826720387727, "grad_norm": 0.7074779690158965, "learning_rate": 1.2292619626926196e-05, "loss": 0.7074, "step": 15305 }, { "epoch": 0.44687746343172463, "grad_norm": 0.6816345317610298, "learning_rate": 1.2291970802919708e-05, "loss": 0.6006, "step": 15306 }, { "epoch": 0.446906659659572, "grad_norm": 0.620893265280727, "learning_rate": 1.229132197891322e-05, "loss": 0.5855, "step": 15307 }, { "epoch": 0.44693585588741935, "grad_norm": 0.6356447846590254, "learning_rate": 1.229067315490673e-05, "loss": 0.6449, "step": 15308 }, { "epoch": 0.4469650521152667, "grad_norm": 0.6786465437442105, "learning_rate": 1.2290024330900246e-05, "loss": 0.685, "step": 15309 }, { "epoch": 0.4469942483431141, "grad_norm": 0.7332435157147921, "learning_rate": 1.2289375506893756e-05, "loss": 0.6583, "step": 15310 }, { "epoch": 0.44702344457096144, "grad_norm": 0.6201506658483411, "learning_rate": 1.2288726682887268e-05, "loss": 0.5702, "step": 15311 }, { "epoch": 0.4470526407988088, "grad_norm": 0.7024805144895645, "learning_rate": 1.228807785888078e-05, "loss": 0.7197, "step": 15312 }, { "epoch": 0.44708183702665616, "grad_norm": 0.6427948631540885, "learning_rate": 1.2287429034874293e-05, "loss": 0.6245, "step": 15313 }, { "epoch": 0.4471110332545035, "grad_norm": 0.6784818467861047, "learning_rate": 1.2286780210867803e-05, "loss": 0.6428, "step": 15314 }, { "epoch": 0.4471402294823509, "grad_norm": 0.6041033037770387, "learning_rate": 1.2286131386861315e-05, "loss": 0.5306, "step": 15315 }, { "epoch": 0.44716942571019824, "grad_norm": 0.6239382947182308, "learning_rate": 1.2285482562854827e-05, "loss": 0.6061, "step": 15316 }, { "epoch": 0.4471986219380456, "grad_norm": 0.6768417865932701, "learning_rate": 1.2284833738848339e-05, "loss": 0.6738, "step": 15317 }, { "epoch": 0.44722781816589297, "grad_norm": 0.6113267001787187, "learning_rate": 1.2284184914841851e-05, "loss": 0.5675, "step": 15318 }, { "epoch": 0.4472570143937403, "grad_norm": 0.6024387691176019, "learning_rate": 1.2283536090835361e-05, "loss": 0.5699, "step": 15319 }, { "epoch": 0.4472862106215877, "grad_norm": 0.639203514589365, "learning_rate": 1.2282887266828873e-05, "loss": 0.5865, "step": 15320 }, { "epoch": 0.44731540684943505, "grad_norm": 0.6349365988031037, "learning_rate": 1.2282238442822385e-05, "loss": 0.534, "step": 15321 }, { "epoch": 0.4473446030772824, "grad_norm": 0.5956916351902176, "learning_rate": 1.2281589618815897e-05, "loss": 0.5066, "step": 15322 }, { "epoch": 0.44737379930512977, "grad_norm": 0.6559498394908222, "learning_rate": 1.2280940794809408e-05, "loss": 0.635, "step": 15323 }, { "epoch": 0.44740299553297713, "grad_norm": 0.6254714976233912, "learning_rate": 1.228029197080292e-05, "loss": 0.5623, "step": 15324 }, { "epoch": 0.4474321917608245, "grad_norm": 0.6439285279280311, "learning_rate": 1.2279643146796432e-05, "loss": 0.6374, "step": 15325 }, { "epoch": 0.44746138798867185, "grad_norm": 0.6308057568567705, "learning_rate": 1.2278994322789944e-05, "loss": 0.5745, "step": 15326 }, { "epoch": 0.4474905842165192, "grad_norm": 0.6529186188722389, "learning_rate": 1.2278345498783454e-05, "loss": 0.5845, "step": 15327 }, { "epoch": 0.4475197804443666, "grad_norm": 0.692373317379277, "learning_rate": 1.227769667477697e-05, "loss": 0.6568, "step": 15328 }, { "epoch": 0.44754897667221394, "grad_norm": 0.636006272305738, "learning_rate": 1.227704785077048e-05, "loss": 0.6512, "step": 15329 }, { "epoch": 0.4475781729000613, "grad_norm": 0.6853907455432583, "learning_rate": 1.2276399026763992e-05, "loss": 0.6763, "step": 15330 }, { "epoch": 0.44760736912790866, "grad_norm": 0.690420159028307, "learning_rate": 1.2275750202757504e-05, "loss": 0.6573, "step": 15331 }, { "epoch": 0.447636565355756, "grad_norm": 0.6162571965867679, "learning_rate": 1.2275101378751016e-05, "loss": 0.5722, "step": 15332 }, { "epoch": 0.4476657615836034, "grad_norm": 0.6461527838964798, "learning_rate": 1.2274452554744526e-05, "loss": 0.6222, "step": 15333 }, { "epoch": 0.44769495781145074, "grad_norm": 0.6320568883852474, "learning_rate": 1.2273803730738038e-05, "loss": 0.6401, "step": 15334 }, { "epoch": 0.4477241540392981, "grad_norm": 0.6287184970376576, "learning_rate": 1.227315490673155e-05, "loss": 0.6241, "step": 15335 }, { "epoch": 0.44775335026714547, "grad_norm": 0.66598301471629, "learning_rate": 1.2272506082725062e-05, "loss": 0.6395, "step": 15336 }, { "epoch": 0.44778254649499283, "grad_norm": 0.9658277616933914, "learning_rate": 1.2271857258718574e-05, "loss": 0.6483, "step": 15337 }, { "epoch": 0.4478117427228402, "grad_norm": 0.6195567067347074, "learning_rate": 1.2271208434712085e-05, "loss": 0.5929, "step": 15338 }, { "epoch": 0.44784093895068755, "grad_norm": 0.6514172836335081, "learning_rate": 1.2270559610705597e-05, "loss": 0.6735, "step": 15339 }, { "epoch": 0.4478701351785349, "grad_norm": 0.6368306545961226, "learning_rate": 1.2269910786699109e-05, "loss": 0.613, "step": 15340 }, { "epoch": 0.4478993314063823, "grad_norm": 0.6731864785176828, "learning_rate": 1.226926196269262e-05, "loss": 0.689, "step": 15341 }, { "epoch": 0.44792852763422963, "grad_norm": 0.5518064379108958, "learning_rate": 1.2268613138686131e-05, "loss": 0.4869, "step": 15342 }, { "epoch": 0.447957723862077, "grad_norm": 0.674127895800922, "learning_rate": 1.2267964314679643e-05, "loss": 0.6308, "step": 15343 }, { "epoch": 0.44798692008992436, "grad_norm": 0.6622245827363068, "learning_rate": 1.2267315490673155e-05, "loss": 0.6604, "step": 15344 }, { "epoch": 0.4480161163177717, "grad_norm": 0.6632634260894041, "learning_rate": 1.2266666666666667e-05, "loss": 0.6427, "step": 15345 }, { "epoch": 0.4480453125456191, "grad_norm": 0.7081242056892967, "learning_rate": 1.2266017842660178e-05, "loss": 0.6736, "step": 15346 }, { "epoch": 0.4480745087734665, "grad_norm": 0.6202524502364211, "learning_rate": 1.2265369018653693e-05, "loss": 0.5695, "step": 15347 }, { "epoch": 0.44810370500131386, "grad_norm": 0.6612958224186255, "learning_rate": 1.2264720194647203e-05, "loss": 0.6647, "step": 15348 }, { "epoch": 0.4481329012291612, "grad_norm": 0.6363246896034602, "learning_rate": 1.2264071370640715e-05, "loss": 0.6038, "step": 15349 }, { "epoch": 0.4481620974570086, "grad_norm": 0.6423252178796787, "learning_rate": 1.2263422546634227e-05, "loss": 0.5858, "step": 15350 }, { "epoch": 0.44819129368485594, "grad_norm": 0.7794914844005, "learning_rate": 1.226277372262774e-05, "loss": 0.7085, "step": 15351 }, { "epoch": 0.4482204899127033, "grad_norm": 0.7126365505902448, "learning_rate": 1.226212489862125e-05, "loss": 0.7099, "step": 15352 }, { "epoch": 0.44824968614055066, "grad_norm": 0.6484249674137195, "learning_rate": 1.2261476074614762e-05, "loss": 0.6249, "step": 15353 }, { "epoch": 0.448278882368398, "grad_norm": 0.6109074815265739, "learning_rate": 1.2260827250608274e-05, "loss": 0.5647, "step": 15354 }, { "epoch": 0.4483080785962454, "grad_norm": 0.6353326314528576, "learning_rate": 1.2260178426601786e-05, "loss": 0.6107, "step": 15355 }, { "epoch": 0.44833727482409275, "grad_norm": 0.6329811542458726, "learning_rate": 1.2259529602595298e-05, "loss": 0.5801, "step": 15356 }, { "epoch": 0.4483664710519401, "grad_norm": 0.8344485732526847, "learning_rate": 1.2258880778588808e-05, "loss": 0.7278, "step": 15357 }, { "epoch": 0.44839566727978747, "grad_norm": 0.60069764686008, "learning_rate": 1.225823195458232e-05, "loss": 0.5379, "step": 15358 }, { "epoch": 0.44842486350763483, "grad_norm": 0.6246361912925713, "learning_rate": 1.2257583130575832e-05, "loss": 0.5697, "step": 15359 }, { "epoch": 0.4484540597354822, "grad_norm": 0.6689128780429047, "learning_rate": 1.2256934306569344e-05, "loss": 0.7044, "step": 15360 }, { "epoch": 0.44848325596332955, "grad_norm": 0.6702963996488445, "learning_rate": 1.2256285482562855e-05, "loss": 0.6074, "step": 15361 }, { "epoch": 0.4485124521911769, "grad_norm": 0.6650437725573016, "learning_rate": 1.2255636658556367e-05, "loss": 0.6368, "step": 15362 }, { "epoch": 0.4485416484190243, "grad_norm": 0.669648045535609, "learning_rate": 1.2254987834549879e-05, "loss": 0.6604, "step": 15363 }, { "epoch": 0.44857084464687164, "grad_norm": 0.623698379443035, "learning_rate": 1.225433901054339e-05, "loss": 0.6271, "step": 15364 }, { "epoch": 0.448600040874719, "grad_norm": 0.5915515171634924, "learning_rate": 1.2253690186536901e-05, "loss": 0.5267, "step": 15365 }, { "epoch": 0.44862923710256636, "grad_norm": 0.6588303814042051, "learning_rate": 1.2253041362530416e-05, "loss": 0.6202, "step": 15366 }, { "epoch": 0.4486584333304137, "grad_norm": 0.6414657631440871, "learning_rate": 1.2252392538523927e-05, "loss": 0.6473, "step": 15367 }, { "epoch": 0.4486876295582611, "grad_norm": 0.6437865622722857, "learning_rate": 1.2251743714517439e-05, "loss": 0.568, "step": 15368 }, { "epoch": 0.44871682578610844, "grad_norm": 0.7135077918912434, "learning_rate": 1.225109489051095e-05, "loss": 0.6486, "step": 15369 }, { "epoch": 0.4487460220139558, "grad_norm": 0.6658824153676011, "learning_rate": 1.2250446066504463e-05, "loss": 0.6246, "step": 15370 }, { "epoch": 0.44877521824180316, "grad_norm": 0.675281246513389, "learning_rate": 1.2249797242497973e-05, "loss": 0.6401, "step": 15371 }, { "epoch": 0.4488044144696505, "grad_norm": 0.6626291090339591, "learning_rate": 1.2249148418491485e-05, "loss": 0.6269, "step": 15372 }, { "epoch": 0.4488336106974979, "grad_norm": 0.6113464440893827, "learning_rate": 1.2248499594484997e-05, "loss": 0.599, "step": 15373 }, { "epoch": 0.44886280692534525, "grad_norm": 0.6289377514160952, "learning_rate": 1.224785077047851e-05, "loss": 0.6097, "step": 15374 }, { "epoch": 0.4488920031531926, "grad_norm": 0.8332636062784301, "learning_rate": 1.224720194647202e-05, "loss": 0.6691, "step": 15375 }, { "epoch": 0.44892119938103997, "grad_norm": 0.6769398594708175, "learning_rate": 1.2246553122465532e-05, "loss": 0.6567, "step": 15376 }, { "epoch": 0.44895039560888733, "grad_norm": 0.6748769538970244, "learning_rate": 1.2245904298459044e-05, "loss": 0.6451, "step": 15377 }, { "epoch": 0.4489795918367347, "grad_norm": 0.6331934831538992, "learning_rate": 1.2245255474452556e-05, "loss": 0.5648, "step": 15378 }, { "epoch": 0.44900878806458205, "grad_norm": 0.6302615641576084, "learning_rate": 1.2244606650446068e-05, "loss": 0.5591, "step": 15379 }, { "epoch": 0.4490379842924294, "grad_norm": 0.669187753061521, "learning_rate": 1.2243957826439578e-05, "loss": 0.6329, "step": 15380 }, { "epoch": 0.4490671805202768, "grad_norm": 0.6229279957382382, "learning_rate": 1.224330900243309e-05, "loss": 0.5999, "step": 15381 }, { "epoch": 0.44909637674812414, "grad_norm": 0.6940936409090799, "learning_rate": 1.2242660178426602e-05, "loss": 0.688, "step": 15382 }, { "epoch": 0.4491255729759715, "grad_norm": 0.6154712812133787, "learning_rate": 1.2242011354420114e-05, "loss": 0.5842, "step": 15383 }, { "epoch": 0.44915476920381886, "grad_norm": 0.6418667178319839, "learning_rate": 1.2241362530413624e-05, "loss": 0.5983, "step": 15384 }, { "epoch": 0.4491839654316662, "grad_norm": 0.6577993953088356, "learning_rate": 1.224071370640714e-05, "loss": 0.6159, "step": 15385 }, { "epoch": 0.4492131616595136, "grad_norm": 0.6456638101875211, "learning_rate": 1.224006488240065e-05, "loss": 0.6289, "step": 15386 }, { "epoch": 0.44924235788736094, "grad_norm": 0.6513615687371597, "learning_rate": 1.2239416058394162e-05, "loss": 0.6077, "step": 15387 }, { "epoch": 0.4492715541152083, "grad_norm": 0.6626892769828899, "learning_rate": 1.2238767234387674e-05, "loss": 0.6653, "step": 15388 }, { "epoch": 0.44930075034305567, "grad_norm": 0.6169678894503576, "learning_rate": 1.2238118410381186e-05, "loss": 0.5904, "step": 15389 }, { "epoch": 0.449329946570903, "grad_norm": 0.6323371051635437, "learning_rate": 1.2237469586374697e-05, "loss": 0.6258, "step": 15390 }, { "epoch": 0.4493591427987504, "grad_norm": 0.7021774780901834, "learning_rate": 1.2236820762368209e-05, "loss": 0.7011, "step": 15391 }, { "epoch": 0.44938833902659775, "grad_norm": 0.6576167816002219, "learning_rate": 1.223617193836172e-05, "loss": 0.624, "step": 15392 }, { "epoch": 0.4494175352544451, "grad_norm": 0.6428813844561313, "learning_rate": 1.2235523114355233e-05, "loss": 0.5851, "step": 15393 }, { "epoch": 0.44944673148229247, "grad_norm": 0.663867758368961, "learning_rate": 1.2234874290348743e-05, "loss": 0.6155, "step": 15394 }, { "epoch": 0.44947592771013983, "grad_norm": 0.6135606792325352, "learning_rate": 1.2234225466342255e-05, "loss": 0.6181, "step": 15395 }, { "epoch": 0.4495051239379872, "grad_norm": 0.6440314216772577, "learning_rate": 1.2233576642335767e-05, "loss": 0.6422, "step": 15396 }, { "epoch": 0.44953432016583456, "grad_norm": 0.6742231560958385, "learning_rate": 1.2232927818329279e-05, "loss": 0.6814, "step": 15397 }, { "epoch": 0.4495635163936819, "grad_norm": 0.708807139861029, "learning_rate": 1.2232278994322791e-05, "loss": 0.6029, "step": 15398 }, { "epoch": 0.4495927126215293, "grad_norm": 0.6094663740706455, "learning_rate": 1.2231630170316301e-05, "loss": 0.5738, "step": 15399 }, { "epoch": 0.44962190884937664, "grad_norm": 0.6022504603640922, "learning_rate": 1.2230981346309813e-05, "loss": 0.566, "step": 15400 }, { "epoch": 0.449651105077224, "grad_norm": 0.6472343415680379, "learning_rate": 1.2230332522303326e-05, "loss": 0.5971, "step": 15401 }, { "epoch": 0.44968030130507136, "grad_norm": 0.6222864967798533, "learning_rate": 1.2229683698296838e-05, "loss": 0.5853, "step": 15402 }, { "epoch": 0.4497094975329187, "grad_norm": 0.6684585257109116, "learning_rate": 1.2229034874290348e-05, "loss": 0.6609, "step": 15403 }, { "epoch": 0.4497386937607661, "grad_norm": 0.6880425135780465, "learning_rate": 1.2228386050283863e-05, "loss": 0.648, "step": 15404 }, { "epoch": 0.44976788998861345, "grad_norm": 0.6525383953005341, "learning_rate": 1.2227737226277374e-05, "loss": 0.5824, "step": 15405 }, { "epoch": 0.4497970862164608, "grad_norm": 0.6407710009217551, "learning_rate": 1.2227088402270886e-05, "loss": 0.618, "step": 15406 }, { "epoch": 0.4498262824443082, "grad_norm": 0.6450032270629659, "learning_rate": 1.2226439578264398e-05, "loss": 0.6182, "step": 15407 }, { "epoch": 0.4498554786721556, "grad_norm": 0.6918585289012027, "learning_rate": 1.222579075425791e-05, "loss": 0.7015, "step": 15408 }, { "epoch": 0.44988467490000295, "grad_norm": 0.6352781686734016, "learning_rate": 1.222514193025142e-05, "loss": 0.5966, "step": 15409 }, { "epoch": 0.4499138711278503, "grad_norm": 0.6819613829302095, "learning_rate": 1.2224493106244932e-05, "loss": 0.6722, "step": 15410 }, { "epoch": 0.44994306735569767, "grad_norm": 0.6301810463675108, "learning_rate": 1.2223844282238444e-05, "loss": 0.6269, "step": 15411 }, { "epoch": 0.44997226358354503, "grad_norm": 0.6521216272311592, "learning_rate": 1.2223195458231956e-05, "loss": 0.6407, "step": 15412 }, { "epoch": 0.4500014598113924, "grad_norm": 0.5919865608321797, "learning_rate": 1.2222546634225466e-05, "loss": 0.5211, "step": 15413 }, { "epoch": 0.45003065603923975, "grad_norm": 0.6479730792743827, "learning_rate": 1.2221897810218978e-05, "loss": 0.6729, "step": 15414 }, { "epoch": 0.4500598522670871, "grad_norm": 0.645697166052148, "learning_rate": 1.222124898621249e-05, "loss": 0.6851, "step": 15415 }, { "epoch": 0.4500890484949345, "grad_norm": 0.673015280617495, "learning_rate": 1.2220600162206003e-05, "loss": 0.6802, "step": 15416 }, { "epoch": 0.45011824472278184, "grad_norm": 0.6107401442035272, "learning_rate": 1.2219951338199515e-05, "loss": 0.5668, "step": 15417 }, { "epoch": 0.4501474409506292, "grad_norm": 0.6374314202169905, "learning_rate": 1.2219302514193025e-05, "loss": 0.6483, "step": 15418 }, { "epoch": 0.45017663717847656, "grad_norm": 0.6738075235353798, "learning_rate": 1.2218653690186537e-05, "loss": 0.629, "step": 15419 }, { "epoch": 0.4502058334063239, "grad_norm": 0.6446740577677864, "learning_rate": 1.2218004866180049e-05, "loss": 0.5842, "step": 15420 }, { "epoch": 0.4502350296341713, "grad_norm": 0.6547977084765182, "learning_rate": 1.2217356042173561e-05, "loss": 0.6478, "step": 15421 }, { "epoch": 0.45026422586201864, "grad_norm": 0.65231490389233, "learning_rate": 1.2216707218167075e-05, "loss": 0.6392, "step": 15422 }, { "epoch": 0.450293422089866, "grad_norm": 0.7191103483637061, "learning_rate": 1.2216058394160587e-05, "loss": 0.7642, "step": 15423 }, { "epoch": 0.45032261831771336, "grad_norm": 0.6481429956992286, "learning_rate": 1.2215409570154097e-05, "loss": 0.6334, "step": 15424 }, { "epoch": 0.4503518145455607, "grad_norm": 0.6569858777074818, "learning_rate": 1.2214760746147609e-05, "loss": 0.6089, "step": 15425 }, { "epoch": 0.4503810107734081, "grad_norm": 0.6388866939963346, "learning_rate": 1.2214111922141121e-05, "loss": 0.6022, "step": 15426 }, { "epoch": 0.45041020700125545, "grad_norm": 0.7258285401261338, "learning_rate": 1.2213463098134633e-05, "loss": 0.7679, "step": 15427 }, { "epoch": 0.4504394032291028, "grad_norm": 0.6283769614056881, "learning_rate": 1.2212814274128143e-05, "loss": 0.5751, "step": 15428 }, { "epoch": 0.45046859945695017, "grad_norm": 0.6371505348110883, "learning_rate": 1.2212165450121656e-05, "loss": 0.5748, "step": 15429 }, { "epoch": 0.45049779568479753, "grad_norm": 0.6829248289630373, "learning_rate": 1.2211516626115168e-05, "loss": 0.6762, "step": 15430 }, { "epoch": 0.4505269919126449, "grad_norm": 0.6358656225894211, "learning_rate": 1.221086780210868e-05, "loss": 0.6168, "step": 15431 }, { "epoch": 0.45055618814049225, "grad_norm": 0.6209377613866467, "learning_rate": 1.221021897810219e-05, "loss": 0.5674, "step": 15432 }, { "epoch": 0.4505853843683396, "grad_norm": 0.6646496575308055, "learning_rate": 1.2209570154095702e-05, "loss": 0.6371, "step": 15433 }, { "epoch": 0.450614580596187, "grad_norm": 0.7077520718090756, "learning_rate": 1.2208921330089214e-05, "loss": 0.722, "step": 15434 }, { "epoch": 0.45064377682403434, "grad_norm": 0.6252143270734898, "learning_rate": 1.2208272506082726e-05, "loss": 0.5705, "step": 15435 }, { "epoch": 0.4506729730518817, "grad_norm": 0.6166573732922357, "learning_rate": 1.2207623682076238e-05, "loss": 0.5714, "step": 15436 }, { "epoch": 0.45070216927972906, "grad_norm": 0.6345840953064796, "learning_rate": 1.2206974858069748e-05, "loss": 0.5825, "step": 15437 }, { "epoch": 0.4507313655075764, "grad_norm": 0.692448164831067, "learning_rate": 1.220632603406326e-05, "loss": 0.6519, "step": 15438 }, { "epoch": 0.4507605617354238, "grad_norm": 0.6055309101272153, "learning_rate": 1.2205677210056772e-05, "loss": 0.5401, "step": 15439 }, { "epoch": 0.45078975796327114, "grad_norm": 0.5905693107746008, "learning_rate": 1.2205028386050284e-05, "loss": 0.5454, "step": 15440 }, { "epoch": 0.4508189541911185, "grad_norm": 0.7608314056348257, "learning_rate": 1.2204379562043798e-05, "loss": 0.5583, "step": 15441 }, { "epoch": 0.45084815041896587, "grad_norm": 0.6728889619860078, "learning_rate": 1.220373073803731e-05, "loss": 0.6665, "step": 15442 }, { "epoch": 0.4508773466468132, "grad_norm": 0.611647132051735, "learning_rate": 1.220308191403082e-05, "loss": 0.559, "step": 15443 }, { "epoch": 0.4509065428746606, "grad_norm": 0.6326968367230363, "learning_rate": 1.2202433090024333e-05, "loss": 0.5755, "step": 15444 }, { "epoch": 0.45093573910250795, "grad_norm": 0.6137479100672689, "learning_rate": 1.2201784266017845e-05, "loss": 0.5724, "step": 15445 }, { "epoch": 0.4509649353303553, "grad_norm": 0.6375858523318798, "learning_rate": 1.2201135442011357e-05, "loss": 0.6068, "step": 15446 }, { "epoch": 0.45099413155820267, "grad_norm": 0.6629407499113096, "learning_rate": 1.2200486618004867e-05, "loss": 0.582, "step": 15447 }, { "epoch": 0.45102332778605003, "grad_norm": 0.6688169643488221, "learning_rate": 1.2199837793998379e-05, "loss": 0.6609, "step": 15448 }, { "epoch": 0.4510525240138974, "grad_norm": 0.6493825457184562, "learning_rate": 1.2199188969991891e-05, "loss": 0.6342, "step": 15449 }, { "epoch": 0.45108172024174475, "grad_norm": 0.6394013486979079, "learning_rate": 1.2198540145985403e-05, "loss": 0.6322, "step": 15450 }, { "epoch": 0.4511109164695921, "grad_norm": 0.6279063235493587, "learning_rate": 1.2197891321978913e-05, "loss": 0.5543, "step": 15451 }, { "epoch": 0.4511401126974395, "grad_norm": 0.6237070865723795, "learning_rate": 1.2197242497972425e-05, "loss": 0.624, "step": 15452 }, { "epoch": 0.45116930892528684, "grad_norm": 0.7172130452323632, "learning_rate": 1.2196593673965937e-05, "loss": 0.682, "step": 15453 }, { "epoch": 0.4511985051531342, "grad_norm": 0.5994222635048858, "learning_rate": 1.219594484995945e-05, "loss": 0.5292, "step": 15454 }, { "epoch": 0.45122770138098156, "grad_norm": 0.6615989485391635, "learning_rate": 1.2195296025952961e-05, "loss": 0.6196, "step": 15455 }, { "epoch": 0.4512568976088289, "grad_norm": 0.6326456432324904, "learning_rate": 1.2194647201946472e-05, "loss": 0.6233, "step": 15456 }, { "epoch": 0.4512860938366763, "grad_norm": 0.6600129203044336, "learning_rate": 1.2193998377939984e-05, "loss": 0.6696, "step": 15457 }, { "epoch": 0.45131529006452364, "grad_norm": 0.6476437578304968, "learning_rate": 1.2193349553933496e-05, "loss": 0.6374, "step": 15458 }, { "epoch": 0.451344486292371, "grad_norm": 0.7749574047038181, "learning_rate": 1.2192700729927008e-05, "loss": 0.7091, "step": 15459 }, { "epoch": 0.45137368252021837, "grad_norm": 0.6128491152982065, "learning_rate": 1.2192051905920522e-05, "loss": 0.5804, "step": 15460 }, { "epoch": 0.45140287874806573, "grad_norm": 0.6533173585237501, "learning_rate": 1.2191403081914034e-05, "loss": 0.6251, "step": 15461 }, { "epoch": 0.4514320749759131, "grad_norm": 0.6417392064199553, "learning_rate": 1.2190754257907544e-05, "loss": 0.5602, "step": 15462 }, { "epoch": 0.45146127120376045, "grad_norm": 0.5669861503454855, "learning_rate": 1.2190105433901056e-05, "loss": 0.4988, "step": 15463 }, { "epoch": 0.4514904674316078, "grad_norm": 0.7049104094980287, "learning_rate": 1.2189456609894568e-05, "loss": 0.7428, "step": 15464 }, { "epoch": 0.4515196636594552, "grad_norm": 0.6363501260938494, "learning_rate": 1.218880778588808e-05, "loss": 0.6042, "step": 15465 }, { "epoch": 0.45154885988730253, "grad_norm": 0.6750342128631408, "learning_rate": 1.218815896188159e-05, "loss": 0.5954, "step": 15466 }, { "epoch": 0.4515780561151499, "grad_norm": 0.6557340624214613, "learning_rate": 1.2187510137875102e-05, "loss": 0.6152, "step": 15467 }, { "epoch": 0.4516072523429973, "grad_norm": 0.6282429112547762, "learning_rate": 1.2186861313868614e-05, "loss": 0.537, "step": 15468 }, { "epoch": 0.4516364485708447, "grad_norm": 0.6672423280406513, "learning_rate": 1.2186212489862126e-05, "loss": 0.6382, "step": 15469 }, { "epoch": 0.45166564479869203, "grad_norm": 0.5995681454439468, "learning_rate": 1.2185563665855637e-05, "loss": 0.5184, "step": 15470 }, { "epoch": 0.4516948410265394, "grad_norm": 0.6379602733217217, "learning_rate": 1.2184914841849149e-05, "loss": 0.6256, "step": 15471 }, { "epoch": 0.45172403725438676, "grad_norm": 0.564861795126965, "learning_rate": 1.218426601784266e-05, "loss": 0.4881, "step": 15472 }, { "epoch": 0.4517532334822341, "grad_norm": 0.6665365406219694, "learning_rate": 1.2183617193836173e-05, "loss": 0.5865, "step": 15473 }, { "epoch": 0.4517824297100815, "grad_norm": 0.6264864870352068, "learning_rate": 1.2182968369829685e-05, "loss": 0.587, "step": 15474 }, { "epoch": 0.45181162593792884, "grad_norm": 0.7082241087657285, "learning_rate": 1.2182319545823195e-05, "loss": 0.7035, "step": 15475 }, { "epoch": 0.4518408221657762, "grad_norm": 0.632151443439631, "learning_rate": 1.2181670721816707e-05, "loss": 0.6411, "step": 15476 }, { "epoch": 0.45187001839362356, "grad_norm": 0.6753931883598471, "learning_rate": 1.218102189781022e-05, "loss": 0.7281, "step": 15477 }, { "epoch": 0.4518992146214709, "grad_norm": 0.6749496227755488, "learning_rate": 1.2180373073803731e-05, "loss": 0.6759, "step": 15478 }, { "epoch": 0.4519284108493183, "grad_norm": 0.6013623665832912, "learning_rate": 1.2179724249797245e-05, "loss": 0.5098, "step": 15479 }, { "epoch": 0.45195760707716565, "grad_norm": 0.6412731053601934, "learning_rate": 1.2179075425790757e-05, "loss": 0.6594, "step": 15480 }, { "epoch": 0.451986803305013, "grad_norm": 0.5837720961937181, "learning_rate": 1.2178426601784267e-05, "loss": 0.5491, "step": 15481 }, { "epoch": 0.45201599953286037, "grad_norm": 0.6555784710683715, "learning_rate": 1.217777777777778e-05, "loss": 0.6419, "step": 15482 }, { "epoch": 0.45204519576070773, "grad_norm": 0.6677734081641341, "learning_rate": 1.2177128953771291e-05, "loss": 0.6686, "step": 15483 }, { "epoch": 0.4520743919885551, "grad_norm": 0.6170637324137107, "learning_rate": 1.2176480129764803e-05, "loss": 0.5998, "step": 15484 }, { "epoch": 0.45210358821640245, "grad_norm": 0.6650588021292916, "learning_rate": 1.2175831305758314e-05, "loss": 0.6308, "step": 15485 }, { "epoch": 0.4521327844442498, "grad_norm": 0.5797221543319574, "learning_rate": 1.2175182481751826e-05, "loss": 0.5291, "step": 15486 }, { "epoch": 0.4521619806720972, "grad_norm": 0.6111232579542231, "learning_rate": 1.2174533657745338e-05, "loss": 0.5771, "step": 15487 }, { "epoch": 0.45219117689994454, "grad_norm": 0.640673465221953, "learning_rate": 1.217388483373885e-05, "loss": 0.6026, "step": 15488 }, { "epoch": 0.4522203731277919, "grad_norm": 0.6586766423642981, "learning_rate": 1.217323600973236e-05, "loss": 0.634, "step": 15489 }, { "epoch": 0.45224956935563926, "grad_norm": 0.6059789384421069, "learning_rate": 1.2172587185725872e-05, "loss": 0.5636, "step": 15490 }, { "epoch": 0.4522787655834866, "grad_norm": 0.6196985654971664, "learning_rate": 1.2171938361719384e-05, "loss": 0.533, "step": 15491 }, { "epoch": 0.452307961811334, "grad_norm": 0.6690825529861721, "learning_rate": 1.2171289537712896e-05, "loss": 0.5981, "step": 15492 }, { "epoch": 0.45233715803918134, "grad_norm": 0.6475246625050062, "learning_rate": 1.2170640713706408e-05, "loss": 0.6402, "step": 15493 }, { "epoch": 0.4523663542670287, "grad_norm": 0.6309822217535382, "learning_rate": 1.2169991889699919e-05, "loss": 0.6037, "step": 15494 }, { "epoch": 0.45239555049487606, "grad_norm": 0.6559245932047356, "learning_rate": 1.216934306569343e-05, "loss": 0.6032, "step": 15495 }, { "epoch": 0.4524247467227234, "grad_norm": 0.620644434875876, "learning_rate": 1.2168694241686943e-05, "loss": 0.5852, "step": 15496 }, { "epoch": 0.4524539429505708, "grad_norm": 0.635732374228913, "learning_rate": 1.2168045417680455e-05, "loss": 0.6312, "step": 15497 }, { "epoch": 0.45248313917841815, "grad_norm": 0.5988556595983531, "learning_rate": 1.2167396593673968e-05, "loss": 0.55, "step": 15498 }, { "epoch": 0.4525123354062655, "grad_norm": 0.6905048973005816, "learning_rate": 1.2166747769667479e-05, "loss": 0.6713, "step": 15499 }, { "epoch": 0.45254153163411287, "grad_norm": 0.6681944769995093, "learning_rate": 1.216609894566099e-05, "loss": 0.6569, "step": 15500 }, { "epoch": 0.45257072786196023, "grad_norm": 0.7303496890592359, "learning_rate": 1.2165450121654503e-05, "loss": 0.6683, "step": 15501 }, { "epoch": 0.4525999240898076, "grad_norm": 0.6431622401913164, "learning_rate": 1.2164801297648015e-05, "loss": 0.6517, "step": 15502 }, { "epoch": 0.45262912031765495, "grad_norm": 0.7394131862397711, "learning_rate": 1.2164152473641527e-05, "loss": 0.7436, "step": 15503 }, { "epoch": 0.4526583165455023, "grad_norm": 0.6655578583287205, "learning_rate": 1.2163503649635037e-05, "loss": 0.7298, "step": 15504 }, { "epoch": 0.4526875127733497, "grad_norm": 0.6423890988970877, "learning_rate": 1.216285482562855e-05, "loss": 0.5885, "step": 15505 }, { "epoch": 0.45271670900119704, "grad_norm": 0.6532299826256134, "learning_rate": 1.2162206001622061e-05, "loss": 0.6482, "step": 15506 }, { "epoch": 0.4527459052290444, "grad_norm": 0.6370390626512744, "learning_rate": 1.2161557177615573e-05, "loss": 0.6008, "step": 15507 }, { "epoch": 0.45277510145689176, "grad_norm": 0.6042418028498053, "learning_rate": 1.2160908353609084e-05, "loss": 0.5294, "step": 15508 }, { "epoch": 0.4528042976847391, "grad_norm": 0.6943513953111872, "learning_rate": 1.2160259529602596e-05, "loss": 0.7063, "step": 15509 }, { "epoch": 0.4528334939125865, "grad_norm": 0.6836214878435097, "learning_rate": 1.2159610705596108e-05, "loss": 0.671, "step": 15510 }, { "epoch": 0.45286269014043384, "grad_norm": 0.6241283064411421, "learning_rate": 1.215896188158962e-05, "loss": 0.6083, "step": 15511 }, { "epoch": 0.4528918863682812, "grad_norm": 0.6677127564441553, "learning_rate": 1.2158313057583132e-05, "loss": 0.6456, "step": 15512 }, { "epoch": 0.45292108259612857, "grad_norm": 0.6565716562526718, "learning_rate": 1.2157664233576642e-05, "loss": 0.6336, "step": 15513 }, { "epoch": 0.4529502788239759, "grad_norm": 0.59230024298874, "learning_rate": 1.2157015409570154e-05, "loss": 0.5326, "step": 15514 }, { "epoch": 0.4529794750518233, "grad_norm": 0.6373263833796614, "learning_rate": 1.2156366585563666e-05, "loss": 0.5605, "step": 15515 }, { "epoch": 0.45300867127967065, "grad_norm": 0.6448633001535224, "learning_rate": 1.2155717761557178e-05, "loss": 0.651, "step": 15516 }, { "epoch": 0.453037867507518, "grad_norm": 0.6914384682712138, "learning_rate": 1.2155068937550692e-05, "loss": 0.6227, "step": 15517 }, { "epoch": 0.45306706373536537, "grad_norm": 0.632801832661551, "learning_rate": 1.2154420113544202e-05, "loss": 0.6081, "step": 15518 }, { "epoch": 0.45309625996321273, "grad_norm": 0.627165078413902, "learning_rate": 1.2153771289537714e-05, "loss": 0.6282, "step": 15519 }, { "epoch": 0.4531254561910601, "grad_norm": 0.6705369281942056, "learning_rate": 1.2153122465531226e-05, "loss": 0.6258, "step": 15520 }, { "epoch": 0.45315465241890746, "grad_norm": 0.632209911966129, "learning_rate": 1.2152473641524738e-05, "loss": 0.6179, "step": 15521 }, { "epoch": 0.4531838486467548, "grad_norm": 0.6440134351765775, "learning_rate": 1.215182481751825e-05, "loss": 0.583, "step": 15522 }, { "epoch": 0.4532130448746022, "grad_norm": 0.645701054141453, "learning_rate": 1.215117599351176e-05, "loss": 0.6095, "step": 15523 }, { "epoch": 0.45324224110244954, "grad_norm": 0.653922434393878, "learning_rate": 1.2150527169505273e-05, "loss": 0.6003, "step": 15524 }, { "epoch": 0.4532714373302969, "grad_norm": 0.6995554297343787, "learning_rate": 1.2149878345498785e-05, "loss": 0.6905, "step": 15525 }, { "epoch": 0.45330063355814426, "grad_norm": 0.6152347660282403, "learning_rate": 1.2149229521492297e-05, "loss": 0.5747, "step": 15526 }, { "epoch": 0.4533298297859916, "grad_norm": 0.6336451469497555, "learning_rate": 1.2148580697485807e-05, "loss": 0.6023, "step": 15527 }, { "epoch": 0.45335902601383904, "grad_norm": 0.6729661698062919, "learning_rate": 1.2147931873479319e-05, "loss": 0.6285, "step": 15528 }, { "epoch": 0.4533882222416864, "grad_norm": 0.6425890704895443, "learning_rate": 1.2147283049472831e-05, "loss": 0.6518, "step": 15529 }, { "epoch": 0.45341741846953376, "grad_norm": 0.6624743078395612, "learning_rate": 1.2146634225466343e-05, "loss": 0.6009, "step": 15530 }, { "epoch": 0.4534466146973811, "grad_norm": 0.656980373000106, "learning_rate": 1.2145985401459855e-05, "loss": 0.6119, "step": 15531 }, { "epoch": 0.4534758109252285, "grad_norm": 0.7037116404325808, "learning_rate": 1.2145336577453366e-05, "loss": 0.6072, "step": 15532 }, { "epoch": 0.45350500715307585, "grad_norm": 0.6988459657138404, "learning_rate": 1.2144687753446878e-05, "loss": 0.7133, "step": 15533 }, { "epoch": 0.4535342033809232, "grad_norm": 0.6432381641796265, "learning_rate": 1.214403892944039e-05, "loss": 0.6292, "step": 15534 }, { "epoch": 0.45356339960877057, "grad_norm": 0.6435010521892895, "learning_rate": 1.2143390105433902e-05, "loss": 0.5827, "step": 15535 }, { "epoch": 0.45359259583661793, "grad_norm": 0.5988517543188544, "learning_rate": 1.2142741281427415e-05, "loss": 0.5511, "step": 15536 }, { "epoch": 0.4536217920644653, "grad_norm": 0.6002144400770394, "learning_rate": 1.2142092457420926e-05, "loss": 0.5413, "step": 15537 }, { "epoch": 0.45365098829231265, "grad_norm": 0.7354492122989138, "learning_rate": 1.2141443633414438e-05, "loss": 0.728, "step": 15538 }, { "epoch": 0.45368018452016, "grad_norm": 0.6819784491133205, "learning_rate": 1.214079480940795e-05, "loss": 0.7117, "step": 15539 }, { "epoch": 0.4537093807480074, "grad_norm": 0.7036239841577829, "learning_rate": 1.2140145985401462e-05, "loss": 0.6845, "step": 15540 }, { "epoch": 0.45373857697585473, "grad_norm": 0.6472072023637024, "learning_rate": 1.2139497161394974e-05, "loss": 0.5797, "step": 15541 }, { "epoch": 0.4537677732037021, "grad_norm": 0.6634820673899052, "learning_rate": 1.2138848337388484e-05, "loss": 0.6584, "step": 15542 }, { "epoch": 0.45379696943154946, "grad_norm": 0.6618158758915152, "learning_rate": 1.2138199513381996e-05, "loss": 0.5967, "step": 15543 }, { "epoch": 0.4538261656593968, "grad_norm": 0.6403906009790845, "learning_rate": 1.2137550689375508e-05, "loss": 0.5958, "step": 15544 }, { "epoch": 0.4538553618872442, "grad_norm": 0.622552993466292, "learning_rate": 1.213690186536902e-05, "loss": 0.6111, "step": 15545 }, { "epoch": 0.45388455811509154, "grad_norm": 0.602454588421309, "learning_rate": 1.213625304136253e-05, "loss": 0.531, "step": 15546 }, { "epoch": 0.4539137543429389, "grad_norm": 0.6780485814986488, "learning_rate": 1.2135604217356043e-05, "loss": 0.6621, "step": 15547 }, { "epoch": 0.45394295057078626, "grad_norm": 0.6648639842436238, "learning_rate": 1.2134955393349555e-05, "loss": 0.6593, "step": 15548 }, { "epoch": 0.4539721467986336, "grad_norm": 0.6659281900840237, "learning_rate": 1.2134306569343067e-05, "loss": 0.6017, "step": 15549 }, { "epoch": 0.454001343026481, "grad_norm": 0.6498946706634041, "learning_rate": 1.2133657745336579e-05, "loss": 0.6302, "step": 15550 }, { "epoch": 0.45403053925432835, "grad_norm": 0.6383916295901609, "learning_rate": 1.2133008921330089e-05, "loss": 0.6225, "step": 15551 }, { "epoch": 0.4540597354821757, "grad_norm": 0.6863336769770328, "learning_rate": 1.2132360097323601e-05, "loss": 0.6334, "step": 15552 }, { "epoch": 0.45408893171002307, "grad_norm": 0.6037094248701497, "learning_rate": 1.2131711273317113e-05, "loss": 0.5469, "step": 15553 }, { "epoch": 0.45411812793787043, "grad_norm": 0.6911220268439803, "learning_rate": 1.2131062449310625e-05, "loss": 0.6589, "step": 15554 }, { "epoch": 0.4541473241657178, "grad_norm": 0.6818967493833679, "learning_rate": 1.2130413625304139e-05, "loss": 0.6335, "step": 15555 }, { "epoch": 0.45417652039356515, "grad_norm": 0.6402958354634143, "learning_rate": 1.2129764801297649e-05, "loss": 0.6162, "step": 15556 }, { "epoch": 0.4542057166214125, "grad_norm": 0.6908111317730525, "learning_rate": 1.2129115977291161e-05, "loss": 0.6732, "step": 15557 }, { "epoch": 0.4542349128492599, "grad_norm": 0.6923282368884335, "learning_rate": 1.2128467153284673e-05, "loss": 0.6944, "step": 15558 }, { "epoch": 0.45426410907710724, "grad_norm": 0.6515240860654703, "learning_rate": 1.2127818329278185e-05, "loss": 0.6319, "step": 15559 }, { "epoch": 0.4542933053049546, "grad_norm": 0.6232195538536781, "learning_rate": 1.2127169505271697e-05, "loss": 0.5678, "step": 15560 }, { "epoch": 0.45432250153280196, "grad_norm": 0.6109395861839287, "learning_rate": 1.2126520681265208e-05, "loss": 0.5137, "step": 15561 }, { "epoch": 0.4543516977606493, "grad_norm": 0.661671048754951, "learning_rate": 1.212587185725872e-05, "loss": 0.704, "step": 15562 }, { "epoch": 0.4543808939884967, "grad_norm": 0.627845026456698, "learning_rate": 1.2125223033252232e-05, "loss": 0.577, "step": 15563 }, { "epoch": 0.45441009021634404, "grad_norm": 0.6995444994662411, "learning_rate": 1.2124574209245744e-05, "loss": 0.6769, "step": 15564 }, { "epoch": 0.4544392864441914, "grad_norm": 0.6458195748487674, "learning_rate": 1.2123925385239254e-05, "loss": 0.5777, "step": 15565 }, { "epoch": 0.45446848267203876, "grad_norm": 0.6798737032861037, "learning_rate": 1.2123276561232766e-05, "loss": 0.6768, "step": 15566 }, { "epoch": 0.4544976788998861, "grad_norm": 0.7138578984318107, "learning_rate": 1.2122627737226278e-05, "loss": 0.6968, "step": 15567 }, { "epoch": 0.4545268751277335, "grad_norm": 0.6503622624958398, "learning_rate": 1.212197891321979e-05, "loss": 0.5839, "step": 15568 }, { "epoch": 0.45455607135558085, "grad_norm": 0.6573029594750491, "learning_rate": 1.2121330089213302e-05, "loss": 0.6462, "step": 15569 }, { "epoch": 0.4545852675834282, "grad_norm": 0.6054083026392909, "learning_rate": 1.2120681265206812e-05, "loss": 0.5881, "step": 15570 }, { "epoch": 0.45461446381127557, "grad_norm": 0.6462686598333923, "learning_rate": 1.2120032441200324e-05, "loss": 0.6112, "step": 15571 }, { "epoch": 0.45464366003912293, "grad_norm": 0.5965569379928375, "learning_rate": 1.2119383617193836e-05, "loss": 0.5336, "step": 15572 }, { "epoch": 0.4546728562669703, "grad_norm": 0.6314719717194585, "learning_rate": 1.211873479318735e-05, "loss": 0.6212, "step": 15573 }, { "epoch": 0.45470205249481765, "grad_norm": 0.618596074370096, "learning_rate": 1.2118085969180862e-05, "loss": 0.5588, "step": 15574 }, { "epoch": 0.454731248722665, "grad_norm": 0.6690469708822742, "learning_rate": 1.2117437145174373e-05, "loss": 0.6784, "step": 15575 }, { "epoch": 0.4547604449505124, "grad_norm": 0.6037786049789552, "learning_rate": 1.2116788321167885e-05, "loss": 0.5347, "step": 15576 }, { "epoch": 0.45478964117835974, "grad_norm": 0.634452863514124, "learning_rate": 1.2116139497161397e-05, "loss": 0.5936, "step": 15577 }, { "epoch": 0.4548188374062071, "grad_norm": 0.6808604082261202, "learning_rate": 1.2115490673154909e-05, "loss": 0.6654, "step": 15578 }, { "epoch": 0.45484803363405446, "grad_norm": 0.9107178423389187, "learning_rate": 1.211484184914842e-05, "loss": 0.8079, "step": 15579 }, { "epoch": 0.4548772298619018, "grad_norm": 0.6058921479998357, "learning_rate": 1.2114193025141931e-05, "loss": 0.5355, "step": 15580 }, { "epoch": 0.4549064260897492, "grad_norm": 0.6979816206813474, "learning_rate": 1.2113544201135443e-05, "loss": 0.7168, "step": 15581 }, { "epoch": 0.45493562231759654, "grad_norm": 0.6189936001769281, "learning_rate": 1.2112895377128955e-05, "loss": 0.5818, "step": 15582 }, { "epoch": 0.4549648185454439, "grad_norm": 0.6618203958881655, "learning_rate": 1.2112246553122467e-05, "loss": 0.6345, "step": 15583 }, { "epoch": 0.45499401477329127, "grad_norm": 0.63913118305715, "learning_rate": 1.2111597729115977e-05, "loss": 0.6451, "step": 15584 }, { "epoch": 0.4550232110011386, "grad_norm": 0.6182300438894608, "learning_rate": 1.211094890510949e-05, "loss": 0.5943, "step": 15585 }, { "epoch": 0.455052407228986, "grad_norm": 0.689198428716279, "learning_rate": 1.2110300081103001e-05, "loss": 0.7645, "step": 15586 }, { "epoch": 0.45508160345683335, "grad_norm": 0.632582419307044, "learning_rate": 1.2109651257096513e-05, "loss": 0.5659, "step": 15587 }, { "epoch": 0.45511079968468077, "grad_norm": 0.6462658553395207, "learning_rate": 1.2109002433090025e-05, "loss": 0.5895, "step": 15588 }, { "epoch": 0.45513999591252813, "grad_norm": 0.6722366039500817, "learning_rate": 1.2108353609083536e-05, "loss": 0.6059, "step": 15589 }, { "epoch": 0.4551691921403755, "grad_norm": 0.6871890624773905, "learning_rate": 1.2107704785077048e-05, "loss": 0.6362, "step": 15590 }, { "epoch": 0.45519838836822285, "grad_norm": 0.6033355528121511, "learning_rate": 1.210705596107056e-05, "loss": 0.5984, "step": 15591 }, { "epoch": 0.4552275845960702, "grad_norm": 0.6424135676568266, "learning_rate": 1.2106407137064074e-05, "loss": 0.6188, "step": 15592 }, { "epoch": 0.4552567808239176, "grad_norm": 0.692688449367377, "learning_rate": 1.2105758313057586e-05, "loss": 0.6654, "step": 15593 }, { "epoch": 0.45528597705176493, "grad_norm": 0.5973499800319709, "learning_rate": 1.2105109489051096e-05, "loss": 0.5296, "step": 15594 }, { "epoch": 0.4553151732796123, "grad_norm": 0.6831896036164874, "learning_rate": 1.2104460665044608e-05, "loss": 0.6633, "step": 15595 }, { "epoch": 0.45534436950745966, "grad_norm": 0.5899103743558923, "learning_rate": 1.210381184103812e-05, "loss": 0.5316, "step": 15596 }, { "epoch": 0.455373565735307, "grad_norm": 0.6563300346039748, "learning_rate": 1.2103163017031632e-05, "loss": 0.6679, "step": 15597 }, { "epoch": 0.4554027619631544, "grad_norm": 0.6322660616352878, "learning_rate": 1.2102514193025144e-05, "loss": 0.604, "step": 15598 }, { "epoch": 0.45543195819100174, "grad_norm": 0.6432249789898647, "learning_rate": 1.2101865369018654e-05, "loss": 0.5962, "step": 15599 }, { "epoch": 0.4554611544188491, "grad_norm": 0.6765491724337851, "learning_rate": 1.2101216545012166e-05, "loss": 0.6931, "step": 15600 }, { "epoch": 0.45549035064669646, "grad_norm": 0.6691272107967737, "learning_rate": 1.2100567721005678e-05, "loss": 0.6428, "step": 15601 }, { "epoch": 0.4555195468745438, "grad_norm": 0.6164903560118822, "learning_rate": 1.209991889699919e-05, "loss": 0.5941, "step": 15602 }, { "epoch": 0.4555487431023912, "grad_norm": 0.6235555686941503, "learning_rate": 1.20992700729927e-05, "loss": 0.6004, "step": 15603 }, { "epoch": 0.45557793933023855, "grad_norm": 0.6990542959908166, "learning_rate": 1.2098621248986213e-05, "loss": 0.6379, "step": 15604 }, { "epoch": 0.4556071355580859, "grad_norm": 0.6700106933909665, "learning_rate": 1.2097972424979725e-05, "loss": 0.6151, "step": 15605 }, { "epoch": 0.45563633178593327, "grad_norm": 0.6740677061049278, "learning_rate": 1.2097323600973237e-05, "loss": 0.6574, "step": 15606 }, { "epoch": 0.45566552801378063, "grad_norm": 0.6617011629920381, "learning_rate": 1.2096674776966749e-05, "loss": 0.6741, "step": 15607 }, { "epoch": 0.455694724241628, "grad_norm": 0.7289829436995363, "learning_rate": 1.209602595296026e-05, "loss": 0.673, "step": 15608 }, { "epoch": 0.45572392046947535, "grad_norm": 0.6710353982974046, "learning_rate": 1.2095377128953771e-05, "loss": 0.613, "step": 15609 }, { "epoch": 0.4557531166973227, "grad_norm": 0.5965495227391989, "learning_rate": 1.2094728304947283e-05, "loss": 0.5574, "step": 15610 }, { "epoch": 0.4557823129251701, "grad_norm": 0.6741819632975937, "learning_rate": 1.2094079480940797e-05, "loss": 0.6473, "step": 15611 }, { "epoch": 0.45581150915301744, "grad_norm": 0.6793423037648805, "learning_rate": 1.2093430656934309e-05, "loss": 0.6126, "step": 15612 }, { "epoch": 0.4558407053808648, "grad_norm": 0.6635355657710896, "learning_rate": 1.209278183292782e-05, "loss": 0.6418, "step": 15613 }, { "epoch": 0.45586990160871216, "grad_norm": 0.6419279129283939, "learning_rate": 1.2092133008921331e-05, "loss": 0.5821, "step": 15614 }, { "epoch": 0.4558990978365595, "grad_norm": 0.6815714732350527, "learning_rate": 1.2091484184914843e-05, "loss": 0.7002, "step": 15615 }, { "epoch": 0.4559282940644069, "grad_norm": 0.6623909300409289, "learning_rate": 1.2090835360908355e-05, "loss": 0.624, "step": 15616 }, { "epoch": 0.45595749029225424, "grad_norm": 0.6575939577832416, "learning_rate": 1.2090186536901867e-05, "loss": 0.6653, "step": 15617 }, { "epoch": 0.4559866865201016, "grad_norm": 0.6420845241420264, "learning_rate": 1.2089537712895378e-05, "loss": 0.6033, "step": 15618 }, { "epoch": 0.45601588274794896, "grad_norm": 0.7021923172157494, "learning_rate": 1.208888888888889e-05, "loss": 0.7179, "step": 15619 }, { "epoch": 0.4560450789757963, "grad_norm": 0.6441813233860909, "learning_rate": 1.2088240064882402e-05, "loss": 0.6203, "step": 15620 }, { "epoch": 0.4560742752036437, "grad_norm": 0.6222353978364237, "learning_rate": 1.2087591240875914e-05, "loss": 0.5552, "step": 15621 }, { "epoch": 0.45610347143149105, "grad_norm": 0.6654497493849374, "learning_rate": 1.2086942416869424e-05, "loss": 0.5709, "step": 15622 }, { "epoch": 0.4561326676593384, "grad_norm": 0.6412034321592254, "learning_rate": 1.2086293592862936e-05, "loss": 0.6235, "step": 15623 }, { "epoch": 0.45616186388718577, "grad_norm": 0.6851144445681729, "learning_rate": 1.2085644768856448e-05, "loss": 0.6429, "step": 15624 }, { "epoch": 0.45619106011503313, "grad_norm": 0.6564707928364368, "learning_rate": 1.208499594484996e-05, "loss": 0.6414, "step": 15625 }, { "epoch": 0.4562202563428805, "grad_norm": 0.6001840405616361, "learning_rate": 1.2084347120843472e-05, "loss": 0.546, "step": 15626 }, { "epoch": 0.45624945257072785, "grad_norm": 0.6217206450409317, "learning_rate": 1.2083698296836983e-05, "loss": 0.6338, "step": 15627 }, { "epoch": 0.4562786487985752, "grad_norm": 0.6512566754260979, "learning_rate": 1.2083049472830495e-05, "loss": 0.6616, "step": 15628 }, { "epoch": 0.4563078450264226, "grad_norm": 0.6668068536739158, "learning_rate": 1.2082400648824007e-05, "loss": 0.6233, "step": 15629 }, { "epoch": 0.45633704125426994, "grad_norm": 0.6656128738304599, "learning_rate": 1.208175182481752e-05, "loss": 0.6159, "step": 15630 }, { "epoch": 0.4563662374821173, "grad_norm": 0.6565239144149838, "learning_rate": 1.2081103000811032e-05, "loss": 0.6676, "step": 15631 }, { "epoch": 0.45639543370996466, "grad_norm": 0.6200019919334407, "learning_rate": 1.2080454176804543e-05, "loss": 0.5901, "step": 15632 }, { "epoch": 0.456424629937812, "grad_norm": 0.6432609217950003, "learning_rate": 1.2079805352798055e-05, "loss": 0.5775, "step": 15633 }, { "epoch": 0.4564538261656594, "grad_norm": 0.5992401277741182, "learning_rate": 1.2079156528791567e-05, "loss": 0.5458, "step": 15634 }, { "epoch": 0.45648302239350674, "grad_norm": 0.7141738546652147, "learning_rate": 1.2078507704785079e-05, "loss": 0.7297, "step": 15635 }, { "epoch": 0.4565122186213541, "grad_norm": 0.659020832430743, "learning_rate": 1.2077858880778591e-05, "loss": 0.6414, "step": 15636 }, { "epoch": 0.45654141484920147, "grad_norm": 0.6331586595847188, "learning_rate": 1.2077210056772101e-05, "loss": 0.6109, "step": 15637 }, { "epoch": 0.4565706110770488, "grad_norm": 0.616661975435671, "learning_rate": 1.2076561232765613e-05, "loss": 0.566, "step": 15638 }, { "epoch": 0.4565998073048962, "grad_norm": 0.5939265882797915, "learning_rate": 1.2075912408759125e-05, "loss": 0.5458, "step": 15639 }, { "epoch": 0.45662900353274355, "grad_norm": 0.6908618787593377, "learning_rate": 1.2075263584752637e-05, "loss": 0.6972, "step": 15640 }, { "epoch": 0.4566581997605909, "grad_norm": 0.6303544578401492, "learning_rate": 1.2074614760746148e-05, "loss": 0.6009, "step": 15641 }, { "epoch": 0.45668739598843827, "grad_norm": 0.6122816255150729, "learning_rate": 1.207396593673966e-05, "loss": 0.599, "step": 15642 }, { "epoch": 0.45671659221628563, "grad_norm": 0.624260368216855, "learning_rate": 1.2073317112733172e-05, "loss": 0.6003, "step": 15643 }, { "epoch": 0.456745788444133, "grad_norm": 0.739344505246737, "learning_rate": 1.2072668288726684e-05, "loss": 0.7176, "step": 15644 }, { "epoch": 0.45677498467198036, "grad_norm": 0.6184923043221159, "learning_rate": 1.2072019464720194e-05, "loss": 0.5536, "step": 15645 }, { "epoch": 0.4568041808998277, "grad_norm": 0.6399015268931945, "learning_rate": 1.2071370640713706e-05, "loss": 0.5757, "step": 15646 }, { "epoch": 0.4568333771276751, "grad_norm": 0.6424076089600804, "learning_rate": 1.2070721816707218e-05, "loss": 0.6179, "step": 15647 }, { "epoch": 0.45686257335552244, "grad_norm": 0.5914510028594098, "learning_rate": 1.207007299270073e-05, "loss": 0.5312, "step": 15648 }, { "epoch": 0.45689176958336986, "grad_norm": 0.6777111026097654, "learning_rate": 1.2069424168694244e-05, "loss": 0.6109, "step": 15649 }, { "epoch": 0.4569209658112172, "grad_norm": 0.6366213646478948, "learning_rate": 1.2068775344687756e-05, "loss": 0.5814, "step": 15650 }, { "epoch": 0.4569501620390646, "grad_norm": 0.6477887717493107, "learning_rate": 1.2068126520681266e-05, "loss": 0.6249, "step": 15651 }, { "epoch": 0.45697935826691194, "grad_norm": 0.6150861271272094, "learning_rate": 1.2067477696674778e-05, "loss": 0.5703, "step": 15652 }, { "epoch": 0.4570085544947593, "grad_norm": 0.6689589001471236, "learning_rate": 1.206682887266829e-05, "loss": 0.5336, "step": 15653 }, { "epoch": 0.45703775072260666, "grad_norm": 0.6488547869334614, "learning_rate": 1.2066180048661802e-05, "loss": 0.6313, "step": 15654 }, { "epoch": 0.457066946950454, "grad_norm": 0.7262756733529434, "learning_rate": 1.2065531224655314e-05, "loss": 0.6728, "step": 15655 }, { "epoch": 0.4570961431783014, "grad_norm": 0.7117732248446812, "learning_rate": 1.2064882400648825e-05, "loss": 0.7813, "step": 15656 }, { "epoch": 0.45712533940614875, "grad_norm": 0.6751773069457718, "learning_rate": 1.2064233576642337e-05, "loss": 0.622, "step": 15657 }, { "epoch": 0.4571545356339961, "grad_norm": 0.6063610111460557, "learning_rate": 1.2063584752635849e-05, "loss": 0.5522, "step": 15658 }, { "epoch": 0.45718373186184347, "grad_norm": 0.6569765229441693, "learning_rate": 1.206293592862936e-05, "loss": 0.593, "step": 15659 }, { "epoch": 0.45721292808969083, "grad_norm": 0.6414914561371653, "learning_rate": 1.2062287104622871e-05, "loss": 0.6041, "step": 15660 }, { "epoch": 0.4572421243175382, "grad_norm": 0.6204872427239965, "learning_rate": 1.2061638280616383e-05, "loss": 0.5735, "step": 15661 }, { "epoch": 0.45727132054538555, "grad_norm": 0.6290996536529175, "learning_rate": 1.2060989456609895e-05, "loss": 0.6083, "step": 15662 }, { "epoch": 0.4573005167732329, "grad_norm": 0.6391661132287466, "learning_rate": 1.2060340632603407e-05, "loss": 0.5788, "step": 15663 }, { "epoch": 0.4573297130010803, "grad_norm": 0.6100222816389779, "learning_rate": 1.2059691808596918e-05, "loss": 0.5591, "step": 15664 }, { "epoch": 0.45735890922892763, "grad_norm": 0.6363072785795535, "learning_rate": 1.205904298459043e-05, "loss": 0.6252, "step": 15665 }, { "epoch": 0.457388105456775, "grad_norm": 0.6804046628967229, "learning_rate": 1.2058394160583942e-05, "loss": 0.6786, "step": 15666 }, { "epoch": 0.45741730168462236, "grad_norm": 0.6533108530074172, "learning_rate": 1.2057745336577454e-05, "loss": 0.632, "step": 15667 }, { "epoch": 0.4574464979124697, "grad_norm": 0.6895133159766514, "learning_rate": 1.2057096512570967e-05, "loss": 0.7242, "step": 15668 }, { "epoch": 0.4574756941403171, "grad_norm": 0.6371506292960893, "learning_rate": 1.205644768856448e-05, "loss": 0.5982, "step": 15669 }, { "epoch": 0.45750489036816444, "grad_norm": 0.584503885734339, "learning_rate": 1.205579886455799e-05, "loss": 0.5303, "step": 15670 }, { "epoch": 0.4575340865960118, "grad_norm": 0.634616364731244, "learning_rate": 1.2055150040551502e-05, "loss": 0.6301, "step": 15671 }, { "epoch": 0.45756328282385916, "grad_norm": 0.5940918930960779, "learning_rate": 1.2054501216545014e-05, "loss": 0.5271, "step": 15672 }, { "epoch": 0.4575924790517065, "grad_norm": 0.6784736576413812, "learning_rate": 1.2053852392538526e-05, "loss": 0.6582, "step": 15673 }, { "epoch": 0.4576216752795539, "grad_norm": 0.6143741353823464, "learning_rate": 1.2053203568532038e-05, "loss": 0.5726, "step": 15674 }, { "epoch": 0.45765087150740125, "grad_norm": 0.6177432915655352, "learning_rate": 1.2052554744525548e-05, "loss": 0.553, "step": 15675 }, { "epoch": 0.4576800677352486, "grad_norm": 0.6307281445574092, "learning_rate": 1.205190592051906e-05, "loss": 0.5551, "step": 15676 }, { "epoch": 0.45770926396309597, "grad_norm": 0.628331303123325, "learning_rate": 1.2051257096512572e-05, "loss": 0.6091, "step": 15677 }, { "epoch": 0.45773846019094333, "grad_norm": 0.6176150917832441, "learning_rate": 1.2050608272506084e-05, "loss": 0.5744, "step": 15678 }, { "epoch": 0.4577676564187907, "grad_norm": 0.6402750613957646, "learning_rate": 1.2049959448499595e-05, "loss": 0.5957, "step": 15679 }, { "epoch": 0.45779685264663805, "grad_norm": 0.6657544156811619, "learning_rate": 1.2049310624493107e-05, "loss": 0.629, "step": 15680 }, { "epoch": 0.4578260488744854, "grad_norm": 0.6015530697311465, "learning_rate": 1.2048661800486619e-05, "loss": 0.505, "step": 15681 }, { "epoch": 0.4578552451023328, "grad_norm": 0.6146693396795296, "learning_rate": 1.204801297648013e-05, "loss": 0.5817, "step": 15682 }, { "epoch": 0.45788444133018014, "grad_norm": 0.6678675344327858, "learning_rate": 1.2047364152473641e-05, "loss": 0.619, "step": 15683 }, { "epoch": 0.4579136375580275, "grad_norm": 0.6405994319794428, "learning_rate": 1.2046715328467153e-05, "loss": 0.6381, "step": 15684 }, { "epoch": 0.45794283378587486, "grad_norm": 0.6591309253445662, "learning_rate": 1.2046066504460665e-05, "loss": 0.6276, "step": 15685 }, { "epoch": 0.4579720300137222, "grad_norm": 0.6613750514697042, "learning_rate": 1.2045417680454177e-05, "loss": 0.6463, "step": 15686 }, { "epoch": 0.4580012262415696, "grad_norm": 0.6678886411547079, "learning_rate": 1.204476885644769e-05, "loss": 0.652, "step": 15687 }, { "epoch": 0.45803042246941694, "grad_norm": 0.6213458412095166, "learning_rate": 1.2044120032441203e-05, "loss": 0.5535, "step": 15688 }, { "epoch": 0.4580596186972643, "grad_norm": 0.6096707937770781, "learning_rate": 1.2043471208434713e-05, "loss": 0.5645, "step": 15689 }, { "epoch": 0.45808881492511166, "grad_norm": 0.6294262171056112, "learning_rate": 1.2042822384428225e-05, "loss": 0.5717, "step": 15690 }, { "epoch": 0.458118011152959, "grad_norm": 0.6236284406973992, "learning_rate": 1.2042173560421737e-05, "loss": 0.5649, "step": 15691 }, { "epoch": 0.4581472073808064, "grad_norm": 0.6288707212576423, "learning_rate": 1.204152473641525e-05, "loss": 0.5724, "step": 15692 }, { "epoch": 0.45817640360865375, "grad_norm": 0.60787468181023, "learning_rate": 1.2040875912408761e-05, "loss": 0.597, "step": 15693 }, { "epoch": 0.4582055998365011, "grad_norm": 0.6648909630077138, "learning_rate": 1.2040227088402272e-05, "loss": 0.6788, "step": 15694 }, { "epoch": 0.45823479606434847, "grad_norm": 0.6944751246586822, "learning_rate": 1.2039578264395784e-05, "loss": 0.7054, "step": 15695 }, { "epoch": 0.45826399229219583, "grad_norm": 0.6980849480700954, "learning_rate": 1.2038929440389296e-05, "loss": 0.6842, "step": 15696 }, { "epoch": 0.4582931885200432, "grad_norm": 0.6793993782047678, "learning_rate": 1.2038280616382808e-05, "loss": 0.6779, "step": 15697 }, { "epoch": 0.45832238474789055, "grad_norm": 0.6604628631622739, "learning_rate": 1.2037631792376318e-05, "loss": 0.6291, "step": 15698 }, { "epoch": 0.4583515809757379, "grad_norm": 0.6709727335607476, "learning_rate": 1.203698296836983e-05, "loss": 0.6761, "step": 15699 }, { "epoch": 0.4583807772035853, "grad_norm": 0.6718341306463047, "learning_rate": 1.2036334144363342e-05, "loss": 0.6349, "step": 15700 }, { "epoch": 0.45840997343143264, "grad_norm": 0.6249487626930841, "learning_rate": 1.2035685320356854e-05, "loss": 0.6249, "step": 15701 }, { "epoch": 0.45843916965928, "grad_norm": 0.6170126674173113, "learning_rate": 1.2035036496350364e-05, "loss": 0.5841, "step": 15702 }, { "epoch": 0.45846836588712736, "grad_norm": 0.6314290461724194, "learning_rate": 1.2034387672343876e-05, "loss": 0.581, "step": 15703 }, { "epoch": 0.4584975621149747, "grad_norm": 0.5940056863660376, "learning_rate": 1.2033738848337388e-05, "loss": 0.5359, "step": 15704 }, { "epoch": 0.4585267583428221, "grad_norm": 0.6578426809259376, "learning_rate": 1.20330900243309e-05, "loss": 0.6437, "step": 15705 }, { "epoch": 0.45855595457066944, "grad_norm": 0.5763516054653283, "learning_rate": 1.2032441200324414e-05, "loss": 0.5049, "step": 15706 }, { "epoch": 0.4585851507985168, "grad_norm": 0.6936538954858953, "learning_rate": 1.2031792376317926e-05, "loss": 0.6857, "step": 15707 }, { "epoch": 0.45861434702636417, "grad_norm": 0.6322643383429102, "learning_rate": 1.2031143552311437e-05, "loss": 0.5504, "step": 15708 }, { "epoch": 0.4586435432542116, "grad_norm": 0.6441022642109726, "learning_rate": 1.2030494728304949e-05, "loss": 0.6032, "step": 15709 }, { "epoch": 0.45867273948205894, "grad_norm": 0.6626749416858226, "learning_rate": 1.202984590429846e-05, "loss": 0.6722, "step": 15710 }, { "epoch": 0.4587019357099063, "grad_norm": 0.5834573162925576, "learning_rate": 1.2029197080291973e-05, "loss": 0.5043, "step": 15711 }, { "epoch": 0.45873113193775367, "grad_norm": 0.6844031392127218, "learning_rate": 1.2028548256285485e-05, "loss": 0.6674, "step": 15712 }, { "epoch": 0.45876032816560103, "grad_norm": 0.6722437594199199, "learning_rate": 1.2027899432278995e-05, "loss": 0.6472, "step": 15713 }, { "epoch": 0.4587895243934484, "grad_norm": 0.6460569639765541, "learning_rate": 1.2027250608272507e-05, "loss": 0.6283, "step": 15714 }, { "epoch": 0.45881872062129575, "grad_norm": 0.67734023235564, "learning_rate": 1.2026601784266019e-05, "loss": 0.6905, "step": 15715 }, { "epoch": 0.4588479168491431, "grad_norm": 0.7292712642751809, "learning_rate": 1.2025952960259531e-05, "loss": 0.6997, "step": 15716 }, { "epoch": 0.4588771130769905, "grad_norm": 0.6468401407031777, "learning_rate": 1.2025304136253041e-05, "loss": 0.6312, "step": 15717 }, { "epoch": 0.45890630930483783, "grad_norm": 0.6475553774833066, "learning_rate": 1.2024655312246553e-05, "loss": 0.6634, "step": 15718 }, { "epoch": 0.4589355055326852, "grad_norm": 0.6649867609967234, "learning_rate": 1.2024006488240065e-05, "loss": 0.6935, "step": 15719 }, { "epoch": 0.45896470176053256, "grad_norm": 0.6671458091998814, "learning_rate": 1.2023357664233577e-05, "loss": 0.6093, "step": 15720 }, { "epoch": 0.4589938979883799, "grad_norm": 0.6888523874996119, "learning_rate": 1.2022708840227088e-05, "loss": 0.6716, "step": 15721 }, { "epoch": 0.4590230942162273, "grad_norm": 0.7226278523480463, "learning_rate": 1.20220600162206e-05, "loss": 0.7216, "step": 15722 }, { "epoch": 0.45905229044407464, "grad_norm": 0.6113528555840775, "learning_rate": 1.2021411192214112e-05, "loss": 0.5694, "step": 15723 }, { "epoch": 0.459081486671922, "grad_norm": 0.6341993203612531, "learning_rate": 1.2020762368207626e-05, "loss": 0.6181, "step": 15724 }, { "epoch": 0.45911068289976936, "grad_norm": 0.6212057992620547, "learning_rate": 1.2020113544201138e-05, "loss": 0.5867, "step": 15725 }, { "epoch": 0.4591398791276167, "grad_norm": 0.6122806564777925, "learning_rate": 1.201946472019465e-05, "loss": 0.5541, "step": 15726 }, { "epoch": 0.4591690753554641, "grad_norm": 0.6681289892099584, "learning_rate": 1.201881589618816e-05, "loss": 0.6447, "step": 15727 }, { "epoch": 0.45919827158331145, "grad_norm": 0.6689663341124206, "learning_rate": 1.2018167072181672e-05, "loss": 0.6342, "step": 15728 }, { "epoch": 0.4592274678111588, "grad_norm": 0.6630804196890466, "learning_rate": 1.2017518248175184e-05, "loss": 0.5742, "step": 15729 }, { "epoch": 0.45925666403900617, "grad_norm": 0.623365477163117, "learning_rate": 1.2016869424168696e-05, "loss": 0.5662, "step": 15730 }, { "epoch": 0.45928586026685353, "grad_norm": 0.6007499552495111, "learning_rate": 1.2016220600162208e-05, "loss": 0.5692, "step": 15731 }, { "epoch": 0.4593150564947009, "grad_norm": 0.7011707523288137, "learning_rate": 1.2015571776155718e-05, "loss": 0.6955, "step": 15732 }, { "epoch": 0.45934425272254825, "grad_norm": 0.5943445021145728, "learning_rate": 1.201492295214923e-05, "loss": 0.5557, "step": 15733 }, { "epoch": 0.4593734489503956, "grad_norm": 0.5897507913959888, "learning_rate": 1.2014274128142742e-05, "loss": 0.5445, "step": 15734 }, { "epoch": 0.459402645178243, "grad_norm": 0.6724914471582731, "learning_rate": 1.2013625304136255e-05, "loss": 0.6784, "step": 15735 }, { "epoch": 0.45943184140609034, "grad_norm": 0.6958989205993631, "learning_rate": 1.2012976480129765e-05, "loss": 0.7347, "step": 15736 }, { "epoch": 0.4594610376339377, "grad_norm": 0.6579766226507254, "learning_rate": 1.2012327656123277e-05, "loss": 0.6506, "step": 15737 }, { "epoch": 0.45949023386178506, "grad_norm": 0.6405362394456006, "learning_rate": 1.2011678832116789e-05, "loss": 0.6038, "step": 15738 }, { "epoch": 0.4595194300896324, "grad_norm": 0.6635459721059948, "learning_rate": 1.2011030008110301e-05, "loss": 0.6191, "step": 15739 }, { "epoch": 0.4595486263174798, "grad_norm": 0.6233266122711723, "learning_rate": 1.2010381184103811e-05, "loss": 0.5648, "step": 15740 }, { "epoch": 0.45957782254532714, "grad_norm": 0.6327779550464073, "learning_rate": 1.2009732360097323e-05, "loss": 0.6012, "step": 15741 }, { "epoch": 0.4596070187731745, "grad_norm": 0.7054551773194639, "learning_rate": 1.2009083536090835e-05, "loss": 0.7276, "step": 15742 }, { "epoch": 0.45963621500102186, "grad_norm": 0.6079106065440445, "learning_rate": 1.2008434712084349e-05, "loss": 0.5559, "step": 15743 }, { "epoch": 0.4596654112288692, "grad_norm": 0.6410210112786399, "learning_rate": 1.2007785888077861e-05, "loss": 0.606, "step": 15744 }, { "epoch": 0.4596946074567166, "grad_norm": 0.6226999078424788, "learning_rate": 1.2007137064071373e-05, "loss": 0.5955, "step": 15745 }, { "epoch": 0.45972380368456395, "grad_norm": 0.6144210441554264, "learning_rate": 1.2006488240064883e-05, "loss": 0.5921, "step": 15746 }, { "epoch": 0.4597529999124113, "grad_norm": 0.6424627331395949, "learning_rate": 1.2005839416058395e-05, "loss": 0.5997, "step": 15747 }, { "epoch": 0.45978219614025867, "grad_norm": 0.6909767621971468, "learning_rate": 1.2005190592051907e-05, "loss": 0.6523, "step": 15748 }, { "epoch": 0.45981139236810603, "grad_norm": 0.6537404781845351, "learning_rate": 1.200454176804542e-05, "loss": 0.5951, "step": 15749 }, { "epoch": 0.4598405885959534, "grad_norm": 0.6484838416930653, "learning_rate": 1.2003892944038932e-05, "loss": 0.6197, "step": 15750 }, { "epoch": 0.45986978482380075, "grad_norm": 0.6493815705105371, "learning_rate": 1.2003244120032442e-05, "loss": 0.6357, "step": 15751 }, { "epoch": 0.4598989810516481, "grad_norm": 0.6554248608778236, "learning_rate": 1.2002595296025954e-05, "loss": 0.6604, "step": 15752 }, { "epoch": 0.4599281772794955, "grad_norm": 0.6957715942360732, "learning_rate": 1.2001946472019466e-05, "loss": 0.7184, "step": 15753 }, { "epoch": 0.45995737350734284, "grad_norm": 0.6308331274716593, "learning_rate": 1.2001297648012978e-05, "loss": 0.6167, "step": 15754 }, { "epoch": 0.4599865697351902, "grad_norm": 0.6634075726697286, "learning_rate": 1.2000648824006488e-05, "loss": 0.6739, "step": 15755 }, { "epoch": 0.46001576596303756, "grad_norm": 0.6961712202759178, "learning_rate": 1.2e-05, "loss": 0.6878, "step": 15756 }, { "epoch": 0.4600449621908849, "grad_norm": 0.5954729248725474, "learning_rate": 1.1999351175993512e-05, "loss": 0.5382, "step": 15757 }, { "epoch": 0.4600741584187323, "grad_norm": 0.691649700378943, "learning_rate": 1.1998702351987024e-05, "loss": 0.6224, "step": 15758 }, { "epoch": 0.46010335464657964, "grad_norm": 0.6868985511485111, "learning_rate": 1.1998053527980535e-05, "loss": 0.6545, "step": 15759 }, { "epoch": 0.460132550874427, "grad_norm": 0.6879393347089853, "learning_rate": 1.1997404703974047e-05, "loss": 0.6916, "step": 15760 }, { "epoch": 0.46016174710227437, "grad_norm": 0.6133912432250321, "learning_rate": 1.1996755879967559e-05, "loss": 0.5906, "step": 15761 }, { "epoch": 0.4601909433301217, "grad_norm": 0.6493901847140716, "learning_rate": 1.1996107055961072e-05, "loss": 0.6043, "step": 15762 }, { "epoch": 0.4602201395579691, "grad_norm": 0.6388710865933545, "learning_rate": 1.1995458231954584e-05, "loss": 0.6179, "step": 15763 }, { "epoch": 0.46024933578581645, "grad_norm": 0.7122031050327033, "learning_rate": 1.1994809407948097e-05, "loss": 0.6818, "step": 15764 }, { "epoch": 0.4602785320136638, "grad_norm": 0.6264280635950461, "learning_rate": 1.1994160583941607e-05, "loss": 0.576, "step": 15765 }, { "epoch": 0.46030772824151117, "grad_norm": 0.6254489871422199, "learning_rate": 1.1993511759935119e-05, "loss": 0.5821, "step": 15766 }, { "epoch": 0.46033692446935853, "grad_norm": 0.6246470341341289, "learning_rate": 1.1992862935928631e-05, "loss": 0.6109, "step": 15767 }, { "epoch": 0.4603661206972059, "grad_norm": 0.6407728091710849, "learning_rate": 1.1992214111922143e-05, "loss": 0.6147, "step": 15768 }, { "epoch": 0.4603953169250533, "grad_norm": 0.6579830518078581, "learning_rate": 1.1991565287915653e-05, "loss": 0.6122, "step": 15769 }, { "epoch": 0.46042451315290067, "grad_norm": 0.6211988127043623, "learning_rate": 1.1990916463909165e-05, "loss": 0.6095, "step": 15770 }, { "epoch": 0.46045370938074803, "grad_norm": 0.6534653492339634, "learning_rate": 1.1990267639902677e-05, "loss": 0.6435, "step": 15771 }, { "epoch": 0.4604829056085954, "grad_norm": 0.6152836550874905, "learning_rate": 1.198961881589619e-05, "loss": 0.5477, "step": 15772 }, { "epoch": 0.46051210183644276, "grad_norm": 0.7093430940271598, "learning_rate": 1.1988969991889701e-05, "loss": 0.6848, "step": 15773 }, { "epoch": 0.4605412980642901, "grad_norm": 0.6214183832031227, "learning_rate": 1.1988321167883212e-05, "loss": 0.6287, "step": 15774 }, { "epoch": 0.4605704942921375, "grad_norm": 0.6250012236663454, "learning_rate": 1.1987672343876724e-05, "loss": 0.5534, "step": 15775 }, { "epoch": 0.46059969051998484, "grad_norm": 0.6412680498119899, "learning_rate": 1.1987023519870236e-05, "loss": 0.6522, "step": 15776 }, { "epoch": 0.4606288867478322, "grad_norm": 0.634326812793119, "learning_rate": 1.1986374695863748e-05, "loss": 0.5524, "step": 15777 }, { "epoch": 0.46065808297567956, "grad_norm": 0.6565838200680001, "learning_rate": 1.1985725871857258e-05, "loss": 0.5925, "step": 15778 }, { "epoch": 0.4606872792035269, "grad_norm": 0.6438550578101977, "learning_rate": 1.198507704785077e-05, "loss": 0.6179, "step": 15779 }, { "epoch": 0.4607164754313743, "grad_norm": 0.6573500779424852, "learning_rate": 1.1984428223844282e-05, "loss": 0.6638, "step": 15780 }, { "epoch": 0.46074567165922165, "grad_norm": 0.7700235035352024, "learning_rate": 1.1983779399837796e-05, "loss": 0.7427, "step": 15781 }, { "epoch": 0.460774867887069, "grad_norm": 0.6477855747139324, "learning_rate": 1.1983130575831308e-05, "loss": 0.5939, "step": 15782 }, { "epoch": 0.46080406411491637, "grad_norm": 0.716508774344151, "learning_rate": 1.198248175182482e-05, "loss": 0.6856, "step": 15783 }, { "epoch": 0.46083326034276373, "grad_norm": 0.6601847266440094, "learning_rate": 1.198183292781833e-05, "loss": 0.6143, "step": 15784 }, { "epoch": 0.4608624565706111, "grad_norm": 0.6608099258033943, "learning_rate": 1.1981184103811842e-05, "loss": 0.6564, "step": 15785 }, { "epoch": 0.46089165279845845, "grad_norm": 0.6646953256854409, "learning_rate": 1.1980535279805354e-05, "loss": 0.6728, "step": 15786 }, { "epoch": 0.4609208490263058, "grad_norm": 0.6851656203356652, "learning_rate": 1.1979886455798866e-05, "loss": 0.6575, "step": 15787 }, { "epoch": 0.4609500452541532, "grad_norm": 0.6235641669173123, "learning_rate": 1.1979237631792377e-05, "loss": 0.6038, "step": 15788 }, { "epoch": 0.46097924148200053, "grad_norm": 0.6251636725810311, "learning_rate": 1.1978588807785889e-05, "loss": 0.5889, "step": 15789 }, { "epoch": 0.4610084377098479, "grad_norm": 0.6230731043847357, "learning_rate": 1.19779399837794e-05, "loss": 0.5932, "step": 15790 }, { "epoch": 0.46103763393769526, "grad_norm": 0.6629357340879346, "learning_rate": 1.1977291159772913e-05, "loss": 0.6401, "step": 15791 }, { "epoch": 0.4610668301655426, "grad_norm": 0.6671480114592205, "learning_rate": 1.1976642335766425e-05, "loss": 0.6289, "step": 15792 }, { "epoch": 0.46109602639339, "grad_norm": 0.6071772666296128, "learning_rate": 1.1975993511759935e-05, "loss": 0.5371, "step": 15793 }, { "epoch": 0.46112522262123734, "grad_norm": 0.5794014586159477, "learning_rate": 1.1975344687753447e-05, "loss": 0.5132, "step": 15794 }, { "epoch": 0.4611544188490847, "grad_norm": 0.6256459688829037, "learning_rate": 1.197469586374696e-05, "loss": 0.5921, "step": 15795 }, { "epoch": 0.46118361507693206, "grad_norm": 0.6219268130081292, "learning_rate": 1.1974047039740471e-05, "loss": 0.6364, "step": 15796 }, { "epoch": 0.4612128113047794, "grad_norm": 0.648075882822396, "learning_rate": 1.1973398215733982e-05, "loss": 0.5855, "step": 15797 }, { "epoch": 0.4612420075326268, "grad_norm": 0.5735327737810753, "learning_rate": 1.1972749391727494e-05, "loss": 0.4844, "step": 15798 }, { "epoch": 0.46127120376047415, "grad_norm": 0.6569648034557699, "learning_rate": 1.1972100567721006e-05, "loss": 0.6488, "step": 15799 }, { "epoch": 0.4613003999883215, "grad_norm": 0.6537031558823655, "learning_rate": 1.197145174371452e-05, "loss": 0.6489, "step": 15800 }, { "epoch": 0.46132959621616887, "grad_norm": 0.6821681368707636, "learning_rate": 1.1970802919708031e-05, "loss": 0.6398, "step": 15801 }, { "epoch": 0.46135879244401623, "grad_norm": 0.6535561486003103, "learning_rate": 1.1970154095701543e-05, "loss": 0.6522, "step": 15802 }, { "epoch": 0.4613879886718636, "grad_norm": 0.6535460299889967, "learning_rate": 1.1969505271695054e-05, "loss": 0.6108, "step": 15803 }, { "epoch": 0.46141718489971095, "grad_norm": 0.6272705113758368, "learning_rate": 1.1968856447688566e-05, "loss": 0.64, "step": 15804 }, { "epoch": 0.4614463811275583, "grad_norm": 0.6490740988919496, "learning_rate": 1.1968207623682078e-05, "loss": 0.6156, "step": 15805 }, { "epoch": 0.4614755773554057, "grad_norm": 0.7449318178232214, "learning_rate": 1.196755879967559e-05, "loss": 0.7073, "step": 15806 }, { "epoch": 0.46150477358325304, "grad_norm": 0.7216057731368669, "learning_rate": 1.19669099756691e-05, "loss": 0.7034, "step": 15807 }, { "epoch": 0.4615339698111004, "grad_norm": 0.6547358208763832, "learning_rate": 1.1966261151662612e-05, "loss": 0.6449, "step": 15808 }, { "epoch": 0.46156316603894776, "grad_norm": 0.5860593220213082, "learning_rate": 1.1965612327656124e-05, "loss": 0.5107, "step": 15809 }, { "epoch": 0.4615923622667951, "grad_norm": 0.6416700249026795, "learning_rate": 1.1964963503649636e-05, "loss": 0.6039, "step": 15810 }, { "epoch": 0.4616215584946425, "grad_norm": 0.6271003946484508, "learning_rate": 1.1964314679643148e-05, "loss": 0.5467, "step": 15811 }, { "epoch": 0.46165075472248984, "grad_norm": 0.7229619309867673, "learning_rate": 1.1963665855636659e-05, "loss": 0.6687, "step": 15812 }, { "epoch": 0.4616799509503372, "grad_norm": 0.6456736473573975, "learning_rate": 1.196301703163017e-05, "loss": 0.6224, "step": 15813 }, { "epoch": 0.46170914717818456, "grad_norm": 0.5931163047092252, "learning_rate": 1.1962368207623683e-05, "loss": 0.5286, "step": 15814 }, { "epoch": 0.4617383434060319, "grad_norm": 0.6547917231665548, "learning_rate": 1.1961719383617195e-05, "loss": 0.5775, "step": 15815 }, { "epoch": 0.4617675396338793, "grad_norm": 0.7215927967351113, "learning_rate": 1.1961070559610705e-05, "loss": 0.6928, "step": 15816 }, { "epoch": 0.46179673586172665, "grad_norm": 0.6834797321093741, "learning_rate": 1.1960421735604217e-05, "loss": 0.6737, "step": 15817 }, { "epoch": 0.461825932089574, "grad_norm": 0.6356548792616902, "learning_rate": 1.1959772911597729e-05, "loss": 0.6208, "step": 15818 }, { "epoch": 0.46185512831742137, "grad_norm": 0.636047328914548, "learning_rate": 1.1959124087591243e-05, "loss": 0.6279, "step": 15819 }, { "epoch": 0.46188432454526873, "grad_norm": 0.6795130988721765, "learning_rate": 1.1958475263584755e-05, "loss": 0.646, "step": 15820 }, { "epoch": 0.4619135207731161, "grad_norm": 0.7011847919200258, "learning_rate": 1.1957826439578267e-05, "loss": 0.6641, "step": 15821 }, { "epoch": 0.46194271700096345, "grad_norm": 0.6216231373582932, "learning_rate": 1.1957177615571777e-05, "loss": 0.5407, "step": 15822 }, { "epoch": 0.4619719132288108, "grad_norm": 0.5867346665368137, "learning_rate": 1.195652879156529e-05, "loss": 0.4867, "step": 15823 }, { "epoch": 0.4620011094566582, "grad_norm": 0.653172917794722, "learning_rate": 1.1955879967558801e-05, "loss": 0.5665, "step": 15824 }, { "epoch": 0.46203030568450554, "grad_norm": 0.6189220008614307, "learning_rate": 1.1955231143552313e-05, "loss": 0.5695, "step": 15825 }, { "epoch": 0.4620595019123529, "grad_norm": 0.684581996901049, "learning_rate": 1.1954582319545824e-05, "loss": 0.7146, "step": 15826 }, { "epoch": 0.46208869814020026, "grad_norm": 0.6170234367360541, "learning_rate": 1.1953933495539336e-05, "loss": 0.5802, "step": 15827 }, { "epoch": 0.4621178943680476, "grad_norm": 0.639493884826635, "learning_rate": 1.1953284671532848e-05, "loss": 0.5969, "step": 15828 }, { "epoch": 0.462147090595895, "grad_norm": 0.6945810277782395, "learning_rate": 1.195263584752636e-05, "loss": 0.6798, "step": 15829 }, { "epoch": 0.4621762868237424, "grad_norm": 0.6985047804857022, "learning_rate": 1.1951987023519872e-05, "loss": 0.6661, "step": 15830 }, { "epoch": 0.46220548305158976, "grad_norm": 0.5925578605521011, "learning_rate": 1.1951338199513382e-05, "loss": 0.5644, "step": 15831 }, { "epoch": 0.4622346792794371, "grad_norm": 0.6773372275978657, "learning_rate": 1.1950689375506894e-05, "loss": 0.664, "step": 15832 }, { "epoch": 0.4622638755072845, "grad_norm": 0.6205193124600695, "learning_rate": 1.1950040551500406e-05, "loss": 0.5987, "step": 15833 }, { "epoch": 0.46229307173513184, "grad_norm": 0.6152240962987368, "learning_rate": 1.1949391727493918e-05, "loss": 0.5391, "step": 15834 }, { "epoch": 0.4623222679629792, "grad_norm": 0.6399601185856207, "learning_rate": 1.1948742903487428e-05, "loss": 0.5725, "step": 15835 }, { "epoch": 0.46235146419082657, "grad_norm": 0.6088961357645541, "learning_rate": 1.194809407948094e-05, "loss": 0.5407, "step": 15836 }, { "epoch": 0.4623806604186739, "grad_norm": 0.6588205320165219, "learning_rate": 1.1947445255474452e-05, "loss": 0.5865, "step": 15837 }, { "epoch": 0.4624098566465213, "grad_norm": 0.6584429860813691, "learning_rate": 1.1946796431467966e-05, "loss": 0.6282, "step": 15838 }, { "epoch": 0.46243905287436865, "grad_norm": 0.6252290802795102, "learning_rate": 1.1946147607461478e-05, "loss": 0.6103, "step": 15839 }, { "epoch": 0.462468249102216, "grad_norm": 0.6034039050256466, "learning_rate": 1.194549878345499e-05, "loss": 0.5771, "step": 15840 }, { "epoch": 0.4624974453300634, "grad_norm": 0.6598054097280471, "learning_rate": 1.19448499594485e-05, "loss": 0.6432, "step": 15841 }, { "epoch": 0.46252664155791073, "grad_norm": 0.6428988132337089, "learning_rate": 1.1944201135442013e-05, "loss": 0.6049, "step": 15842 }, { "epoch": 0.4625558377857581, "grad_norm": 0.6112993029799613, "learning_rate": 1.1943552311435525e-05, "loss": 0.5883, "step": 15843 }, { "epoch": 0.46258503401360546, "grad_norm": 0.6321570289362072, "learning_rate": 1.1942903487429037e-05, "loss": 0.5691, "step": 15844 }, { "epoch": 0.4626142302414528, "grad_norm": 0.60904101676125, "learning_rate": 1.1942254663422547e-05, "loss": 0.5406, "step": 15845 }, { "epoch": 0.4626434264693002, "grad_norm": 0.6216695030680084, "learning_rate": 1.1941605839416059e-05, "loss": 0.575, "step": 15846 }, { "epoch": 0.46267262269714754, "grad_norm": 0.7120029826710228, "learning_rate": 1.1940957015409571e-05, "loss": 0.6693, "step": 15847 }, { "epoch": 0.4627018189249949, "grad_norm": 0.657507209794226, "learning_rate": 1.1940308191403083e-05, "loss": 0.6049, "step": 15848 }, { "epoch": 0.46273101515284226, "grad_norm": 0.6293109560357019, "learning_rate": 1.1939659367396595e-05, "loss": 0.5778, "step": 15849 }, { "epoch": 0.4627602113806896, "grad_norm": 0.5715107892773031, "learning_rate": 1.1939010543390105e-05, "loss": 0.4803, "step": 15850 }, { "epoch": 0.462789407608537, "grad_norm": 0.6852672470873318, "learning_rate": 1.1938361719383617e-05, "loss": 0.652, "step": 15851 }, { "epoch": 0.46281860383638435, "grad_norm": 0.6371873424729015, "learning_rate": 1.193771289537713e-05, "loss": 0.6003, "step": 15852 }, { "epoch": 0.4628478000642317, "grad_norm": 0.595645480197508, "learning_rate": 1.1937064071370642e-05, "loss": 0.5647, "step": 15853 }, { "epoch": 0.46287699629207907, "grad_norm": 0.6187451182677575, "learning_rate": 1.1936415247364152e-05, "loss": 0.537, "step": 15854 }, { "epoch": 0.46290619251992643, "grad_norm": 0.636822039932719, "learning_rate": 1.1935766423357664e-05, "loss": 0.6149, "step": 15855 }, { "epoch": 0.4629353887477738, "grad_norm": 0.658434093798696, "learning_rate": 1.1935117599351176e-05, "loss": 0.5975, "step": 15856 }, { "epoch": 0.46296458497562115, "grad_norm": 0.5834196291759638, "learning_rate": 1.193446877534469e-05, "loss": 0.5606, "step": 15857 }, { "epoch": 0.4629937812034685, "grad_norm": 0.6480171700245093, "learning_rate": 1.1933819951338202e-05, "loss": 0.603, "step": 15858 }, { "epoch": 0.4630229774313159, "grad_norm": 0.6370578825215087, "learning_rate": 1.1933171127331714e-05, "loss": 0.5682, "step": 15859 }, { "epoch": 0.46305217365916324, "grad_norm": 0.6525527947024917, "learning_rate": 1.1932522303325224e-05, "loss": 0.6485, "step": 15860 }, { "epoch": 0.4630813698870106, "grad_norm": 0.6576339730040902, "learning_rate": 1.1931873479318736e-05, "loss": 0.6634, "step": 15861 }, { "epoch": 0.46311056611485796, "grad_norm": 0.6406912916053582, "learning_rate": 1.1931224655312248e-05, "loss": 0.6152, "step": 15862 }, { "epoch": 0.4631397623427053, "grad_norm": 0.6530513662902226, "learning_rate": 1.193057583130576e-05, "loss": 0.6367, "step": 15863 }, { "epoch": 0.4631689585705527, "grad_norm": 0.6771597131335458, "learning_rate": 1.192992700729927e-05, "loss": 0.6547, "step": 15864 }, { "epoch": 0.46319815479840004, "grad_norm": 0.6955659091777979, "learning_rate": 1.1929278183292782e-05, "loss": 0.598, "step": 15865 }, { "epoch": 0.4632273510262474, "grad_norm": 0.7209949361372912, "learning_rate": 1.1928629359286294e-05, "loss": 0.7071, "step": 15866 }, { "epoch": 0.46325654725409476, "grad_norm": 0.5882880164655948, "learning_rate": 1.1927980535279807e-05, "loss": 0.5808, "step": 15867 }, { "epoch": 0.4632857434819421, "grad_norm": 0.6784169378187428, "learning_rate": 1.1927331711273319e-05, "loss": 0.5859, "step": 15868 }, { "epoch": 0.4633149397097895, "grad_norm": 0.6867958627354824, "learning_rate": 1.1926682887266829e-05, "loss": 0.6913, "step": 15869 }, { "epoch": 0.46334413593763685, "grad_norm": 0.5860374917786321, "learning_rate": 1.1926034063260341e-05, "loss": 0.5399, "step": 15870 }, { "epoch": 0.4633733321654842, "grad_norm": 0.6476918149783041, "learning_rate": 1.1925385239253853e-05, "loss": 0.6171, "step": 15871 }, { "epoch": 0.46340252839333157, "grad_norm": 0.7828601275437512, "learning_rate": 1.1924736415247365e-05, "loss": 0.6046, "step": 15872 }, { "epoch": 0.46343172462117893, "grad_norm": 0.6519603187950402, "learning_rate": 1.1924087591240875e-05, "loss": 0.6206, "step": 15873 }, { "epoch": 0.4634609208490263, "grad_norm": 0.6605382326997997, "learning_rate": 1.1923438767234387e-05, "loss": 0.6621, "step": 15874 }, { "epoch": 0.46349011707687365, "grad_norm": 0.6958718609049832, "learning_rate": 1.1922789943227901e-05, "loss": 0.7151, "step": 15875 }, { "epoch": 0.463519313304721, "grad_norm": 0.632160295224527, "learning_rate": 1.1922141119221413e-05, "loss": 0.5829, "step": 15876 }, { "epoch": 0.4635485095325684, "grad_norm": 0.662407715422117, "learning_rate": 1.1921492295214925e-05, "loss": 0.6241, "step": 15877 }, { "epoch": 0.46357770576041574, "grad_norm": 0.6520695573441898, "learning_rate": 1.1920843471208437e-05, "loss": 0.6163, "step": 15878 }, { "epoch": 0.4636069019882631, "grad_norm": 0.6616508106982899, "learning_rate": 1.1920194647201947e-05, "loss": 0.6198, "step": 15879 }, { "epoch": 0.46363609821611046, "grad_norm": 0.5830427635435144, "learning_rate": 1.191954582319546e-05, "loss": 0.5415, "step": 15880 }, { "epoch": 0.4636652944439578, "grad_norm": 0.6235280850785182, "learning_rate": 1.1918896999188972e-05, "loss": 0.6308, "step": 15881 }, { "epoch": 0.4636944906718052, "grad_norm": 0.6375787240788636, "learning_rate": 1.1918248175182484e-05, "loss": 0.631, "step": 15882 }, { "epoch": 0.46372368689965254, "grad_norm": 0.589648446648664, "learning_rate": 1.1917599351175994e-05, "loss": 0.4907, "step": 15883 }, { "epoch": 0.4637528831274999, "grad_norm": 0.6849110722968285, "learning_rate": 1.1916950527169506e-05, "loss": 0.6239, "step": 15884 }, { "epoch": 0.46378207935534727, "grad_norm": 0.6334539930169868, "learning_rate": 1.1916301703163018e-05, "loss": 0.5622, "step": 15885 }, { "epoch": 0.4638112755831946, "grad_norm": 0.6433423614159988, "learning_rate": 1.191565287915653e-05, "loss": 0.6053, "step": 15886 }, { "epoch": 0.463840471811042, "grad_norm": 0.6531001379395668, "learning_rate": 1.1915004055150042e-05, "loss": 0.6171, "step": 15887 }, { "epoch": 0.46386966803888935, "grad_norm": 0.739340265416703, "learning_rate": 1.1914355231143552e-05, "loss": 0.6444, "step": 15888 }, { "epoch": 0.4638988642667367, "grad_norm": 0.6520423350053495, "learning_rate": 1.1913706407137064e-05, "loss": 0.6094, "step": 15889 }, { "epoch": 0.4639280604945841, "grad_norm": 0.6508537097884928, "learning_rate": 1.1913057583130576e-05, "loss": 0.6271, "step": 15890 }, { "epoch": 0.4639572567224315, "grad_norm": 0.6203031494336911, "learning_rate": 1.1912408759124088e-05, "loss": 0.5721, "step": 15891 }, { "epoch": 0.46398645295027885, "grad_norm": 0.6275123788970476, "learning_rate": 1.1911759935117599e-05, "loss": 0.5858, "step": 15892 }, { "epoch": 0.4640156491781262, "grad_norm": 0.7321435642612204, "learning_rate": 1.191111111111111e-05, "loss": 0.6588, "step": 15893 }, { "epoch": 0.46404484540597357, "grad_norm": 0.5851226079725264, "learning_rate": 1.1910462287104624e-05, "loss": 0.5246, "step": 15894 }, { "epoch": 0.46407404163382093, "grad_norm": 0.7079676431304935, "learning_rate": 1.1909813463098137e-05, "loss": 0.6891, "step": 15895 }, { "epoch": 0.4641032378616683, "grad_norm": 0.6188078640947848, "learning_rate": 1.1909164639091649e-05, "loss": 0.5835, "step": 15896 }, { "epoch": 0.46413243408951566, "grad_norm": 0.6477375935103341, "learning_rate": 1.190851581508516e-05, "loss": 0.5619, "step": 15897 }, { "epoch": 0.464161630317363, "grad_norm": 0.7014220705619442, "learning_rate": 1.1907866991078671e-05, "loss": 0.755, "step": 15898 }, { "epoch": 0.4641908265452104, "grad_norm": 0.6864808874475958, "learning_rate": 1.1907218167072183e-05, "loss": 0.6618, "step": 15899 }, { "epoch": 0.46422002277305774, "grad_norm": 0.6548387078411858, "learning_rate": 1.1906569343065695e-05, "loss": 0.6733, "step": 15900 }, { "epoch": 0.4642492190009051, "grad_norm": 0.6227297581064496, "learning_rate": 1.1905920519059207e-05, "loss": 0.5779, "step": 15901 }, { "epoch": 0.46427841522875246, "grad_norm": 0.5459910149656787, "learning_rate": 1.1905271695052717e-05, "loss": 0.4969, "step": 15902 }, { "epoch": 0.4643076114565998, "grad_norm": 0.6572351799883204, "learning_rate": 1.190462287104623e-05, "loss": 0.6047, "step": 15903 }, { "epoch": 0.4643368076844472, "grad_norm": 0.619405800340605, "learning_rate": 1.1903974047039741e-05, "loss": 0.5921, "step": 15904 }, { "epoch": 0.46436600391229454, "grad_norm": 0.6476540207866025, "learning_rate": 1.1903325223033253e-05, "loss": 0.6489, "step": 15905 }, { "epoch": 0.4643952001401419, "grad_norm": 0.6686217146324542, "learning_rate": 1.1902676399026765e-05, "loss": 0.6549, "step": 15906 }, { "epoch": 0.46442439636798927, "grad_norm": 0.6338204745536974, "learning_rate": 1.1902027575020276e-05, "loss": 0.601, "step": 15907 }, { "epoch": 0.46445359259583663, "grad_norm": 0.6436933031246752, "learning_rate": 1.1901378751013788e-05, "loss": 0.6354, "step": 15908 }, { "epoch": 0.464482788823684, "grad_norm": 0.6818766625867694, "learning_rate": 1.19007299270073e-05, "loss": 0.7224, "step": 15909 }, { "epoch": 0.46451198505153135, "grad_norm": 0.5805793497376137, "learning_rate": 1.1900081103000812e-05, "loss": 0.5195, "step": 15910 }, { "epoch": 0.4645411812793787, "grad_norm": 0.744477478742638, "learning_rate": 1.1899432278994322e-05, "loss": 0.6862, "step": 15911 }, { "epoch": 0.4645703775072261, "grad_norm": 0.6535805780737654, "learning_rate": 1.1898783454987834e-05, "loss": 0.6155, "step": 15912 }, { "epoch": 0.46459957373507343, "grad_norm": 0.6353528335250505, "learning_rate": 1.1898134630981348e-05, "loss": 0.5944, "step": 15913 }, { "epoch": 0.4646287699629208, "grad_norm": 0.6207638974435652, "learning_rate": 1.189748580697486e-05, "loss": 0.5721, "step": 15914 }, { "epoch": 0.46465796619076816, "grad_norm": 0.6592159839819871, "learning_rate": 1.1896836982968372e-05, "loss": 0.6105, "step": 15915 }, { "epoch": 0.4646871624186155, "grad_norm": 0.6903137369738662, "learning_rate": 1.1896188158961884e-05, "loss": 0.6774, "step": 15916 }, { "epoch": 0.4647163586464629, "grad_norm": 0.6693747844195915, "learning_rate": 1.1895539334955394e-05, "loss": 0.5819, "step": 15917 }, { "epoch": 0.46474555487431024, "grad_norm": 0.6757599940701078, "learning_rate": 1.1894890510948906e-05, "loss": 0.6582, "step": 15918 }, { "epoch": 0.4647747511021576, "grad_norm": 0.6174209332385107, "learning_rate": 1.1894241686942418e-05, "loss": 0.5827, "step": 15919 }, { "epoch": 0.46480394733000496, "grad_norm": 0.6082614639651854, "learning_rate": 1.189359286293593e-05, "loss": 0.5391, "step": 15920 }, { "epoch": 0.4648331435578523, "grad_norm": 0.6578259214274185, "learning_rate": 1.189294403892944e-05, "loss": 0.5852, "step": 15921 }, { "epoch": 0.4648623397856997, "grad_norm": 0.7116953258583454, "learning_rate": 1.1892295214922953e-05, "loss": 0.6552, "step": 15922 }, { "epoch": 0.46489153601354705, "grad_norm": 0.6353607260077979, "learning_rate": 1.1891646390916465e-05, "loss": 0.613, "step": 15923 }, { "epoch": 0.4649207322413944, "grad_norm": 0.6459414179142924, "learning_rate": 1.1890997566909977e-05, "loss": 0.6358, "step": 15924 }, { "epoch": 0.46494992846924177, "grad_norm": 0.6937461859613085, "learning_rate": 1.1890348742903489e-05, "loss": 0.6144, "step": 15925 }, { "epoch": 0.46497912469708913, "grad_norm": 0.6314136090936929, "learning_rate": 1.1889699918897e-05, "loss": 0.5511, "step": 15926 }, { "epoch": 0.4650083209249365, "grad_norm": 0.620946315244268, "learning_rate": 1.1889051094890511e-05, "loss": 0.5338, "step": 15927 }, { "epoch": 0.46503751715278385, "grad_norm": 0.6688823658515685, "learning_rate": 1.1888402270884023e-05, "loss": 0.6213, "step": 15928 }, { "epoch": 0.4650667133806312, "grad_norm": 0.7331987902276859, "learning_rate": 1.1887753446877535e-05, "loss": 0.7542, "step": 15929 }, { "epoch": 0.4650959096084786, "grad_norm": 0.6671798130169635, "learning_rate": 1.1887104622871046e-05, "loss": 0.6805, "step": 15930 }, { "epoch": 0.46512510583632594, "grad_norm": 0.5898134897739619, "learning_rate": 1.1886455798864558e-05, "loss": 0.5456, "step": 15931 }, { "epoch": 0.4651543020641733, "grad_norm": 0.6515564116343715, "learning_rate": 1.1885806974858071e-05, "loss": 0.6274, "step": 15932 }, { "epoch": 0.46518349829202066, "grad_norm": 0.617346727512649, "learning_rate": 1.1885158150851583e-05, "loss": 0.5486, "step": 15933 }, { "epoch": 0.465212694519868, "grad_norm": 0.6384926075184912, "learning_rate": 1.1884509326845095e-05, "loss": 0.6167, "step": 15934 }, { "epoch": 0.4652418907477154, "grad_norm": 0.6352539488286055, "learning_rate": 1.1883860502838607e-05, "loss": 0.5959, "step": 15935 }, { "epoch": 0.46527108697556274, "grad_norm": 0.6633070726957218, "learning_rate": 1.1883211678832118e-05, "loss": 0.5992, "step": 15936 }, { "epoch": 0.4653002832034101, "grad_norm": 0.829127641134381, "learning_rate": 1.188256285482563e-05, "loss": 0.5723, "step": 15937 }, { "epoch": 0.46532947943125746, "grad_norm": 0.6206654663187724, "learning_rate": 1.1881914030819142e-05, "loss": 0.6442, "step": 15938 }, { "epoch": 0.4653586756591048, "grad_norm": 0.6111604915539003, "learning_rate": 1.1881265206812654e-05, "loss": 0.5781, "step": 15939 }, { "epoch": 0.4653878718869522, "grad_norm": 0.6510435936559412, "learning_rate": 1.1880616382806164e-05, "loss": 0.6848, "step": 15940 }, { "epoch": 0.46541706811479955, "grad_norm": 0.6679778484808322, "learning_rate": 1.1879967558799676e-05, "loss": 0.6996, "step": 15941 }, { "epoch": 0.4654462643426469, "grad_norm": 0.5981082091586173, "learning_rate": 1.1879318734793188e-05, "loss": 0.5204, "step": 15942 }, { "epoch": 0.46547546057049427, "grad_norm": 0.6309011331252368, "learning_rate": 1.18786699107867e-05, "loss": 0.5444, "step": 15943 }, { "epoch": 0.46550465679834163, "grad_norm": 0.6547474186873539, "learning_rate": 1.1878021086780212e-05, "loss": 0.6062, "step": 15944 }, { "epoch": 0.465533853026189, "grad_norm": 0.6108245149536932, "learning_rate": 1.1877372262773723e-05, "loss": 0.606, "step": 15945 }, { "epoch": 0.46556304925403635, "grad_norm": 0.657155659599098, "learning_rate": 1.1876723438767235e-05, "loss": 0.6301, "step": 15946 }, { "epoch": 0.4655922454818837, "grad_norm": 0.6940173244877517, "learning_rate": 1.1876074614760747e-05, "loss": 0.663, "step": 15947 }, { "epoch": 0.4656214417097311, "grad_norm": 0.606901651286273, "learning_rate": 1.1875425790754259e-05, "loss": 0.5541, "step": 15948 }, { "epoch": 0.46565063793757844, "grad_norm": 0.6035292258569732, "learning_rate": 1.1874776966747769e-05, "loss": 0.5835, "step": 15949 }, { "epoch": 0.46567983416542585, "grad_norm": 0.6566010051177745, "learning_rate": 1.1874128142741281e-05, "loss": 0.6095, "step": 15950 }, { "epoch": 0.4657090303932732, "grad_norm": 0.6427521065700658, "learning_rate": 1.1873479318734795e-05, "loss": 0.6626, "step": 15951 }, { "epoch": 0.4657382266211206, "grad_norm": 0.8887478879036514, "learning_rate": 1.1872830494728307e-05, "loss": 0.5928, "step": 15952 }, { "epoch": 0.46576742284896794, "grad_norm": 0.6917693277073055, "learning_rate": 1.1872181670721819e-05, "loss": 0.6644, "step": 15953 }, { "epoch": 0.4657966190768153, "grad_norm": 0.6930207586778503, "learning_rate": 1.1871532846715331e-05, "loss": 0.6243, "step": 15954 }, { "epoch": 0.46582581530466266, "grad_norm": 0.6928815029515332, "learning_rate": 1.1870884022708841e-05, "loss": 0.6644, "step": 15955 }, { "epoch": 0.46585501153251, "grad_norm": 0.6373754725244491, "learning_rate": 1.1870235198702353e-05, "loss": 0.6258, "step": 15956 }, { "epoch": 0.4658842077603574, "grad_norm": 0.6458351818127509, "learning_rate": 1.1869586374695865e-05, "loss": 0.6342, "step": 15957 }, { "epoch": 0.46591340398820474, "grad_norm": 0.6489829956531556, "learning_rate": 1.1868937550689377e-05, "loss": 0.6803, "step": 15958 }, { "epoch": 0.4659426002160521, "grad_norm": 0.6239114722248356, "learning_rate": 1.1868288726682888e-05, "loss": 0.5615, "step": 15959 }, { "epoch": 0.46597179644389947, "grad_norm": 0.653784978646627, "learning_rate": 1.18676399026764e-05, "loss": 0.6093, "step": 15960 }, { "epoch": 0.4660009926717468, "grad_norm": 0.62339999905864, "learning_rate": 1.1866991078669912e-05, "loss": 0.5594, "step": 15961 }, { "epoch": 0.4660301888995942, "grad_norm": 0.6647197957929716, "learning_rate": 1.1866342254663424e-05, "loss": 0.6314, "step": 15962 }, { "epoch": 0.46605938512744155, "grad_norm": 0.6113021459269827, "learning_rate": 1.1865693430656936e-05, "loss": 0.5475, "step": 15963 }, { "epoch": 0.4660885813552889, "grad_norm": 0.6777637364488414, "learning_rate": 1.1865044606650446e-05, "loss": 0.6214, "step": 15964 }, { "epoch": 0.4661177775831363, "grad_norm": 0.6337059385914968, "learning_rate": 1.1864395782643958e-05, "loss": 0.5774, "step": 15965 }, { "epoch": 0.46614697381098363, "grad_norm": 0.7160422980196188, "learning_rate": 1.186374695863747e-05, "loss": 0.6966, "step": 15966 }, { "epoch": 0.466176170038831, "grad_norm": 0.5882531229565227, "learning_rate": 1.1863098134630982e-05, "loss": 0.5312, "step": 15967 }, { "epoch": 0.46620536626667836, "grad_norm": 0.6558394258717078, "learning_rate": 1.1862449310624492e-05, "loss": 0.6544, "step": 15968 }, { "epoch": 0.4662345624945257, "grad_norm": 0.598728531900058, "learning_rate": 1.1861800486618004e-05, "loss": 0.5571, "step": 15969 }, { "epoch": 0.4662637587223731, "grad_norm": 0.643248688532792, "learning_rate": 1.1861151662611518e-05, "loss": 0.6493, "step": 15970 }, { "epoch": 0.46629295495022044, "grad_norm": 0.6593280953678031, "learning_rate": 1.186050283860503e-05, "loss": 0.6056, "step": 15971 }, { "epoch": 0.4663221511780678, "grad_norm": 0.808537958409218, "learning_rate": 1.1859854014598542e-05, "loss": 0.7404, "step": 15972 }, { "epoch": 0.46635134740591516, "grad_norm": 0.7020731919540415, "learning_rate": 1.1859205190592054e-05, "loss": 0.6527, "step": 15973 }, { "epoch": 0.4663805436337625, "grad_norm": 0.6262314030795787, "learning_rate": 1.1858556366585565e-05, "loss": 0.5692, "step": 15974 }, { "epoch": 0.4664097398616099, "grad_norm": 0.654917986012456, "learning_rate": 1.1857907542579077e-05, "loss": 0.6318, "step": 15975 }, { "epoch": 0.46643893608945725, "grad_norm": 0.6243711324609733, "learning_rate": 1.1857258718572589e-05, "loss": 0.572, "step": 15976 }, { "epoch": 0.4664681323173046, "grad_norm": 0.6525528426867598, "learning_rate": 1.18566098945661e-05, "loss": 0.5894, "step": 15977 }, { "epoch": 0.46649732854515197, "grad_norm": 0.6713543905669548, "learning_rate": 1.1855961070559611e-05, "loss": 0.623, "step": 15978 }, { "epoch": 0.46652652477299933, "grad_norm": 0.6320121312975329, "learning_rate": 1.1855312246553123e-05, "loss": 0.608, "step": 15979 }, { "epoch": 0.4665557210008467, "grad_norm": 0.6782890136822862, "learning_rate": 1.1854663422546635e-05, "loss": 0.6998, "step": 15980 }, { "epoch": 0.46658491722869405, "grad_norm": 0.6515010396648097, "learning_rate": 1.1854014598540147e-05, "loss": 0.598, "step": 15981 }, { "epoch": 0.4666141134565414, "grad_norm": 0.6167516411642168, "learning_rate": 1.185336577453366e-05, "loss": 0.5668, "step": 15982 }, { "epoch": 0.4666433096843888, "grad_norm": 0.6288551160486329, "learning_rate": 1.185271695052717e-05, "loss": 0.5778, "step": 15983 }, { "epoch": 0.46667250591223614, "grad_norm": 0.6183494456676397, "learning_rate": 1.1852068126520682e-05, "loss": 0.6176, "step": 15984 }, { "epoch": 0.4667017021400835, "grad_norm": 0.6599519353131745, "learning_rate": 1.1851419302514194e-05, "loss": 0.6656, "step": 15985 }, { "epoch": 0.46673089836793086, "grad_norm": 0.6351840418058932, "learning_rate": 1.1850770478507706e-05, "loss": 0.6481, "step": 15986 }, { "epoch": 0.4667600945957782, "grad_norm": 0.6447040250590159, "learning_rate": 1.1850121654501216e-05, "loss": 0.6334, "step": 15987 }, { "epoch": 0.4667892908236256, "grad_norm": 0.5902189608858351, "learning_rate": 1.1849472830494728e-05, "loss": 0.5075, "step": 15988 }, { "epoch": 0.46681848705147294, "grad_norm": 0.6624016718309729, "learning_rate": 1.1848824006488242e-05, "loss": 0.6502, "step": 15989 }, { "epoch": 0.4668476832793203, "grad_norm": 0.6613610228456506, "learning_rate": 1.1848175182481754e-05, "loss": 0.6358, "step": 15990 }, { "epoch": 0.46687687950716766, "grad_norm": 0.6405790597423073, "learning_rate": 1.1847526358475266e-05, "loss": 0.5795, "step": 15991 }, { "epoch": 0.466906075735015, "grad_norm": 0.6733794845269033, "learning_rate": 1.1846877534468778e-05, "loss": 0.6347, "step": 15992 }, { "epoch": 0.4669352719628624, "grad_norm": 0.5892549947607449, "learning_rate": 1.1846228710462288e-05, "loss": 0.5435, "step": 15993 }, { "epoch": 0.46696446819070975, "grad_norm": 0.6498144501586749, "learning_rate": 1.18455798864558e-05, "loss": 0.6096, "step": 15994 }, { "epoch": 0.4669936644185571, "grad_norm": 0.6744111547763636, "learning_rate": 1.1844931062449312e-05, "loss": 0.6899, "step": 15995 }, { "epoch": 0.46702286064640447, "grad_norm": 0.6424711962550151, "learning_rate": 1.1844282238442824e-05, "loss": 0.6008, "step": 15996 }, { "epoch": 0.46705205687425183, "grad_norm": 0.6640348689916475, "learning_rate": 1.1843633414436334e-05, "loss": 0.6253, "step": 15997 }, { "epoch": 0.4670812531020992, "grad_norm": 0.6065893376721428, "learning_rate": 1.1842984590429847e-05, "loss": 0.5586, "step": 15998 }, { "epoch": 0.46711044932994655, "grad_norm": 0.634427573936324, "learning_rate": 1.1842335766423359e-05, "loss": 0.6116, "step": 15999 }, { "epoch": 0.4671396455577939, "grad_norm": 0.6359659559198733, "learning_rate": 1.184168694241687e-05, "loss": 0.5697, "step": 16000 }, { "epoch": 0.4671688417856413, "grad_norm": 0.6241316128240902, "learning_rate": 1.1841038118410383e-05, "loss": 0.6075, "step": 16001 }, { "epoch": 0.46719803801348864, "grad_norm": 0.6118785711387448, "learning_rate": 1.1840389294403893e-05, "loss": 0.5492, "step": 16002 }, { "epoch": 0.467227234241336, "grad_norm": 0.728190562982717, "learning_rate": 1.1839740470397405e-05, "loss": 0.6833, "step": 16003 }, { "epoch": 0.46725643046918336, "grad_norm": 0.6351254195110698, "learning_rate": 1.1839091646390917e-05, "loss": 0.6181, "step": 16004 }, { "epoch": 0.4672856266970307, "grad_norm": 0.6648648081546091, "learning_rate": 1.1838442822384429e-05, "loss": 0.6133, "step": 16005 }, { "epoch": 0.4673148229248781, "grad_norm": 0.7061375795653595, "learning_rate": 1.183779399837794e-05, "loss": 0.7048, "step": 16006 }, { "epoch": 0.46734401915272544, "grad_norm": 0.6228063613127858, "learning_rate": 1.1837145174371451e-05, "loss": 0.6102, "step": 16007 }, { "epoch": 0.4673732153805728, "grad_norm": 0.7147756806017433, "learning_rate": 1.1836496350364965e-05, "loss": 0.6731, "step": 16008 }, { "epoch": 0.46740241160842017, "grad_norm": 0.6569330078444303, "learning_rate": 1.1835847526358477e-05, "loss": 0.6383, "step": 16009 }, { "epoch": 0.4674316078362676, "grad_norm": 0.6563312208542234, "learning_rate": 1.1835198702351989e-05, "loss": 0.6385, "step": 16010 }, { "epoch": 0.46746080406411494, "grad_norm": 0.6720186953857223, "learning_rate": 1.1834549878345501e-05, "loss": 0.6773, "step": 16011 }, { "epoch": 0.4674900002919623, "grad_norm": 0.6182401148657941, "learning_rate": 1.1833901054339012e-05, "loss": 0.5611, "step": 16012 }, { "epoch": 0.46751919651980967, "grad_norm": 0.6170210644956211, "learning_rate": 1.1833252230332524e-05, "loss": 0.5806, "step": 16013 }, { "epoch": 0.467548392747657, "grad_norm": 0.6521503260096847, "learning_rate": 1.1832603406326036e-05, "loss": 0.6328, "step": 16014 }, { "epoch": 0.4675775889755044, "grad_norm": 0.6709167222829208, "learning_rate": 1.1831954582319548e-05, "loss": 0.6192, "step": 16015 }, { "epoch": 0.46760678520335175, "grad_norm": 0.6415219941210885, "learning_rate": 1.1831305758313058e-05, "loss": 0.5937, "step": 16016 }, { "epoch": 0.4676359814311991, "grad_norm": 0.6335439574221687, "learning_rate": 1.183065693430657e-05, "loss": 0.6061, "step": 16017 }, { "epoch": 0.46766517765904647, "grad_norm": 0.6860934500497058, "learning_rate": 1.1830008110300082e-05, "loss": 0.6722, "step": 16018 }, { "epoch": 0.46769437388689383, "grad_norm": 0.6740063732943229, "learning_rate": 1.1829359286293594e-05, "loss": 0.6864, "step": 16019 }, { "epoch": 0.4677235701147412, "grad_norm": 0.6296839710976742, "learning_rate": 1.1828710462287104e-05, "loss": 0.5955, "step": 16020 }, { "epoch": 0.46775276634258856, "grad_norm": 0.6182001624250769, "learning_rate": 1.1828061638280616e-05, "loss": 0.5461, "step": 16021 }, { "epoch": 0.4677819625704359, "grad_norm": 0.664177182877433, "learning_rate": 1.1827412814274128e-05, "loss": 0.6062, "step": 16022 }, { "epoch": 0.4678111587982833, "grad_norm": 0.6244747883301229, "learning_rate": 1.182676399026764e-05, "loss": 0.6224, "step": 16023 }, { "epoch": 0.46784035502613064, "grad_norm": 0.7028250530875414, "learning_rate": 1.1826115166261152e-05, "loss": 0.6838, "step": 16024 }, { "epoch": 0.467869551253978, "grad_norm": 0.700786562014878, "learning_rate": 1.1825466342254663e-05, "loss": 0.7646, "step": 16025 }, { "epoch": 0.46789874748182536, "grad_norm": 0.6349155295470825, "learning_rate": 1.1824817518248176e-05, "loss": 0.5712, "step": 16026 }, { "epoch": 0.4679279437096727, "grad_norm": 0.7125336465074359, "learning_rate": 1.1824168694241689e-05, "loss": 0.6788, "step": 16027 }, { "epoch": 0.4679571399375201, "grad_norm": 0.5819527578636426, "learning_rate": 1.18235198702352e-05, "loss": 0.4976, "step": 16028 }, { "epoch": 0.46798633616536744, "grad_norm": 0.7695625564977446, "learning_rate": 1.1822871046228713e-05, "loss": 0.7572, "step": 16029 }, { "epoch": 0.4680155323932148, "grad_norm": 0.6526188664528256, "learning_rate": 1.1822222222222225e-05, "loss": 0.6383, "step": 16030 }, { "epoch": 0.46804472862106217, "grad_norm": 0.5878752813686637, "learning_rate": 1.1821573398215735e-05, "loss": 0.5361, "step": 16031 }, { "epoch": 0.46807392484890953, "grad_norm": 0.6525306434245214, "learning_rate": 1.1820924574209247e-05, "loss": 0.6432, "step": 16032 }, { "epoch": 0.4681031210767569, "grad_norm": 0.7159758876622967, "learning_rate": 1.1820275750202759e-05, "loss": 0.6316, "step": 16033 }, { "epoch": 0.46813231730460425, "grad_norm": 0.6341993074557629, "learning_rate": 1.1819626926196271e-05, "loss": 0.5939, "step": 16034 }, { "epoch": 0.4681615135324516, "grad_norm": 0.7080943147260317, "learning_rate": 1.1818978102189781e-05, "loss": 0.5972, "step": 16035 }, { "epoch": 0.468190709760299, "grad_norm": 0.6423293789762776, "learning_rate": 1.1818329278183293e-05, "loss": 0.5694, "step": 16036 }, { "epoch": 0.46821990598814633, "grad_norm": 0.6013110577769585, "learning_rate": 1.1817680454176805e-05, "loss": 0.5394, "step": 16037 }, { "epoch": 0.4682491022159937, "grad_norm": 0.6937543586258383, "learning_rate": 1.1817031630170317e-05, "loss": 0.7287, "step": 16038 }, { "epoch": 0.46827829844384106, "grad_norm": 0.6615249739413707, "learning_rate": 1.1816382806163828e-05, "loss": 0.6463, "step": 16039 }, { "epoch": 0.4683074946716884, "grad_norm": 0.6451653209512642, "learning_rate": 1.181573398215734e-05, "loss": 0.6904, "step": 16040 }, { "epoch": 0.4683366908995358, "grad_norm": 0.7129294038675738, "learning_rate": 1.1815085158150852e-05, "loss": 0.7514, "step": 16041 }, { "epoch": 0.46836588712738314, "grad_norm": 0.6321910672673058, "learning_rate": 1.1814436334144364e-05, "loss": 0.6186, "step": 16042 }, { "epoch": 0.4683950833552305, "grad_norm": 0.6648702558337698, "learning_rate": 1.1813787510137876e-05, "loss": 0.6268, "step": 16043 }, { "epoch": 0.46842427958307786, "grad_norm": 0.6900340785219513, "learning_rate": 1.1813138686131386e-05, "loss": 0.6162, "step": 16044 }, { "epoch": 0.4684534758109252, "grad_norm": 0.7314760699264193, "learning_rate": 1.18124898621249e-05, "loss": 0.714, "step": 16045 }, { "epoch": 0.4684826720387726, "grad_norm": 0.6536285183507312, "learning_rate": 1.1811841038118412e-05, "loss": 0.6391, "step": 16046 }, { "epoch": 0.46851186826661995, "grad_norm": 0.6722222182559998, "learning_rate": 1.1811192214111924e-05, "loss": 0.6053, "step": 16047 }, { "epoch": 0.4685410644944673, "grad_norm": 0.6240464548596154, "learning_rate": 1.1810543390105436e-05, "loss": 0.5972, "step": 16048 }, { "epoch": 0.46857026072231467, "grad_norm": 0.6581959419542751, "learning_rate": 1.1809894566098948e-05, "loss": 0.6486, "step": 16049 }, { "epoch": 0.46859945695016203, "grad_norm": 0.6189253632010768, "learning_rate": 1.1809245742092458e-05, "loss": 0.5745, "step": 16050 }, { "epoch": 0.4686286531780094, "grad_norm": 0.6180245419954269, "learning_rate": 1.180859691808597e-05, "loss": 0.5848, "step": 16051 }, { "epoch": 0.46865784940585675, "grad_norm": 0.608396786670278, "learning_rate": 1.1807948094079482e-05, "loss": 0.565, "step": 16052 }, { "epoch": 0.4686870456337041, "grad_norm": 0.5814241093122717, "learning_rate": 1.1807299270072994e-05, "loss": 0.5438, "step": 16053 }, { "epoch": 0.4687162418615515, "grad_norm": 0.6303908258513213, "learning_rate": 1.1806650446066505e-05, "loss": 0.6183, "step": 16054 }, { "epoch": 0.46874543808939884, "grad_norm": 0.6345714532707536, "learning_rate": 1.1806001622060017e-05, "loss": 0.5853, "step": 16055 }, { "epoch": 0.4687746343172462, "grad_norm": 0.6173975171343645, "learning_rate": 1.1805352798053529e-05, "loss": 0.5972, "step": 16056 }, { "epoch": 0.46880383054509356, "grad_norm": 0.6540449427781784, "learning_rate": 1.1804703974047041e-05, "loss": 0.6224, "step": 16057 }, { "epoch": 0.4688330267729409, "grad_norm": 0.6057022126533061, "learning_rate": 1.1804055150040551e-05, "loss": 0.5452, "step": 16058 }, { "epoch": 0.4688622230007883, "grad_norm": 0.6334775443189212, "learning_rate": 1.1803406326034063e-05, "loss": 0.6134, "step": 16059 }, { "epoch": 0.46889141922863564, "grad_norm": 0.6955232578325776, "learning_rate": 1.1802757502027575e-05, "loss": 0.6608, "step": 16060 }, { "epoch": 0.468920615456483, "grad_norm": 0.6354818720078352, "learning_rate": 1.1802108678021087e-05, "loss": 0.6153, "step": 16061 }, { "epoch": 0.46894981168433036, "grad_norm": 0.6425135330870696, "learning_rate": 1.18014598540146e-05, "loss": 0.586, "step": 16062 }, { "epoch": 0.4689790079121777, "grad_norm": 0.657453702949538, "learning_rate": 1.180081103000811e-05, "loss": 0.6511, "step": 16063 }, { "epoch": 0.4690082041400251, "grad_norm": 0.6441529148856465, "learning_rate": 1.1800162206001623e-05, "loss": 0.5927, "step": 16064 }, { "epoch": 0.46903740036787245, "grad_norm": 0.6396047925207248, "learning_rate": 1.1799513381995135e-05, "loss": 0.5988, "step": 16065 }, { "epoch": 0.4690665965957198, "grad_norm": 0.7090192344675056, "learning_rate": 1.1798864557988647e-05, "loss": 0.7041, "step": 16066 }, { "epoch": 0.46909579282356717, "grad_norm": 0.6862714955809343, "learning_rate": 1.179821573398216e-05, "loss": 0.6858, "step": 16067 }, { "epoch": 0.46912498905141453, "grad_norm": 0.6313641984534933, "learning_rate": 1.1797566909975671e-05, "loss": 0.5698, "step": 16068 }, { "epoch": 0.4691541852792619, "grad_norm": 0.6424819733369828, "learning_rate": 1.1796918085969182e-05, "loss": 0.6364, "step": 16069 }, { "epoch": 0.46918338150710925, "grad_norm": 0.5886093768278712, "learning_rate": 1.1796269261962694e-05, "loss": 0.4818, "step": 16070 }, { "epoch": 0.46921257773495667, "grad_norm": 0.6808625726159797, "learning_rate": 1.1795620437956206e-05, "loss": 0.7003, "step": 16071 }, { "epoch": 0.46924177396280403, "grad_norm": 0.6211717577710391, "learning_rate": 1.1794971613949718e-05, "loss": 0.6135, "step": 16072 }, { "epoch": 0.4692709701906514, "grad_norm": 0.6419415394460187, "learning_rate": 1.1794322789943228e-05, "loss": 0.5865, "step": 16073 }, { "epoch": 0.46930016641849875, "grad_norm": 0.5965269978777906, "learning_rate": 1.179367396593674e-05, "loss": 0.5424, "step": 16074 }, { "epoch": 0.4693293626463461, "grad_norm": 0.6815782668558409, "learning_rate": 1.1793025141930252e-05, "loss": 0.6656, "step": 16075 }, { "epoch": 0.4693585588741935, "grad_norm": 0.6268943738391971, "learning_rate": 1.1792376317923764e-05, "loss": 0.5892, "step": 16076 }, { "epoch": 0.46938775510204084, "grad_norm": 0.6658449873805323, "learning_rate": 1.1791727493917275e-05, "loss": 0.6162, "step": 16077 }, { "epoch": 0.4694169513298882, "grad_norm": 0.7096912236931748, "learning_rate": 1.1791078669910787e-05, "loss": 0.6973, "step": 16078 }, { "epoch": 0.46944614755773556, "grad_norm": 0.6955760467928729, "learning_rate": 1.1790429845904299e-05, "loss": 0.7554, "step": 16079 }, { "epoch": 0.4694753437855829, "grad_norm": 0.6400626463861624, "learning_rate": 1.178978102189781e-05, "loss": 0.6457, "step": 16080 }, { "epoch": 0.4695045400134303, "grad_norm": 0.6951214384031733, "learning_rate": 1.1789132197891323e-05, "loss": 0.7288, "step": 16081 }, { "epoch": 0.46953373624127764, "grad_norm": 0.5833229407427768, "learning_rate": 1.1788483373884833e-05, "loss": 0.5633, "step": 16082 }, { "epoch": 0.469562932469125, "grad_norm": 1.4736870155447055, "learning_rate": 1.1787834549878347e-05, "loss": 0.6706, "step": 16083 }, { "epoch": 0.46959212869697237, "grad_norm": 0.6359373467856748, "learning_rate": 1.1787185725871859e-05, "loss": 0.6204, "step": 16084 }, { "epoch": 0.4696213249248197, "grad_norm": 0.6617995493285536, "learning_rate": 1.1786536901865371e-05, "loss": 0.6721, "step": 16085 }, { "epoch": 0.4696505211526671, "grad_norm": 0.6754605011007903, "learning_rate": 1.1785888077858883e-05, "loss": 0.6578, "step": 16086 }, { "epoch": 0.46967971738051445, "grad_norm": 0.6799069560401172, "learning_rate": 1.1785239253852395e-05, "loss": 0.6693, "step": 16087 }, { "epoch": 0.4697089136083618, "grad_norm": 0.6608786203333122, "learning_rate": 1.1784590429845905e-05, "loss": 0.6317, "step": 16088 }, { "epoch": 0.4697381098362092, "grad_norm": 0.6454732037879516, "learning_rate": 1.1783941605839417e-05, "loss": 0.5772, "step": 16089 }, { "epoch": 0.46976730606405653, "grad_norm": 0.6603951452465311, "learning_rate": 1.178329278183293e-05, "loss": 0.6353, "step": 16090 }, { "epoch": 0.4697965022919039, "grad_norm": 0.6631497752729973, "learning_rate": 1.1782643957826441e-05, "loss": 0.6776, "step": 16091 }, { "epoch": 0.46982569851975126, "grad_norm": 0.6664922545745522, "learning_rate": 1.1781995133819952e-05, "loss": 0.6844, "step": 16092 }, { "epoch": 0.4698548947475986, "grad_norm": 0.6764800470553808, "learning_rate": 1.1781346309813464e-05, "loss": 0.6787, "step": 16093 }, { "epoch": 0.469884090975446, "grad_norm": 0.6624557775605379, "learning_rate": 1.1780697485806976e-05, "loss": 0.6633, "step": 16094 }, { "epoch": 0.46991328720329334, "grad_norm": 0.6353974468886903, "learning_rate": 1.1780048661800488e-05, "loss": 0.5737, "step": 16095 }, { "epoch": 0.4699424834311407, "grad_norm": 0.6032298603364769, "learning_rate": 1.1779399837793998e-05, "loss": 0.5645, "step": 16096 }, { "epoch": 0.46997167965898806, "grad_norm": 0.6111042028043727, "learning_rate": 1.177875101378751e-05, "loss": 0.5576, "step": 16097 }, { "epoch": 0.4700008758868354, "grad_norm": 0.7435041591391007, "learning_rate": 1.1778102189781022e-05, "loss": 0.6464, "step": 16098 }, { "epoch": 0.4700300721146828, "grad_norm": 0.6261029347046357, "learning_rate": 1.1777453365774534e-05, "loss": 0.5997, "step": 16099 }, { "epoch": 0.47005926834253015, "grad_norm": 0.6548129735112121, "learning_rate": 1.1776804541768046e-05, "loss": 0.6468, "step": 16100 }, { "epoch": 0.4700884645703775, "grad_norm": 0.6357866628357496, "learning_rate": 1.1776155717761557e-05, "loss": 0.5889, "step": 16101 }, { "epoch": 0.47011766079822487, "grad_norm": 0.6485551640484859, "learning_rate": 1.177550689375507e-05, "loss": 0.6069, "step": 16102 }, { "epoch": 0.47014685702607223, "grad_norm": 0.7138196645871221, "learning_rate": 1.1774858069748582e-05, "loss": 0.65, "step": 16103 }, { "epoch": 0.4701760532539196, "grad_norm": 0.6438125309386127, "learning_rate": 1.1774209245742094e-05, "loss": 0.6506, "step": 16104 }, { "epoch": 0.47020524948176695, "grad_norm": 0.6297909383572811, "learning_rate": 1.1773560421735606e-05, "loss": 0.6053, "step": 16105 }, { "epoch": 0.4702344457096143, "grad_norm": 0.6457566722600079, "learning_rate": 1.1772911597729118e-05, "loss": 0.5795, "step": 16106 }, { "epoch": 0.4702636419374617, "grad_norm": 0.6070338444635538, "learning_rate": 1.1772262773722629e-05, "loss": 0.5939, "step": 16107 }, { "epoch": 0.47029283816530904, "grad_norm": 0.6547161465456592, "learning_rate": 1.177161394971614e-05, "loss": 0.6394, "step": 16108 }, { "epoch": 0.4703220343931564, "grad_norm": 0.6565086675707503, "learning_rate": 1.1770965125709653e-05, "loss": 0.6243, "step": 16109 }, { "epoch": 0.47035123062100376, "grad_norm": 0.5959352905095142, "learning_rate": 1.1770316301703165e-05, "loss": 0.5607, "step": 16110 }, { "epoch": 0.4703804268488511, "grad_norm": 0.632517015783568, "learning_rate": 1.1769667477696675e-05, "loss": 0.5995, "step": 16111 }, { "epoch": 0.4704096230766985, "grad_norm": 0.6401098934833476, "learning_rate": 1.1769018653690187e-05, "loss": 0.6039, "step": 16112 }, { "epoch": 0.47043881930454584, "grad_norm": 0.6395632446696353, "learning_rate": 1.1768369829683699e-05, "loss": 0.6255, "step": 16113 }, { "epoch": 0.4704680155323932, "grad_norm": 0.6693828469686476, "learning_rate": 1.1767721005677211e-05, "loss": 0.616, "step": 16114 }, { "epoch": 0.47049721176024056, "grad_norm": 0.6188268032706175, "learning_rate": 1.1767072181670722e-05, "loss": 0.577, "step": 16115 }, { "epoch": 0.4705264079880879, "grad_norm": 0.682873800840303, "learning_rate": 1.1766423357664234e-05, "loss": 0.6715, "step": 16116 }, { "epoch": 0.4705556042159353, "grad_norm": 0.6866934774906143, "learning_rate": 1.1765774533657746e-05, "loss": 0.7232, "step": 16117 }, { "epoch": 0.47058480044378265, "grad_norm": 0.6278774685900436, "learning_rate": 1.1765125709651258e-05, "loss": 0.5987, "step": 16118 }, { "epoch": 0.47061399667163, "grad_norm": 0.6036344840955521, "learning_rate": 1.176447688564477e-05, "loss": 0.5723, "step": 16119 }, { "epoch": 0.47064319289947737, "grad_norm": 0.6030960932668625, "learning_rate": 1.176382806163828e-05, "loss": 0.5865, "step": 16120 }, { "epoch": 0.47067238912732473, "grad_norm": 0.6407802242368799, "learning_rate": 1.1763179237631794e-05, "loss": 0.603, "step": 16121 }, { "epoch": 0.4707015853551721, "grad_norm": 0.6588411172389577, "learning_rate": 1.1762530413625306e-05, "loss": 0.6789, "step": 16122 }, { "epoch": 0.47073078158301945, "grad_norm": 0.605862886897075, "learning_rate": 1.1761881589618818e-05, "loss": 0.5751, "step": 16123 }, { "epoch": 0.4707599778108668, "grad_norm": 0.6511173350438593, "learning_rate": 1.176123276561233e-05, "loss": 0.6327, "step": 16124 }, { "epoch": 0.4707891740387142, "grad_norm": 0.6353271136217293, "learning_rate": 1.1760583941605842e-05, "loss": 0.6205, "step": 16125 }, { "epoch": 0.47081837026656154, "grad_norm": 0.6260888897084836, "learning_rate": 1.1759935117599352e-05, "loss": 0.6112, "step": 16126 }, { "epoch": 0.4708475664944089, "grad_norm": 0.6086332435378591, "learning_rate": 1.1759286293592864e-05, "loss": 0.5795, "step": 16127 }, { "epoch": 0.47087676272225626, "grad_norm": 0.6090583425739515, "learning_rate": 1.1758637469586376e-05, "loss": 0.5933, "step": 16128 }, { "epoch": 0.4709059589501036, "grad_norm": 0.6868039680620295, "learning_rate": 1.1757988645579888e-05, "loss": 0.7204, "step": 16129 }, { "epoch": 0.470935155177951, "grad_norm": 0.6575380830910832, "learning_rate": 1.1757339821573399e-05, "loss": 0.6233, "step": 16130 }, { "epoch": 0.4709643514057984, "grad_norm": 0.6450116816407538, "learning_rate": 1.175669099756691e-05, "loss": 0.6155, "step": 16131 }, { "epoch": 0.47099354763364576, "grad_norm": 0.6177808615838928, "learning_rate": 1.1756042173560423e-05, "loss": 0.5972, "step": 16132 }, { "epoch": 0.4710227438614931, "grad_norm": 0.5780984068372038, "learning_rate": 1.1755393349553935e-05, "loss": 0.5311, "step": 16133 }, { "epoch": 0.4710519400893405, "grad_norm": 0.6679367083910546, "learning_rate": 1.1754744525547445e-05, "loss": 0.6575, "step": 16134 }, { "epoch": 0.47108113631718784, "grad_norm": 0.6903776582740162, "learning_rate": 1.1754095701540957e-05, "loss": 0.6178, "step": 16135 }, { "epoch": 0.4711103325450352, "grad_norm": 0.646059049009828, "learning_rate": 1.1753446877534469e-05, "loss": 0.5934, "step": 16136 }, { "epoch": 0.47113952877288257, "grad_norm": 0.5865950805846831, "learning_rate": 1.1752798053527981e-05, "loss": 0.5356, "step": 16137 }, { "epoch": 0.4711687250007299, "grad_norm": 0.6032915399314019, "learning_rate": 1.1752149229521493e-05, "loss": 0.6107, "step": 16138 }, { "epoch": 0.4711979212285773, "grad_norm": 0.7110070709978211, "learning_rate": 1.1751500405515003e-05, "loss": 0.7192, "step": 16139 }, { "epoch": 0.47122711745642465, "grad_norm": 0.698568449937523, "learning_rate": 1.1750851581508517e-05, "loss": 0.7554, "step": 16140 }, { "epoch": 0.471256313684272, "grad_norm": 0.6265746290480434, "learning_rate": 1.1750202757502029e-05, "loss": 0.6339, "step": 16141 }, { "epoch": 0.47128550991211937, "grad_norm": 0.643421348523353, "learning_rate": 1.1749553933495541e-05, "loss": 0.598, "step": 16142 }, { "epoch": 0.47131470613996673, "grad_norm": 0.6494981799544629, "learning_rate": 1.1748905109489053e-05, "loss": 0.6229, "step": 16143 }, { "epoch": 0.4713439023678141, "grad_norm": 0.6816794707474132, "learning_rate": 1.1748256285482564e-05, "loss": 0.6328, "step": 16144 }, { "epoch": 0.47137309859566145, "grad_norm": 0.632538475611794, "learning_rate": 1.1747607461476076e-05, "loss": 0.6162, "step": 16145 }, { "epoch": 0.4714022948235088, "grad_norm": 0.6496859065790703, "learning_rate": 1.1746958637469588e-05, "loss": 0.6014, "step": 16146 }, { "epoch": 0.4714314910513562, "grad_norm": 0.6848982911451149, "learning_rate": 1.17463098134631e-05, "loss": 0.702, "step": 16147 }, { "epoch": 0.47146068727920354, "grad_norm": 0.6411325777120557, "learning_rate": 1.1745660989456612e-05, "loss": 0.5752, "step": 16148 }, { "epoch": 0.4714898835070509, "grad_norm": 0.6999741654928359, "learning_rate": 1.1745012165450122e-05, "loss": 0.7079, "step": 16149 }, { "epoch": 0.47151907973489826, "grad_norm": 0.5984520343631894, "learning_rate": 1.1744363341443634e-05, "loss": 0.532, "step": 16150 }, { "epoch": 0.4715482759627456, "grad_norm": 0.7165905218249419, "learning_rate": 1.1743714517437146e-05, "loss": 0.715, "step": 16151 }, { "epoch": 0.471577472190593, "grad_norm": 0.6617201667913452, "learning_rate": 1.1743065693430658e-05, "loss": 0.6378, "step": 16152 }, { "epoch": 0.47160666841844034, "grad_norm": 0.627498539935978, "learning_rate": 1.1742416869424168e-05, "loss": 0.5816, "step": 16153 }, { "epoch": 0.4716358646462877, "grad_norm": 0.678381505743562, "learning_rate": 1.174176804541768e-05, "loss": 0.6379, "step": 16154 }, { "epoch": 0.47166506087413507, "grad_norm": 0.6587914131866441, "learning_rate": 1.1741119221411192e-05, "loss": 0.6264, "step": 16155 }, { "epoch": 0.47169425710198243, "grad_norm": 0.646734091869967, "learning_rate": 1.1740470397404704e-05, "loss": 0.6333, "step": 16156 }, { "epoch": 0.4717234533298298, "grad_norm": 0.6337139253017153, "learning_rate": 1.1739821573398216e-05, "loss": 0.6165, "step": 16157 }, { "epoch": 0.47175264955767715, "grad_norm": 0.6198472767555058, "learning_rate": 1.1739172749391727e-05, "loss": 0.5712, "step": 16158 }, { "epoch": 0.4717818457855245, "grad_norm": 0.6945090386115902, "learning_rate": 1.173852392538524e-05, "loss": 0.695, "step": 16159 }, { "epoch": 0.4718110420133719, "grad_norm": 0.639281658222514, "learning_rate": 1.1737875101378753e-05, "loss": 0.5758, "step": 16160 }, { "epoch": 0.47184023824121923, "grad_norm": 0.6836213578303612, "learning_rate": 1.1737226277372265e-05, "loss": 0.6137, "step": 16161 }, { "epoch": 0.4718694344690666, "grad_norm": 0.6508084662905647, "learning_rate": 1.1736577453365777e-05, "loss": 0.6205, "step": 16162 }, { "epoch": 0.47189863069691396, "grad_norm": 0.6840165210415912, "learning_rate": 1.1735928629359287e-05, "loss": 0.6707, "step": 16163 }, { "epoch": 0.4719278269247613, "grad_norm": 0.6912044531796797, "learning_rate": 1.1735279805352799e-05, "loss": 0.6435, "step": 16164 }, { "epoch": 0.4719570231526087, "grad_norm": 0.5878239557650519, "learning_rate": 1.1734630981346311e-05, "loss": 0.5138, "step": 16165 }, { "epoch": 0.47198621938045604, "grad_norm": 0.6644391316152553, "learning_rate": 1.1733982157339823e-05, "loss": 0.6189, "step": 16166 }, { "epoch": 0.4720154156083034, "grad_norm": 0.6263010844902571, "learning_rate": 1.1733333333333335e-05, "loss": 0.6149, "step": 16167 }, { "epoch": 0.47204461183615076, "grad_norm": 0.6355829270892283, "learning_rate": 1.1732684509326845e-05, "loss": 0.585, "step": 16168 }, { "epoch": 0.4720738080639981, "grad_norm": 0.6053463505360689, "learning_rate": 1.1732035685320357e-05, "loss": 0.5352, "step": 16169 }, { "epoch": 0.4721030042918455, "grad_norm": 0.6880059022765065, "learning_rate": 1.173138686131387e-05, "loss": 0.6637, "step": 16170 }, { "epoch": 0.47213220051969285, "grad_norm": 0.6193164926366709, "learning_rate": 1.1730738037307381e-05, "loss": 0.5898, "step": 16171 }, { "epoch": 0.4721613967475402, "grad_norm": 0.6943621481405824, "learning_rate": 1.1730089213300892e-05, "loss": 0.6749, "step": 16172 }, { "epoch": 0.47219059297538757, "grad_norm": 0.6555477835557705, "learning_rate": 1.1729440389294404e-05, "loss": 0.5869, "step": 16173 }, { "epoch": 0.47221978920323493, "grad_norm": 0.6041548564854786, "learning_rate": 1.1728791565287916e-05, "loss": 0.6152, "step": 16174 }, { "epoch": 0.4722489854310823, "grad_norm": 0.6454477835348338, "learning_rate": 1.1728142741281428e-05, "loss": 0.5709, "step": 16175 }, { "epoch": 0.47227818165892965, "grad_norm": 0.6550730453595359, "learning_rate": 1.172749391727494e-05, "loss": 0.641, "step": 16176 }, { "epoch": 0.472307377886777, "grad_norm": 0.6128085719783021, "learning_rate": 1.1726845093268454e-05, "loss": 0.6133, "step": 16177 }, { "epoch": 0.4723365741146244, "grad_norm": 0.6458544271395431, "learning_rate": 1.1726196269261964e-05, "loss": 0.6114, "step": 16178 }, { "epoch": 0.47236577034247174, "grad_norm": 0.6884500026647408, "learning_rate": 1.1725547445255476e-05, "loss": 0.6372, "step": 16179 }, { "epoch": 0.4723949665703191, "grad_norm": 0.6604989072350035, "learning_rate": 1.1724898621248988e-05, "loss": 0.6531, "step": 16180 }, { "epoch": 0.47242416279816646, "grad_norm": 0.6315916627782584, "learning_rate": 1.17242497972425e-05, "loss": 0.6007, "step": 16181 }, { "epoch": 0.4724533590260138, "grad_norm": 0.6611849926305282, "learning_rate": 1.172360097323601e-05, "loss": 0.6059, "step": 16182 }, { "epoch": 0.4724825552538612, "grad_norm": 0.6764247260641824, "learning_rate": 1.1722952149229522e-05, "loss": 0.6281, "step": 16183 }, { "epoch": 0.47251175148170854, "grad_norm": 0.639024487513703, "learning_rate": 1.1722303325223034e-05, "loss": 0.6178, "step": 16184 }, { "epoch": 0.4725409477095559, "grad_norm": 0.6896094654825616, "learning_rate": 1.1721654501216546e-05, "loss": 0.6845, "step": 16185 }, { "epoch": 0.47257014393740326, "grad_norm": 0.6122883655587247, "learning_rate": 1.1721005677210058e-05, "loss": 0.5284, "step": 16186 }, { "epoch": 0.4725993401652506, "grad_norm": 0.6054836266816366, "learning_rate": 1.1720356853203569e-05, "loss": 0.556, "step": 16187 }, { "epoch": 0.472628536393098, "grad_norm": 0.6883524186459737, "learning_rate": 1.1719708029197081e-05, "loss": 0.6658, "step": 16188 }, { "epoch": 0.47265773262094535, "grad_norm": 0.6777241467320879, "learning_rate": 1.1719059205190593e-05, "loss": 0.6952, "step": 16189 }, { "epoch": 0.4726869288487927, "grad_norm": 0.6483825880445575, "learning_rate": 1.1718410381184105e-05, "loss": 0.6248, "step": 16190 }, { "epoch": 0.4727161250766401, "grad_norm": 0.7042310099803303, "learning_rate": 1.1717761557177615e-05, "loss": 0.6272, "step": 16191 }, { "epoch": 0.4727453213044875, "grad_norm": 0.6708667410108833, "learning_rate": 1.1717112733171127e-05, "loss": 0.6086, "step": 16192 }, { "epoch": 0.47277451753233485, "grad_norm": 0.6255286487275437, "learning_rate": 1.171646390916464e-05, "loss": 0.598, "step": 16193 }, { "epoch": 0.4728037137601822, "grad_norm": 0.609205144273357, "learning_rate": 1.1715815085158151e-05, "loss": 0.5596, "step": 16194 }, { "epoch": 0.47283290998802957, "grad_norm": 0.6857426199558917, "learning_rate": 1.1715166261151663e-05, "loss": 0.6591, "step": 16195 }, { "epoch": 0.47286210621587693, "grad_norm": 0.6414423212222289, "learning_rate": 1.1714517437145177e-05, "loss": 0.6202, "step": 16196 }, { "epoch": 0.4728913024437243, "grad_norm": 0.6618979944922327, "learning_rate": 1.1713868613138687e-05, "loss": 0.6196, "step": 16197 }, { "epoch": 0.47292049867157165, "grad_norm": 0.5851456286644434, "learning_rate": 1.17132197891322e-05, "loss": 0.5172, "step": 16198 }, { "epoch": 0.472949694899419, "grad_norm": 0.6977701348058726, "learning_rate": 1.1712570965125711e-05, "loss": 0.6022, "step": 16199 }, { "epoch": 0.4729788911272664, "grad_norm": 0.6557987964982074, "learning_rate": 1.1711922141119223e-05, "loss": 0.6237, "step": 16200 }, { "epoch": 0.47300808735511374, "grad_norm": 0.5973687261029258, "learning_rate": 1.1711273317112734e-05, "loss": 0.5738, "step": 16201 }, { "epoch": 0.4730372835829611, "grad_norm": 0.6629660300966213, "learning_rate": 1.1710624493106246e-05, "loss": 0.6679, "step": 16202 }, { "epoch": 0.47306647981080846, "grad_norm": 0.5813571112122716, "learning_rate": 1.1709975669099758e-05, "loss": 0.4983, "step": 16203 }, { "epoch": 0.4730956760386558, "grad_norm": 0.6089193895001983, "learning_rate": 1.170932684509327e-05, "loss": 0.5814, "step": 16204 }, { "epoch": 0.4731248722665032, "grad_norm": 0.6659002261153704, "learning_rate": 1.1708678021086782e-05, "loss": 0.6779, "step": 16205 }, { "epoch": 0.47315406849435054, "grad_norm": 0.6708007800781218, "learning_rate": 1.1708029197080292e-05, "loss": 0.6901, "step": 16206 }, { "epoch": 0.4731832647221979, "grad_norm": 0.6272903946520416, "learning_rate": 1.1707380373073804e-05, "loss": 0.582, "step": 16207 }, { "epoch": 0.47321246095004527, "grad_norm": 0.6280541984430662, "learning_rate": 1.1706731549067316e-05, "loss": 0.5896, "step": 16208 }, { "epoch": 0.4732416571778926, "grad_norm": 0.6044384583253525, "learning_rate": 1.1706082725060828e-05, "loss": 0.5912, "step": 16209 }, { "epoch": 0.47327085340574, "grad_norm": 0.6343930184467045, "learning_rate": 1.1705433901054339e-05, "loss": 0.64, "step": 16210 }, { "epoch": 0.47330004963358735, "grad_norm": 0.5965625285314999, "learning_rate": 1.170478507704785e-05, "loss": 0.5722, "step": 16211 }, { "epoch": 0.4733292458614347, "grad_norm": 0.6566700691653024, "learning_rate": 1.1704136253041363e-05, "loss": 0.6354, "step": 16212 }, { "epoch": 0.47335844208928207, "grad_norm": 0.6187295254154155, "learning_rate": 1.1703487429034875e-05, "loss": 0.5872, "step": 16213 }, { "epoch": 0.47338763831712943, "grad_norm": 0.6373168071147944, "learning_rate": 1.1702838605028387e-05, "loss": 0.6322, "step": 16214 }, { "epoch": 0.4734168345449768, "grad_norm": 0.5849961461469492, "learning_rate": 1.17021897810219e-05, "loss": 0.5639, "step": 16215 }, { "epoch": 0.47344603077282416, "grad_norm": 0.6462725511379231, "learning_rate": 1.170154095701541e-05, "loss": 0.633, "step": 16216 }, { "epoch": 0.4734752270006715, "grad_norm": 0.6939383900935058, "learning_rate": 1.1700892133008923e-05, "loss": 0.5906, "step": 16217 }, { "epoch": 0.4735044232285189, "grad_norm": 0.6101342864924042, "learning_rate": 1.1700243309002435e-05, "loss": 0.5638, "step": 16218 }, { "epoch": 0.47353361945636624, "grad_norm": 0.6620177258543425, "learning_rate": 1.1699594484995947e-05, "loss": 0.6816, "step": 16219 }, { "epoch": 0.4735628156842136, "grad_norm": 0.6098635479101039, "learning_rate": 1.1698945660989457e-05, "loss": 0.5854, "step": 16220 }, { "epoch": 0.47359201191206096, "grad_norm": 0.6420884084311309, "learning_rate": 1.169829683698297e-05, "loss": 0.6261, "step": 16221 }, { "epoch": 0.4736212081399083, "grad_norm": 0.6418937142327679, "learning_rate": 1.1697648012976481e-05, "loss": 0.6464, "step": 16222 }, { "epoch": 0.4736504043677557, "grad_norm": 0.6519173484017775, "learning_rate": 1.1696999188969993e-05, "loss": 0.6598, "step": 16223 }, { "epoch": 0.47367960059560305, "grad_norm": 0.6441961030571453, "learning_rate": 1.1696350364963505e-05, "loss": 0.6605, "step": 16224 }, { "epoch": 0.4737087968234504, "grad_norm": 0.6157045993633794, "learning_rate": 1.1695701540957016e-05, "loss": 0.5952, "step": 16225 }, { "epoch": 0.47373799305129777, "grad_norm": 0.6409512837421413, "learning_rate": 1.1695052716950528e-05, "loss": 0.6192, "step": 16226 }, { "epoch": 0.47376718927914513, "grad_norm": 0.6622035156992857, "learning_rate": 1.169440389294404e-05, "loss": 0.6622, "step": 16227 }, { "epoch": 0.4737963855069925, "grad_norm": 0.6545604431141404, "learning_rate": 1.1693755068937552e-05, "loss": 0.5868, "step": 16228 }, { "epoch": 0.47382558173483985, "grad_norm": 0.6132794109970855, "learning_rate": 1.1693106244931062e-05, "loss": 0.5649, "step": 16229 }, { "epoch": 0.4738547779626872, "grad_norm": 0.6259630686857215, "learning_rate": 1.1692457420924574e-05, "loss": 0.6111, "step": 16230 }, { "epoch": 0.4738839741905346, "grad_norm": 0.6456356345058106, "learning_rate": 1.1691808596918086e-05, "loss": 0.5563, "step": 16231 }, { "epoch": 0.47391317041838193, "grad_norm": 0.6670971477061656, "learning_rate": 1.1691159772911598e-05, "loss": 0.6597, "step": 16232 }, { "epoch": 0.4739423666462293, "grad_norm": 0.6725816925631747, "learning_rate": 1.169051094890511e-05, "loss": 0.677, "step": 16233 }, { "epoch": 0.47397156287407666, "grad_norm": 0.6720592771829973, "learning_rate": 1.1689862124898624e-05, "loss": 0.6893, "step": 16234 }, { "epoch": 0.474000759101924, "grad_norm": 0.6206188005273665, "learning_rate": 1.1689213300892134e-05, "loss": 0.5714, "step": 16235 }, { "epoch": 0.4740299553297714, "grad_norm": 0.7066654122136792, "learning_rate": 1.1688564476885646e-05, "loss": 0.7137, "step": 16236 }, { "epoch": 0.47405915155761874, "grad_norm": 0.6716792444341212, "learning_rate": 1.1687915652879158e-05, "loss": 0.6045, "step": 16237 }, { "epoch": 0.4740883477854661, "grad_norm": 0.6100609550397933, "learning_rate": 1.168726682887267e-05, "loss": 0.5706, "step": 16238 }, { "epoch": 0.47411754401331346, "grad_norm": 0.5891720299928526, "learning_rate": 1.168661800486618e-05, "loss": 0.5467, "step": 16239 }, { "epoch": 0.4741467402411608, "grad_norm": 0.6482538933020631, "learning_rate": 1.1685969180859693e-05, "loss": 0.632, "step": 16240 }, { "epoch": 0.4741759364690082, "grad_norm": 0.6326858164460676, "learning_rate": 1.1685320356853205e-05, "loss": 0.5468, "step": 16241 }, { "epoch": 0.47420513269685555, "grad_norm": 0.6870449920054916, "learning_rate": 1.1684671532846717e-05, "loss": 0.6853, "step": 16242 }, { "epoch": 0.4742343289247029, "grad_norm": 0.6594083789422798, "learning_rate": 1.1684022708840229e-05, "loss": 0.6999, "step": 16243 }, { "epoch": 0.47426352515255027, "grad_norm": 0.6530050626852532, "learning_rate": 1.1683373884833739e-05, "loss": 0.6045, "step": 16244 }, { "epoch": 0.47429272138039763, "grad_norm": 0.6451238541042187, "learning_rate": 1.1682725060827251e-05, "loss": 0.6525, "step": 16245 }, { "epoch": 0.474321917608245, "grad_norm": 0.6639832289234491, "learning_rate": 1.1682076236820763e-05, "loss": 0.6303, "step": 16246 }, { "epoch": 0.47435111383609235, "grad_norm": 0.7150547379788789, "learning_rate": 1.1681427412814275e-05, "loss": 0.7344, "step": 16247 }, { "epoch": 0.4743803100639397, "grad_norm": 0.6499149814905046, "learning_rate": 1.1680778588807786e-05, "loss": 0.635, "step": 16248 }, { "epoch": 0.4744095062917871, "grad_norm": 0.6250434075057401, "learning_rate": 1.1680129764801298e-05, "loss": 0.5197, "step": 16249 }, { "epoch": 0.47443870251963444, "grad_norm": 0.6400778051551705, "learning_rate": 1.167948094079481e-05, "loss": 0.6284, "step": 16250 }, { "epoch": 0.4744678987474818, "grad_norm": 0.6631159387879731, "learning_rate": 1.1678832116788322e-05, "loss": 0.6917, "step": 16251 }, { "epoch": 0.4744970949753292, "grad_norm": 0.613941032412663, "learning_rate": 1.1678183292781834e-05, "loss": 0.5411, "step": 16252 }, { "epoch": 0.4745262912031766, "grad_norm": 0.6263751147747703, "learning_rate": 1.1677534468775347e-05, "loss": 0.6288, "step": 16253 }, { "epoch": 0.47455548743102394, "grad_norm": 0.6464733953100723, "learning_rate": 1.1676885644768858e-05, "loss": 0.6682, "step": 16254 }, { "epoch": 0.4745846836588713, "grad_norm": 0.6380423060078345, "learning_rate": 1.167623682076237e-05, "loss": 0.6099, "step": 16255 }, { "epoch": 0.47461387988671866, "grad_norm": 0.6758610232883556, "learning_rate": 1.1675587996755882e-05, "loss": 0.6131, "step": 16256 }, { "epoch": 0.474643076114566, "grad_norm": 0.625996078433189, "learning_rate": 1.1674939172749394e-05, "loss": 0.6023, "step": 16257 }, { "epoch": 0.4746722723424134, "grad_norm": 0.6991379548912585, "learning_rate": 1.1674290348742904e-05, "loss": 0.6542, "step": 16258 }, { "epoch": 0.47470146857026074, "grad_norm": 0.6201024801633176, "learning_rate": 1.1673641524736416e-05, "loss": 0.5894, "step": 16259 }, { "epoch": 0.4747306647981081, "grad_norm": 0.6413769537916406, "learning_rate": 1.1672992700729928e-05, "loss": 0.6229, "step": 16260 }, { "epoch": 0.47475986102595547, "grad_norm": 0.6755624947887706, "learning_rate": 1.167234387672344e-05, "loss": 0.68, "step": 16261 }, { "epoch": 0.4747890572538028, "grad_norm": 0.6012469709357376, "learning_rate": 1.1671695052716952e-05, "loss": 0.5874, "step": 16262 }, { "epoch": 0.4748182534816502, "grad_norm": 0.6493031317622132, "learning_rate": 1.1671046228710463e-05, "loss": 0.6448, "step": 16263 }, { "epoch": 0.47484744970949755, "grad_norm": 0.9183355362299208, "learning_rate": 1.1670397404703975e-05, "loss": 0.7985, "step": 16264 }, { "epoch": 0.4748766459373449, "grad_norm": 0.6541454453305421, "learning_rate": 1.1669748580697487e-05, "loss": 0.6612, "step": 16265 }, { "epoch": 0.47490584216519227, "grad_norm": 0.6420495038927614, "learning_rate": 1.1669099756690999e-05, "loss": 0.6025, "step": 16266 }, { "epoch": 0.47493503839303963, "grad_norm": 0.6733419604134112, "learning_rate": 1.1668450932684509e-05, "loss": 0.6804, "step": 16267 }, { "epoch": 0.474964234620887, "grad_norm": 0.6282941376547014, "learning_rate": 1.1667802108678021e-05, "loss": 0.6285, "step": 16268 }, { "epoch": 0.47499343084873435, "grad_norm": 0.5947880272530383, "learning_rate": 1.1667153284671533e-05, "loss": 0.5181, "step": 16269 }, { "epoch": 0.4750226270765817, "grad_norm": 0.6281824301789644, "learning_rate": 1.1666504460665045e-05, "loss": 0.6214, "step": 16270 }, { "epoch": 0.4750518233044291, "grad_norm": 0.6411395076035956, "learning_rate": 1.1665855636658557e-05, "loss": 0.6313, "step": 16271 }, { "epoch": 0.47508101953227644, "grad_norm": 0.6863508448272773, "learning_rate": 1.166520681265207e-05, "loss": 0.648, "step": 16272 }, { "epoch": 0.4751102157601238, "grad_norm": 0.6670848560182209, "learning_rate": 1.1664557988645581e-05, "loss": 0.631, "step": 16273 }, { "epoch": 0.47513941198797116, "grad_norm": 0.6613265627822161, "learning_rate": 1.1663909164639093e-05, "loss": 0.6424, "step": 16274 }, { "epoch": 0.4751686082158185, "grad_norm": 0.6704955136012548, "learning_rate": 1.1663260340632605e-05, "loss": 0.697, "step": 16275 }, { "epoch": 0.4751978044436659, "grad_norm": 0.6497501739879039, "learning_rate": 1.1662611516626117e-05, "loss": 0.686, "step": 16276 }, { "epoch": 0.47522700067151324, "grad_norm": 0.6128141961498227, "learning_rate": 1.1661962692619628e-05, "loss": 0.5508, "step": 16277 }, { "epoch": 0.4752561968993606, "grad_norm": 0.6035932565855532, "learning_rate": 1.166131386861314e-05, "loss": 0.5809, "step": 16278 }, { "epoch": 0.47528539312720797, "grad_norm": 0.677665483633948, "learning_rate": 1.1660665044606652e-05, "loss": 0.6555, "step": 16279 }, { "epoch": 0.47531458935505533, "grad_norm": 0.6716663087276555, "learning_rate": 1.1660016220600164e-05, "loss": 0.6153, "step": 16280 }, { "epoch": 0.4753437855829027, "grad_norm": 0.6470688618614, "learning_rate": 1.1659367396593676e-05, "loss": 0.5929, "step": 16281 }, { "epoch": 0.47537298181075005, "grad_norm": 0.6294546298752944, "learning_rate": 1.1658718572587186e-05, "loss": 0.548, "step": 16282 }, { "epoch": 0.4754021780385974, "grad_norm": 0.6745264120625467, "learning_rate": 1.1658069748580698e-05, "loss": 0.641, "step": 16283 }, { "epoch": 0.4754313742664448, "grad_norm": 0.626633659899257, "learning_rate": 1.165742092457421e-05, "loss": 0.5911, "step": 16284 }, { "epoch": 0.47546057049429213, "grad_norm": 0.6933015564762643, "learning_rate": 1.1656772100567722e-05, "loss": 0.7426, "step": 16285 }, { "epoch": 0.4754897667221395, "grad_norm": 0.5977628717661788, "learning_rate": 1.1656123276561232e-05, "loss": 0.5698, "step": 16286 }, { "epoch": 0.47551896294998686, "grad_norm": 0.6066020712075098, "learning_rate": 1.1655474452554744e-05, "loss": 0.5595, "step": 16287 }, { "epoch": 0.4755481591778342, "grad_norm": 0.6200844662561462, "learning_rate": 1.1654825628548256e-05, "loss": 0.584, "step": 16288 }, { "epoch": 0.4755773554056816, "grad_norm": 0.68775680286613, "learning_rate": 1.1654176804541768e-05, "loss": 0.673, "step": 16289 }, { "epoch": 0.47560655163352894, "grad_norm": 0.6851214304094886, "learning_rate": 1.1653527980535279e-05, "loss": 0.7093, "step": 16290 }, { "epoch": 0.4756357478613763, "grad_norm": 0.6801373370044088, "learning_rate": 1.1652879156528794e-05, "loss": 0.6321, "step": 16291 }, { "epoch": 0.47566494408922366, "grad_norm": 0.6657665401155803, "learning_rate": 1.1652230332522305e-05, "loss": 0.6291, "step": 16292 }, { "epoch": 0.475694140317071, "grad_norm": 0.7049991829859379, "learning_rate": 1.1651581508515817e-05, "loss": 0.608, "step": 16293 }, { "epoch": 0.4757233365449184, "grad_norm": 0.6543042850275158, "learning_rate": 1.1650932684509329e-05, "loss": 0.6034, "step": 16294 }, { "epoch": 0.47575253277276575, "grad_norm": 0.6576621873220475, "learning_rate": 1.165028386050284e-05, "loss": 0.6106, "step": 16295 }, { "epoch": 0.4757817290006131, "grad_norm": 0.6380894212416454, "learning_rate": 1.1649635036496351e-05, "loss": 0.6161, "step": 16296 }, { "epoch": 0.47581092522846047, "grad_norm": 0.6491413266402112, "learning_rate": 1.1648986212489863e-05, "loss": 0.6477, "step": 16297 }, { "epoch": 0.47584012145630783, "grad_norm": 0.6480140905132561, "learning_rate": 1.1648337388483375e-05, "loss": 0.6252, "step": 16298 }, { "epoch": 0.4758693176841552, "grad_norm": 0.6671766743281755, "learning_rate": 1.1647688564476887e-05, "loss": 0.6744, "step": 16299 }, { "epoch": 0.47589851391200255, "grad_norm": 0.6489727522437528, "learning_rate": 1.1647039740470399e-05, "loss": 0.6424, "step": 16300 }, { "epoch": 0.4759277101398499, "grad_norm": 0.6124623013299956, "learning_rate": 1.164639091646391e-05, "loss": 0.5722, "step": 16301 }, { "epoch": 0.4759569063676973, "grad_norm": 0.6189525414870042, "learning_rate": 1.1645742092457421e-05, "loss": 0.5474, "step": 16302 }, { "epoch": 0.47598610259554464, "grad_norm": 0.6524693740929702, "learning_rate": 1.1645093268450933e-05, "loss": 0.7084, "step": 16303 }, { "epoch": 0.476015298823392, "grad_norm": 0.6188308245077457, "learning_rate": 1.1644444444444446e-05, "loss": 0.6188, "step": 16304 }, { "epoch": 0.47604449505123936, "grad_norm": 0.6270440906315426, "learning_rate": 1.1643795620437956e-05, "loss": 0.6086, "step": 16305 }, { "epoch": 0.4760736912790867, "grad_norm": 0.6322082796645611, "learning_rate": 1.1643146796431468e-05, "loss": 0.6336, "step": 16306 }, { "epoch": 0.4761028875069341, "grad_norm": 0.6854444267930538, "learning_rate": 1.164249797242498e-05, "loss": 0.6811, "step": 16307 }, { "epoch": 0.47613208373478144, "grad_norm": 0.632892698750911, "learning_rate": 1.1641849148418492e-05, "loss": 0.5862, "step": 16308 }, { "epoch": 0.4761612799626288, "grad_norm": 0.6459849471582028, "learning_rate": 1.1641200324412002e-05, "loss": 0.6155, "step": 16309 }, { "epoch": 0.47619047619047616, "grad_norm": 0.6311020834440406, "learning_rate": 1.1640551500405518e-05, "loss": 0.6086, "step": 16310 }, { "epoch": 0.4762196724183235, "grad_norm": 0.6395230407568426, "learning_rate": 1.1639902676399028e-05, "loss": 0.6338, "step": 16311 }, { "epoch": 0.47624886864617094, "grad_norm": 0.6699179728475742, "learning_rate": 1.163925385239254e-05, "loss": 0.6039, "step": 16312 }, { "epoch": 0.4762780648740183, "grad_norm": 0.704337838660289, "learning_rate": 1.1638605028386052e-05, "loss": 0.6966, "step": 16313 }, { "epoch": 0.47630726110186566, "grad_norm": 0.6318839521823577, "learning_rate": 1.1637956204379564e-05, "loss": 0.642, "step": 16314 }, { "epoch": 0.476336457329713, "grad_norm": 0.6475616309424499, "learning_rate": 1.1637307380373074e-05, "loss": 0.6223, "step": 16315 }, { "epoch": 0.4763656535575604, "grad_norm": 0.6437728292723885, "learning_rate": 1.1636658556366586e-05, "loss": 0.6293, "step": 16316 }, { "epoch": 0.47639484978540775, "grad_norm": 0.6337332030475492, "learning_rate": 1.1636009732360098e-05, "loss": 0.5937, "step": 16317 }, { "epoch": 0.4764240460132551, "grad_norm": 0.6177204155964012, "learning_rate": 1.163536090835361e-05, "loss": 0.5535, "step": 16318 }, { "epoch": 0.47645324224110247, "grad_norm": 0.6420700395161331, "learning_rate": 1.1634712084347123e-05, "loss": 0.6125, "step": 16319 }, { "epoch": 0.47648243846894983, "grad_norm": 0.7085130359939317, "learning_rate": 1.1634063260340633e-05, "loss": 0.7313, "step": 16320 }, { "epoch": 0.4765116346967972, "grad_norm": 0.6290576399532041, "learning_rate": 1.1633414436334145e-05, "loss": 0.6414, "step": 16321 }, { "epoch": 0.47654083092464455, "grad_norm": 0.610318879371188, "learning_rate": 1.1632765612327657e-05, "loss": 0.5632, "step": 16322 }, { "epoch": 0.4765700271524919, "grad_norm": 0.7209073970028261, "learning_rate": 1.1632116788321169e-05, "loss": 0.7068, "step": 16323 }, { "epoch": 0.4765992233803393, "grad_norm": 0.6023695755867677, "learning_rate": 1.163146796431468e-05, "loss": 0.5806, "step": 16324 }, { "epoch": 0.47662841960818664, "grad_norm": 0.7102449917404852, "learning_rate": 1.1630819140308191e-05, "loss": 0.6933, "step": 16325 }, { "epoch": 0.476657615836034, "grad_norm": 0.6096360364030099, "learning_rate": 1.1630170316301703e-05, "loss": 0.5466, "step": 16326 }, { "epoch": 0.47668681206388136, "grad_norm": 0.6594122629387879, "learning_rate": 1.1629521492295215e-05, "loss": 0.6261, "step": 16327 }, { "epoch": 0.4767160082917287, "grad_norm": 0.6635487103973285, "learning_rate": 1.1628872668288729e-05, "loss": 0.6623, "step": 16328 }, { "epoch": 0.4767452045195761, "grad_norm": 0.6578258484509582, "learning_rate": 1.1628223844282241e-05, "loss": 0.676, "step": 16329 }, { "epoch": 0.47677440074742344, "grad_norm": 0.6215490158227955, "learning_rate": 1.1627575020275751e-05, "loss": 0.5382, "step": 16330 }, { "epoch": 0.4768035969752708, "grad_norm": 0.6197518103953307, "learning_rate": 1.1626926196269263e-05, "loss": 0.5627, "step": 16331 }, { "epoch": 0.47683279320311817, "grad_norm": 0.667289662899189, "learning_rate": 1.1626277372262776e-05, "loss": 0.6456, "step": 16332 }, { "epoch": 0.4768619894309655, "grad_norm": 0.6601913693708558, "learning_rate": 1.1625628548256288e-05, "loss": 0.6524, "step": 16333 }, { "epoch": 0.4768911856588129, "grad_norm": 0.6776217903501497, "learning_rate": 1.1624979724249798e-05, "loss": 0.6534, "step": 16334 }, { "epoch": 0.47692038188666025, "grad_norm": 0.639549359653811, "learning_rate": 1.162433090024331e-05, "loss": 0.6419, "step": 16335 }, { "epoch": 0.4769495781145076, "grad_norm": 0.6318086559960168, "learning_rate": 1.1623682076236822e-05, "loss": 0.552, "step": 16336 }, { "epoch": 0.47697877434235497, "grad_norm": 0.6486697815170416, "learning_rate": 1.1623033252230334e-05, "loss": 0.5581, "step": 16337 }, { "epoch": 0.47700797057020233, "grad_norm": 0.6550160079661487, "learning_rate": 1.1622384428223846e-05, "loss": 0.5795, "step": 16338 }, { "epoch": 0.4770371667980497, "grad_norm": 0.655283318765233, "learning_rate": 1.1621735604217356e-05, "loss": 0.5827, "step": 16339 }, { "epoch": 0.47706636302589706, "grad_norm": 0.6310178106969718, "learning_rate": 1.1621086780210868e-05, "loss": 0.5951, "step": 16340 }, { "epoch": 0.4770955592537444, "grad_norm": 0.6095065396360634, "learning_rate": 1.162043795620438e-05, "loss": 0.5566, "step": 16341 }, { "epoch": 0.4771247554815918, "grad_norm": 0.61845709703749, "learning_rate": 1.1619789132197892e-05, "loss": 0.553, "step": 16342 }, { "epoch": 0.47715395170943914, "grad_norm": 0.6234813413462218, "learning_rate": 1.1619140308191403e-05, "loss": 0.5846, "step": 16343 }, { "epoch": 0.4771831479372865, "grad_norm": 0.6775647151853539, "learning_rate": 1.1618491484184915e-05, "loss": 0.6576, "step": 16344 }, { "epoch": 0.47721234416513386, "grad_norm": 0.6364345042825543, "learning_rate": 1.1617842660178427e-05, "loss": 0.6173, "step": 16345 }, { "epoch": 0.4772415403929812, "grad_norm": 0.6763501071991012, "learning_rate": 1.1617193836171939e-05, "loss": 0.6269, "step": 16346 }, { "epoch": 0.4772707366208286, "grad_norm": 0.6194128297979242, "learning_rate": 1.1616545012165453e-05, "loss": 0.5887, "step": 16347 }, { "epoch": 0.47729993284867595, "grad_norm": 0.6653531214024554, "learning_rate": 1.1615896188158965e-05, "loss": 0.6817, "step": 16348 }, { "epoch": 0.4773291290765233, "grad_norm": 0.6596767647181776, "learning_rate": 1.1615247364152475e-05, "loss": 0.6211, "step": 16349 }, { "epoch": 0.47735832530437067, "grad_norm": 0.6268110853330356, "learning_rate": 1.1614598540145987e-05, "loss": 0.618, "step": 16350 }, { "epoch": 0.47738752153221803, "grad_norm": 0.6310819808663592, "learning_rate": 1.1613949716139499e-05, "loss": 0.577, "step": 16351 }, { "epoch": 0.4774167177600654, "grad_norm": 0.6652473938786607, "learning_rate": 1.1613300892133011e-05, "loss": 0.584, "step": 16352 }, { "epoch": 0.47744591398791275, "grad_norm": 0.6552700926027689, "learning_rate": 1.1612652068126521e-05, "loss": 0.5942, "step": 16353 }, { "epoch": 0.4774751102157601, "grad_norm": 0.6450922098307125, "learning_rate": 1.1612003244120033e-05, "loss": 0.6027, "step": 16354 }, { "epoch": 0.4775043064436075, "grad_norm": 0.6463509956883238, "learning_rate": 1.1611354420113545e-05, "loss": 0.6277, "step": 16355 }, { "epoch": 0.47753350267145483, "grad_norm": 0.6536617451862878, "learning_rate": 1.1610705596107057e-05, "loss": 0.5929, "step": 16356 }, { "epoch": 0.4775626988993022, "grad_norm": 0.6290243160991587, "learning_rate": 1.161005677210057e-05, "loss": 0.579, "step": 16357 }, { "epoch": 0.47759189512714956, "grad_norm": 0.7160754560692086, "learning_rate": 1.160940794809408e-05, "loss": 0.6496, "step": 16358 }, { "epoch": 0.4776210913549969, "grad_norm": 0.6559800653303095, "learning_rate": 1.1608759124087592e-05, "loss": 0.6304, "step": 16359 }, { "epoch": 0.4776502875828443, "grad_norm": 0.6108013253095683, "learning_rate": 1.1608110300081104e-05, "loss": 0.5757, "step": 16360 }, { "epoch": 0.47767948381069164, "grad_norm": 0.6709562779454472, "learning_rate": 1.1607461476074616e-05, "loss": 0.6231, "step": 16361 }, { "epoch": 0.477708680038539, "grad_norm": 0.6966465401759687, "learning_rate": 1.1606812652068126e-05, "loss": 0.6757, "step": 16362 }, { "epoch": 0.47773787626638636, "grad_norm": 0.646817276500527, "learning_rate": 1.1606163828061638e-05, "loss": 0.6268, "step": 16363 }, { "epoch": 0.4777670724942337, "grad_norm": 0.6471759741729627, "learning_rate": 1.160551500405515e-05, "loss": 0.6365, "step": 16364 }, { "epoch": 0.4777962687220811, "grad_norm": 0.6272806551099214, "learning_rate": 1.1604866180048662e-05, "loss": 0.5436, "step": 16365 }, { "epoch": 0.47782546494992845, "grad_norm": 0.6621480488849516, "learning_rate": 1.1604217356042176e-05, "loss": 0.6262, "step": 16366 }, { "epoch": 0.4778546611777758, "grad_norm": 0.6850292944570082, "learning_rate": 1.1603568532035688e-05, "loss": 0.6734, "step": 16367 }, { "epoch": 0.47788385740562317, "grad_norm": 0.7320679461189127, "learning_rate": 1.1602919708029198e-05, "loss": 0.7794, "step": 16368 }, { "epoch": 0.47791305363347053, "grad_norm": 0.5846691494338164, "learning_rate": 1.160227088402271e-05, "loss": 0.5231, "step": 16369 }, { "epoch": 0.4779422498613179, "grad_norm": 0.6929682879267862, "learning_rate": 1.1601622060016222e-05, "loss": 0.6779, "step": 16370 }, { "epoch": 0.47797144608916525, "grad_norm": 0.6023263584656776, "learning_rate": 1.1600973236009734e-05, "loss": 0.5832, "step": 16371 }, { "epoch": 0.47800064231701267, "grad_norm": 0.6024387133487165, "learning_rate": 1.1600324412003245e-05, "loss": 0.582, "step": 16372 }, { "epoch": 0.47802983854486003, "grad_norm": 0.688435252464275, "learning_rate": 1.1599675587996757e-05, "loss": 0.6916, "step": 16373 }, { "epoch": 0.4780590347727074, "grad_norm": 0.6639606202291307, "learning_rate": 1.1599026763990269e-05, "loss": 0.6401, "step": 16374 }, { "epoch": 0.47808823100055475, "grad_norm": 0.6077062724004527, "learning_rate": 1.159837793998378e-05, "loss": 0.5676, "step": 16375 }, { "epoch": 0.4781174272284021, "grad_norm": 0.6664368432341864, "learning_rate": 1.1597729115977293e-05, "loss": 0.6173, "step": 16376 }, { "epoch": 0.4781466234562495, "grad_norm": 0.6868341213138365, "learning_rate": 1.1597080291970803e-05, "loss": 0.656, "step": 16377 }, { "epoch": 0.47817581968409684, "grad_norm": 0.6738610625887966, "learning_rate": 1.1596431467964315e-05, "loss": 0.7133, "step": 16378 }, { "epoch": 0.4782050159119442, "grad_norm": 0.6910824737707686, "learning_rate": 1.1595782643957827e-05, "loss": 0.7253, "step": 16379 }, { "epoch": 0.47823421213979156, "grad_norm": 0.673706652675995, "learning_rate": 1.159513381995134e-05, "loss": 0.6544, "step": 16380 }, { "epoch": 0.4782634083676389, "grad_norm": 0.6063003555576373, "learning_rate": 1.159448499594485e-05, "loss": 0.5729, "step": 16381 }, { "epoch": 0.4782926045954863, "grad_norm": 0.6254942610457793, "learning_rate": 1.1593836171938362e-05, "loss": 0.5879, "step": 16382 }, { "epoch": 0.47832180082333364, "grad_norm": 0.6492190759922368, "learning_rate": 1.1593187347931874e-05, "loss": 0.6232, "step": 16383 }, { "epoch": 0.478350997051181, "grad_norm": 0.6421122765215598, "learning_rate": 1.1592538523925386e-05, "loss": 0.5599, "step": 16384 }, { "epoch": 0.47838019327902837, "grad_norm": 0.6348935304254361, "learning_rate": 1.15918896999189e-05, "loss": 0.5915, "step": 16385 }, { "epoch": 0.4784093895068757, "grad_norm": 0.673160037838052, "learning_rate": 1.1591240875912411e-05, "loss": 0.689, "step": 16386 }, { "epoch": 0.4784385857347231, "grad_norm": 0.6530466072504263, "learning_rate": 1.1590592051905922e-05, "loss": 0.622, "step": 16387 }, { "epoch": 0.47846778196257045, "grad_norm": 0.6360230817452102, "learning_rate": 1.1589943227899434e-05, "loss": 0.6323, "step": 16388 }, { "epoch": 0.4784969781904178, "grad_norm": 0.6545502993434416, "learning_rate": 1.1589294403892946e-05, "loss": 0.6205, "step": 16389 }, { "epoch": 0.47852617441826517, "grad_norm": 0.6537895039669434, "learning_rate": 1.1588645579886458e-05, "loss": 0.5769, "step": 16390 }, { "epoch": 0.47855537064611253, "grad_norm": 0.6648870456117387, "learning_rate": 1.1587996755879968e-05, "loss": 0.6203, "step": 16391 }, { "epoch": 0.4785845668739599, "grad_norm": 0.6202112746448847, "learning_rate": 1.158734793187348e-05, "loss": 0.6097, "step": 16392 }, { "epoch": 0.47861376310180725, "grad_norm": 0.6191678945695349, "learning_rate": 1.1586699107866992e-05, "loss": 0.6103, "step": 16393 }, { "epoch": 0.4786429593296546, "grad_norm": 0.676151502714491, "learning_rate": 1.1586050283860504e-05, "loss": 0.6131, "step": 16394 }, { "epoch": 0.478672155557502, "grad_norm": 0.6950954646295786, "learning_rate": 1.1585401459854016e-05, "loss": 0.6777, "step": 16395 }, { "epoch": 0.47870135178534934, "grad_norm": 0.6322293027398769, "learning_rate": 1.1584752635847527e-05, "loss": 0.6093, "step": 16396 }, { "epoch": 0.4787305480131967, "grad_norm": 0.6598071056779481, "learning_rate": 1.1584103811841039e-05, "loss": 0.6245, "step": 16397 }, { "epoch": 0.47875974424104406, "grad_norm": 0.6597799749712878, "learning_rate": 1.158345498783455e-05, "loss": 0.6407, "step": 16398 }, { "epoch": 0.4787889404688914, "grad_norm": 0.6693534985598936, "learning_rate": 1.1582806163828063e-05, "loss": 0.699, "step": 16399 }, { "epoch": 0.4788181366967388, "grad_norm": 0.608401500260305, "learning_rate": 1.1582157339821573e-05, "loss": 0.5443, "step": 16400 }, { "epoch": 0.47884733292458614, "grad_norm": 0.6835183968189199, "learning_rate": 1.1581508515815085e-05, "loss": 0.6972, "step": 16401 }, { "epoch": 0.4788765291524335, "grad_norm": 0.6053217930280154, "learning_rate": 1.1580859691808597e-05, "loss": 0.6035, "step": 16402 }, { "epoch": 0.47890572538028087, "grad_norm": 0.6450586533899543, "learning_rate": 1.1580210867802109e-05, "loss": 0.6343, "step": 16403 }, { "epoch": 0.47893492160812823, "grad_norm": 0.6679829518048449, "learning_rate": 1.1579562043795623e-05, "loss": 0.657, "step": 16404 }, { "epoch": 0.4789641178359756, "grad_norm": 0.6794951077549277, "learning_rate": 1.1578913219789135e-05, "loss": 0.6567, "step": 16405 }, { "epoch": 0.47899331406382295, "grad_norm": 0.6473093131735258, "learning_rate": 1.1578264395782645e-05, "loss": 0.581, "step": 16406 }, { "epoch": 0.4790225102916703, "grad_norm": 0.614719456795895, "learning_rate": 1.1577615571776157e-05, "loss": 0.5516, "step": 16407 }, { "epoch": 0.4790517065195177, "grad_norm": 0.686115637632858, "learning_rate": 1.157696674776967e-05, "loss": 0.6469, "step": 16408 }, { "epoch": 0.47908090274736503, "grad_norm": 0.6367972130858816, "learning_rate": 1.1576317923763181e-05, "loss": 0.6248, "step": 16409 }, { "epoch": 0.4791100989752124, "grad_norm": 0.7269266755964368, "learning_rate": 1.1575669099756692e-05, "loss": 0.6512, "step": 16410 }, { "epoch": 0.47913929520305976, "grad_norm": 0.6847827006127255, "learning_rate": 1.1575020275750204e-05, "loss": 0.6395, "step": 16411 }, { "epoch": 0.4791684914309071, "grad_norm": 0.6563999340299782, "learning_rate": 1.1574371451743716e-05, "loss": 0.6552, "step": 16412 }, { "epoch": 0.4791976876587545, "grad_norm": 0.6425281680685131, "learning_rate": 1.1573722627737228e-05, "loss": 0.5983, "step": 16413 }, { "epoch": 0.47922688388660184, "grad_norm": 0.573931883581216, "learning_rate": 1.1573073803730738e-05, "loss": 0.5034, "step": 16414 }, { "epoch": 0.4792560801144492, "grad_norm": 0.6102188929672128, "learning_rate": 1.157242497972425e-05, "loss": 0.5557, "step": 16415 }, { "epoch": 0.47928527634229656, "grad_norm": 0.6478479695775511, "learning_rate": 1.1571776155717762e-05, "loss": 0.6627, "step": 16416 }, { "epoch": 0.4793144725701439, "grad_norm": 0.7000785137096939, "learning_rate": 1.1571127331711274e-05, "loss": 0.7088, "step": 16417 }, { "epoch": 0.4793436687979913, "grad_norm": 0.6329827203836418, "learning_rate": 1.1570478507704786e-05, "loss": 0.5813, "step": 16418 }, { "epoch": 0.47937286502583865, "grad_norm": 0.6362792957268566, "learning_rate": 1.1569829683698296e-05, "loss": 0.6008, "step": 16419 }, { "epoch": 0.479402061253686, "grad_norm": 0.6942711880305644, "learning_rate": 1.1569180859691808e-05, "loss": 0.6581, "step": 16420 }, { "epoch": 0.47943125748153337, "grad_norm": 0.7053446798439847, "learning_rate": 1.156853203568532e-05, "loss": 0.6352, "step": 16421 }, { "epoch": 0.47946045370938073, "grad_norm": 0.6710199031587238, "learning_rate": 1.1567883211678833e-05, "loss": 0.6511, "step": 16422 }, { "epoch": 0.4794896499372281, "grad_norm": 0.6571661193532089, "learning_rate": 1.1567234387672346e-05, "loss": 0.6003, "step": 16423 }, { "epoch": 0.47951884616507545, "grad_norm": 0.6847074717517642, "learning_rate": 1.1566585563665858e-05, "loss": 0.6554, "step": 16424 }, { "epoch": 0.4795480423929228, "grad_norm": 0.6608625432612472, "learning_rate": 1.1565936739659369e-05, "loss": 0.6453, "step": 16425 }, { "epoch": 0.4795772386207702, "grad_norm": 0.6643625351364495, "learning_rate": 1.156528791565288e-05, "loss": 0.6523, "step": 16426 }, { "epoch": 0.47960643484861754, "grad_norm": 0.5922023829807302, "learning_rate": 1.1564639091646393e-05, "loss": 0.5489, "step": 16427 }, { "epoch": 0.4796356310764649, "grad_norm": 0.6282201347153946, "learning_rate": 1.1563990267639905e-05, "loss": 0.5782, "step": 16428 }, { "epoch": 0.47966482730431226, "grad_norm": 0.6567642447056882, "learning_rate": 1.1563341443633415e-05, "loss": 0.6369, "step": 16429 }, { "epoch": 0.4796940235321596, "grad_norm": 0.6222849756562762, "learning_rate": 1.1562692619626927e-05, "loss": 0.5927, "step": 16430 }, { "epoch": 0.479723219760007, "grad_norm": 0.6617211071328322, "learning_rate": 1.1562043795620439e-05, "loss": 0.6079, "step": 16431 }, { "epoch": 0.47975241598785434, "grad_norm": 0.6760700923383165, "learning_rate": 1.1561394971613951e-05, "loss": 0.6634, "step": 16432 }, { "epoch": 0.47978161221570176, "grad_norm": 0.5921222020041734, "learning_rate": 1.1560746147607461e-05, "loss": 0.5398, "step": 16433 }, { "epoch": 0.4798108084435491, "grad_norm": 0.605807959682865, "learning_rate": 1.1560097323600973e-05, "loss": 0.5671, "step": 16434 }, { "epoch": 0.4798400046713965, "grad_norm": 0.6105667583121686, "learning_rate": 1.1559448499594486e-05, "loss": 0.5849, "step": 16435 }, { "epoch": 0.47986920089924384, "grad_norm": 0.6184004263733991, "learning_rate": 1.1558799675587998e-05, "loss": 0.5869, "step": 16436 }, { "epoch": 0.4798983971270912, "grad_norm": 0.6007993248527246, "learning_rate": 1.155815085158151e-05, "loss": 0.6032, "step": 16437 }, { "epoch": 0.47992759335493856, "grad_norm": 0.6046006592226885, "learning_rate": 1.155750202757502e-05, "loss": 0.5807, "step": 16438 }, { "epoch": 0.4799567895827859, "grad_norm": 0.6577603789059538, "learning_rate": 1.1556853203568532e-05, "loss": 0.6315, "step": 16439 }, { "epoch": 0.4799859858106333, "grad_norm": 0.6338057847290454, "learning_rate": 1.1556204379562044e-05, "loss": 0.5634, "step": 16440 }, { "epoch": 0.48001518203848065, "grad_norm": 0.6289153351826453, "learning_rate": 1.1555555555555556e-05, "loss": 0.6134, "step": 16441 }, { "epoch": 0.480044378266328, "grad_norm": 0.6579839025682915, "learning_rate": 1.155490673154907e-05, "loss": 0.6645, "step": 16442 }, { "epoch": 0.48007357449417537, "grad_norm": 0.6724566863267025, "learning_rate": 1.1554257907542582e-05, "loss": 0.6232, "step": 16443 }, { "epoch": 0.48010277072202273, "grad_norm": 0.6147825813546482, "learning_rate": 1.1553609083536092e-05, "loss": 0.6233, "step": 16444 }, { "epoch": 0.4801319669498701, "grad_norm": 0.6208620039523112, "learning_rate": 1.1552960259529604e-05, "loss": 0.5819, "step": 16445 }, { "epoch": 0.48016116317771745, "grad_norm": 0.65143831283527, "learning_rate": 1.1552311435523116e-05, "loss": 0.6099, "step": 16446 }, { "epoch": 0.4801903594055648, "grad_norm": 0.6347642473244168, "learning_rate": 1.1551662611516628e-05, "loss": 0.6175, "step": 16447 }, { "epoch": 0.4802195556334122, "grad_norm": 0.6169361707158549, "learning_rate": 1.1551013787510138e-05, "loss": 0.578, "step": 16448 }, { "epoch": 0.48024875186125954, "grad_norm": 0.6097503922897011, "learning_rate": 1.155036496350365e-05, "loss": 0.5705, "step": 16449 }, { "epoch": 0.4802779480891069, "grad_norm": 0.6412586708533321, "learning_rate": 1.1549716139497163e-05, "loss": 0.5995, "step": 16450 }, { "epoch": 0.48030714431695426, "grad_norm": 0.5953420557820551, "learning_rate": 1.1549067315490675e-05, "loss": 0.5544, "step": 16451 }, { "epoch": 0.4803363405448016, "grad_norm": 0.6548253115896221, "learning_rate": 1.1548418491484185e-05, "loss": 0.6261, "step": 16452 }, { "epoch": 0.480365536772649, "grad_norm": 0.6509402524864996, "learning_rate": 1.1547769667477697e-05, "loss": 0.6082, "step": 16453 }, { "epoch": 0.48039473300049634, "grad_norm": 0.5987238319703793, "learning_rate": 1.1547120843471209e-05, "loss": 0.541, "step": 16454 }, { "epoch": 0.4804239292283437, "grad_norm": 0.6106497200167001, "learning_rate": 1.1546472019464721e-05, "loss": 0.5573, "step": 16455 }, { "epoch": 0.48045312545619107, "grad_norm": 0.6012843156906303, "learning_rate": 1.1545823195458233e-05, "loss": 0.5217, "step": 16456 }, { "epoch": 0.4804823216840384, "grad_norm": 0.6419101233906608, "learning_rate": 1.1545174371451743e-05, "loss": 0.6207, "step": 16457 }, { "epoch": 0.4805115179118858, "grad_norm": 0.6048503402983297, "learning_rate": 1.1544525547445255e-05, "loss": 0.5127, "step": 16458 }, { "epoch": 0.48054071413973315, "grad_norm": 0.6363476167377268, "learning_rate": 1.1543876723438767e-05, "loss": 0.6045, "step": 16459 }, { "epoch": 0.4805699103675805, "grad_norm": 0.6322546725489773, "learning_rate": 1.154322789943228e-05, "loss": 0.5602, "step": 16460 }, { "epoch": 0.48059910659542787, "grad_norm": 0.6829269958192985, "learning_rate": 1.1542579075425793e-05, "loss": 0.7281, "step": 16461 }, { "epoch": 0.48062830282327523, "grad_norm": 0.6342607294473852, "learning_rate": 1.1541930251419305e-05, "loss": 0.5875, "step": 16462 }, { "epoch": 0.4806574990511226, "grad_norm": 0.754580697050283, "learning_rate": 1.1541281427412815e-05, "loss": 0.6391, "step": 16463 }, { "epoch": 0.48068669527896996, "grad_norm": 0.6925126845315, "learning_rate": 1.1540632603406328e-05, "loss": 0.6997, "step": 16464 }, { "epoch": 0.4807158915068173, "grad_norm": 0.6241303844823708, "learning_rate": 1.153998377939984e-05, "loss": 0.5843, "step": 16465 }, { "epoch": 0.4807450877346647, "grad_norm": 0.6838562883292442, "learning_rate": 1.1539334955393352e-05, "loss": 0.6801, "step": 16466 }, { "epoch": 0.48077428396251204, "grad_norm": 0.6768514117395955, "learning_rate": 1.1538686131386862e-05, "loss": 0.6778, "step": 16467 }, { "epoch": 0.4808034801903594, "grad_norm": 0.6419165482416894, "learning_rate": 1.1538037307380374e-05, "loss": 0.6052, "step": 16468 }, { "epoch": 0.48083267641820676, "grad_norm": 0.6749895126361107, "learning_rate": 1.1537388483373886e-05, "loss": 0.6461, "step": 16469 }, { "epoch": 0.4808618726460541, "grad_norm": 0.6353146213732396, "learning_rate": 1.1536739659367398e-05, "loss": 0.6649, "step": 16470 }, { "epoch": 0.4808910688739015, "grad_norm": 0.6022796708756957, "learning_rate": 1.1536090835360908e-05, "loss": 0.5337, "step": 16471 }, { "epoch": 0.48092026510174885, "grad_norm": 0.6325519658046205, "learning_rate": 1.153544201135442e-05, "loss": 0.6161, "step": 16472 }, { "epoch": 0.4809494613295962, "grad_norm": 0.7295153285153535, "learning_rate": 1.1534793187347932e-05, "loss": 0.6776, "step": 16473 }, { "epoch": 0.48097865755744357, "grad_norm": 0.6414934670504884, "learning_rate": 1.1534144363341444e-05, "loss": 0.5705, "step": 16474 }, { "epoch": 0.48100785378529093, "grad_norm": 0.614596207358256, "learning_rate": 1.1533495539334956e-05, "loss": 0.5596, "step": 16475 }, { "epoch": 0.4810370500131383, "grad_norm": 0.6571801474271673, "learning_rate": 1.1532846715328467e-05, "loss": 0.5984, "step": 16476 }, { "epoch": 0.48106624624098565, "grad_norm": 0.6171201738708142, "learning_rate": 1.1532197891321979e-05, "loss": 0.5899, "step": 16477 }, { "epoch": 0.481095442468833, "grad_norm": 0.6803837208923564, "learning_rate": 1.153154906731549e-05, "loss": 0.6546, "step": 16478 }, { "epoch": 0.4811246386966804, "grad_norm": 0.6376647372356796, "learning_rate": 1.1530900243309005e-05, "loss": 0.6177, "step": 16479 }, { "epoch": 0.48115383492452773, "grad_norm": 0.6669977925032143, "learning_rate": 1.1530251419302517e-05, "loss": 0.6643, "step": 16480 }, { "epoch": 0.4811830311523751, "grad_norm": 0.6639190605143238, "learning_rate": 1.1529602595296029e-05, "loss": 0.625, "step": 16481 }, { "epoch": 0.48121222738022246, "grad_norm": 0.6294724752678836, "learning_rate": 1.1528953771289539e-05, "loss": 0.5964, "step": 16482 }, { "epoch": 0.4812414236080698, "grad_norm": 0.6388026556037507, "learning_rate": 1.1528304947283051e-05, "loss": 0.6046, "step": 16483 }, { "epoch": 0.4812706198359172, "grad_norm": 0.6305091585411914, "learning_rate": 1.1527656123276563e-05, "loss": 0.6636, "step": 16484 }, { "epoch": 0.48129981606376454, "grad_norm": 0.6303927133158018, "learning_rate": 1.1527007299270075e-05, "loss": 0.659, "step": 16485 }, { "epoch": 0.4813290122916119, "grad_norm": 0.6697736014712256, "learning_rate": 1.1526358475263585e-05, "loss": 0.6333, "step": 16486 }, { "epoch": 0.48135820851945926, "grad_norm": 0.6656104048108505, "learning_rate": 1.1525709651257097e-05, "loss": 0.6434, "step": 16487 }, { "epoch": 0.4813874047473066, "grad_norm": 0.6543753950234731, "learning_rate": 1.152506082725061e-05, "loss": 0.6371, "step": 16488 }, { "epoch": 0.481416600975154, "grad_norm": 0.6691990739250511, "learning_rate": 1.1524412003244121e-05, "loss": 0.6663, "step": 16489 }, { "epoch": 0.48144579720300135, "grad_norm": 0.6246553196181898, "learning_rate": 1.1523763179237632e-05, "loss": 0.6276, "step": 16490 }, { "epoch": 0.4814749934308487, "grad_norm": 0.6695132563593904, "learning_rate": 1.1523114355231144e-05, "loss": 0.598, "step": 16491 }, { "epoch": 0.48150418965869607, "grad_norm": 0.6138382892688748, "learning_rate": 1.1522465531224656e-05, "loss": 0.5569, "step": 16492 }, { "epoch": 0.4815333858865435, "grad_norm": 0.6444095062563897, "learning_rate": 1.1521816707218168e-05, "loss": 0.5895, "step": 16493 }, { "epoch": 0.48156258211439085, "grad_norm": 0.5935240990577643, "learning_rate": 1.152116788321168e-05, "loss": 0.542, "step": 16494 }, { "epoch": 0.4815917783422382, "grad_norm": 0.6156843059168172, "learning_rate": 1.152051905920519e-05, "loss": 0.5649, "step": 16495 }, { "epoch": 0.48162097457008557, "grad_norm": 0.6542658443589469, "learning_rate": 1.1519870235198702e-05, "loss": 0.6648, "step": 16496 }, { "epoch": 0.48165017079793293, "grad_norm": 0.6508494650015916, "learning_rate": 1.1519221411192214e-05, "loss": 0.6662, "step": 16497 }, { "epoch": 0.4816793670257803, "grad_norm": 0.6622147697933123, "learning_rate": 1.1518572587185728e-05, "loss": 0.596, "step": 16498 }, { "epoch": 0.48170856325362765, "grad_norm": 0.6271309864382619, "learning_rate": 1.151792376317924e-05, "loss": 0.6051, "step": 16499 }, { "epoch": 0.481737759481475, "grad_norm": 0.6720534079082691, "learning_rate": 1.1517274939172752e-05, "loss": 0.666, "step": 16500 }, { "epoch": 0.4817669557093224, "grad_norm": 0.6726813042118569, "learning_rate": 1.1516626115166262e-05, "loss": 0.7072, "step": 16501 }, { "epoch": 0.48179615193716974, "grad_norm": 0.5849879310877839, "learning_rate": 1.1515977291159774e-05, "loss": 0.5423, "step": 16502 }, { "epoch": 0.4818253481650171, "grad_norm": 0.5849900382087936, "learning_rate": 1.1515328467153286e-05, "loss": 0.5278, "step": 16503 }, { "epoch": 0.48185454439286446, "grad_norm": 0.6634540844083151, "learning_rate": 1.1514679643146798e-05, "loss": 0.5454, "step": 16504 }, { "epoch": 0.4818837406207118, "grad_norm": 0.649307167058564, "learning_rate": 1.1514030819140309e-05, "loss": 0.6357, "step": 16505 }, { "epoch": 0.4819129368485592, "grad_norm": 0.6308082394845423, "learning_rate": 1.151338199513382e-05, "loss": 0.5477, "step": 16506 }, { "epoch": 0.48194213307640654, "grad_norm": 0.6447533457202208, "learning_rate": 1.1512733171127333e-05, "loss": 0.5992, "step": 16507 }, { "epoch": 0.4819713293042539, "grad_norm": 0.6801151488749473, "learning_rate": 1.1512084347120845e-05, "loss": 0.6348, "step": 16508 }, { "epoch": 0.48200052553210126, "grad_norm": 0.6618677008649312, "learning_rate": 1.1511435523114355e-05, "loss": 0.6263, "step": 16509 }, { "epoch": 0.4820297217599486, "grad_norm": 0.6289187276843785, "learning_rate": 1.1510786699107867e-05, "loss": 0.577, "step": 16510 }, { "epoch": 0.482058917987796, "grad_norm": 0.6381944487207495, "learning_rate": 1.151013787510138e-05, "loss": 0.5574, "step": 16511 }, { "epoch": 0.48208811421564335, "grad_norm": 0.6522684406092802, "learning_rate": 1.1509489051094891e-05, "loss": 0.6437, "step": 16512 }, { "epoch": 0.4821173104434907, "grad_norm": 0.6446157696933458, "learning_rate": 1.1508840227088403e-05, "loss": 0.6424, "step": 16513 }, { "epoch": 0.48214650667133807, "grad_norm": 0.655288101610872, "learning_rate": 1.1508191403081914e-05, "loss": 0.6095, "step": 16514 }, { "epoch": 0.48217570289918543, "grad_norm": 0.597092189853275, "learning_rate": 1.1507542579075426e-05, "loss": 0.5997, "step": 16515 }, { "epoch": 0.4822048991270328, "grad_norm": 0.5872666034539741, "learning_rate": 1.1506893755068938e-05, "loss": 0.5331, "step": 16516 }, { "epoch": 0.48223409535488015, "grad_norm": 0.6091309187747757, "learning_rate": 1.1506244931062451e-05, "loss": 0.5627, "step": 16517 }, { "epoch": 0.4822632915827275, "grad_norm": 0.6194969112202725, "learning_rate": 1.1505596107055963e-05, "loss": 0.5817, "step": 16518 }, { "epoch": 0.4822924878105749, "grad_norm": 0.5833510435979847, "learning_rate": 1.1504947283049474e-05, "loss": 0.5586, "step": 16519 }, { "epoch": 0.48232168403842224, "grad_norm": 0.5927290078431906, "learning_rate": 1.1504298459042986e-05, "loss": 0.5065, "step": 16520 }, { "epoch": 0.4823508802662696, "grad_norm": 0.5975275054049776, "learning_rate": 1.1503649635036498e-05, "loss": 0.5566, "step": 16521 }, { "epoch": 0.48238007649411696, "grad_norm": 0.5787383547658853, "learning_rate": 1.150300081103001e-05, "loss": 0.5191, "step": 16522 }, { "epoch": 0.4824092727219643, "grad_norm": 0.662047347556999, "learning_rate": 1.1502351987023522e-05, "loss": 0.6585, "step": 16523 }, { "epoch": 0.4824384689498117, "grad_norm": 0.6549210090032419, "learning_rate": 1.1501703163017032e-05, "loss": 0.643, "step": 16524 }, { "epoch": 0.48246766517765904, "grad_norm": 0.6423957431267824, "learning_rate": 1.1501054339010544e-05, "loss": 0.6039, "step": 16525 }, { "epoch": 0.4824968614055064, "grad_norm": 0.6448881573848585, "learning_rate": 1.1500405515004056e-05, "loss": 0.5999, "step": 16526 }, { "epoch": 0.48252605763335377, "grad_norm": 0.5851695320239775, "learning_rate": 1.1499756690997568e-05, "loss": 0.5209, "step": 16527 }, { "epoch": 0.4825552538612011, "grad_norm": 0.6879389394063701, "learning_rate": 1.1499107866991079e-05, "loss": 0.6637, "step": 16528 }, { "epoch": 0.4825844500890485, "grad_norm": 0.6133653310883334, "learning_rate": 1.149845904298459e-05, "loss": 0.5697, "step": 16529 }, { "epoch": 0.48261364631689585, "grad_norm": 0.5989083384725512, "learning_rate": 1.1497810218978103e-05, "loss": 0.5395, "step": 16530 }, { "epoch": 0.4826428425447432, "grad_norm": 0.6486137229165597, "learning_rate": 1.1497161394971615e-05, "loss": 0.6109, "step": 16531 }, { "epoch": 0.4826720387725906, "grad_norm": 0.6472543533338696, "learning_rate": 1.1496512570965127e-05, "loss": 0.6489, "step": 16532 }, { "epoch": 0.48270123500043793, "grad_norm": 0.6734030084547717, "learning_rate": 1.1495863746958637e-05, "loss": 0.6781, "step": 16533 }, { "epoch": 0.4827304312282853, "grad_norm": 0.6626112719934063, "learning_rate": 1.1495214922952149e-05, "loss": 0.6182, "step": 16534 }, { "epoch": 0.48275962745613266, "grad_norm": 0.6018355164050453, "learning_rate": 1.1494566098945661e-05, "loss": 0.5718, "step": 16535 }, { "epoch": 0.48278882368398, "grad_norm": 0.6268375315781992, "learning_rate": 1.1493917274939175e-05, "loss": 0.5498, "step": 16536 }, { "epoch": 0.4828180199118274, "grad_norm": 0.6712220241687253, "learning_rate": 1.1493268450932687e-05, "loss": 0.6965, "step": 16537 }, { "epoch": 0.48284721613967474, "grad_norm": 0.6123940301968006, "learning_rate": 1.1492619626926197e-05, "loss": 0.6088, "step": 16538 }, { "epoch": 0.4828764123675221, "grad_norm": 0.8597629223062615, "learning_rate": 1.149197080291971e-05, "loss": 0.7074, "step": 16539 }, { "epoch": 0.48290560859536946, "grad_norm": 0.6385014425960603, "learning_rate": 1.1491321978913221e-05, "loss": 0.6226, "step": 16540 }, { "epoch": 0.4829348048232168, "grad_norm": 0.664532915184155, "learning_rate": 1.1490673154906733e-05, "loss": 0.6257, "step": 16541 }, { "epoch": 0.4829640010510642, "grad_norm": 0.6185585089950874, "learning_rate": 1.1490024330900245e-05, "loss": 0.6046, "step": 16542 }, { "epoch": 0.48299319727891155, "grad_norm": 0.6904200853942793, "learning_rate": 1.1489375506893756e-05, "loss": 0.6821, "step": 16543 }, { "epoch": 0.4830223935067589, "grad_norm": 0.681840320025267, "learning_rate": 1.1488726682887268e-05, "loss": 0.6641, "step": 16544 }, { "epoch": 0.48305158973460627, "grad_norm": 0.6799992316717639, "learning_rate": 1.148807785888078e-05, "loss": 0.678, "step": 16545 }, { "epoch": 0.48308078596245363, "grad_norm": 0.6401544775907735, "learning_rate": 1.1487429034874292e-05, "loss": 0.5932, "step": 16546 }, { "epoch": 0.483109982190301, "grad_norm": 0.5939007377691149, "learning_rate": 1.1486780210867802e-05, "loss": 0.5479, "step": 16547 }, { "epoch": 0.48313917841814835, "grad_norm": 0.7630071822302853, "learning_rate": 1.1486131386861314e-05, "loss": 0.5725, "step": 16548 }, { "epoch": 0.4831683746459957, "grad_norm": 0.6230005088376828, "learning_rate": 1.1485482562854826e-05, "loss": 0.5643, "step": 16549 }, { "epoch": 0.4831975708738431, "grad_norm": 0.671186128657628, "learning_rate": 1.1484833738848338e-05, "loss": 0.6524, "step": 16550 }, { "epoch": 0.48322676710169044, "grad_norm": 0.6919978940617573, "learning_rate": 1.148418491484185e-05, "loss": 0.703, "step": 16551 }, { "epoch": 0.4832559633295378, "grad_norm": 0.6219267624648637, "learning_rate": 1.148353609083536e-05, "loss": 0.587, "step": 16552 }, { "epoch": 0.4832851595573852, "grad_norm": 0.6354223385330127, "learning_rate": 1.1482887266828873e-05, "loss": 0.609, "step": 16553 }, { "epoch": 0.4833143557852326, "grad_norm": 0.6694128162860534, "learning_rate": 1.1482238442822385e-05, "loss": 0.6737, "step": 16554 }, { "epoch": 0.48334355201307994, "grad_norm": 0.6347477481053424, "learning_rate": 1.1481589618815898e-05, "loss": 0.651, "step": 16555 }, { "epoch": 0.4833727482409273, "grad_norm": 0.6286113935822469, "learning_rate": 1.148094079480941e-05, "loss": 0.6088, "step": 16556 }, { "epoch": 0.48340194446877466, "grad_norm": 0.5945094827825167, "learning_rate": 1.148029197080292e-05, "loss": 0.5866, "step": 16557 }, { "epoch": 0.483431140696622, "grad_norm": 0.6346336493513401, "learning_rate": 1.1479643146796433e-05, "loss": 0.6286, "step": 16558 }, { "epoch": 0.4834603369244694, "grad_norm": 0.5750312186142411, "learning_rate": 1.1478994322789945e-05, "loss": 0.5062, "step": 16559 }, { "epoch": 0.48348953315231674, "grad_norm": 0.6947018451867727, "learning_rate": 1.1478345498783457e-05, "loss": 0.6962, "step": 16560 }, { "epoch": 0.4835187293801641, "grad_norm": 0.6758282662034957, "learning_rate": 1.1477696674776969e-05, "loss": 0.6485, "step": 16561 }, { "epoch": 0.48354792560801146, "grad_norm": 0.6826327706165228, "learning_rate": 1.1477047850770479e-05, "loss": 0.5911, "step": 16562 }, { "epoch": 0.4835771218358588, "grad_norm": 0.6536267937028686, "learning_rate": 1.1476399026763991e-05, "loss": 0.623, "step": 16563 }, { "epoch": 0.4836063180637062, "grad_norm": 0.6358573426713805, "learning_rate": 1.1475750202757503e-05, "loss": 0.6482, "step": 16564 }, { "epoch": 0.48363551429155355, "grad_norm": 0.6898440041291113, "learning_rate": 1.1475101378751015e-05, "loss": 0.6527, "step": 16565 }, { "epoch": 0.4836647105194009, "grad_norm": 0.6670124059249521, "learning_rate": 1.1474452554744525e-05, "loss": 0.6494, "step": 16566 }, { "epoch": 0.48369390674724827, "grad_norm": 0.6291248391144786, "learning_rate": 1.1473803730738038e-05, "loss": 0.6484, "step": 16567 }, { "epoch": 0.48372310297509563, "grad_norm": 0.5939306381853265, "learning_rate": 1.147315490673155e-05, "loss": 0.5324, "step": 16568 }, { "epoch": 0.483752299202943, "grad_norm": 0.7109172332790844, "learning_rate": 1.1472506082725062e-05, "loss": 0.7117, "step": 16569 }, { "epoch": 0.48378149543079035, "grad_norm": 0.6281030730215349, "learning_rate": 1.1471857258718574e-05, "loss": 0.5937, "step": 16570 }, { "epoch": 0.4838106916586377, "grad_norm": 0.5935495009503257, "learning_rate": 1.1471208434712084e-05, "loss": 0.5476, "step": 16571 }, { "epoch": 0.4838398878864851, "grad_norm": 0.7275300791602589, "learning_rate": 1.1470559610705596e-05, "loss": 0.7368, "step": 16572 }, { "epoch": 0.48386908411433244, "grad_norm": 0.6721020470099592, "learning_rate": 1.1469910786699108e-05, "loss": 0.6468, "step": 16573 }, { "epoch": 0.4838982803421798, "grad_norm": 0.6686046998429682, "learning_rate": 1.1469261962692622e-05, "loss": 0.6865, "step": 16574 }, { "epoch": 0.48392747657002716, "grad_norm": 0.6192857803122931, "learning_rate": 1.1468613138686134e-05, "loss": 0.5749, "step": 16575 }, { "epoch": 0.4839566727978745, "grad_norm": 0.625992936999221, "learning_rate": 1.1467964314679644e-05, "loss": 0.5824, "step": 16576 }, { "epoch": 0.4839858690257219, "grad_norm": 0.7205995638680694, "learning_rate": 1.1467315490673156e-05, "loss": 0.6601, "step": 16577 }, { "epoch": 0.48401506525356924, "grad_norm": 0.6501932906666041, "learning_rate": 1.1466666666666668e-05, "loss": 0.6315, "step": 16578 }, { "epoch": 0.4840442614814166, "grad_norm": 0.6212744186451719, "learning_rate": 1.146601784266018e-05, "loss": 0.584, "step": 16579 }, { "epoch": 0.48407345770926397, "grad_norm": 0.6813143305345012, "learning_rate": 1.1465369018653692e-05, "loss": 0.6992, "step": 16580 }, { "epoch": 0.4841026539371113, "grad_norm": 0.6717473009139971, "learning_rate": 1.1464720194647203e-05, "loss": 0.7144, "step": 16581 }, { "epoch": 0.4841318501649587, "grad_norm": 0.7621354985709815, "learning_rate": 1.1464071370640715e-05, "loss": 0.6837, "step": 16582 }, { "epoch": 0.48416104639280605, "grad_norm": 0.6616063723012484, "learning_rate": 1.1463422546634227e-05, "loss": 0.6541, "step": 16583 }, { "epoch": 0.4841902426206534, "grad_norm": 0.6750271231608144, "learning_rate": 1.1462773722627739e-05, "loss": 0.6418, "step": 16584 }, { "epoch": 0.48421943884850077, "grad_norm": 0.6485548300680957, "learning_rate": 1.1462124898621249e-05, "loss": 0.662, "step": 16585 }, { "epoch": 0.48424863507634813, "grad_norm": 0.685667243421169, "learning_rate": 1.1461476074614761e-05, "loss": 0.6715, "step": 16586 }, { "epoch": 0.4842778313041955, "grad_norm": 0.6580001554402501, "learning_rate": 1.1460827250608273e-05, "loss": 0.6452, "step": 16587 }, { "epoch": 0.48430702753204286, "grad_norm": 0.611609748396353, "learning_rate": 1.1460178426601785e-05, "loss": 0.5558, "step": 16588 }, { "epoch": 0.4843362237598902, "grad_norm": 0.6959771393284875, "learning_rate": 1.1459529602595297e-05, "loss": 0.7173, "step": 16589 }, { "epoch": 0.4843654199877376, "grad_norm": 0.6269041913997231, "learning_rate": 1.1458880778588807e-05, "loss": 0.619, "step": 16590 }, { "epoch": 0.48439461621558494, "grad_norm": 0.6058615242728138, "learning_rate": 1.145823195458232e-05, "loss": 0.5861, "step": 16591 }, { "epoch": 0.4844238124434323, "grad_norm": 0.6316974463515407, "learning_rate": 1.1457583130575831e-05, "loss": 0.6074, "step": 16592 }, { "epoch": 0.48445300867127966, "grad_norm": 0.6754955102419081, "learning_rate": 1.1456934306569345e-05, "loss": 0.7239, "step": 16593 }, { "epoch": 0.484482204899127, "grad_norm": 0.5930394134450927, "learning_rate": 1.1456285482562857e-05, "loss": 0.5618, "step": 16594 }, { "epoch": 0.4845114011269744, "grad_norm": 0.6798894624350084, "learning_rate": 1.1455636658556368e-05, "loss": 0.6398, "step": 16595 }, { "epoch": 0.48454059735482174, "grad_norm": 0.6018289084147855, "learning_rate": 1.145498783454988e-05, "loss": 0.549, "step": 16596 }, { "epoch": 0.4845697935826691, "grad_norm": 0.6566586048713521, "learning_rate": 1.1454339010543392e-05, "loss": 0.5835, "step": 16597 }, { "epoch": 0.48459898981051647, "grad_norm": 0.6447592826019702, "learning_rate": 1.1453690186536904e-05, "loss": 0.6202, "step": 16598 }, { "epoch": 0.48462818603836383, "grad_norm": 0.6475199941655347, "learning_rate": 1.1453041362530416e-05, "loss": 0.6117, "step": 16599 }, { "epoch": 0.4846573822662112, "grad_norm": 0.6504670074062485, "learning_rate": 1.1452392538523926e-05, "loss": 0.6501, "step": 16600 }, { "epoch": 0.48468657849405855, "grad_norm": 0.6156611652559036, "learning_rate": 1.1451743714517438e-05, "loss": 0.5643, "step": 16601 }, { "epoch": 0.4847157747219059, "grad_norm": 0.6352172214757663, "learning_rate": 1.145109489051095e-05, "loss": 0.5463, "step": 16602 }, { "epoch": 0.4847449709497533, "grad_norm": 0.672405205579352, "learning_rate": 1.1450446066504462e-05, "loss": 0.6272, "step": 16603 }, { "epoch": 0.48477416717760063, "grad_norm": 0.6194166351816713, "learning_rate": 1.1449797242497972e-05, "loss": 0.5629, "step": 16604 }, { "epoch": 0.484803363405448, "grad_norm": 0.675184271396049, "learning_rate": 1.1449148418491484e-05, "loss": 0.699, "step": 16605 }, { "epoch": 0.48483255963329536, "grad_norm": 0.6698721698094328, "learning_rate": 1.1448499594484996e-05, "loss": 0.5933, "step": 16606 }, { "epoch": 0.4848617558611427, "grad_norm": 0.6407786406047967, "learning_rate": 1.1447850770478508e-05, "loss": 0.578, "step": 16607 }, { "epoch": 0.4848909520889901, "grad_norm": 0.6105729135557861, "learning_rate": 1.144720194647202e-05, "loss": 0.5584, "step": 16608 }, { "epoch": 0.48492014831683744, "grad_norm": 0.6760843109200909, "learning_rate": 1.144655312246553e-05, "loss": 0.6168, "step": 16609 }, { "epoch": 0.4849493445446848, "grad_norm": 0.6270995437346165, "learning_rate": 1.1445904298459043e-05, "loss": 0.6446, "step": 16610 }, { "epoch": 0.48497854077253216, "grad_norm": 0.6424316658169162, "learning_rate": 1.1445255474452555e-05, "loss": 0.5933, "step": 16611 }, { "epoch": 0.4850077370003795, "grad_norm": 0.6453836543930919, "learning_rate": 1.1444606650446069e-05, "loss": 0.6069, "step": 16612 }, { "epoch": 0.48503693322822694, "grad_norm": 0.6210802698826418, "learning_rate": 1.144395782643958e-05, "loss": 0.5747, "step": 16613 }, { "epoch": 0.4850661294560743, "grad_norm": 0.5939082736867085, "learning_rate": 1.1443309002433091e-05, "loss": 0.5161, "step": 16614 }, { "epoch": 0.48509532568392166, "grad_norm": 0.6739364468829679, "learning_rate": 1.1442660178426603e-05, "loss": 0.5968, "step": 16615 }, { "epoch": 0.485124521911769, "grad_norm": 0.9616138712972869, "learning_rate": 1.1442011354420115e-05, "loss": 0.5178, "step": 16616 }, { "epoch": 0.4851537181396164, "grad_norm": 0.6561094720301972, "learning_rate": 1.1441362530413627e-05, "loss": 0.6338, "step": 16617 }, { "epoch": 0.48518291436746375, "grad_norm": 0.6051191340638095, "learning_rate": 1.1440713706407139e-05, "loss": 0.5735, "step": 16618 }, { "epoch": 0.4852121105953111, "grad_norm": 0.6170822531777339, "learning_rate": 1.144006488240065e-05, "loss": 0.5335, "step": 16619 }, { "epoch": 0.48524130682315847, "grad_norm": 0.621493201401879, "learning_rate": 1.1439416058394161e-05, "loss": 0.6261, "step": 16620 }, { "epoch": 0.48527050305100583, "grad_norm": 0.7188296102789467, "learning_rate": 1.1438767234387673e-05, "loss": 0.58, "step": 16621 }, { "epoch": 0.4852996992788532, "grad_norm": 0.6248954904736976, "learning_rate": 1.1438118410381185e-05, "loss": 0.5458, "step": 16622 }, { "epoch": 0.48532889550670055, "grad_norm": 0.6211227032716166, "learning_rate": 1.1437469586374696e-05, "loss": 0.6022, "step": 16623 }, { "epoch": 0.4853580917345479, "grad_norm": 0.6752775010688367, "learning_rate": 1.1436820762368208e-05, "loss": 0.5895, "step": 16624 }, { "epoch": 0.4853872879623953, "grad_norm": 0.9437626417421016, "learning_rate": 1.143617193836172e-05, "loss": 0.6709, "step": 16625 }, { "epoch": 0.48541648419024264, "grad_norm": 0.6106159410432931, "learning_rate": 1.1435523114355232e-05, "loss": 0.5483, "step": 16626 }, { "epoch": 0.48544568041809, "grad_norm": 0.6628160906689546, "learning_rate": 1.1434874290348744e-05, "loss": 0.6163, "step": 16627 }, { "epoch": 0.48547487664593736, "grad_norm": 0.6808816847422419, "learning_rate": 1.1434225466342254e-05, "loss": 0.6493, "step": 16628 }, { "epoch": 0.4855040728737847, "grad_norm": 0.6739869003406666, "learning_rate": 1.1433576642335766e-05, "loss": 0.6607, "step": 16629 }, { "epoch": 0.4855332691016321, "grad_norm": 0.61219761002916, "learning_rate": 1.143292781832928e-05, "loss": 0.5743, "step": 16630 }, { "epoch": 0.48556246532947944, "grad_norm": 0.5799654511240442, "learning_rate": 1.1432278994322792e-05, "loss": 0.5158, "step": 16631 }, { "epoch": 0.4855916615573268, "grad_norm": 0.6774026362983461, "learning_rate": 1.1431630170316304e-05, "loss": 0.6787, "step": 16632 }, { "epoch": 0.48562085778517416, "grad_norm": 0.6384924269738432, "learning_rate": 1.1430981346309814e-05, "loss": 0.6494, "step": 16633 }, { "epoch": 0.4856500540130215, "grad_norm": 0.6276563759605507, "learning_rate": 1.1430332522303326e-05, "loss": 0.6013, "step": 16634 }, { "epoch": 0.4856792502408689, "grad_norm": 0.6915459745566394, "learning_rate": 1.1429683698296838e-05, "loss": 0.7041, "step": 16635 }, { "epoch": 0.48570844646871625, "grad_norm": 0.6947382343486216, "learning_rate": 1.142903487429035e-05, "loss": 0.7173, "step": 16636 }, { "epoch": 0.4857376426965636, "grad_norm": 0.6377219224854651, "learning_rate": 1.1428386050283862e-05, "loss": 0.6057, "step": 16637 }, { "epoch": 0.48576683892441097, "grad_norm": 0.6280851057058995, "learning_rate": 1.1427737226277373e-05, "loss": 0.593, "step": 16638 }, { "epoch": 0.48579603515225833, "grad_norm": 0.637851778472765, "learning_rate": 1.1427088402270885e-05, "loss": 0.6093, "step": 16639 }, { "epoch": 0.4858252313801057, "grad_norm": 0.6253955141309865, "learning_rate": 1.1426439578264397e-05, "loss": 0.5823, "step": 16640 }, { "epoch": 0.48585442760795305, "grad_norm": 0.65141007786468, "learning_rate": 1.1425790754257909e-05, "loss": 0.5989, "step": 16641 }, { "epoch": 0.4858836238358004, "grad_norm": 0.6630320031417016, "learning_rate": 1.142514193025142e-05, "loss": 0.6543, "step": 16642 }, { "epoch": 0.4859128200636478, "grad_norm": 0.662496456879172, "learning_rate": 1.1424493106244931e-05, "loss": 0.7043, "step": 16643 }, { "epoch": 0.48594201629149514, "grad_norm": 0.624283796417909, "learning_rate": 1.1423844282238443e-05, "loss": 0.5016, "step": 16644 }, { "epoch": 0.4859712125193425, "grad_norm": 0.6740784634036334, "learning_rate": 1.1423195458231955e-05, "loss": 0.6745, "step": 16645 }, { "epoch": 0.48600040874718986, "grad_norm": 0.6692182216001531, "learning_rate": 1.1422546634225467e-05, "loss": 0.6347, "step": 16646 }, { "epoch": 0.4860296049750372, "grad_norm": 0.6440891453830964, "learning_rate": 1.1421897810218978e-05, "loss": 0.6072, "step": 16647 }, { "epoch": 0.4860588012028846, "grad_norm": 0.6480900916736851, "learning_rate": 1.142124898621249e-05, "loss": 0.6052, "step": 16648 }, { "epoch": 0.48608799743073194, "grad_norm": 0.675403021438311, "learning_rate": 1.1420600162206003e-05, "loss": 0.6675, "step": 16649 }, { "epoch": 0.4861171936585793, "grad_norm": 0.6619504974968166, "learning_rate": 1.1419951338199515e-05, "loss": 0.6387, "step": 16650 }, { "epoch": 0.48614638988642667, "grad_norm": 0.6313277375171042, "learning_rate": 1.1419302514193027e-05, "loss": 0.6117, "step": 16651 }, { "epoch": 0.486175586114274, "grad_norm": 0.638849216161521, "learning_rate": 1.1418653690186538e-05, "loss": 0.6056, "step": 16652 }, { "epoch": 0.4862047823421214, "grad_norm": 0.6347424545052026, "learning_rate": 1.141800486618005e-05, "loss": 0.5652, "step": 16653 }, { "epoch": 0.48623397856996875, "grad_norm": 0.6283512429208487, "learning_rate": 1.1417356042173562e-05, "loss": 0.5934, "step": 16654 }, { "epoch": 0.4862631747978161, "grad_norm": 0.6180540696455259, "learning_rate": 1.1416707218167074e-05, "loss": 0.5443, "step": 16655 }, { "epoch": 0.4862923710256635, "grad_norm": 0.7087575415598032, "learning_rate": 1.1416058394160586e-05, "loss": 0.7295, "step": 16656 }, { "epoch": 0.48632156725351083, "grad_norm": 0.6377425131526108, "learning_rate": 1.1415409570154096e-05, "loss": 0.5609, "step": 16657 }, { "epoch": 0.4863507634813582, "grad_norm": 0.6138302584913476, "learning_rate": 1.1414760746147608e-05, "loss": 0.5733, "step": 16658 }, { "epoch": 0.48637995970920556, "grad_norm": 0.6541394883271177, "learning_rate": 1.141411192214112e-05, "loss": 0.6199, "step": 16659 }, { "epoch": 0.4864091559370529, "grad_norm": 0.60693261152733, "learning_rate": 1.1413463098134632e-05, "loss": 0.5091, "step": 16660 }, { "epoch": 0.4864383521649003, "grad_norm": 0.6181257256195152, "learning_rate": 1.1412814274128143e-05, "loss": 0.5693, "step": 16661 }, { "epoch": 0.48646754839274764, "grad_norm": 0.6666921368077462, "learning_rate": 1.1412165450121655e-05, "loss": 0.6795, "step": 16662 }, { "epoch": 0.486496744620595, "grad_norm": 0.5916650224108261, "learning_rate": 1.1411516626115167e-05, "loss": 0.522, "step": 16663 }, { "epoch": 0.48652594084844236, "grad_norm": 0.6246417694034012, "learning_rate": 1.1410867802108679e-05, "loss": 0.5841, "step": 16664 }, { "epoch": 0.4865551370762897, "grad_norm": 0.6884647233952, "learning_rate": 1.1410218978102189e-05, "loss": 0.676, "step": 16665 }, { "epoch": 0.4865843333041371, "grad_norm": 0.6508566950632596, "learning_rate": 1.1409570154095701e-05, "loss": 0.6246, "step": 16666 }, { "epoch": 0.48661352953198445, "grad_norm": 0.6732461786972653, "learning_rate": 1.1408921330089213e-05, "loss": 0.6712, "step": 16667 }, { "epoch": 0.4866427257598318, "grad_norm": 0.690956914976679, "learning_rate": 1.1408272506082727e-05, "loss": 0.6741, "step": 16668 }, { "epoch": 0.48667192198767917, "grad_norm": 0.6135531750223298, "learning_rate": 1.1407623682076239e-05, "loss": 0.5602, "step": 16669 }, { "epoch": 0.48670111821552653, "grad_norm": 0.7161245883343925, "learning_rate": 1.1406974858069751e-05, "loss": 0.6925, "step": 16670 }, { "epoch": 0.4867303144433739, "grad_norm": 0.7207497515416617, "learning_rate": 1.1406326034063261e-05, "loss": 0.6612, "step": 16671 }, { "epoch": 0.48675951067122125, "grad_norm": 0.6160820221972414, "learning_rate": 1.1405677210056773e-05, "loss": 0.5681, "step": 16672 }, { "epoch": 0.4867887068990686, "grad_norm": 0.6231484554747875, "learning_rate": 1.1405028386050285e-05, "loss": 0.5782, "step": 16673 }, { "epoch": 0.48681790312691603, "grad_norm": 0.647715033180345, "learning_rate": 1.1404379562043797e-05, "loss": 0.6249, "step": 16674 }, { "epoch": 0.4868470993547634, "grad_norm": 0.6124064823177945, "learning_rate": 1.140373073803731e-05, "loss": 0.6073, "step": 16675 }, { "epoch": 0.48687629558261075, "grad_norm": 0.6193848861498671, "learning_rate": 1.140308191403082e-05, "loss": 0.6057, "step": 16676 }, { "epoch": 0.4869054918104581, "grad_norm": 0.6554030111457712, "learning_rate": 1.1402433090024332e-05, "loss": 0.6091, "step": 16677 }, { "epoch": 0.4869346880383055, "grad_norm": 0.6677246101597586, "learning_rate": 1.1401784266017844e-05, "loss": 0.6394, "step": 16678 }, { "epoch": 0.48696388426615284, "grad_norm": 0.6886360481317673, "learning_rate": 1.1401135442011356e-05, "loss": 0.691, "step": 16679 }, { "epoch": 0.4869930804940002, "grad_norm": 0.7295860662622837, "learning_rate": 1.1400486618004866e-05, "loss": 0.6049, "step": 16680 }, { "epoch": 0.48702227672184756, "grad_norm": 0.640222497210231, "learning_rate": 1.1399837793998378e-05, "loss": 0.6352, "step": 16681 }, { "epoch": 0.4870514729496949, "grad_norm": 0.6263779192894309, "learning_rate": 1.139918896999189e-05, "loss": 0.5554, "step": 16682 }, { "epoch": 0.4870806691775423, "grad_norm": 0.7205978558416202, "learning_rate": 1.1398540145985402e-05, "loss": 0.6623, "step": 16683 }, { "epoch": 0.48710986540538964, "grad_norm": 0.631894285177492, "learning_rate": 1.1397891321978913e-05, "loss": 0.5955, "step": 16684 }, { "epoch": 0.487139061633237, "grad_norm": 0.634296360084634, "learning_rate": 1.1397242497972425e-05, "loss": 0.5671, "step": 16685 }, { "epoch": 0.48716825786108436, "grad_norm": 0.6338716868816945, "learning_rate": 1.1396593673965937e-05, "loss": 0.6292, "step": 16686 }, { "epoch": 0.4871974540889317, "grad_norm": 0.7040424353857359, "learning_rate": 1.139594484995945e-05, "loss": 0.744, "step": 16687 }, { "epoch": 0.4872266503167791, "grad_norm": 0.6330191575428294, "learning_rate": 1.1395296025952962e-05, "loss": 0.5573, "step": 16688 }, { "epoch": 0.48725584654462645, "grad_norm": 0.6722841443854193, "learning_rate": 1.1394647201946474e-05, "loss": 0.6452, "step": 16689 }, { "epoch": 0.4872850427724738, "grad_norm": 0.6393997859156296, "learning_rate": 1.1393998377939985e-05, "loss": 0.6157, "step": 16690 }, { "epoch": 0.48731423900032117, "grad_norm": 0.647992724398201, "learning_rate": 1.1393349553933497e-05, "loss": 0.6416, "step": 16691 }, { "epoch": 0.48734343522816853, "grad_norm": 0.6271114954024941, "learning_rate": 1.1392700729927009e-05, "loss": 0.6443, "step": 16692 }, { "epoch": 0.4873726314560159, "grad_norm": 0.6768342598411377, "learning_rate": 1.139205190592052e-05, "loss": 0.6606, "step": 16693 }, { "epoch": 0.48740182768386325, "grad_norm": 0.6370049600820079, "learning_rate": 1.1391403081914033e-05, "loss": 0.5401, "step": 16694 }, { "epoch": 0.4874310239117106, "grad_norm": 0.6437182603991095, "learning_rate": 1.1390754257907543e-05, "loss": 0.6024, "step": 16695 }, { "epoch": 0.487460220139558, "grad_norm": 0.5709569834503029, "learning_rate": 1.1390105433901055e-05, "loss": 0.4778, "step": 16696 }, { "epoch": 0.48748941636740534, "grad_norm": 0.6607848815477726, "learning_rate": 1.1389456609894567e-05, "loss": 0.5995, "step": 16697 }, { "epoch": 0.4875186125952527, "grad_norm": 0.6718155443573838, "learning_rate": 1.138880778588808e-05, "loss": 0.6949, "step": 16698 }, { "epoch": 0.48754780882310006, "grad_norm": 0.6650991748180667, "learning_rate": 1.138815896188159e-05, "loss": 0.6677, "step": 16699 }, { "epoch": 0.4875770050509474, "grad_norm": 0.6517681286453897, "learning_rate": 1.1387510137875102e-05, "loss": 0.5993, "step": 16700 }, { "epoch": 0.4876062012787948, "grad_norm": 0.5825957493133548, "learning_rate": 1.1386861313868614e-05, "loss": 0.5138, "step": 16701 }, { "epoch": 0.48763539750664214, "grad_norm": 0.6797099905908512, "learning_rate": 1.1386212489862126e-05, "loss": 0.6094, "step": 16702 }, { "epoch": 0.4876645937344895, "grad_norm": 0.6153127786321432, "learning_rate": 1.1385563665855636e-05, "loss": 0.5817, "step": 16703 }, { "epoch": 0.48769378996233687, "grad_norm": 0.6590282149694298, "learning_rate": 1.1384914841849148e-05, "loss": 0.642, "step": 16704 }, { "epoch": 0.4877229861901842, "grad_norm": 0.6478625931623381, "learning_rate": 1.138426601784266e-05, "loss": 0.5681, "step": 16705 }, { "epoch": 0.4877521824180316, "grad_norm": 0.6366255690141973, "learning_rate": 1.1383617193836174e-05, "loss": 0.6022, "step": 16706 }, { "epoch": 0.48778137864587895, "grad_norm": 0.6234906314025929, "learning_rate": 1.1382968369829686e-05, "loss": 0.6381, "step": 16707 }, { "epoch": 0.4878105748737263, "grad_norm": 0.6040745417470861, "learning_rate": 1.1382319545823198e-05, "loss": 0.5264, "step": 16708 }, { "epoch": 0.48783977110157367, "grad_norm": 0.5876648091448363, "learning_rate": 1.1381670721816708e-05, "loss": 0.5228, "step": 16709 }, { "epoch": 0.48786896732942103, "grad_norm": 0.6251649874064105, "learning_rate": 1.138102189781022e-05, "loss": 0.5402, "step": 16710 }, { "epoch": 0.4878981635572684, "grad_norm": 0.6304770178192832, "learning_rate": 1.1380373073803732e-05, "loss": 0.6247, "step": 16711 }, { "epoch": 0.48792735978511576, "grad_norm": 0.5852488069904201, "learning_rate": 1.1379724249797244e-05, "loss": 0.5388, "step": 16712 }, { "epoch": 0.4879565560129631, "grad_norm": 0.7595087607573201, "learning_rate": 1.1379075425790756e-05, "loss": 0.6619, "step": 16713 }, { "epoch": 0.4879857522408105, "grad_norm": 0.5854091813523223, "learning_rate": 1.1378426601784267e-05, "loss": 0.5123, "step": 16714 }, { "epoch": 0.48801494846865784, "grad_norm": 0.7060335223969548, "learning_rate": 1.1377777777777779e-05, "loss": 0.7476, "step": 16715 }, { "epoch": 0.4880441446965052, "grad_norm": 0.562164171232462, "learning_rate": 1.137712895377129e-05, "loss": 0.5066, "step": 16716 }, { "epoch": 0.48807334092435256, "grad_norm": 0.6189200837154064, "learning_rate": 1.1376480129764803e-05, "loss": 0.5802, "step": 16717 }, { "epoch": 0.4881025371521999, "grad_norm": 0.6413050613787462, "learning_rate": 1.1375831305758313e-05, "loss": 0.6082, "step": 16718 }, { "epoch": 0.4881317333800473, "grad_norm": 0.6134112893360493, "learning_rate": 1.1375182481751825e-05, "loss": 0.5552, "step": 16719 }, { "epoch": 0.48816092960789464, "grad_norm": 0.6812813065485483, "learning_rate": 1.1374533657745337e-05, "loss": 0.6018, "step": 16720 }, { "epoch": 0.488190125835742, "grad_norm": 0.5704960636085995, "learning_rate": 1.1373884833738849e-05, "loss": 0.5412, "step": 16721 }, { "epoch": 0.48821932206358937, "grad_norm": 0.6302580732899238, "learning_rate": 1.137323600973236e-05, "loss": 0.5745, "step": 16722 }, { "epoch": 0.48824851829143673, "grad_norm": 0.6326254211664116, "learning_rate": 1.1372587185725871e-05, "loss": 0.5994, "step": 16723 }, { "epoch": 0.4882777145192841, "grad_norm": 0.576390321707558, "learning_rate": 1.1371938361719383e-05, "loss": 0.513, "step": 16724 }, { "epoch": 0.48830691074713145, "grad_norm": 0.6510819758820424, "learning_rate": 1.1371289537712897e-05, "loss": 0.6538, "step": 16725 }, { "epoch": 0.4883361069749788, "grad_norm": 0.6257471589727753, "learning_rate": 1.137064071370641e-05, "loss": 0.6186, "step": 16726 }, { "epoch": 0.4883653032028262, "grad_norm": 0.6181190042888407, "learning_rate": 1.1369991889699921e-05, "loss": 0.6221, "step": 16727 }, { "epoch": 0.48839449943067353, "grad_norm": 0.5858855022327875, "learning_rate": 1.1369343065693432e-05, "loss": 0.4771, "step": 16728 }, { "epoch": 0.4884236956585209, "grad_norm": 0.6602376777458834, "learning_rate": 1.1368694241686944e-05, "loss": 0.6718, "step": 16729 }, { "epoch": 0.48845289188636826, "grad_norm": 0.6346907188012019, "learning_rate": 1.1368045417680456e-05, "loss": 0.6188, "step": 16730 }, { "epoch": 0.4884820881142156, "grad_norm": 0.6902104237192769, "learning_rate": 1.1367396593673968e-05, "loss": 0.7299, "step": 16731 }, { "epoch": 0.488511284342063, "grad_norm": 0.6212801717191908, "learning_rate": 1.136674776966748e-05, "loss": 0.6052, "step": 16732 }, { "epoch": 0.48854048056991034, "grad_norm": 0.6443333305548725, "learning_rate": 1.136609894566099e-05, "loss": 0.573, "step": 16733 }, { "epoch": 0.48856967679775776, "grad_norm": 0.6162986958529151, "learning_rate": 1.1365450121654502e-05, "loss": 0.6074, "step": 16734 }, { "epoch": 0.4885988730256051, "grad_norm": 0.6448390242548006, "learning_rate": 1.1364801297648014e-05, "loss": 0.618, "step": 16735 }, { "epoch": 0.4886280692534525, "grad_norm": 0.6938020249565006, "learning_rate": 1.1364152473641526e-05, "loss": 0.6987, "step": 16736 }, { "epoch": 0.48865726548129984, "grad_norm": 0.7156453172113508, "learning_rate": 1.1363503649635036e-05, "loss": 0.763, "step": 16737 }, { "epoch": 0.4886864617091472, "grad_norm": 0.6079322991795678, "learning_rate": 1.1362854825628548e-05, "loss": 0.5464, "step": 16738 }, { "epoch": 0.48871565793699456, "grad_norm": 0.6316713654381485, "learning_rate": 1.136220600162206e-05, "loss": 0.5742, "step": 16739 }, { "epoch": 0.4887448541648419, "grad_norm": 0.6581010897579709, "learning_rate": 1.1361557177615572e-05, "loss": 0.6873, "step": 16740 }, { "epoch": 0.4887740503926893, "grad_norm": 0.6407284555448176, "learning_rate": 1.1360908353609083e-05, "loss": 0.6201, "step": 16741 }, { "epoch": 0.48880324662053665, "grad_norm": 0.6076884237275995, "learning_rate": 1.1360259529602595e-05, "loss": 0.557, "step": 16742 }, { "epoch": 0.488832442848384, "grad_norm": 0.6258609138594622, "learning_rate": 1.1359610705596107e-05, "loss": 0.585, "step": 16743 }, { "epoch": 0.48886163907623137, "grad_norm": 0.6990684629125581, "learning_rate": 1.135896188158962e-05, "loss": 0.6753, "step": 16744 }, { "epoch": 0.48889083530407873, "grad_norm": 0.646571536842035, "learning_rate": 1.1358313057583133e-05, "loss": 0.6346, "step": 16745 }, { "epoch": 0.4889200315319261, "grad_norm": 0.6604237193111034, "learning_rate": 1.1357664233576645e-05, "loss": 0.6069, "step": 16746 }, { "epoch": 0.48894922775977345, "grad_norm": 0.684624626269167, "learning_rate": 1.1357015409570155e-05, "loss": 0.6776, "step": 16747 }, { "epoch": 0.4889784239876208, "grad_norm": 0.7059222003905461, "learning_rate": 1.1356366585563667e-05, "loss": 0.717, "step": 16748 }, { "epoch": 0.4890076202154682, "grad_norm": 0.6490130633324871, "learning_rate": 1.1355717761557179e-05, "loss": 0.6139, "step": 16749 }, { "epoch": 0.48903681644331554, "grad_norm": 0.6564660617306757, "learning_rate": 1.1355068937550691e-05, "loss": 0.6334, "step": 16750 }, { "epoch": 0.4890660126711629, "grad_norm": 0.6475320475309166, "learning_rate": 1.1354420113544203e-05, "loss": 0.5871, "step": 16751 }, { "epoch": 0.48909520889901026, "grad_norm": 0.6361223222873799, "learning_rate": 1.1353771289537713e-05, "loss": 0.615, "step": 16752 }, { "epoch": 0.4891244051268576, "grad_norm": 0.662142779028995, "learning_rate": 1.1353122465531225e-05, "loss": 0.6122, "step": 16753 }, { "epoch": 0.489153601354705, "grad_norm": 0.6539020048689812, "learning_rate": 1.1352473641524737e-05, "loss": 0.646, "step": 16754 }, { "epoch": 0.48918279758255234, "grad_norm": 0.6498984185617513, "learning_rate": 1.135182481751825e-05, "loss": 0.5955, "step": 16755 }, { "epoch": 0.4892119938103997, "grad_norm": 0.6558907217844764, "learning_rate": 1.135117599351176e-05, "loss": 0.6246, "step": 16756 }, { "epoch": 0.48924119003824706, "grad_norm": 0.673863333108471, "learning_rate": 1.1350527169505272e-05, "loss": 0.7018, "step": 16757 }, { "epoch": 0.4892703862660944, "grad_norm": 0.5985100572866977, "learning_rate": 1.1349878345498784e-05, "loss": 0.4941, "step": 16758 }, { "epoch": 0.4892995824939418, "grad_norm": 0.6976364671500177, "learning_rate": 1.1349229521492296e-05, "loss": 0.7028, "step": 16759 }, { "epoch": 0.48932877872178915, "grad_norm": 0.6568301537190365, "learning_rate": 1.1348580697485806e-05, "loss": 0.6839, "step": 16760 }, { "epoch": 0.4893579749496365, "grad_norm": 0.6556435408617194, "learning_rate": 1.1347931873479318e-05, "loss": 0.6315, "step": 16761 }, { "epoch": 0.48938717117748387, "grad_norm": 0.6304505528880607, "learning_rate": 1.134728304947283e-05, "loss": 0.6147, "step": 16762 }, { "epoch": 0.48941636740533123, "grad_norm": 0.676982488699923, "learning_rate": 1.1346634225466344e-05, "loss": 0.6671, "step": 16763 }, { "epoch": 0.4894455636331786, "grad_norm": 0.6348604475480721, "learning_rate": 1.1345985401459856e-05, "loss": 0.6119, "step": 16764 }, { "epoch": 0.48947475986102595, "grad_norm": 0.6345807083609313, "learning_rate": 1.1345336577453368e-05, "loss": 0.5414, "step": 16765 }, { "epoch": 0.4895039560888733, "grad_norm": 0.648080418599035, "learning_rate": 1.1344687753446878e-05, "loss": 0.6135, "step": 16766 }, { "epoch": 0.4895331523167207, "grad_norm": 0.5830212947483446, "learning_rate": 1.134403892944039e-05, "loss": 0.5222, "step": 16767 }, { "epoch": 0.48956234854456804, "grad_norm": 0.6170700578044478, "learning_rate": 1.1343390105433902e-05, "loss": 0.5761, "step": 16768 }, { "epoch": 0.4895915447724154, "grad_norm": 0.6533969507003737, "learning_rate": 1.1342741281427414e-05, "loss": 0.6043, "step": 16769 }, { "epoch": 0.48962074100026276, "grad_norm": 0.6432299933368363, "learning_rate": 1.1342092457420927e-05, "loss": 0.5985, "step": 16770 }, { "epoch": 0.4896499372281101, "grad_norm": 0.6513035011499995, "learning_rate": 1.1341443633414437e-05, "loss": 0.5912, "step": 16771 }, { "epoch": 0.4896791334559575, "grad_norm": 0.6702693715825774, "learning_rate": 1.1340794809407949e-05, "loss": 0.6308, "step": 16772 }, { "epoch": 0.48970832968380484, "grad_norm": 0.6048126705469502, "learning_rate": 1.1340145985401461e-05, "loss": 0.5778, "step": 16773 }, { "epoch": 0.4897375259116522, "grad_norm": 0.6318235392170943, "learning_rate": 1.1339497161394973e-05, "loss": 0.5985, "step": 16774 }, { "epoch": 0.48976672213949957, "grad_norm": 0.650744877661936, "learning_rate": 1.1338848337388483e-05, "loss": 0.5963, "step": 16775 }, { "epoch": 0.4897959183673469, "grad_norm": 0.6532926433594668, "learning_rate": 1.1338199513381995e-05, "loss": 0.6148, "step": 16776 }, { "epoch": 0.4898251145951943, "grad_norm": 0.6549751483143942, "learning_rate": 1.1337550689375507e-05, "loss": 0.668, "step": 16777 }, { "epoch": 0.48985431082304165, "grad_norm": 0.6566249815711709, "learning_rate": 1.133690186536902e-05, "loss": 0.6106, "step": 16778 }, { "epoch": 0.489883507050889, "grad_norm": 0.6901778829493095, "learning_rate": 1.133625304136253e-05, "loss": 0.6602, "step": 16779 }, { "epoch": 0.4899127032787364, "grad_norm": 0.658033327437999, "learning_rate": 1.1335604217356042e-05, "loss": 0.632, "step": 16780 }, { "epoch": 0.48994189950658373, "grad_norm": 0.686733679097685, "learning_rate": 1.1334955393349555e-05, "loss": 0.7375, "step": 16781 }, { "epoch": 0.4899710957344311, "grad_norm": 0.6129029927946947, "learning_rate": 1.1334306569343067e-05, "loss": 0.5541, "step": 16782 }, { "epoch": 0.49000029196227846, "grad_norm": 0.6657535818394288, "learning_rate": 1.133365774533658e-05, "loss": 0.6253, "step": 16783 }, { "epoch": 0.4900294881901258, "grad_norm": 0.658888007284091, "learning_rate": 1.1333008921330092e-05, "loss": 0.5998, "step": 16784 }, { "epoch": 0.4900586844179732, "grad_norm": 0.6372732308029767, "learning_rate": 1.1332360097323602e-05, "loss": 0.5558, "step": 16785 }, { "epoch": 0.49008788064582054, "grad_norm": 0.6608277766051734, "learning_rate": 1.1331711273317114e-05, "loss": 0.6534, "step": 16786 }, { "epoch": 0.4901170768736679, "grad_norm": 0.6266502614598436, "learning_rate": 1.1331062449310626e-05, "loss": 0.5657, "step": 16787 }, { "epoch": 0.49014627310151526, "grad_norm": 0.6337078761956788, "learning_rate": 1.1330413625304138e-05, "loss": 0.5661, "step": 16788 }, { "epoch": 0.4901754693293626, "grad_norm": 0.6250367140116098, "learning_rate": 1.1329764801297648e-05, "loss": 0.5998, "step": 16789 }, { "epoch": 0.49020466555721, "grad_norm": 0.6477096582830167, "learning_rate": 1.132911597729116e-05, "loss": 0.6457, "step": 16790 }, { "epoch": 0.49023386178505735, "grad_norm": 0.6649836060295222, "learning_rate": 1.1328467153284672e-05, "loss": 0.6354, "step": 16791 }, { "epoch": 0.4902630580129047, "grad_norm": 0.6321983857794163, "learning_rate": 1.1327818329278184e-05, "loss": 0.5985, "step": 16792 }, { "epoch": 0.49029225424075207, "grad_norm": 0.6664773577223753, "learning_rate": 1.1327169505271696e-05, "loss": 0.6712, "step": 16793 }, { "epoch": 0.4903214504685995, "grad_norm": 0.6209464966104946, "learning_rate": 1.1326520681265207e-05, "loss": 0.606, "step": 16794 }, { "epoch": 0.49035064669644685, "grad_norm": 0.6766712875765124, "learning_rate": 1.1325871857258719e-05, "loss": 0.6794, "step": 16795 }, { "epoch": 0.4903798429242942, "grad_norm": 0.6686436965292399, "learning_rate": 1.132522303325223e-05, "loss": 0.6401, "step": 16796 }, { "epoch": 0.49040903915214157, "grad_norm": 0.6308192312231636, "learning_rate": 1.1324574209245743e-05, "loss": 0.579, "step": 16797 }, { "epoch": 0.49043823537998893, "grad_norm": 0.602703501623793, "learning_rate": 1.1323925385239253e-05, "loss": 0.5674, "step": 16798 }, { "epoch": 0.4904674316078363, "grad_norm": 0.6417924223473866, "learning_rate": 1.1323276561232765e-05, "loss": 0.6219, "step": 16799 }, { "epoch": 0.49049662783568365, "grad_norm": 0.6543597133843794, "learning_rate": 1.1322627737226279e-05, "loss": 0.6169, "step": 16800 }, { "epoch": 0.490525824063531, "grad_norm": 0.6281338825271185, "learning_rate": 1.1321978913219791e-05, "loss": 0.6208, "step": 16801 }, { "epoch": 0.4905550202913784, "grad_norm": 0.6374684789233949, "learning_rate": 1.1321330089213303e-05, "loss": 0.6071, "step": 16802 }, { "epoch": 0.49058421651922574, "grad_norm": 0.56880769687339, "learning_rate": 1.1320681265206815e-05, "loss": 0.5226, "step": 16803 }, { "epoch": 0.4906134127470731, "grad_norm": 0.680618482239351, "learning_rate": 1.1320032441200325e-05, "loss": 0.6407, "step": 16804 }, { "epoch": 0.49064260897492046, "grad_norm": 0.6788883594676717, "learning_rate": 1.1319383617193837e-05, "loss": 0.6197, "step": 16805 }, { "epoch": 0.4906718052027678, "grad_norm": 0.626176080602468, "learning_rate": 1.131873479318735e-05, "loss": 0.6307, "step": 16806 }, { "epoch": 0.4907010014306152, "grad_norm": 0.6809052068846504, "learning_rate": 1.1318085969180861e-05, "loss": 0.6849, "step": 16807 }, { "epoch": 0.49073019765846254, "grad_norm": 0.6685485261204771, "learning_rate": 1.1317437145174372e-05, "loss": 0.6042, "step": 16808 }, { "epoch": 0.4907593938863099, "grad_norm": 0.5664714834994582, "learning_rate": 1.1316788321167884e-05, "loss": 0.5085, "step": 16809 }, { "epoch": 0.49078859011415726, "grad_norm": 0.6747396410226799, "learning_rate": 1.1316139497161396e-05, "loss": 0.6519, "step": 16810 }, { "epoch": 0.4908177863420046, "grad_norm": 0.6173101763262214, "learning_rate": 1.1315490673154908e-05, "loss": 0.5664, "step": 16811 }, { "epoch": 0.490846982569852, "grad_norm": 0.6257044005930148, "learning_rate": 1.131484184914842e-05, "loss": 0.6077, "step": 16812 }, { "epoch": 0.49087617879769935, "grad_norm": 0.5895920577131536, "learning_rate": 1.131419302514193e-05, "loss": 0.5084, "step": 16813 }, { "epoch": 0.4909053750255467, "grad_norm": 0.6501536120878059, "learning_rate": 1.1313544201135442e-05, "loss": 0.6199, "step": 16814 }, { "epoch": 0.49093457125339407, "grad_norm": 0.6865385885131438, "learning_rate": 1.1312895377128954e-05, "loss": 0.6567, "step": 16815 }, { "epoch": 0.49096376748124143, "grad_norm": 0.5922333092333277, "learning_rate": 1.1312246553122466e-05, "loss": 0.5621, "step": 16816 }, { "epoch": 0.4909929637090888, "grad_norm": 0.7513545621619758, "learning_rate": 1.1311597729115977e-05, "loss": 0.6367, "step": 16817 }, { "epoch": 0.49102215993693615, "grad_norm": 0.7056993292097745, "learning_rate": 1.1310948905109489e-05, "loss": 0.6261, "step": 16818 }, { "epoch": 0.4910513561647835, "grad_norm": 0.606484174727317, "learning_rate": 1.1310300081103002e-05, "loss": 0.5723, "step": 16819 }, { "epoch": 0.4910805523926309, "grad_norm": 0.6358115072563117, "learning_rate": 1.1309651257096514e-05, "loss": 0.6236, "step": 16820 }, { "epoch": 0.49110974862047824, "grad_norm": 0.6770077096970746, "learning_rate": 1.1309002433090026e-05, "loss": 0.6698, "step": 16821 }, { "epoch": 0.4911389448483256, "grad_norm": 0.661788605131015, "learning_rate": 1.1308353609083538e-05, "loss": 0.6676, "step": 16822 }, { "epoch": 0.49116814107617296, "grad_norm": 0.660376986876364, "learning_rate": 1.1307704785077049e-05, "loss": 0.653, "step": 16823 }, { "epoch": 0.4911973373040203, "grad_norm": 0.6941907648862367, "learning_rate": 1.130705596107056e-05, "loss": 0.6837, "step": 16824 }, { "epoch": 0.4912265335318677, "grad_norm": 0.6692894668114812, "learning_rate": 1.1306407137064073e-05, "loss": 0.6397, "step": 16825 }, { "epoch": 0.49125572975971504, "grad_norm": 0.6845180926920686, "learning_rate": 1.1305758313057585e-05, "loss": 0.6689, "step": 16826 }, { "epoch": 0.4912849259875624, "grad_norm": 0.6846452541746927, "learning_rate": 1.1305109489051095e-05, "loss": 0.6581, "step": 16827 }, { "epoch": 0.49131412221540977, "grad_norm": 0.6355514114064059, "learning_rate": 1.1304460665044607e-05, "loss": 0.6108, "step": 16828 }, { "epoch": 0.4913433184432571, "grad_norm": 0.6014690752169688, "learning_rate": 1.130381184103812e-05, "loss": 0.6045, "step": 16829 }, { "epoch": 0.4913725146711045, "grad_norm": 0.6344770315582684, "learning_rate": 1.1303163017031631e-05, "loss": 0.6006, "step": 16830 }, { "epoch": 0.49140171089895185, "grad_norm": 0.6652176476789696, "learning_rate": 1.1302514193025143e-05, "loss": 0.6048, "step": 16831 }, { "epoch": 0.4914309071267992, "grad_norm": 0.6123230316414413, "learning_rate": 1.1301865369018654e-05, "loss": 0.5988, "step": 16832 }, { "epoch": 0.49146010335464657, "grad_norm": 0.7636838588672473, "learning_rate": 1.1301216545012166e-05, "loss": 0.6433, "step": 16833 }, { "epoch": 0.49148929958249393, "grad_norm": 0.6617734108325681, "learning_rate": 1.1300567721005678e-05, "loss": 0.6114, "step": 16834 }, { "epoch": 0.4915184958103413, "grad_norm": 0.622715027244109, "learning_rate": 1.129991889699919e-05, "loss": 0.5723, "step": 16835 }, { "epoch": 0.49154769203818865, "grad_norm": 0.6140437611985958, "learning_rate": 1.12992700729927e-05, "loss": 0.5464, "step": 16836 }, { "epoch": 0.491576888266036, "grad_norm": 0.6774712375954491, "learning_rate": 1.1298621248986212e-05, "loss": 0.6407, "step": 16837 }, { "epoch": 0.4916060844938834, "grad_norm": 0.6326105784842596, "learning_rate": 1.1297972424979726e-05, "loss": 0.6293, "step": 16838 }, { "epoch": 0.49163528072173074, "grad_norm": 0.6053077101213375, "learning_rate": 1.1297323600973238e-05, "loss": 0.551, "step": 16839 }, { "epoch": 0.4916644769495781, "grad_norm": 0.6016458455652585, "learning_rate": 1.129667477696675e-05, "loss": 0.5414, "step": 16840 }, { "epoch": 0.49169367317742546, "grad_norm": 0.6178993165201601, "learning_rate": 1.1296025952960262e-05, "loss": 0.6012, "step": 16841 }, { "epoch": 0.4917228694052728, "grad_norm": 0.6930402309255511, "learning_rate": 1.1295377128953772e-05, "loss": 0.7068, "step": 16842 }, { "epoch": 0.4917520656331202, "grad_norm": 0.6458182644880625, "learning_rate": 1.1294728304947284e-05, "loss": 0.6496, "step": 16843 }, { "epoch": 0.49178126186096754, "grad_norm": 0.6175835100225594, "learning_rate": 1.1294079480940796e-05, "loss": 0.6072, "step": 16844 }, { "epoch": 0.4918104580888149, "grad_norm": 0.6546984334023598, "learning_rate": 1.1293430656934308e-05, "loss": 0.6172, "step": 16845 }, { "epoch": 0.49183965431666227, "grad_norm": 0.6649698459767993, "learning_rate": 1.1292781832927819e-05, "loss": 0.6303, "step": 16846 }, { "epoch": 0.49186885054450963, "grad_norm": 0.6256861316055005, "learning_rate": 1.129213300892133e-05, "loss": 0.6255, "step": 16847 }, { "epoch": 0.491898046772357, "grad_norm": 0.6702027991644982, "learning_rate": 1.1291484184914843e-05, "loss": 0.6342, "step": 16848 }, { "epoch": 0.49192724300020435, "grad_norm": 0.7049057566499616, "learning_rate": 1.1290835360908355e-05, "loss": 0.6382, "step": 16849 }, { "epoch": 0.4919564392280517, "grad_norm": 0.6117725481184341, "learning_rate": 1.1290186536901867e-05, "loss": 0.5975, "step": 16850 }, { "epoch": 0.4919856354558991, "grad_norm": 0.6357720134818682, "learning_rate": 1.1289537712895377e-05, "loss": 0.5963, "step": 16851 }, { "epoch": 0.49201483168374643, "grad_norm": 0.6528547660251172, "learning_rate": 1.1288888888888889e-05, "loss": 0.5667, "step": 16852 }, { "epoch": 0.4920440279115938, "grad_norm": 0.6158003029899382, "learning_rate": 1.1288240064882401e-05, "loss": 0.5848, "step": 16853 }, { "epoch": 0.49207322413944116, "grad_norm": 0.6674416873620295, "learning_rate": 1.1287591240875913e-05, "loss": 0.667, "step": 16854 }, { "epoch": 0.4921024203672886, "grad_norm": 0.5930198643448107, "learning_rate": 1.1286942416869423e-05, "loss": 0.5382, "step": 16855 }, { "epoch": 0.49213161659513593, "grad_norm": 0.6981437888235427, "learning_rate": 1.1286293592862935e-05, "loss": 0.5956, "step": 16856 }, { "epoch": 0.4921608128229833, "grad_norm": 0.6695812203618045, "learning_rate": 1.128564476885645e-05, "loss": 0.6199, "step": 16857 }, { "epoch": 0.49219000905083066, "grad_norm": 0.6651442948596898, "learning_rate": 1.1284995944849961e-05, "loss": 0.7092, "step": 16858 }, { "epoch": 0.492219205278678, "grad_norm": 0.6502033917316689, "learning_rate": 1.1284347120843473e-05, "loss": 0.6239, "step": 16859 }, { "epoch": 0.4922484015065254, "grad_norm": 0.6203035509073312, "learning_rate": 1.1283698296836985e-05, "loss": 0.6207, "step": 16860 }, { "epoch": 0.49227759773437274, "grad_norm": 0.645003844478909, "learning_rate": 1.1283049472830496e-05, "loss": 0.6389, "step": 16861 }, { "epoch": 0.4923067939622201, "grad_norm": 0.6246816130904544, "learning_rate": 1.1282400648824008e-05, "loss": 0.6233, "step": 16862 }, { "epoch": 0.49233599019006746, "grad_norm": 0.5703172181991707, "learning_rate": 1.128175182481752e-05, "loss": 0.5091, "step": 16863 }, { "epoch": 0.4923651864179148, "grad_norm": 0.6728622223268854, "learning_rate": 1.1281103000811032e-05, "loss": 0.6817, "step": 16864 }, { "epoch": 0.4923943826457622, "grad_norm": 0.6600716263170275, "learning_rate": 1.1280454176804542e-05, "loss": 0.657, "step": 16865 }, { "epoch": 0.49242357887360955, "grad_norm": 0.621099999430602, "learning_rate": 1.1279805352798054e-05, "loss": 0.6264, "step": 16866 }, { "epoch": 0.4924527751014569, "grad_norm": 0.6444116019984264, "learning_rate": 1.1279156528791566e-05, "loss": 0.6192, "step": 16867 }, { "epoch": 0.49248197132930427, "grad_norm": 0.6282092911194058, "learning_rate": 1.1278507704785078e-05, "loss": 0.5577, "step": 16868 }, { "epoch": 0.49251116755715163, "grad_norm": 0.6150032096714718, "learning_rate": 1.127785888077859e-05, "loss": 0.5868, "step": 16869 }, { "epoch": 0.492540363784999, "grad_norm": 0.6538759360409504, "learning_rate": 1.12772100567721e-05, "loss": 0.5776, "step": 16870 }, { "epoch": 0.49256956001284635, "grad_norm": 0.6360494837099471, "learning_rate": 1.1276561232765612e-05, "loss": 0.5929, "step": 16871 }, { "epoch": 0.4925987562406937, "grad_norm": 0.6493183062272249, "learning_rate": 1.1275912408759124e-05, "loss": 0.6107, "step": 16872 }, { "epoch": 0.4926279524685411, "grad_norm": 0.6888829161049418, "learning_rate": 1.1275263584752637e-05, "loss": 0.6963, "step": 16873 }, { "epoch": 0.49265714869638844, "grad_norm": 0.612268149146451, "learning_rate": 1.1274614760746147e-05, "loss": 0.6051, "step": 16874 }, { "epoch": 0.4926863449242358, "grad_norm": 0.7185549232694597, "learning_rate": 1.1273965936739659e-05, "loss": 0.6534, "step": 16875 }, { "epoch": 0.49271554115208316, "grad_norm": 0.8174162276763792, "learning_rate": 1.1273317112733173e-05, "loss": 0.6635, "step": 16876 }, { "epoch": 0.4927447373799305, "grad_norm": 0.6159508388453616, "learning_rate": 1.1272668288726685e-05, "loss": 0.6164, "step": 16877 }, { "epoch": 0.4927739336077779, "grad_norm": 0.674952908032576, "learning_rate": 1.1272019464720197e-05, "loss": 0.6321, "step": 16878 }, { "epoch": 0.49280312983562524, "grad_norm": 0.6694918121395457, "learning_rate": 1.1271370640713709e-05, "loss": 0.6754, "step": 16879 }, { "epoch": 0.4928323260634726, "grad_norm": 0.6473758241205914, "learning_rate": 1.1270721816707219e-05, "loss": 0.6068, "step": 16880 }, { "epoch": 0.49286152229131996, "grad_norm": 0.6260593146380246, "learning_rate": 1.1270072992700731e-05, "loss": 0.6128, "step": 16881 }, { "epoch": 0.4928907185191673, "grad_norm": 0.5809947798790106, "learning_rate": 1.1269424168694243e-05, "loss": 0.5438, "step": 16882 }, { "epoch": 0.4929199147470147, "grad_norm": 0.7068405092296343, "learning_rate": 1.1268775344687755e-05, "loss": 0.7196, "step": 16883 }, { "epoch": 0.49294911097486205, "grad_norm": 0.6337992662905158, "learning_rate": 1.1268126520681265e-05, "loss": 0.6183, "step": 16884 }, { "epoch": 0.4929783072027094, "grad_norm": 0.6664005809801139, "learning_rate": 1.1267477696674777e-05, "loss": 0.5927, "step": 16885 }, { "epoch": 0.49300750343055677, "grad_norm": 0.6725734821971047, "learning_rate": 1.126682887266829e-05, "loss": 0.7215, "step": 16886 }, { "epoch": 0.49303669965840413, "grad_norm": 0.6514386755667035, "learning_rate": 1.1266180048661802e-05, "loss": 0.6915, "step": 16887 }, { "epoch": 0.4930658958862515, "grad_norm": 0.6666505507082088, "learning_rate": 1.1265531224655314e-05, "loss": 0.6073, "step": 16888 }, { "epoch": 0.49309509211409885, "grad_norm": 0.7281844709328479, "learning_rate": 1.1264882400648824e-05, "loss": 0.679, "step": 16889 }, { "epoch": 0.4931242883419462, "grad_norm": 0.6347463436028719, "learning_rate": 1.1264233576642336e-05, "loss": 0.6221, "step": 16890 }, { "epoch": 0.4931534845697936, "grad_norm": 0.5731978767793867, "learning_rate": 1.1263584752635848e-05, "loss": 0.515, "step": 16891 }, { "epoch": 0.49318268079764094, "grad_norm": 0.6239599499183636, "learning_rate": 1.126293592862936e-05, "loss": 0.5611, "step": 16892 }, { "epoch": 0.4932118770254883, "grad_norm": 0.6350567777746156, "learning_rate": 1.126228710462287e-05, "loss": 0.6093, "step": 16893 }, { "epoch": 0.49324107325333566, "grad_norm": 0.5990942834823275, "learning_rate": 1.1261638280616382e-05, "loss": 0.5463, "step": 16894 }, { "epoch": 0.493270269481183, "grad_norm": 0.7298927991033725, "learning_rate": 1.1260989456609896e-05, "loss": 0.6263, "step": 16895 }, { "epoch": 0.4932994657090304, "grad_norm": 0.7121830589823537, "learning_rate": 1.1260340632603408e-05, "loss": 0.6744, "step": 16896 }, { "epoch": 0.49332866193687774, "grad_norm": 0.5722096947538374, "learning_rate": 1.125969180859692e-05, "loss": 0.5109, "step": 16897 }, { "epoch": 0.4933578581647251, "grad_norm": 0.6326577871381721, "learning_rate": 1.1259042984590432e-05, "loss": 0.5818, "step": 16898 }, { "epoch": 0.49338705439257247, "grad_norm": 0.6129709917072945, "learning_rate": 1.1258394160583942e-05, "loss": 0.5641, "step": 16899 }, { "epoch": 0.4934162506204198, "grad_norm": 0.673001927760453, "learning_rate": 1.1257745336577454e-05, "loss": 0.7274, "step": 16900 }, { "epoch": 0.4934454468482672, "grad_norm": 0.6146482690965385, "learning_rate": 1.1257096512570967e-05, "loss": 0.5517, "step": 16901 }, { "epoch": 0.49347464307611455, "grad_norm": 0.6456777790097659, "learning_rate": 1.1256447688564479e-05, "loss": 0.6329, "step": 16902 }, { "epoch": 0.4935038393039619, "grad_norm": 0.6541003293262961, "learning_rate": 1.1255798864557989e-05, "loss": 0.6194, "step": 16903 }, { "epoch": 0.49353303553180927, "grad_norm": 0.6345060491208123, "learning_rate": 1.1255150040551501e-05, "loss": 0.56, "step": 16904 }, { "epoch": 0.49356223175965663, "grad_norm": 0.6553157052739471, "learning_rate": 1.1254501216545013e-05, "loss": 0.6182, "step": 16905 }, { "epoch": 0.493591427987504, "grad_norm": 0.6536175912450015, "learning_rate": 1.1253852392538525e-05, "loss": 0.6014, "step": 16906 }, { "epoch": 0.49362062421535136, "grad_norm": 0.6089036179906844, "learning_rate": 1.1253203568532037e-05, "loss": 0.5737, "step": 16907 }, { "epoch": 0.4936498204431987, "grad_norm": 0.6342427061757739, "learning_rate": 1.1252554744525547e-05, "loss": 0.597, "step": 16908 }, { "epoch": 0.4936790166710461, "grad_norm": 0.6345536569370199, "learning_rate": 1.125190592051906e-05, "loss": 0.6125, "step": 16909 }, { "epoch": 0.49370821289889344, "grad_norm": 0.6417235029576213, "learning_rate": 1.1251257096512571e-05, "loss": 0.5558, "step": 16910 }, { "epoch": 0.4937374091267408, "grad_norm": 0.590859645204109, "learning_rate": 1.1250608272506083e-05, "loss": 0.547, "step": 16911 }, { "epoch": 0.49376660535458816, "grad_norm": 0.6384494100390319, "learning_rate": 1.1249959448499594e-05, "loss": 0.5903, "step": 16912 }, { "epoch": 0.4937958015824355, "grad_norm": 0.643686136265205, "learning_rate": 1.1249310624493106e-05, "loss": 0.6367, "step": 16913 }, { "epoch": 0.4938249978102829, "grad_norm": 0.7307490323330338, "learning_rate": 1.124866180048662e-05, "loss": 0.6948, "step": 16914 }, { "epoch": 0.4938541940381303, "grad_norm": 0.6450562059841561, "learning_rate": 1.1248012976480132e-05, "loss": 0.6316, "step": 16915 }, { "epoch": 0.49388339026597766, "grad_norm": 0.6506082561046179, "learning_rate": 1.1247364152473644e-05, "loss": 0.6418, "step": 16916 }, { "epoch": 0.493912586493825, "grad_norm": 0.6430762887182939, "learning_rate": 1.1246715328467156e-05, "loss": 0.5802, "step": 16917 }, { "epoch": 0.4939417827216724, "grad_norm": 0.6680472374428298, "learning_rate": 1.1246066504460666e-05, "loss": 0.6285, "step": 16918 }, { "epoch": 0.49397097894951975, "grad_norm": 0.6561002040483214, "learning_rate": 1.1245417680454178e-05, "loss": 0.6196, "step": 16919 }, { "epoch": 0.4940001751773671, "grad_norm": 0.7008240417567663, "learning_rate": 1.124476885644769e-05, "loss": 0.6839, "step": 16920 }, { "epoch": 0.49402937140521447, "grad_norm": 0.629521408114877, "learning_rate": 1.1244120032441202e-05, "loss": 0.6247, "step": 16921 }, { "epoch": 0.49405856763306183, "grad_norm": 0.6667035888968265, "learning_rate": 1.1243471208434712e-05, "loss": 0.6801, "step": 16922 }, { "epoch": 0.4940877638609092, "grad_norm": 0.652149223586701, "learning_rate": 1.1242822384428224e-05, "loss": 0.6281, "step": 16923 }, { "epoch": 0.49411696008875655, "grad_norm": 0.6066153705584527, "learning_rate": 1.1242173560421736e-05, "loss": 0.5829, "step": 16924 }, { "epoch": 0.4941461563166039, "grad_norm": 0.6914490099029624, "learning_rate": 1.1241524736415248e-05, "loss": 0.7066, "step": 16925 }, { "epoch": 0.4941753525444513, "grad_norm": 0.6184846771608097, "learning_rate": 1.124087591240876e-05, "loss": 0.569, "step": 16926 }, { "epoch": 0.49420454877229864, "grad_norm": 0.688450765847365, "learning_rate": 1.124022708840227e-05, "loss": 0.66, "step": 16927 }, { "epoch": 0.494233745000146, "grad_norm": 0.622882977189149, "learning_rate": 1.1239578264395783e-05, "loss": 0.5884, "step": 16928 }, { "epoch": 0.49426294122799336, "grad_norm": 0.6106000355129114, "learning_rate": 1.1238929440389295e-05, "loss": 0.5405, "step": 16929 }, { "epoch": 0.4942921374558407, "grad_norm": 0.6102828987246145, "learning_rate": 1.1238280616382807e-05, "loss": 0.5659, "step": 16930 }, { "epoch": 0.4943213336836881, "grad_norm": 0.6523338550214014, "learning_rate": 1.1237631792376317e-05, "loss": 0.6443, "step": 16931 }, { "epoch": 0.49435052991153544, "grad_norm": 0.640175797199497, "learning_rate": 1.1236982968369831e-05, "loss": 0.6029, "step": 16932 }, { "epoch": 0.4943797261393828, "grad_norm": 0.6294779185958371, "learning_rate": 1.1236334144363343e-05, "loss": 0.55, "step": 16933 }, { "epoch": 0.49440892236723016, "grad_norm": 0.7091648283479404, "learning_rate": 1.1235685320356855e-05, "loss": 0.6774, "step": 16934 }, { "epoch": 0.4944381185950775, "grad_norm": 0.6205050803102313, "learning_rate": 1.1235036496350367e-05, "loss": 0.5992, "step": 16935 }, { "epoch": 0.4944673148229249, "grad_norm": 0.6409800330777562, "learning_rate": 1.1234387672343879e-05, "loss": 0.5989, "step": 16936 }, { "epoch": 0.49449651105077225, "grad_norm": 0.6308373305015436, "learning_rate": 1.123373884833739e-05, "loss": 0.5793, "step": 16937 }, { "epoch": 0.4945257072786196, "grad_norm": 0.6289655868287644, "learning_rate": 1.1233090024330901e-05, "loss": 0.6056, "step": 16938 }, { "epoch": 0.49455490350646697, "grad_norm": 0.644734468853283, "learning_rate": 1.1232441200324413e-05, "loss": 0.6225, "step": 16939 }, { "epoch": 0.49458409973431433, "grad_norm": 0.6564793353709429, "learning_rate": 1.1231792376317925e-05, "loss": 0.6112, "step": 16940 }, { "epoch": 0.4946132959621617, "grad_norm": 0.6426649946539017, "learning_rate": 1.1231143552311436e-05, "loss": 0.5747, "step": 16941 }, { "epoch": 0.49464249219000905, "grad_norm": 0.6704889189293511, "learning_rate": 1.1230494728304948e-05, "loss": 0.5998, "step": 16942 }, { "epoch": 0.4946716884178564, "grad_norm": 0.6234826901907993, "learning_rate": 1.122984590429846e-05, "loss": 0.5658, "step": 16943 }, { "epoch": 0.4947008846457038, "grad_norm": 0.6589095159714872, "learning_rate": 1.1229197080291972e-05, "loss": 0.648, "step": 16944 }, { "epoch": 0.49473008087355114, "grad_norm": 0.6648196699497907, "learning_rate": 1.1228548256285484e-05, "loss": 0.6353, "step": 16945 }, { "epoch": 0.4947592771013985, "grad_norm": 0.6140849824070524, "learning_rate": 1.1227899432278994e-05, "loss": 0.5878, "step": 16946 }, { "epoch": 0.49478847332924586, "grad_norm": 0.6181726026692425, "learning_rate": 1.1227250608272506e-05, "loss": 0.509, "step": 16947 }, { "epoch": 0.4948176695570932, "grad_norm": 0.6215938374497733, "learning_rate": 1.1226601784266018e-05, "loss": 0.5828, "step": 16948 }, { "epoch": 0.4948468657849406, "grad_norm": 0.6058426110101024, "learning_rate": 1.122595296025953e-05, "loss": 0.5929, "step": 16949 }, { "epoch": 0.49487606201278794, "grad_norm": 0.6417912506662984, "learning_rate": 1.122530413625304e-05, "loss": 0.6051, "step": 16950 }, { "epoch": 0.4949052582406353, "grad_norm": 0.6994371454587623, "learning_rate": 1.1224655312246554e-05, "loss": 0.6673, "step": 16951 }, { "epoch": 0.49493445446848267, "grad_norm": 0.6660384698306062, "learning_rate": 1.1224006488240066e-05, "loss": 0.6494, "step": 16952 }, { "epoch": 0.49496365069633, "grad_norm": 0.6523370686546361, "learning_rate": 1.1223357664233578e-05, "loss": 0.6626, "step": 16953 }, { "epoch": 0.4949928469241774, "grad_norm": 0.6377603611745414, "learning_rate": 1.122270884022709e-05, "loss": 0.5793, "step": 16954 }, { "epoch": 0.49502204315202475, "grad_norm": 0.614412268224158, "learning_rate": 1.1222060016220602e-05, "loss": 0.5822, "step": 16955 }, { "epoch": 0.4950512393798721, "grad_norm": 0.5891143824388267, "learning_rate": 1.1221411192214113e-05, "loss": 0.5528, "step": 16956 }, { "epoch": 0.49508043560771947, "grad_norm": 0.6285182980535567, "learning_rate": 1.1220762368207625e-05, "loss": 0.6378, "step": 16957 }, { "epoch": 0.49510963183556683, "grad_norm": 0.6927571406220709, "learning_rate": 1.1220113544201137e-05, "loss": 0.6489, "step": 16958 }, { "epoch": 0.4951388280634142, "grad_norm": 0.5686677316337074, "learning_rate": 1.1219464720194649e-05, "loss": 0.524, "step": 16959 }, { "epoch": 0.49516802429126155, "grad_norm": 0.6395161068062396, "learning_rate": 1.121881589618816e-05, "loss": 0.6066, "step": 16960 }, { "epoch": 0.4951972205191089, "grad_norm": 0.6622914124130824, "learning_rate": 1.1218167072181671e-05, "loss": 0.6675, "step": 16961 }, { "epoch": 0.4952264167469563, "grad_norm": 0.6451010281550799, "learning_rate": 1.1217518248175183e-05, "loss": 0.5771, "step": 16962 }, { "epoch": 0.49525561297480364, "grad_norm": 0.6289325570429355, "learning_rate": 1.1216869424168695e-05, "loss": 0.6183, "step": 16963 }, { "epoch": 0.495284809202651, "grad_norm": 0.6321879314924372, "learning_rate": 1.1216220600162207e-05, "loss": 0.6538, "step": 16964 }, { "epoch": 0.49531400543049836, "grad_norm": 0.6751138064945854, "learning_rate": 1.1215571776155718e-05, "loss": 0.6697, "step": 16965 }, { "epoch": 0.4953432016583457, "grad_norm": 0.5977994367885788, "learning_rate": 1.121492295214923e-05, "loss": 0.5175, "step": 16966 }, { "epoch": 0.4953723978861931, "grad_norm": 0.6912477353173124, "learning_rate": 1.1214274128142742e-05, "loss": 0.6884, "step": 16967 }, { "epoch": 0.49540159411404044, "grad_norm": 0.6992029992677664, "learning_rate": 1.1213625304136254e-05, "loss": 0.669, "step": 16968 }, { "epoch": 0.4954307903418878, "grad_norm": 0.6847823978638936, "learning_rate": 1.1212976480129764e-05, "loss": 0.6856, "step": 16969 }, { "epoch": 0.49545998656973517, "grad_norm": 0.7003306879952372, "learning_rate": 1.1212327656123278e-05, "loss": 0.6963, "step": 16970 }, { "epoch": 0.49548918279758253, "grad_norm": 0.686604264923712, "learning_rate": 1.121167883211679e-05, "loss": 0.6745, "step": 16971 }, { "epoch": 0.4955183790254299, "grad_norm": 0.6142750643444173, "learning_rate": 1.1211030008110302e-05, "loss": 0.5628, "step": 16972 }, { "epoch": 0.49554757525327725, "grad_norm": 0.5888719965916439, "learning_rate": 1.1210381184103814e-05, "loss": 0.5577, "step": 16973 }, { "epoch": 0.4955767714811246, "grad_norm": 0.6079473949332982, "learning_rate": 1.1209732360097326e-05, "loss": 0.5342, "step": 16974 }, { "epoch": 0.49560596770897203, "grad_norm": 0.7165098092921663, "learning_rate": 1.1209083536090836e-05, "loss": 0.7473, "step": 16975 }, { "epoch": 0.4956351639368194, "grad_norm": 0.6217823834096515, "learning_rate": 1.1208434712084348e-05, "loss": 0.6039, "step": 16976 }, { "epoch": 0.49566436016466675, "grad_norm": 0.671756583571042, "learning_rate": 1.120778588807786e-05, "loss": 0.6509, "step": 16977 }, { "epoch": 0.4956935563925141, "grad_norm": 0.673226347758412, "learning_rate": 1.1207137064071372e-05, "loss": 0.6454, "step": 16978 }, { "epoch": 0.4957227526203615, "grad_norm": 0.6210066868065369, "learning_rate": 1.1206488240064883e-05, "loss": 0.5932, "step": 16979 }, { "epoch": 0.49575194884820883, "grad_norm": 0.638893901027478, "learning_rate": 1.1205839416058395e-05, "loss": 0.6636, "step": 16980 }, { "epoch": 0.4957811450760562, "grad_norm": 0.6811926909664199, "learning_rate": 1.1205190592051907e-05, "loss": 0.6827, "step": 16981 }, { "epoch": 0.49581034130390356, "grad_norm": 0.6438712645672097, "learning_rate": 1.1204541768045419e-05, "loss": 0.6176, "step": 16982 }, { "epoch": 0.4958395375317509, "grad_norm": 0.6345634808248882, "learning_rate": 1.120389294403893e-05, "loss": 0.6308, "step": 16983 }, { "epoch": 0.4958687337595983, "grad_norm": 0.6348341085267389, "learning_rate": 1.1203244120032441e-05, "loss": 0.5842, "step": 16984 }, { "epoch": 0.49589792998744564, "grad_norm": 0.6324718872390368, "learning_rate": 1.1202595296025953e-05, "loss": 0.5448, "step": 16985 }, { "epoch": 0.495927126215293, "grad_norm": 0.6517760292672664, "learning_rate": 1.1201946472019465e-05, "loss": 0.6355, "step": 16986 }, { "epoch": 0.49595632244314036, "grad_norm": 0.6076023493213978, "learning_rate": 1.1201297648012977e-05, "loss": 0.5709, "step": 16987 }, { "epoch": 0.4959855186709877, "grad_norm": 0.6280358612972345, "learning_rate": 1.1200648824006487e-05, "loss": 0.6105, "step": 16988 }, { "epoch": 0.4960147148988351, "grad_norm": 0.6209766945129345, "learning_rate": 1.1200000000000001e-05, "loss": 0.6323, "step": 16989 }, { "epoch": 0.49604391112668245, "grad_norm": 0.6026672068429463, "learning_rate": 1.1199351175993513e-05, "loss": 0.5692, "step": 16990 }, { "epoch": 0.4960731073545298, "grad_norm": 0.654820247208424, "learning_rate": 1.1198702351987025e-05, "loss": 0.6095, "step": 16991 }, { "epoch": 0.49610230358237717, "grad_norm": 0.606364078540127, "learning_rate": 1.1198053527980537e-05, "loss": 0.5847, "step": 16992 }, { "epoch": 0.49613149981022453, "grad_norm": 0.6645760685728866, "learning_rate": 1.119740470397405e-05, "loss": 0.6517, "step": 16993 }, { "epoch": 0.4961606960380719, "grad_norm": 0.6286692084119682, "learning_rate": 1.119675587996756e-05, "loss": 0.559, "step": 16994 }, { "epoch": 0.49618989226591925, "grad_norm": 0.6365428782726099, "learning_rate": 1.1196107055961072e-05, "loss": 0.6066, "step": 16995 }, { "epoch": 0.4962190884937666, "grad_norm": 0.6864510560714545, "learning_rate": 1.1195458231954584e-05, "loss": 0.6814, "step": 16996 }, { "epoch": 0.496248284721614, "grad_norm": 0.6068011701224075, "learning_rate": 1.1194809407948096e-05, "loss": 0.5858, "step": 16997 }, { "epoch": 0.49627748094946134, "grad_norm": 0.721675876246181, "learning_rate": 1.1194160583941606e-05, "loss": 0.7034, "step": 16998 }, { "epoch": 0.4963066771773087, "grad_norm": 0.6158801329691962, "learning_rate": 1.1193511759935118e-05, "loss": 0.5419, "step": 16999 }, { "epoch": 0.49633587340515606, "grad_norm": 0.661607669158998, "learning_rate": 1.119286293592863e-05, "loss": 0.6021, "step": 17000 }, { "epoch": 0.4963650696330034, "grad_norm": 0.6816231522566839, "learning_rate": 1.1192214111922142e-05, "loss": 0.6695, "step": 17001 }, { "epoch": 0.4963942658608508, "grad_norm": 0.6023241029444694, "learning_rate": 1.1191565287915654e-05, "loss": 0.5678, "step": 17002 }, { "epoch": 0.49642346208869814, "grad_norm": 0.6514810050674888, "learning_rate": 1.1190916463909164e-05, "loss": 0.5931, "step": 17003 }, { "epoch": 0.4964526583165455, "grad_norm": 0.6267553738833614, "learning_rate": 1.1190267639902677e-05, "loss": 0.5551, "step": 17004 }, { "epoch": 0.49648185454439286, "grad_norm": 0.6006131455414957, "learning_rate": 1.1189618815896189e-05, "loss": 0.5528, "step": 17005 }, { "epoch": 0.4965110507722402, "grad_norm": 0.6229459136058147, "learning_rate": 1.11889699918897e-05, "loss": 0.6366, "step": 17006 }, { "epoch": 0.4965402470000876, "grad_norm": 0.7012496955385527, "learning_rate": 1.1188321167883211e-05, "loss": 0.7509, "step": 17007 }, { "epoch": 0.49656944322793495, "grad_norm": 0.7197524059998163, "learning_rate": 1.1187672343876725e-05, "loss": 0.6677, "step": 17008 }, { "epoch": 0.4965986394557823, "grad_norm": 0.7093506383716502, "learning_rate": 1.1187023519870237e-05, "loss": 0.7037, "step": 17009 }, { "epoch": 0.49662783568362967, "grad_norm": 0.6643076907997384, "learning_rate": 1.1186374695863749e-05, "loss": 0.6508, "step": 17010 }, { "epoch": 0.49665703191147703, "grad_norm": 0.6760532263654963, "learning_rate": 1.118572587185726e-05, "loss": 0.6625, "step": 17011 }, { "epoch": 0.4966862281393244, "grad_norm": 0.6687404954290566, "learning_rate": 1.1185077047850773e-05, "loss": 0.6319, "step": 17012 }, { "epoch": 0.49671542436717175, "grad_norm": 0.6167468579985672, "learning_rate": 1.1184428223844283e-05, "loss": 0.5552, "step": 17013 }, { "epoch": 0.4967446205950191, "grad_norm": 0.6305760425366587, "learning_rate": 1.1183779399837795e-05, "loss": 0.5816, "step": 17014 }, { "epoch": 0.4967738168228665, "grad_norm": 0.6623572471163203, "learning_rate": 1.1183130575831307e-05, "loss": 0.6536, "step": 17015 }, { "epoch": 0.49680301305071384, "grad_norm": 0.6224697029580637, "learning_rate": 1.1182481751824819e-05, "loss": 0.5965, "step": 17016 }, { "epoch": 0.4968322092785612, "grad_norm": 0.649067812791672, "learning_rate": 1.118183292781833e-05, "loss": 0.5611, "step": 17017 }, { "epoch": 0.49686140550640856, "grad_norm": 0.7299474078947232, "learning_rate": 1.1181184103811842e-05, "loss": 0.7134, "step": 17018 }, { "epoch": 0.4968906017342559, "grad_norm": 0.6578972811514494, "learning_rate": 1.1180535279805354e-05, "loss": 0.64, "step": 17019 }, { "epoch": 0.4969197979621033, "grad_norm": 0.6430124912412313, "learning_rate": 1.1179886455798866e-05, "loss": 0.6504, "step": 17020 }, { "epoch": 0.49694899418995064, "grad_norm": 0.6902374649764597, "learning_rate": 1.1179237631792378e-05, "loss": 0.6234, "step": 17021 }, { "epoch": 0.496978190417798, "grad_norm": 0.6833317346088137, "learning_rate": 1.1178588807785888e-05, "loss": 0.6898, "step": 17022 }, { "epoch": 0.49700738664564537, "grad_norm": 0.6599387453911751, "learning_rate": 1.11779399837794e-05, "loss": 0.6118, "step": 17023 }, { "epoch": 0.4970365828734927, "grad_norm": 0.6822117342489071, "learning_rate": 1.1177291159772912e-05, "loss": 0.6725, "step": 17024 }, { "epoch": 0.4970657791013401, "grad_norm": 0.6047535878819588, "learning_rate": 1.1176642335766424e-05, "loss": 0.5381, "step": 17025 }, { "epoch": 0.49709497532918745, "grad_norm": 0.685099515472483, "learning_rate": 1.1175993511759934e-05, "loss": 0.6812, "step": 17026 }, { "epoch": 0.4971241715570348, "grad_norm": 0.68722058189871, "learning_rate": 1.1175344687753448e-05, "loss": 0.6615, "step": 17027 }, { "epoch": 0.49715336778488217, "grad_norm": 0.6707580471077628, "learning_rate": 1.117469586374696e-05, "loss": 0.6649, "step": 17028 }, { "epoch": 0.49718256401272953, "grad_norm": 0.6378737230972674, "learning_rate": 1.1174047039740472e-05, "loss": 0.5703, "step": 17029 }, { "epoch": 0.4972117602405769, "grad_norm": 0.5980102851317227, "learning_rate": 1.1173398215733984e-05, "loss": 0.5053, "step": 17030 }, { "epoch": 0.49724095646842426, "grad_norm": 0.675111310799872, "learning_rate": 1.1172749391727496e-05, "loss": 0.642, "step": 17031 }, { "epoch": 0.4972701526962716, "grad_norm": 0.6778049559116807, "learning_rate": 1.1172100567721006e-05, "loss": 0.6611, "step": 17032 }, { "epoch": 0.497299348924119, "grad_norm": 0.6136355392644999, "learning_rate": 1.1171451743714519e-05, "loss": 0.5706, "step": 17033 }, { "epoch": 0.49732854515196634, "grad_norm": 0.5727852793187648, "learning_rate": 1.117080291970803e-05, "loss": 0.504, "step": 17034 }, { "epoch": 0.4973577413798137, "grad_norm": 0.588910121422949, "learning_rate": 1.1170154095701543e-05, "loss": 0.4877, "step": 17035 }, { "epoch": 0.4973869376076611, "grad_norm": 0.644931682637183, "learning_rate": 1.1169505271695053e-05, "loss": 0.6035, "step": 17036 }, { "epoch": 0.4974161338355085, "grad_norm": 0.6299253097518965, "learning_rate": 1.1168856447688565e-05, "loss": 0.5833, "step": 17037 }, { "epoch": 0.49744533006335584, "grad_norm": 0.6213086588778782, "learning_rate": 1.1168207623682077e-05, "loss": 0.6065, "step": 17038 }, { "epoch": 0.4974745262912032, "grad_norm": 0.609626640407697, "learning_rate": 1.1167558799675589e-05, "loss": 0.5346, "step": 17039 }, { "epoch": 0.49750372251905056, "grad_norm": 0.6228593828889935, "learning_rate": 1.1166909975669101e-05, "loss": 0.5798, "step": 17040 }, { "epoch": 0.4975329187468979, "grad_norm": 0.6227457469151777, "learning_rate": 1.1166261151662611e-05, "loss": 0.5921, "step": 17041 }, { "epoch": 0.4975621149747453, "grad_norm": 0.6983737858079987, "learning_rate": 1.1165612327656123e-05, "loss": 0.6673, "step": 17042 }, { "epoch": 0.49759131120259265, "grad_norm": 0.6600549035443619, "learning_rate": 1.1164963503649635e-05, "loss": 0.6406, "step": 17043 }, { "epoch": 0.49762050743044, "grad_norm": 0.6764899537240104, "learning_rate": 1.1164314679643147e-05, "loss": 0.6003, "step": 17044 }, { "epoch": 0.49764970365828737, "grad_norm": 0.670840097784398, "learning_rate": 1.1163665855636658e-05, "loss": 0.6456, "step": 17045 }, { "epoch": 0.49767889988613473, "grad_norm": 0.6471631005423034, "learning_rate": 1.1163017031630171e-05, "loss": 0.5962, "step": 17046 }, { "epoch": 0.4977080961139821, "grad_norm": 0.6728996796448261, "learning_rate": 1.1162368207623684e-05, "loss": 0.6697, "step": 17047 }, { "epoch": 0.49773729234182945, "grad_norm": 0.6477942262402329, "learning_rate": 1.1161719383617196e-05, "loss": 0.6596, "step": 17048 }, { "epoch": 0.4977664885696768, "grad_norm": 0.6314541052960988, "learning_rate": 1.1161070559610708e-05, "loss": 0.6122, "step": 17049 }, { "epoch": 0.4977956847975242, "grad_norm": 0.6265794342484543, "learning_rate": 1.116042173560422e-05, "loss": 0.5956, "step": 17050 }, { "epoch": 0.49782488102537154, "grad_norm": 0.6416978260338274, "learning_rate": 1.115977291159773e-05, "loss": 0.6126, "step": 17051 }, { "epoch": 0.4978540772532189, "grad_norm": 0.5938269075950352, "learning_rate": 1.1159124087591242e-05, "loss": 0.5793, "step": 17052 }, { "epoch": 0.49788327348106626, "grad_norm": 0.6723240984696837, "learning_rate": 1.1158475263584754e-05, "loss": 0.6906, "step": 17053 }, { "epoch": 0.4979124697089136, "grad_norm": 0.6166937686904799, "learning_rate": 1.1157826439578266e-05, "loss": 0.5643, "step": 17054 }, { "epoch": 0.497941665936761, "grad_norm": 0.6695900961723552, "learning_rate": 1.1157177615571776e-05, "loss": 0.7019, "step": 17055 }, { "epoch": 0.49797086216460834, "grad_norm": 0.6577136902663969, "learning_rate": 1.1156528791565288e-05, "loss": 0.646, "step": 17056 }, { "epoch": 0.4980000583924557, "grad_norm": 0.6725831194715283, "learning_rate": 1.11558799675588e-05, "loss": 0.6121, "step": 17057 }, { "epoch": 0.49802925462030306, "grad_norm": 0.6921564052177464, "learning_rate": 1.1155231143552312e-05, "loss": 0.5522, "step": 17058 }, { "epoch": 0.4980584508481504, "grad_norm": 0.611303545184254, "learning_rate": 1.1154582319545823e-05, "loss": 0.5472, "step": 17059 }, { "epoch": 0.4980876470759978, "grad_norm": 0.6843819710762536, "learning_rate": 1.1153933495539335e-05, "loss": 0.682, "step": 17060 }, { "epoch": 0.49811684330384515, "grad_norm": 0.7171861883221198, "learning_rate": 1.1153284671532847e-05, "loss": 0.7135, "step": 17061 }, { "epoch": 0.4981460395316925, "grad_norm": 0.6348807568654772, "learning_rate": 1.1152635847526359e-05, "loss": 0.5551, "step": 17062 }, { "epoch": 0.49817523575953987, "grad_norm": 0.6235632620779209, "learning_rate": 1.1151987023519871e-05, "loss": 0.5493, "step": 17063 }, { "epoch": 0.49820443198738723, "grad_norm": 0.6387956554467707, "learning_rate": 1.1151338199513381e-05, "loss": 0.6107, "step": 17064 }, { "epoch": 0.4982336282152346, "grad_norm": 0.6129904704044706, "learning_rate": 1.1150689375506895e-05, "loss": 0.5691, "step": 17065 }, { "epoch": 0.49826282444308195, "grad_norm": 0.7195963470828369, "learning_rate": 1.1150040551500407e-05, "loss": 0.7193, "step": 17066 }, { "epoch": 0.4982920206709293, "grad_norm": 0.6314345348557387, "learning_rate": 1.1149391727493919e-05, "loss": 0.5753, "step": 17067 }, { "epoch": 0.4983212168987767, "grad_norm": 0.644955335761659, "learning_rate": 1.1148742903487431e-05, "loss": 0.6021, "step": 17068 }, { "epoch": 0.49835041312662404, "grad_norm": 0.6706316252460185, "learning_rate": 1.1148094079480943e-05, "loss": 0.6863, "step": 17069 }, { "epoch": 0.4983796093544714, "grad_norm": 0.633230013247843, "learning_rate": 1.1147445255474453e-05, "loss": 0.5996, "step": 17070 }, { "epoch": 0.49840880558231876, "grad_norm": 0.6406479674837331, "learning_rate": 1.1146796431467965e-05, "loss": 0.5856, "step": 17071 }, { "epoch": 0.4984380018101661, "grad_norm": 0.641617785244576, "learning_rate": 1.1146147607461477e-05, "loss": 0.6107, "step": 17072 }, { "epoch": 0.4984671980380135, "grad_norm": 0.6613364788539192, "learning_rate": 1.114549878345499e-05, "loss": 0.5252, "step": 17073 }, { "epoch": 0.49849639426586084, "grad_norm": 0.6649410766330198, "learning_rate": 1.11448499594485e-05, "loss": 0.6543, "step": 17074 }, { "epoch": 0.4985255904937082, "grad_norm": 0.594349410433084, "learning_rate": 1.1144201135442012e-05, "loss": 0.5724, "step": 17075 }, { "epoch": 0.49855478672155557, "grad_norm": 0.6182249793697898, "learning_rate": 1.1143552311435524e-05, "loss": 0.5387, "step": 17076 }, { "epoch": 0.4985839829494029, "grad_norm": 0.6728331674509901, "learning_rate": 1.1142903487429036e-05, "loss": 0.6803, "step": 17077 }, { "epoch": 0.4986131791772503, "grad_norm": 0.6709579451709172, "learning_rate": 1.1142254663422546e-05, "loss": 0.6645, "step": 17078 }, { "epoch": 0.49864237540509765, "grad_norm": 0.6863148323563414, "learning_rate": 1.1141605839416058e-05, "loss": 0.6119, "step": 17079 }, { "epoch": 0.498671571632945, "grad_norm": 0.6708015456112479, "learning_rate": 1.114095701540957e-05, "loss": 0.6079, "step": 17080 }, { "epoch": 0.49870076786079237, "grad_norm": 0.6686285196640597, "learning_rate": 1.1140308191403082e-05, "loss": 0.6595, "step": 17081 }, { "epoch": 0.49872996408863973, "grad_norm": 0.6952594620872051, "learning_rate": 1.1139659367396594e-05, "loss": 0.7065, "step": 17082 }, { "epoch": 0.4987591603164871, "grad_norm": 0.6321779445685969, "learning_rate": 1.1139010543390105e-05, "loss": 0.6213, "step": 17083 }, { "epoch": 0.49878835654433445, "grad_norm": 0.6289204087642885, "learning_rate": 1.1138361719383618e-05, "loss": 0.6018, "step": 17084 }, { "epoch": 0.4988175527721818, "grad_norm": 0.6412577493117558, "learning_rate": 1.113771289537713e-05, "loss": 0.5785, "step": 17085 }, { "epoch": 0.4988467490000292, "grad_norm": 0.6705728468365195, "learning_rate": 1.1137064071370642e-05, "loss": 0.6668, "step": 17086 }, { "epoch": 0.49887594522787654, "grad_norm": 0.5922503511777588, "learning_rate": 1.1136415247364154e-05, "loss": 0.5255, "step": 17087 }, { "epoch": 0.4989051414557239, "grad_norm": 0.6343148469554708, "learning_rate": 1.1135766423357666e-05, "loss": 0.6023, "step": 17088 }, { "epoch": 0.49893433768357126, "grad_norm": 0.6295147074369807, "learning_rate": 1.1135117599351177e-05, "loss": 0.5778, "step": 17089 }, { "epoch": 0.4989635339114186, "grad_norm": 0.6636629032490986, "learning_rate": 1.1134468775344689e-05, "loss": 0.6663, "step": 17090 }, { "epoch": 0.498992730139266, "grad_norm": 0.5779928688103875, "learning_rate": 1.1133819951338201e-05, "loss": 0.5385, "step": 17091 }, { "epoch": 0.49902192636711334, "grad_norm": 0.6624842809957928, "learning_rate": 1.1133171127331713e-05, "loss": 0.6568, "step": 17092 }, { "epoch": 0.4990511225949607, "grad_norm": 0.5921511032967745, "learning_rate": 1.1132522303325223e-05, "loss": 0.515, "step": 17093 }, { "epoch": 0.49908031882280807, "grad_norm": 0.6248579607362346, "learning_rate": 1.1131873479318735e-05, "loss": 0.5988, "step": 17094 }, { "epoch": 0.49910951505065543, "grad_norm": 0.7018398715565067, "learning_rate": 1.1131224655312247e-05, "loss": 0.6581, "step": 17095 }, { "epoch": 0.49913871127850284, "grad_norm": 0.602646782193403, "learning_rate": 1.113057583130576e-05, "loss": 0.553, "step": 17096 }, { "epoch": 0.4991679075063502, "grad_norm": 0.6245324721911863, "learning_rate": 1.112992700729927e-05, "loss": 0.6122, "step": 17097 }, { "epoch": 0.49919710373419757, "grad_norm": 0.6558753826394023, "learning_rate": 1.1129278183292782e-05, "loss": 0.6136, "step": 17098 }, { "epoch": 0.49922629996204493, "grad_norm": 0.6304077312028635, "learning_rate": 1.1128629359286294e-05, "loss": 0.5898, "step": 17099 }, { "epoch": 0.4992554961898923, "grad_norm": 0.7202853329404308, "learning_rate": 1.1127980535279806e-05, "loss": 0.6174, "step": 17100 }, { "epoch": 0.49928469241773965, "grad_norm": 0.6694603619330982, "learning_rate": 1.1127331711273318e-05, "loss": 0.5811, "step": 17101 }, { "epoch": 0.499313888645587, "grad_norm": 0.6213149186294947, "learning_rate": 1.1126682887266831e-05, "loss": 0.5781, "step": 17102 }, { "epoch": 0.4993430848734344, "grad_norm": 0.661509279831883, "learning_rate": 1.1126034063260342e-05, "loss": 0.6265, "step": 17103 }, { "epoch": 0.49937228110128173, "grad_norm": 0.6633876077366785, "learning_rate": 1.1125385239253854e-05, "loss": 0.6288, "step": 17104 }, { "epoch": 0.4994014773291291, "grad_norm": 0.6298244350331365, "learning_rate": 1.1124736415247366e-05, "loss": 0.5656, "step": 17105 }, { "epoch": 0.49943067355697646, "grad_norm": 0.7872574399447542, "learning_rate": 1.1124087591240878e-05, "loss": 0.6351, "step": 17106 }, { "epoch": 0.4994598697848238, "grad_norm": 0.6414551901593072, "learning_rate": 1.112343876723439e-05, "loss": 0.5834, "step": 17107 }, { "epoch": 0.4994890660126712, "grad_norm": 0.664405628946743, "learning_rate": 1.11227899432279e-05, "loss": 0.6538, "step": 17108 }, { "epoch": 0.49951826224051854, "grad_norm": 0.6324442065838317, "learning_rate": 1.1122141119221412e-05, "loss": 0.5992, "step": 17109 }, { "epoch": 0.4995474584683659, "grad_norm": 0.653375413226091, "learning_rate": 1.1121492295214924e-05, "loss": 0.6047, "step": 17110 }, { "epoch": 0.49957665469621326, "grad_norm": 0.7304366918061469, "learning_rate": 1.1120843471208436e-05, "loss": 0.6305, "step": 17111 }, { "epoch": 0.4996058509240606, "grad_norm": 0.6460358416327493, "learning_rate": 1.1120194647201947e-05, "loss": 0.612, "step": 17112 }, { "epoch": 0.499635047151908, "grad_norm": 0.6587036625799674, "learning_rate": 1.1119545823195459e-05, "loss": 0.6578, "step": 17113 }, { "epoch": 0.49966424337975535, "grad_norm": 0.6369503071711419, "learning_rate": 1.111889699918897e-05, "loss": 0.6244, "step": 17114 }, { "epoch": 0.4996934396076027, "grad_norm": 0.6111755915887294, "learning_rate": 1.1118248175182483e-05, "loss": 0.5582, "step": 17115 }, { "epoch": 0.49972263583545007, "grad_norm": 0.6554516287553942, "learning_rate": 1.1117599351175993e-05, "loss": 0.592, "step": 17116 }, { "epoch": 0.49975183206329743, "grad_norm": 0.6171395857847236, "learning_rate": 1.1116950527169505e-05, "loss": 0.5714, "step": 17117 }, { "epoch": 0.4997810282911448, "grad_norm": 0.644365812508399, "learning_rate": 1.1116301703163017e-05, "loss": 0.6437, "step": 17118 }, { "epoch": 0.49981022451899215, "grad_norm": 0.6591632578765076, "learning_rate": 1.1115652879156529e-05, "loss": 0.6254, "step": 17119 }, { "epoch": 0.4998394207468395, "grad_norm": 0.6546994479823053, "learning_rate": 1.1115004055150041e-05, "loss": 0.6492, "step": 17120 }, { "epoch": 0.4998686169746869, "grad_norm": 0.6250395121653077, "learning_rate": 1.1114355231143555e-05, "loss": 0.6323, "step": 17121 }, { "epoch": 0.49989781320253424, "grad_norm": 0.6899697451444046, "learning_rate": 1.1113706407137065e-05, "loss": 0.651, "step": 17122 }, { "epoch": 0.4999270094303816, "grad_norm": 0.650003895094803, "learning_rate": 1.1113057583130577e-05, "loss": 0.6385, "step": 17123 }, { "epoch": 0.49995620565822896, "grad_norm": 0.6632419200910729, "learning_rate": 1.111240875912409e-05, "loss": 0.6891, "step": 17124 }, { "epoch": 0.4999854018860763, "grad_norm": 0.7258190553958432, "learning_rate": 1.1111759935117601e-05, "loss": 0.6124, "step": 17125 }, { "epoch": 0.5000145981139237, "grad_norm": 0.5949206598484149, "learning_rate": 1.1111111111111113e-05, "loss": 0.5258, "step": 17126 }, { "epoch": 0.500043794341771, "grad_norm": 0.6548526524528758, "learning_rate": 1.1110462287104624e-05, "loss": 0.6289, "step": 17127 }, { "epoch": 0.5000729905696184, "grad_norm": 0.6556170788019147, "learning_rate": 1.1109813463098136e-05, "loss": 0.6363, "step": 17128 }, { "epoch": 0.5001021867974658, "grad_norm": 0.6221974097531695, "learning_rate": 1.1109164639091648e-05, "loss": 0.5996, "step": 17129 }, { "epoch": 0.5001313830253131, "grad_norm": 0.6398213334316869, "learning_rate": 1.110851581508516e-05, "loss": 0.6483, "step": 17130 }, { "epoch": 0.5001605792531605, "grad_norm": 0.643323607398361, "learning_rate": 1.110786699107867e-05, "loss": 0.6599, "step": 17131 }, { "epoch": 0.5001897754810078, "grad_norm": 0.6495557846264219, "learning_rate": 1.1107218167072182e-05, "loss": 0.6084, "step": 17132 }, { "epoch": 0.5002189717088552, "grad_norm": 0.6301165657920863, "learning_rate": 1.1106569343065694e-05, "loss": 0.5594, "step": 17133 }, { "epoch": 0.5002481679367026, "grad_norm": 0.6416241707245289, "learning_rate": 1.1105920519059206e-05, "loss": 0.6057, "step": 17134 }, { "epoch": 0.5002773641645499, "grad_norm": 0.7044105239838536, "learning_rate": 1.1105271695052717e-05, "loss": 0.6121, "step": 17135 }, { "epoch": 0.5003065603923973, "grad_norm": 0.7108414614028011, "learning_rate": 1.1104622871046229e-05, "loss": 0.7026, "step": 17136 }, { "epoch": 0.5003357566202447, "grad_norm": 0.6266567312472852, "learning_rate": 1.110397404703974e-05, "loss": 0.6285, "step": 17137 }, { "epoch": 0.500364952848092, "grad_norm": 0.6413219322297641, "learning_rate": 1.1103325223033253e-05, "loss": 0.6252, "step": 17138 }, { "epoch": 0.5003941490759394, "grad_norm": 0.6415898437151188, "learning_rate": 1.1102676399026765e-05, "loss": 0.6102, "step": 17139 }, { "epoch": 0.5004233453037867, "grad_norm": 0.6579360902928354, "learning_rate": 1.1102027575020278e-05, "loss": 0.6075, "step": 17140 }, { "epoch": 0.5004525415316341, "grad_norm": 0.6958132236597371, "learning_rate": 1.1101378751013789e-05, "loss": 0.6884, "step": 17141 }, { "epoch": 0.5004817377594815, "grad_norm": 0.6254008804657348, "learning_rate": 1.11007299270073e-05, "loss": 0.5939, "step": 17142 }, { "epoch": 0.5005109339873288, "grad_norm": 0.6580673082422966, "learning_rate": 1.1100081103000813e-05, "loss": 0.6224, "step": 17143 }, { "epoch": 0.5005401302151762, "grad_norm": 0.6719461074196194, "learning_rate": 1.1099432278994325e-05, "loss": 0.6488, "step": 17144 }, { "epoch": 0.5005693264430235, "grad_norm": 0.6481021917916924, "learning_rate": 1.1098783454987837e-05, "loss": 0.6092, "step": 17145 }, { "epoch": 0.5005985226708709, "grad_norm": 0.6270304239720798, "learning_rate": 1.1098134630981347e-05, "loss": 0.6027, "step": 17146 }, { "epoch": 0.5006277188987183, "grad_norm": 0.6557530392062968, "learning_rate": 1.1097485806974859e-05, "loss": 0.5996, "step": 17147 }, { "epoch": 0.5006569151265656, "grad_norm": 0.6644157109817456, "learning_rate": 1.1096836982968371e-05, "loss": 0.6371, "step": 17148 }, { "epoch": 0.500686111354413, "grad_norm": 0.6867966183986972, "learning_rate": 1.1096188158961883e-05, "loss": 0.6445, "step": 17149 }, { "epoch": 0.5007153075822603, "grad_norm": 0.7371544330582671, "learning_rate": 1.1095539334955394e-05, "loss": 0.6033, "step": 17150 }, { "epoch": 0.5007445038101077, "grad_norm": 0.6575085858990508, "learning_rate": 1.1094890510948906e-05, "loss": 0.6425, "step": 17151 }, { "epoch": 0.5007737000379551, "grad_norm": 0.6655336754767067, "learning_rate": 1.1094241686942418e-05, "loss": 0.671, "step": 17152 }, { "epoch": 0.5008028962658024, "grad_norm": 0.6530258558643526, "learning_rate": 1.109359286293593e-05, "loss": 0.6662, "step": 17153 }, { "epoch": 0.5008320924936498, "grad_norm": 0.6795746166639353, "learning_rate": 1.109294403892944e-05, "loss": 0.607, "step": 17154 }, { "epoch": 0.5008612887214972, "grad_norm": 0.6263127684754897, "learning_rate": 1.1092295214922952e-05, "loss": 0.6263, "step": 17155 }, { "epoch": 0.5008904849493445, "grad_norm": 0.6351463530287699, "learning_rate": 1.1091646390916464e-05, "loss": 0.5925, "step": 17156 }, { "epoch": 0.5009196811771919, "grad_norm": 0.6527208255493144, "learning_rate": 1.1090997566909976e-05, "loss": 0.6165, "step": 17157 }, { "epoch": 0.5009488774050392, "grad_norm": 0.6432335087610942, "learning_rate": 1.1090348742903488e-05, "loss": 0.5935, "step": 17158 }, { "epoch": 0.5009780736328866, "grad_norm": 0.680572111105721, "learning_rate": 1.1089699918897002e-05, "loss": 0.6713, "step": 17159 }, { "epoch": 0.501007269860734, "grad_norm": 0.6211548507618635, "learning_rate": 1.1089051094890512e-05, "loss": 0.6099, "step": 17160 }, { "epoch": 0.5010364660885813, "grad_norm": 0.6746116341607232, "learning_rate": 1.1088402270884024e-05, "loss": 0.6146, "step": 17161 }, { "epoch": 0.5010656623164287, "grad_norm": 0.7353888700589304, "learning_rate": 1.1087753446877536e-05, "loss": 0.636, "step": 17162 }, { "epoch": 0.501094858544276, "grad_norm": 0.6676932987557054, "learning_rate": 1.1087104622871048e-05, "loss": 0.6726, "step": 17163 }, { "epoch": 0.5011240547721234, "grad_norm": 0.6042728218622948, "learning_rate": 1.108645579886456e-05, "loss": 0.5365, "step": 17164 }, { "epoch": 0.5011532509999708, "grad_norm": 0.6652120574949719, "learning_rate": 1.108580697485807e-05, "loss": 0.6208, "step": 17165 }, { "epoch": 0.5011824472278181, "grad_norm": 0.6538813809362124, "learning_rate": 1.1085158150851583e-05, "loss": 0.6609, "step": 17166 }, { "epoch": 0.5012116434556655, "grad_norm": 0.6728327065232954, "learning_rate": 1.1084509326845095e-05, "loss": 0.6345, "step": 17167 }, { "epoch": 0.5012408396835129, "grad_norm": 0.664804444273191, "learning_rate": 1.1083860502838607e-05, "loss": 0.6238, "step": 17168 }, { "epoch": 0.5012700359113602, "grad_norm": 0.6077475968924261, "learning_rate": 1.1083211678832117e-05, "loss": 0.5779, "step": 17169 }, { "epoch": 0.5012992321392076, "grad_norm": 0.6799824687473907, "learning_rate": 1.1082562854825629e-05, "loss": 0.5928, "step": 17170 }, { "epoch": 0.5013284283670549, "grad_norm": 0.634267295351373, "learning_rate": 1.1081914030819141e-05, "loss": 0.5923, "step": 17171 }, { "epoch": 0.5013576245949023, "grad_norm": 0.7024507620743703, "learning_rate": 1.1081265206812653e-05, "loss": 0.6774, "step": 17172 }, { "epoch": 0.5013868208227497, "grad_norm": 0.6572270508587692, "learning_rate": 1.1080616382806163e-05, "loss": 0.6311, "step": 17173 }, { "epoch": 0.501416017050597, "grad_norm": 0.6150555515603208, "learning_rate": 1.1079967558799675e-05, "loss": 0.5713, "step": 17174 }, { "epoch": 0.5014452132784444, "grad_norm": 0.6001713764785892, "learning_rate": 1.1079318734793187e-05, "loss": 0.5701, "step": 17175 }, { "epoch": 0.5014744095062917, "grad_norm": 0.6358457570121577, "learning_rate": 1.10786699107867e-05, "loss": 0.619, "step": 17176 }, { "epoch": 0.5015036057341391, "grad_norm": 0.6850975775411612, "learning_rate": 1.1078021086780211e-05, "loss": 0.6595, "step": 17177 }, { "epoch": 0.5015328019619865, "grad_norm": 0.6643555081481092, "learning_rate": 1.1077372262773725e-05, "loss": 0.6412, "step": 17178 }, { "epoch": 0.5015619981898338, "grad_norm": 0.6715830544672556, "learning_rate": 1.1076723438767236e-05, "loss": 0.5638, "step": 17179 }, { "epoch": 0.5015911944176812, "grad_norm": 0.6227511169900193, "learning_rate": 1.1076074614760748e-05, "loss": 0.5738, "step": 17180 }, { "epoch": 0.5016203906455285, "grad_norm": 0.7154199367072062, "learning_rate": 1.107542579075426e-05, "loss": 0.571, "step": 17181 }, { "epoch": 0.5016495868733759, "grad_norm": 0.6550093369625437, "learning_rate": 1.1074776966747772e-05, "loss": 0.6105, "step": 17182 }, { "epoch": 0.5016787831012233, "grad_norm": 0.6376971782617065, "learning_rate": 1.1074128142741282e-05, "loss": 0.599, "step": 17183 }, { "epoch": 0.5017079793290706, "grad_norm": 0.6440444782314505, "learning_rate": 1.1073479318734794e-05, "loss": 0.6017, "step": 17184 }, { "epoch": 0.501737175556918, "grad_norm": 0.6445779569596701, "learning_rate": 1.1072830494728306e-05, "loss": 0.6531, "step": 17185 }, { "epoch": 0.5017663717847655, "grad_norm": 0.665368801005048, "learning_rate": 1.1072181670721818e-05, "loss": 0.6492, "step": 17186 }, { "epoch": 0.5017955680126128, "grad_norm": 0.6617152192887726, "learning_rate": 1.107153284671533e-05, "loss": 0.6877, "step": 17187 }, { "epoch": 0.5018247642404602, "grad_norm": 0.6293921969705074, "learning_rate": 1.107088402270884e-05, "loss": 0.6445, "step": 17188 }, { "epoch": 0.5018539604683075, "grad_norm": 0.6370075147846723, "learning_rate": 1.1070235198702352e-05, "loss": 0.6138, "step": 17189 }, { "epoch": 0.5018831566961549, "grad_norm": 0.5921257636962185, "learning_rate": 1.1069586374695864e-05, "loss": 0.5509, "step": 17190 }, { "epoch": 0.5019123529240023, "grad_norm": 0.6160423208232201, "learning_rate": 1.1068937550689376e-05, "loss": 0.5821, "step": 17191 }, { "epoch": 0.5019415491518496, "grad_norm": 0.6436555083692079, "learning_rate": 1.1068288726682887e-05, "loss": 0.6169, "step": 17192 }, { "epoch": 0.501970745379697, "grad_norm": 0.6029088697939954, "learning_rate": 1.1067639902676399e-05, "loss": 0.5487, "step": 17193 }, { "epoch": 0.5019999416075444, "grad_norm": 0.6143982152976714, "learning_rate": 1.1066991078669911e-05, "loss": 0.5292, "step": 17194 }, { "epoch": 0.5020291378353917, "grad_norm": 0.7193247938659505, "learning_rate": 1.1066342254663423e-05, "loss": 0.6113, "step": 17195 }, { "epoch": 0.5020583340632391, "grad_norm": 0.6450588423020525, "learning_rate": 1.1065693430656935e-05, "loss": 0.6312, "step": 17196 }, { "epoch": 0.5020875302910864, "grad_norm": 0.6407197604035636, "learning_rate": 1.1065044606650449e-05, "loss": 0.5797, "step": 17197 }, { "epoch": 0.5021167265189338, "grad_norm": 0.611066888279525, "learning_rate": 1.1064395782643959e-05, "loss": 0.5699, "step": 17198 }, { "epoch": 0.5021459227467812, "grad_norm": 0.6522730440462329, "learning_rate": 1.1063746958637471e-05, "loss": 0.667, "step": 17199 }, { "epoch": 0.5021751189746285, "grad_norm": 0.6721483300079066, "learning_rate": 1.1063098134630983e-05, "loss": 0.6561, "step": 17200 }, { "epoch": 0.5022043152024759, "grad_norm": 0.6253143928160951, "learning_rate": 1.1062449310624495e-05, "loss": 0.5984, "step": 17201 }, { "epoch": 0.5022335114303232, "grad_norm": 0.6202069922677965, "learning_rate": 1.1061800486618005e-05, "loss": 0.5874, "step": 17202 }, { "epoch": 0.5022627076581706, "grad_norm": 0.6107273164247009, "learning_rate": 1.1061151662611517e-05, "loss": 0.5847, "step": 17203 }, { "epoch": 0.502291903886018, "grad_norm": 0.7093844717186931, "learning_rate": 1.106050283860503e-05, "loss": 0.6206, "step": 17204 }, { "epoch": 0.5023211001138653, "grad_norm": 0.6312493597773482, "learning_rate": 1.1059854014598541e-05, "loss": 0.5716, "step": 17205 }, { "epoch": 0.5023502963417127, "grad_norm": 0.613028375133155, "learning_rate": 1.1059205190592053e-05, "loss": 0.5742, "step": 17206 }, { "epoch": 0.50237949256956, "grad_norm": 0.6196945582886972, "learning_rate": 1.1058556366585564e-05, "loss": 0.5872, "step": 17207 }, { "epoch": 0.5024086887974074, "grad_norm": 0.6555175211658054, "learning_rate": 1.1057907542579076e-05, "loss": 0.701, "step": 17208 }, { "epoch": 0.5024378850252548, "grad_norm": 0.6423345476138705, "learning_rate": 1.1057258718572588e-05, "loss": 0.6143, "step": 17209 }, { "epoch": 0.5024670812531021, "grad_norm": 0.6098388816820675, "learning_rate": 1.10566098945661e-05, "loss": 0.5479, "step": 17210 }, { "epoch": 0.5024962774809495, "grad_norm": 0.7039840844976442, "learning_rate": 1.105596107055961e-05, "loss": 0.6868, "step": 17211 }, { "epoch": 0.5025254737087969, "grad_norm": 0.6236542903292137, "learning_rate": 1.1055312246553122e-05, "loss": 0.5404, "step": 17212 }, { "epoch": 0.5025546699366442, "grad_norm": 0.6727339380041683, "learning_rate": 1.1054663422546634e-05, "loss": 0.5767, "step": 17213 }, { "epoch": 0.5025838661644916, "grad_norm": 0.6649918551359503, "learning_rate": 1.1054014598540146e-05, "loss": 0.6355, "step": 17214 }, { "epoch": 0.5026130623923389, "grad_norm": 0.6265385326208207, "learning_rate": 1.1053365774533658e-05, "loss": 0.5914, "step": 17215 }, { "epoch": 0.5026422586201863, "grad_norm": 0.7289769980598505, "learning_rate": 1.1052716950527172e-05, "loss": 0.7686, "step": 17216 }, { "epoch": 0.5026714548480337, "grad_norm": 0.6726087676055181, "learning_rate": 1.1052068126520682e-05, "loss": 0.6709, "step": 17217 }, { "epoch": 0.502700651075881, "grad_norm": 0.6500422682683624, "learning_rate": 1.1051419302514194e-05, "loss": 0.6292, "step": 17218 }, { "epoch": 0.5027298473037284, "grad_norm": 0.6693394785947149, "learning_rate": 1.1050770478507706e-05, "loss": 0.6459, "step": 17219 }, { "epoch": 0.5027590435315757, "grad_norm": 0.6905457830946345, "learning_rate": 1.1050121654501218e-05, "loss": 0.6344, "step": 17220 }, { "epoch": 0.5027882397594231, "grad_norm": 0.5969651180435686, "learning_rate": 1.1049472830494729e-05, "loss": 0.5156, "step": 17221 }, { "epoch": 0.5028174359872705, "grad_norm": 0.6825372257713602, "learning_rate": 1.104882400648824e-05, "loss": 0.6458, "step": 17222 }, { "epoch": 0.5028466322151178, "grad_norm": 0.708573175975656, "learning_rate": 1.1048175182481753e-05, "loss": 0.6673, "step": 17223 }, { "epoch": 0.5028758284429652, "grad_norm": 0.670303076444812, "learning_rate": 1.1047526358475265e-05, "loss": 0.6333, "step": 17224 }, { "epoch": 0.5029050246708126, "grad_norm": 0.6888977233850545, "learning_rate": 1.1046877534468777e-05, "loss": 0.6108, "step": 17225 }, { "epoch": 0.5029342208986599, "grad_norm": 0.6195938692710183, "learning_rate": 1.1046228710462287e-05, "loss": 0.5774, "step": 17226 }, { "epoch": 0.5029634171265073, "grad_norm": 0.696427723605787, "learning_rate": 1.10455798864558e-05, "loss": 0.655, "step": 17227 }, { "epoch": 0.5029926133543546, "grad_norm": 0.6759596088789586, "learning_rate": 1.1044931062449311e-05, "loss": 0.6834, "step": 17228 }, { "epoch": 0.503021809582202, "grad_norm": 0.6755453297566185, "learning_rate": 1.1044282238442823e-05, "loss": 0.6797, "step": 17229 }, { "epoch": 0.5030510058100494, "grad_norm": 0.6337458439318395, "learning_rate": 1.1043633414436334e-05, "loss": 0.5847, "step": 17230 }, { "epoch": 0.5030802020378967, "grad_norm": 0.64579847935984, "learning_rate": 1.1042984590429846e-05, "loss": 0.6404, "step": 17231 }, { "epoch": 0.5031093982657441, "grad_norm": 0.6585696583389198, "learning_rate": 1.1042335766423358e-05, "loss": 0.5882, "step": 17232 }, { "epoch": 0.5031385944935914, "grad_norm": 0.6681956424622891, "learning_rate": 1.104168694241687e-05, "loss": 0.6451, "step": 17233 }, { "epoch": 0.5031677907214388, "grad_norm": 0.6573038249055007, "learning_rate": 1.1041038118410382e-05, "loss": 0.6367, "step": 17234 }, { "epoch": 0.5031969869492862, "grad_norm": 0.6219881546722863, "learning_rate": 1.1040389294403896e-05, "loss": 0.5727, "step": 17235 }, { "epoch": 0.5032261831771335, "grad_norm": 0.6561530770173022, "learning_rate": 1.1039740470397406e-05, "loss": 0.6385, "step": 17236 }, { "epoch": 0.5032553794049809, "grad_norm": 0.6005604194198565, "learning_rate": 1.1039091646390918e-05, "loss": 0.5404, "step": 17237 }, { "epoch": 0.5032845756328282, "grad_norm": 0.6551021301290699, "learning_rate": 1.103844282238443e-05, "loss": 0.6355, "step": 17238 }, { "epoch": 0.5033137718606756, "grad_norm": 0.6546157026232872, "learning_rate": 1.1037793998377942e-05, "loss": 0.5899, "step": 17239 }, { "epoch": 0.503342968088523, "grad_norm": 0.6375176731559707, "learning_rate": 1.1037145174371452e-05, "loss": 0.5779, "step": 17240 }, { "epoch": 0.5033721643163703, "grad_norm": 0.6641207363740275, "learning_rate": 1.1036496350364964e-05, "loss": 0.6593, "step": 17241 }, { "epoch": 0.5034013605442177, "grad_norm": 0.6147707609012351, "learning_rate": 1.1035847526358476e-05, "loss": 0.5648, "step": 17242 }, { "epoch": 0.503430556772065, "grad_norm": 0.6302292083349779, "learning_rate": 1.1035198702351988e-05, "loss": 0.5996, "step": 17243 }, { "epoch": 0.5034597529999124, "grad_norm": 0.6756608568591713, "learning_rate": 1.10345498783455e-05, "loss": 0.6899, "step": 17244 }, { "epoch": 0.5034889492277598, "grad_norm": 0.6452467516581639, "learning_rate": 1.103390105433901e-05, "loss": 0.6356, "step": 17245 }, { "epoch": 0.5035181454556071, "grad_norm": 0.5986469803827749, "learning_rate": 1.1033252230332523e-05, "loss": 0.5361, "step": 17246 }, { "epoch": 0.5035473416834545, "grad_norm": 0.692665171483763, "learning_rate": 1.1032603406326035e-05, "loss": 0.6817, "step": 17247 }, { "epoch": 0.5035765379113019, "grad_norm": 0.6628962853962047, "learning_rate": 1.1031954582319547e-05, "loss": 0.6207, "step": 17248 }, { "epoch": 0.5036057341391492, "grad_norm": 0.6889177293769726, "learning_rate": 1.1031305758313057e-05, "loss": 0.7187, "step": 17249 }, { "epoch": 0.5036349303669966, "grad_norm": 0.6128664184765344, "learning_rate": 1.1030656934306569e-05, "loss": 0.5772, "step": 17250 }, { "epoch": 0.5036641265948439, "grad_norm": 0.653546688667018, "learning_rate": 1.1030008110300081e-05, "loss": 0.5993, "step": 17251 }, { "epoch": 0.5036933228226913, "grad_norm": 0.6165365062679076, "learning_rate": 1.1029359286293593e-05, "loss": 0.5869, "step": 17252 }, { "epoch": 0.5037225190505387, "grad_norm": 0.6232661330882097, "learning_rate": 1.1028710462287107e-05, "loss": 0.5994, "step": 17253 }, { "epoch": 0.503751715278386, "grad_norm": 0.624819453061591, "learning_rate": 1.1028061638280619e-05, "loss": 0.5948, "step": 17254 }, { "epoch": 0.5037809115062334, "grad_norm": 0.6456701944013388, "learning_rate": 1.102741281427413e-05, "loss": 0.6604, "step": 17255 }, { "epoch": 0.5038101077340807, "grad_norm": 0.6552924210999815, "learning_rate": 1.1026763990267641e-05, "loss": 0.6017, "step": 17256 }, { "epoch": 0.5038393039619281, "grad_norm": 0.6861521709889281, "learning_rate": 1.1026115166261153e-05, "loss": 0.681, "step": 17257 }, { "epoch": 0.5038685001897755, "grad_norm": 0.6281798482229489, "learning_rate": 1.1025466342254665e-05, "loss": 0.6022, "step": 17258 }, { "epoch": 0.5038976964176228, "grad_norm": 0.6176580439736146, "learning_rate": 1.1024817518248176e-05, "loss": 0.5635, "step": 17259 }, { "epoch": 0.5039268926454702, "grad_norm": 0.6629415348380532, "learning_rate": 1.1024168694241688e-05, "loss": 0.5907, "step": 17260 }, { "epoch": 0.5039560888733176, "grad_norm": 0.6890879366951854, "learning_rate": 1.10235198702352e-05, "loss": 0.6884, "step": 17261 }, { "epoch": 0.5039852851011649, "grad_norm": 0.6488288482968989, "learning_rate": 1.1022871046228712e-05, "loss": 0.6182, "step": 17262 }, { "epoch": 0.5040144813290123, "grad_norm": 0.6123505368078076, "learning_rate": 1.1022222222222224e-05, "loss": 0.5649, "step": 17263 }, { "epoch": 0.5040436775568596, "grad_norm": 0.6710682637413166, "learning_rate": 1.1021573398215734e-05, "loss": 0.6521, "step": 17264 }, { "epoch": 0.504072873784707, "grad_norm": 0.6599887106233732, "learning_rate": 1.1020924574209246e-05, "loss": 0.6305, "step": 17265 }, { "epoch": 0.5041020700125544, "grad_norm": 0.6239593448183358, "learning_rate": 1.1020275750202758e-05, "loss": 0.6199, "step": 17266 }, { "epoch": 0.5041312662404017, "grad_norm": 0.6523825966783519, "learning_rate": 1.101962692619627e-05, "loss": 0.6264, "step": 17267 }, { "epoch": 0.5041604624682491, "grad_norm": 0.6160669217612419, "learning_rate": 1.101897810218978e-05, "loss": 0.5488, "step": 17268 }, { "epoch": 0.5041896586960964, "grad_norm": 0.6117065347244368, "learning_rate": 1.1018329278183293e-05, "loss": 0.5637, "step": 17269 }, { "epoch": 0.5042188549239438, "grad_norm": 0.7484599956517576, "learning_rate": 1.1017680454176805e-05, "loss": 0.6893, "step": 17270 }, { "epoch": 0.5042480511517912, "grad_norm": 0.6411890401436116, "learning_rate": 1.1017031630170317e-05, "loss": 0.5695, "step": 17271 }, { "epoch": 0.5042772473796385, "grad_norm": 0.6765747699668335, "learning_rate": 1.101638280616383e-05, "loss": 0.6776, "step": 17272 }, { "epoch": 0.5043064436074859, "grad_norm": 0.6385683360111222, "learning_rate": 1.1015733982157342e-05, "loss": 0.5776, "step": 17273 }, { "epoch": 0.5043356398353332, "grad_norm": 0.6467527214408351, "learning_rate": 1.1015085158150853e-05, "loss": 0.6457, "step": 17274 }, { "epoch": 0.5043648360631806, "grad_norm": 0.6742098715717999, "learning_rate": 1.1014436334144365e-05, "loss": 0.6338, "step": 17275 }, { "epoch": 0.504394032291028, "grad_norm": 0.6775074634861822, "learning_rate": 1.1013787510137877e-05, "loss": 0.7084, "step": 17276 }, { "epoch": 0.5044232285188753, "grad_norm": 0.6249963981350539, "learning_rate": 1.1013138686131389e-05, "loss": 0.6056, "step": 17277 }, { "epoch": 0.5044524247467227, "grad_norm": 0.6706262042286115, "learning_rate": 1.1012489862124899e-05, "loss": 0.6511, "step": 17278 }, { "epoch": 0.50448162097457, "grad_norm": 0.6646839181541057, "learning_rate": 1.1011841038118411e-05, "loss": 0.6486, "step": 17279 }, { "epoch": 0.5045108172024174, "grad_norm": 0.6419456822815859, "learning_rate": 1.1011192214111923e-05, "loss": 0.656, "step": 17280 }, { "epoch": 0.5045400134302648, "grad_norm": 0.6360417899173798, "learning_rate": 1.1010543390105435e-05, "loss": 0.5828, "step": 17281 }, { "epoch": 0.5045692096581121, "grad_norm": 0.6232068111309089, "learning_rate": 1.1009894566098947e-05, "loss": 0.582, "step": 17282 }, { "epoch": 0.5045984058859595, "grad_norm": 0.6435219211306848, "learning_rate": 1.1009245742092458e-05, "loss": 0.6068, "step": 17283 }, { "epoch": 0.5046276021138069, "grad_norm": 0.635735965231202, "learning_rate": 1.100859691808597e-05, "loss": 0.6432, "step": 17284 }, { "epoch": 0.5046567983416542, "grad_norm": 0.6287747036295177, "learning_rate": 1.1007948094079482e-05, "loss": 0.6046, "step": 17285 }, { "epoch": 0.5046859945695016, "grad_norm": 0.6103527735711175, "learning_rate": 1.1007299270072994e-05, "loss": 0.5848, "step": 17286 }, { "epoch": 0.504715190797349, "grad_norm": 0.7081576534339429, "learning_rate": 1.1006650446066504e-05, "loss": 0.6721, "step": 17287 }, { "epoch": 0.5047443870251963, "grad_norm": 0.6487440685046647, "learning_rate": 1.1006001622060016e-05, "loss": 0.5817, "step": 17288 }, { "epoch": 0.5047735832530437, "grad_norm": 0.6742660507324791, "learning_rate": 1.1005352798053528e-05, "loss": 0.6381, "step": 17289 }, { "epoch": 0.504802779480891, "grad_norm": 0.5913190410214277, "learning_rate": 1.100470397404704e-05, "loss": 0.5503, "step": 17290 }, { "epoch": 0.5048319757087384, "grad_norm": 0.5938041852329858, "learning_rate": 1.1004055150040554e-05, "loss": 0.5595, "step": 17291 }, { "epoch": 0.5048611719365858, "grad_norm": 0.6947327098117769, "learning_rate": 1.1003406326034066e-05, "loss": 0.6775, "step": 17292 }, { "epoch": 0.5048903681644331, "grad_norm": 0.6409974565358679, "learning_rate": 1.1002757502027576e-05, "loss": 0.6203, "step": 17293 }, { "epoch": 0.5049195643922805, "grad_norm": 0.6295343834160924, "learning_rate": 1.1002108678021088e-05, "loss": 0.5765, "step": 17294 }, { "epoch": 0.5049487606201278, "grad_norm": 0.610951986695884, "learning_rate": 1.10014598540146e-05, "loss": 0.5306, "step": 17295 }, { "epoch": 0.5049779568479752, "grad_norm": 0.6847520270989234, "learning_rate": 1.1000811030008112e-05, "loss": 0.7587, "step": 17296 }, { "epoch": 0.5050071530758226, "grad_norm": 0.6755947240922674, "learning_rate": 1.1000162206001623e-05, "loss": 0.6672, "step": 17297 }, { "epoch": 0.5050363493036699, "grad_norm": 0.6002775410489862, "learning_rate": 1.0999513381995135e-05, "loss": 0.5541, "step": 17298 }, { "epoch": 0.5050655455315173, "grad_norm": 0.5824635737047621, "learning_rate": 1.0998864557988647e-05, "loss": 0.5348, "step": 17299 }, { "epoch": 0.5050947417593646, "grad_norm": 0.6097399196028278, "learning_rate": 1.0998215733982159e-05, "loss": 0.5096, "step": 17300 }, { "epoch": 0.505123937987212, "grad_norm": 0.6177975708093877, "learning_rate": 1.099756690997567e-05, "loss": 0.6037, "step": 17301 }, { "epoch": 0.5051531342150594, "grad_norm": 0.6531607315365653, "learning_rate": 1.0996918085969181e-05, "loss": 0.6246, "step": 17302 }, { "epoch": 0.5051823304429067, "grad_norm": 0.6197531627035455, "learning_rate": 1.0996269261962693e-05, "loss": 0.5559, "step": 17303 }, { "epoch": 0.5052115266707541, "grad_norm": 0.6332636577286676, "learning_rate": 1.0995620437956205e-05, "loss": 0.6383, "step": 17304 }, { "epoch": 0.5052407228986014, "grad_norm": 0.6680852683580054, "learning_rate": 1.0994971613949717e-05, "loss": 0.7093, "step": 17305 }, { "epoch": 0.5052699191264488, "grad_norm": 0.6520055024832663, "learning_rate": 1.0994322789943227e-05, "loss": 0.6757, "step": 17306 }, { "epoch": 0.5052991153542963, "grad_norm": 0.6445277007338232, "learning_rate": 1.099367396593674e-05, "loss": 0.6131, "step": 17307 }, { "epoch": 0.5053283115821436, "grad_norm": 0.62082929432135, "learning_rate": 1.0993025141930251e-05, "loss": 0.5649, "step": 17308 }, { "epoch": 0.505357507809991, "grad_norm": 0.6576656714796317, "learning_rate": 1.0992376317923763e-05, "loss": 0.6384, "step": 17309 }, { "epoch": 0.5053867040378384, "grad_norm": 0.6717333934975799, "learning_rate": 1.0991727493917277e-05, "loss": 0.657, "step": 17310 }, { "epoch": 0.5054159002656857, "grad_norm": 0.6613475811040698, "learning_rate": 1.099107866991079e-05, "loss": 0.6728, "step": 17311 }, { "epoch": 0.5054450964935331, "grad_norm": 0.6001226151627738, "learning_rate": 1.09904298459043e-05, "loss": 0.5316, "step": 17312 }, { "epoch": 0.5054742927213804, "grad_norm": 0.631516450877029, "learning_rate": 1.0989781021897812e-05, "loss": 0.5717, "step": 17313 }, { "epoch": 0.5055034889492278, "grad_norm": 0.6347057172286193, "learning_rate": 1.0989132197891324e-05, "loss": 0.6044, "step": 17314 }, { "epoch": 0.5055326851770752, "grad_norm": 0.6330850207446553, "learning_rate": 1.0988483373884836e-05, "loss": 0.6112, "step": 17315 }, { "epoch": 0.5055618814049225, "grad_norm": 0.6950270314730624, "learning_rate": 1.0987834549878346e-05, "loss": 0.6781, "step": 17316 }, { "epoch": 0.5055910776327699, "grad_norm": 0.675395573259351, "learning_rate": 1.0987185725871858e-05, "loss": 0.6294, "step": 17317 }, { "epoch": 0.5056202738606173, "grad_norm": 0.6416055787638598, "learning_rate": 1.098653690186537e-05, "loss": 0.5689, "step": 17318 }, { "epoch": 0.5056494700884646, "grad_norm": 0.6365940875898748, "learning_rate": 1.0985888077858882e-05, "loss": 0.6117, "step": 17319 }, { "epoch": 0.505678666316312, "grad_norm": 0.6936446886134123, "learning_rate": 1.0985239253852394e-05, "loss": 0.6575, "step": 17320 }, { "epoch": 0.5057078625441593, "grad_norm": 0.6854373404417388, "learning_rate": 1.0984590429845904e-05, "loss": 0.6485, "step": 17321 }, { "epoch": 0.5057370587720067, "grad_norm": 0.6459252960444622, "learning_rate": 1.0983941605839416e-05, "loss": 0.62, "step": 17322 }, { "epoch": 0.5057662549998541, "grad_norm": 0.6064404042158137, "learning_rate": 1.0983292781832928e-05, "loss": 0.5629, "step": 17323 }, { "epoch": 0.5057954512277014, "grad_norm": 0.6028001090520126, "learning_rate": 1.098264395782644e-05, "loss": 0.5789, "step": 17324 }, { "epoch": 0.5058246474555488, "grad_norm": 0.6327009432977565, "learning_rate": 1.098199513381995e-05, "loss": 0.6069, "step": 17325 }, { "epoch": 0.5058538436833961, "grad_norm": 0.6580910645640291, "learning_rate": 1.0981346309813463e-05, "loss": 0.6262, "step": 17326 }, { "epoch": 0.5058830399112435, "grad_norm": 0.702495119383792, "learning_rate": 1.0980697485806975e-05, "loss": 0.6514, "step": 17327 }, { "epoch": 0.5059122361390909, "grad_norm": 0.675183350493323, "learning_rate": 1.0980048661800487e-05, "loss": 0.6654, "step": 17328 }, { "epoch": 0.5059414323669382, "grad_norm": 0.6518310017000047, "learning_rate": 1.0979399837794e-05, "loss": 0.6739, "step": 17329 }, { "epoch": 0.5059706285947856, "grad_norm": 0.6715899796432041, "learning_rate": 1.0978751013787513e-05, "loss": 0.6492, "step": 17330 }, { "epoch": 0.505999824822633, "grad_norm": 0.6620398834568009, "learning_rate": 1.0978102189781023e-05, "loss": 0.5881, "step": 17331 }, { "epoch": 0.5060290210504803, "grad_norm": 0.6233509763342886, "learning_rate": 1.0977453365774535e-05, "loss": 0.607, "step": 17332 }, { "epoch": 0.5060582172783277, "grad_norm": 0.6206607376471253, "learning_rate": 1.0976804541768047e-05, "loss": 0.601, "step": 17333 }, { "epoch": 0.506087413506175, "grad_norm": 0.7717743227310718, "learning_rate": 1.0976155717761559e-05, "loss": 0.6555, "step": 17334 }, { "epoch": 0.5061166097340224, "grad_norm": 0.6343948750312376, "learning_rate": 1.097550689375507e-05, "loss": 0.5754, "step": 17335 }, { "epoch": 0.5061458059618698, "grad_norm": 0.668960806453424, "learning_rate": 1.0974858069748581e-05, "loss": 0.6829, "step": 17336 }, { "epoch": 0.5061750021897171, "grad_norm": 0.6286051669101075, "learning_rate": 1.0974209245742093e-05, "loss": 0.6, "step": 17337 }, { "epoch": 0.5062041984175645, "grad_norm": 0.6773017901495254, "learning_rate": 1.0973560421735606e-05, "loss": 0.6809, "step": 17338 }, { "epoch": 0.5062333946454118, "grad_norm": 0.5979643038331999, "learning_rate": 1.0972911597729118e-05, "loss": 0.5687, "step": 17339 }, { "epoch": 0.5062625908732592, "grad_norm": 0.6304371582937592, "learning_rate": 1.0972262773722628e-05, "loss": 0.5776, "step": 17340 }, { "epoch": 0.5062917871011066, "grad_norm": 0.6738618203901253, "learning_rate": 1.097161394971614e-05, "loss": 0.618, "step": 17341 }, { "epoch": 0.5063209833289539, "grad_norm": 0.6950470459967353, "learning_rate": 1.0970965125709652e-05, "loss": 0.6633, "step": 17342 }, { "epoch": 0.5063501795568013, "grad_norm": 0.6293726373079862, "learning_rate": 1.0970316301703164e-05, "loss": 0.5648, "step": 17343 }, { "epoch": 0.5063793757846486, "grad_norm": 0.6280639884663414, "learning_rate": 1.0969667477696674e-05, "loss": 0.5508, "step": 17344 }, { "epoch": 0.506408572012496, "grad_norm": 0.586053210510449, "learning_rate": 1.0969018653690186e-05, "loss": 0.5264, "step": 17345 }, { "epoch": 0.5064377682403434, "grad_norm": 0.6270337768216003, "learning_rate": 1.0968369829683698e-05, "loss": 0.5953, "step": 17346 }, { "epoch": 0.5064669644681907, "grad_norm": 0.6267681755088913, "learning_rate": 1.096772100567721e-05, "loss": 0.5496, "step": 17347 }, { "epoch": 0.5064961606960381, "grad_norm": 0.6574503120733025, "learning_rate": 1.0967072181670724e-05, "loss": 0.6321, "step": 17348 }, { "epoch": 0.5065253569238855, "grad_norm": 0.6908540684594248, "learning_rate": 1.0966423357664236e-05, "loss": 0.6696, "step": 17349 }, { "epoch": 0.5065545531517328, "grad_norm": 0.6208429285413568, "learning_rate": 1.0965774533657746e-05, "loss": 0.5863, "step": 17350 }, { "epoch": 0.5065837493795802, "grad_norm": 0.6620312235004547, "learning_rate": 1.0965125709651258e-05, "loss": 0.6739, "step": 17351 }, { "epoch": 0.5066129456074275, "grad_norm": 0.6777503468564167, "learning_rate": 1.096447688564477e-05, "loss": 0.6206, "step": 17352 }, { "epoch": 0.5066421418352749, "grad_norm": 0.617913294540374, "learning_rate": 1.0963828061638283e-05, "loss": 0.5187, "step": 17353 }, { "epoch": 0.5066713380631223, "grad_norm": 0.6774990816890722, "learning_rate": 1.0963179237631793e-05, "loss": 0.6691, "step": 17354 }, { "epoch": 0.5067005342909696, "grad_norm": 0.6997187356689994, "learning_rate": 1.0962530413625305e-05, "loss": 0.6626, "step": 17355 }, { "epoch": 0.506729730518817, "grad_norm": 0.647062393507296, "learning_rate": 1.0961881589618817e-05, "loss": 0.6284, "step": 17356 }, { "epoch": 0.5067589267466643, "grad_norm": 0.6293099537804424, "learning_rate": 1.0961232765612329e-05, "loss": 0.5387, "step": 17357 }, { "epoch": 0.5067881229745117, "grad_norm": 0.6607883640527086, "learning_rate": 1.0960583941605841e-05, "loss": 0.6692, "step": 17358 }, { "epoch": 0.5068173192023591, "grad_norm": 0.6952931233069498, "learning_rate": 1.0959935117599351e-05, "loss": 0.6906, "step": 17359 }, { "epoch": 0.5068465154302064, "grad_norm": 0.5951271420125561, "learning_rate": 1.0959286293592863e-05, "loss": 0.5251, "step": 17360 }, { "epoch": 0.5068757116580538, "grad_norm": 0.6593923117644955, "learning_rate": 1.0958637469586375e-05, "loss": 0.6763, "step": 17361 }, { "epoch": 0.5069049078859011, "grad_norm": 0.6693121524497481, "learning_rate": 1.0957988645579887e-05, "loss": 0.657, "step": 17362 }, { "epoch": 0.5069341041137485, "grad_norm": 0.6734106293282573, "learning_rate": 1.0957339821573398e-05, "loss": 0.6487, "step": 17363 }, { "epoch": 0.5069633003415959, "grad_norm": 0.6203030949953888, "learning_rate": 1.095669099756691e-05, "loss": 0.5756, "step": 17364 }, { "epoch": 0.5069924965694432, "grad_norm": 0.6475031801192569, "learning_rate": 1.0956042173560422e-05, "loss": 0.6228, "step": 17365 }, { "epoch": 0.5070216927972906, "grad_norm": 0.6594405524029324, "learning_rate": 1.0955393349553934e-05, "loss": 0.6415, "step": 17366 }, { "epoch": 0.507050889025138, "grad_norm": 0.686649424504648, "learning_rate": 1.0954744525547448e-05, "loss": 0.6819, "step": 17367 }, { "epoch": 0.5070800852529853, "grad_norm": 0.6787629372074345, "learning_rate": 1.095409570154096e-05, "loss": 0.6288, "step": 17368 }, { "epoch": 0.5071092814808327, "grad_norm": 0.6288628989555309, "learning_rate": 1.095344687753447e-05, "loss": 0.591, "step": 17369 }, { "epoch": 0.50713847770868, "grad_norm": 0.6089468445438835, "learning_rate": 1.0952798053527982e-05, "loss": 0.5833, "step": 17370 }, { "epoch": 0.5071676739365274, "grad_norm": 0.6333942724041981, "learning_rate": 1.0952149229521494e-05, "loss": 0.5992, "step": 17371 }, { "epoch": 0.5071968701643748, "grad_norm": 0.678122584965113, "learning_rate": 1.0951500405515006e-05, "loss": 0.664, "step": 17372 }, { "epoch": 0.5072260663922221, "grad_norm": 0.7101434401840225, "learning_rate": 1.0950851581508516e-05, "loss": 0.6648, "step": 17373 }, { "epoch": 0.5072552626200695, "grad_norm": 0.6592018360143107, "learning_rate": 1.0950202757502028e-05, "loss": 0.6074, "step": 17374 }, { "epoch": 0.5072844588479168, "grad_norm": 0.674126506093349, "learning_rate": 1.094955393349554e-05, "loss": 0.6493, "step": 17375 }, { "epoch": 0.5073136550757642, "grad_norm": 0.6139538284394592, "learning_rate": 1.0948905109489052e-05, "loss": 0.5981, "step": 17376 }, { "epoch": 0.5073428513036116, "grad_norm": 0.604039721565603, "learning_rate": 1.0948256285482564e-05, "loss": 0.6004, "step": 17377 }, { "epoch": 0.5073720475314589, "grad_norm": 0.6849492499888845, "learning_rate": 1.0947607461476075e-05, "loss": 0.6813, "step": 17378 }, { "epoch": 0.5074012437593063, "grad_norm": 0.6407946310213194, "learning_rate": 1.0946958637469587e-05, "loss": 0.5924, "step": 17379 }, { "epoch": 0.5074304399871536, "grad_norm": 0.6182192497168764, "learning_rate": 1.0946309813463099e-05, "loss": 0.5934, "step": 17380 }, { "epoch": 0.507459636215001, "grad_norm": 0.7723314030668984, "learning_rate": 1.094566098945661e-05, "loss": 0.6856, "step": 17381 }, { "epoch": 0.5074888324428484, "grad_norm": 0.6280070219936573, "learning_rate": 1.0945012165450121e-05, "loss": 0.6046, "step": 17382 }, { "epoch": 0.5075180286706957, "grad_norm": 0.5887479368366345, "learning_rate": 1.0944363341443633e-05, "loss": 0.5538, "step": 17383 }, { "epoch": 0.5075472248985431, "grad_norm": 0.607743470819019, "learning_rate": 1.0943714517437145e-05, "loss": 0.5478, "step": 17384 }, { "epoch": 0.5075764211263905, "grad_norm": 0.6383625946210532, "learning_rate": 1.0943065693430657e-05, "loss": 0.6398, "step": 17385 }, { "epoch": 0.5076056173542378, "grad_norm": 0.6209104443994246, "learning_rate": 1.0942416869424171e-05, "loss": 0.5742, "step": 17386 }, { "epoch": 0.5076348135820852, "grad_norm": 0.6479148576431011, "learning_rate": 1.0941768045417683e-05, "loss": 0.5671, "step": 17387 }, { "epoch": 0.5076640098099325, "grad_norm": 0.6400956820200716, "learning_rate": 1.0941119221411193e-05, "loss": 0.6281, "step": 17388 }, { "epoch": 0.5076932060377799, "grad_norm": 0.6602947138204933, "learning_rate": 1.0940470397404705e-05, "loss": 0.6131, "step": 17389 }, { "epoch": 0.5077224022656273, "grad_norm": 0.6665218952867251, "learning_rate": 1.0939821573398217e-05, "loss": 0.6155, "step": 17390 }, { "epoch": 0.5077515984934746, "grad_norm": 0.6323888160620033, "learning_rate": 1.093917274939173e-05, "loss": 0.5921, "step": 17391 }, { "epoch": 0.507780794721322, "grad_norm": 0.6416212394066383, "learning_rate": 1.093852392538524e-05, "loss": 0.5735, "step": 17392 }, { "epoch": 0.5078099909491693, "grad_norm": 0.6576193049964085, "learning_rate": 1.0937875101378752e-05, "loss": 0.675, "step": 17393 }, { "epoch": 0.5078391871770167, "grad_norm": 0.6349259628091812, "learning_rate": 1.0937226277372264e-05, "loss": 0.5826, "step": 17394 }, { "epoch": 0.5078683834048641, "grad_norm": 0.6210542567788575, "learning_rate": 1.0936577453365776e-05, "loss": 0.5872, "step": 17395 }, { "epoch": 0.5078975796327114, "grad_norm": 0.659361281229061, "learning_rate": 1.0935928629359288e-05, "loss": 0.6394, "step": 17396 }, { "epoch": 0.5079267758605588, "grad_norm": 0.6797733347185896, "learning_rate": 1.0935279805352798e-05, "loss": 0.7153, "step": 17397 }, { "epoch": 0.5079559720884061, "grad_norm": 0.5798301535876945, "learning_rate": 1.093463098134631e-05, "loss": 0.5545, "step": 17398 }, { "epoch": 0.5079851683162535, "grad_norm": 0.632775826839714, "learning_rate": 1.0933982157339822e-05, "loss": 0.5881, "step": 17399 }, { "epoch": 0.5080143645441009, "grad_norm": 0.6115175306044637, "learning_rate": 1.0933333333333334e-05, "loss": 0.549, "step": 17400 }, { "epoch": 0.5080435607719482, "grad_norm": 0.591371984426044, "learning_rate": 1.0932684509326845e-05, "loss": 0.5363, "step": 17401 }, { "epoch": 0.5080727569997956, "grad_norm": 0.6392809906728948, "learning_rate": 1.0932035685320357e-05, "loss": 0.6396, "step": 17402 }, { "epoch": 0.508101953227643, "grad_norm": 0.6810536795639666, "learning_rate": 1.0931386861313869e-05, "loss": 0.6576, "step": 17403 }, { "epoch": 0.5081311494554903, "grad_norm": 0.6340904545625727, "learning_rate": 1.0930738037307382e-05, "loss": 0.6304, "step": 17404 }, { "epoch": 0.5081603456833377, "grad_norm": 0.6855561704331862, "learning_rate": 1.0930089213300894e-05, "loss": 0.6488, "step": 17405 }, { "epoch": 0.508189541911185, "grad_norm": 0.6292761430645952, "learning_rate": 1.0929440389294406e-05, "loss": 0.6076, "step": 17406 }, { "epoch": 0.5082187381390324, "grad_norm": 0.5977408303856795, "learning_rate": 1.0928791565287917e-05, "loss": 0.5141, "step": 17407 }, { "epoch": 0.5082479343668798, "grad_norm": 0.6531252442928519, "learning_rate": 1.0928142741281429e-05, "loss": 0.6097, "step": 17408 }, { "epoch": 0.5082771305947271, "grad_norm": 0.6433048587651555, "learning_rate": 1.092749391727494e-05, "loss": 0.6032, "step": 17409 }, { "epoch": 0.5083063268225745, "grad_norm": 0.7799207996289976, "learning_rate": 1.0926845093268453e-05, "loss": 0.719, "step": 17410 }, { "epoch": 0.5083355230504218, "grad_norm": 0.6671398762052516, "learning_rate": 1.0926196269261963e-05, "loss": 0.6526, "step": 17411 }, { "epoch": 0.5083647192782692, "grad_norm": 0.8000698263940993, "learning_rate": 1.0925547445255475e-05, "loss": 0.7469, "step": 17412 }, { "epoch": 0.5083939155061166, "grad_norm": 0.6736902781485613, "learning_rate": 1.0924898621248987e-05, "loss": 0.711, "step": 17413 }, { "epoch": 0.5084231117339639, "grad_norm": 0.621516155038887, "learning_rate": 1.09242497972425e-05, "loss": 0.5732, "step": 17414 }, { "epoch": 0.5084523079618113, "grad_norm": 0.6213146190108939, "learning_rate": 1.0923600973236011e-05, "loss": 0.5977, "step": 17415 }, { "epoch": 0.5084815041896587, "grad_norm": 0.6479416891952072, "learning_rate": 1.0922952149229522e-05, "loss": 0.619, "step": 17416 }, { "epoch": 0.508510700417506, "grad_norm": 0.6380579422441254, "learning_rate": 1.0922303325223034e-05, "loss": 0.6009, "step": 17417 }, { "epoch": 0.5085398966453534, "grad_norm": 0.6498923434039952, "learning_rate": 1.0921654501216546e-05, "loss": 0.5756, "step": 17418 }, { "epoch": 0.5085690928732007, "grad_norm": 0.7840818954606126, "learning_rate": 1.0921005677210058e-05, "loss": 0.6577, "step": 17419 }, { "epoch": 0.5085982891010481, "grad_norm": 0.6529776798484542, "learning_rate": 1.0920356853203568e-05, "loss": 0.578, "step": 17420 }, { "epoch": 0.5086274853288955, "grad_norm": 0.6791963967320562, "learning_rate": 1.091970802919708e-05, "loss": 0.6575, "step": 17421 }, { "epoch": 0.5086566815567428, "grad_norm": 0.6821274472683584, "learning_rate": 1.0919059205190592e-05, "loss": 0.6769, "step": 17422 }, { "epoch": 0.5086858777845902, "grad_norm": 0.6348451245182788, "learning_rate": 1.0918410381184106e-05, "loss": 0.5884, "step": 17423 }, { "epoch": 0.5087150740124375, "grad_norm": 0.5961893900478688, "learning_rate": 1.0917761557177618e-05, "loss": 0.5719, "step": 17424 }, { "epoch": 0.5087442702402849, "grad_norm": 0.6797348153360102, "learning_rate": 1.091711273317113e-05, "loss": 0.6821, "step": 17425 }, { "epoch": 0.5087734664681323, "grad_norm": 0.6542611167597131, "learning_rate": 1.091646390916464e-05, "loss": 0.5527, "step": 17426 }, { "epoch": 0.5088026626959796, "grad_norm": 0.6565207365740001, "learning_rate": 1.0915815085158152e-05, "loss": 0.6, "step": 17427 }, { "epoch": 0.5088318589238271, "grad_norm": 0.6762788243743987, "learning_rate": 1.0915166261151664e-05, "loss": 0.6076, "step": 17428 }, { "epoch": 0.5088610551516745, "grad_norm": 0.6550478853613471, "learning_rate": 1.0914517437145176e-05, "loss": 0.6298, "step": 17429 }, { "epoch": 0.5088902513795218, "grad_norm": 0.646433343955682, "learning_rate": 1.0913868613138687e-05, "loss": 0.5988, "step": 17430 }, { "epoch": 0.5089194476073692, "grad_norm": 0.6746074781000364, "learning_rate": 1.0913219789132199e-05, "loss": 0.63, "step": 17431 }, { "epoch": 0.5089486438352165, "grad_norm": 0.6844431592902771, "learning_rate": 1.091257096512571e-05, "loss": 0.6663, "step": 17432 }, { "epoch": 0.5089778400630639, "grad_norm": 0.6925576950329444, "learning_rate": 1.0911922141119223e-05, "loss": 0.6781, "step": 17433 }, { "epoch": 0.5090070362909113, "grad_norm": 0.6406065224322711, "learning_rate": 1.0911273317112733e-05, "loss": 0.6279, "step": 17434 }, { "epoch": 0.5090362325187586, "grad_norm": 0.6226681497427885, "learning_rate": 1.0910624493106245e-05, "loss": 0.6038, "step": 17435 }, { "epoch": 0.509065428746606, "grad_norm": 0.6280906186449134, "learning_rate": 1.0909975669099757e-05, "loss": 0.6089, "step": 17436 }, { "epoch": 0.5090946249744533, "grad_norm": 0.7155731492826414, "learning_rate": 1.0909326845093269e-05, "loss": 0.6977, "step": 17437 }, { "epoch": 0.5091238212023007, "grad_norm": 0.6417913255741214, "learning_rate": 1.0908678021086781e-05, "loss": 0.6171, "step": 17438 }, { "epoch": 0.5091530174301481, "grad_norm": 0.7131672613453931, "learning_rate": 1.0908029197080291e-05, "loss": 0.6919, "step": 17439 }, { "epoch": 0.5091822136579954, "grad_norm": 0.6610098242554521, "learning_rate": 1.0907380373073803e-05, "loss": 0.5522, "step": 17440 }, { "epoch": 0.5092114098858428, "grad_norm": 0.601664968603036, "learning_rate": 1.0906731549067316e-05, "loss": 0.534, "step": 17441 }, { "epoch": 0.5092406061136902, "grad_norm": 0.6884651875949009, "learning_rate": 1.090608272506083e-05, "loss": 0.6519, "step": 17442 }, { "epoch": 0.5092698023415375, "grad_norm": 0.6221743210341222, "learning_rate": 1.0905433901054341e-05, "loss": 0.6144, "step": 17443 }, { "epoch": 0.5092989985693849, "grad_norm": 0.6499471126242741, "learning_rate": 1.0904785077047853e-05, "loss": 0.5757, "step": 17444 }, { "epoch": 0.5093281947972322, "grad_norm": 0.635699632295532, "learning_rate": 1.0904136253041364e-05, "loss": 0.606, "step": 17445 }, { "epoch": 0.5093573910250796, "grad_norm": 0.7049968542585494, "learning_rate": 1.0903487429034876e-05, "loss": 0.6137, "step": 17446 }, { "epoch": 0.509386587252927, "grad_norm": 0.6675109719505146, "learning_rate": 1.0902838605028388e-05, "loss": 0.612, "step": 17447 }, { "epoch": 0.5094157834807743, "grad_norm": 0.6691642268775712, "learning_rate": 1.09021897810219e-05, "loss": 0.674, "step": 17448 }, { "epoch": 0.5094449797086217, "grad_norm": 0.6212649078189445, "learning_rate": 1.090154095701541e-05, "loss": 0.566, "step": 17449 }, { "epoch": 0.509474175936469, "grad_norm": 0.6980811129152397, "learning_rate": 1.0900892133008922e-05, "loss": 0.6819, "step": 17450 }, { "epoch": 0.5095033721643164, "grad_norm": 0.6471860879241075, "learning_rate": 1.0900243309002434e-05, "loss": 0.5982, "step": 17451 }, { "epoch": 0.5095325683921638, "grad_norm": 0.6551089557541925, "learning_rate": 1.0899594484995946e-05, "loss": 0.654, "step": 17452 }, { "epoch": 0.5095617646200111, "grad_norm": 0.6064727789693104, "learning_rate": 1.0898945660989456e-05, "loss": 0.6034, "step": 17453 }, { "epoch": 0.5095909608478585, "grad_norm": 0.6292192166095852, "learning_rate": 1.0898296836982968e-05, "loss": 0.564, "step": 17454 }, { "epoch": 0.5096201570757058, "grad_norm": 0.6646339588920354, "learning_rate": 1.089764801297648e-05, "loss": 0.6705, "step": 17455 }, { "epoch": 0.5096493533035532, "grad_norm": 0.6696732004438005, "learning_rate": 1.0896999188969993e-05, "loss": 0.6587, "step": 17456 }, { "epoch": 0.5096785495314006, "grad_norm": 0.6388859634138755, "learning_rate": 1.0896350364963505e-05, "loss": 0.5828, "step": 17457 }, { "epoch": 0.5097077457592479, "grad_norm": 0.6072847991575322, "learning_rate": 1.0895701540957015e-05, "loss": 0.5224, "step": 17458 }, { "epoch": 0.5097369419870953, "grad_norm": 0.7529344250110959, "learning_rate": 1.0895052716950527e-05, "loss": 0.6973, "step": 17459 }, { "epoch": 0.5097661382149427, "grad_norm": 0.5994554853827373, "learning_rate": 1.0894403892944039e-05, "loss": 0.5634, "step": 17460 }, { "epoch": 0.50979533444279, "grad_norm": 0.6658778641231811, "learning_rate": 1.0893755068937553e-05, "loss": 0.6346, "step": 17461 }, { "epoch": 0.5098245306706374, "grad_norm": 0.6632599928812812, "learning_rate": 1.0893106244931065e-05, "loss": 0.6595, "step": 17462 }, { "epoch": 0.5098537268984847, "grad_norm": 0.6702526317335279, "learning_rate": 1.0892457420924577e-05, "loss": 0.6659, "step": 17463 }, { "epoch": 0.5098829231263321, "grad_norm": 0.6963690290722988, "learning_rate": 1.0891808596918087e-05, "loss": 0.6441, "step": 17464 }, { "epoch": 0.5099121193541795, "grad_norm": 0.6410903398213996, "learning_rate": 1.0891159772911599e-05, "loss": 0.6078, "step": 17465 }, { "epoch": 0.5099413155820268, "grad_norm": 0.6732633668121326, "learning_rate": 1.0890510948905111e-05, "loss": 0.6442, "step": 17466 }, { "epoch": 0.5099705118098742, "grad_norm": 0.6224344564290651, "learning_rate": 1.0889862124898623e-05, "loss": 0.5884, "step": 17467 }, { "epoch": 0.5099997080377215, "grad_norm": 0.644310798996404, "learning_rate": 1.0889213300892133e-05, "loss": 0.684, "step": 17468 }, { "epoch": 0.5100289042655689, "grad_norm": 0.619687973985406, "learning_rate": 1.0888564476885645e-05, "loss": 0.5815, "step": 17469 }, { "epoch": 0.5100581004934163, "grad_norm": 0.6428781903108338, "learning_rate": 1.0887915652879158e-05, "loss": 0.6238, "step": 17470 }, { "epoch": 0.5100872967212636, "grad_norm": 0.6796286549707488, "learning_rate": 1.088726682887267e-05, "loss": 0.657, "step": 17471 }, { "epoch": 0.510116492949111, "grad_norm": 0.6580401017156349, "learning_rate": 1.088661800486618e-05, "loss": 0.6754, "step": 17472 }, { "epoch": 0.5101456891769584, "grad_norm": 0.6745097444096884, "learning_rate": 1.0885969180859692e-05, "loss": 0.6704, "step": 17473 }, { "epoch": 0.5101748854048057, "grad_norm": 0.6457831370142065, "learning_rate": 1.0885320356853204e-05, "loss": 0.6509, "step": 17474 }, { "epoch": 0.5102040816326531, "grad_norm": 0.6906257882823482, "learning_rate": 1.0884671532846716e-05, "loss": 0.6929, "step": 17475 }, { "epoch": 0.5102332778605004, "grad_norm": 0.6208384072012125, "learning_rate": 1.0884022708840228e-05, "loss": 0.5791, "step": 17476 }, { "epoch": 0.5102624740883478, "grad_norm": 0.6449607979052195, "learning_rate": 1.0883373884833738e-05, "loss": 0.5855, "step": 17477 }, { "epoch": 0.5102916703161952, "grad_norm": 0.6156960121761517, "learning_rate": 1.088272506082725e-05, "loss": 0.6096, "step": 17478 }, { "epoch": 0.5103208665440425, "grad_norm": 0.642084427899962, "learning_rate": 1.0882076236820762e-05, "loss": 0.5821, "step": 17479 }, { "epoch": 0.5103500627718899, "grad_norm": 0.6244814017728071, "learning_rate": 1.0881427412814276e-05, "loss": 0.5859, "step": 17480 }, { "epoch": 0.5103792589997372, "grad_norm": 0.6213129752607859, "learning_rate": 1.0880778588807788e-05, "loss": 0.5698, "step": 17481 }, { "epoch": 0.5104084552275846, "grad_norm": 0.6140421881135771, "learning_rate": 1.08801297648013e-05, "loss": 0.5747, "step": 17482 }, { "epoch": 0.510437651455432, "grad_norm": 0.614824651313271, "learning_rate": 1.087948094079481e-05, "loss": 0.5762, "step": 17483 }, { "epoch": 0.5104668476832793, "grad_norm": 0.685118534950858, "learning_rate": 1.0878832116788323e-05, "loss": 0.6879, "step": 17484 }, { "epoch": 0.5104960439111267, "grad_norm": 0.7187681232767001, "learning_rate": 1.0878183292781835e-05, "loss": 0.6881, "step": 17485 }, { "epoch": 0.510525240138974, "grad_norm": 0.6479017393614419, "learning_rate": 1.0877534468775347e-05, "loss": 0.6336, "step": 17486 }, { "epoch": 0.5105544363668214, "grad_norm": 0.6002463319013859, "learning_rate": 1.0876885644768857e-05, "loss": 0.5701, "step": 17487 }, { "epoch": 0.5105836325946688, "grad_norm": 0.6612485012085353, "learning_rate": 1.0876236820762369e-05, "loss": 0.6277, "step": 17488 }, { "epoch": 0.5106128288225161, "grad_norm": 0.6982493694738182, "learning_rate": 1.0875587996755881e-05, "loss": 0.7784, "step": 17489 }, { "epoch": 0.5106420250503635, "grad_norm": 0.6123887555701425, "learning_rate": 1.0874939172749393e-05, "loss": 0.6157, "step": 17490 }, { "epoch": 0.5106712212782109, "grad_norm": 0.6734706493946686, "learning_rate": 1.0874290348742903e-05, "loss": 0.5807, "step": 17491 }, { "epoch": 0.5107004175060582, "grad_norm": 0.6373033181390807, "learning_rate": 1.0873641524736415e-05, "loss": 0.6021, "step": 17492 }, { "epoch": 0.5107296137339056, "grad_norm": 0.6484643472306267, "learning_rate": 1.0872992700729927e-05, "loss": 0.6576, "step": 17493 }, { "epoch": 0.5107588099617529, "grad_norm": 0.6456859789539767, "learning_rate": 1.087234387672344e-05, "loss": 0.6327, "step": 17494 }, { "epoch": 0.5107880061896003, "grad_norm": 0.6203196715175832, "learning_rate": 1.0871695052716951e-05, "loss": 0.5864, "step": 17495 }, { "epoch": 0.5108172024174477, "grad_norm": 0.7368087644355154, "learning_rate": 1.0871046228710462e-05, "loss": 0.6957, "step": 17496 }, { "epoch": 0.510846398645295, "grad_norm": 0.6173598845471777, "learning_rate": 1.0870397404703974e-05, "loss": 0.5663, "step": 17497 }, { "epoch": 0.5108755948731424, "grad_norm": 0.6391746907988176, "learning_rate": 1.0869748580697486e-05, "loss": 0.6293, "step": 17498 }, { "epoch": 0.5109047911009897, "grad_norm": 0.6260376124573027, "learning_rate": 1.0869099756691e-05, "loss": 0.6082, "step": 17499 }, { "epoch": 0.5109339873288371, "grad_norm": 0.6456585748928962, "learning_rate": 1.0868450932684512e-05, "loss": 0.6311, "step": 17500 }, { "epoch": 0.5109631835566845, "grad_norm": 0.5691195869968982, "learning_rate": 1.0867802108678024e-05, "loss": 0.538, "step": 17501 }, { "epoch": 0.5109923797845318, "grad_norm": 0.6267211199175565, "learning_rate": 1.0867153284671534e-05, "loss": 0.6207, "step": 17502 }, { "epoch": 0.5110215760123792, "grad_norm": 0.6877603723586878, "learning_rate": 1.0866504460665046e-05, "loss": 0.7212, "step": 17503 }, { "epoch": 0.5110507722402265, "grad_norm": 0.5806117828205468, "learning_rate": 1.0865855636658558e-05, "loss": 0.5463, "step": 17504 }, { "epoch": 0.5110799684680739, "grad_norm": 0.6299088635680732, "learning_rate": 1.086520681265207e-05, "loss": 0.6051, "step": 17505 }, { "epoch": 0.5111091646959213, "grad_norm": 0.6448924952672765, "learning_rate": 1.086455798864558e-05, "loss": 0.5859, "step": 17506 }, { "epoch": 0.5111383609237686, "grad_norm": 0.6693234263824221, "learning_rate": 1.0863909164639092e-05, "loss": 0.6363, "step": 17507 }, { "epoch": 0.511167557151616, "grad_norm": 0.5986919816841081, "learning_rate": 1.0863260340632604e-05, "loss": 0.5991, "step": 17508 }, { "epoch": 0.5111967533794634, "grad_norm": 0.6420783582030408, "learning_rate": 1.0862611516626116e-05, "loss": 0.6444, "step": 17509 }, { "epoch": 0.5112259496073107, "grad_norm": 0.6178750772816911, "learning_rate": 1.0861962692619627e-05, "loss": 0.5859, "step": 17510 }, { "epoch": 0.5112551458351581, "grad_norm": 0.6212865903896574, "learning_rate": 1.0861313868613139e-05, "loss": 0.6079, "step": 17511 }, { "epoch": 0.5112843420630054, "grad_norm": 0.5783968542506013, "learning_rate": 1.086066504460665e-05, "loss": 0.54, "step": 17512 }, { "epoch": 0.5113135382908528, "grad_norm": 0.5993461010685459, "learning_rate": 1.0860016220600163e-05, "loss": 0.5529, "step": 17513 }, { "epoch": 0.5113427345187002, "grad_norm": 0.6129907120073903, "learning_rate": 1.0859367396593675e-05, "loss": 0.5747, "step": 17514 }, { "epoch": 0.5113719307465475, "grad_norm": 0.5723471189385703, "learning_rate": 1.0858718572587185e-05, "loss": 0.5183, "step": 17515 }, { "epoch": 0.5114011269743949, "grad_norm": 0.6188245149868793, "learning_rate": 1.0858069748580697e-05, "loss": 0.5822, "step": 17516 }, { "epoch": 0.5114303232022422, "grad_norm": 0.6229548034161675, "learning_rate": 1.085742092457421e-05, "loss": 0.5512, "step": 17517 }, { "epoch": 0.5114595194300896, "grad_norm": 0.6185466312226112, "learning_rate": 1.0856772100567723e-05, "loss": 0.584, "step": 17518 }, { "epoch": 0.511488715657937, "grad_norm": 0.5820097832181458, "learning_rate": 1.0856123276561235e-05, "loss": 0.5208, "step": 17519 }, { "epoch": 0.5115179118857843, "grad_norm": 0.6232044396232772, "learning_rate": 1.0855474452554747e-05, "loss": 0.5883, "step": 17520 }, { "epoch": 0.5115471081136317, "grad_norm": 0.7036543490430843, "learning_rate": 1.0854825628548257e-05, "loss": 0.7103, "step": 17521 }, { "epoch": 0.511576304341479, "grad_norm": 0.6353216981337525, "learning_rate": 1.085417680454177e-05, "loss": 0.6105, "step": 17522 }, { "epoch": 0.5116055005693264, "grad_norm": 0.6982694564747748, "learning_rate": 1.0853527980535281e-05, "loss": 0.6538, "step": 17523 }, { "epoch": 0.5116346967971738, "grad_norm": 0.6666861104809791, "learning_rate": 1.0852879156528793e-05, "loss": 0.6542, "step": 17524 }, { "epoch": 0.5116638930250211, "grad_norm": 0.6069398055349782, "learning_rate": 1.0852230332522304e-05, "loss": 0.5724, "step": 17525 }, { "epoch": 0.5116930892528685, "grad_norm": 0.5958676716563207, "learning_rate": 1.0851581508515816e-05, "loss": 0.5764, "step": 17526 }, { "epoch": 0.5117222854807159, "grad_norm": 0.6601126830055067, "learning_rate": 1.0850932684509328e-05, "loss": 0.6981, "step": 17527 }, { "epoch": 0.5117514817085632, "grad_norm": 0.6296847870344026, "learning_rate": 1.085028386050284e-05, "loss": 0.6093, "step": 17528 }, { "epoch": 0.5117806779364106, "grad_norm": 0.639407801446213, "learning_rate": 1.084963503649635e-05, "loss": 0.6196, "step": 17529 }, { "epoch": 0.5118098741642579, "grad_norm": 0.6040852001059863, "learning_rate": 1.0848986212489862e-05, "loss": 0.5896, "step": 17530 }, { "epoch": 0.5118390703921053, "grad_norm": 0.6569651632770005, "learning_rate": 1.0848337388483374e-05, "loss": 0.6476, "step": 17531 }, { "epoch": 0.5118682666199527, "grad_norm": 0.5990783954862576, "learning_rate": 1.0847688564476886e-05, "loss": 0.5258, "step": 17532 }, { "epoch": 0.5118974628478, "grad_norm": 0.6479950466491754, "learning_rate": 1.0847039740470398e-05, "loss": 0.6111, "step": 17533 }, { "epoch": 0.5119266590756474, "grad_norm": 0.6665335658350883, "learning_rate": 1.0846390916463909e-05, "loss": 0.6346, "step": 17534 }, { "epoch": 0.5119558553034947, "grad_norm": 0.6775073800060343, "learning_rate": 1.084574209245742e-05, "loss": 0.6464, "step": 17535 }, { "epoch": 0.5119850515313421, "grad_norm": 0.6759594425390782, "learning_rate": 1.0845093268450933e-05, "loss": 0.6906, "step": 17536 }, { "epoch": 0.5120142477591895, "grad_norm": 0.6652468876111347, "learning_rate": 1.0844444444444446e-05, "loss": 0.6376, "step": 17537 }, { "epoch": 0.5120434439870368, "grad_norm": 0.6559936804592669, "learning_rate": 1.0843795620437958e-05, "loss": 0.6467, "step": 17538 }, { "epoch": 0.5120726402148842, "grad_norm": 0.6275256702888501, "learning_rate": 1.084314679643147e-05, "loss": 0.5678, "step": 17539 }, { "epoch": 0.5121018364427316, "grad_norm": 0.6324655493818804, "learning_rate": 1.084249797242498e-05, "loss": 0.6112, "step": 17540 }, { "epoch": 0.5121310326705789, "grad_norm": 0.5850442194827462, "learning_rate": 1.0841849148418493e-05, "loss": 0.5115, "step": 17541 }, { "epoch": 0.5121602288984263, "grad_norm": 0.6849199970855266, "learning_rate": 1.0841200324412005e-05, "loss": 0.6554, "step": 17542 }, { "epoch": 0.5121894251262736, "grad_norm": 0.6307556690308666, "learning_rate": 1.0840551500405517e-05, "loss": 0.6096, "step": 17543 }, { "epoch": 0.512218621354121, "grad_norm": 0.6676355289858233, "learning_rate": 1.0839902676399027e-05, "loss": 0.6469, "step": 17544 }, { "epoch": 0.5122478175819684, "grad_norm": 0.6396478535948653, "learning_rate": 1.083925385239254e-05, "loss": 0.5925, "step": 17545 }, { "epoch": 0.5122770138098157, "grad_norm": 0.6625156824341373, "learning_rate": 1.0838605028386051e-05, "loss": 0.6509, "step": 17546 }, { "epoch": 0.5123062100376631, "grad_norm": 0.6254937186356481, "learning_rate": 1.0837956204379563e-05, "loss": 0.5781, "step": 17547 }, { "epoch": 0.5123354062655106, "grad_norm": 0.6629603734262126, "learning_rate": 1.0837307380373074e-05, "loss": 0.6582, "step": 17548 }, { "epoch": 0.5123646024933579, "grad_norm": 0.7046319865465773, "learning_rate": 1.0836658556366586e-05, "loss": 0.5965, "step": 17549 }, { "epoch": 0.5123937987212053, "grad_norm": 0.6416271637838453, "learning_rate": 1.0836009732360098e-05, "loss": 0.6275, "step": 17550 }, { "epoch": 0.5124229949490526, "grad_norm": 0.5693266709088082, "learning_rate": 1.083536090835361e-05, "loss": 0.5003, "step": 17551 }, { "epoch": 0.5124521911769, "grad_norm": 0.6524056251953848, "learning_rate": 1.0834712084347122e-05, "loss": 0.6715, "step": 17552 }, { "epoch": 0.5124813874047474, "grad_norm": 0.7562731397337809, "learning_rate": 1.0834063260340632e-05, "loss": 0.6472, "step": 17553 }, { "epoch": 0.5125105836325947, "grad_norm": 0.6457543326835902, "learning_rate": 1.0833414436334144e-05, "loss": 0.632, "step": 17554 }, { "epoch": 0.5125397798604421, "grad_norm": 0.660563265502459, "learning_rate": 1.0832765612327658e-05, "loss": 0.6229, "step": 17555 }, { "epoch": 0.5125689760882894, "grad_norm": 0.6364405979222967, "learning_rate": 1.083211678832117e-05, "loss": 0.5925, "step": 17556 }, { "epoch": 0.5125981723161368, "grad_norm": 0.6685361797008249, "learning_rate": 1.0831467964314682e-05, "loss": 0.6292, "step": 17557 }, { "epoch": 0.5126273685439842, "grad_norm": 0.622265862955258, "learning_rate": 1.0830819140308192e-05, "loss": 0.5694, "step": 17558 }, { "epoch": 0.5126565647718315, "grad_norm": 0.6141453808714893, "learning_rate": 1.0830170316301704e-05, "loss": 0.5791, "step": 17559 }, { "epoch": 0.5126857609996789, "grad_norm": 0.6364618810668097, "learning_rate": 1.0829521492295216e-05, "loss": 0.6515, "step": 17560 }, { "epoch": 0.5127149572275262, "grad_norm": 0.621972353906759, "learning_rate": 1.0828872668288728e-05, "loss": 0.5657, "step": 17561 }, { "epoch": 0.5127441534553736, "grad_norm": 0.6181689105138748, "learning_rate": 1.082822384428224e-05, "loss": 0.5645, "step": 17562 }, { "epoch": 0.512773349683221, "grad_norm": 0.6197454206144659, "learning_rate": 1.082757502027575e-05, "loss": 0.5307, "step": 17563 }, { "epoch": 0.5128025459110683, "grad_norm": 0.6656291717908961, "learning_rate": 1.0826926196269263e-05, "loss": 0.651, "step": 17564 }, { "epoch": 0.5128317421389157, "grad_norm": 0.6571682673517948, "learning_rate": 1.0826277372262775e-05, "loss": 0.6299, "step": 17565 }, { "epoch": 0.512860938366763, "grad_norm": 0.6186204108587574, "learning_rate": 1.0825628548256287e-05, "loss": 0.6054, "step": 17566 }, { "epoch": 0.5128901345946104, "grad_norm": 0.6715708174079082, "learning_rate": 1.0824979724249797e-05, "loss": 0.6864, "step": 17567 }, { "epoch": 0.5129193308224578, "grad_norm": 0.6106024818706709, "learning_rate": 1.0824330900243309e-05, "loss": 0.5905, "step": 17568 }, { "epoch": 0.5129485270503051, "grad_norm": 0.6554478752085824, "learning_rate": 1.0823682076236821e-05, "loss": 0.6207, "step": 17569 }, { "epoch": 0.5129777232781525, "grad_norm": 0.6365174675031415, "learning_rate": 1.0823033252230333e-05, "loss": 0.6378, "step": 17570 }, { "epoch": 0.5130069195059999, "grad_norm": 0.6444710121582835, "learning_rate": 1.0822384428223845e-05, "loss": 0.5799, "step": 17571 }, { "epoch": 0.5130361157338472, "grad_norm": 0.6019867801455573, "learning_rate": 1.0821735604217355e-05, "loss": 0.5738, "step": 17572 }, { "epoch": 0.5130653119616946, "grad_norm": 0.5916095080330119, "learning_rate": 1.0821086780210868e-05, "loss": 0.5332, "step": 17573 }, { "epoch": 0.5130945081895419, "grad_norm": 0.677151116731703, "learning_rate": 1.0820437956204381e-05, "loss": 0.6835, "step": 17574 }, { "epoch": 0.5131237044173893, "grad_norm": 0.6701060084402652, "learning_rate": 1.0819789132197893e-05, "loss": 0.6391, "step": 17575 }, { "epoch": 0.5131529006452367, "grad_norm": 0.701597149620278, "learning_rate": 1.0819140308191405e-05, "loss": 0.643, "step": 17576 }, { "epoch": 0.513182096873084, "grad_norm": 0.6080259111801822, "learning_rate": 1.0818491484184916e-05, "loss": 0.6048, "step": 17577 }, { "epoch": 0.5132112931009314, "grad_norm": 0.628635915423793, "learning_rate": 1.0817842660178428e-05, "loss": 0.623, "step": 17578 }, { "epoch": 0.5132404893287787, "grad_norm": 0.5839668692576039, "learning_rate": 1.081719383617194e-05, "loss": 0.5291, "step": 17579 }, { "epoch": 0.5132696855566261, "grad_norm": 0.6404212019911564, "learning_rate": 1.0816545012165452e-05, "loss": 0.6391, "step": 17580 }, { "epoch": 0.5132988817844735, "grad_norm": 0.6449649612315843, "learning_rate": 1.0815896188158964e-05, "loss": 0.6795, "step": 17581 }, { "epoch": 0.5133280780123208, "grad_norm": 0.6211344541949834, "learning_rate": 1.0815247364152474e-05, "loss": 0.5644, "step": 17582 }, { "epoch": 0.5133572742401682, "grad_norm": 0.6375729282140741, "learning_rate": 1.0814598540145986e-05, "loss": 0.5705, "step": 17583 }, { "epoch": 0.5133864704680156, "grad_norm": 0.6200894041305441, "learning_rate": 1.0813949716139498e-05, "loss": 0.582, "step": 17584 }, { "epoch": 0.5134156666958629, "grad_norm": 0.6512125243885458, "learning_rate": 1.081330089213301e-05, "loss": 0.6193, "step": 17585 }, { "epoch": 0.5134448629237103, "grad_norm": 0.6230691676464005, "learning_rate": 1.081265206812652e-05, "loss": 0.572, "step": 17586 }, { "epoch": 0.5134740591515576, "grad_norm": 0.6534027100231761, "learning_rate": 1.0812003244120033e-05, "loss": 0.6453, "step": 17587 }, { "epoch": 0.513503255379405, "grad_norm": 0.591448101526375, "learning_rate": 1.0811354420113545e-05, "loss": 0.5458, "step": 17588 }, { "epoch": 0.5135324516072524, "grad_norm": 0.6127508579984365, "learning_rate": 1.0810705596107057e-05, "loss": 0.6115, "step": 17589 }, { "epoch": 0.5135616478350997, "grad_norm": 0.6581941868171042, "learning_rate": 1.0810056772100569e-05, "loss": 0.6407, "step": 17590 }, { "epoch": 0.5135908440629471, "grad_norm": 0.702125044251268, "learning_rate": 1.0809407948094079e-05, "loss": 0.6919, "step": 17591 }, { "epoch": 0.5136200402907944, "grad_norm": 0.6792586035397606, "learning_rate": 1.0808759124087591e-05, "loss": 0.5687, "step": 17592 }, { "epoch": 0.5136492365186418, "grad_norm": 0.7388572179846935, "learning_rate": 1.0808110300081105e-05, "loss": 0.6152, "step": 17593 }, { "epoch": 0.5136784327464892, "grad_norm": 0.6885273446369157, "learning_rate": 1.0807461476074617e-05, "loss": 0.6374, "step": 17594 }, { "epoch": 0.5137076289743365, "grad_norm": 0.6489843718197055, "learning_rate": 1.0806812652068129e-05, "loss": 0.5976, "step": 17595 }, { "epoch": 0.5137368252021839, "grad_norm": 0.6634987304561544, "learning_rate": 1.0806163828061639e-05, "loss": 0.5987, "step": 17596 }, { "epoch": 0.5137660214300313, "grad_norm": 0.6326140464844888, "learning_rate": 1.0805515004055151e-05, "loss": 0.5788, "step": 17597 }, { "epoch": 0.5137952176578786, "grad_norm": 0.6659440323771025, "learning_rate": 1.0804866180048663e-05, "loss": 0.6591, "step": 17598 }, { "epoch": 0.513824413885726, "grad_norm": 0.610493907958437, "learning_rate": 1.0804217356042175e-05, "loss": 0.5396, "step": 17599 }, { "epoch": 0.5138536101135733, "grad_norm": 0.5811283508622626, "learning_rate": 1.0803568532035687e-05, "loss": 0.5313, "step": 17600 }, { "epoch": 0.5138828063414207, "grad_norm": 0.6500085505310215, "learning_rate": 1.0802919708029198e-05, "loss": 0.6161, "step": 17601 }, { "epoch": 0.5139120025692681, "grad_norm": 0.6949346155902081, "learning_rate": 1.080227088402271e-05, "loss": 0.7136, "step": 17602 }, { "epoch": 0.5139411987971154, "grad_norm": 0.630819567998158, "learning_rate": 1.0801622060016222e-05, "loss": 0.5709, "step": 17603 }, { "epoch": 0.5139703950249628, "grad_norm": 0.7166164701338334, "learning_rate": 1.0800973236009734e-05, "loss": 0.6837, "step": 17604 }, { "epoch": 0.5139995912528101, "grad_norm": 0.5988060747726484, "learning_rate": 1.0800324412003244e-05, "loss": 0.5497, "step": 17605 }, { "epoch": 0.5140287874806575, "grad_norm": 0.6413574842876709, "learning_rate": 1.0799675587996756e-05, "loss": 0.6035, "step": 17606 }, { "epoch": 0.5140579837085049, "grad_norm": 0.6393435882186252, "learning_rate": 1.0799026763990268e-05, "loss": 0.5941, "step": 17607 }, { "epoch": 0.5140871799363522, "grad_norm": 0.6178265304582827, "learning_rate": 1.079837793998378e-05, "loss": 0.5544, "step": 17608 }, { "epoch": 0.5141163761641996, "grad_norm": 0.6190759890673027, "learning_rate": 1.0797729115977292e-05, "loss": 0.5689, "step": 17609 }, { "epoch": 0.514145572392047, "grad_norm": 1.0601908050678477, "learning_rate": 1.0797080291970802e-05, "loss": 0.6138, "step": 17610 }, { "epoch": 0.5141747686198943, "grad_norm": 0.6516157289897158, "learning_rate": 1.0796431467964314e-05, "loss": 0.6501, "step": 17611 }, { "epoch": 0.5142039648477417, "grad_norm": 0.6526694732949128, "learning_rate": 1.0795782643957828e-05, "loss": 0.6399, "step": 17612 }, { "epoch": 0.514233161075589, "grad_norm": 0.6642871146384093, "learning_rate": 1.079513381995134e-05, "loss": 0.5447, "step": 17613 }, { "epoch": 0.5142623573034364, "grad_norm": 0.6609979267832539, "learning_rate": 1.0794484995944852e-05, "loss": 0.6536, "step": 17614 }, { "epoch": 0.5142915535312838, "grad_norm": 0.6663413727656207, "learning_rate": 1.0793836171938362e-05, "loss": 0.623, "step": 17615 }, { "epoch": 0.5143207497591311, "grad_norm": 0.603286054634838, "learning_rate": 1.0793187347931875e-05, "loss": 0.5939, "step": 17616 }, { "epoch": 0.5143499459869785, "grad_norm": 0.6400520256815347, "learning_rate": 1.0792538523925387e-05, "loss": 0.6028, "step": 17617 }, { "epoch": 0.5143791422148258, "grad_norm": 0.6218514352665558, "learning_rate": 1.0791889699918899e-05, "loss": 0.572, "step": 17618 }, { "epoch": 0.5144083384426732, "grad_norm": 0.6523021933457726, "learning_rate": 1.079124087591241e-05, "loss": 0.6268, "step": 17619 }, { "epoch": 0.5144375346705206, "grad_norm": 0.5926832529737136, "learning_rate": 1.0790592051905921e-05, "loss": 0.5399, "step": 17620 }, { "epoch": 0.5144667308983679, "grad_norm": 0.6771062829043728, "learning_rate": 1.0789943227899433e-05, "loss": 0.7171, "step": 17621 }, { "epoch": 0.5144959271262153, "grad_norm": 0.6036995205673156, "learning_rate": 1.0789294403892945e-05, "loss": 0.562, "step": 17622 }, { "epoch": 0.5145251233540626, "grad_norm": 0.6601888727023183, "learning_rate": 1.0788645579886457e-05, "loss": 0.6227, "step": 17623 }, { "epoch": 0.51455431958191, "grad_norm": 0.6017270375300587, "learning_rate": 1.0787996755879967e-05, "loss": 0.5886, "step": 17624 }, { "epoch": 0.5145835158097574, "grad_norm": 0.7136983594933078, "learning_rate": 1.078734793187348e-05, "loss": 0.7354, "step": 17625 }, { "epoch": 0.5146127120376047, "grad_norm": 0.6186645569086486, "learning_rate": 1.0786699107866991e-05, "loss": 0.5449, "step": 17626 }, { "epoch": 0.5146419082654521, "grad_norm": 0.6125490622027974, "learning_rate": 1.0786050283860503e-05, "loss": 0.5847, "step": 17627 }, { "epoch": 0.5146711044932994, "grad_norm": 0.6328203188315656, "learning_rate": 1.0785401459854015e-05, "loss": 0.593, "step": 17628 }, { "epoch": 0.5147003007211468, "grad_norm": 0.6375861082034063, "learning_rate": 1.0784752635847526e-05, "loss": 0.5676, "step": 17629 }, { "epoch": 0.5147294969489942, "grad_norm": 0.6449926219764128, "learning_rate": 1.0784103811841038e-05, "loss": 0.618, "step": 17630 }, { "epoch": 0.5147586931768415, "grad_norm": 0.6203374628553988, "learning_rate": 1.0783454987834552e-05, "loss": 0.6205, "step": 17631 }, { "epoch": 0.5147878894046889, "grad_norm": 0.6274852658489308, "learning_rate": 1.0782806163828064e-05, "loss": 0.5604, "step": 17632 }, { "epoch": 0.5148170856325363, "grad_norm": 0.6111802442160539, "learning_rate": 1.0782157339821576e-05, "loss": 0.5719, "step": 17633 }, { "epoch": 0.5148462818603836, "grad_norm": 0.6171292614441989, "learning_rate": 1.0781508515815086e-05, "loss": 0.5773, "step": 17634 }, { "epoch": 0.514875478088231, "grad_norm": 0.6281926174676048, "learning_rate": 1.0780859691808598e-05, "loss": 0.5952, "step": 17635 }, { "epoch": 0.5149046743160783, "grad_norm": 0.6198118935209127, "learning_rate": 1.078021086780211e-05, "loss": 0.5888, "step": 17636 }, { "epoch": 0.5149338705439257, "grad_norm": 0.6406655968969522, "learning_rate": 1.0779562043795622e-05, "loss": 0.6256, "step": 17637 }, { "epoch": 0.5149630667717731, "grad_norm": 0.635096376759768, "learning_rate": 1.0778913219789134e-05, "loss": 0.5944, "step": 17638 }, { "epoch": 0.5149922629996204, "grad_norm": 0.6816917124271176, "learning_rate": 1.0778264395782644e-05, "loss": 0.662, "step": 17639 }, { "epoch": 0.5150214592274678, "grad_norm": 0.6543004704255128, "learning_rate": 1.0777615571776156e-05, "loss": 0.6244, "step": 17640 }, { "epoch": 0.5150506554553151, "grad_norm": 0.6553830313799517, "learning_rate": 1.0776966747769668e-05, "loss": 0.6451, "step": 17641 }, { "epoch": 0.5150798516831625, "grad_norm": 0.6788818743928254, "learning_rate": 1.077631792376318e-05, "loss": 0.6279, "step": 17642 }, { "epoch": 0.5151090479110099, "grad_norm": 0.6567605693454808, "learning_rate": 1.077566909975669e-05, "loss": 0.6249, "step": 17643 }, { "epoch": 0.5151382441388572, "grad_norm": 0.5866675200770748, "learning_rate": 1.0775020275750203e-05, "loss": 0.552, "step": 17644 }, { "epoch": 0.5151674403667046, "grad_norm": 0.6343880969907728, "learning_rate": 1.0774371451743715e-05, "loss": 0.5819, "step": 17645 }, { "epoch": 0.515196636594552, "grad_norm": 0.6325641554338253, "learning_rate": 1.0773722627737227e-05, "loss": 0.5744, "step": 17646 }, { "epoch": 0.5152258328223993, "grad_norm": 0.5889185440734293, "learning_rate": 1.0773073803730739e-05, "loss": 0.5659, "step": 17647 }, { "epoch": 0.5152550290502467, "grad_norm": 0.6413461327523969, "learning_rate": 1.077242497972425e-05, "loss": 0.5786, "step": 17648 }, { "epoch": 0.515284225278094, "grad_norm": 0.6463675158339741, "learning_rate": 1.0771776155717761e-05, "loss": 0.6526, "step": 17649 }, { "epoch": 0.5153134215059414, "grad_norm": 0.6646055118889198, "learning_rate": 1.0771127331711275e-05, "loss": 0.6313, "step": 17650 }, { "epoch": 0.5153426177337888, "grad_norm": 0.6503324413348914, "learning_rate": 1.0770478507704787e-05, "loss": 0.6246, "step": 17651 }, { "epoch": 0.5153718139616361, "grad_norm": 0.6132470757410448, "learning_rate": 1.0769829683698299e-05, "loss": 0.5732, "step": 17652 }, { "epoch": 0.5154010101894835, "grad_norm": 0.5845246917289675, "learning_rate": 1.076918085969181e-05, "loss": 0.5138, "step": 17653 }, { "epoch": 0.5154302064173308, "grad_norm": 0.6541379855581142, "learning_rate": 1.0768532035685321e-05, "loss": 0.6465, "step": 17654 }, { "epoch": 0.5154594026451782, "grad_norm": 0.7484750028065484, "learning_rate": 1.0767883211678833e-05, "loss": 0.5585, "step": 17655 }, { "epoch": 0.5154885988730256, "grad_norm": 0.6890033278173429, "learning_rate": 1.0767234387672345e-05, "loss": 0.6648, "step": 17656 }, { "epoch": 0.5155177951008729, "grad_norm": 0.6604732927221966, "learning_rate": 1.0766585563665857e-05, "loss": 0.6373, "step": 17657 }, { "epoch": 0.5155469913287203, "grad_norm": 0.6408977650015557, "learning_rate": 1.0765936739659368e-05, "loss": 0.634, "step": 17658 }, { "epoch": 0.5155761875565676, "grad_norm": 0.6493143891336907, "learning_rate": 1.076528791565288e-05, "loss": 0.606, "step": 17659 }, { "epoch": 0.515605383784415, "grad_norm": 0.6854878037643803, "learning_rate": 1.0764639091646392e-05, "loss": 0.7023, "step": 17660 }, { "epoch": 0.5156345800122624, "grad_norm": 0.6219207289814848, "learning_rate": 1.0763990267639904e-05, "loss": 0.5706, "step": 17661 }, { "epoch": 0.5156637762401097, "grad_norm": 0.6391637571534172, "learning_rate": 1.0763341443633414e-05, "loss": 0.6434, "step": 17662 }, { "epoch": 0.5156929724679571, "grad_norm": 0.614943927013015, "learning_rate": 1.0762692619626926e-05, "loss": 0.5796, "step": 17663 }, { "epoch": 0.5157221686958045, "grad_norm": 0.7116440678439163, "learning_rate": 1.0762043795620438e-05, "loss": 0.6503, "step": 17664 }, { "epoch": 0.5157513649236518, "grad_norm": 0.6628063026157662, "learning_rate": 1.076139497161395e-05, "loss": 0.6156, "step": 17665 }, { "epoch": 0.5157805611514992, "grad_norm": 0.6251800847330048, "learning_rate": 1.0760746147607462e-05, "loss": 0.5889, "step": 17666 }, { "epoch": 0.5158097573793465, "grad_norm": 0.6556635988065286, "learning_rate": 1.0760097323600973e-05, "loss": 0.611, "step": 17667 }, { "epoch": 0.5158389536071939, "grad_norm": 0.6598042016455476, "learning_rate": 1.0759448499594485e-05, "loss": 0.7137, "step": 17668 }, { "epoch": 0.5158681498350414, "grad_norm": 0.636352376878044, "learning_rate": 1.0758799675587998e-05, "loss": 0.5774, "step": 17669 }, { "epoch": 0.5158973460628887, "grad_norm": 0.6297201346315439, "learning_rate": 1.075815085158151e-05, "loss": 0.6063, "step": 17670 }, { "epoch": 0.5159265422907361, "grad_norm": 0.6402803079533658, "learning_rate": 1.0757502027575022e-05, "loss": 0.5718, "step": 17671 }, { "epoch": 0.5159557385185835, "grad_norm": 0.6290159185311401, "learning_rate": 1.0756853203568533e-05, "loss": 0.5296, "step": 17672 }, { "epoch": 0.5159849347464308, "grad_norm": 0.711812185201805, "learning_rate": 1.0756204379562045e-05, "loss": 0.6983, "step": 17673 }, { "epoch": 0.5160141309742782, "grad_norm": 0.657888238010829, "learning_rate": 1.0755555555555557e-05, "loss": 0.6625, "step": 17674 }, { "epoch": 0.5160433272021255, "grad_norm": 0.6671360165544902, "learning_rate": 1.0754906731549069e-05, "loss": 0.6746, "step": 17675 }, { "epoch": 0.5160725234299729, "grad_norm": 0.5851204842695956, "learning_rate": 1.0754257907542581e-05, "loss": 0.5613, "step": 17676 }, { "epoch": 0.5161017196578203, "grad_norm": 0.6222463064415996, "learning_rate": 1.0753609083536091e-05, "loss": 0.5489, "step": 17677 }, { "epoch": 0.5161309158856676, "grad_norm": 0.6277041063001836, "learning_rate": 1.0752960259529603e-05, "loss": 0.5792, "step": 17678 }, { "epoch": 0.516160112113515, "grad_norm": 0.6914454655757228, "learning_rate": 1.0752311435523115e-05, "loss": 0.6913, "step": 17679 }, { "epoch": 0.5161893083413623, "grad_norm": 0.687583421589214, "learning_rate": 1.0751662611516627e-05, "loss": 0.6753, "step": 17680 }, { "epoch": 0.5162185045692097, "grad_norm": 0.6109109762103753, "learning_rate": 1.0751013787510138e-05, "loss": 0.5438, "step": 17681 }, { "epoch": 0.5162477007970571, "grad_norm": 0.5882717809699843, "learning_rate": 1.075036496350365e-05, "loss": 0.5722, "step": 17682 }, { "epoch": 0.5162768970249044, "grad_norm": 0.6100474454714218, "learning_rate": 1.0749716139497162e-05, "loss": 0.5638, "step": 17683 }, { "epoch": 0.5163060932527518, "grad_norm": 0.6309748816268904, "learning_rate": 1.0749067315490674e-05, "loss": 0.5811, "step": 17684 }, { "epoch": 0.5163352894805991, "grad_norm": 0.6215693710856192, "learning_rate": 1.0748418491484186e-05, "loss": 0.6275, "step": 17685 }, { "epoch": 0.5163644857084465, "grad_norm": 0.6438144141103495, "learning_rate": 1.0747769667477696e-05, "loss": 0.6091, "step": 17686 }, { "epoch": 0.5163936819362939, "grad_norm": 0.6359282841558351, "learning_rate": 1.0747120843471208e-05, "loss": 0.6282, "step": 17687 }, { "epoch": 0.5164228781641412, "grad_norm": 0.6905781774553196, "learning_rate": 1.0746472019464722e-05, "loss": 0.6552, "step": 17688 }, { "epoch": 0.5164520743919886, "grad_norm": 0.6237441312835066, "learning_rate": 1.0745823195458234e-05, "loss": 0.5651, "step": 17689 }, { "epoch": 0.516481270619836, "grad_norm": 0.6454522511992995, "learning_rate": 1.0745174371451746e-05, "loss": 0.6218, "step": 17690 }, { "epoch": 0.5165104668476833, "grad_norm": 0.687283288873413, "learning_rate": 1.0744525547445256e-05, "loss": 0.6586, "step": 17691 }, { "epoch": 0.5165396630755307, "grad_norm": 0.5556967666412014, "learning_rate": 1.0743876723438768e-05, "loss": 0.4861, "step": 17692 }, { "epoch": 0.516568859303378, "grad_norm": 0.6254021381234737, "learning_rate": 1.074322789943228e-05, "loss": 0.5762, "step": 17693 }, { "epoch": 0.5165980555312254, "grad_norm": 0.6074603183554087, "learning_rate": 1.0742579075425792e-05, "loss": 0.5729, "step": 17694 }, { "epoch": 0.5166272517590728, "grad_norm": 0.6407772806430593, "learning_rate": 1.0741930251419304e-05, "loss": 0.6279, "step": 17695 }, { "epoch": 0.5166564479869201, "grad_norm": 0.660438978612943, "learning_rate": 1.0741281427412815e-05, "loss": 0.6149, "step": 17696 }, { "epoch": 0.5166856442147675, "grad_norm": 0.7038942898203595, "learning_rate": 1.0740632603406327e-05, "loss": 0.6788, "step": 17697 }, { "epoch": 0.5167148404426148, "grad_norm": 0.6417375653350682, "learning_rate": 1.0739983779399839e-05, "loss": 0.6133, "step": 17698 }, { "epoch": 0.5167440366704622, "grad_norm": 0.643387400619136, "learning_rate": 1.073933495539335e-05, "loss": 0.5984, "step": 17699 }, { "epoch": 0.5167732328983096, "grad_norm": 0.6857552541554528, "learning_rate": 1.0738686131386861e-05, "loss": 0.6991, "step": 17700 }, { "epoch": 0.5168024291261569, "grad_norm": 0.678736608739751, "learning_rate": 1.0738037307380373e-05, "loss": 0.6807, "step": 17701 }, { "epoch": 0.5168316253540043, "grad_norm": 0.6108673693352173, "learning_rate": 1.0737388483373885e-05, "loss": 0.5946, "step": 17702 }, { "epoch": 0.5168608215818516, "grad_norm": 0.6534107810483462, "learning_rate": 1.0736739659367397e-05, "loss": 0.6152, "step": 17703 }, { "epoch": 0.516890017809699, "grad_norm": 0.7386393865622282, "learning_rate": 1.0736090835360908e-05, "loss": 0.6926, "step": 17704 }, { "epoch": 0.5169192140375464, "grad_norm": 0.6797372533530389, "learning_rate": 1.073544201135442e-05, "loss": 0.6919, "step": 17705 }, { "epoch": 0.5169484102653937, "grad_norm": 0.6110056799803472, "learning_rate": 1.0734793187347933e-05, "loss": 0.5903, "step": 17706 }, { "epoch": 0.5169776064932411, "grad_norm": 0.6213717079823292, "learning_rate": 1.0734144363341445e-05, "loss": 0.6026, "step": 17707 }, { "epoch": 0.5170068027210885, "grad_norm": 0.6413746407036046, "learning_rate": 1.0733495539334957e-05, "loss": 0.6042, "step": 17708 }, { "epoch": 0.5170359989489358, "grad_norm": 0.6229274339089845, "learning_rate": 1.073284671532847e-05, "loss": 0.5849, "step": 17709 }, { "epoch": 0.5170651951767832, "grad_norm": 0.6619895264462973, "learning_rate": 1.073219789132198e-05, "loss": 0.6343, "step": 17710 }, { "epoch": 0.5170943914046305, "grad_norm": 0.6475737021223062, "learning_rate": 1.0731549067315492e-05, "loss": 0.5821, "step": 17711 }, { "epoch": 0.5171235876324779, "grad_norm": 0.6525800944357204, "learning_rate": 1.0730900243309004e-05, "loss": 0.5594, "step": 17712 }, { "epoch": 0.5171527838603253, "grad_norm": 0.6833806866627483, "learning_rate": 1.0730251419302516e-05, "loss": 0.6801, "step": 17713 }, { "epoch": 0.5171819800881726, "grad_norm": 0.5914044728682514, "learning_rate": 1.0729602595296028e-05, "loss": 0.5556, "step": 17714 }, { "epoch": 0.51721117631602, "grad_norm": 0.651782952830442, "learning_rate": 1.0728953771289538e-05, "loss": 0.6189, "step": 17715 }, { "epoch": 0.5172403725438673, "grad_norm": 0.6428474650285205, "learning_rate": 1.072830494728305e-05, "loss": 0.5967, "step": 17716 }, { "epoch": 0.5172695687717147, "grad_norm": 0.6439656977059812, "learning_rate": 1.0727656123276562e-05, "loss": 0.6173, "step": 17717 }, { "epoch": 0.5172987649995621, "grad_norm": 0.6696856475058253, "learning_rate": 1.0727007299270074e-05, "loss": 0.7074, "step": 17718 }, { "epoch": 0.5173279612274094, "grad_norm": 0.6603938271335417, "learning_rate": 1.0726358475263585e-05, "loss": 0.6604, "step": 17719 }, { "epoch": 0.5173571574552568, "grad_norm": 0.6198532690837943, "learning_rate": 1.0725709651257097e-05, "loss": 0.6084, "step": 17720 }, { "epoch": 0.5173863536831041, "grad_norm": 0.6225256946017146, "learning_rate": 1.0725060827250609e-05, "loss": 0.5503, "step": 17721 }, { "epoch": 0.5174155499109515, "grad_norm": 0.6348050580571953, "learning_rate": 1.072441200324412e-05, "loss": 0.578, "step": 17722 }, { "epoch": 0.5174447461387989, "grad_norm": 0.6438301562674873, "learning_rate": 1.0723763179237631e-05, "loss": 0.5824, "step": 17723 }, { "epoch": 0.5174739423666462, "grad_norm": 0.5971661104262114, "learning_rate": 1.0723114355231143e-05, "loss": 0.5637, "step": 17724 }, { "epoch": 0.5175031385944936, "grad_norm": 0.6492283325786227, "learning_rate": 1.0722465531224657e-05, "loss": 0.6106, "step": 17725 }, { "epoch": 0.517532334822341, "grad_norm": 0.6867889729521752, "learning_rate": 1.0721816707218169e-05, "loss": 0.5929, "step": 17726 }, { "epoch": 0.5175615310501883, "grad_norm": 0.6211477975615121, "learning_rate": 1.072116788321168e-05, "loss": 0.5607, "step": 17727 }, { "epoch": 0.5175907272780357, "grad_norm": 0.7023648034957806, "learning_rate": 1.0720519059205193e-05, "loss": 0.7071, "step": 17728 }, { "epoch": 0.517619923505883, "grad_norm": 0.6659943885442093, "learning_rate": 1.0719870235198703e-05, "loss": 0.6476, "step": 17729 }, { "epoch": 0.5176491197337304, "grad_norm": 0.6752173493143332, "learning_rate": 1.0719221411192215e-05, "loss": 0.6031, "step": 17730 }, { "epoch": 0.5176783159615778, "grad_norm": 0.6217510017764665, "learning_rate": 1.0718572587185727e-05, "loss": 0.5674, "step": 17731 }, { "epoch": 0.5177075121894251, "grad_norm": 0.6572329148088988, "learning_rate": 1.071792376317924e-05, "loss": 0.6346, "step": 17732 }, { "epoch": 0.5177367084172725, "grad_norm": 0.6331776395586485, "learning_rate": 1.0717274939172751e-05, "loss": 0.5533, "step": 17733 }, { "epoch": 0.5177659046451198, "grad_norm": 0.646915442915002, "learning_rate": 1.0716626115166262e-05, "loss": 0.6392, "step": 17734 }, { "epoch": 0.5177951008729672, "grad_norm": 0.6251643903423454, "learning_rate": 1.0715977291159774e-05, "loss": 0.605, "step": 17735 }, { "epoch": 0.5178242971008146, "grad_norm": 0.677664703534844, "learning_rate": 1.0715328467153286e-05, "loss": 0.6843, "step": 17736 }, { "epoch": 0.5178534933286619, "grad_norm": 0.6200440873942071, "learning_rate": 1.0714679643146798e-05, "loss": 0.5845, "step": 17737 }, { "epoch": 0.5178826895565093, "grad_norm": 0.6582801982443661, "learning_rate": 1.0714030819140308e-05, "loss": 0.6449, "step": 17738 }, { "epoch": 0.5179118857843567, "grad_norm": 0.6843383050770546, "learning_rate": 1.071338199513382e-05, "loss": 0.645, "step": 17739 }, { "epoch": 0.517941082012204, "grad_norm": 0.6556336153332509, "learning_rate": 1.0712733171127332e-05, "loss": 0.6505, "step": 17740 }, { "epoch": 0.5179702782400514, "grad_norm": 0.652793479409919, "learning_rate": 1.0712084347120844e-05, "loss": 0.6664, "step": 17741 }, { "epoch": 0.5179994744678987, "grad_norm": 0.6318262152835343, "learning_rate": 1.0711435523114354e-05, "loss": 0.6456, "step": 17742 }, { "epoch": 0.5180286706957461, "grad_norm": 0.6130417281726205, "learning_rate": 1.0710786699107866e-05, "loss": 0.5548, "step": 17743 }, { "epoch": 0.5180578669235935, "grad_norm": 0.6712911815646408, "learning_rate": 1.071013787510138e-05, "loss": 0.6377, "step": 17744 }, { "epoch": 0.5180870631514408, "grad_norm": 0.5896741163608288, "learning_rate": 1.0709489051094892e-05, "loss": 0.5486, "step": 17745 }, { "epoch": 0.5181162593792882, "grad_norm": 0.649654166104813, "learning_rate": 1.0708840227088404e-05, "loss": 0.6286, "step": 17746 }, { "epoch": 0.5181454556071355, "grad_norm": 0.6863966094225193, "learning_rate": 1.0708191403081916e-05, "loss": 0.6125, "step": 17747 }, { "epoch": 0.5181746518349829, "grad_norm": 0.7065022317159423, "learning_rate": 1.0707542579075427e-05, "loss": 0.6801, "step": 17748 }, { "epoch": 0.5182038480628303, "grad_norm": 0.6491052320820311, "learning_rate": 1.0706893755068939e-05, "loss": 0.5929, "step": 17749 }, { "epoch": 0.5182330442906776, "grad_norm": 0.6079615330113776, "learning_rate": 1.070624493106245e-05, "loss": 0.4933, "step": 17750 }, { "epoch": 0.518262240518525, "grad_norm": 0.6497798332060044, "learning_rate": 1.0705596107055963e-05, "loss": 0.6297, "step": 17751 }, { "epoch": 0.5182914367463723, "grad_norm": 0.6333857023697932, "learning_rate": 1.0704947283049475e-05, "loss": 0.5889, "step": 17752 }, { "epoch": 0.5183206329742197, "grad_norm": 0.6493282667604523, "learning_rate": 1.0704298459042985e-05, "loss": 0.5804, "step": 17753 }, { "epoch": 0.5183498292020671, "grad_norm": 0.661259003837306, "learning_rate": 1.0703649635036497e-05, "loss": 0.7086, "step": 17754 }, { "epoch": 0.5183790254299144, "grad_norm": 0.6611286402336993, "learning_rate": 1.0703000811030009e-05, "loss": 0.6763, "step": 17755 }, { "epoch": 0.5184082216577618, "grad_norm": 0.6517110972252742, "learning_rate": 1.0702351987023521e-05, "loss": 0.6098, "step": 17756 }, { "epoch": 0.5184374178856092, "grad_norm": 0.6576219587760223, "learning_rate": 1.0701703163017031e-05, "loss": 0.6161, "step": 17757 }, { "epoch": 0.5184666141134565, "grad_norm": 0.6612852264506941, "learning_rate": 1.0701054339010543e-05, "loss": 0.6498, "step": 17758 }, { "epoch": 0.5184958103413039, "grad_norm": 0.6412367027172649, "learning_rate": 1.0700405515004055e-05, "loss": 0.6178, "step": 17759 }, { "epoch": 0.5185250065691512, "grad_norm": 0.6759843885316842, "learning_rate": 1.0699756690997567e-05, "loss": 0.671, "step": 17760 }, { "epoch": 0.5185542027969986, "grad_norm": 0.6364339817186143, "learning_rate": 1.0699107866991078e-05, "loss": 0.6108, "step": 17761 }, { "epoch": 0.518583399024846, "grad_norm": 0.7510716469879254, "learning_rate": 1.069845904298459e-05, "loss": 0.6036, "step": 17762 }, { "epoch": 0.5186125952526933, "grad_norm": 0.648027403520627, "learning_rate": 1.0697810218978104e-05, "loss": 0.6534, "step": 17763 }, { "epoch": 0.5186417914805407, "grad_norm": 0.6422761120360345, "learning_rate": 1.0697161394971616e-05, "loss": 0.6168, "step": 17764 }, { "epoch": 0.518670987708388, "grad_norm": 0.6093547076684909, "learning_rate": 1.0696512570965128e-05, "loss": 0.5612, "step": 17765 }, { "epoch": 0.5187001839362354, "grad_norm": 0.7020758582795812, "learning_rate": 1.069586374695864e-05, "loss": 0.7398, "step": 17766 }, { "epoch": 0.5187293801640828, "grad_norm": 0.6799268660773344, "learning_rate": 1.069521492295215e-05, "loss": 0.4928, "step": 17767 }, { "epoch": 0.5187585763919301, "grad_norm": 0.6690028639743287, "learning_rate": 1.0694566098945662e-05, "loss": 0.6763, "step": 17768 }, { "epoch": 0.5187877726197775, "grad_norm": 0.6495895402139783, "learning_rate": 1.0693917274939174e-05, "loss": 0.6179, "step": 17769 }, { "epoch": 0.5188169688476248, "grad_norm": 0.6556035082216104, "learning_rate": 1.0693268450932686e-05, "loss": 0.6588, "step": 17770 }, { "epoch": 0.5188461650754722, "grad_norm": 0.7465800368782838, "learning_rate": 1.0692619626926198e-05, "loss": 0.6563, "step": 17771 }, { "epoch": 0.5188753613033196, "grad_norm": 0.7019720404113461, "learning_rate": 1.0691970802919708e-05, "loss": 0.7053, "step": 17772 }, { "epoch": 0.5189045575311669, "grad_norm": 0.6397696812749051, "learning_rate": 1.069132197891322e-05, "loss": 0.5998, "step": 17773 }, { "epoch": 0.5189337537590143, "grad_norm": 0.6361126586151978, "learning_rate": 1.0690673154906732e-05, "loss": 0.599, "step": 17774 }, { "epoch": 0.5189629499868617, "grad_norm": 0.6891759192836312, "learning_rate": 1.0690024330900244e-05, "loss": 0.6731, "step": 17775 }, { "epoch": 0.518992146214709, "grad_norm": 0.6518996564316422, "learning_rate": 1.0689375506893755e-05, "loss": 0.6274, "step": 17776 }, { "epoch": 0.5190213424425564, "grad_norm": 0.6934199124179655, "learning_rate": 1.0688726682887267e-05, "loss": 0.6997, "step": 17777 }, { "epoch": 0.5190505386704037, "grad_norm": 0.691340574200125, "learning_rate": 1.0688077858880779e-05, "loss": 0.6349, "step": 17778 }, { "epoch": 0.5190797348982511, "grad_norm": 0.6384826057471124, "learning_rate": 1.0687429034874291e-05, "loss": 0.6127, "step": 17779 }, { "epoch": 0.5191089311260985, "grad_norm": 0.6009973853501804, "learning_rate": 1.0686780210867801e-05, "loss": 0.5432, "step": 17780 }, { "epoch": 0.5191381273539458, "grad_norm": 0.685978926992561, "learning_rate": 1.0686131386861313e-05, "loss": 0.6406, "step": 17781 }, { "epoch": 0.5191673235817932, "grad_norm": 0.6511469643824351, "learning_rate": 1.0685482562854827e-05, "loss": 0.6197, "step": 17782 }, { "epoch": 0.5191965198096405, "grad_norm": 0.6108180139677004, "learning_rate": 1.0684833738848339e-05, "loss": 0.5497, "step": 17783 }, { "epoch": 0.5192257160374879, "grad_norm": 0.6261719939580968, "learning_rate": 1.0684184914841851e-05, "loss": 0.5483, "step": 17784 }, { "epoch": 0.5192549122653353, "grad_norm": 0.6560156009098037, "learning_rate": 1.0683536090835363e-05, "loss": 0.6224, "step": 17785 }, { "epoch": 0.5192841084931826, "grad_norm": 0.6807900506519459, "learning_rate": 1.0682887266828873e-05, "loss": 0.678, "step": 17786 }, { "epoch": 0.51931330472103, "grad_norm": 0.6573255077756984, "learning_rate": 1.0682238442822385e-05, "loss": 0.6102, "step": 17787 }, { "epoch": 0.5193425009488774, "grad_norm": 0.6972780196384144, "learning_rate": 1.0681589618815897e-05, "loss": 0.7626, "step": 17788 }, { "epoch": 0.5193716971767248, "grad_norm": 0.624507263944407, "learning_rate": 1.068094079480941e-05, "loss": 0.6103, "step": 17789 }, { "epoch": 0.5194008934045722, "grad_norm": 0.5878359571794799, "learning_rate": 1.0680291970802922e-05, "loss": 0.5538, "step": 17790 }, { "epoch": 0.5194300896324195, "grad_norm": 0.6893176355209071, "learning_rate": 1.0679643146796432e-05, "loss": 0.7189, "step": 17791 }, { "epoch": 0.5194592858602669, "grad_norm": 0.6398375490453645, "learning_rate": 1.0678994322789944e-05, "loss": 0.643, "step": 17792 }, { "epoch": 0.5194884820881143, "grad_norm": 0.6587260284349162, "learning_rate": 1.0678345498783456e-05, "loss": 0.6027, "step": 17793 }, { "epoch": 0.5195176783159616, "grad_norm": 0.7316169285760616, "learning_rate": 1.0677696674776968e-05, "loss": 0.7307, "step": 17794 }, { "epoch": 0.519546874543809, "grad_norm": 0.6691500514240755, "learning_rate": 1.0677047850770478e-05, "loss": 0.6785, "step": 17795 }, { "epoch": 0.5195760707716564, "grad_norm": 0.6564765174073188, "learning_rate": 1.067639902676399e-05, "loss": 0.6613, "step": 17796 }, { "epoch": 0.5196052669995037, "grad_norm": 0.7144732522654312, "learning_rate": 1.0675750202757502e-05, "loss": 0.7067, "step": 17797 }, { "epoch": 0.5196344632273511, "grad_norm": 0.6607441046467358, "learning_rate": 1.0675101378751014e-05, "loss": 0.6264, "step": 17798 }, { "epoch": 0.5196636594551984, "grad_norm": 0.6308069161823149, "learning_rate": 1.0674452554744525e-05, "loss": 0.5785, "step": 17799 }, { "epoch": 0.5196928556830458, "grad_norm": 0.6690596330331761, "learning_rate": 1.0673803730738037e-05, "loss": 0.6061, "step": 17800 }, { "epoch": 0.5197220519108932, "grad_norm": 0.7335911074113618, "learning_rate": 1.067315490673155e-05, "loss": 0.6236, "step": 17801 }, { "epoch": 0.5197512481387405, "grad_norm": 0.6855459158148013, "learning_rate": 1.0672506082725062e-05, "loss": 0.6424, "step": 17802 }, { "epoch": 0.5197804443665879, "grad_norm": 0.6725031286965232, "learning_rate": 1.0671857258718574e-05, "loss": 0.6492, "step": 17803 }, { "epoch": 0.5198096405944352, "grad_norm": 0.6296225397871437, "learning_rate": 1.0671208434712087e-05, "loss": 0.5767, "step": 17804 }, { "epoch": 0.5198388368222826, "grad_norm": 0.7218797411239481, "learning_rate": 1.0670559610705597e-05, "loss": 0.6952, "step": 17805 }, { "epoch": 0.51986803305013, "grad_norm": 0.6405873714980983, "learning_rate": 1.0669910786699109e-05, "loss": 0.6209, "step": 17806 }, { "epoch": 0.5198972292779773, "grad_norm": 0.6423556085833485, "learning_rate": 1.0669261962692621e-05, "loss": 0.611, "step": 17807 }, { "epoch": 0.5199264255058247, "grad_norm": 0.6378102983046117, "learning_rate": 1.0668613138686133e-05, "loss": 0.5707, "step": 17808 }, { "epoch": 0.519955621733672, "grad_norm": 0.6451486459794459, "learning_rate": 1.0667964314679645e-05, "loss": 0.5943, "step": 17809 }, { "epoch": 0.5199848179615194, "grad_norm": 0.6387891012364749, "learning_rate": 1.0667315490673155e-05, "loss": 0.5912, "step": 17810 }, { "epoch": 0.5200140141893668, "grad_norm": 0.6296449641950329, "learning_rate": 1.0666666666666667e-05, "loss": 0.5349, "step": 17811 }, { "epoch": 0.5200432104172141, "grad_norm": 0.6607495354792244, "learning_rate": 1.066601784266018e-05, "loss": 0.6097, "step": 17812 }, { "epoch": 0.5200724066450615, "grad_norm": 0.6880716297087702, "learning_rate": 1.0665369018653691e-05, "loss": 0.6422, "step": 17813 }, { "epoch": 0.5201016028729089, "grad_norm": 0.6706869483918649, "learning_rate": 1.0664720194647202e-05, "loss": 0.6456, "step": 17814 }, { "epoch": 0.5201307991007562, "grad_norm": 0.6492110224166875, "learning_rate": 1.0664071370640714e-05, "loss": 0.6347, "step": 17815 }, { "epoch": 0.5201599953286036, "grad_norm": 0.6409830827504791, "learning_rate": 1.0663422546634226e-05, "loss": 0.5981, "step": 17816 }, { "epoch": 0.5201891915564509, "grad_norm": 0.7005753979513022, "learning_rate": 1.0662773722627738e-05, "loss": 0.5997, "step": 17817 }, { "epoch": 0.5202183877842983, "grad_norm": 0.7179602572475244, "learning_rate": 1.0662124898621248e-05, "loss": 0.6707, "step": 17818 }, { "epoch": 0.5202475840121457, "grad_norm": 0.6368535123152371, "learning_rate": 1.066147607461476e-05, "loss": 0.6486, "step": 17819 }, { "epoch": 0.520276780239993, "grad_norm": 0.7136842656861757, "learning_rate": 1.0660827250608274e-05, "loss": 0.6644, "step": 17820 }, { "epoch": 0.5203059764678404, "grad_norm": 0.6138984925074326, "learning_rate": 1.0660178426601786e-05, "loss": 0.5856, "step": 17821 }, { "epoch": 0.5203351726956877, "grad_norm": 0.6555282713894788, "learning_rate": 1.0659529602595298e-05, "loss": 0.6331, "step": 17822 }, { "epoch": 0.5203643689235351, "grad_norm": 0.6417357864202801, "learning_rate": 1.065888077858881e-05, "loss": 0.6155, "step": 17823 }, { "epoch": 0.5203935651513825, "grad_norm": 0.6283165124136164, "learning_rate": 1.065823195458232e-05, "loss": 0.6124, "step": 17824 }, { "epoch": 0.5204227613792298, "grad_norm": 0.617228878988605, "learning_rate": 1.0657583130575832e-05, "loss": 0.5618, "step": 17825 }, { "epoch": 0.5204519576070772, "grad_norm": 0.6955860195186582, "learning_rate": 1.0656934306569344e-05, "loss": 0.6763, "step": 17826 }, { "epoch": 0.5204811538349245, "grad_norm": 0.6763764584732455, "learning_rate": 1.0656285482562856e-05, "loss": 0.6306, "step": 17827 }, { "epoch": 0.5205103500627719, "grad_norm": 0.6664360555337483, "learning_rate": 1.0655636658556367e-05, "loss": 0.6799, "step": 17828 }, { "epoch": 0.5205395462906193, "grad_norm": 0.66027178045762, "learning_rate": 1.0654987834549879e-05, "loss": 0.6446, "step": 17829 }, { "epoch": 0.5205687425184666, "grad_norm": 0.5925477267102947, "learning_rate": 1.065433901054339e-05, "loss": 0.5284, "step": 17830 }, { "epoch": 0.520597938746314, "grad_norm": 0.6278155300478818, "learning_rate": 1.0653690186536903e-05, "loss": 0.6595, "step": 17831 }, { "epoch": 0.5206271349741614, "grad_norm": 0.6483606385652314, "learning_rate": 1.0653041362530415e-05, "loss": 0.6527, "step": 17832 }, { "epoch": 0.5206563312020087, "grad_norm": 0.6272660277988061, "learning_rate": 1.0652392538523925e-05, "loss": 0.5461, "step": 17833 }, { "epoch": 0.5206855274298561, "grad_norm": 0.6369106916725881, "learning_rate": 1.0651743714517437e-05, "loss": 0.6085, "step": 17834 }, { "epoch": 0.5207147236577034, "grad_norm": 0.6928343627477396, "learning_rate": 1.065109489051095e-05, "loss": 0.6658, "step": 17835 }, { "epoch": 0.5207439198855508, "grad_norm": 0.6566225061636166, "learning_rate": 1.0650446066504461e-05, "loss": 0.605, "step": 17836 }, { "epoch": 0.5207731161133982, "grad_norm": 0.6926507458742521, "learning_rate": 1.0649797242497972e-05, "loss": 0.6613, "step": 17837 }, { "epoch": 0.5208023123412455, "grad_norm": 0.6767202279747841, "learning_rate": 1.0649148418491484e-05, "loss": 0.663, "step": 17838 }, { "epoch": 0.5208315085690929, "grad_norm": 0.6157221197704187, "learning_rate": 1.0648499594484997e-05, "loss": 0.5782, "step": 17839 }, { "epoch": 0.5208607047969402, "grad_norm": 0.6192011406526662, "learning_rate": 1.064785077047851e-05, "loss": 0.6281, "step": 17840 }, { "epoch": 0.5208899010247876, "grad_norm": 0.6051736963804439, "learning_rate": 1.0647201946472021e-05, "loss": 0.5694, "step": 17841 }, { "epoch": 0.520919097252635, "grad_norm": 0.664720943589872, "learning_rate": 1.0646553122465533e-05, "loss": 0.6034, "step": 17842 }, { "epoch": 0.5209482934804823, "grad_norm": 0.6181840516578234, "learning_rate": 1.0645904298459044e-05, "loss": 0.589, "step": 17843 }, { "epoch": 0.5209774897083297, "grad_norm": 0.6894509106263247, "learning_rate": 1.0645255474452556e-05, "loss": 0.7191, "step": 17844 }, { "epoch": 0.521006685936177, "grad_norm": 0.6272122478010647, "learning_rate": 1.0644606650446068e-05, "loss": 0.6263, "step": 17845 }, { "epoch": 0.5210358821640244, "grad_norm": 0.6460270420903266, "learning_rate": 1.064395782643958e-05, "loss": 0.6369, "step": 17846 }, { "epoch": 0.5210650783918718, "grad_norm": 0.6516063264506426, "learning_rate": 1.064330900243309e-05, "loss": 0.6304, "step": 17847 }, { "epoch": 0.5210942746197191, "grad_norm": 0.6279703639464298, "learning_rate": 1.0642660178426602e-05, "loss": 0.5641, "step": 17848 }, { "epoch": 0.5211234708475665, "grad_norm": 0.6473638572445641, "learning_rate": 1.0642011354420114e-05, "loss": 0.6304, "step": 17849 }, { "epoch": 0.5211526670754139, "grad_norm": 0.6758790651766085, "learning_rate": 1.0641362530413626e-05, "loss": 0.5799, "step": 17850 }, { "epoch": 0.5211818633032612, "grad_norm": 0.6026542333907898, "learning_rate": 1.0640713706407138e-05, "loss": 0.5718, "step": 17851 }, { "epoch": 0.5212110595311086, "grad_norm": 0.628085792351384, "learning_rate": 1.0640064882400649e-05, "loss": 0.545, "step": 17852 }, { "epoch": 0.5212402557589559, "grad_norm": 0.6184075570490826, "learning_rate": 1.063941605839416e-05, "loss": 0.5672, "step": 17853 }, { "epoch": 0.5212694519868033, "grad_norm": 0.6538680590083589, "learning_rate": 1.0638767234387673e-05, "loss": 0.6468, "step": 17854 }, { "epoch": 0.5212986482146507, "grad_norm": 0.6168417364211254, "learning_rate": 1.0638118410381185e-05, "loss": 0.6186, "step": 17855 }, { "epoch": 0.521327844442498, "grad_norm": 0.6150263176156547, "learning_rate": 1.0637469586374695e-05, "loss": 0.5522, "step": 17856 }, { "epoch": 0.5213570406703454, "grad_norm": 0.6574955719089438, "learning_rate": 1.063682076236821e-05, "loss": 0.5949, "step": 17857 }, { "epoch": 0.5213862368981927, "grad_norm": 0.6216350221349034, "learning_rate": 1.063617193836172e-05, "loss": 0.6038, "step": 17858 }, { "epoch": 0.5214154331260401, "grad_norm": 0.6666072861384926, "learning_rate": 1.0635523114355233e-05, "loss": 0.6645, "step": 17859 }, { "epoch": 0.5214446293538875, "grad_norm": 0.6714066081339548, "learning_rate": 1.0634874290348745e-05, "loss": 0.6683, "step": 17860 }, { "epoch": 0.5214738255817348, "grad_norm": 0.6924874715180791, "learning_rate": 1.0634225466342257e-05, "loss": 0.7068, "step": 17861 }, { "epoch": 0.5215030218095822, "grad_norm": 0.6749255842305377, "learning_rate": 1.0633576642335767e-05, "loss": 0.7007, "step": 17862 }, { "epoch": 0.5215322180374296, "grad_norm": 0.6045977373772674, "learning_rate": 1.063292781832928e-05, "loss": 0.5371, "step": 17863 }, { "epoch": 0.5215614142652769, "grad_norm": 0.6834321289225144, "learning_rate": 1.0632278994322791e-05, "loss": 0.6535, "step": 17864 }, { "epoch": 0.5215906104931243, "grad_norm": 0.652511123840416, "learning_rate": 1.0631630170316303e-05, "loss": 0.6611, "step": 17865 }, { "epoch": 0.5216198067209716, "grad_norm": 0.6585524517936557, "learning_rate": 1.0630981346309814e-05, "loss": 0.6452, "step": 17866 }, { "epoch": 0.521649002948819, "grad_norm": 0.6595262567607398, "learning_rate": 1.0630332522303326e-05, "loss": 0.6607, "step": 17867 }, { "epoch": 0.5216781991766664, "grad_norm": 0.5897773582937405, "learning_rate": 1.0629683698296838e-05, "loss": 0.5217, "step": 17868 }, { "epoch": 0.5217073954045137, "grad_norm": 0.6546376003113589, "learning_rate": 1.062903487429035e-05, "loss": 0.6503, "step": 17869 }, { "epoch": 0.5217365916323611, "grad_norm": 0.6229110069608372, "learning_rate": 1.0628386050283862e-05, "loss": 0.5822, "step": 17870 }, { "epoch": 0.5217657878602084, "grad_norm": 0.8193635473569092, "learning_rate": 1.0627737226277372e-05, "loss": 0.5802, "step": 17871 }, { "epoch": 0.5217949840880558, "grad_norm": 0.6956802094520319, "learning_rate": 1.0627088402270884e-05, "loss": 0.6749, "step": 17872 }, { "epoch": 0.5218241803159032, "grad_norm": 0.5815412004804335, "learning_rate": 1.0626439578264396e-05, "loss": 0.4945, "step": 17873 }, { "epoch": 0.5218533765437505, "grad_norm": 0.6601056908105446, "learning_rate": 1.0625790754257908e-05, "loss": 0.6651, "step": 17874 }, { "epoch": 0.5218825727715979, "grad_norm": 0.6895426268794531, "learning_rate": 1.0625141930251418e-05, "loss": 0.6829, "step": 17875 }, { "epoch": 0.5219117689994452, "grad_norm": 0.6294544890909988, "learning_rate": 1.0624493106244934e-05, "loss": 0.6206, "step": 17876 }, { "epoch": 0.5219409652272926, "grad_norm": 0.7252102011222672, "learning_rate": 1.0623844282238444e-05, "loss": 0.7839, "step": 17877 }, { "epoch": 0.52197016145514, "grad_norm": 0.5940500429594741, "learning_rate": 1.0623195458231956e-05, "loss": 0.5438, "step": 17878 }, { "epoch": 0.5219993576829873, "grad_norm": 0.6186692313766494, "learning_rate": 1.0622546634225468e-05, "loss": 0.6189, "step": 17879 }, { "epoch": 0.5220285539108347, "grad_norm": 0.6467407471449578, "learning_rate": 1.062189781021898e-05, "loss": 0.6079, "step": 17880 }, { "epoch": 0.522057750138682, "grad_norm": 0.648251721061504, "learning_rate": 1.062124898621249e-05, "loss": 0.5967, "step": 17881 }, { "epoch": 0.5220869463665294, "grad_norm": 0.6993202154987129, "learning_rate": 1.0620600162206003e-05, "loss": 0.6458, "step": 17882 }, { "epoch": 0.5221161425943768, "grad_norm": 0.5998606291786779, "learning_rate": 1.0619951338199515e-05, "loss": 0.5669, "step": 17883 }, { "epoch": 0.5221453388222241, "grad_norm": 0.6035736922609358, "learning_rate": 1.0619302514193027e-05, "loss": 0.5666, "step": 17884 }, { "epoch": 0.5221745350500715, "grad_norm": 0.639581087851353, "learning_rate": 1.0618653690186537e-05, "loss": 0.6574, "step": 17885 }, { "epoch": 0.5222037312779189, "grad_norm": 0.6330750088888186, "learning_rate": 1.0618004866180049e-05, "loss": 0.563, "step": 17886 }, { "epoch": 0.5222329275057662, "grad_norm": 0.623409358343525, "learning_rate": 1.0617356042173561e-05, "loss": 0.5924, "step": 17887 }, { "epoch": 0.5222621237336136, "grad_norm": 0.6094803594811641, "learning_rate": 1.0616707218167073e-05, "loss": 0.5875, "step": 17888 }, { "epoch": 0.5222913199614609, "grad_norm": 0.6638996638127874, "learning_rate": 1.0616058394160585e-05, "loss": 0.655, "step": 17889 }, { "epoch": 0.5223205161893083, "grad_norm": 0.6410236222212055, "learning_rate": 1.0615409570154095e-05, "loss": 0.6023, "step": 17890 }, { "epoch": 0.5223497124171557, "grad_norm": 0.6447806729429744, "learning_rate": 1.0614760746147607e-05, "loss": 0.5943, "step": 17891 }, { "epoch": 0.522378908645003, "grad_norm": 0.6540491594054525, "learning_rate": 1.061411192214112e-05, "loss": 0.6315, "step": 17892 }, { "epoch": 0.5224081048728504, "grad_norm": 0.6416285719183353, "learning_rate": 1.0613463098134632e-05, "loss": 0.6207, "step": 17893 }, { "epoch": 0.5224373011006977, "grad_norm": 0.5805580129491394, "learning_rate": 1.0612814274128142e-05, "loss": 0.5376, "step": 17894 }, { "epoch": 0.5224664973285451, "grad_norm": 0.6793977768863044, "learning_rate": 1.0612165450121657e-05, "loss": 0.6658, "step": 17895 }, { "epoch": 0.5224956935563925, "grad_norm": 0.6284565898651777, "learning_rate": 1.0611516626115168e-05, "loss": 0.6005, "step": 17896 }, { "epoch": 0.5225248897842398, "grad_norm": 0.6808432966008969, "learning_rate": 1.061086780210868e-05, "loss": 0.6394, "step": 17897 }, { "epoch": 0.5225540860120872, "grad_norm": 0.6472656462685885, "learning_rate": 1.0610218978102192e-05, "loss": 0.6289, "step": 17898 }, { "epoch": 0.5225832822399346, "grad_norm": 0.7023699257708467, "learning_rate": 1.0609570154095704e-05, "loss": 0.664, "step": 17899 }, { "epoch": 0.5226124784677819, "grad_norm": 0.64775626910199, "learning_rate": 1.0608921330089214e-05, "loss": 0.6161, "step": 17900 }, { "epoch": 0.5226416746956293, "grad_norm": 0.6481379906049368, "learning_rate": 1.0608272506082726e-05, "loss": 0.5784, "step": 17901 }, { "epoch": 0.5226708709234766, "grad_norm": 0.6314428448927139, "learning_rate": 1.0607623682076238e-05, "loss": 0.6063, "step": 17902 }, { "epoch": 0.522700067151324, "grad_norm": 0.6087576194966128, "learning_rate": 1.060697485806975e-05, "loss": 0.5479, "step": 17903 }, { "epoch": 0.5227292633791714, "grad_norm": 0.6147291746937937, "learning_rate": 1.060632603406326e-05, "loss": 0.5904, "step": 17904 }, { "epoch": 0.5227584596070187, "grad_norm": 0.6425694953726072, "learning_rate": 1.0605677210056772e-05, "loss": 0.5928, "step": 17905 }, { "epoch": 0.5227876558348661, "grad_norm": 0.6420872398138843, "learning_rate": 1.0605028386050284e-05, "loss": 0.6039, "step": 17906 }, { "epoch": 0.5228168520627134, "grad_norm": 0.6509946086509978, "learning_rate": 1.0604379562043797e-05, "loss": 0.6244, "step": 17907 }, { "epoch": 0.5228460482905608, "grad_norm": 0.6783807800792311, "learning_rate": 1.0603730738037309e-05, "loss": 0.6445, "step": 17908 }, { "epoch": 0.5228752445184082, "grad_norm": 0.6167573123117673, "learning_rate": 1.0603081914030819e-05, "loss": 0.5826, "step": 17909 }, { "epoch": 0.5229044407462556, "grad_norm": 0.6629182967918027, "learning_rate": 1.0602433090024331e-05, "loss": 0.6576, "step": 17910 }, { "epoch": 0.522933636974103, "grad_norm": 0.6317424504976079, "learning_rate": 1.0601784266017843e-05, "loss": 0.5745, "step": 17911 }, { "epoch": 0.5229628332019504, "grad_norm": 0.611347032843755, "learning_rate": 1.0601135442011355e-05, "loss": 0.5545, "step": 17912 }, { "epoch": 0.5229920294297977, "grad_norm": 0.5801420615719051, "learning_rate": 1.0600486618004865e-05, "loss": 0.5066, "step": 17913 }, { "epoch": 0.5230212256576451, "grad_norm": 0.6397111761096101, "learning_rate": 1.059983779399838e-05, "loss": 0.6091, "step": 17914 }, { "epoch": 0.5230504218854924, "grad_norm": 0.7049928879648596, "learning_rate": 1.0599188969991891e-05, "loss": 0.7833, "step": 17915 }, { "epoch": 0.5230796181133398, "grad_norm": 0.6196533363442512, "learning_rate": 1.0598540145985403e-05, "loss": 0.6164, "step": 17916 }, { "epoch": 0.5231088143411872, "grad_norm": 0.6135335405540162, "learning_rate": 1.0597891321978915e-05, "loss": 0.5384, "step": 17917 }, { "epoch": 0.5231380105690345, "grad_norm": 0.6399959550423511, "learning_rate": 1.0597242497972427e-05, "loss": 0.5954, "step": 17918 }, { "epoch": 0.5231672067968819, "grad_norm": 0.6200230974894273, "learning_rate": 1.0596593673965937e-05, "loss": 0.5741, "step": 17919 }, { "epoch": 0.5231964030247293, "grad_norm": 0.701323638340158, "learning_rate": 1.059594484995945e-05, "loss": 0.6606, "step": 17920 }, { "epoch": 0.5232255992525766, "grad_norm": 0.6618144285837438, "learning_rate": 1.0595296025952962e-05, "loss": 0.6513, "step": 17921 }, { "epoch": 0.523254795480424, "grad_norm": 0.5879757858046778, "learning_rate": 1.0594647201946474e-05, "loss": 0.5205, "step": 17922 }, { "epoch": 0.5232839917082713, "grad_norm": 0.7256630584547779, "learning_rate": 1.0593998377939984e-05, "loss": 0.6587, "step": 17923 }, { "epoch": 0.5233131879361187, "grad_norm": 0.634609967407976, "learning_rate": 1.0593349553933496e-05, "loss": 0.591, "step": 17924 }, { "epoch": 0.5233423841639661, "grad_norm": 0.6449112225640282, "learning_rate": 1.0592700729927008e-05, "loss": 0.6273, "step": 17925 }, { "epoch": 0.5233715803918134, "grad_norm": 0.6268715690341208, "learning_rate": 1.059205190592052e-05, "loss": 0.5856, "step": 17926 }, { "epoch": 0.5234007766196608, "grad_norm": 0.6038225260900723, "learning_rate": 1.0591403081914032e-05, "loss": 0.5485, "step": 17927 }, { "epoch": 0.5234299728475081, "grad_norm": 0.6237798284141864, "learning_rate": 1.0590754257907542e-05, "loss": 0.6121, "step": 17928 }, { "epoch": 0.5234591690753555, "grad_norm": 0.657720922887654, "learning_rate": 1.0590105433901054e-05, "loss": 0.6519, "step": 17929 }, { "epoch": 0.5234883653032029, "grad_norm": 0.6351515762470947, "learning_rate": 1.0589456609894566e-05, "loss": 0.6167, "step": 17930 }, { "epoch": 0.5235175615310502, "grad_norm": 0.6592610923556081, "learning_rate": 1.0588807785888078e-05, "loss": 0.629, "step": 17931 }, { "epoch": 0.5235467577588976, "grad_norm": 0.6632108024294818, "learning_rate": 1.0588158961881589e-05, "loss": 0.6764, "step": 17932 }, { "epoch": 0.523575953986745, "grad_norm": 0.6372904356655879, "learning_rate": 1.0587510137875102e-05, "loss": 0.5991, "step": 17933 }, { "epoch": 0.5236051502145923, "grad_norm": 0.6198957552967735, "learning_rate": 1.0586861313868614e-05, "loss": 0.5575, "step": 17934 }, { "epoch": 0.5236343464424397, "grad_norm": 0.6540981281280086, "learning_rate": 1.0586212489862126e-05, "loss": 0.6421, "step": 17935 }, { "epoch": 0.523663542670287, "grad_norm": 0.6907047347067509, "learning_rate": 1.0585563665855639e-05, "loss": 0.7171, "step": 17936 }, { "epoch": 0.5236927388981344, "grad_norm": 0.6643297204542522, "learning_rate": 1.058491484184915e-05, "loss": 0.6549, "step": 17937 }, { "epoch": 0.5237219351259818, "grad_norm": 0.6597585972554184, "learning_rate": 1.0584266017842661e-05, "loss": 0.6519, "step": 17938 }, { "epoch": 0.5237511313538291, "grad_norm": 0.6579113047547381, "learning_rate": 1.0583617193836173e-05, "loss": 0.5773, "step": 17939 }, { "epoch": 0.5237803275816765, "grad_norm": 0.6952330789121467, "learning_rate": 1.0582968369829685e-05, "loss": 0.6646, "step": 17940 }, { "epoch": 0.5238095238095238, "grad_norm": 0.7033243416938879, "learning_rate": 1.0582319545823197e-05, "loss": 0.6983, "step": 17941 }, { "epoch": 0.5238387200373712, "grad_norm": 0.62071207774542, "learning_rate": 1.0581670721816707e-05, "loss": 0.5593, "step": 17942 }, { "epoch": 0.5238679162652186, "grad_norm": 0.5909908513918307, "learning_rate": 1.058102189781022e-05, "loss": 0.5326, "step": 17943 }, { "epoch": 0.5238971124930659, "grad_norm": 0.6265223838910867, "learning_rate": 1.0580373073803731e-05, "loss": 0.6079, "step": 17944 }, { "epoch": 0.5239263087209133, "grad_norm": 0.62756266053838, "learning_rate": 1.0579724249797243e-05, "loss": 0.6036, "step": 17945 }, { "epoch": 0.5239555049487606, "grad_norm": 0.6481396764522398, "learning_rate": 1.0579075425790755e-05, "loss": 0.6624, "step": 17946 }, { "epoch": 0.523984701176608, "grad_norm": 0.6342964346314833, "learning_rate": 1.0578426601784266e-05, "loss": 0.6087, "step": 17947 }, { "epoch": 0.5240138974044554, "grad_norm": 0.6734427737939755, "learning_rate": 1.0577777777777778e-05, "loss": 0.6902, "step": 17948 }, { "epoch": 0.5240430936323027, "grad_norm": 0.6178594324025508, "learning_rate": 1.057712895377129e-05, "loss": 0.6008, "step": 17949 }, { "epoch": 0.5240722898601501, "grad_norm": 0.6732679733924857, "learning_rate": 1.0576480129764802e-05, "loss": 0.6456, "step": 17950 }, { "epoch": 0.5241014860879974, "grad_norm": 0.698557634356557, "learning_rate": 1.0575831305758312e-05, "loss": 0.6846, "step": 17951 }, { "epoch": 0.5241306823158448, "grad_norm": 0.6837858456673698, "learning_rate": 1.0575182481751826e-05, "loss": 0.7225, "step": 17952 }, { "epoch": 0.5241598785436922, "grad_norm": 0.6534731256244356, "learning_rate": 1.0574533657745338e-05, "loss": 0.6353, "step": 17953 }, { "epoch": 0.5241890747715395, "grad_norm": 0.6413950223569386, "learning_rate": 1.057388483373885e-05, "loss": 0.6248, "step": 17954 }, { "epoch": 0.5242182709993869, "grad_norm": 0.6027286071825159, "learning_rate": 1.0573236009732362e-05, "loss": 0.5445, "step": 17955 }, { "epoch": 0.5242474672272343, "grad_norm": 0.6309722115435825, "learning_rate": 1.0572587185725874e-05, "loss": 0.5963, "step": 17956 }, { "epoch": 0.5242766634550816, "grad_norm": 0.6588969626139883, "learning_rate": 1.0571938361719384e-05, "loss": 0.6287, "step": 17957 }, { "epoch": 0.524305859682929, "grad_norm": 0.7717311253964357, "learning_rate": 1.0571289537712896e-05, "loss": 0.6814, "step": 17958 }, { "epoch": 0.5243350559107763, "grad_norm": 0.6529629166248999, "learning_rate": 1.0570640713706408e-05, "loss": 0.6818, "step": 17959 }, { "epoch": 0.5243642521386237, "grad_norm": 0.6158490604158597, "learning_rate": 1.056999188969992e-05, "loss": 0.5872, "step": 17960 }, { "epoch": 0.5243934483664711, "grad_norm": 0.6371604300691629, "learning_rate": 1.056934306569343e-05, "loss": 0.6195, "step": 17961 }, { "epoch": 0.5244226445943184, "grad_norm": 0.6743791038274867, "learning_rate": 1.0568694241686943e-05, "loss": 0.657, "step": 17962 }, { "epoch": 0.5244518408221658, "grad_norm": 0.6228862511821959, "learning_rate": 1.0568045417680455e-05, "loss": 0.6019, "step": 17963 }, { "epoch": 0.5244810370500131, "grad_norm": 0.6112340011365842, "learning_rate": 1.0567396593673967e-05, "loss": 0.5232, "step": 17964 }, { "epoch": 0.5245102332778605, "grad_norm": 0.6486734403145101, "learning_rate": 1.0566747769667479e-05, "loss": 0.6061, "step": 17965 }, { "epoch": 0.5245394295057079, "grad_norm": 0.6345973031519615, "learning_rate": 1.056609894566099e-05, "loss": 0.6272, "step": 17966 }, { "epoch": 0.5245686257335552, "grad_norm": 0.6304022542747018, "learning_rate": 1.0565450121654501e-05, "loss": 0.6119, "step": 17967 }, { "epoch": 0.5245978219614026, "grad_norm": 0.5970525883157707, "learning_rate": 1.0564801297648013e-05, "loss": 0.5856, "step": 17968 }, { "epoch": 0.52462701818925, "grad_norm": 0.6181800702337563, "learning_rate": 1.0564152473641525e-05, "loss": 0.5644, "step": 17969 }, { "epoch": 0.5246562144170973, "grad_norm": 0.7107198476379061, "learning_rate": 1.0563503649635036e-05, "loss": 0.6805, "step": 17970 }, { "epoch": 0.5246854106449447, "grad_norm": 0.6722464828491577, "learning_rate": 1.056285482562855e-05, "loss": 0.6379, "step": 17971 }, { "epoch": 0.524714606872792, "grad_norm": 0.6644597580476459, "learning_rate": 1.0562206001622061e-05, "loss": 0.6391, "step": 17972 }, { "epoch": 0.5247438031006394, "grad_norm": 0.6781428236921431, "learning_rate": 1.0561557177615573e-05, "loss": 0.5961, "step": 17973 }, { "epoch": 0.5247729993284868, "grad_norm": 0.6377789416143852, "learning_rate": 1.0560908353609085e-05, "loss": 0.6106, "step": 17974 }, { "epoch": 0.5248021955563341, "grad_norm": 0.6354535262219706, "learning_rate": 1.0560259529602597e-05, "loss": 0.6111, "step": 17975 }, { "epoch": 0.5248313917841815, "grad_norm": 0.6391281172920048, "learning_rate": 1.0559610705596108e-05, "loss": 0.6056, "step": 17976 }, { "epoch": 0.5248605880120288, "grad_norm": 0.6051078902215282, "learning_rate": 1.055896188158962e-05, "loss": 0.5185, "step": 17977 }, { "epoch": 0.5248897842398762, "grad_norm": 0.7218790207025015, "learning_rate": 1.0558313057583132e-05, "loss": 0.7191, "step": 17978 }, { "epoch": 0.5249189804677236, "grad_norm": 0.5969634239688283, "learning_rate": 1.0557664233576644e-05, "loss": 0.565, "step": 17979 }, { "epoch": 0.5249481766955709, "grad_norm": 0.6581650294355965, "learning_rate": 1.0557015409570154e-05, "loss": 0.6507, "step": 17980 }, { "epoch": 0.5249773729234183, "grad_norm": 0.6643650100666684, "learning_rate": 1.0556366585563666e-05, "loss": 0.6559, "step": 17981 }, { "epoch": 0.5250065691512656, "grad_norm": 0.6509001580922894, "learning_rate": 1.0555717761557178e-05, "loss": 0.6087, "step": 17982 }, { "epoch": 0.525035765379113, "grad_norm": 0.6781043778931158, "learning_rate": 1.055506893755069e-05, "loss": 0.6448, "step": 17983 }, { "epoch": 0.5250649616069604, "grad_norm": 0.6488402045358465, "learning_rate": 1.0554420113544202e-05, "loss": 0.6325, "step": 17984 }, { "epoch": 0.5250941578348077, "grad_norm": 0.5976532367616678, "learning_rate": 1.0553771289537713e-05, "loss": 0.5302, "step": 17985 }, { "epoch": 0.5251233540626551, "grad_norm": 0.6557822079051242, "learning_rate": 1.0553122465531225e-05, "loss": 0.6792, "step": 17986 }, { "epoch": 0.5251525502905025, "grad_norm": 0.6096685198507701, "learning_rate": 1.0552473641524737e-05, "loss": 0.6182, "step": 17987 }, { "epoch": 0.5251817465183498, "grad_norm": 0.6743217503522474, "learning_rate": 1.0551824817518249e-05, "loss": 0.7053, "step": 17988 }, { "epoch": 0.5252109427461972, "grad_norm": 0.6470491405959483, "learning_rate": 1.0551175993511759e-05, "loss": 0.6268, "step": 17989 }, { "epoch": 0.5252401389740445, "grad_norm": 0.6709286356104145, "learning_rate": 1.0550527169505273e-05, "loss": 0.6249, "step": 17990 }, { "epoch": 0.5252693352018919, "grad_norm": 0.6550048719545991, "learning_rate": 1.0549878345498785e-05, "loss": 0.6491, "step": 17991 }, { "epoch": 0.5252985314297393, "grad_norm": 0.6404633482401502, "learning_rate": 1.0549229521492297e-05, "loss": 0.6338, "step": 17992 }, { "epoch": 0.5253277276575866, "grad_norm": 0.6350447049219884, "learning_rate": 1.0548580697485809e-05, "loss": 0.5999, "step": 17993 }, { "epoch": 0.525356923885434, "grad_norm": 0.6866299859580366, "learning_rate": 1.0547931873479321e-05, "loss": 0.6974, "step": 17994 }, { "epoch": 0.5253861201132813, "grad_norm": 0.6202936948282641, "learning_rate": 1.0547283049472831e-05, "loss": 0.5953, "step": 17995 }, { "epoch": 0.5254153163411287, "grad_norm": 0.6237925241114486, "learning_rate": 1.0546634225466343e-05, "loss": 0.6064, "step": 17996 }, { "epoch": 0.5254445125689761, "grad_norm": 0.6252817264558808, "learning_rate": 1.0545985401459855e-05, "loss": 0.6166, "step": 17997 }, { "epoch": 0.5254737087968234, "grad_norm": 0.6684520300898646, "learning_rate": 1.0545336577453367e-05, "loss": 0.654, "step": 17998 }, { "epoch": 0.5255029050246708, "grad_norm": 0.6050641261012051, "learning_rate": 1.0544687753446878e-05, "loss": 0.5549, "step": 17999 }, { "epoch": 0.5255321012525181, "grad_norm": 0.6534511154795194, "learning_rate": 1.054403892944039e-05, "loss": 0.634, "step": 18000 }, { "epoch": 0.5255612974803655, "grad_norm": 0.659336266858762, "learning_rate": 1.0543390105433902e-05, "loss": 0.6506, "step": 18001 }, { "epoch": 0.5255904937082129, "grad_norm": 0.6437812359371976, "learning_rate": 1.0542741281427414e-05, "loss": 0.6257, "step": 18002 }, { "epoch": 0.5256196899360602, "grad_norm": 0.6103703714394164, "learning_rate": 1.0542092457420926e-05, "loss": 0.5323, "step": 18003 }, { "epoch": 0.5256488861639076, "grad_norm": 0.6261724076540746, "learning_rate": 1.0541443633414436e-05, "loss": 0.596, "step": 18004 }, { "epoch": 0.525678082391755, "grad_norm": 0.6119553674417159, "learning_rate": 1.0540794809407948e-05, "loss": 0.5687, "step": 18005 }, { "epoch": 0.5257072786196023, "grad_norm": 0.716652126654827, "learning_rate": 1.054014598540146e-05, "loss": 0.7113, "step": 18006 }, { "epoch": 0.5257364748474497, "grad_norm": 0.7109221154305275, "learning_rate": 1.0539497161394972e-05, "loss": 0.7569, "step": 18007 }, { "epoch": 0.525765671075297, "grad_norm": 0.6323468531196578, "learning_rate": 1.0538848337388486e-05, "loss": 0.5717, "step": 18008 }, { "epoch": 0.5257948673031444, "grad_norm": 0.6415628372988252, "learning_rate": 1.0538199513381996e-05, "loss": 0.6507, "step": 18009 }, { "epoch": 0.5258240635309918, "grad_norm": 0.6957354639201048, "learning_rate": 1.0537550689375508e-05, "loss": 0.7081, "step": 18010 }, { "epoch": 0.5258532597588391, "grad_norm": 0.6041395436095817, "learning_rate": 1.053690186536902e-05, "loss": 0.5305, "step": 18011 }, { "epoch": 0.5258824559866865, "grad_norm": 0.594499053267182, "learning_rate": 1.0536253041362532e-05, "loss": 0.5082, "step": 18012 }, { "epoch": 0.5259116522145338, "grad_norm": 0.675051170183035, "learning_rate": 1.0535604217356044e-05, "loss": 0.6785, "step": 18013 }, { "epoch": 0.5259408484423812, "grad_norm": 0.6140003462188249, "learning_rate": 1.0534955393349555e-05, "loss": 0.5548, "step": 18014 }, { "epoch": 0.5259700446702286, "grad_norm": 0.6694800236160813, "learning_rate": 1.0534306569343067e-05, "loss": 0.6509, "step": 18015 }, { "epoch": 0.5259992408980759, "grad_norm": 0.6392117323773785, "learning_rate": 1.0533657745336579e-05, "loss": 0.6291, "step": 18016 }, { "epoch": 0.5260284371259233, "grad_norm": 0.6733349604152977, "learning_rate": 1.053300892133009e-05, "loss": 0.6145, "step": 18017 }, { "epoch": 0.5260576333537706, "grad_norm": 0.6501133768048641, "learning_rate": 1.0532360097323601e-05, "loss": 0.6179, "step": 18018 }, { "epoch": 0.526086829581618, "grad_norm": 0.6626910538816145, "learning_rate": 1.0531711273317113e-05, "loss": 0.6344, "step": 18019 }, { "epoch": 0.5261160258094654, "grad_norm": 0.630498731556507, "learning_rate": 1.0531062449310625e-05, "loss": 0.5843, "step": 18020 }, { "epoch": 0.5261452220373127, "grad_norm": 0.6652264100637323, "learning_rate": 1.0530413625304137e-05, "loss": 0.6475, "step": 18021 }, { "epoch": 0.5261744182651601, "grad_norm": 0.5803486816495019, "learning_rate": 1.0529764801297649e-05, "loss": 0.4988, "step": 18022 }, { "epoch": 0.5262036144930075, "grad_norm": 0.6231421126341343, "learning_rate": 1.052911597729116e-05, "loss": 0.5789, "step": 18023 }, { "epoch": 0.5262328107208548, "grad_norm": 0.6499751576884318, "learning_rate": 1.0528467153284672e-05, "loss": 0.6021, "step": 18024 }, { "epoch": 0.5262620069487022, "grad_norm": 0.622757619987283, "learning_rate": 1.0527818329278184e-05, "loss": 0.5777, "step": 18025 }, { "epoch": 0.5262912031765495, "grad_norm": 0.6368548671715382, "learning_rate": 1.0527169505271696e-05, "loss": 0.6323, "step": 18026 }, { "epoch": 0.5263203994043969, "grad_norm": 0.5983528795467749, "learning_rate": 1.052652068126521e-05, "loss": 0.5397, "step": 18027 }, { "epoch": 0.5263495956322443, "grad_norm": 0.5900789851544643, "learning_rate": 1.052587185725872e-05, "loss": 0.5091, "step": 18028 }, { "epoch": 0.5263787918600916, "grad_norm": 0.6469405918016076, "learning_rate": 1.0525223033252232e-05, "loss": 0.5905, "step": 18029 }, { "epoch": 0.526407988087939, "grad_norm": 0.6272966098792198, "learning_rate": 1.0524574209245744e-05, "loss": 0.5782, "step": 18030 }, { "epoch": 0.5264371843157865, "grad_norm": 0.6397805182964919, "learning_rate": 1.0523925385239256e-05, "loss": 0.5917, "step": 18031 }, { "epoch": 0.5264663805436338, "grad_norm": 0.6209200453428063, "learning_rate": 1.0523276561232768e-05, "loss": 0.6166, "step": 18032 }, { "epoch": 0.5264955767714812, "grad_norm": 0.6173477763177677, "learning_rate": 1.0522627737226278e-05, "loss": 0.5691, "step": 18033 }, { "epoch": 0.5265247729993285, "grad_norm": 0.6469512743225645, "learning_rate": 1.052197891321979e-05, "loss": 0.6053, "step": 18034 }, { "epoch": 0.5265539692271759, "grad_norm": 0.6950172772606075, "learning_rate": 1.0521330089213302e-05, "loss": 0.7161, "step": 18035 }, { "epoch": 0.5265831654550233, "grad_norm": 0.6985294563094213, "learning_rate": 1.0520681265206814e-05, "loss": 0.7143, "step": 18036 }, { "epoch": 0.5266123616828706, "grad_norm": 0.7012574142162152, "learning_rate": 1.0520032441200324e-05, "loss": 0.7252, "step": 18037 }, { "epoch": 0.526641557910718, "grad_norm": 0.6622508159073038, "learning_rate": 1.0519383617193837e-05, "loss": 0.6176, "step": 18038 }, { "epoch": 0.5266707541385653, "grad_norm": 0.6792880672090009, "learning_rate": 1.0518734793187349e-05, "loss": 0.6636, "step": 18039 }, { "epoch": 0.5266999503664127, "grad_norm": 0.593764814066981, "learning_rate": 1.051808596918086e-05, "loss": 0.5088, "step": 18040 }, { "epoch": 0.5267291465942601, "grad_norm": 0.6228131119587226, "learning_rate": 1.0517437145174373e-05, "loss": 0.5722, "step": 18041 }, { "epoch": 0.5267583428221074, "grad_norm": 0.6016751280457876, "learning_rate": 1.0516788321167883e-05, "loss": 0.5502, "step": 18042 }, { "epoch": 0.5267875390499548, "grad_norm": 0.6206477039410314, "learning_rate": 1.0516139497161395e-05, "loss": 0.5534, "step": 18043 }, { "epoch": 0.5268167352778022, "grad_norm": 0.6100270800887664, "learning_rate": 1.0515490673154907e-05, "loss": 0.5626, "step": 18044 }, { "epoch": 0.5268459315056495, "grad_norm": 0.661569601236462, "learning_rate": 1.0514841849148419e-05, "loss": 0.6627, "step": 18045 }, { "epoch": 0.5268751277334969, "grad_norm": 0.8536273831544351, "learning_rate": 1.0514193025141933e-05, "loss": 0.7092, "step": 18046 }, { "epoch": 0.5269043239613442, "grad_norm": 0.6172253208976931, "learning_rate": 1.0513544201135443e-05, "loss": 0.5498, "step": 18047 }, { "epoch": 0.5269335201891916, "grad_norm": 0.666678837273268, "learning_rate": 1.0512895377128955e-05, "loss": 0.562, "step": 18048 }, { "epoch": 0.526962716417039, "grad_norm": 0.6883279011056503, "learning_rate": 1.0512246553122467e-05, "loss": 0.6363, "step": 18049 }, { "epoch": 0.5269919126448863, "grad_norm": 0.6713581602848699, "learning_rate": 1.0511597729115979e-05, "loss": 0.6445, "step": 18050 }, { "epoch": 0.5270211088727337, "grad_norm": 0.6173517375671739, "learning_rate": 1.0510948905109491e-05, "loss": 0.5982, "step": 18051 }, { "epoch": 0.527050305100581, "grad_norm": 0.6679033101781752, "learning_rate": 1.0510300081103001e-05, "loss": 0.6775, "step": 18052 }, { "epoch": 0.5270795013284284, "grad_norm": 0.6532711251554504, "learning_rate": 1.0509651257096514e-05, "loss": 0.6443, "step": 18053 }, { "epoch": 0.5271086975562758, "grad_norm": 0.7275114831995558, "learning_rate": 1.0509002433090026e-05, "loss": 0.7339, "step": 18054 }, { "epoch": 0.5271378937841231, "grad_norm": 0.6159025694019236, "learning_rate": 1.0508353609083538e-05, "loss": 0.5745, "step": 18055 }, { "epoch": 0.5271670900119705, "grad_norm": 0.6273330593927045, "learning_rate": 1.0507704785077048e-05, "loss": 0.5808, "step": 18056 }, { "epoch": 0.5271962862398178, "grad_norm": 0.7030788109014631, "learning_rate": 1.050705596107056e-05, "loss": 0.6711, "step": 18057 }, { "epoch": 0.5272254824676652, "grad_norm": 0.6093910993380884, "learning_rate": 1.0506407137064072e-05, "loss": 0.5404, "step": 18058 }, { "epoch": 0.5272546786955126, "grad_norm": 0.6248600160725878, "learning_rate": 1.0505758313057584e-05, "loss": 0.5502, "step": 18059 }, { "epoch": 0.5272838749233599, "grad_norm": 0.6279034094361073, "learning_rate": 1.0505109489051096e-05, "loss": 0.5808, "step": 18060 }, { "epoch": 0.5273130711512073, "grad_norm": 0.6045538977490262, "learning_rate": 1.0504460665044606e-05, "loss": 0.516, "step": 18061 }, { "epoch": 0.5273422673790547, "grad_norm": 0.6523312541577192, "learning_rate": 1.0503811841038118e-05, "loss": 0.6257, "step": 18062 }, { "epoch": 0.527371463606902, "grad_norm": 0.6191005124069678, "learning_rate": 1.050316301703163e-05, "loss": 0.5969, "step": 18063 }, { "epoch": 0.5274006598347494, "grad_norm": 0.7831550277747739, "learning_rate": 1.0502514193025142e-05, "loss": 0.6234, "step": 18064 }, { "epoch": 0.5274298560625967, "grad_norm": 0.6424228067358014, "learning_rate": 1.0501865369018656e-05, "loss": 0.5849, "step": 18065 }, { "epoch": 0.5274590522904441, "grad_norm": 0.6545796597248085, "learning_rate": 1.0501216545012166e-05, "loss": 0.6149, "step": 18066 }, { "epoch": 0.5274882485182915, "grad_norm": 0.9150908585256831, "learning_rate": 1.0500567721005679e-05, "loss": 0.6004, "step": 18067 }, { "epoch": 0.5275174447461388, "grad_norm": 0.6525821053049188, "learning_rate": 1.049991889699919e-05, "loss": 0.61, "step": 18068 }, { "epoch": 0.5275466409739862, "grad_norm": 0.6313925830769224, "learning_rate": 1.0499270072992703e-05, "loss": 0.5639, "step": 18069 }, { "epoch": 0.5275758372018335, "grad_norm": 0.5941399738558484, "learning_rate": 1.0498621248986215e-05, "loss": 0.5334, "step": 18070 }, { "epoch": 0.5276050334296809, "grad_norm": 0.6131453952314675, "learning_rate": 1.0497972424979725e-05, "loss": 0.5525, "step": 18071 }, { "epoch": 0.5276342296575283, "grad_norm": 0.6063263587645904, "learning_rate": 1.0497323600973237e-05, "loss": 0.5457, "step": 18072 }, { "epoch": 0.5276634258853756, "grad_norm": 0.6190745108643724, "learning_rate": 1.0496674776966749e-05, "loss": 0.5745, "step": 18073 }, { "epoch": 0.527692622113223, "grad_norm": 0.6612853202713738, "learning_rate": 1.0496025952960261e-05, "loss": 0.6469, "step": 18074 }, { "epoch": 0.5277218183410703, "grad_norm": 0.6947953234205425, "learning_rate": 1.0495377128953771e-05, "loss": 0.6883, "step": 18075 }, { "epoch": 0.5277510145689177, "grad_norm": 0.63945353914301, "learning_rate": 1.0494728304947283e-05, "loss": 0.567, "step": 18076 }, { "epoch": 0.5277802107967651, "grad_norm": 0.6505342070169912, "learning_rate": 1.0494079480940795e-05, "loss": 0.6333, "step": 18077 }, { "epoch": 0.5278094070246124, "grad_norm": 0.6106297106767422, "learning_rate": 1.0493430656934307e-05, "loss": 0.5357, "step": 18078 }, { "epoch": 0.5278386032524598, "grad_norm": 0.6737200076029426, "learning_rate": 1.0492781832927818e-05, "loss": 0.6608, "step": 18079 }, { "epoch": 0.5278677994803072, "grad_norm": 0.6313222263235495, "learning_rate": 1.049213300892133e-05, "loss": 0.5763, "step": 18080 }, { "epoch": 0.5278969957081545, "grad_norm": 0.6313273887133778, "learning_rate": 1.0491484184914842e-05, "loss": 0.5933, "step": 18081 }, { "epoch": 0.5279261919360019, "grad_norm": 0.6620358030083159, "learning_rate": 1.0490835360908354e-05, "loss": 0.6597, "step": 18082 }, { "epoch": 0.5279553881638492, "grad_norm": 0.6234580945776492, "learning_rate": 1.0490186536901866e-05, "loss": 0.5961, "step": 18083 }, { "epoch": 0.5279845843916966, "grad_norm": 0.6778508909972828, "learning_rate": 1.048953771289538e-05, "loss": 0.6803, "step": 18084 }, { "epoch": 0.528013780619544, "grad_norm": 0.6307362875717859, "learning_rate": 1.048888888888889e-05, "loss": 0.5644, "step": 18085 }, { "epoch": 0.5280429768473913, "grad_norm": 0.6368297784379193, "learning_rate": 1.0488240064882402e-05, "loss": 0.5823, "step": 18086 }, { "epoch": 0.5280721730752387, "grad_norm": 0.6268304823945928, "learning_rate": 1.0487591240875914e-05, "loss": 0.5936, "step": 18087 }, { "epoch": 0.528101369303086, "grad_norm": 0.7235718153317141, "learning_rate": 1.0486942416869426e-05, "loss": 0.5551, "step": 18088 }, { "epoch": 0.5281305655309334, "grad_norm": 0.6622484657005899, "learning_rate": 1.0486293592862938e-05, "loss": 0.6559, "step": 18089 }, { "epoch": 0.5281597617587808, "grad_norm": 0.6443075669704403, "learning_rate": 1.0485644768856448e-05, "loss": 0.6129, "step": 18090 }, { "epoch": 0.5281889579866281, "grad_norm": 0.6650358064948884, "learning_rate": 1.048499594484996e-05, "loss": 0.6541, "step": 18091 }, { "epoch": 0.5282181542144755, "grad_norm": 0.6255969702937463, "learning_rate": 1.0484347120843472e-05, "loss": 0.603, "step": 18092 }, { "epoch": 0.5282473504423228, "grad_norm": 0.696992207642918, "learning_rate": 1.0483698296836984e-05, "loss": 0.6865, "step": 18093 }, { "epoch": 0.5282765466701702, "grad_norm": 0.6745165348893369, "learning_rate": 1.0483049472830495e-05, "loss": 0.6557, "step": 18094 }, { "epoch": 0.5283057428980176, "grad_norm": 0.6800463882815131, "learning_rate": 1.0482400648824007e-05, "loss": 0.6611, "step": 18095 }, { "epoch": 0.5283349391258649, "grad_norm": 0.6499440013762507, "learning_rate": 1.0481751824817519e-05, "loss": 0.6575, "step": 18096 }, { "epoch": 0.5283641353537123, "grad_norm": 0.6705415196878284, "learning_rate": 1.0481103000811031e-05, "loss": 0.6969, "step": 18097 }, { "epoch": 0.5283933315815597, "grad_norm": 0.6460374462034729, "learning_rate": 1.0480454176804541e-05, "loss": 0.6467, "step": 18098 }, { "epoch": 0.528422527809407, "grad_norm": 0.6645539318866336, "learning_rate": 1.0479805352798053e-05, "loss": 0.6432, "step": 18099 }, { "epoch": 0.5284517240372544, "grad_norm": 0.6281609892839748, "learning_rate": 1.0479156528791565e-05, "loss": 0.5857, "step": 18100 }, { "epoch": 0.5284809202651017, "grad_norm": 0.9546509501225224, "learning_rate": 1.0478507704785077e-05, "loss": 0.6434, "step": 18101 }, { "epoch": 0.5285101164929491, "grad_norm": 0.6737130012624883, "learning_rate": 1.047785888077859e-05, "loss": 0.6613, "step": 18102 }, { "epoch": 0.5285393127207965, "grad_norm": 0.6448850304663681, "learning_rate": 1.0477210056772103e-05, "loss": 0.5805, "step": 18103 }, { "epoch": 0.5285685089486438, "grad_norm": 0.6176370360746325, "learning_rate": 1.0476561232765613e-05, "loss": 0.5774, "step": 18104 }, { "epoch": 0.5285977051764912, "grad_norm": 0.6494986773060254, "learning_rate": 1.0475912408759125e-05, "loss": 0.6345, "step": 18105 }, { "epoch": 0.5286269014043385, "grad_norm": 0.6144118057076589, "learning_rate": 1.0475263584752637e-05, "loss": 0.5466, "step": 18106 }, { "epoch": 0.5286560976321859, "grad_norm": 0.612423200887289, "learning_rate": 1.047461476074615e-05, "loss": 0.554, "step": 18107 }, { "epoch": 0.5286852938600333, "grad_norm": 0.6457276772067719, "learning_rate": 1.0473965936739661e-05, "loss": 0.6152, "step": 18108 }, { "epoch": 0.5287144900878806, "grad_norm": 0.635093793604649, "learning_rate": 1.0473317112733172e-05, "loss": 0.5615, "step": 18109 }, { "epoch": 0.528743686315728, "grad_norm": 0.6522604185080219, "learning_rate": 1.0472668288726684e-05, "loss": 0.6588, "step": 18110 }, { "epoch": 0.5287728825435754, "grad_norm": 0.7045299251320266, "learning_rate": 1.0472019464720196e-05, "loss": 0.7196, "step": 18111 }, { "epoch": 0.5288020787714227, "grad_norm": 0.6220809982899618, "learning_rate": 1.0471370640713708e-05, "loss": 0.5865, "step": 18112 }, { "epoch": 0.5288312749992701, "grad_norm": 0.6445408068555818, "learning_rate": 1.0470721816707218e-05, "loss": 0.6431, "step": 18113 }, { "epoch": 0.5288604712271174, "grad_norm": 0.6645908882424666, "learning_rate": 1.047007299270073e-05, "loss": 0.6295, "step": 18114 }, { "epoch": 0.5288896674549648, "grad_norm": 0.6818621757785447, "learning_rate": 1.0469424168694242e-05, "loss": 0.7, "step": 18115 }, { "epoch": 0.5289188636828122, "grad_norm": 0.6561632490643489, "learning_rate": 1.0468775344687754e-05, "loss": 0.6292, "step": 18116 }, { "epoch": 0.5289480599106595, "grad_norm": 0.6506462826354799, "learning_rate": 1.0468126520681265e-05, "loss": 0.5913, "step": 18117 }, { "epoch": 0.5289772561385069, "grad_norm": 0.6550539376820234, "learning_rate": 1.0467477696674777e-05, "loss": 0.5848, "step": 18118 }, { "epoch": 0.5290064523663542, "grad_norm": 0.6144920687083595, "learning_rate": 1.0466828872668289e-05, "loss": 0.5874, "step": 18119 }, { "epoch": 0.5290356485942016, "grad_norm": 0.6312488971128666, "learning_rate": 1.04661800486618e-05, "loss": 0.609, "step": 18120 }, { "epoch": 0.529064844822049, "grad_norm": 0.6477519279515669, "learning_rate": 1.0465531224655313e-05, "loss": 0.665, "step": 18121 }, { "epoch": 0.5290940410498963, "grad_norm": 0.6100875432264187, "learning_rate": 1.0464882400648826e-05, "loss": 0.5586, "step": 18122 }, { "epoch": 0.5291232372777437, "grad_norm": 0.6265043327217015, "learning_rate": 1.0464233576642337e-05, "loss": 0.558, "step": 18123 }, { "epoch": 0.529152433505591, "grad_norm": 0.624623834958329, "learning_rate": 1.0463584752635849e-05, "loss": 0.6108, "step": 18124 }, { "epoch": 0.5291816297334384, "grad_norm": 0.6308474364057262, "learning_rate": 1.046293592862936e-05, "loss": 0.5936, "step": 18125 }, { "epoch": 0.5292108259612858, "grad_norm": 0.6024884645258128, "learning_rate": 1.0462287104622873e-05, "loss": 0.5613, "step": 18126 }, { "epoch": 0.5292400221891331, "grad_norm": 0.6684827361991094, "learning_rate": 1.0461638280616385e-05, "loss": 0.6183, "step": 18127 }, { "epoch": 0.5292692184169805, "grad_norm": 0.6334570115335751, "learning_rate": 1.0460989456609895e-05, "loss": 0.6382, "step": 18128 }, { "epoch": 0.5292984146448279, "grad_norm": 0.7201959493187517, "learning_rate": 1.0460340632603407e-05, "loss": 0.6988, "step": 18129 }, { "epoch": 0.5293276108726752, "grad_norm": 0.6724773125196335, "learning_rate": 1.045969180859692e-05, "loss": 0.6103, "step": 18130 }, { "epoch": 0.5293568071005226, "grad_norm": 0.6182474981311068, "learning_rate": 1.0459042984590431e-05, "loss": 0.5657, "step": 18131 }, { "epoch": 0.5293860033283699, "grad_norm": 0.7218441871868506, "learning_rate": 1.0458394160583942e-05, "loss": 0.686, "step": 18132 }, { "epoch": 0.5294151995562173, "grad_norm": 0.6297327049700074, "learning_rate": 1.0457745336577454e-05, "loss": 0.5976, "step": 18133 }, { "epoch": 0.5294443957840647, "grad_norm": 0.6009322227426962, "learning_rate": 1.0457096512570966e-05, "loss": 0.5277, "step": 18134 }, { "epoch": 0.529473592011912, "grad_norm": 0.6302189510255485, "learning_rate": 1.0456447688564478e-05, "loss": 0.5951, "step": 18135 }, { "epoch": 0.5295027882397594, "grad_norm": 0.6087789062146735, "learning_rate": 1.0455798864557988e-05, "loss": 0.5643, "step": 18136 }, { "epoch": 0.5295319844676067, "grad_norm": 0.6436611349254386, "learning_rate": 1.04551500405515e-05, "loss": 0.5813, "step": 18137 }, { "epoch": 0.5295611806954541, "grad_norm": 0.6665563786599976, "learning_rate": 1.0454501216545012e-05, "loss": 0.6625, "step": 18138 }, { "epoch": 0.5295903769233015, "grad_norm": 0.6297338917766178, "learning_rate": 1.0453852392538524e-05, "loss": 0.5993, "step": 18139 }, { "epoch": 0.5296195731511488, "grad_norm": 0.6817024007470978, "learning_rate": 1.0453203568532036e-05, "loss": 0.6966, "step": 18140 }, { "epoch": 0.5296487693789962, "grad_norm": 0.604498500095318, "learning_rate": 1.045255474452555e-05, "loss": 0.5254, "step": 18141 }, { "epoch": 0.5296779656068435, "grad_norm": 0.6190378684000203, "learning_rate": 1.045190592051906e-05, "loss": 0.538, "step": 18142 }, { "epoch": 0.5297071618346909, "grad_norm": 0.6365030533156468, "learning_rate": 1.0451257096512572e-05, "loss": 0.6243, "step": 18143 }, { "epoch": 0.5297363580625383, "grad_norm": 0.5898906395302922, "learning_rate": 1.0450608272506084e-05, "loss": 0.5587, "step": 18144 }, { "epoch": 0.5297655542903856, "grad_norm": 0.6469499035745628, "learning_rate": 1.0449959448499596e-05, "loss": 0.6155, "step": 18145 }, { "epoch": 0.529794750518233, "grad_norm": 0.6548049291032207, "learning_rate": 1.0449310624493108e-05, "loss": 0.626, "step": 18146 }, { "epoch": 0.5298239467460804, "grad_norm": 0.6460522072791515, "learning_rate": 1.0448661800486619e-05, "loss": 0.6677, "step": 18147 }, { "epoch": 0.5298531429739277, "grad_norm": 0.6642751625167437, "learning_rate": 1.044801297648013e-05, "loss": 0.6477, "step": 18148 }, { "epoch": 0.5298823392017751, "grad_norm": 0.6660315390288258, "learning_rate": 1.0447364152473643e-05, "loss": 0.5863, "step": 18149 }, { "epoch": 0.5299115354296224, "grad_norm": 0.6295748950176432, "learning_rate": 1.0446715328467155e-05, "loss": 0.6072, "step": 18150 }, { "epoch": 0.5299407316574699, "grad_norm": 0.6256894025927391, "learning_rate": 1.0446066504460665e-05, "loss": 0.5959, "step": 18151 }, { "epoch": 0.5299699278853173, "grad_norm": 0.6381670779206287, "learning_rate": 1.0445417680454177e-05, "loss": 0.5868, "step": 18152 }, { "epoch": 0.5299991241131646, "grad_norm": 0.6591099077308901, "learning_rate": 1.0444768856447689e-05, "loss": 0.6248, "step": 18153 }, { "epoch": 0.530028320341012, "grad_norm": 0.6459597708619415, "learning_rate": 1.0444120032441201e-05, "loss": 0.6239, "step": 18154 }, { "epoch": 0.5300575165688594, "grad_norm": 0.576889732263444, "learning_rate": 1.0443471208434711e-05, "loss": 0.5057, "step": 18155 }, { "epoch": 0.5300867127967067, "grad_norm": 0.6093367081522905, "learning_rate": 1.0442822384428224e-05, "loss": 0.556, "step": 18156 }, { "epoch": 0.5301159090245541, "grad_norm": 0.6187304639772373, "learning_rate": 1.0442173560421736e-05, "loss": 0.6017, "step": 18157 }, { "epoch": 0.5301451052524014, "grad_norm": 0.6662509612262794, "learning_rate": 1.0441524736415248e-05, "loss": 0.6348, "step": 18158 }, { "epoch": 0.5301743014802488, "grad_norm": 0.6394864679676721, "learning_rate": 1.0440875912408761e-05, "loss": 0.6381, "step": 18159 }, { "epoch": 0.5302034977080962, "grad_norm": 0.6214186902539071, "learning_rate": 1.0440227088402273e-05, "loss": 0.5591, "step": 18160 }, { "epoch": 0.5302326939359435, "grad_norm": 0.6747725863035244, "learning_rate": 1.0439578264395784e-05, "loss": 0.6064, "step": 18161 }, { "epoch": 0.5302618901637909, "grad_norm": 0.6192047919341898, "learning_rate": 1.0438929440389296e-05, "loss": 0.579, "step": 18162 }, { "epoch": 0.5302910863916382, "grad_norm": 0.6775907376302742, "learning_rate": 1.0438280616382808e-05, "loss": 0.686, "step": 18163 }, { "epoch": 0.5303202826194856, "grad_norm": 0.6908444816848689, "learning_rate": 1.043763179237632e-05, "loss": 0.6951, "step": 18164 }, { "epoch": 0.530349478847333, "grad_norm": 0.611228030032139, "learning_rate": 1.0436982968369832e-05, "loss": 0.5649, "step": 18165 }, { "epoch": 0.5303786750751803, "grad_norm": 0.6642234459279373, "learning_rate": 1.0436334144363342e-05, "loss": 0.6616, "step": 18166 }, { "epoch": 0.5304078713030277, "grad_norm": 0.6670541306614529, "learning_rate": 1.0435685320356854e-05, "loss": 0.6422, "step": 18167 }, { "epoch": 0.530437067530875, "grad_norm": 0.688483469450101, "learning_rate": 1.0435036496350366e-05, "loss": 0.6014, "step": 18168 }, { "epoch": 0.5304662637587224, "grad_norm": 0.6792647154943693, "learning_rate": 1.0434387672343878e-05, "loss": 0.6915, "step": 18169 }, { "epoch": 0.5304954599865698, "grad_norm": 0.6501821369496642, "learning_rate": 1.0433738848337389e-05, "loss": 0.5982, "step": 18170 }, { "epoch": 0.5305246562144171, "grad_norm": 0.6463699837641893, "learning_rate": 1.04330900243309e-05, "loss": 0.6205, "step": 18171 }, { "epoch": 0.5305538524422645, "grad_norm": 0.5940276071817115, "learning_rate": 1.0432441200324413e-05, "loss": 0.5235, "step": 18172 }, { "epoch": 0.5305830486701119, "grad_norm": 0.6720200226351006, "learning_rate": 1.0431792376317925e-05, "loss": 0.4932, "step": 18173 }, { "epoch": 0.5306122448979592, "grad_norm": 0.5681053490590344, "learning_rate": 1.0431143552311435e-05, "loss": 0.5047, "step": 18174 }, { "epoch": 0.5306414411258066, "grad_norm": 0.66489567861435, "learning_rate": 1.0430494728304947e-05, "loss": 0.6476, "step": 18175 }, { "epoch": 0.5306706373536539, "grad_norm": 0.6530119776371144, "learning_rate": 1.0429845904298459e-05, "loss": 0.5967, "step": 18176 }, { "epoch": 0.5306998335815013, "grad_norm": 0.684338093379282, "learning_rate": 1.0429197080291971e-05, "loss": 0.6531, "step": 18177 }, { "epoch": 0.5307290298093487, "grad_norm": 0.6291872723081647, "learning_rate": 1.0428548256285485e-05, "loss": 0.5568, "step": 18178 }, { "epoch": 0.530758226037196, "grad_norm": 0.6450426825143786, "learning_rate": 1.0427899432278997e-05, "loss": 0.6283, "step": 18179 }, { "epoch": 0.5307874222650434, "grad_norm": 0.6329204487892562, "learning_rate": 1.0427250608272507e-05, "loss": 0.5847, "step": 18180 }, { "epoch": 0.5308166184928907, "grad_norm": 0.605493211793087, "learning_rate": 1.0426601784266019e-05, "loss": 0.5329, "step": 18181 }, { "epoch": 0.5308458147207381, "grad_norm": 0.6826428413645675, "learning_rate": 1.0425952960259531e-05, "loss": 0.7012, "step": 18182 }, { "epoch": 0.5308750109485855, "grad_norm": 0.6914345171948157, "learning_rate": 1.0425304136253043e-05, "loss": 0.6361, "step": 18183 }, { "epoch": 0.5309042071764328, "grad_norm": 0.6189514206326829, "learning_rate": 1.0424655312246555e-05, "loss": 0.5772, "step": 18184 }, { "epoch": 0.5309334034042802, "grad_norm": 0.6157084585622613, "learning_rate": 1.0424006488240066e-05, "loss": 0.5308, "step": 18185 }, { "epoch": 0.5309625996321276, "grad_norm": 0.6252020947649077, "learning_rate": 1.0423357664233578e-05, "loss": 0.5859, "step": 18186 }, { "epoch": 0.5309917958599749, "grad_norm": 0.6448278080749613, "learning_rate": 1.042270884022709e-05, "loss": 0.6037, "step": 18187 }, { "epoch": 0.5310209920878223, "grad_norm": 0.6476755575140072, "learning_rate": 1.0422060016220602e-05, "loss": 0.6462, "step": 18188 }, { "epoch": 0.5310501883156696, "grad_norm": 0.6530361077718604, "learning_rate": 1.0421411192214112e-05, "loss": 0.6274, "step": 18189 }, { "epoch": 0.531079384543517, "grad_norm": 0.6513760045319673, "learning_rate": 1.0420762368207624e-05, "loss": 0.5864, "step": 18190 }, { "epoch": 0.5311085807713644, "grad_norm": 0.6778976981792988, "learning_rate": 1.0420113544201136e-05, "loss": 0.6167, "step": 18191 }, { "epoch": 0.5311377769992117, "grad_norm": 0.6396279164258921, "learning_rate": 1.0419464720194648e-05, "loss": 0.6568, "step": 18192 }, { "epoch": 0.5311669732270591, "grad_norm": 0.6649342261774527, "learning_rate": 1.0418815896188158e-05, "loss": 0.562, "step": 18193 }, { "epoch": 0.5311961694549064, "grad_norm": 0.6190281363636162, "learning_rate": 1.041816707218167e-05, "loss": 0.5861, "step": 18194 }, { "epoch": 0.5312253656827538, "grad_norm": 0.623566949436421, "learning_rate": 1.0417518248175182e-05, "loss": 0.586, "step": 18195 }, { "epoch": 0.5312545619106012, "grad_norm": 0.6308253514232417, "learning_rate": 1.0416869424168694e-05, "loss": 0.593, "step": 18196 }, { "epoch": 0.5312837581384485, "grad_norm": 0.6206095635443346, "learning_rate": 1.0416220600162208e-05, "loss": 0.631, "step": 18197 }, { "epoch": 0.5313129543662959, "grad_norm": 0.6222058964248365, "learning_rate": 1.041557177615572e-05, "loss": 0.5476, "step": 18198 }, { "epoch": 0.5313421505941432, "grad_norm": 0.5772462602224652, "learning_rate": 1.041492295214923e-05, "loss": 0.5147, "step": 18199 }, { "epoch": 0.5313713468219906, "grad_norm": 0.6512431514690576, "learning_rate": 1.0414274128142743e-05, "loss": 0.6837, "step": 18200 }, { "epoch": 0.531400543049838, "grad_norm": 0.624534570612158, "learning_rate": 1.0413625304136255e-05, "loss": 0.6001, "step": 18201 }, { "epoch": 0.5314297392776853, "grad_norm": 0.6050173428889916, "learning_rate": 1.0412976480129767e-05, "loss": 0.5802, "step": 18202 }, { "epoch": 0.5314589355055327, "grad_norm": 0.6490822676628735, "learning_rate": 1.0412327656123277e-05, "loss": 0.6356, "step": 18203 }, { "epoch": 0.53148813173338, "grad_norm": 0.6193131973823656, "learning_rate": 1.0411678832116789e-05, "loss": 0.5693, "step": 18204 }, { "epoch": 0.5315173279612274, "grad_norm": 0.6215305432703744, "learning_rate": 1.0411030008110301e-05, "loss": 0.5942, "step": 18205 }, { "epoch": 0.5315465241890748, "grad_norm": 0.6154038205196503, "learning_rate": 1.0410381184103813e-05, "loss": 0.6242, "step": 18206 }, { "epoch": 0.5315757204169221, "grad_norm": 0.6273709640786982, "learning_rate": 1.0409732360097325e-05, "loss": 0.5879, "step": 18207 }, { "epoch": 0.5316049166447695, "grad_norm": 0.6249756084004736, "learning_rate": 1.0409083536090835e-05, "loss": 0.5896, "step": 18208 }, { "epoch": 0.5316341128726169, "grad_norm": 0.7061006962019416, "learning_rate": 1.0408434712084347e-05, "loss": 0.6539, "step": 18209 }, { "epoch": 0.5316633091004642, "grad_norm": 0.827713907562571, "learning_rate": 1.040778588807786e-05, "loss": 0.7164, "step": 18210 }, { "epoch": 0.5316925053283116, "grad_norm": 0.62921685536975, "learning_rate": 1.0407137064071371e-05, "loss": 0.6134, "step": 18211 }, { "epoch": 0.5317217015561589, "grad_norm": 0.6435689674447957, "learning_rate": 1.0406488240064882e-05, "loss": 0.6043, "step": 18212 }, { "epoch": 0.5317508977840063, "grad_norm": 0.6416524270109194, "learning_rate": 1.0405839416058394e-05, "loss": 0.5419, "step": 18213 }, { "epoch": 0.5317800940118537, "grad_norm": 0.6229940725315084, "learning_rate": 1.0405190592051906e-05, "loss": 0.5776, "step": 18214 }, { "epoch": 0.531809290239701, "grad_norm": 0.6611872735790219, "learning_rate": 1.0404541768045418e-05, "loss": 0.6927, "step": 18215 }, { "epoch": 0.5318384864675484, "grad_norm": 0.6302679270254267, "learning_rate": 1.0403892944038932e-05, "loss": 0.577, "step": 18216 }, { "epoch": 0.5318676826953957, "grad_norm": 0.6899883754242594, "learning_rate": 1.0403244120032444e-05, "loss": 0.6569, "step": 18217 }, { "epoch": 0.5318968789232431, "grad_norm": 0.7244348395199908, "learning_rate": 1.0402595296025954e-05, "loss": 0.6735, "step": 18218 }, { "epoch": 0.5319260751510905, "grad_norm": 0.6389527239766174, "learning_rate": 1.0401946472019466e-05, "loss": 0.6242, "step": 18219 }, { "epoch": 0.5319552713789378, "grad_norm": 0.6300158275659081, "learning_rate": 1.0401297648012978e-05, "loss": 0.6027, "step": 18220 }, { "epoch": 0.5319844676067852, "grad_norm": 0.6797062919058569, "learning_rate": 1.040064882400649e-05, "loss": 0.6327, "step": 18221 }, { "epoch": 0.5320136638346326, "grad_norm": 0.6357090960575905, "learning_rate": 1.04e-05, "loss": 0.5905, "step": 18222 }, { "epoch": 0.5320428600624799, "grad_norm": 0.582695723161131, "learning_rate": 1.0399351175993512e-05, "loss": 0.5188, "step": 18223 }, { "epoch": 0.5320720562903273, "grad_norm": 0.684161778045553, "learning_rate": 1.0398702351987024e-05, "loss": 0.6544, "step": 18224 }, { "epoch": 0.5321012525181746, "grad_norm": 0.6660433480587815, "learning_rate": 1.0398053527980536e-05, "loss": 0.6734, "step": 18225 }, { "epoch": 0.532130448746022, "grad_norm": 0.6283229260055819, "learning_rate": 1.0397404703974048e-05, "loss": 0.554, "step": 18226 }, { "epoch": 0.5321596449738694, "grad_norm": 0.647163278556649, "learning_rate": 1.0396755879967559e-05, "loss": 0.6285, "step": 18227 }, { "epoch": 0.5321888412017167, "grad_norm": 0.6352033823289245, "learning_rate": 1.039610705596107e-05, "loss": 0.5905, "step": 18228 }, { "epoch": 0.5322180374295641, "grad_norm": 0.6498430699989322, "learning_rate": 1.0395458231954583e-05, "loss": 0.5934, "step": 18229 }, { "epoch": 0.5322472336574114, "grad_norm": 0.6485685381786411, "learning_rate": 1.0394809407948095e-05, "loss": 0.6344, "step": 18230 }, { "epoch": 0.5322764298852588, "grad_norm": 0.6656207273484984, "learning_rate": 1.0394160583941605e-05, "loss": 0.6682, "step": 18231 }, { "epoch": 0.5323056261131062, "grad_norm": 0.6541724371470222, "learning_rate": 1.0393511759935117e-05, "loss": 0.6207, "step": 18232 }, { "epoch": 0.5323348223409535, "grad_norm": 0.6408790163567709, "learning_rate": 1.039286293592863e-05, "loss": 0.6166, "step": 18233 }, { "epoch": 0.5323640185688009, "grad_norm": 0.6813928286625032, "learning_rate": 1.0392214111922141e-05, "loss": 0.7058, "step": 18234 }, { "epoch": 0.5323932147966483, "grad_norm": 0.6413470450721279, "learning_rate": 1.0391565287915655e-05, "loss": 0.6358, "step": 18235 }, { "epoch": 0.5324224110244956, "grad_norm": 0.6289788498546349, "learning_rate": 1.0390916463909167e-05, "loss": 0.6349, "step": 18236 }, { "epoch": 0.532451607252343, "grad_norm": 0.6504933937332145, "learning_rate": 1.0390267639902677e-05, "loss": 0.5821, "step": 18237 }, { "epoch": 0.5324808034801903, "grad_norm": 0.6123571118521534, "learning_rate": 1.038961881589619e-05, "loss": 0.5882, "step": 18238 }, { "epoch": 0.5325099997080377, "grad_norm": 0.6797106588163386, "learning_rate": 1.0388969991889701e-05, "loss": 0.6415, "step": 18239 }, { "epoch": 0.5325391959358851, "grad_norm": 0.7481992771266188, "learning_rate": 1.0388321167883213e-05, "loss": 0.6998, "step": 18240 }, { "epoch": 0.5325683921637324, "grad_norm": 0.6304785318749814, "learning_rate": 1.0387672343876724e-05, "loss": 0.5525, "step": 18241 }, { "epoch": 0.5325975883915798, "grad_norm": 0.648575666041469, "learning_rate": 1.0387023519870236e-05, "loss": 0.604, "step": 18242 }, { "epoch": 0.5326267846194271, "grad_norm": 0.6354312664295502, "learning_rate": 1.0386374695863748e-05, "loss": 0.6275, "step": 18243 }, { "epoch": 0.5326559808472745, "grad_norm": 0.6543827424949706, "learning_rate": 1.038572587185726e-05, "loss": 0.6948, "step": 18244 }, { "epoch": 0.5326851770751219, "grad_norm": 0.6492627077416765, "learning_rate": 1.0385077047850772e-05, "loss": 0.6375, "step": 18245 }, { "epoch": 0.5327143733029692, "grad_norm": 0.6242728187127068, "learning_rate": 1.0384428223844282e-05, "loss": 0.6314, "step": 18246 }, { "epoch": 0.5327435695308166, "grad_norm": 0.7126411541318409, "learning_rate": 1.0383779399837794e-05, "loss": 0.6025, "step": 18247 }, { "epoch": 0.532772765758664, "grad_norm": 0.6832846853961176, "learning_rate": 1.0383130575831306e-05, "loss": 0.7141, "step": 18248 }, { "epoch": 0.5328019619865113, "grad_norm": 0.564306690561164, "learning_rate": 1.0382481751824818e-05, "loss": 0.5044, "step": 18249 }, { "epoch": 0.5328311582143587, "grad_norm": 0.6607605973439846, "learning_rate": 1.0381832927818329e-05, "loss": 0.663, "step": 18250 }, { "epoch": 0.532860354442206, "grad_norm": 0.680838429633972, "learning_rate": 1.038118410381184e-05, "loss": 0.6436, "step": 18251 }, { "epoch": 0.5328895506700534, "grad_norm": 0.589025736494346, "learning_rate": 1.0380535279805353e-05, "loss": 0.5438, "step": 18252 }, { "epoch": 0.5329187468979008, "grad_norm": 0.6226838822036441, "learning_rate": 1.0379886455798865e-05, "loss": 0.5978, "step": 18253 }, { "epoch": 0.5329479431257481, "grad_norm": 0.6088116017418584, "learning_rate": 1.0379237631792378e-05, "loss": 0.5392, "step": 18254 }, { "epoch": 0.5329771393535955, "grad_norm": 0.6730935118802079, "learning_rate": 1.037858880778589e-05, "loss": 0.653, "step": 18255 }, { "epoch": 0.5330063355814428, "grad_norm": 0.7591699984153354, "learning_rate": 1.03779399837794e-05, "loss": 0.6173, "step": 18256 }, { "epoch": 0.5330355318092902, "grad_norm": 0.6960630928457017, "learning_rate": 1.0377291159772913e-05, "loss": 0.6609, "step": 18257 }, { "epoch": 0.5330647280371376, "grad_norm": 0.6064508541158454, "learning_rate": 1.0376642335766425e-05, "loss": 0.537, "step": 18258 }, { "epoch": 0.5330939242649849, "grad_norm": 0.6758743141204592, "learning_rate": 1.0375993511759937e-05, "loss": 0.6386, "step": 18259 }, { "epoch": 0.5331231204928323, "grad_norm": 0.6431462688890996, "learning_rate": 1.0375344687753447e-05, "loss": 0.6518, "step": 18260 }, { "epoch": 0.5331523167206796, "grad_norm": 0.701071547357067, "learning_rate": 1.037469586374696e-05, "loss": 0.6887, "step": 18261 }, { "epoch": 0.533181512948527, "grad_norm": 0.6726182551838263, "learning_rate": 1.0374047039740471e-05, "loss": 0.667, "step": 18262 }, { "epoch": 0.5332107091763744, "grad_norm": 0.6983297177709851, "learning_rate": 1.0373398215733983e-05, "loss": 0.751, "step": 18263 }, { "epoch": 0.5332399054042217, "grad_norm": 0.5908210913100778, "learning_rate": 1.0372749391727495e-05, "loss": 0.5658, "step": 18264 }, { "epoch": 0.5332691016320691, "grad_norm": 0.6393972346024409, "learning_rate": 1.0372100567721006e-05, "loss": 0.6234, "step": 18265 }, { "epoch": 0.5332982978599164, "grad_norm": 0.6518695246290646, "learning_rate": 1.0371451743714518e-05, "loss": 0.6779, "step": 18266 }, { "epoch": 0.5333274940877638, "grad_norm": 0.708471336379741, "learning_rate": 1.037080291970803e-05, "loss": 0.6436, "step": 18267 }, { "epoch": 0.5333566903156112, "grad_norm": 0.6771255730844152, "learning_rate": 1.0370154095701542e-05, "loss": 0.6811, "step": 18268 }, { "epoch": 0.5333858865434585, "grad_norm": 0.6183470771470242, "learning_rate": 1.0369505271695052e-05, "loss": 0.586, "step": 18269 }, { "epoch": 0.5334150827713059, "grad_norm": 0.5895250356461439, "learning_rate": 1.0368856447688564e-05, "loss": 0.5296, "step": 18270 }, { "epoch": 0.5334442789991533, "grad_norm": 0.6529045945061424, "learning_rate": 1.0368207623682076e-05, "loss": 0.6047, "step": 18271 }, { "epoch": 0.5334734752270007, "grad_norm": 0.6090710451154145, "learning_rate": 1.0367558799675588e-05, "loss": 0.5536, "step": 18272 }, { "epoch": 0.5335026714548481, "grad_norm": 0.682119059810879, "learning_rate": 1.0366909975669102e-05, "loss": 0.6557, "step": 18273 }, { "epoch": 0.5335318676826954, "grad_norm": 0.6684881780462538, "learning_rate": 1.0366261151662614e-05, "loss": 0.6503, "step": 18274 }, { "epoch": 0.5335610639105428, "grad_norm": 0.6452066819674402, "learning_rate": 1.0365612327656124e-05, "loss": 0.598, "step": 18275 }, { "epoch": 0.5335902601383902, "grad_norm": 0.6341992166885685, "learning_rate": 1.0364963503649636e-05, "loss": 0.6476, "step": 18276 }, { "epoch": 0.5336194563662375, "grad_norm": 0.6086486995871325, "learning_rate": 1.0364314679643148e-05, "loss": 0.5741, "step": 18277 }, { "epoch": 0.5336486525940849, "grad_norm": 0.6390574433586239, "learning_rate": 1.036366585563666e-05, "loss": 0.6402, "step": 18278 }, { "epoch": 0.5336778488219323, "grad_norm": 0.625749829067887, "learning_rate": 1.036301703163017e-05, "loss": 0.5717, "step": 18279 }, { "epoch": 0.5337070450497796, "grad_norm": 0.6294092138364304, "learning_rate": 1.0362368207623683e-05, "loss": 0.602, "step": 18280 }, { "epoch": 0.533736241277627, "grad_norm": 0.638121921536683, "learning_rate": 1.0361719383617195e-05, "loss": 0.6162, "step": 18281 }, { "epoch": 0.5337654375054743, "grad_norm": 0.6256054760866623, "learning_rate": 1.0361070559610707e-05, "loss": 0.6225, "step": 18282 }, { "epoch": 0.5337946337333217, "grad_norm": 0.6098423018573395, "learning_rate": 1.0360421735604219e-05, "loss": 0.5915, "step": 18283 }, { "epoch": 0.5338238299611691, "grad_norm": 0.6802181044938461, "learning_rate": 1.0359772911597729e-05, "loss": 0.7222, "step": 18284 }, { "epoch": 0.5338530261890164, "grad_norm": 0.6990411399953546, "learning_rate": 1.0359124087591241e-05, "loss": 0.7147, "step": 18285 }, { "epoch": 0.5338822224168638, "grad_norm": 0.650575840169583, "learning_rate": 1.0358475263584753e-05, "loss": 0.6526, "step": 18286 }, { "epoch": 0.5339114186447111, "grad_norm": 0.6675939933282115, "learning_rate": 1.0357826439578265e-05, "loss": 0.6346, "step": 18287 }, { "epoch": 0.5339406148725585, "grad_norm": 0.6641809336556099, "learning_rate": 1.0357177615571776e-05, "loss": 0.6566, "step": 18288 }, { "epoch": 0.5339698111004059, "grad_norm": 0.6981366851357627, "learning_rate": 1.0356528791565288e-05, "loss": 0.5943, "step": 18289 }, { "epoch": 0.5339990073282532, "grad_norm": 0.6893130603199624, "learning_rate": 1.03558799675588e-05, "loss": 0.676, "step": 18290 }, { "epoch": 0.5340282035561006, "grad_norm": 0.6448659010518938, "learning_rate": 1.0355231143552312e-05, "loss": 0.6579, "step": 18291 }, { "epoch": 0.534057399783948, "grad_norm": 0.5945494316384877, "learning_rate": 1.0354582319545825e-05, "loss": 0.5366, "step": 18292 }, { "epoch": 0.5340865960117953, "grad_norm": 0.5908690396313097, "learning_rate": 1.0353933495539337e-05, "loss": 0.5569, "step": 18293 }, { "epoch": 0.5341157922396427, "grad_norm": 0.6847827976955734, "learning_rate": 1.0353284671532848e-05, "loss": 0.6938, "step": 18294 }, { "epoch": 0.53414498846749, "grad_norm": 0.7095092106780151, "learning_rate": 1.035263584752636e-05, "loss": 0.6603, "step": 18295 }, { "epoch": 0.5341741846953374, "grad_norm": 0.651364754195869, "learning_rate": 1.0351987023519872e-05, "loss": 0.5632, "step": 18296 }, { "epoch": 0.5342033809231848, "grad_norm": 0.6087657575333474, "learning_rate": 1.0351338199513384e-05, "loss": 0.5478, "step": 18297 }, { "epoch": 0.5342325771510321, "grad_norm": 0.6230562691060951, "learning_rate": 1.0350689375506894e-05, "loss": 0.5677, "step": 18298 }, { "epoch": 0.5342617733788795, "grad_norm": 0.6878938726597883, "learning_rate": 1.0350040551500406e-05, "loss": 0.6312, "step": 18299 }, { "epoch": 0.5342909696067268, "grad_norm": 0.6296725404068725, "learning_rate": 1.0349391727493918e-05, "loss": 0.5864, "step": 18300 }, { "epoch": 0.5343201658345742, "grad_norm": 0.668525735188227, "learning_rate": 1.034874290348743e-05, "loss": 0.6186, "step": 18301 }, { "epoch": 0.5343493620624216, "grad_norm": 0.6279198432475133, "learning_rate": 1.0348094079480942e-05, "loss": 0.5601, "step": 18302 }, { "epoch": 0.5343785582902689, "grad_norm": 0.7471406781454948, "learning_rate": 1.0347445255474453e-05, "loss": 0.7434, "step": 18303 }, { "epoch": 0.5344077545181163, "grad_norm": 0.6332051976154361, "learning_rate": 1.0346796431467965e-05, "loss": 0.5563, "step": 18304 }, { "epoch": 0.5344369507459636, "grad_norm": 0.6654365924214714, "learning_rate": 1.0346147607461477e-05, "loss": 0.6387, "step": 18305 }, { "epoch": 0.534466146973811, "grad_norm": 0.6261558345597507, "learning_rate": 1.0345498783454989e-05, "loss": 0.5688, "step": 18306 }, { "epoch": 0.5344953432016584, "grad_norm": 0.616284472050146, "learning_rate": 1.0344849959448499e-05, "loss": 0.5863, "step": 18307 }, { "epoch": 0.5345245394295057, "grad_norm": 0.6460696546448895, "learning_rate": 1.0344201135442011e-05, "loss": 0.6301, "step": 18308 }, { "epoch": 0.5345537356573531, "grad_norm": 0.6499000928894002, "learning_rate": 1.0343552311435523e-05, "loss": 0.5754, "step": 18309 }, { "epoch": 0.5345829318852005, "grad_norm": 0.639311757738176, "learning_rate": 1.0342903487429037e-05, "loss": 0.6219, "step": 18310 }, { "epoch": 0.5346121281130478, "grad_norm": 0.6346705085631986, "learning_rate": 1.0342254663422549e-05, "loss": 0.5585, "step": 18311 }, { "epoch": 0.5346413243408952, "grad_norm": 0.6754132860183352, "learning_rate": 1.034160583941606e-05, "loss": 0.6447, "step": 18312 }, { "epoch": 0.5346705205687425, "grad_norm": 0.6871394377098432, "learning_rate": 1.0340957015409571e-05, "loss": 0.6326, "step": 18313 }, { "epoch": 0.5346997167965899, "grad_norm": 0.6678032318642594, "learning_rate": 1.0340308191403083e-05, "loss": 0.6597, "step": 18314 }, { "epoch": 0.5347289130244373, "grad_norm": 0.6288113717382834, "learning_rate": 1.0339659367396595e-05, "loss": 0.6221, "step": 18315 }, { "epoch": 0.5347581092522846, "grad_norm": 0.6727186039390457, "learning_rate": 1.0339010543390107e-05, "loss": 0.6315, "step": 18316 }, { "epoch": 0.534787305480132, "grad_norm": 0.6669874928388358, "learning_rate": 1.0338361719383618e-05, "loss": 0.6126, "step": 18317 }, { "epoch": 0.5348165017079793, "grad_norm": 0.6043327997579544, "learning_rate": 1.033771289537713e-05, "loss": 0.5651, "step": 18318 }, { "epoch": 0.5348456979358267, "grad_norm": 0.6689778450242038, "learning_rate": 1.0337064071370642e-05, "loss": 0.6478, "step": 18319 }, { "epoch": 0.5348748941636741, "grad_norm": 0.6463537833224197, "learning_rate": 1.0336415247364154e-05, "loss": 0.615, "step": 18320 }, { "epoch": 0.5349040903915214, "grad_norm": 0.6710375708500278, "learning_rate": 1.0335766423357666e-05, "loss": 0.568, "step": 18321 }, { "epoch": 0.5349332866193688, "grad_norm": 0.6691016364085167, "learning_rate": 1.0335117599351176e-05, "loss": 0.6929, "step": 18322 }, { "epoch": 0.5349624828472161, "grad_norm": 0.6853437514485755, "learning_rate": 1.0334468775344688e-05, "loss": 0.589, "step": 18323 }, { "epoch": 0.5349916790750635, "grad_norm": 0.6412942469425287, "learning_rate": 1.03338199513382e-05, "loss": 0.5625, "step": 18324 }, { "epoch": 0.5350208753029109, "grad_norm": 0.6455520369943709, "learning_rate": 1.0333171127331712e-05, "loss": 0.6194, "step": 18325 }, { "epoch": 0.5350500715307582, "grad_norm": 0.672333531145793, "learning_rate": 1.0332522303325222e-05, "loss": 0.6363, "step": 18326 }, { "epoch": 0.5350792677586056, "grad_norm": 0.6477808283854428, "learning_rate": 1.0331873479318734e-05, "loss": 0.6158, "step": 18327 }, { "epoch": 0.535108463986453, "grad_norm": 0.6134740082028514, "learning_rate": 1.0331224655312246e-05, "loss": 0.5487, "step": 18328 }, { "epoch": 0.5351376602143003, "grad_norm": 0.6666088642155891, "learning_rate": 1.033057583130576e-05, "loss": 0.6575, "step": 18329 }, { "epoch": 0.5351668564421477, "grad_norm": 0.6743548207891372, "learning_rate": 1.0329927007299272e-05, "loss": 0.6652, "step": 18330 }, { "epoch": 0.535196052669995, "grad_norm": 0.641222638731846, "learning_rate": 1.0329278183292784e-05, "loss": 0.6208, "step": 18331 }, { "epoch": 0.5352252488978424, "grad_norm": 0.6634574500584631, "learning_rate": 1.0328629359286295e-05, "loss": 0.6395, "step": 18332 }, { "epoch": 0.5352544451256898, "grad_norm": 0.6096190131904174, "learning_rate": 1.0327980535279807e-05, "loss": 0.5835, "step": 18333 }, { "epoch": 0.5352836413535371, "grad_norm": 0.6313204836361159, "learning_rate": 1.0327331711273319e-05, "loss": 0.5864, "step": 18334 }, { "epoch": 0.5353128375813845, "grad_norm": 0.6132012150440294, "learning_rate": 1.032668288726683e-05, "loss": 0.5645, "step": 18335 }, { "epoch": 0.5353420338092318, "grad_norm": 0.6645202832810243, "learning_rate": 1.0326034063260341e-05, "loss": 0.6571, "step": 18336 }, { "epoch": 0.5353712300370792, "grad_norm": 0.5804871926972679, "learning_rate": 1.0325385239253853e-05, "loss": 0.5079, "step": 18337 }, { "epoch": 0.5354004262649266, "grad_norm": 0.6242155296125874, "learning_rate": 1.0324736415247365e-05, "loss": 0.5974, "step": 18338 }, { "epoch": 0.5354296224927739, "grad_norm": 0.6547539571127384, "learning_rate": 1.0324087591240877e-05, "loss": 0.6122, "step": 18339 }, { "epoch": 0.5354588187206213, "grad_norm": 0.6651730025757917, "learning_rate": 1.0323438767234389e-05, "loss": 0.6327, "step": 18340 }, { "epoch": 0.5354880149484686, "grad_norm": 0.6180986608456642, "learning_rate": 1.03227899432279e-05, "loss": 0.5501, "step": 18341 }, { "epoch": 0.535517211176316, "grad_norm": 0.5810578407364486, "learning_rate": 1.0322141119221411e-05, "loss": 0.5587, "step": 18342 }, { "epoch": 0.5355464074041634, "grad_norm": 0.6280767599122361, "learning_rate": 1.0321492295214923e-05, "loss": 0.5864, "step": 18343 }, { "epoch": 0.5355756036320107, "grad_norm": 0.6424669302343148, "learning_rate": 1.0320843471208436e-05, "loss": 0.6261, "step": 18344 }, { "epoch": 0.5356047998598581, "grad_norm": 0.6373173091110613, "learning_rate": 1.0320194647201946e-05, "loss": 0.6195, "step": 18345 }, { "epoch": 0.5356339960877055, "grad_norm": 0.6304224920403378, "learning_rate": 1.0319545823195458e-05, "loss": 0.6281, "step": 18346 }, { "epoch": 0.5356631923155528, "grad_norm": 0.6588621600013028, "learning_rate": 1.031889699918897e-05, "loss": 0.6039, "step": 18347 }, { "epoch": 0.5356923885434002, "grad_norm": 0.7039618283476331, "learning_rate": 1.0318248175182484e-05, "loss": 0.6553, "step": 18348 }, { "epoch": 0.5357215847712475, "grad_norm": 0.6059692690259175, "learning_rate": 1.0317599351175996e-05, "loss": 0.5542, "step": 18349 }, { "epoch": 0.5357507809990949, "grad_norm": 0.6787761446894961, "learning_rate": 1.0316950527169508e-05, "loss": 0.6637, "step": 18350 }, { "epoch": 0.5357799772269423, "grad_norm": 0.626667849011721, "learning_rate": 1.0316301703163018e-05, "loss": 0.6092, "step": 18351 }, { "epoch": 0.5358091734547896, "grad_norm": 0.6101049795924194, "learning_rate": 1.031565287915653e-05, "loss": 0.5248, "step": 18352 }, { "epoch": 0.535838369682637, "grad_norm": 0.6417378076579301, "learning_rate": 1.0315004055150042e-05, "loss": 0.6033, "step": 18353 }, { "epoch": 0.5358675659104843, "grad_norm": 0.651807154123166, "learning_rate": 1.0314355231143554e-05, "loss": 0.6543, "step": 18354 }, { "epoch": 0.5358967621383317, "grad_norm": 0.6553917920344772, "learning_rate": 1.0313706407137064e-05, "loss": 0.6369, "step": 18355 }, { "epoch": 0.5359259583661791, "grad_norm": 0.7346960333456392, "learning_rate": 1.0313057583130576e-05, "loss": 0.7158, "step": 18356 }, { "epoch": 0.5359551545940264, "grad_norm": 0.6211425186165371, "learning_rate": 1.0312408759124088e-05, "loss": 0.5981, "step": 18357 }, { "epoch": 0.5359843508218738, "grad_norm": 0.6412584258097322, "learning_rate": 1.03117599351176e-05, "loss": 0.5713, "step": 18358 }, { "epoch": 0.5360135470497212, "grad_norm": 0.6376200896228505, "learning_rate": 1.0311111111111113e-05, "loss": 0.5828, "step": 18359 }, { "epoch": 0.5360427432775685, "grad_norm": 0.6701619089235106, "learning_rate": 1.0310462287104623e-05, "loss": 0.6629, "step": 18360 }, { "epoch": 0.5360719395054159, "grad_norm": 0.6428560263317984, "learning_rate": 1.0309813463098135e-05, "loss": 0.6781, "step": 18361 }, { "epoch": 0.5361011357332632, "grad_norm": 0.6523215580754254, "learning_rate": 1.0309164639091647e-05, "loss": 0.6035, "step": 18362 }, { "epoch": 0.5361303319611106, "grad_norm": 0.6397116444708216, "learning_rate": 1.0308515815085159e-05, "loss": 0.6067, "step": 18363 }, { "epoch": 0.536159528188958, "grad_norm": 0.6345458015679076, "learning_rate": 1.030786699107867e-05, "loss": 0.5926, "step": 18364 }, { "epoch": 0.5361887244168053, "grad_norm": 0.744000645206484, "learning_rate": 1.0307218167072181e-05, "loss": 0.6874, "step": 18365 }, { "epoch": 0.5362179206446527, "grad_norm": 0.6419378906936153, "learning_rate": 1.0306569343065693e-05, "loss": 0.6115, "step": 18366 }, { "epoch": 0.5362471168725, "grad_norm": 0.6693329015291962, "learning_rate": 1.0305920519059207e-05, "loss": 0.6367, "step": 18367 }, { "epoch": 0.5362763131003474, "grad_norm": 0.6769407588953386, "learning_rate": 1.0305271695052719e-05, "loss": 0.6565, "step": 18368 }, { "epoch": 0.5363055093281948, "grad_norm": 0.6271767714487022, "learning_rate": 1.0304622871046231e-05, "loss": 0.6198, "step": 18369 }, { "epoch": 0.5363347055560421, "grad_norm": 0.6463597409146977, "learning_rate": 1.0303974047039741e-05, "loss": 0.5908, "step": 18370 }, { "epoch": 0.5363639017838895, "grad_norm": 0.625436810276908, "learning_rate": 1.0303325223033253e-05, "loss": 0.5536, "step": 18371 }, { "epoch": 0.5363930980117368, "grad_norm": 0.6463878010042771, "learning_rate": 1.0302676399026765e-05, "loss": 0.6205, "step": 18372 }, { "epoch": 0.5364222942395842, "grad_norm": 0.6841798491591381, "learning_rate": 1.0302027575020278e-05, "loss": 0.677, "step": 18373 }, { "epoch": 0.5364514904674316, "grad_norm": 0.6767802343800788, "learning_rate": 1.0301378751013788e-05, "loss": 0.6375, "step": 18374 }, { "epoch": 0.5364806866952789, "grad_norm": 0.6501142661859469, "learning_rate": 1.03007299270073e-05, "loss": 0.6098, "step": 18375 }, { "epoch": 0.5365098829231263, "grad_norm": 0.6173577671815323, "learning_rate": 1.0300081103000812e-05, "loss": 0.551, "step": 18376 }, { "epoch": 0.5365390791509737, "grad_norm": 0.6491762676075804, "learning_rate": 1.0299432278994324e-05, "loss": 0.6263, "step": 18377 }, { "epoch": 0.536568275378821, "grad_norm": 0.7724227540633661, "learning_rate": 1.0298783454987836e-05, "loss": 0.5528, "step": 18378 }, { "epoch": 0.5365974716066684, "grad_norm": 0.6782010305830367, "learning_rate": 1.0298134630981346e-05, "loss": 0.6621, "step": 18379 }, { "epoch": 0.5366266678345157, "grad_norm": 0.6678456881504798, "learning_rate": 1.0297485806974858e-05, "loss": 0.6741, "step": 18380 }, { "epoch": 0.5366558640623631, "grad_norm": 0.6040737196728253, "learning_rate": 1.029683698296837e-05, "loss": 0.4951, "step": 18381 }, { "epoch": 0.5366850602902105, "grad_norm": 0.6627005619283419, "learning_rate": 1.0296188158961882e-05, "loss": 0.6484, "step": 18382 }, { "epoch": 0.5367142565180578, "grad_norm": 0.6389102307331093, "learning_rate": 1.0295539334955393e-05, "loss": 0.6305, "step": 18383 }, { "epoch": 0.5367434527459052, "grad_norm": 0.6876417872974881, "learning_rate": 1.0294890510948905e-05, "loss": 0.6253, "step": 18384 }, { "epoch": 0.5367726489737525, "grad_norm": 0.6199562779005304, "learning_rate": 1.0294241686942417e-05, "loss": 0.6243, "step": 18385 }, { "epoch": 0.5368018452015999, "grad_norm": 0.6110879599109333, "learning_rate": 1.029359286293593e-05, "loss": 0.5758, "step": 18386 }, { "epoch": 0.5368310414294473, "grad_norm": 0.6708709278604523, "learning_rate": 1.0292944038929443e-05, "loss": 0.6467, "step": 18387 }, { "epoch": 0.5368602376572946, "grad_norm": 0.6807420523647314, "learning_rate": 1.0292295214922955e-05, "loss": 0.6459, "step": 18388 }, { "epoch": 0.536889433885142, "grad_norm": 0.6811275987287624, "learning_rate": 1.0291646390916465e-05, "loss": 0.6685, "step": 18389 }, { "epoch": 0.5369186301129893, "grad_norm": 0.6137095157507071, "learning_rate": 1.0290997566909977e-05, "loss": 0.5806, "step": 18390 }, { "epoch": 0.5369478263408367, "grad_norm": 0.6131921606476882, "learning_rate": 1.0290348742903489e-05, "loss": 0.5815, "step": 18391 }, { "epoch": 0.5369770225686842, "grad_norm": 0.6381945012647096, "learning_rate": 1.0289699918897001e-05, "loss": 0.6026, "step": 18392 }, { "epoch": 0.5370062187965315, "grad_norm": 0.6199659803580364, "learning_rate": 1.0289051094890511e-05, "loss": 0.5628, "step": 18393 }, { "epoch": 0.5370354150243789, "grad_norm": 0.6742130915291258, "learning_rate": 1.0288402270884023e-05, "loss": 0.6759, "step": 18394 }, { "epoch": 0.5370646112522263, "grad_norm": 0.6602947734254242, "learning_rate": 1.0287753446877535e-05, "loss": 0.6042, "step": 18395 }, { "epoch": 0.5370938074800736, "grad_norm": 0.6654090734643501, "learning_rate": 1.0287104622871047e-05, "loss": 0.6489, "step": 18396 }, { "epoch": 0.537123003707921, "grad_norm": 0.6238946727751166, "learning_rate": 1.028645579886456e-05, "loss": 0.5891, "step": 18397 }, { "epoch": 0.5371521999357683, "grad_norm": 0.683748409301273, "learning_rate": 1.028580697485807e-05, "loss": 0.6837, "step": 18398 }, { "epoch": 0.5371813961636157, "grad_norm": 0.6147533079025961, "learning_rate": 1.0285158150851582e-05, "loss": 0.5534, "step": 18399 }, { "epoch": 0.5372105923914631, "grad_norm": 0.6055484868491166, "learning_rate": 1.0284509326845094e-05, "loss": 0.5414, "step": 18400 }, { "epoch": 0.5372397886193104, "grad_norm": 0.624075830364718, "learning_rate": 1.0283860502838606e-05, "loss": 0.6089, "step": 18401 }, { "epoch": 0.5372689848471578, "grad_norm": 0.6756598969054434, "learning_rate": 1.0283211678832116e-05, "loss": 0.6482, "step": 18402 }, { "epoch": 0.5372981810750052, "grad_norm": 0.6640903437881177, "learning_rate": 1.0282562854825628e-05, "loss": 0.6562, "step": 18403 }, { "epoch": 0.5373273773028525, "grad_norm": 0.649301696644413, "learning_rate": 1.028191403081914e-05, "loss": 0.6672, "step": 18404 }, { "epoch": 0.5373565735306999, "grad_norm": 0.6550789106198525, "learning_rate": 1.0281265206812654e-05, "loss": 0.6137, "step": 18405 }, { "epoch": 0.5373857697585472, "grad_norm": 0.6633433874864523, "learning_rate": 1.0280616382806166e-05, "loss": 0.6487, "step": 18406 }, { "epoch": 0.5374149659863946, "grad_norm": 0.7107380318066301, "learning_rate": 1.0279967558799678e-05, "loss": 0.687, "step": 18407 }, { "epoch": 0.537444162214242, "grad_norm": 0.657460970119144, "learning_rate": 1.0279318734793188e-05, "loss": 0.6277, "step": 18408 }, { "epoch": 0.5374733584420893, "grad_norm": 0.6618754426686861, "learning_rate": 1.02786699107867e-05, "loss": 0.6526, "step": 18409 }, { "epoch": 0.5375025546699367, "grad_norm": 0.6186579628213793, "learning_rate": 1.0278021086780212e-05, "loss": 0.5695, "step": 18410 }, { "epoch": 0.537531750897784, "grad_norm": 0.6247118375414767, "learning_rate": 1.0277372262773724e-05, "loss": 0.5963, "step": 18411 }, { "epoch": 0.5375609471256314, "grad_norm": 0.6412839500553622, "learning_rate": 1.0276723438767235e-05, "loss": 0.6096, "step": 18412 }, { "epoch": 0.5375901433534788, "grad_norm": 0.663491599256584, "learning_rate": 1.0276074614760747e-05, "loss": 0.6375, "step": 18413 }, { "epoch": 0.5376193395813261, "grad_norm": 0.631861663800218, "learning_rate": 1.0275425790754259e-05, "loss": 0.6238, "step": 18414 }, { "epoch": 0.5376485358091735, "grad_norm": 0.6204054092738563, "learning_rate": 1.027477696674777e-05, "loss": 0.5874, "step": 18415 }, { "epoch": 0.5376777320370209, "grad_norm": 0.6130190647604643, "learning_rate": 1.0274128142741283e-05, "loss": 0.5828, "step": 18416 }, { "epoch": 0.5377069282648682, "grad_norm": 0.6324439083786969, "learning_rate": 1.0273479318734793e-05, "loss": 0.5815, "step": 18417 }, { "epoch": 0.5377361244927156, "grad_norm": 0.5936714963985387, "learning_rate": 1.0272830494728305e-05, "loss": 0.5898, "step": 18418 }, { "epoch": 0.5377653207205629, "grad_norm": 0.5710419623316182, "learning_rate": 1.0272181670721817e-05, "loss": 0.502, "step": 18419 }, { "epoch": 0.5377945169484103, "grad_norm": 0.6660982647008544, "learning_rate": 1.027153284671533e-05, "loss": 0.6102, "step": 18420 }, { "epoch": 0.5378237131762577, "grad_norm": 0.6220462324540961, "learning_rate": 1.027088402270884e-05, "loss": 0.6154, "step": 18421 }, { "epoch": 0.537852909404105, "grad_norm": 0.6698236065185521, "learning_rate": 1.0270235198702352e-05, "loss": 0.6538, "step": 18422 }, { "epoch": 0.5378821056319524, "grad_norm": 0.7161281397919056, "learning_rate": 1.0269586374695864e-05, "loss": 0.7011, "step": 18423 }, { "epoch": 0.5379113018597997, "grad_norm": 0.6676520925137689, "learning_rate": 1.0268937550689377e-05, "loss": 0.6226, "step": 18424 }, { "epoch": 0.5379404980876471, "grad_norm": 0.6879975576947512, "learning_rate": 1.026828872668289e-05, "loss": 0.6599, "step": 18425 }, { "epoch": 0.5379696943154945, "grad_norm": 0.6597311271918743, "learning_rate": 1.0267639902676401e-05, "loss": 0.6565, "step": 18426 }, { "epoch": 0.5379988905433418, "grad_norm": 0.6761878284999698, "learning_rate": 1.0266991078669912e-05, "loss": 0.6833, "step": 18427 }, { "epoch": 0.5380280867711892, "grad_norm": 0.6338779007347415, "learning_rate": 1.0266342254663424e-05, "loss": 0.5803, "step": 18428 }, { "epoch": 0.5380572829990365, "grad_norm": 0.6697805592916277, "learning_rate": 1.0265693430656936e-05, "loss": 0.646, "step": 18429 }, { "epoch": 0.5380864792268839, "grad_norm": 0.6123705428813164, "learning_rate": 1.0265044606650448e-05, "loss": 0.5958, "step": 18430 }, { "epoch": 0.5381156754547313, "grad_norm": 0.6390594639805645, "learning_rate": 1.0264395782643958e-05, "loss": 0.5592, "step": 18431 }, { "epoch": 0.5381448716825786, "grad_norm": 0.6172331707226242, "learning_rate": 1.026374695863747e-05, "loss": 0.5618, "step": 18432 }, { "epoch": 0.538174067910426, "grad_norm": 0.624896291561054, "learning_rate": 1.0263098134630982e-05, "loss": 0.587, "step": 18433 }, { "epoch": 0.5382032641382734, "grad_norm": 0.6748866453513712, "learning_rate": 1.0262449310624494e-05, "loss": 0.6344, "step": 18434 }, { "epoch": 0.5382324603661207, "grad_norm": 0.7028143246706636, "learning_rate": 1.0261800486618006e-05, "loss": 0.653, "step": 18435 }, { "epoch": 0.5382616565939681, "grad_norm": 0.6565356825394807, "learning_rate": 1.0261151662611517e-05, "loss": 0.5989, "step": 18436 }, { "epoch": 0.5382908528218154, "grad_norm": 0.6637285724600637, "learning_rate": 1.0260502838605029e-05, "loss": 0.6733, "step": 18437 }, { "epoch": 0.5383200490496628, "grad_norm": 0.6741002441430156, "learning_rate": 1.025985401459854e-05, "loss": 0.6489, "step": 18438 }, { "epoch": 0.5383492452775102, "grad_norm": 0.654568398459966, "learning_rate": 1.0259205190592053e-05, "loss": 0.6051, "step": 18439 }, { "epoch": 0.5383784415053575, "grad_norm": 0.6544046371429565, "learning_rate": 1.0258556366585563e-05, "loss": 0.6518, "step": 18440 }, { "epoch": 0.5384076377332049, "grad_norm": 0.6179029535613598, "learning_rate": 1.0257907542579075e-05, "loss": 0.5912, "step": 18441 }, { "epoch": 0.5384368339610522, "grad_norm": 0.6132359309038482, "learning_rate": 1.0257258718572587e-05, "loss": 0.5762, "step": 18442 }, { "epoch": 0.5384660301888996, "grad_norm": 0.654656930091077, "learning_rate": 1.02566098945661e-05, "loss": 0.6177, "step": 18443 }, { "epoch": 0.538495226416747, "grad_norm": 0.6248956446884072, "learning_rate": 1.0255961070559613e-05, "loss": 0.5497, "step": 18444 }, { "epoch": 0.5385244226445943, "grad_norm": 0.6571450027088368, "learning_rate": 1.0255312246553125e-05, "loss": 0.6199, "step": 18445 }, { "epoch": 0.5385536188724417, "grad_norm": 0.6182132505999347, "learning_rate": 1.0254663422546635e-05, "loss": 0.5985, "step": 18446 }, { "epoch": 0.538582815100289, "grad_norm": 0.6871535276255616, "learning_rate": 1.0254014598540147e-05, "loss": 0.6459, "step": 18447 }, { "epoch": 0.5386120113281364, "grad_norm": 0.5766537832440899, "learning_rate": 1.025336577453366e-05, "loss": 0.5092, "step": 18448 }, { "epoch": 0.5386412075559838, "grad_norm": 0.6302749599447046, "learning_rate": 1.0252716950527171e-05, "loss": 0.5697, "step": 18449 }, { "epoch": 0.5386704037838311, "grad_norm": 0.7011403968599422, "learning_rate": 1.0252068126520682e-05, "loss": 0.6274, "step": 18450 }, { "epoch": 0.5386996000116785, "grad_norm": 0.6169778702709866, "learning_rate": 1.0251419302514194e-05, "loss": 0.5913, "step": 18451 }, { "epoch": 0.5387287962395259, "grad_norm": 0.6955992021434174, "learning_rate": 1.0250770478507706e-05, "loss": 0.7169, "step": 18452 }, { "epoch": 0.5387579924673732, "grad_norm": 0.5970234433596321, "learning_rate": 1.0250121654501218e-05, "loss": 0.5254, "step": 18453 }, { "epoch": 0.5387871886952206, "grad_norm": 0.6396251137340299, "learning_rate": 1.024947283049473e-05, "loss": 0.588, "step": 18454 }, { "epoch": 0.5388163849230679, "grad_norm": 0.6222183118805477, "learning_rate": 1.024882400648824e-05, "loss": 0.577, "step": 18455 }, { "epoch": 0.5388455811509153, "grad_norm": 0.6775054092452046, "learning_rate": 1.0248175182481752e-05, "loss": 0.7181, "step": 18456 }, { "epoch": 0.5388747773787627, "grad_norm": 0.6203373960954525, "learning_rate": 1.0247526358475264e-05, "loss": 0.5689, "step": 18457 }, { "epoch": 0.53890397360661, "grad_norm": 0.6020994307817992, "learning_rate": 1.0246877534468776e-05, "loss": 0.5456, "step": 18458 }, { "epoch": 0.5389331698344574, "grad_norm": 0.6397345806077891, "learning_rate": 1.0246228710462286e-05, "loss": 0.5311, "step": 18459 }, { "epoch": 0.5389623660623047, "grad_norm": 0.6215185425411176, "learning_rate": 1.0245579886455798e-05, "loss": 0.5916, "step": 18460 }, { "epoch": 0.5389915622901521, "grad_norm": 0.6249416017702447, "learning_rate": 1.0244931062449312e-05, "loss": 0.5544, "step": 18461 }, { "epoch": 0.5390207585179995, "grad_norm": 0.6250739572736665, "learning_rate": 1.0244282238442824e-05, "loss": 0.5581, "step": 18462 }, { "epoch": 0.5390499547458468, "grad_norm": 0.6636247662488167, "learning_rate": 1.0243633414436336e-05, "loss": 0.6464, "step": 18463 }, { "epoch": 0.5390791509736942, "grad_norm": 0.6508500072768846, "learning_rate": 1.0242984590429848e-05, "loss": 0.6021, "step": 18464 }, { "epoch": 0.5391083472015415, "grad_norm": 0.64779330219234, "learning_rate": 1.0242335766423359e-05, "loss": 0.6124, "step": 18465 }, { "epoch": 0.5391375434293889, "grad_norm": 0.6342124573900483, "learning_rate": 1.024168694241687e-05, "loss": 0.5879, "step": 18466 }, { "epoch": 0.5391667396572363, "grad_norm": 0.6684374106196425, "learning_rate": 1.0241038118410383e-05, "loss": 0.6807, "step": 18467 }, { "epoch": 0.5391959358850836, "grad_norm": 0.8132834693195884, "learning_rate": 1.0240389294403895e-05, "loss": 0.6375, "step": 18468 }, { "epoch": 0.539225132112931, "grad_norm": 0.6630087934718049, "learning_rate": 1.0239740470397405e-05, "loss": 0.6021, "step": 18469 }, { "epoch": 0.5392543283407784, "grad_norm": 0.6280485831780983, "learning_rate": 1.0239091646390917e-05, "loss": 0.6238, "step": 18470 }, { "epoch": 0.5392835245686257, "grad_norm": 0.6577837742886519, "learning_rate": 1.0238442822384429e-05, "loss": 0.6587, "step": 18471 }, { "epoch": 0.5393127207964731, "grad_norm": 0.6769201410646211, "learning_rate": 1.0237793998377941e-05, "loss": 0.6901, "step": 18472 }, { "epoch": 0.5393419170243204, "grad_norm": 0.636996904511392, "learning_rate": 1.0237145174371451e-05, "loss": 0.6202, "step": 18473 }, { "epoch": 0.5393711132521678, "grad_norm": 0.641192182217705, "learning_rate": 1.0236496350364963e-05, "loss": 0.6238, "step": 18474 }, { "epoch": 0.5394003094800152, "grad_norm": 0.7005880047868717, "learning_rate": 1.0235847526358475e-05, "loss": 0.6268, "step": 18475 }, { "epoch": 0.5394295057078625, "grad_norm": 0.6562561073963544, "learning_rate": 1.0235198702351988e-05, "loss": 0.6366, "step": 18476 }, { "epoch": 0.5394587019357099, "grad_norm": 0.6843354662284602, "learning_rate": 1.02345498783455e-05, "loss": 0.606, "step": 18477 }, { "epoch": 0.5394878981635572, "grad_norm": 0.7222242330837076, "learning_rate": 1.023390105433901e-05, "loss": 0.735, "step": 18478 }, { "epoch": 0.5395170943914046, "grad_norm": 0.6699378564596289, "learning_rate": 1.0233252230332522e-05, "loss": 0.6556, "step": 18479 }, { "epoch": 0.539546290619252, "grad_norm": 0.6634236029113285, "learning_rate": 1.0232603406326036e-05, "loss": 0.6714, "step": 18480 }, { "epoch": 0.5395754868470993, "grad_norm": 0.6326960376953003, "learning_rate": 1.0231954582319548e-05, "loss": 0.5916, "step": 18481 }, { "epoch": 0.5396046830749467, "grad_norm": 0.6726595440966252, "learning_rate": 1.023130575831306e-05, "loss": 0.6548, "step": 18482 }, { "epoch": 0.539633879302794, "grad_norm": 0.6062722653206253, "learning_rate": 1.0230656934306572e-05, "loss": 0.5637, "step": 18483 }, { "epoch": 0.5396630755306414, "grad_norm": 0.6679690507074986, "learning_rate": 1.0230008110300082e-05, "loss": 0.6352, "step": 18484 }, { "epoch": 0.5396922717584888, "grad_norm": 0.6213139863656282, "learning_rate": 1.0229359286293594e-05, "loss": 0.6182, "step": 18485 }, { "epoch": 0.5397214679863361, "grad_norm": 0.6460710834765923, "learning_rate": 1.0228710462287106e-05, "loss": 0.6313, "step": 18486 }, { "epoch": 0.5397506642141835, "grad_norm": 0.6071162716359437, "learning_rate": 1.0228061638280618e-05, "loss": 0.5773, "step": 18487 }, { "epoch": 0.5397798604420309, "grad_norm": 0.5982672206271397, "learning_rate": 1.0227412814274128e-05, "loss": 0.547, "step": 18488 }, { "epoch": 0.5398090566698782, "grad_norm": 0.6034555264769828, "learning_rate": 1.022676399026764e-05, "loss": 0.533, "step": 18489 }, { "epoch": 0.5398382528977256, "grad_norm": 0.6420429965951902, "learning_rate": 1.0226115166261153e-05, "loss": 0.6039, "step": 18490 }, { "epoch": 0.5398674491255729, "grad_norm": 0.6142741568078427, "learning_rate": 1.0225466342254665e-05, "loss": 0.5868, "step": 18491 }, { "epoch": 0.5398966453534203, "grad_norm": 0.6386687228281667, "learning_rate": 1.0224817518248175e-05, "loss": 0.6063, "step": 18492 }, { "epoch": 0.5399258415812677, "grad_norm": 0.6449531881217155, "learning_rate": 1.0224168694241687e-05, "loss": 0.6437, "step": 18493 }, { "epoch": 0.539955037809115, "grad_norm": 0.6079960011174111, "learning_rate": 1.0223519870235199e-05, "loss": 0.5553, "step": 18494 }, { "epoch": 0.5399842340369624, "grad_norm": 0.6295790810106564, "learning_rate": 1.0222871046228711e-05, "loss": 0.5762, "step": 18495 }, { "epoch": 0.5400134302648097, "grad_norm": 0.6282418657468682, "learning_rate": 1.0222222222222223e-05, "loss": 0.5874, "step": 18496 }, { "epoch": 0.5400426264926571, "grad_norm": 0.6351441396725677, "learning_rate": 1.0221573398215733e-05, "loss": 0.6254, "step": 18497 }, { "epoch": 0.5400718227205045, "grad_norm": 0.626774927694167, "learning_rate": 1.0220924574209245e-05, "loss": 0.6033, "step": 18498 }, { "epoch": 0.5401010189483518, "grad_norm": 0.5974088902492428, "learning_rate": 1.0220275750202759e-05, "loss": 0.5422, "step": 18499 }, { "epoch": 0.5401302151761992, "grad_norm": 0.5922604474866277, "learning_rate": 1.0219626926196271e-05, "loss": 0.49, "step": 18500 }, { "epoch": 0.5401594114040466, "grad_norm": 0.6538516113680598, "learning_rate": 1.0218978102189783e-05, "loss": 0.6066, "step": 18501 }, { "epoch": 0.5401886076318939, "grad_norm": 0.681107354510311, "learning_rate": 1.0218329278183295e-05, "loss": 0.6592, "step": 18502 }, { "epoch": 0.5402178038597413, "grad_norm": 0.6126665771097027, "learning_rate": 1.0217680454176805e-05, "loss": 0.5716, "step": 18503 }, { "epoch": 0.5402470000875886, "grad_norm": 0.6352546590649932, "learning_rate": 1.0217031630170318e-05, "loss": 0.5717, "step": 18504 }, { "epoch": 0.540276196315436, "grad_norm": 0.6484131470353339, "learning_rate": 1.021638280616383e-05, "loss": 0.6125, "step": 18505 }, { "epoch": 0.5403053925432834, "grad_norm": 0.6586036236343796, "learning_rate": 1.0215733982157342e-05, "loss": 0.657, "step": 18506 }, { "epoch": 0.5403345887711307, "grad_norm": 0.6306443412478331, "learning_rate": 1.0215085158150852e-05, "loss": 0.5983, "step": 18507 }, { "epoch": 0.5403637849989781, "grad_norm": 0.7135125196230012, "learning_rate": 1.0214436334144364e-05, "loss": 0.6786, "step": 18508 }, { "epoch": 0.5403929812268254, "grad_norm": 0.6218137181169342, "learning_rate": 1.0213787510137876e-05, "loss": 0.555, "step": 18509 }, { "epoch": 0.5404221774546728, "grad_norm": 0.6235781135360993, "learning_rate": 1.0213138686131388e-05, "loss": 0.6036, "step": 18510 }, { "epoch": 0.5404513736825202, "grad_norm": 0.5842975189880195, "learning_rate": 1.0212489862124898e-05, "loss": 0.5126, "step": 18511 }, { "epoch": 0.5404805699103675, "grad_norm": 0.736139312897893, "learning_rate": 1.021184103811841e-05, "loss": 0.6341, "step": 18512 }, { "epoch": 0.540509766138215, "grad_norm": 0.6246160676105912, "learning_rate": 1.0211192214111922e-05, "loss": 0.5725, "step": 18513 }, { "epoch": 0.5405389623660624, "grad_norm": 0.5996764302933952, "learning_rate": 1.0210543390105434e-05, "loss": 0.5267, "step": 18514 }, { "epoch": 0.5405681585939097, "grad_norm": 0.6471324881670776, "learning_rate": 1.0209894566098946e-05, "loss": 0.5335, "step": 18515 }, { "epoch": 0.5405973548217571, "grad_norm": 0.622445179352281, "learning_rate": 1.0209245742092457e-05, "loss": 0.5498, "step": 18516 }, { "epoch": 0.5406265510496044, "grad_norm": 0.6542935885368804, "learning_rate": 1.0208596918085969e-05, "loss": 0.6173, "step": 18517 }, { "epoch": 0.5406557472774518, "grad_norm": 0.8113817974973844, "learning_rate": 1.0207948094079482e-05, "loss": 0.6875, "step": 18518 }, { "epoch": 0.5406849435052992, "grad_norm": 0.6905284103383432, "learning_rate": 1.0207299270072995e-05, "loss": 0.6549, "step": 18519 }, { "epoch": 0.5407141397331465, "grad_norm": 0.6576840744538898, "learning_rate": 1.0206650446066507e-05, "loss": 0.6503, "step": 18520 }, { "epoch": 0.5407433359609939, "grad_norm": 0.622975848142361, "learning_rate": 1.0206001622060019e-05, "loss": 0.6105, "step": 18521 }, { "epoch": 0.5407725321888412, "grad_norm": 0.6448420396604408, "learning_rate": 1.0205352798053529e-05, "loss": 0.5984, "step": 18522 }, { "epoch": 0.5408017284166886, "grad_norm": 0.640809236903388, "learning_rate": 1.0204703974047041e-05, "loss": 0.5848, "step": 18523 }, { "epoch": 0.540830924644536, "grad_norm": 0.6395802814962361, "learning_rate": 1.0204055150040553e-05, "loss": 0.6021, "step": 18524 }, { "epoch": 0.5408601208723833, "grad_norm": 0.6662475540446758, "learning_rate": 1.0203406326034065e-05, "loss": 0.6804, "step": 18525 }, { "epoch": 0.5408893171002307, "grad_norm": 0.6391546813077907, "learning_rate": 1.0202757502027575e-05, "loss": 0.6016, "step": 18526 }, { "epoch": 0.540918513328078, "grad_norm": 0.7051077399899233, "learning_rate": 1.0202108678021087e-05, "loss": 0.7023, "step": 18527 }, { "epoch": 0.5409477095559254, "grad_norm": 0.6671243820111528, "learning_rate": 1.02014598540146e-05, "loss": 0.6401, "step": 18528 }, { "epoch": 0.5409769057837728, "grad_norm": 0.6979934269885425, "learning_rate": 1.0200811030008111e-05, "loss": 0.6778, "step": 18529 }, { "epoch": 0.5410061020116201, "grad_norm": 0.654952370254849, "learning_rate": 1.0200162206001622e-05, "loss": 0.6291, "step": 18530 }, { "epoch": 0.5410352982394675, "grad_norm": 0.6295501329642791, "learning_rate": 1.0199513381995134e-05, "loss": 0.5611, "step": 18531 }, { "epoch": 0.5410644944673149, "grad_norm": 0.677693396686088, "learning_rate": 1.0198864557988646e-05, "loss": 0.6424, "step": 18532 }, { "epoch": 0.5410936906951622, "grad_norm": 0.5731272044076854, "learning_rate": 1.0198215733982158e-05, "loss": 0.4978, "step": 18533 }, { "epoch": 0.5411228869230096, "grad_norm": 0.6550550628175572, "learning_rate": 1.019756690997567e-05, "loss": 0.6351, "step": 18534 }, { "epoch": 0.5411520831508569, "grad_norm": 0.5733640800354849, "learning_rate": 1.019691808596918e-05, "loss": 0.5176, "step": 18535 }, { "epoch": 0.5411812793787043, "grad_norm": 0.6504239136779143, "learning_rate": 1.0196269261962692e-05, "loss": 0.6095, "step": 18536 }, { "epoch": 0.5412104756065517, "grad_norm": 0.6236766575116541, "learning_rate": 1.0195620437956206e-05, "loss": 0.6073, "step": 18537 }, { "epoch": 0.541239671834399, "grad_norm": 0.6842063311684854, "learning_rate": 1.0194971613949718e-05, "loss": 0.6788, "step": 18538 }, { "epoch": 0.5412688680622464, "grad_norm": 0.5767873557239983, "learning_rate": 1.019432278994323e-05, "loss": 0.5435, "step": 18539 }, { "epoch": 0.5412980642900938, "grad_norm": 0.6353076707756142, "learning_rate": 1.0193673965936742e-05, "loss": 0.6159, "step": 18540 }, { "epoch": 0.5413272605179411, "grad_norm": 0.6230779776966061, "learning_rate": 1.0193025141930252e-05, "loss": 0.5738, "step": 18541 }, { "epoch": 0.5413564567457885, "grad_norm": 0.6949816253661092, "learning_rate": 1.0192376317923764e-05, "loss": 0.6283, "step": 18542 }, { "epoch": 0.5413856529736358, "grad_norm": 0.6113600600466532, "learning_rate": 1.0191727493917276e-05, "loss": 0.5487, "step": 18543 }, { "epoch": 0.5414148492014832, "grad_norm": 0.6147227150349468, "learning_rate": 1.0191078669910788e-05, "loss": 0.5428, "step": 18544 }, { "epoch": 0.5414440454293306, "grad_norm": 0.641475191579316, "learning_rate": 1.0190429845904299e-05, "loss": 0.5965, "step": 18545 }, { "epoch": 0.5414732416571779, "grad_norm": 0.6417982924104974, "learning_rate": 1.018978102189781e-05, "loss": 0.6362, "step": 18546 }, { "epoch": 0.5415024378850253, "grad_norm": 0.6687360202870288, "learning_rate": 1.0189132197891323e-05, "loss": 0.6439, "step": 18547 }, { "epoch": 0.5415316341128726, "grad_norm": 0.6071885273966889, "learning_rate": 1.0188483373884835e-05, "loss": 0.5243, "step": 18548 }, { "epoch": 0.54156083034072, "grad_norm": 0.5777982094735705, "learning_rate": 1.0187834549878345e-05, "loss": 0.4801, "step": 18549 }, { "epoch": 0.5415900265685674, "grad_norm": 0.6800860872899976, "learning_rate": 1.0187185725871857e-05, "loss": 0.5634, "step": 18550 }, { "epoch": 0.5416192227964147, "grad_norm": 0.6271021981891389, "learning_rate": 1.018653690186537e-05, "loss": 0.6318, "step": 18551 }, { "epoch": 0.5416484190242621, "grad_norm": 0.6681021381388175, "learning_rate": 1.0185888077858881e-05, "loss": 0.6637, "step": 18552 }, { "epoch": 0.5416776152521094, "grad_norm": 0.6118461460801571, "learning_rate": 1.0185239253852393e-05, "loss": 0.5537, "step": 18553 }, { "epoch": 0.5417068114799568, "grad_norm": 0.6093323877268904, "learning_rate": 1.0184590429845904e-05, "loss": 0.5604, "step": 18554 }, { "epoch": 0.5417360077078042, "grad_norm": 0.5934751011311913, "learning_rate": 1.0183941605839416e-05, "loss": 0.5403, "step": 18555 }, { "epoch": 0.5417652039356515, "grad_norm": 0.6416376373880945, "learning_rate": 1.018329278183293e-05, "loss": 0.6724, "step": 18556 }, { "epoch": 0.5417944001634989, "grad_norm": 0.6990396242800938, "learning_rate": 1.0182643957826441e-05, "loss": 0.6824, "step": 18557 }, { "epoch": 0.5418235963913463, "grad_norm": 0.6326133268961427, "learning_rate": 1.0181995133819953e-05, "loss": 0.6003, "step": 18558 }, { "epoch": 0.5418527926191936, "grad_norm": 0.6303351277699173, "learning_rate": 1.0181346309813465e-05, "loss": 0.5786, "step": 18559 }, { "epoch": 0.541881988847041, "grad_norm": 0.6202111244442782, "learning_rate": 1.0180697485806976e-05, "loss": 0.5646, "step": 18560 }, { "epoch": 0.5419111850748883, "grad_norm": 0.6275339960659954, "learning_rate": 1.0180048661800488e-05, "loss": 0.5678, "step": 18561 }, { "epoch": 0.5419403813027357, "grad_norm": 0.6947345650367022, "learning_rate": 1.0179399837794e-05, "loss": 0.7164, "step": 18562 }, { "epoch": 0.5419695775305831, "grad_norm": 0.6346989937517493, "learning_rate": 1.0178751013787512e-05, "loss": 0.572, "step": 18563 }, { "epoch": 0.5419987737584304, "grad_norm": 0.6872014506094604, "learning_rate": 1.0178102189781022e-05, "loss": 0.6629, "step": 18564 }, { "epoch": 0.5420279699862778, "grad_norm": 0.6110869239154368, "learning_rate": 1.0177453365774534e-05, "loss": 0.5618, "step": 18565 }, { "epoch": 0.5420571662141251, "grad_norm": 0.6057652030360064, "learning_rate": 1.0176804541768046e-05, "loss": 0.5732, "step": 18566 }, { "epoch": 0.5420863624419725, "grad_norm": 0.6935793266347591, "learning_rate": 1.0176155717761558e-05, "loss": 0.6998, "step": 18567 }, { "epoch": 0.5421155586698199, "grad_norm": 0.6183790241441027, "learning_rate": 1.0175506893755069e-05, "loss": 0.597, "step": 18568 }, { "epoch": 0.5421447548976672, "grad_norm": 0.6676180346533926, "learning_rate": 1.017485806974858e-05, "loss": 0.699, "step": 18569 }, { "epoch": 0.5421739511255146, "grad_norm": 0.6311082490986796, "learning_rate": 1.0174209245742093e-05, "loss": 0.5862, "step": 18570 }, { "epoch": 0.542203147353362, "grad_norm": 0.6636042158270701, "learning_rate": 1.0173560421735605e-05, "loss": 0.6286, "step": 18571 }, { "epoch": 0.5422323435812093, "grad_norm": 0.6371357201662494, "learning_rate": 1.0172911597729117e-05, "loss": 0.557, "step": 18572 }, { "epoch": 0.5422615398090567, "grad_norm": 0.6266712699455048, "learning_rate": 1.0172262773722627e-05, "loss": 0.5921, "step": 18573 }, { "epoch": 0.542290736036904, "grad_norm": 0.5998621252089938, "learning_rate": 1.0171613949716139e-05, "loss": 0.5728, "step": 18574 }, { "epoch": 0.5423199322647514, "grad_norm": 0.6452762282193197, "learning_rate": 1.0170965125709653e-05, "loss": 0.5679, "step": 18575 }, { "epoch": 0.5423491284925988, "grad_norm": 0.6572635752210388, "learning_rate": 1.0170316301703165e-05, "loss": 0.6162, "step": 18576 }, { "epoch": 0.5423783247204461, "grad_norm": 0.6343870741937387, "learning_rate": 1.0169667477696677e-05, "loss": 0.6013, "step": 18577 }, { "epoch": 0.5424075209482935, "grad_norm": 0.6074169750982054, "learning_rate": 1.0169018653690187e-05, "loss": 0.5715, "step": 18578 }, { "epoch": 0.5424367171761408, "grad_norm": 0.6522125430348152, "learning_rate": 1.01683698296837e-05, "loss": 0.6013, "step": 18579 }, { "epoch": 0.5424659134039882, "grad_norm": 0.6584972924486768, "learning_rate": 1.0167721005677211e-05, "loss": 0.6201, "step": 18580 }, { "epoch": 0.5424951096318356, "grad_norm": 0.6410106756176916, "learning_rate": 1.0167072181670723e-05, "loss": 0.5319, "step": 18581 }, { "epoch": 0.5425243058596829, "grad_norm": 0.6426985234977587, "learning_rate": 1.0166423357664235e-05, "loss": 0.608, "step": 18582 }, { "epoch": 0.5425535020875303, "grad_norm": 0.7178237336272008, "learning_rate": 1.0165774533657746e-05, "loss": 0.7263, "step": 18583 }, { "epoch": 0.5425826983153776, "grad_norm": 0.6149345498459494, "learning_rate": 1.0165125709651258e-05, "loss": 0.5677, "step": 18584 }, { "epoch": 0.542611894543225, "grad_norm": 0.664068011017211, "learning_rate": 1.016447688564477e-05, "loss": 0.653, "step": 18585 }, { "epoch": 0.5426410907710724, "grad_norm": 0.6351871956661781, "learning_rate": 1.0163828061638282e-05, "loss": 0.5607, "step": 18586 }, { "epoch": 0.5426702869989197, "grad_norm": 0.5963692918072604, "learning_rate": 1.0163179237631792e-05, "loss": 0.5385, "step": 18587 }, { "epoch": 0.5426994832267671, "grad_norm": 0.6189331778460018, "learning_rate": 1.0162530413625304e-05, "loss": 0.5581, "step": 18588 }, { "epoch": 0.5427286794546144, "grad_norm": 0.6450916420429433, "learning_rate": 1.0161881589618816e-05, "loss": 0.6054, "step": 18589 }, { "epoch": 0.5427578756824618, "grad_norm": 0.5879979145515246, "learning_rate": 1.0161232765612328e-05, "loss": 0.5261, "step": 18590 }, { "epoch": 0.5427870719103092, "grad_norm": 0.6685055179710069, "learning_rate": 1.016058394160584e-05, "loss": 0.6601, "step": 18591 }, { "epoch": 0.5428162681381565, "grad_norm": 0.6234750562698372, "learning_rate": 1.015993511759935e-05, "loss": 0.5971, "step": 18592 }, { "epoch": 0.5428454643660039, "grad_norm": 0.6840933800272305, "learning_rate": 1.0159286293592863e-05, "loss": 0.6579, "step": 18593 }, { "epoch": 0.5428746605938513, "grad_norm": 0.6255503231243124, "learning_rate": 1.0158637469586376e-05, "loss": 0.5606, "step": 18594 }, { "epoch": 0.5429038568216986, "grad_norm": 0.6910203126788397, "learning_rate": 1.0157988645579888e-05, "loss": 0.6199, "step": 18595 }, { "epoch": 0.542933053049546, "grad_norm": 0.6582921593711981, "learning_rate": 1.01573398215734e-05, "loss": 0.6261, "step": 18596 }, { "epoch": 0.5429622492773933, "grad_norm": 0.5894665647584193, "learning_rate": 1.015669099756691e-05, "loss": 0.5551, "step": 18597 }, { "epoch": 0.5429914455052407, "grad_norm": 0.6307759350785522, "learning_rate": 1.0156042173560423e-05, "loss": 0.5918, "step": 18598 }, { "epoch": 0.5430206417330881, "grad_norm": 0.6509164878484648, "learning_rate": 1.0155393349553935e-05, "loss": 0.6626, "step": 18599 }, { "epoch": 0.5430498379609354, "grad_norm": 0.6408309803616511, "learning_rate": 1.0154744525547447e-05, "loss": 0.5362, "step": 18600 }, { "epoch": 0.5430790341887828, "grad_norm": 0.6396413147134707, "learning_rate": 1.0154095701540959e-05, "loss": 0.5849, "step": 18601 }, { "epoch": 0.5431082304166301, "grad_norm": 0.6236656301208623, "learning_rate": 1.0153446877534469e-05, "loss": 0.5991, "step": 18602 }, { "epoch": 0.5431374266444775, "grad_norm": 0.6409901555837594, "learning_rate": 1.0152798053527981e-05, "loss": 0.5752, "step": 18603 }, { "epoch": 0.5431666228723249, "grad_norm": 0.6159933725562866, "learning_rate": 1.0152149229521493e-05, "loss": 0.5641, "step": 18604 }, { "epoch": 0.5431958191001722, "grad_norm": 0.7118347995760822, "learning_rate": 1.0151500405515005e-05, "loss": 0.6482, "step": 18605 }, { "epoch": 0.5432250153280196, "grad_norm": 0.6589529499428121, "learning_rate": 1.0150851581508515e-05, "loss": 0.6014, "step": 18606 }, { "epoch": 0.543254211555867, "grad_norm": 0.6291707347121089, "learning_rate": 1.0150202757502028e-05, "loss": 0.5961, "step": 18607 }, { "epoch": 0.5432834077837143, "grad_norm": 0.6385090315882291, "learning_rate": 1.014955393349554e-05, "loss": 0.6306, "step": 18608 }, { "epoch": 0.5433126040115617, "grad_norm": 0.6748416926780691, "learning_rate": 1.0148905109489052e-05, "loss": 0.6796, "step": 18609 }, { "epoch": 0.543341800239409, "grad_norm": 0.6454031825070339, "learning_rate": 1.0148256285482564e-05, "loss": 0.6087, "step": 18610 }, { "epoch": 0.5433709964672564, "grad_norm": 0.6944415223015432, "learning_rate": 1.0147607461476074e-05, "loss": 0.7123, "step": 18611 }, { "epoch": 0.5434001926951038, "grad_norm": 0.6433127387701076, "learning_rate": 1.0146958637469588e-05, "loss": 0.5573, "step": 18612 }, { "epoch": 0.5434293889229511, "grad_norm": 0.6125992119765937, "learning_rate": 1.01463098134631e-05, "loss": 0.5313, "step": 18613 }, { "epoch": 0.5434585851507985, "grad_norm": 0.6449266200903339, "learning_rate": 1.0145660989456612e-05, "loss": 0.6137, "step": 18614 }, { "epoch": 0.5434877813786458, "grad_norm": 0.6173327935856529, "learning_rate": 1.0145012165450124e-05, "loss": 0.5375, "step": 18615 }, { "epoch": 0.5435169776064932, "grad_norm": 0.6146157882154372, "learning_rate": 1.0144363341443634e-05, "loss": 0.5526, "step": 18616 }, { "epoch": 0.5435461738343406, "grad_norm": 0.6134969309035179, "learning_rate": 1.0143714517437146e-05, "loss": 0.558, "step": 18617 }, { "epoch": 0.5435753700621879, "grad_norm": 0.6200017877199892, "learning_rate": 1.0143065693430658e-05, "loss": 0.6222, "step": 18618 }, { "epoch": 0.5436045662900353, "grad_norm": 0.6841308622715199, "learning_rate": 1.014241686942417e-05, "loss": 0.6568, "step": 18619 }, { "epoch": 0.5436337625178826, "grad_norm": 0.6145528140712261, "learning_rate": 1.0141768045417682e-05, "loss": 0.5468, "step": 18620 }, { "epoch": 0.54366295874573, "grad_norm": 0.6660689322021441, "learning_rate": 1.0141119221411193e-05, "loss": 0.6858, "step": 18621 }, { "epoch": 0.5436921549735774, "grad_norm": 0.6351740586101452, "learning_rate": 1.0140470397404705e-05, "loss": 0.5829, "step": 18622 }, { "epoch": 0.5437213512014247, "grad_norm": 0.6705127158219639, "learning_rate": 1.0139821573398217e-05, "loss": 0.64, "step": 18623 }, { "epoch": 0.5437505474292721, "grad_norm": 0.631367503733035, "learning_rate": 1.0139172749391729e-05, "loss": 0.578, "step": 18624 }, { "epoch": 0.5437797436571195, "grad_norm": 0.6855053029169653, "learning_rate": 1.0138523925385239e-05, "loss": 0.689, "step": 18625 }, { "epoch": 0.5438089398849668, "grad_norm": 0.6362311818097803, "learning_rate": 1.0137875101378751e-05, "loss": 0.6029, "step": 18626 }, { "epoch": 0.5438381361128142, "grad_norm": 0.6295596451685449, "learning_rate": 1.0137226277372263e-05, "loss": 0.5779, "step": 18627 }, { "epoch": 0.5438673323406615, "grad_norm": 0.616045863443039, "learning_rate": 1.0136577453365775e-05, "loss": 0.5615, "step": 18628 }, { "epoch": 0.5438965285685089, "grad_norm": 0.6449921574141926, "learning_rate": 1.0135928629359287e-05, "loss": 0.65, "step": 18629 }, { "epoch": 0.5439257247963563, "grad_norm": 0.6187073542166277, "learning_rate": 1.0135279805352797e-05, "loss": 0.5837, "step": 18630 }, { "epoch": 0.5439549210242036, "grad_norm": 0.636897175077803, "learning_rate": 1.0134630981346311e-05, "loss": 0.5953, "step": 18631 }, { "epoch": 0.543984117252051, "grad_norm": 0.6772399662545685, "learning_rate": 1.0133982157339823e-05, "loss": 0.6942, "step": 18632 }, { "epoch": 0.5440133134798983, "grad_norm": 0.7002105932613305, "learning_rate": 1.0133333333333335e-05, "loss": 0.6676, "step": 18633 }, { "epoch": 0.5440425097077458, "grad_norm": 0.6607632527579064, "learning_rate": 1.0132684509326847e-05, "loss": 0.5784, "step": 18634 }, { "epoch": 0.5440717059355932, "grad_norm": 0.6581999201794319, "learning_rate": 1.0132035685320357e-05, "loss": 0.6272, "step": 18635 }, { "epoch": 0.5441009021634405, "grad_norm": 0.6705472084256168, "learning_rate": 1.013138686131387e-05, "loss": 0.7251, "step": 18636 }, { "epoch": 0.5441300983912879, "grad_norm": 0.6149026938269229, "learning_rate": 1.0130738037307382e-05, "loss": 0.553, "step": 18637 }, { "epoch": 0.5441592946191353, "grad_norm": 0.671330127019911, "learning_rate": 1.0130089213300894e-05, "loss": 0.6734, "step": 18638 }, { "epoch": 0.5441884908469826, "grad_norm": 0.6556581344641571, "learning_rate": 1.0129440389294406e-05, "loss": 0.5701, "step": 18639 }, { "epoch": 0.54421768707483, "grad_norm": 0.6769798212055864, "learning_rate": 1.0128791565287916e-05, "loss": 0.6834, "step": 18640 }, { "epoch": 0.5442468833026773, "grad_norm": 0.6850903062814315, "learning_rate": 1.0128142741281428e-05, "loss": 0.6763, "step": 18641 }, { "epoch": 0.5442760795305247, "grad_norm": 0.6443688699052399, "learning_rate": 1.012749391727494e-05, "loss": 0.6329, "step": 18642 }, { "epoch": 0.5443052757583721, "grad_norm": 0.6255256163247205, "learning_rate": 1.0126845093268452e-05, "loss": 0.6016, "step": 18643 }, { "epoch": 0.5443344719862194, "grad_norm": 0.5936235025118556, "learning_rate": 1.0126196269261962e-05, "loss": 0.5526, "step": 18644 }, { "epoch": 0.5443636682140668, "grad_norm": 0.6484332215183575, "learning_rate": 1.0125547445255474e-05, "loss": 0.624, "step": 18645 }, { "epoch": 0.5443928644419141, "grad_norm": 0.6237020579184608, "learning_rate": 1.0124898621248986e-05, "loss": 0.5798, "step": 18646 }, { "epoch": 0.5444220606697615, "grad_norm": 0.6944417071714454, "learning_rate": 1.0124249797242498e-05, "loss": 0.6844, "step": 18647 }, { "epoch": 0.5444512568976089, "grad_norm": 0.63750619554834, "learning_rate": 1.012360097323601e-05, "loss": 0.687, "step": 18648 }, { "epoch": 0.5444804531254562, "grad_norm": 0.6723631811716761, "learning_rate": 1.012295214922952e-05, "loss": 0.6024, "step": 18649 }, { "epoch": 0.5445096493533036, "grad_norm": 0.633889428860136, "learning_rate": 1.0122303325223035e-05, "loss": 0.5686, "step": 18650 }, { "epoch": 0.544538845581151, "grad_norm": 0.6198145805058483, "learning_rate": 1.0121654501216547e-05, "loss": 0.5702, "step": 18651 }, { "epoch": 0.5445680418089983, "grad_norm": 0.6096080736063455, "learning_rate": 1.0121005677210059e-05, "loss": 0.5911, "step": 18652 }, { "epoch": 0.5445972380368457, "grad_norm": 0.6477686292832245, "learning_rate": 1.012035685320357e-05, "loss": 0.5416, "step": 18653 }, { "epoch": 0.544626434264693, "grad_norm": 0.6224270817977604, "learning_rate": 1.0119708029197081e-05, "loss": 0.6, "step": 18654 }, { "epoch": 0.5446556304925404, "grad_norm": 0.6237894425102989, "learning_rate": 1.0119059205190593e-05, "loss": 0.6036, "step": 18655 }, { "epoch": 0.5446848267203878, "grad_norm": 0.6142288603676643, "learning_rate": 1.0118410381184105e-05, "loss": 0.5756, "step": 18656 }, { "epoch": 0.5447140229482351, "grad_norm": 0.6115609258495625, "learning_rate": 1.0117761557177617e-05, "loss": 0.5661, "step": 18657 }, { "epoch": 0.5447432191760825, "grad_norm": 0.6533207574511953, "learning_rate": 1.0117112733171129e-05, "loss": 0.5985, "step": 18658 }, { "epoch": 0.5447724154039298, "grad_norm": 0.6524374560594169, "learning_rate": 1.011646390916464e-05, "loss": 0.6456, "step": 18659 }, { "epoch": 0.5448016116317772, "grad_norm": 0.6334309347179582, "learning_rate": 1.0115815085158151e-05, "loss": 0.5889, "step": 18660 }, { "epoch": 0.5448308078596246, "grad_norm": 0.6480562585910457, "learning_rate": 1.0115166261151663e-05, "loss": 0.6133, "step": 18661 }, { "epoch": 0.5448600040874719, "grad_norm": 0.5590183125344181, "learning_rate": 1.0114517437145175e-05, "loss": 0.4955, "step": 18662 }, { "epoch": 0.5448892003153193, "grad_norm": 0.6014450418634308, "learning_rate": 1.0113868613138686e-05, "loss": 0.5526, "step": 18663 }, { "epoch": 0.5449183965431666, "grad_norm": 0.63753806296556, "learning_rate": 1.0113219789132198e-05, "loss": 0.6355, "step": 18664 }, { "epoch": 0.544947592771014, "grad_norm": 0.6667441294494927, "learning_rate": 1.011257096512571e-05, "loss": 0.6894, "step": 18665 }, { "epoch": 0.5449767889988614, "grad_norm": 0.6112854097931715, "learning_rate": 1.0111922141119222e-05, "loss": 0.5726, "step": 18666 }, { "epoch": 0.5450059852267087, "grad_norm": 0.6614867960571972, "learning_rate": 1.0111273317112734e-05, "loss": 0.6451, "step": 18667 }, { "epoch": 0.5450351814545561, "grad_norm": 0.6557552058992002, "learning_rate": 1.0110624493106244e-05, "loss": 0.6666, "step": 18668 }, { "epoch": 0.5450643776824035, "grad_norm": 0.6137674520130397, "learning_rate": 1.0109975669099758e-05, "loss": 0.5859, "step": 18669 }, { "epoch": 0.5450935739102508, "grad_norm": 0.6656046575457942, "learning_rate": 1.010932684509327e-05, "loss": 0.6741, "step": 18670 }, { "epoch": 0.5451227701380982, "grad_norm": 0.6107234459477552, "learning_rate": 1.0108678021086782e-05, "loss": 0.5825, "step": 18671 }, { "epoch": 0.5451519663659455, "grad_norm": 0.6082130581824224, "learning_rate": 1.0108029197080294e-05, "loss": 0.531, "step": 18672 }, { "epoch": 0.5451811625937929, "grad_norm": 0.763435682312336, "learning_rate": 1.0107380373073804e-05, "loss": 0.6093, "step": 18673 }, { "epoch": 0.5452103588216403, "grad_norm": 0.598080783866895, "learning_rate": 1.0106731549067316e-05, "loss": 0.5572, "step": 18674 }, { "epoch": 0.5452395550494876, "grad_norm": 0.5964353226810497, "learning_rate": 1.0106082725060828e-05, "loss": 0.5211, "step": 18675 }, { "epoch": 0.545268751277335, "grad_norm": 0.6612419785386379, "learning_rate": 1.010543390105434e-05, "loss": 0.623, "step": 18676 }, { "epoch": 0.5452979475051823, "grad_norm": 0.6352099497592949, "learning_rate": 1.0104785077047852e-05, "loss": 0.6025, "step": 18677 }, { "epoch": 0.5453271437330297, "grad_norm": 0.6051421313143766, "learning_rate": 1.0104136253041363e-05, "loss": 0.5628, "step": 18678 }, { "epoch": 0.5453563399608771, "grad_norm": 0.6773614403258786, "learning_rate": 1.0103487429034875e-05, "loss": 0.6478, "step": 18679 }, { "epoch": 0.5453855361887244, "grad_norm": 0.6464337093979026, "learning_rate": 1.0102838605028387e-05, "loss": 0.6159, "step": 18680 }, { "epoch": 0.5454147324165718, "grad_norm": 0.6662059019150447, "learning_rate": 1.0102189781021899e-05, "loss": 0.7121, "step": 18681 }, { "epoch": 0.5454439286444192, "grad_norm": 0.8296068465006371, "learning_rate": 1.010154095701541e-05, "loss": 0.6845, "step": 18682 }, { "epoch": 0.5454731248722665, "grad_norm": 0.6946879769449427, "learning_rate": 1.0100892133008921e-05, "loss": 0.6612, "step": 18683 }, { "epoch": 0.5455023211001139, "grad_norm": 0.6273913590734277, "learning_rate": 1.0100243309002433e-05, "loss": 0.6324, "step": 18684 }, { "epoch": 0.5455315173279612, "grad_norm": 0.61234708220696, "learning_rate": 1.0099594484995945e-05, "loss": 0.5771, "step": 18685 }, { "epoch": 0.5455607135558086, "grad_norm": 0.6366151313846213, "learning_rate": 1.0098945660989457e-05, "loss": 0.6173, "step": 18686 }, { "epoch": 0.545589909783656, "grad_norm": 0.6525835474617417, "learning_rate": 1.0098296836982968e-05, "loss": 0.6725, "step": 18687 }, { "epoch": 0.5456191060115033, "grad_norm": 0.6677323380565562, "learning_rate": 1.0097648012976481e-05, "loss": 0.6691, "step": 18688 }, { "epoch": 0.5456483022393507, "grad_norm": 0.6482695989880966, "learning_rate": 1.0096999188969993e-05, "loss": 0.6174, "step": 18689 }, { "epoch": 0.545677498467198, "grad_norm": 0.6661252710824844, "learning_rate": 1.0096350364963505e-05, "loss": 0.6167, "step": 18690 }, { "epoch": 0.5457066946950454, "grad_norm": 0.6420704880041193, "learning_rate": 1.0095701540957017e-05, "loss": 0.6125, "step": 18691 }, { "epoch": 0.5457358909228928, "grad_norm": 0.6433172116689118, "learning_rate": 1.0095052716950528e-05, "loss": 0.6054, "step": 18692 }, { "epoch": 0.5457650871507401, "grad_norm": 0.6696921143904908, "learning_rate": 1.009440389294404e-05, "loss": 0.6591, "step": 18693 }, { "epoch": 0.5457942833785875, "grad_norm": 0.6434951268357064, "learning_rate": 1.0093755068937552e-05, "loss": 0.6125, "step": 18694 }, { "epoch": 0.5458234796064348, "grad_norm": 0.62365997397351, "learning_rate": 1.0093106244931064e-05, "loss": 0.5872, "step": 18695 }, { "epoch": 0.5458526758342822, "grad_norm": 0.6346827968304761, "learning_rate": 1.0092457420924576e-05, "loss": 0.5795, "step": 18696 }, { "epoch": 0.5458818720621296, "grad_norm": 0.6474670918770519, "learning_rate": 1.0091808596918086e-05, "loss": 0.5972, "step": 18697 }, { "epoch": 0.5459110682899769, "grad_norm": 0.6400658270078179, "learning_rate": 1.0091159772911598e-05, "loss": 0.64, "step": 18698 }, { "epoch": 0.5459402645178243, "grad_norm": 0.6041824345553541, "learning_rate": 1.009051094890511e-05, "loss": 0.588, "step": 18699 }, { "epoch": 0.5459694607456717, "grad_norm": 0.6426914839524166, "learning_rate": 1.0089862124898622e-05, "loss": 0.6104, "step": 18700 }, { "epoch": 0.545998656973519, "grad_norm": 0.6274015621685939, "learning_rate": 1.0089213300892133e-05, "loss": 0.6123, "step": 18701 }, { "epoch": 0.5460278532013664, "grad_norm": 0.7003176782190852, "learning_rate": 1.0088564476885645e-05, "loss": 0.7084, "step": 18702 }, { "epoch": 0.5460570494292137, "grad_norm": 0.6567188724677941, "learning_rate": 1.0087915652879157e-05, "loss": 0.6409, "step": 18703 }, { "epoch": 0.5460862456570611, "grad_norm": 0.6400451588439763, "learning_rate": 1.0087266828872669e-05, "loss": 0.5269, "step": 18704 }, { "epoch": 0.5461154418849085, "grad_norm": 0.6358742162216712, "learning_rate": 1.008661800486618e-05, "loss": 0.5698, "step": 18705 }, { "epoch": 0.5461446381127558, "grad_norm": 0.6192305717066153, "learning_rate": 1.0085969180859691e-05, "loss": 0.6131, "step": 18706 }, { "epoch": 0.5461738343406032, "grad_norm": 0.6801664320736535, "learning_rate": 1.0085320356853205e-05, "loss": 0.6449, "step": 18707 }, { "epoch": 0.5462030305684505, "grad_norm": 0.6448223638229852, "learning_rate": 1.0084671532846717e-05, "loss": 0.5647, "step": 18708 }, { "epoch": 0.5462322267962979, "grad_norm": 0.6545303557925345, "learning_rate": 1.0084022708840229e-05, "loss": 0.6075, "step": 18709 }, { "epoch": 0.5462614230241453, "grad_norm": 0.6532111468193885, "learning_rate": 1.0083373884833741e-05, "loss": 0.5814, "step": 18710 }, { "epoch": 0.5462906192519926, "grad_norm": 0.6397213621620825, "learning_rate": 1.0082725060827251e-05, "loss": 0.6337, "step": 18711 }, { "epoch": 0.54631981547984, "grad_norm": 0.686419236529909, "learning_rate": 1.0082076236820763e-05, "loss": 0.6847, "step": 18712 }, { "epoch": 0.5463490117076873, "grad_norm": 0.7222233523622503, "learning_rate": 1.0081427412814275e-05, "loss": 0.7797, "step": 18713 }, { "epoch": 0.5463782079355347, "grad_norm": 0.6661397350278447, "learning_rate": 1.0080778588807787e-05, "loss": 0.611, "step": 18714 }, { "epoch": 0.5464074041633821, "grad_norm": 0.6111290771899643, "learning_rate": 1.00801297648013e-05, "loss": 0.5386, "step": 18715 }, { "epoch": 0.5464366003912294, "grad_norm": 0.6332774250443769, "learning_rate": 1.007948094079481e-05, "loss": 0.609, "step": 18716 }, { "epoch": 0.5464657966190768, "grad_norm": 0.614104362484942, "learning_rate": 1.0078832116788322e-05, "loss": 0.5598, "step": 18717 }, { "epoch": 0.5464949928469242, "grad_norm": 0.6342671201668157, "learning_rate": 1.0078183292781834e-05, "loss": 0.6049, "step": 18718 }, { "epoch": 0.5465241890747715, "grad_norm": 0.6245461173459771, "learning_rate": 1.0077534468775346e-05, "loss": 0.598, "step": 18719 }, { "epoch": 0.5465533853026189, "grad_norm": 0.6998936973222666, "learning_rate": 1.0076885644768856e-05, "loss": 0.6993, "step": 18720 }, { "epoch": 0.5465825815304662, "grad_norm": 0.6814401670782214, "learning_rate": 1.0076236820762368e-05, "loss": 0.7046, "step": 18721 }, { "epoch": 0.5466117777583136, "grad_norm": 0.6794115195412926, "learning_rate": 1.007558799675588e-05, "loss": 0.6899, "step": 18722 }, { "epoch": 0.546640973986161, "grad_norm": 0.6293700058675251, "learning_rate": 1.0074939172749392e-05, "loss": 0.5827, "step": 18723 }, { "epoch": 0.5466701702140083, "grad_norm": 0.6298181411177023, "learning_rate": 1.0074290348742903e-05, "loss": 0.5813, "step": 18724 }, { "epoch": 0.5466993664418557, "grad_norm": 0.6507805005281883, "learning_rate": 1.0073641524736415e-05, "loss": 0.6479, "step": 18725 }, { "epoch": 0.546728562669703, "grad_norm": 0.6352048574536543, "learning_rate": 1.0072992700729928e-05, "loss": 0.6034, "step": 18726 }, { "epoch": 0.5467577588975504, "grad_norm": 0.6514551995599002, "learning_rate": 1.007234387672344e-05, "loss": 0.6854, "step": 18727 }, { "epoch": 0.5467869551253978, "grad_norm": 0.7101712642658248, "learning_rate": 1.0071695052716952e-05, "loss": 0.6896, "step": 18728 }, { "epoch": 0.5468161513532451, "grad_norm": 0.635901894331778, "learning_rate": 1.0071046228710464e-05, "loss": 0.5886, "step": 18729 }, { "epoch": 0.5468453475810925, "grad_norm": 0.64903165523622, "learning_rate": 1.0070397404703975e-05, "loss": 0.6472, "step": 18730 }, { "epoch": 0.5468745438089399, "grad_norm": 0.6093688841533798, "learning_rate": 1.0069748580697487e-05, "loss": 0.5546, "step": 18731 }, { "epoch": 0.5469037400367872, "grad_norm": 0.6144805967459163, "learning_rate": 1.0069099756690999e-05, "loss": 0.5722, "step": 18732 }, { "epoch": 0.5469329362646346, "grad_norm": 0.6127137243373284, "learning_rate": 1.006845093268451e-05, "loss": 0.619, "step": 18733 }, { "epoch": 0.5469621324924819, "grad_norm": 0.6819700785346363, "learning_rate": 1.0067802108678023e-05, "loss": 0.6779, "step": 18734 }, { "epoch": 0.5469913287203293, "grad_norm": 0.6563475860200453, "learning_rate": 1.0067153284671533e-05, "loss": 0.6312, "step": 18735 }, { "epoch": 0.5470205249481767, "grad_norm": 0.6526726158743309, "learning_rate": 1.0066504460665045e-05, "loss": 0.642, "step": 18736 }, { "epoch": 0.547049721176024, "grad_norm": 0.6366385145562695, "learning_rate": 1.0065855636658557e-05, "loss": 0.614, "step": 18737 }, { "epoch": 0.5470789174038714, "grad_norm": 0.6891247386510695, "learning_rate": 1.006520681265207e-05, "loss": 0.6505, "step": 18738 }, { "epoch": 0.5471081136317187, "grad_norm": 0.6564082247298886, "learning_rate": 1.006455798864558e-05, "loss": 0.6173, "step": 18739 }, { "epoch": 0.5471373098595661, "grad_norm": 0.6257364824658285, "learning_rate": 1.0063909164639092e-05, "loss": 0.5959, "step": 18740 }, { "epoch": 0.5471665060874135, "grad_norm": 0.6489666550886065, "learning_rate": 1.0063260340632604e-05, "loss": 0.641, "step": 18741 }, { "epoch": 0.5471957023152608, "grad_norm": 0.6668317438749424, "learning_rate": 1.0062611516626116e-05, "loss": 0.6402, "step": 18742 }, { "epoch": 0.5472248985431082, "grad_norm": 0.9423297033184825, "learning_rate": 1.0061962692619626e-05, "loss": 0.5958, "step": 18743 }, { "epoch": 0.5472540947709555, "grad_norm": 0.6720549375342683, "learning_rate": 1.0061313868613138e-05, "loss": 0.6675, "step": 18744 }, { "epoch": 0.5472832909988029, "grad_norm": 0.7321826377692249, "learning_rate": 1.0060665044606652e-05, "loss": 0.6522, "step": 18745 }, { "epoch": 0.5473124872266503, "grad_norm": 0.6726099638518912, "learning_rate": 1.0060016220600164e-05, "loss": 0.6366, "step": 18746 }, { "epoch": 0.5473416834544976, "grad_norm": 0.6098558826931144, "learning_rate": 1.0059367396593676e-05, "loss": 0.5553, "step": 18747 }, { "epoch": 0.547370879682345, "grad_norm": 0.6540511930298488, "learning_rate": 1.0058718572587188e-05, "loss": 0.6004, "step": 18748 }, { "epoch": 0.5474000759101924, "grad_norm": 0.6316523568788522, "learning_rate": 1.0058069748580698e-05, "loss": 0.6102, "step": 18749 }, { "epoch": 0.5474292721380397, "grad_norm": 0.6976248791946843, "learning_rate": 1.005742092457421e-05, "loss": 0.6354, "step": 18750 }, { "epoch": 0.5474584683658871, "grad_norm": 0.6516097491013079, "learning_rate": 1.0056772100567722e-05, "loss": 0.6029, "step": 18751 }, { "epoch": 0.5474876645937344, "grad_norm": 0.654982616428381, "learning_rate": 1.0056123276561234e-05, "loss": 0.6676, "step": 18752 }, { "epoch": 0.5475168608215818, "grad_norm": 0.6978603107939189, "learning_rate": 1.0055474452554746e-05, "loss": 0.7199, "step": 18753 }, { "epoch": 0.5475460570494293, "grad_norm": 0.7037838852261084, "learning_rate": 1.0054825628548257e-05, "loss": 0.6855, "step": 18754 }, { "epoch": 0.5475752532772766, "grad_norm": 0.6022055310779364, "learning_rate": 1.0054176804541769e-05, "loss": 0.5542, "step": 18755 }, { "epoch": 0.547604449505124, "grad_norm": 0.618134017460253, "learning_rate": 1.005352798053528e-05, "loss": 0.5467, "step": 18756 }, { "epoch": 0.5476336457329714, "grad_norm": 0.6565250542945874, "learning_rate": 1.0052879156528793e-05, "loss": 0.6204, "step": 18757 }, { "epoch": 0.5476628419608187, "grad_norm": 0.64403627752505, "learning_rate": 1.0052230332522303e-05, "loss": 0.5886, "step": 18758 }, { "epoch": 0.5476920381886661, "grad_norm": 0.6927449547715164, "learning_rate": 1.0051581508515815e-05, "loss": 0.6958, "step": 18759 }, { "epoch": 0.5477212344165134, "grad_norm": 0.6697646977755732, "learning_rate": 1.0050932684509327e-05, "loss": 0.6246, "step": 18760 }, { "epoch": 0.5477504306443608, "grad_norm": 0.6316843565338265, "learning_rate": 1.0050283860502839e-05, "loss": 0.6114, "step": 18761 }, { "epoch": 0.5477796268722082, "grad_norm": 0.6913841956069794, "learning_rate": 1.004963503649635e-05, "loss": 0.7118, "step": 18762 }, { "epoch": 0.5478088231000555, "grad_norm": 0.6047054075456154, "learning_rate": 1.0048986212489865e-05, "loss": 0.5354, "step": 18763 }, { "epoch": 0.5478380193279029, "grad_norm": 0.6172406750457535, "learning_rate": 1.0048337388483375e-05, "loss": 0.5669, "step": 18764 }, { "epoch": 0.5478672155557502, "grad_norm": 0.6588270936113719, "learning_rate": 1.0047688564476887e-05, "loss": 0.6305, "step": 18765 }, { "epoch": 0.5478964117835976, "grad_norm": 0.6558062014846822, "learning_rate": 1.00470397404704e-05, "loss": 0.6274, "step": 18766 }, { "epoch": 0.547925608011445, "grad_norm": 0.6032969345341341, "learning_rate": 1.0046390916463911e-05, "loss": 0.5491, "step": 18767 }, { "epoch": 0.5479548042392923, "grad_norm": 0.6811931226101077, "learning_rate": 1.0045742092457422e-05, "loss": 0.6289, "step": 18768 }, { "epoch": 0.5479840004671397, "grad_norm": 0.6715732273047526, "learning_rate": 1.0045093268450934e-05, "loss": 0.6882, "step": 18769 }, { "epoch": 0.548013196694987, "grad_norm": 0.5838591840241413, "learning_rate": 1.0044444444444446e-05, "loss": 0.5542, "step": 18770 }, { "epoch": 0.5480423929228344, "grad_norm": 0.6006638419148298, "learning_rate": 1.0043795620437958e-05, "loss": 0.5261, "step": 18771 }, { "epoch": 0.5480715891506818, "grad_norm": 0.6869410235035494, "learning_rate": 1.004314679643147e-05, "loss": 0.6956, "step": 18772 }, { "epoch": 0.5481007853785291, "grad_norm": 0.6460752587113573, "learning_rate": 1.004249797242498e-05, "loss": 0.6551, "step": 18773 }, { "epoch": 0.5481299816063765, "grad_norm": 0.6324108579294149, "learning_rate": 1.0041849148418492e-05, "loss": 0.6052, "step": 18774 }, { "epoch": 0.5481591778342239, "grad_norm": 0.6975912957720893, "learning_rate": 1.0041200324412004e-05, "loss": 0.655, "step": 18775 }, { "epoch": 0.5481883740620712, "grad_norm": 0.6700162740595695, "learning_rate": 1.0040551500405516e-05, "loss": 0.6783, "step": 18776 }, { "epoch": 0.5482175702899186, "grad_norm": 0.6340580376067548, "learning_rate": 1.0039902676399026e-05, "loss": 0.584, "step": 18777 }, { "epoch": 0.5482467665177659, "grad_norm": 0.6419214634644567, "learning_rate": 1.0039253852392538e-05, "loss": 0.6061, "step": 18778 }, { "epoch": 0.5482759627456133, "grad_norm": 0.6775745908572258, "learning_rate": 1.003860502838605e-05, "loss": 0.6942, "step": 18779 }, { "epoch": 0.5483051589734607, "grad_norm": 0.6488964295566172, "learning_rate": 1.0037956204379562e-05, "loss": 0.6258, "step": 18780 }, { "epoch": 0.548334355201308, "grad_norm": 0.5950043849809669, "learning_rate": 1.0037307380373073e-05, "loss": 0.5475, "step": 18781 }, { "epoch": 0.5483635514291554, "grad_norm": 0.6571540045257565, "learning_rate": 1.0036658556366588e-05, "loss": 0.6815, "step": 18782 }, { "epoch": 0.5483927476570027, "grad_norm": 0.626166941252358, "learning_rate": 1.0036009732360099e-05, "loss": 0.6154, "step": 18783 }, { "epoch": 0.5484219438848501, "grad_norm": 0.6927018770906482, "learning_rate": 1.003536090835361e-05, "loss": 0.7378, "step": 18784 }, { "epoch": 0.5484511401126975, "grad_norm": 0.6536964629641098, "learning_rate": 1.0034712084347123e-05, "loss": 0.6257, "step": 18785 }, { "epoch": 0.5484803363405448, "grad_norm": 0.639977226695341, "learning_rate": 1.0034063260340635e-05, "loss": 0.6108, "step": 18786 }, { "epoch": 0.5485095325683922, "grad_norm": 0.6363093146870608, "learning_rate": 1.0033414436334145e-05, "loss": 0.5986, "step": 18787 }, { "epoch": 0.5485387287962395, "grad_norm": 0.6297238313752344, "learning_rate": 1.0032765612327657e-05, "loss": 0.611, "step": 18788 }, { "epoch": 0.5485679250240869, "grad_norm": 0.6416997692642789, "learning_rate": 1.0032116788321169e-05, "loss": 0.6021, "step": 18789 }, { "epoch": 0.5485971212519343, "grad_norm": 0.6712694816692679, "learning_rate": 1.0031467964314681e-05, "loss": 0.6472, "step": 18790 }, { "epoch": 0.5486263174797816, "grad_norm": 0.6649168839886613, "learning_rate": 1.0030819140308193e-05, "loss": 0.6834, "step": 18791 }, { "epoch": 0.548655513707629, "grad_norm": 0.6483534926825841, "learning_rate": 1.0030170316301703e-05, "loss": 0.6188, "step": 18792 }, { "epoch": 0.5486847099354764, "grad_norm": 0.5870522720465665, "learning_rate": 1.0029521492295215e-05, "loss": 0.5145, "step": 18793 }, { "epoch": 0.5487139061633237, "grad_norm": 0.6400235087999498, "learning_rate": 1.0028872668288727e-05, "loss": 0.596, "step": 18794 }, { "epoch": 0.5487431023911711, "grad_norm": 0.6436143286414889, "learning_rate": 1.002822384428224e-05, "loss": 0.642, "step": 18795 }, { "epoch": 0.5487722986190184, "grad_norm": 0.6137572871926944, "learning_rate": 1.002757502027575e-05, "loss": 0.5483, "step": 18796 }, { "epoch": 0.5488014948468658, "grad_norm": 0.6624916379136957, "learning_rate": 1.0026926196269262e-05, "loss": 0.6244, "step": 18797 }, { "epoch": 0.5488306910747132, "grad_norm": 0.67357672647408, "learning_rate": 1.0026277372262774e-05, "loss": 0.6313, "step": 18798 }, { "epoch": 0.5488598873025605, "grad_norm": 0.598046828918993, "learning_rate": 1.0025628548256286e-05, "loss": 0.567, "step": 18799 }, { "epoch": 0.5488890835304079, "grad_norm": 0.6527014527631856, "learning_rate": 1.0024979724249796e-05, "loss": 0.6299, "step": 18800 }, { "epoch": 0.5489182797582552, "grad_norm": 0.6716812526238259, "learning_rate": 1.0024330900243312e-05, "loss": 0.6806, "step": 18801 }, { "epoch": 0.5489474759861026, "grad_norm": 0.6214109151810642, "learning_rate": 1.0023682076236822e-05, "loss": 0.5454, "step": 18802 }, { "epoch": 0.54897667221395, "grad_norm": 0.627280476724739, "learning_rate": 1.0023033252230334e-05, "loss": 0.5216, "step": 18803 }, { "epoch": 0.5490058684417973, "grad_norm": 0.61643986442579, "learning_rate": 1.0022384428223846e-05, "loss": 0.5824, "step": 18804 }, { "epoch": 0.5490350646696447, "grad_norm": 0.6826934446828815, "learning_rate": 1.0021735604217358e-05, "loss": 0.6744, "step": 18805 }, { "epoch": 0.549064260897492, "grad_norm": 0.63881842510237, "learning_rate": 1.0021086780210868e-05, "loss": 0.5875, "step": 18806 }, { "epoch": 0.5490934571253394, "grad_norm": 0.645844362237758, "learning_rate": 1.002043795620438e-05, "loss": 0.5907, "step": 18807 }, { "epoch": 0.5491226533531868, "grad_norm": 0.6151921251925949, "learning_rate": 1.0019789132197892e-05, "loss": 0.6172, "step": 18808 }, { "epoch": 0.5491518495810341, "grad_norm": 0.6807474249192147, "learning_rate": 1.0019140308191404e-05, "loss": 0.6338, "step": 18809 }, { "epoch": 0.5491810458088815, "grad_norm": 0.6484855237729591, "learning_rate": 1.0018491484184917e-05, "loss": 0.624, "step": 18810 }, { "epoch": 0.5492102420367289, "grad_norm": 0.7062694736863974, "learning_rate": 1.0017842660178427e-05, "loss": 0.6901, "step": 18811 }, { "epoch": 0.5492394382645762, "grad_norm": 0.6476618049687233, "learning_rate": 1.0017193836171939e-05, "loss": 0.5833, "step": 18812 }, { "epoch": 0.5492686344924236, "grad_norm": 0.6950940994103801, "learning_rate": 1.0016545012165451e-05, "loss": 0.646, "step": 18813 }, { "epoch": 0.5492978307202709, "grad_norm": 0.6963381968618777, "learning_rate": 1.0015896188158963e-05, "loss": 0.6565, "step": 18814 }, { "epoch": 0.5493270269481183, "grad_norm": 0.6788789478461055, "learning_rate": 1.0015247364152473e-05, "loss": 0.5759, "step": 18815 }, { "epoch": 0.5493562231759657, "grad_norm": 0.7000642274566162, "learning_rate": 1.0014598540145985e-05, "loss": 0.6136, "step": 18816 }, { "epoch": 0.549385419403813, "grad_norm": 0.607283618706502, "learning_rate": 1.0013949716139497e-05, "loss": 0.5304, "step": 18817 }, { "epoch": 0.5494146156316604, "grad_norm": 0.7031923424977148, "learning_rate": 1.001330089213301e-05, "loss": 0.6901, "step": 18818 }, { "epoch": 0.5494438118595077, "grad_norm": 0.6426769630578434, "learning_rate": 1.001265206812652e-05, "loss": 0.6417, "step": 18819 }, { "epoch": 0.5494730080873551, "grad_norm": 0.6522667301584698, "learning_rate": 1.0012003244120035e-05, "loss": 0.6754, "step": 18820 }, { "epoch": 0.5495022043152025, "grad_norm": 0.6383701393919543, "learning_rate": 1.0011354420113545e-05, "loss": 0.6244, "step": 18821 }, { "epoch": 0.5495314005430498, "grad_norm": 0.6325746799819861, "learning_rate": 1.0010705596107057e-05, "loss": 0.594, "step": 18822 }, { "epoch": 0.5495605967708972, "grad_norm": 0.6388727612780056, "learning_rate": 1.001005677210057e-05, "loss": 0.598, "step": 18823 }, { "epoch": 0.5495897929987446, "grad_norm": 0.6687426468157216, "learning_rate": 1.0009407948094082e-05, "loss": 0.6359, "step": 18824 }, { "epoch": 0.5496189892265919, "grad_norm": 0.6701925922343179, "learning_rate": 1.0008759124087592e-05, "loss": 0.6286, "step": 18825 }, { "epoch": 0.5496481854544393, "grad_norm": 0.648911050956231, "learning_rate": 1.0008110300081104e-05, "loss": 0.6244, "step": 18826 }, { "epoch": 0.5496773816822866, "grad_norm": 0.6416179838298215, "learning_rate": 1.0007461476074616e-05, "loss": 0.6701, "step": 18827 }, { "epoch": 0.549706577910134, "grad_norm": 0.607140937294081, "learning_rate": 1.0006812652068128e-05, "loss": 0.5882, "step": 18828 }, { "epoch": 0.5497357741379814, "grad_norm": 0.6758506410491916, "learning_rate": 1.000616382806164e-05, "loss": 0.6706, "step": 18829 }, { "epoch": 0.5497649703658287, "grad_norm": 0.6564261192266856, "learning_rate": 1.000551500405515e-05, "loss": 0.5965, "step": 18830 }, { "epoch": 0.5497941665936761, "grad_norm": 0.6178951369848961, "learning_rate": 1.0004866180048662e-05, "loss": 0.5612, "step": 18831 }, { "epoch": 0.5498233628215234, "grad_norm": 0.6064569632991353, "learning_rate": 1.0004217356042174e-05, "loss": 0.5767, "step": 18832 }, { "epoch": 0.5498525590493708, "grad_norm": 0.6673624372113972, "learning_rate": 1.0003568532035686e-05, "loss": 0.647, "step": 18833 }, { "epoch": 0.5498817552772182, "grad_norm": 0.6445165620895967, "learning_rate": 1.0002919708029197e-05, "loss": 0.5456, "step": 18834 }, { "epoch": 0.5499109515050655, "grad_norm": 0.6528896825736281, "learning_rate": 1.0002270884022709e-05, "loss": 0.6201, "step": 18835 }, { "epoch": 0.5499401477329129, "grad_norm": 0.7227779889113345, "learning_rate": 1.000162206001622e-05, "loss": 0.5677, "step": 18836 }, { "epoch": 0.5499693439607602, "grad_norm": 0.6917004168424956, "learning_rate": 1.0000973236009733e-05, "loss": 0.6275, "step": 18837 }, { "epoch": 0.5499985401886076, "grad_norm": 0.6479626294968923, "learning_rate": 1.0000324412003243e-05, "loss": 0.6445, "step": 18838 }, { "epoch": 0.550027736416455, "grad_norm": 0.658132261728033, "learning_rate": 9.999675587996757e-06, "loss": 0.6075, "step": 18839 }, { "epoch": 0.5500569326443023, "grad_norm": 0.6522908007484711, "learning_rate": 9.999026763990269e-06, "loss": 0.5983, "step": 18840 }, { "epoch": 0.5500861288721497, "grad_norm": 0.6372551603144868, "learning_rate": 9.99837793998378e-06, "loss": 0.6243, "step": 18841 }, { "epoch": 0.550115325099997, "grad_norm": 0.6451857018563593, "learning_rate": 9.997729115977291e-06, "loss": 0.6433, "step": 18842 }, { "epoch": 0.5501445213278444, "grad_norm": 0.6669935855304173, "learning_rate": 9.997080291970803e-06, "loss": 0.6509, "step": 18843 }, { "epoch": 0.5501737175556918, "grad_norm": 0.6714296499012988, "learning_rate": 9.996431467964315e-06, "loss": 0.6463, "step": 18844 }, { "epoch": 0.5502029137835391, "grad_norm": 0.6540024821131781, "learning_rate": 9.995782643957827e-06, "loss": 0.5838, "step": 18845 }, { "epoch": 0.5502321100113865, "grad_norm": 0.6361879562051681, "learning_rate": 9.99513381995134e-06, "loss": 0.5999, "step": 18846 }, { "epoch": 0.5502613062392339, "grad_norm": 0.6364668422349098, "learning_rate": 9.994484995944851e-06, "loss": 0.5975, "step": 18847 }, { "epoch": 0.5502905024670812, "grad_norm": 0.647026702752725, "learning_rate": 9.993836171938362e-06, "loss": 0.6114, "step": 18848 }, { "epoch": 0.5503196986949286, "grad_norm": 0.6536647887092185, "learning_rate": 9.993187347931874e-06, "loss": 0.6154, "step": 18849 }, { "epoch": 0.5503488949227759, "grad_norm": 0.6266708052285812, "learning_rate": 9.992538523925386e-06, "loss": 0.5805, "step": 18850 }, { "epoch": 0.5503780911506233, "grad_norm": 0.6726106151937368, "learning_rate": 9.991889699918898e-06, "loss": 0.6582, "step": 18851 }, { "epoch": 0.5504072873784707, "grad_norm": 0.6812556768606403, "learning_rate": 9.99124087591241e-06, "loss": 0.6328, "step": 18852 }, { "epoch": 0.550436483606318, "grad_norm": 0.6667095306852691, "learning_rate": 9.990592051905922e-06, "loss": 0.6784, "step": 18853 }, { "epoch": 0.5504656798341654, "grad_norm": 0.6474409474540027, "learning_rate": 9.989943227899434e-06, "loss": 0.6172, "step": 18854 }, { "epoch": 0.5504948760620128, "grad_norm": 0.6182034816324228, "learning_rate": 9.989294403892946e-06, "loss": 0.598, "step": 18855 }, { "epoch": 0.5505240722898601, "grad_norm": 0.6152916265758995, "learning_rate": 9.988645579886456e-06, "loss": 0.5929, "step": 18856 }, { "epoch": 0.5505532685177075, "grad_norm": 0.6790256264083452, "learning_rate": 9.987996755879968e-06, "loss": 0.6695, "step": 18857 }, { "epoch": 0.5505824647455548, "grad_norm": 0.6480499029798766, "learning_rate": 9.98734793187348e-06, "loss": 0.6426, "step": 18858 }, { "epoch": 0.5506116609734022, "grad_norm": 0.6562041904029362, "learning_rate": 9.986699107866992e-06, "loss": 0.6047, "step": 18859 }, { "epoch": 0.5506408572012496, "grad_norm": 0.6188399309674262, "learning_rate": 9.986050283860503e-06, "loss": 0.5997, "step": 18860 }, { "epoch": 0.5506700534290969, "grad_norm": 0.5816501630943782, "learning_rate": 9.985401459854015e-06, "loss": 0.4938, "step": 18861 }, { "epoch": 0.5506992496569443, "grad_norm": 0.6853028179147155, "learning_rate": 9.984752635847527e-06, "loss": 0.6772, "step": 18862 }, { "epoch": 0.5507284458847916, "grad_norm": 0.6991566464481745, "learning_rate": 9.984103811841039e-06, "loss": 0.7349, "step": 18863 }, { "epoch": 0.550757642112639, "grad_norm": 0.6917701656832286, "learning_rate": 9.98345498783455e-06, "loss": 0.6928, "step": 18864 }, { "epoch": 0.5507868383404864, "grad_norm": 0.6770479122891692, "learning_rate": 9.982806163828063e-06, "loss": 0.6666, "step": 18865 }, { "epoch": 0.5508160345683337, "grad_norm": 0.6674435855516572, "learning_rate": 9.982157339821575e-06, "loss": 0.6269, "step": 18866 }, { "epoch": 0.5508452307961811, "grad_norm": 0.6524427852243123, "learning_rate": 9.981508515815085e-06, "loss": 0.6373, "step": 18867 }, { "epoch": 0.5508744270240284, "grad_norm": 0.6818936459957656, "learning_rate": 9.980859691808597e-06, "loss": 0.6526, "step": 18868 }, { "epoch": 0.5509036232518758, "grad_norm": 0.648185178326538, "learning_rate": 9.98021086780211e-06, "loss": 0.6388, "step": 18869 }, { "epoch": 0.5509328194797232, "grad_norm": 0.6015561746080244, "learning_rate": 9.979562043795621e-06, "loss": 0.5609, "step": 18870 }, { "epoch": 0.5509620157075705, "grad_norm": 0.6158593876476286, "learning_rate": 9.978913219789133e-06, "loss": 0.5327, "step": 18871 }, { "epoch": 0.5509912119354179, "grad_norm": 0.623769902121938, "learning_rate": 9.978264395782645e-06, "loss": 0.5337, "step": 18872 }, { "epoch": 0.5510204081632653, "grad_norm": 0.6555902850321859, "learning_rate": 9.977615571776157e-06, "loss": 0.6065, "step": 18873 }, { "epoch": 0.5510496043911126, "grad_norm": 0.670150380855531, "learning_rate": 9.97696674776967e-06, "loss": 0.5787, "step": 18874 }, { "epoch": 0.5510788006189601, "grad_norm": 0.5960817782083212, "learning_rate": 9.97631792376318e-06, "loss": 0.5418, "step": 18875 }, { "epoch": 0.5511079968468074, "grad_norm": 0.6415608919341178, "learning_rate": 9.975669099756692e-06, "loss": 0.5869, "step": 18876 }, { "epoch": 0.5511371930746548, "grad_norm": 0.6170738057250799, "learning_rate": 9.975020275750204e-06, "loss": 0.6098, "step": 18877 }, { "epoch": 0.5511663893025022, "grad_norm": 0.7351842883085269, "learning_rate": 9.974371451743716e-06, "loss": 0.7204, "step": 18878 }, { "epoch": 0.5511955855303495, "grad_norm": 0.6807670936129201, "learning_rate": 9.973722627737226e-06, "loss": 0.6951, "step": 18879 }, { "epoch": 0.5512247817581969, "grad_norm": 0.6375753693663225, "learning_rate": 9.973073803730738e-06, "loss": 0.595, "step": 18880 }, { "epoch": 0.5512539779860443, "grad_norm": 0.6451949997260149, "learning_rate": 9.972424979724252e-06, "loss": 0.6452, "step": 18881 }, { "epoch": 0.5512831742138916, "grad_norm": 0.6476042211258163, "learning_rate": 9.971776155717762e-06, "loss": 0.6286, "step": 18882 }, { "epoch": 0.551312370441739, "grad_norm": 0.6825133028797273, "learning_rate": 9.971127331711274e-06, "loss": 0.7144, "step": 18883 }, { "epoch": 0.5513415666695863, "grad_norm": 0.7385592402472764, "learning_rate": 9.970478507704786e-06, "loss": 0.7108, "step": 18884 }, { "epoch": 0.5513707628974337, "grad_norm": 0.6410370280700961, "learning_rate": 9.969829683698298e-06, "loss": 0.6202, "step": 18885 }, { "epoch": 0.5513999591252811, "grad_norm": 0.6554854875292996, "learning_rate": 9.969180859691809e-06, "loss": 0.6618, "step": 18886 }, { "epoch": 0.5514291553531284, "grad_norm": 0.614963274190016, "learning_rate": 9.96853203568532e-06, "loss": 0.6052, "step": 18887 }, { "epoch": 0.5514583515809758, "grad_norm": 0.616316159971569, "learning_rate": 9.967883211678833e-06, "loss": 0.5763, "step": 18888 }, { "epoch": 0.5514875478088231, "grad_norm": 0.5985367907804878, "learning_rate": 9.967234387672345e-06, "loss": 0.5391, "step": 18889 }, { "epoch": 0.5515167440366705, "grad_norm": 0.5736752703676898, "learning_rate": 9.966585563665857e-06, "loss": 0.5219, "step": 18890 }, { "epoch": 0.5515459402645179, "grad_norm": 0.673338783360894, "learning_rate": 9.965936739659369e-06, "loss": 0.6506, "step": 18891 }, { "epoch": 0.5515751364923652, "grad_norm": 0.6562504309428738, "learning_rate": 9.96528791565288e-06, "loss": 0.5747, "step": 18892 }, { "epoch": 0.5516043327202126, "grad_norm": 0.6478915262902264, "learning_rate": 9.964639091646393e-06, "loss": 0.6412, "step": 18893 }, { "epoch": 0.55163352894806, "grad_norm": 0.6083919996490432, "learning_rate": 9.963990267639903e-06, "loss": 0.5412, "step": 18894 }, { "epoch": 0.5516627251759073, "grad_norm": 0.6158609453529167, "learning_rate": 9.963341443633415e-06, "loss": 0.5732, "step": 18895 }, { "epoch": 0.5516919214037547, "grad_norm": 0.6741026384309763, "learning_rate": 9.962692619626927e-06, "loss": 0.6699, "step": 18896 }, { "epoch": 0.551721117631602, "grad_norm": 0.6428110247300982, "learning_rate": 9.96204379562044e-06, "loss": 0.6012, "step": 18897 }, { "epoch": 0.5517503138594494, "grad_norm": 0.6245100883839881, "learning_rate": 9.96139497161395e-06, "loss": 0.6209, "step": 18898 }, { "epoch": 0.5517795100872968, "grad_norm": 0.6595273571601359, "learning_rate": 9.960746147607462e-06, "loss": 0.6579, "step": 18899 }, { "epoch": 0.5518087063151441, "grad_norm": 0.6156138667731141, "learning_rate": 9.960097323600975e-06, "loss": 0.575, "step": 18900 }, { "epoch": 0.5518379025429915, "grad_norm": 0.6506174457502939, "learning_rate": 9.959448499594486e-06, "loss": 0.6546, "step": 18901 }, { "epoch": 0.5518670987708388, "grad_norm": 0.6275733601984977, "learning_rate": 9.958799675587998e-06, "loss": 0.5407, "step": 18902 }, { "epoch": 0.5518962949986862, "grad_norm": 0.6356971548304333, "learning_rate": 9.95815085158151e-06, "loss": 0.6128, "step": 18903 }, { "epoch": 0.5519254912265336, "grad_norm": 0.6964110164740585, "learning_rate": 9.957502027575022e-06, "loss": 0.6811, "step": 18904 }, { "epoch": 0.5519546874543809, "grad_norm": 0.6398093649472691, "learning_rate": 9.956853203568532e-06, "loss": 0.6057, "step": 18905 }, { "epoch": 0.5519838836822283, "grad_norm": 0.6733995135733523, "learning_rate": 9.956204379562044e-06, "loss": 0.6511, "step": 18906 }, { "epoch": 0.5520130799100756, "grad_norm": 0.6371005912832323, "learning_rate": 9.955555555555556e-06, "loss": 0.6239, "step": 18907 }, { "epoch": 0.552042276137923, "grad_norm": 0.641818480186341, "learning_rate": 9.954906731549068e-06, "loss": 0.5863, "step": 18908 }, { "epoch": 0.5520714723657704, "grad_norm": 0.6275751250632952, "learning_rate": 9.95425790754258e-06, "loss": 0.5583, "step": 18909 }, { "epoch": 0.5521006685936177, "grad_norm": 0.6446897912394784, "learning_rate": 9.953609083536092e-06, "loss": 0.5764, "step": 18910 }, { "epoch": 0.5521298648214651, "grad_norm": 0.6251916712401101, "learning_rate": 9.952960259529604e-06, "loss": 0.5807, "step": 18911 }, { "epoch": 0.5521590610493124, "grad_norm": 0.6274697469271444, "learning_rate": 9.952311435523116e-06, "loss": 0.5417, "step": 18912 }, { "epoch": 0.5521882572771598, "grad_norm": 0.5834471066775198, "learning_rate": 9.951662611516627e-06, "loss": 0.5146, "step": 18913 }, { "epoch": 0.5522174535050072, "grad_norm": 0.671341948071252, "learning_rate": 9.951013787510139e-06, "loss": 0.6047, "step": 18914 }, { "epoch": 0.5522466497328545, "grad_norm": 0.648644794526555, "learning_rate": 9.95036496350365e-06, "loss": 0.5746, "step": 18915 }, { "epoch": 0.5522758459607019, "grad_norm": 0.6718041176504588, "learning_rate": 9.949716139497163e-06, "loss": 0.6376, "step": 18916 }, { "epoch": 0.5523050421885493, "grad_norm": 0.599390577162532, "learning_rate": 9.949067315490673e-06, "loss": 0.5746, "step": 18917 }, { "epoch": 0.5523342384163966, "grad_norm": 0.7082852906604085, "learning_rate": 9.948418491484185e-06, "loss": 0.7043, "step": 18918 }, { "epoch": 0.552363434644244, "grad_norm": 0.627292751581537, "learning_rate": 9.947769667477699e-06, "loss": 0.5832, "step": 18919 }, { "epoch": 0.5523926308720913, "grad_norm": 0.6732316691687084, "learning_rate": 9.947120843471209e-06, "loss": 0.6053, "step": 18920 }, { "epoch": 0.5524218270999387, "grad_norm": 0.6245005210625293, "learning_rate": 9.946472019464721e-06, "loss": 0.5956, "step": 18921 }, { "epoch": 0.5524510233277861, "grad_norm": 0.6310979476442482, "learning_rate": 9.945823195458233e-06, "loss": 0.6282, "step": 18922 }, { "epoch": 0.5524802195556334, "grad_norm": 0.7018586583932125, "learning_rate": 9.945174371451745e-06, "loss": 0.6586, "step": 18923 }, { "epoch": 0.5525094157834808, "grad_norm": 0.6122739769130957, "learning_rate": 9.944525547445255e-06, "loss": 0.5645, "step": 18924 }, { "epoch": 0.5525386120113281, "grad_norm": 0.6297472347122569, "learning_rate": 9.943876723438767e-06, "loss": 0.5977, "step": 18925 }, { "epoch": 0.5525678082391755, "grad_norm": 0.6364781739424696, "learning_rate": 9.94322789943228e-06, "loss": 0.5903, "step": 18926 }, { "epoch": 0.5525970044670229, "grad_norm": 0.672880811584157, "learning_rate": 9.942579075425792e-06, "loss": 0.6494, "step": 18927 }, { "epoch": 0.5526262006948702, "grad_norm": 0.6322220064046694, "learning_rate": 9.941930251419304e-06, "loss": 0.5613, "step": 18928 }, { "epoch": 0.5526553969227176, "grad_norm": 0.6354039991626905, "learning_rate": 9.941281427412816e-06, "loss": 0.5981, "step": 18929 }, { "epoch": 0.552684593150565, "grad_norm": 0.6694396400343419, "learning_rate": 9.940632603406328e-06, "loss": 0.6797, "step": 18930 }, { "epoch": 0.5527137893784123, "grad_norm": 0.5773701769568774, "learning_rate": 9.93998377939984e-06, "loss": 0.5188, "step": 18931 }, { "epoch": 0.5527429856062597, "grad_norm": 0.6881638596738445, "learning_rate": 9.93933495539335e-06, "loss": 0.6892, "step": 18932 }, { "epoch": 0.552772181834107, "grad_norm": 0.6457326489091318, "learning_rate": 9.938686131386862e-06, "loss": 0.6308, "step": 18933 }, { "epoch": 0.5528013780619544, "grad_norm": 0.6408997769525985, "learning_rate": 9.938037307380374e-06, "loss": 0.5869, "step": 18934 }, { "epoch": 0.5528305742898018, "grad_norm": 0.6999996795795518, "learning_rate": 9.937388483373886e-06, "loss": 0.6586, "step": 18935 }, { "epoch": 0.5528597705176491, "grad_norm": 0.6447385129275816, "learning_rate": 9.936739659367396e-06, "loss": 0.5489, "step": 18936 }, { "epoch": 0.5528889667454965, "grad_norm": 0.6577933901320598, "learning_rate": 9.936090835360908e-06, "loss": 0.6281, "step": 18937 }, { "epoch": 0.5529181629733438, "grad_norm": 0.6196661322825122, "learning_rate": 9.935442011354422e-06, "loss": 0.5777, "step": 18938 }, { "epoch": 0.5529473592011912, "grad_norm": 0.6400286181816986, "learning_rate": 9.934793187347932e-06, "loss": 0.6346, "step": 18939 }, { "epoch": 0.5529765554290386, "grad_norm": 0.6083994666409628, "learning_rate": 9.934144363341444e-06, "loss": 0.5321, "step": 18940 }, { "epoch": 0.5530057516568859, "grad_norm": 0.6098701349191414, "learning_rate": 9.933495539334957e-06, "loss": 0.5275, "step": 18941 }, { "epoch": 0.5530349478847333, "grad_norm": 0.7014927097473266, "learning_rate": 9.932846715328469e-06, "loss": 0.6923, "step": 18942 }, { "epoch": 0.5530641441125806, "grad_norm": 0.6255953433728008, "learning_rate": 9.932197891321979e-06, "loss": 0.5774, "step": 18943 }, { "epoch": 0.553093340340428, "grad_norm": 0.5951515114186259, "learning_rate": 9.931549067315491e-06, "loss": 0.5499, "step": 18944 }, { "epoch": 0.5531225365682754, "grad_norm": 0.665445575546448, "learning_rate": 9.930900243309003e-06, "loss": 0.6318, "step": 18945 }, { "epoch": 0.5531517327961227, "grad_norm": 0.65998721968309, "learning_rate": 9.930251419302515e-06, "loss": 0.6216, "step": 18946 }, { "epoch": 0.5531809290239701, "grad_norm": 0.6052059473652815, "learning_rate": 9.929602595296027e-06, "loss": 0.5514, "step": 18947 }, { "epoch": 0.5532101252518175, "grad_norm": 0.6583984704746126, "learning_rate": 9.928953771289539e-06, "loss": 0.6153, "step": 18948 }, { "epoch": 0.5532393214796648, "grad_norm": 0.6754124564452306, "learning_rate": 9.928304947283051e-06, "loss": 0.6956, "step": 18949 }, { "epoch": 0.5532685177075122, "grad_norm": 0.6385867395730072, "learning_rate": 9.927656123276563e-06, "loss": 0.6247, "step": 18950 }, { "epoch": 0.5532977139353595, "grad_norm": 0.5727712043302354, "learning_rate": 9.927007299270073e-06, "loss": 0.5177, "step": 18951 }, { "epoch": 0.5533269101632069, "grad_norm": 0.6671437421285433, "learning_rate": 9.926358475263585e-06, "loss": 0.6492, "step": 18952 }, { "epoch": 0.5533561063910543, "grad_norm": 0.7031861715749734, "learning_rate": 9.925709651257097e-06, "loss": 0.7125, "step": 18953 }, { "epoch": 0.5533853026189016, "grad_norm": 0.6248272803231683, "learning_rate": 9.92506082725061e-06, "loss": 0.5764, "step": 18954 }, { "epoch": 0.553414498846749, "grad_norm": 0.6424212043450169, "learning_rate": 9.92441200324412e-06, "loss": 0.6195, "step": 18955 }, { "epoch": 0.5534436950745963, "grad_norm": 0.6115827863447509, "learning_rate": 9.923763179237632e-06, "loss": 0.5533, "step": 18956 }, { "epoch": 0.5534728913024437, "grad_norm": 0.6902681319454389, "learning_rate": 9.923114355231146e-06, "loss": 0.6798, "step": 18957 }, { "epoch": 0.5535020875302911, "grad_norm": 0.66568780860904, "learning_rate": 9.922465531224656e-06, "loss": 0.6005, "step": 18958 }, { "epoch": 0.5535312837581384, "grad_norm": 0.6317731459908827, "learning_rate": 9.921816707218168e-06, "loss": 0.6003, "step": 18959 }, { "epoch": 0.5535604799859858, "grad_norm": 0.6850265190674842, "learning_rate": 9.92116788321168e-06, "loss": 0.6368, "step": 18960 }, { "epoch": 0.5535896762138331, "grad_norm": 0.6801740076218082, "learning_rate": 9.920519059205192e-06, "loss": 0.6805, "step": 18961 }, { "epoch": 0.5536188724416805, "grad_norm": 0.6872324059790738, "learning_rate": 9.919870235198702e-06, "loss": 0.6515, "step": 18962 }, { "epoch": 0.5536480686695279, "grad_norm": 0.6191842890075703, "learning_rate": 9.919221411192214e-06, "loss": 0.5821, "step": 18963 }, { "epoch": 0.5536772648973752, "grad_norm": 0.5960048465368355, "learning_rate": 9.918572587185726e-06, "loss": 0.5546, "step": 18964 }, { "epoch": 0.5537064611252226, "grad_norm": 0.6362892837699936, "learning_rate": 9.917923763179238e-06, "loss": 0.6105, "step": 18965 }, { "epoch": 0.55373565735307, "grad_norm": 0.6218694178419092, "learning_rate": 9.91727493917275e-06, "loss": 0.5608, "step": 18966 }, { "epoch": 0.5537648535809173, "grad_norm": 0.688232062620721, "learning_rate": 9.916626115166262e-06, "loss": 0.6504, "step": 18967 }, { "epoch": 0.5537940498087647, "grad_norm": 0.6683598318802214, "learning_rate": 9.915977291159774e-06, "loss": 0.6619, "step": 18968 }, { "epoch": 0.553823246036612, "grad_norm": 0.6410381491189037, "learning_rate": 9.915328467153286e-06, "loss": 0.5896, "step": 18969 }, { "epoch": 0.5538524422644594, "grad_norm": 0.609446328942897, "learning_rate": 9.914679643146797e-06, "loss": 0.5494, "step": 18970 }, { "epoch": 0.5538816384923068, "grad_norm": 0.61939640438935, "learning_rate": 9.914030819140309e-06, "loss": 0.5836, "step": 18971 }, { "epoch": 0.5539108347201541, "grad_norm": 0.644605271121469, "learning_rate": 9.913381995133821e-06, "loss": 0.629, "step": 18972 }, { "epoch": 0.5539400309480015, "grad_norm": 0.6226906666082413, "learning_rate": 9.912733171127333e-06, "loss": 0.5424, "step": 18973 }, { "epoch": 0.5539692271758488, "grad_norm": 0.5790136471523459, "learning_rate": 9.912084347120843e-06, "loss": 0.5143, "step": 18974 }, { "epoch": 0.5539984234036962, "grad_norm": 0.6759358747997762, "learning_rate": 9.911435523114355e-06, "loss": 0.7182, "step": 18975 }, { "epoch": 0.5540276196315436, "grad_norm": 0.6188276895599948, "learning_rate": 9.910786699107869e-06, "loss": 0.5952, "step": 18976 }, { "epoch": 0.5540568158593909, "grad_norm": 0.677466044058787, "learning_rate": 9.91013787510138e-06, "loss": 0.6454, "step": 18977 }, { "epoch": 0.5540860120872383, "grad_norm": 0.6535425561092426, "learning_rate": 9.909489051094891e-06, "loss": 0.5867, "step": 18978 }, { "epoch": 0.5541152083150856, "grad_norm": 0.9290534543778362, "learning_rate": 9.908840227088403e-06, "loss": 0.7267, "step": 18979 }, { "epoch": 0.554144404542933, "grad_norm": 0.6758442418707835, "learning_rate": 9.908191403081915e-06, "loss": 0.6306, "step": 18980 }, { "epoch": 0.5541736007707804, "grad_norm": 0.6795708560674357, "learning_rate": 9.907542579075426e-06, "loss": 0.6263, "step": 18981 }, { "epoch": 0.5542027969986277, "grad_norm": 0.6379730861490821, "learning_rate": 9.906893755068938e-06, "loss": 0.6235, "step": 18982 }, { "epoch": 0.5542319932264751, "grad_norm": 0.6575741377298591, "learning_rate": 9.90624493106245e-06, "loss": 0.6151, "step": 18983 }, { "epoch": 0.5542611894543225, "grad_norm": 0.6718831113049395, "learning_rate": 9.905596107055962e-06, "loss": 0.6434, "step": 18984 }, { "epoch": 0.5542903856821698, "grad_norm": 0.6535601918472583, "learning_rate": 9.904947283049474e-06, "loss": 0.5969, "step": 18985 }, { "epoch": 0.5543195819100172, "grad_norm": 0.6800418875561948, "learning_rate": 9.904298459042986e-06, "loss": 0.7241, "step": 18986 }, { "epoch": 0.5543487781378645, "grad_norm": 0.618936289265899, "learning_rate": 9.903649635036498e-06, "loss": 0.5563, "step": 18987 }, { "epoch": 0.5543779743657119, "grad_norm": 0.653214491364389, "learning_rate": 9.90300081103001e-06, "loss": 0.6296, "step": 18988 }, { "epoch": 0.5544071705935593, "grad_norm": 0.6890659850819811, "learning_rate": 9.90235198702352e-06, "loss": 0.6557, "step": 18989 }, { "epoch": 0.5544363668214066, "grad_norm": 0.6633027781840297, "learning_rate": 9.901703163017032e-06, "loss": 0.6169, "step": 18990 }, { "epoch": 0.554465563049254, "grad_norm": 0.6334963519137216, "learning_rate": 9.901054339010544e-06, "loss": 0.5364, "step": 18991 }, { "epoch": 0.5544947592771013, "grad_norm": 0.606501438935439, "learning_rate": 9.900405515004056e-06, "loss": 0.5829, "step": 18992 }, { "epoch": 0.5545239555049487, "grad_norm": 0.6564987342212539, "learning_rate": 9.899756690997567e-06, "loss": 0.6133, "step": 18993 }, { "epoch": 0.5545531517327961, "grad_norm": 0.6463794217346378, "learning_rate": 9.899107866991079e-06, "loss": 0.5955, "step": 18994 }, { "epoch": 0.5545823479606435, "grad_norm": 0.653203374894616, "learning_rate": 9.898459042984592e-06, "loss": 0.6497, "step": 18995 }, { "epoch": 0.5546115441884909, "grad_norm": 0.667849264737215, "learning_rate": 9.897810218978103e-06, "loss": 0.5921, "step": 18996 }, { "epoch": 0.5546407404163383, "grad_norm": 0.6269475150336136, "learning_rate": 9.897161394971615e-06, "loss": 0.5935, "step": 18997 }, { "epoch": 0.5546699366441856, "grad_norm": 0.6085020884389672, "learning_rate": 9.896512570965127e-06, "loss": 0.5446, "step": 18998 }, { "epoch": 0.554699132872033, "grad_norm": 0.6805211384046319, "learning_rate": 9.895863746958639e-06, "loss": 0.6278, "step": 18999 }, { "epoch": 0.5547283290998803, "grad_norm": 0.6333546739475658, "learning_rate": 9.89521492295215e-06, "loss": 0.601, "step": 19000 }, { "epoch": 0.5547575253277277, "grad_norm": 0.6632124888093421, "learning_rate": 9.894566098945661e-06, "loss": 0.6418, "step": 19001 }, { "epoch": 0.5547867215555751, "grad_norm": 0.6309193534880125, "learning_rate": 9.893917274939173e-06, "loss": 0.6239, "step": 19002 }, { "epoch": 0.5548159177834224, "grad_norm": 0.6556294729881639, "learning_rate": 9.893268450932685e-06, "loss": 0.6108, "step": 19003 }, { "epoch": 0.5548451140112698, "grad_norm": 0.63899690884639, "learning_rate": 9.892619626926197e-06, "loss": 0.5704, "step": 19004 }, { "epoch": 0.5548743102391172, "grad_norm": 0.6426489052649502, "learning_rate": 9.89197080291971e-06, "loss": 0.6413, "step": 19005 }, { "epoch": 0.5549035064669645, "grad_norm": 0.7273369960084516, "learning_rate": 9.891321978913221e-06, "loss": 0.6694, "step": 19006 }, { "epoch": 0.5549327026948119, "grad_norm": 0.5969289791404575, "learning_rate": 9.890673154906733e-06, "loss": 0.5511, "step": 19007 }, { "epoch": 0.5549618989226592, "grad_norm": 0.6541590015209645, "learning_rate": 9.890024330900244e-06, "loss": 0.6269, "step": 19008 }, { "epoch": 0.5549910951505066, "grad_norm": 0.6394218450456401, "learning_rate": 9.889375506893756e-06, "loss": 0.6279, "step": 19009 }, { "epoch": 0.555020291378354, "grad_norm": 0.6876428712627322, "learning_rate": 9.888726682887268e-06, "loss": 0.6307, "step": 19010 }, { "epoch": 0.5550494876062013, "grad_norm": 0.6515925827970809, "learning_rate": 9.88807785888078e-06, "loss": 0.62, "step": 19011 }, { "epoch": 0.5550786838340487, "grad_norm": 0.5979319958269704, "learning_rate": 9.88742903487429e-06, "loss": 0.5556, "step": 19012 }, { "epoch": 0.555107880061896, "grad_norm": 0.6590950355093987, "learning_rate": 9.886780210867802e-06, "loss": 0.6934, "step": 19013 }, { "epoch": 0.5551370762897434, "grad_norm": 0.6297883335775264, "learning_rate": 9.886131386861316e-06, "loss": 0.6429, "step": 19014 }, { "epoch": 0.5551662725175908, "grad_norm": 0.6695893342296017, "learning_rate": 9.885482562854826e-06, "loss": 0.6681, "step": 19015 }, { "epoch": 0.5551954687454381, "grad_norm": 0.7419227768564699, "learning_rate": 9.884833738848338e-06, "loss": 0.7361, "step": 19016 }, { "epoch": 0.5552246649732855, "grad_norm": 0.6495780802286125, "learning_rate": 9.88418491484185e-06, "loss": 0.6082, "step": 19017 }, { "epoch": 0.5552538612011328, "grad_norm": 0.7126334582344582, "learning_rate": 9.883536090835362e-06, "loss": 0.6789, "step": 19018 }, { "epoch": 0.5552830574289802, "grad_norm": 0.6544896633773271, "learning_rate": 9.882887266828873e-06, "loss": 0.6433, "step": 19019 }, { "epoch": 0.5553122536568276, "grad_norm": 0.6355453842088206, "learning_rate": 9.882238442822385e-06, "loss": 0.6379, "step": 19020 }, { "epoch": 0.5553414498846749, "grad_norm": 0.6598335697107336, "learning_rate": 9.881589618815897e-06, "loss": 0.6439, "step": 19021 }, { "epoch": 0.5553706461125223, "grad_norm": 0.6743759453347228, "learning_rate": 9.880940794809409e-06, "loss": 0.7025, "step": 19022 }, { "epoch": 0.5553998423403697, "grad_norm": 0.7885257897467067, "learning_rate": 9.88029197080292e-06, "loss": 0.6575, "step": 19023 }, { "epoch": 0.555429038568217, "grad_norm": 0.6196762120474636, "learning_rate": 9.879643146796433e-06, "loss": 0.595, "step": 19024 }, { "epoch": 0.5554582347960644, "grad_norm": 0.6080427591705351, "learning_rate": 9.878994322789945e-06, "loss": 0.5456, "step": 19025 }, { "epoch": 0.5554874310239117, "grad_norm": 0.6248610302824754, "learning_rate": 9.878345498783457e-06, "loss": 0.5933, "step": 19026 }, { "epoch": 0.5555166272517591, "grad_norm": 0.7229870772519523, "learning_rate": 9.877696674776967e-06, "loss": 0.7288, "step": 19027 }, { "epoch": 0.5555458234796065, "grad_norm": 0.6484100871571199, "learning_rate": 9.877047850770479e-06, "loss": 0.5936, "step": 19028 }, { "epoch": 0.5555750197074538, "grad_norm": 0.6694033850232541, "learning_rate": 9.876399026763991e-06, "loss": 0.6329, "step": 19029 }, { "epoch": 0.5556042159353012, "grad_norm": 0.6363467772561422, "learning_rate": 9.875750202757503e-06, "loss": 0.6395, "step": 19030 }, { "epoch": 0.5556334121631485, "grad_norm": 0.7190892913774966, "learning_rate": 9.875101378751014e-06, "loss": 0.7251, "step": 19031 }, { "epoch": 0.5556626083909959, "grad_norm": 0.6269932474849987, "learning_rate": 9.874452554744527e-06, "loss": 0.5907, "step": 19032 }, { "epoch": 0.5556918046188433, "grad_norm": 0.6765189478702498, "learning_rate": 9.87380373073804e-06, "loss": 0.6901, "step": 19033 }, { "epoch": 0.5557210008466906, "grad_norm": 0.6436378626680583, "learning_rate": 9.87315490673155e-06, "loss": 0.6335, "step": 19034 }, { "epoch": 0.555750197074538, "grad_norm": 0.5784460142744259, "learning_rate": 9.872506082725062e-06, "loss": 0.5186, "step": 19035 }, { "epoch": 0.5557793933023853, "grad_norm": 0.747678151505553, "learning_rate": 9.871857258718574e-06, "loss": 0.6078, "step": 19036 }, { "epoch": 0.5558085895302327, "grad_norm": 0.6202682072948938, "learning_rate": 9.871208434712086e-06, "loss": 0.5754, "step": 19037 }, { "epoch": 0.5558377857580801, "grad_norm": 0.6642631552007215, "learning_rate": 9.870559610705596e-06, "loss": 0.6553, "step": 19038 }, { "epoch": 0.5558669819859274, "grad_norm": 0.689581963238506, "learning_rate": 9.869910786699108e-06, "loss": 0.6733, "step": 19039 }, { "epoch": 0.5558961782137748, "grad_norm": 0.6435592587621844, "learning_rate": 9.86926196269262e-06, "loss": 0.6294, "step": 19040 }, { "epoch": 0.5559253744416222, "grad_norm": 0.5983167903083146, "learning_rate": 9.868613138686132e-06, "loss": 0.4906, "step": 19041 }, { "epoch": 0.5559545706694695, "grad_norm": 0.6668876242061228, "learning_rate": 9.867964314679644e-06, "loss": 0.676, "step": 19042 }, { "epoch": 0.5559837668973169, "grad_norm": 0.6193832032226362, "learning_rate": 9.867315490673156e-06, "loss": 0.5869, "step": 19043 }, { "epoch": 0.5560129631251642, "grad_norm": 0.6881002380717137, "learning_rate": 9.866666666666668e-06, "loss": 0.6325, "step": 19044 }, { "epoch": 0.5560421593530116, "grad_norm": 0.6416414349961469, "learning_rate": 9.866017842660179e-06, "loss": 0.6078, "step": 19045 }, { "epoch": 0.556071355580859, "grad_norm": 0.6520813108263868, "learning_rate": 9.86536901865369e-06, "loss": 0.5983, "step": 19046 }, { "epoch": 0.5561005518087063, "grad_norm": 0.6647578818965141, "learning_rate": 9.864720194647203e-06, "loss": 0.6285, "step": 19047 }, { "epoch": 0.5561297480365537, "grad_norm": 0.6468099308501449, "learning_rate": 9.864071370640715e-06, "loss": 0.601, "step": 19048 }, { "epoch": 0.556158944264401, "grad_norm": 0.6589638887931736, "learning_rate": 9.863422546634227e-06, "loss": 0.6035, "step": 19049 }, { "epoch": 0.5561881404922484, "grad_norm": 0.6631629223222362, "learning_rate": 9.862773722627737e-06, "loss": 0.6137, "step": 19050 }, { "epoch": 0.5562173367200958, "grad_norm": 0.612734338402759, "learning_rate": 9.86212489862125e-06, "loss": 0.5839, "step": 19051 }, { "epoch": 0.5562465329479431, "grad_norm": 0.6459149566239577, "learning_rate": 9.861476074614763e-06, "loss": 0.634, "step": 19052 }, { "epoch": 0.5562757291757905, "grad_norm": 0.6438581042867731, "learning_rate": 9.860827250608273e-06, "loss": 0.6166, "step": 19053 }, { "epoch": 0.5563049254036379, "grad_norm": 0.7340385728683627, "learning_rate": 9.860178426601785e-06, "loss": 0.7039, "step": 19054 }, { "epoch": 0.5563341216314852, "grad_norm": 0.6498564179579093, "learning_rate": 9.859529602595297e-06, "loss": 0.6555, "step": 19055 }, { "epoch": 0.5563633178593326, "grad_norm": 0.6781044989864768, "learning_rate": 9.858880778588809e-06, "loss": 0.6716, "step": 19056 }, { "epoch": 0.5563925140871799, "grad_norm": 0.7289765272740012, "learning_rate": 9.85823195458232e-06, "loss": 0.7502, "step": 19057 }, { "epoch": 0.5564217103150273, "grad_norm": 0.6346324925699962, "learning_rate": 9.857583130575831e-06, "loss": 0.5952, "step": 19058 }, { "epoch": 0.5564509065428747, "grad_norm": 0.6136562124936019, "learning_rate": 9.856934306569344e-06, "loss": 0.5532, "step": 19059 }, { "epoch": 0.556480102770722, "grad_norm": 0.6385060257624912, "learning_rate": 9.856285482562856e-06, "loss": 0.6042, "step": 19060 }, { "epoch": 0.5565092989985694, "grad_norm": 0.6282257351708649, "learning_rate": 9.855636658556368e-06, "loss": 0.5685, "step": 19061 }, { "epoch": 0.5565384952264167, "grad_norm": 0.6460147597550098, "learning_rate": 9.85498783454988e-06, "loss": 0.6357, "step": 19062 }, { "epoch": 0.5565676914542641, "grad_norm": 0.6402323062624056, "learning_rate": 9.854339010543392e-06, "loss": 0.5886, "step": 19063 }, { "epoch": 0.5565968876821115, "grad_norm": 0.6350983104758364, "learning_rate": 9.853690186536902e-06, "loss": 0.6347, "step": 19064 }, { "epoch": 0.5566260839099588, "grad_norm": 0.6811260943574879, "learning_rate": 9.853041362530414e-06, "loss": 0.6431, "step": 19065 }, { "epoch": 0.5566552801378062, "grad_norm": 0.6315922611523256, "learning_rate": 9.852392538523926e-06, "loss": 0.5328, "step": 19066 }, { "epoch": 0.5566844763656535, "grad_norm": 0.6699503932467836, "learning_rate": 9.851743714517438e-06, "loss": 0.6552, "step": 19067 }, { "epoch": 0.5567136725935009, "grad_norm": 0.6271711226897199, "learning_rate": 9.85109489051095e-06, "loss": 0.5914, "step": 19068 }, { "epoch": 0.5567428688213483, "grad_norm": 0.6383239219153766, "learning_rate": 9.85044606650446e-06, "loss": 0.5807, "step": 19069 }, { "epoch": 0.5567720650491956, "grad_norm": 0.6125328447975211, "learning_rate": 9.849797242497974e-06, "loss": 0.5303, "step": 19070 }, { "epoch": 0.556801261277043, "grad_norm": 0.6633991963935929, "learning_rate": 9.849148418491486e-06, "loss": 0.643, "step": 19071 }, { "epoch": 0.5568304575048904, "grad_norm": 0.6376703269070578, "learning_rate": 9.848499594484996e-06, "loss": 0.6592, "step": 19072 }, { "epoch": 0.5568596537327377, "grad_norm": 0.6178508034485782, "learning_rate": 9.847850770478509e-06, "loss": 0.6374, "step": 19073 }, { "epoch": 0.5568888499605851, "grad_norm": 0.6392492247614573, "learning_rate": 9.84720194647202e-06, "loss": 0.5747, "step": 19074 }, { "epoch": 0.5569180461884324, "grad_norm": 0.6525282709259324, "learning_rate": 9.846553122465533e-06, "loss": 0.6411, "step": 19075 }, { "epoch": 0.5569472424162798, "grad_norm": 0.6746576966601029, "learning_rate": 9.845904298459043e-06, "loss": 0.6901, "step": 19076 }, { "epoch": 0.5569764386441272, "grad_norm": 0.6390311868182355, "learning_rate": 9.845255474452555e-06, "loss": 0.5896, "step": 19077 }, { "epoch": 0.5570056348719745, "grad_norm": 0.6648251733607905, "learning_rate": 9.844606650446067e-06, "loss": 0.6886, "step": 19078 }, { "epoch": 0.5570348310998219, "grad_norm": 0.6829640158322215, "learning_rate": 9.843957826439579e-06, "loss": 0.6471, "step": 19079 }, { "epoch": 0.5570640273276692, "grad_norm": 0.6670750812756735, "learning_rate": 9.843309002433091e-06, "loss": 0.6436, "step": 19080 }, { "epoch": 0.5570932235555166, "grad_norm": 0.6542284970875831, "learning_rate": 9.842660178426603e-06, "loss": 0.6076, "step": 19081 }, { "epoch": 0.557122419783364, "grad_norm": 0.6629509626196057, "learning_rate": 9.842011354420115e-06, "loss": 0.5612, "step": 19082 }, { "epoch": 0.5571516160112113, "grad_norm": 0.6473589134044242, "learning_rate": 9.841362530413625e-06, "loss": 0.5953, "step": 19083 }, { "epoch": 0.5571808122390587, "grad_norm": 0.6139835398799517, "learning_rate": 9.840713706407137e-06, "loss": 0.5926, "step": 19084 }, { "epoch": 0.557210008466906, "grad_norm": 0.6100190416438848, "learning_rate": 9.84006488240065e-06, "loss": 0.5693, "step": 19085 }, { "epoch": 0.5572392046947534, "grad_norm": 0.6649046468061052, "learning_rate": 9.839416058394161e-06, "loss": 0.6461, "step": 19086 }, { "epoch": 0.5572684009226008, "grad_norm": 0.6250436498761933, "learning_rate": 9.838767234387674e-06, "loss": 0.622, "step": 19087 }, { "epoch": 0.5572975971504481, "grad_norm": 0.6517511985574507, "learning_rate": 9.838118410381184e-06, "loss": 0.6524, "step": 19088 }, { "epoch": 0.5573267933782955, "grad_norm": 0.6389169620367645, "learning_rate": 9.837469586374698e-06, "loss": 0.6253, "step": 19089 }, { "epoch": 0.5573559896061429, "grad_norm": 0.667122207627523, "learning_rate": 9.83682076236821e-06, "loss": 0.5928, "step": 19090 }, { "epoch": 0.5573851858339902, "grad_norm": 0.6902600098429951, "learning_rate": 9.83617193836172e-06, "loss": 0.6693, "step": 19091 }, { "epoch": 0.5574143820618376, "grad_norm": 0.6085525511244017, "learning_rate": 9.835523114355232e-06, "loss": 0.5295, "step": 19092 }, { "epoch": 0.5574435782896849, "grad_norm": 0.616477797941814, "learning_rate": 9.834874290348744e-06, "loss": 0.584, "step": 19093 }, { "epoch": 0.5574727745175323, "grad_norm": 0.6655681945774374, "learning_rate": 9.834225466342256e-06, "loss": 0.6143, "step": 19094 }, { "epoch": 0.5575019707453797, "grad_norm": 0.6052011608541135, "learning_rate": 9.833576642335766e-06, "loss": 0.5463, "step": 19095 }, { "epoch": 0.557531166973227, "grad_norm": 0.723601881372007, "learning_rate": 9.832927818329278e-06, "loss": 0.6439, "step": 19096 }, { "epoch": 0.5575603632010744, "grad_norm": 0.6468263372028005, "learning_rate": 9.83227899432279e-06, "loss": 0.624, "step": 19097 }, { "epoch": 0.5575895594289217, "grad_norm": 0.6603679589288028, "learning_rate": 9.831630170316302e-06, "loss": 0.6215, "step": 19098 }, { "epoch": 0.5576187556567691, "grad_norm": 0.6651457405527268, "learning_rate": 9.830981346309814e-06, "loss": 0.6459, "step": 19099 }, { "epoch": 0.5576479518846165, "grad_norm": 0.6404075263887756, "learning_rate": 9.830332522303326e-06, "loss": 0.6252, "step": 19100 }, { "epoch": 0.5576771481124638, "grad_norm": 0.669264870923843, "learning_rate": 9.829683698296839e-06, "loss": 0.6575, "step": 19101 }, { "epoch": 0.5577063443403112, "grad_norm": 0.56370753076228, "learning_rate": 9.829034874290349e-06, "loss": 0.4838, "step": 19102 }, { "epoch": 0.5577355405681585, "grad_norm": 0.6527942881677339, "learning_rate": 9.828386050283861e-06, "loss": 0.5974, "step": 19103 }, { "epoch": 0.5577647367960059, "grad_norm": 0.6201806315057481, "learning_rate": 9.827737226277373e-06, "loss": 0.6085, "step": 19104 }, { "epoch": 0.5577939330238533, "grad_norm": 0.6537465977768696, "learning_rate": 9.827088402270885e-06, "loss": 0.621, "step": 19105 }, { "epoch": 0.5578231292517006, "grad_norm": 0.6569418263343535, "learning_rate": 9.826439578264397e-06, "loss": 0.671, "step": 19106 }, { "epoch": 0.557852325479548, "grad_norm": 0.5904505662476588, "learning_rate": 9.825790754257907e-06, "loss": 0.5297, "step": 19107 }, { "epoch": 0.5578815217073954, "grad_norm": 0.6213701312823541, "learning_rate": 9.825141930251421e-06, "loss": 0.5796, "step": 19108 }, { "epoch": 0.5579107179352427, "grad_norm": 0.6598366256150152, "learning_rate": 9.824493106244933e-06, "loss": 0.6353, "step": 19109 }, { "epoch": 0.5579399141630901, "grad_norm": 0.6294068359764895, "learning_rate": 9.823844282238443e-06, "loss": 0.568, "step": 19110 }, { "epoch": 0.5579691103909374, "grad_norm": 0.6179599841072257, "learning_rate": 9.823195458231955e-06, "loss": 0.5985, "step": 19111 }, { "epoch": 0.5579983066187848, "grad_norm": 0.6303203150031924, "learning_rate": 9.822546634225467e-06, "loss": 0.5897, "step": 19112 }, { "epoch": 0.5580275028466322, "grad_norm": 0.6004684761306581, "learning_rate": 9.82189781021898e-06, "loss": 0.5176, "step": 19113 }, { "epoch": 0.5580566990744795, "grad_norm": 0.6449753242104529, "learning_rate": 9.82124898621249e-06, "loss": 0.6108, "step": 19114 }, { "epoch": 0.5580858953023269, "grad_norm": 0.607822623058761, "learning_rate": 9.820600162206002e-06, "loss": 0.6094, "step": 19115 }, { "epoch": 0.5581150915301744, "grad_norm": 0.6228029294244734, "learning_rate": 9.819951338199514e-06, "loss": 0.5663, "step": 19116 }, { "epoch": 0.5581442877580217, "grad_norm": 0.6597168247025057, "learning_rate": 9.819302514193026e-06, "loss": 0.6718, "step": 19117 }, { "epoch": 0.5581734839858691, "grad_norm": 0.6405730008782501, "learning_rate": 9.818653690186538e-06, "loss": 0.5818, "step": 19118 }, { "epoch": 0.5582026802137164, "grad_norm": 0.6789775646283749, "learning_rate": 9.81800486618005e-06, "loss": 0.6592, "step": 19119 }, { "epoch": 0.5582318764415638, "grad_norm": 0.6781565131017473, "learning_rate": 9.817356042173562e-06, "loss": 0.6573, "step": 19120 }, { "epoch": 0.5582610726694112, "grad_norm": 0.675550336004329, "learning_rate": 9.816707218167072e-06, "loss": 0.6485, "step": 19121 }, { "epoch": 0.5582902688972585, "grad_norm": 0.6508541533674417, "learning_rate": 9.816058394160584e-06, "loss": 0.5925, "step": 19122 }, { "epoch": 0.5583194651251059, "grad_norm": 0.6375585286905857, "learning_rate": 9.815409570154096e-06, "loss": 0.6092, "step": 19123 }, { "epoch": 0.5583486613529532, "grad_norm": 0.6275527398849503, "learning_rate": 9.814760746147608e-06, "loss": 0.6053, "step": 19124 }, { "epoch": 0.5583778575808006, "grad_norm": 0.7549902441789945, "learning_rate": 9.81411192214112e-06, "loss": 0.742, "step": 19125 }, { "epoch": 0.558407053808648, "grad_norm": 0.6325578922316001, "learning_rate": 9.81346309813463e-06, "loss": 0.5904, "step": 19126 }, { "epoch": 0.5584362500364953, "grad_norm": 0.6483962247574759, "learning_rate": 9.812814274128144e-06, "loss": 0.634, "step": 19127 }, { "epoch": 0.5584654462643427, "grad_norm": 0.6232587673971798, "learning_rate": 9.812165450121656e-06, "loss": 0.57, "step": 19128 }, { "epoch": 0.55849464249219, "grad_norm": 0.6535401033692488, "learning_rate": 9.811516626115167e-06, "loss": 0.6346, "step": 19129 }, { "epoch": 0.5585238387200374, "grad_norm": 0.5921370369883258, "learning_rate": 9.810867802108679e-06, "loss": 0.5296, "step": 19130 }, { "epoch": 0.5585530349478848, "grad_norm": 0.6279216533247532, "learning_rate": 9.81021897810219e-06, "loss": 0.6146, "step": 19131 }, { "epoch": 0.5585822311757321, "grad_norm": 0.6939223489261066, "learning_rate": 9.809570154095703e-06, "loss": 0.7072, "step": 19132 }, { "epoch": 0.5586114274035795, "grad_norm": 0.655159450923413, "learning_rate": 9.808921330089213e-06, "loss": 0.5728, "step": 19133 }, { "epoch": 0.5586406236314269, "grad_norm": 0.6181650420216634, "learning_rate": 9.808272506082725e-06, "loss": 0.5355, "step": 19134 }, { "epoch": 0.5586698198592742, "grad_norm": 0.6144561562419649, "learning_rate": 9.807623682076237e-06, "loss": 0.6153, "step": 19135 }, { "epoch": 0.5586990160871216, "grad_norm": 0.6180133638782165, "learning_rate": 9.80697485806975e-06, "loss": 0.5745, "step": 19136 }, { "epoch": 0.5587282123149689, "grad_norm": 0.631935692628752, "learning_rate": 9.806326034063261e-06, "loss": 0.6172, "step": 19137 }, { "epoch": 0.5587574085428163, "grad_norm": 0.6347172839296784, "learning_rate": 9.805677210056773e-06, "loss": 0.6164, "step": 19138 }, { "epoch": 0.5587866047706637, "grad_norm": 0.6238627130620751, "learning_rate": 9.805028386050285e-06, "loss": 0.583, "step": 19139 }, { "epoch": 0.558815800998511, "grad_norm": 0.6441802277256968, "learning_rate": 9.804379562043796e-06, "loss": 0.6282, "step": 19140 }, { "epoch": 0.5588449972263584, "grad_norm": 0.6565734475935053, "learning_rate": 9.803730738037308e-06, "loss": 0.6003, "step": 19141 }, { "epoch": 0.5588741934542057, "grad_norm": 0.6780002774290065, "learning_rate": 9.80308191403082e-06, "loss": 0.6639, "step": 19142 }, { "epoch": 0.5589033896820531, "grad_norm": 0.6131221841689549, "learning_rate": 9.802433090024332e-06, "loss": 0.5584, "step": 19143 }, { "epoch": 0.5589325859099005, "grad_norm": 0.768992660735869, "learning_rate": 9.801784266017844e-06, "loss": 0.5392, "step": 19144 }, { "epoch": 0.5589617821377478, "grad_norm": 0.6881492245734591, "learning_rate": 9.801135442011354e-06, "loss": 0.6405, "step": 19145 }, { "epoch": 0.5589909783655952, "grad_norm": 0.6553429510343881, "learning_rate": 9.800486618004868e-06, "loss": 0.6438, "step": 19146 }, { "epoch": 0.5590201745934426, "grad_norm": 0.6222699504930072, "learning_rate": 9.79983779399838e-06, "loss": 0.5718, "step": 19147 }, { "epoch": 0.5590493708212899, "grad_norm": 0.7285574459707103, "learning_rate": 9.79918896999189e-06, "loss": 0.6241, "step": 19148 }, { "epoch": 0.5590785670491373, "grad_norm": 0.6996297304631172, "learning_rate": 9.798540145985402e-06, "loss": 0.6818, "step": 19149 }, { "epoch": 0.5591077632769846, "grad_norm": 0.6719380464472101, "learning_rate": 9.797891321978914e-06, "loss": 0.6507, "step": 19150 }, { "epoch": 0.559136959504832, "grad_norm": 0.6312510007570479, "learning_rate": 9.797242497972426e-06, "loss": 0.6014, "step": 19151 }, { "epoch": 0.5591661557326794, "grad_norm": 0.6556654631282532, "learning_rate": 9.796593673965937e-06, "loss": 0.6502, "step": 19152 }, { "epoch": 0.5591953519605267, "grad_norm": 0.6472654730702391, "learning_rate": 9.795944849959449e-06, "loss": 0.6069, "step": 19153 }, { "epoch": 0.5592245481883741, "grad_norm": 0.714006466907279, "learning_rate": 9.79529602595296e-06, "loss": 0.6726, "step": 19154 }, { "epoch": 0.5592537444162214, "grad_norm": 0.6795826539639963, "learning_rate": 9.794647201946473e-06, "loss": 0.6793, "step": 19155 }, { "epoch": 0.5592829406440688, "grad_norm": 0.6364637398334791, "learning_rate": 9.793998377939985e-06, "loss": 0.6089, "step": 19156 }, { "epoch": 0.5593121368719162, "grad_norm": 0.6859456903879039, "learning_rate": 9.793349553933497e-06, "loss": 0.6637, "step": 19157 }, { "epoch": 0.5593413330997635, "grad_norm": 0.6174253076494972, "learning_rate": 9.792700729927009e-06, "loss": 0.6017, "step": 19158 }, { "epoch": 0.5593705293276109, "grad_norm": 0.5858493841660376, "learning_rate": 9.792051905920519e-06, "loss": 0.5481, "step": 19159 }, { "epoch": 0.5593997255554582, "grad_norm": 0.6413385358220217, "learning_rate": 9.791403081914031e-06, "loss": 0.6238, "step": 19160 }, { "epoch": 0.5594289217833056, "grad_norm": 0.6681902891736401, "learning_rate": 9.790754257907543e-06, "loss": 0.5991, "step": 19161 }, { "epoch": 0.559458118011153, "grad_norm": 0.6452218942637886, "learning_rate": 9.790105433901055e-06, "loss": 0.6195, "step": 19162 }, { "epoch": 0.5594873142390003, "grad_norm": 0.688094024787255, "learning_rate": 9.789456609894567e-06, "loss": 0.6055, "step": 19163 }, { "epoch": 0.5595165104668477, "grad_norm": 0.6540725349608815, "learning_rate": 9.788807785888078e-06, "loss": 0.6612, "step": 19164 }, { "epoch": 0.559545706694695, "grad_norm": 0.6205092128621871, "learning_rate": 9.788158961881591e-06, "loss": 0.601, "step": 19165 }, { "epoch": 0.5595749029225424, "grad_norm": 0.6841280339035102, "learning_rate": 9.787510137875103e-06, "loss": 0.7021, "step": 19166 }, { "epoch": 0.5596040991503898, "grad_norm": 0.6134223865139935, "learning_rate": 9.786861313868614e-06, "loss": 0.5968, "step": 19167 }, { "epoch": 0.5596332953782371, "grad_norm": 0.6616554128103455, "learning_rate": 9.786212489862126e-06, "loss": 0.6627, "step": 19168 }, { "epoch": 0.5596624916060845, "grad_norm": 0.6246271746723715, "learning_rate": 9.785563665855638e-06, "loss": 0.5926, "step": 19169 }, { "epoch": 0.5596916878339319, "grad_norm": 0.645871753434506, "learning_rate": 9.78491484184915e-06, "loss": 0.6581, "step": 19170 }, { "epoch": 0.5597208840617792, "grad_norm": 0.6256210784044405, "learning_rate": 9.78426601784266e-06, "loss": 0.5834, "step": 19171 }, { "epoch": 0.5597500802896266, "grad_norm": 0.6205369530465927, "learning_rate": 9.783617193836172e-06, "loss": 0.552, "step": 19172 }, { "epoch": 0.559779276517474, "grad_norm": 0.6173981426054195, "learning_rate": 9.782968369829684e-06, "loss": 0.6137, "step": 19173 }, { "epoch": 0.5598084727453213, "grad_norm": 0.6190848831304178, "learning_rate": 9.782319545823196e-06, "loss": 0.5615, "step": 19174 }, { "epoch": 0.5598376689731687, "grad_norm": 0.6120730843961786, "learning_rate": 9.781670721816708e-06, "loss": 0.5982, "step": 19175 }, { "epoch": 0.559866865201016, "grad_norm": 0.7186826551654719, "learning_rate": 9.78102189781022e-06, "loss": 0.6371, "step": 19176 }, { "epoch": 0.5598960614288634, "grad_norm": 0.6805079178597846, "learning_rate": 9.780373073803732e-06, "loss": 0.6902, "step": 19177 }, { "epoch": 0.5599252576567108, "grad_norm": 0.6752953185002415, "learning_rate": 9.779724249797243e-06, "loss": 0.6932, "step": 19178 }, { "epoch": 0.5599544538845581, "grad_norm": 0.6150589306388976, "learning_rate": 9.779075425790755e-06, "loss": 0.5343, "step": 19179 }, { "epoch": 0.5599836501124055, "grad_norm": 0.8446812720902055, "learning_rate": 9.778426601784267e-06, "loss": 0.6659, "step": 19180 }, { "epoch": 0.5600128463402528, "grad_norm": 0.6648395090116904, "learning_rate": 9.777777777777779e-06, "loss": 0.6489, "step": 19181 }, { "epoch": 0.5600420425681002, "grad_norm": 0.6268432942333928, "learning_rate": 9.77712895377129e-06, "loss": 0.5812, "step": 19182 }, { "epoch": 0.5600712387959476, "grad_norm": 0.6037180727029876, "learning_rate": 9.776480129764803e-06, "loss": 0.5528, "step": 19183 }, { "epoch": 0.5601004350237949, "grad_norm": 0.6390239608211423, "learning_rate": 9.775831305758315e-06, "loss": 0.6095, "step": 19184 }, { "epoch": 0.5601296312516423, "grad_norm": 0.6387078433802376, "learning_rate": 9.775182481751827e-06, "loss": 0.6145, "step": 19185 }, { "epoch": 0.5601588274794896, "grad_norm": 0.6491598222628312, "learning_rate": 9.774533657745337e-06, "loss": 0.5956, "step": 19186 }, { "epoch": 0.560188023707337, "grad_norm": 0.6374192530316256, "learning_rate": 9.773884833738849e-06, "loss": 0.6007, "step": 19187 }, { "epoch": 0.5602172199351844, "grad_norm": 0.5938392062081396, "learning_rate": 9.773236009732361e-06, "loss": 0.5783, "step": 19188 }, { "epoch": 0.5602464161630317, "grad_norm": 0.6950311682244869, "learning_rate": 9.772587185725873e-06, "loss": 0.7018, "step": 19189 }, { "epoch": 0.5602756123908791, "grad_norm": 0.7419887173742205, "learning_rate": 9.771938361719384e-06, "loss": 0.6002, "step": 19190 }, { "epoch": 0.5603048086187264, "grad_norm": 0.634070061280057, "learning_rate": 9.771289537712896e-06, "loss": 0.6434, "step": 19191 }, { "epoch": 0.5603340048465738, "grad_norm": 0.6108643785775206, "learning_rate": 9.770640713706408e-06, "loss": 0.5761, "step": 19192 }, { "epoch": 0.5603632010744212, "grad_norm": 0.6867711226892892, "learning_rate": 9.76999188969992e-06, "loss": 0.676, "step": 19193 }, { "epoch": 0.5603923973022685, "grad_norm": 0.6447528027355686, "learning_rate": 9.769343065693432e-06, "loss": 0.5743, "step": 19194 }, { "epoch": 0.5604215935301159, "grad_norm": 0.6549911557205521, "learning_rate": 9.768694241686944e-06, "loss": 0.661, "step": 19195 }, { "epoch": 0.5604507897579633, "grad_norm": 0.62068912995536, "learning_rate": 9.768045417680456e-06, "loss": 0.5621, "step": 19196 }, { "epoch": 0.5604799859858106, "grad_norm": 0.6505065546463189, "learning_rate": 9.767396593673966e-06, "loss": 0.6508, "step": 19197 }, { "epoch": 0.560509182213658, "grad_norm": 0.6417605005487217, "learning_rate": 9.766747769667478e-06, "loss": 0.5979, "step": 19198 }, { "epoch": 0.5605383784415053, "grad_norm": 0.7060501383840299, "learning_rate": 9.76609894566099e-06, "loss": 0.6395, "step": 19199 }, { "epoch": 0.5605675746693527, "grad_norm": 0.6240376431952545, "learning_rate": 9.765450121654502e-06, "loss": 0.6269, "step": 19200 }, { "epoch": 0.5605967708972001, "grad_norm": 0.6772515983577515, "learning_rate": 9.764801297648014e-06, "loss": 0.6728, "step": 19201 }, { "epoch": 0.5606259671250474, "grad_norm": 0.6229382854240947, "learning_rate": 9.764152473641526e-06, "loss": 0.5609, "step": 19202 }, { "epoch": 0.5606551633528948, "grad_norm": 0.6215684436084198, "learning_rate": 9.763503649635038e-06, "loss": 0.5763, "step": 19203 }, { "epoch": 0.5606843595807421, "grad_norm": 0.6361665864694367, "learning_rate": 9.76285482562855e-06, "loss": 0.6168, "step": 19204 }, { "epoch": 0.5607135558085895, "grad_norm": 0.6371100946534323, "learning_rate": 9.76220600162206e-06, "loss": 0.6253, "step": 19205 }, { "epoch": 0.5607427520364369, "grad_norm": 0.9388281767614339, "learning_rate": 9.761557177615573e-06, "loss": 0.7363, "step": 19206 }, { "epoch": 0.5607719482642842, "grad_norm": 0.6820727924721399, "learning_rate": 9.760908353609085e-06, "loss": 0.6768, "step": 19207 }, { "epoch": 0.5608011444921316, "grad_norm": 0.6835562236608196, "learning_rate": 9.760259529602597e-06, "loss": 0.6493, "step": 19208 }, { "epoch": 0.560830340719979, "grad_norm": 0.5496347192422455, "learning_rate": 9.759610705596107e-06, "loss": 0.4886, "step": 19209 }, { "epoch": 0.5608595369478263, "grad_norm": 0.6497712776630522, "learning_rate": 9.758961881589619e-06, "loss": 0.6304, "step": 19210 }, { "epoch": 0.5608887331756737, "grad_norm": 0.656991440224187, "learning_rate": 9.758313057583131e-06, "loss": 0.629, "step": 19211 }, { "epoch": 0.560917929403521, "grad_norm": 0.6369435104992286, "learning_rate": 9.757664233576643e-06, "loss": 0.6181, "step": 19212 }, { "epoch": 0.5609471256313684, "grad_norm": 0.7107818860312992, "learning_rate": 9.757015409570155e-06, "loss": 0.7112, "step": 19213 }, { "epoch": 0.5609763218592158, "grad_norm": 0.6483736887922221, "learning_rate": 9.756366585563667e-06, "loss": 0.6013, "step": 19214 }, { "epoch": 0.5610055180870631, "grad_norm": 0.6753197110920548, "learning_rate": 9.755717761557179e-06, "loss": 0.6405, "step": 19215 }, { "epoch": 0.5610347143149105, "grad_norm": 0.6466407283831683, "learning_rate": 9.75506893755069e-06, "loss": 0.5744, "step": 19216 }, { "epoch": 0.5610639105427578, "grad_norm": 0.6558263500364712, "learning_rate": 9.754420113544201e-06, "loss": 0.6355, "step": 19217 }, { "epoch": 0.5610931067706052, "grad_norm": 0.602922048457524, "learning_rate": 9.753771289537713e-06, "loss": 0.5321, "step": 19218 }, { "epoch": 0.5611223029984526, "grad_norm": 0.6626622711468125, "learning_rate": 9.753122465531226e-06, "loss": 0.6029, "step": 19219 }, { "epoch": 0.5611514992262999, "grad_norm": 0.6873771116502129, "learning_rate": 9.752473641524738e-06, "loss": 0.6566, "step": 19220 }, { "epoch": 0.5611806954541473, "grad_norm": 0.6594169575595141, "learning_rate": 9.75182481751825e-06, "loss": 0.616, "step": 19221 }, { "epoch": 0.5612098916819946, "grad_norm": 0.6427188377238366, "learning_rate": 9.751175993511762e-06, "loss": 0.6378, "step": 19222 }, { "epoch": 0.561239087909842, "grad_norm": 0.5868145613533566, "learning_rate": 9.750527169505272e-06, "loss": 0.5137, "step": 19223 }, { "epoch": 0.5612682841376894, "grad_norm": 0.6670393294607284, "learning_rate": 9.749878345498784e-06, "loss": 0.5956, "step": 19224 }, { "epoch": 0.5612974803655367, "grad_norm": 0.6621122744322635, "learning_rate": 9.749229521492296e-06, "loss": 0.6726, "step": 19225 }, { "epoch": 0.5613266765933841, "grad_norm": 0.6361579564867462, "learning_rate": 9.748580697485808e-06, "loss": 0.5676, "step": 19226 }, { "epoch": 0.5613558728212314, "grad_norm": 0.6535582586942029, "learning_rate": 9.74793187347932e-06, "loss": 0.6067, "step": 19227 }, { "epoch": 0.5613850690490788, "grad_norm": 0.6086234197130932, "learning_rate": 9.74728304947283e-06, "loss": 0.5469, "step": 19228 }, { "epoch": 0.5614142652769262, "grad_norm": 0.6649368409628391, "learning_rate": 9.746634225466342e-06, "loss": 0.6334, "step": 19229 }, { "epoch": 0.5614434615047735, "grad_norm": 0.658432483517008, "learning_rate": 9.745985401459854e-06, "loss": 0.6651, "step": 19230 }, { "epoch": 0.5614726577326209, "grad_norm": 0.6312837191988461, "learning_rate": 9.745336577453366e-06, "loss": 0.5699, "step": 19231 }, { "epoch": 0.5615018539604683, "grad_norm": 0.6699233683750215, "learning_rate": 9.744687753446878e-06, "loss": 0.6638, "step": 19232 }, { "epoch": 0.5615310501883156, "grad_norm": 0.6085151790069165, "learning_rate": 9.74403892944039e-06, "loss": 0.5856, "step": 19233 }, { "epoch": 0.561560246416163, "grad_norm": 0.6909568527067736, "learning_rate": 9.743390105433903e-06, "loss": 0.7566, "step": 19234 }, { "epoch": 0.5615894426440103, "grad_norm": 0.7145200986573791, "learning_rate": 9.742741281427413e-06, "loss": 0.7073, "step": 19235 }, { "epoch": 0.5616186388718577, "grad_norm": 0.8996398457966749, "learning_rate": 9.742092457420925e-06, "loss": 0.7157, "step": 19236 }, { "epoch": 0.5616478350997052, "grad_norm": 0.6443083166748158, "learning_rate": 9.741443633414437e-06, "loss": 0.6675, "step": 19237 }, { "epoch": 0.5616770313275525, "grad_norm": 0.6478798854233234, "learning_rate": 9.740794809407949e-06, "loss": 0.6219, "step": 19238 }, { "epoch": 0.5617062275553999, "grad_norm": 0.6283528548559594, "learning_rate": 9.740145985401461e-06, "loss": 0.5624, "step": 19239 }, { "epoch": 0.5617354237832473, "grad_norm": 0.6312727254969677, "learning_rate": 9.739497161394973e-06, "loss": 0.5843, "step": 19240 }, { "epoch": 0.5617646200110946, "grad_norm": 0.6132199651236402, "learning_rate": 9.738848337388485e-06, "loss": 0.5783, "step": 19241 }, { "epoch": 0.561793816238942, "grad_norm": 0.6371626287113752, "learning_rate": 9.738199513381995e-06, "loss": 0.5918, "step": 19242 }, { "epoch": 0.5618230124667893, "grad_norm": 0.6352779972603108, "learning_rate": 9.737550689375507e-06, "loss": 0.6464, "step": 19243 }, { "epoch": 0.5618522086946367, "grad_norm": 0.6297503827438645, "learning_rate": 9.73690186536902e-06, "loss": 0.5793, "step": 19244 }, { "epoch": 0.5618814049224841, "grad_norm": 0.7217464021965226, "learning_rate": 9.736253041362531e-06, "loss": 0.6891, "step": 19245 }, { "epoch": 0.5619106011503314, "grad_norm": 0.5843963059219335, "learning_rate": 9.735604217356043e-06, "loss": 0.5501, "step": 19246 }, { "epoch": 0.5619397973781788, "grad_norm": 0.6252604805248461, "learning_rate": 9.734955393349554e-06, "loss": 0.5693, "step": 19247 }, { "epoch": 0.5619689936060261, "grad_norm": 0.6356070415319904, "learning_rate": 9.734306569343066e-06, "loss": 0.5757, "step": 19248 }, { "epoch": 0.5619981898338735, "grad_norm": 0.6737654664126324, "learning_rate": 9.733657745336578e-06, "loss": 0.5923, "step": 19249 }, { "epoch": 0.5620273860617209, "grad_norm": 0.62423630889975, "learning_rate": 9.73300892133009e-06, "loss": 0.5652, "step": 19250 }, { "epoch": 0.5620565822895682, "grad_norm": 0.7056950271563981, "learning_rate": 9.732360097323602e-06, "loss": 0.6641, "step": 19251 }, { "epoch": 0.5620857785174156, "grad_norm": 0.7254327834202052, "learning_rate": 9.731711273317114e-06, "loss": 0.6908, "step": 19252 }, { "epoch": 0.562114974745263, "grad_norm": 0.6230582683298642, "learning_rate": 9.731062449310626e-06, "loss": 0.5488, "step": 19253 }, { "epoch": 0.5621441709731103, "grad_norm": 0.5837391636567962, "learning_rate": 9.730413625304136e-06, "loss": 0.5126, "step": 19254 }, { "epoch": 0.5621733672009577, "grad_norm": 0.6286132783941695, "learning_rate": 9.729764801297648e-06, "loss": 0.573, "step": 19255 }, { "epoch": 0.562202563428805, "grad_norm": 0.7020212620903218, "learning_rate": 9.72911597729116e-06, "loss": 0.5889, "step": 19256 }, { "epoch": 0.5622317596566524, "grad_norm": 0.635089909862893, "learning_rate": 9.728467153284672e-06, "loss": 0.6043, "step": 19257 }, { "epoch": 0.5622609558844998, "grad_norm": 0.6111944578446636, "learning_rate": 9.727818329278184e-06, "loss": 0.552, "step": 19258 }, { "epoch": 0.5622901521123471, "grad_norm": 0.6415338677682906, "learning_rate": 9.727169505271696e-06, "loss": 0.6009, "step": 19259 }, { "epoch": 0.5623193483401945, "grad_norm": 0.6801565735861059, "learning_rate": 9.726520681265208e-06, "loss": 0.6428, "step": 19260 }, { "epoch": 0.5623485445680418, "grad_norm": 0.6506902453536606, "learning_rate": 9.725871857258719e-06, "loss": 0.6384, "step": 19261 }, { "epoch": 0.5623777407958892, "grad_norm": 0.6232400922567493, "learning_rate": 9.72522303325223e-06, "loss": 0.604, "step": 19262 }, { "epoch": 0.5624069370237366, "grad_norm": 0.63315249125224, "learning_rate": 9.724574209245743e-06, "loss": 0.5972, "step": 19263 }, { "epoch": 0.5624361332515839, "grad_norm": 0.67451694074486, "learning_rate": 9.723925385239255e-06, "loss": 0.5947, "step": 19264 }, { "epoch": 0.5624653294794313, "grad_norm": 0.6384512226674697, "learning_rate": 9.723276561232767e-06, "loss": 0.6149, "step": 19265 }, { "epoch": 0.5624945257072786, "grad_norm": 0.6478888662701651, "learning_rate": 9.722627737226277e-06, "loss": 0.5628, "step": 19266 }, { "epoch": 0.562523721935126, "grad_norm": 0.6310965999405167, "learning_rate": 9.72197891321979e-06, "loss": 0.5816, "step": 19267 }, { "epoch": 0.5625529181629734, "grad_norm": 0.6512850392770284, "learning_rate": 9.721330089213303e-06, "loss": 0.6234, "step": 19268 }, { "epoch": 0.5625821143908207, "grad_norm": 0.6295496432738998, "learning_rate": 9.720681265206813e-06, "loss": 0.5958, "step": 19269 }, { "epoch": 0.5626113106186681, "grad_norm": 0.638060102796005, "learning_rate": 9.720032441200325e-06, "loss": 0.6149, "step": 19270 }, { "epoch": 0.5626405068465155, "grad_norm": 0.7350013622943949, "learning_rate": 9.719383617193837e-06, "loss": 0.7355, "step": 19271 }, { "epoch": 0.5626697030743628, "grad_norm": 0.6629574510845556, "learning_rate": 9.71873479318735e-06, "loss": 0.6298, "step": 19272 }, { "epoch": 0.5626988993022102, "grad_norm": 0.6117770220206565, "learning_rate": 9.71808596918086e-06, "loss": 0.599, "step": 19273 }, { "epoch": 0.5627280955300575, "grad_norm": 0.6462158046669242, "learning_rate": 9.717437145174372e-06, "loss": 0.6303, "step": 19274 }, { "epoch": 0.5627572917579049, "grad_norm": 0.5798663939609797, "learning_rate": 9.716788321167884e-06, "loss": 0.5149, "step": 19275 }, { "epoch": 0.5627864879857523, "grad_norm": 0.6077369717476557, "learning_rate": 9.716139497161396e-06, "loss": 0.5671, "step": 19276 }, { "epoch": 0.5628156842135996, "grad_norm": 0.6847381678853621, "learning_rate": 9.715490673154908e-06, "loss": 0.6889, "step": 19277 }, { "epoch": 0.562844880441447, "grad_norm": 0.6503303320676002, "learning_rate": 9.71484184914842e-06, "loss": 0.617, "step": 19278 }, { "epoch": 0.5628740766692943, "grad_norm": 0.654093669592759, "learning_rate": 9.714193025141932e-06, "loss": 0.6162, "step": 19279 }, { "epoch": 0.5629032728971417, "grad_norm": 0.6345034202971614, "learning_rate": 9.713544201135442e-06, "loss": 0.5814, "step": 19280 }, { "epoch": 0.5629324691249891, "grad_norm": 0.619532088475331, "learning_rate": 9.712895377128954e-06, "loss": 0.592, "step": 19281 }, { "epoch": 0.5629616653528364, "grad_norm": 0.6517024817842739, "learning_rate": 9.712246553122466e-06, "loss": 0.655, "step": 19282 }, { "epoch": 0.5629908615806838, "grad_norm": 0.5945192252483403, "learning_rate": 9.711597729115978e-06, "loss": 0.5335, "step": 19283 }, { "epoch": 0.5630200578085311, "grad_norm": 0.6552561122742375, "learning_rate": 9.71094890510949e-06, "loss": 0.6146, "step": 19284 }, { "epoch": 0.5630492540363785, "grad_norm": 0.720600957090541, "learning_rate": 9.710300081103e-06, "loss": 0.6611, "step": 19285 }, { "epoch": 0.5630784502642259, "grad_norm": 0.6130060967414731, "learning_rate": 9.709651257096513e-06, "loss": 0.5737, "step": 19286 }, { "epoch": 0.5631076464920732, "grad_norm": 0.6725117973970587, "learning_rate": 9.709002433090026e-06, "loss": 0.6594, "step": 19287 }, { "epoch": 0.5631368427199206, "grad_norm": 0.6429344395448046, "learning_rate": 9.708353609083537e-06, "loss": 0.6083, "step": 19288 }, { "epoch": 0.563166038947768, "grad_norm": 0.6243096592628631, "learning_rate": 9.707704785077049e-06, "loss": 0.6321, "step": 19289 }, { "epoch": 0.5631952351756153, "grad_norm": 0.6090836181201855, "learning_rate": 9.70705596107056e-06, "loss": 0.5807, "step": 19290 }, { "epoch": 0.5632244314034627, "grad_norm": 0.6345067412686436, "learning_rate": 9.706407137064073e-06, "loss": 0.6364, "step": 19291 }, { "epoch": 0.56325362763131, "grad_norm": 0.6517858303641442, "learning_rate": 9.705758313057583e-06, "loss": 0.6136, "step": 19292 }, { "epoch": 0.5632828238591574, "grad_norm": 0.6076506314799964, "learning_rate": 9.705109489051095e-06, "loss": 0.5615, "step": 19293 }, { "epoch": 0.5633120200870048, "grad_norm": 0.6044816572338026, "learning_rate": 9.704460665044607e-06, "loss": 0.5914, "step": 19294 }, { "epoch": 0.5633412163148521, "grad_norm": 0.6128902940852541, "learning_rate": 9.70381184103812e-06, "loss": 0.5672, "step": 19295 }, { "epoch": 0.5633704125426995, "grad_norm": 0.6394889473900236, "learning_rate": 9.70316301703163e-06, "loss": 0.5735, "step": 19296 }, { "epoch": 0.5633996087705468, "grad_norm": 0.629921991792691, "learning_rate": 9.702514193025143e-06, "loss": 0.556, "step": 19297 }, { "epoch": 0.5634288049983942, "grad_norm": 0.6746921544380567, "learning_rate": 9.701865369018655e-06, "loss": 0.6669, "step": 19298 }, { "epoch": 0.5634580012262416, "grad_norm": 0.6624264156013604, "learning_rate": 9.701216545012166e-06, "loss": 0.6311, "step": 19299 }, { "epoch": 0.5634871974540889, "grad_norm": 0.6898187153920208, "learning_rate": 9.700567721005678e-06, "loss": 0.632, "step": 19300 }, { "epoch": 0.5635163936819363, "grad_norm": 0.5929831322296705, "learning_rate": 9.69991889699919e-06, "loss": 0.5382, "step": 19301 }, { "epoch": 0.5635455899097837, "grad_norm": 0.6442662881178121, "learning_rate": 9.699270072992702e-06, "loss": 0.6318, "step": 19302 }, { "epoch": 0.563574786137631, "grad_norm": 0.593033210567106, "learning_rate": 9.698621248986214e-06, "loss": 0.5308, "step": 19303 }, { "epoch": 0.5636039823654784, "grad_norm": 0.6458823693806216, "learning_rate": 9.697972424979724e-06, "loss": 0.6039, "step": 19304 }, { "epoch": 0.5636331785933257, "grad_norm": 0.6320308781992866, "learning_rate": 9.697323600973236e-06, "loss": 0.5849, "step": 19305 }, { "epoch": 0.5636623748211731, "grad_norm": 0.6000666743949723, "learning_rate": 9.69667477696675e-06, "loss": 0.5271, "step": 19306 }, { "epoch": 0.5636915710490205, "grad_norm": 0.6393437414806163, "learning_rate": 9.69602595296026e-06, "loss": 0.6308, "step": 19307 }, { "epoch": 0.5637207672768678, "grad_norm": 0.6613422391936935, "learning_rate": 9.695377128953772e-06, "loss": 0.6026, "step": 19308 }, { "epoch": 0.5637499635047152, "grad_norm": 0.6894163008155747, "learning_rate": 9.694728304947284e-06, "loss": 0.6999, "step": 19309 }, { "epoch": 0.5637791597325625, "grad_norm": 0.5992590754082718, "learning_rate": 9.694079480940796e-06, "loss": 0.5465, "step": 19310 }, { "epoch": 0.5638083559604099, "grad_norm": 0.608522942279835, "learning_rate": 9.693430656934307e-06, "loss": 0.5421, "step": 19311 }, { "epoch": 0.5638375521882573, "grad_norm": 0.6145668059382448, "learning_rate": 9.692781832927819e-06, "loss": 0.5982, "step": 19312 }, { "epoch": 0.5638667484161046, "grad_norm": 0.7109280358480802, "learning_rate": 9.69213300892133e-06, "loss": 0.7153, "step": 19313 }, { "epoch": 0.563895944643952, "grad_norm": 0.6842883406880904, "learning_rate": 9.691484184914843e-06, "loss": 0.6607, "step": 19314 }, { "epoch": 0.5639251408717993, "grad_norm": 0.6259826676738638, "learning_rate": 9.690835360908353e-06, "loss": 0.5933, "step": 19315 }, { "epoch": 0.5639543370996467, "grad_norm": 0.6653432117743348, "learning_rate": 9.690186536901867e-06, "loss": 0.6062, "step": 19316 }, { "epoch": 0.5639835333274941, "grad_norm": 0.7111073648048357, "learning_rate": 9.689537712895379e-06, "loss": 0.712, "step": 19317 }, { "epoch": 0.5640127295553414, "grad_norm": 0.5997844169385257, "learning_rate": 9.688888888888889e-06, "loss": 0.5822, "step": 19318 }, { "epoch": 0.5640419257831888, "grad_norm": 0.6424595790160917, "learning_rate": 9.688240064882401e-06, "loss": 0.6168, "step": 19319 }, { "epoch": 0.5640711220110362, "grad_norm": 0.5763249121307278, "learning_rate": 9.687591240875913e-06, "loss": 0.5225, "step": 19320 }, { "epoch": 0.5641003182388835, "grad_norm": 0.6196671407880285, "learning_rate": 9.686942416869425e-06, "loss": 0.6315, "step": 19321 }, { "epoch": 0.5641295144667309, "grad_norm": 0.6377559031440474, "learning_rate": 9.686293592862937e-06, "loss": 0.5794, "step": 19322 }, { "epoch": 0.5641587106945782, "grad_norm": 0.6576732596118993, "learning_rate": 9.685644768856448e-06, "loss": 0.6302, "step": 19323 }, { "epoch": 0.5641879069224256, "grad_norm": 0.6775256183902072, "learning_rate": 9.68499594484996e-06, "loss": 0.6542, "step": 19324 }, { "epoch": 0.564217103150273, "grad_norm": 0.6241656528148107, "learning_rate": 9.684347120843473e-06, "loss": 0.5769, "step": 19325 }, { "epoch": 0.5642462993781203, "grad_norm": 0.6126362833268523, "learning_rate": 9.683698296836984e-06, "loss": 0.5386, "step": 19326 }, { "epoch": 0.5642754956059677, "grad_norm": 0.6888063902294059, "learning_rate": 9.683049472830496e-06, "loss": 0.6592, "step": 19327 }, { "epoch": 0.564304691833815, "grad_norm": 0.6414289272572824, "learning_rate": 9.682400648824008e-06, "loss": 0.6259, "step": 19328 }, { "epoch": 0.5643338880616624, "grad_norm": 0.7242695667636713, "learning_rate": 9.68175182481752e-06, "loss": 0.7126, "step": 19329 }, { "epoch": 0.5643630842895098, "grad_norm": 0.6334191985238585, "learning_rate": 9.68110300081103e-06, "loss": 0.6101, "step": 19330 }, { "epoch": 0.5643922805173571, "grad_norm": 0.7067822625337404, "learning_rate": 9.680454176804542e-06, "loss": 0.6862, "step": 19331 }, { "epoch": 0.5644214767452045, "grad_norm": 0.6321742136611436, "learning_rate": 9.679805352798054e-06, "loss": 0.5655, "step": 19332 }, { "epoch": 0.5644506729730518, "grad_norm": 0.6436895356835538, "learning_rate": 9.679156528791566e-06, "loss": 0.5787, "step": 19333 }, { "epoch": 0.5644798692008992, "grad_norm": 0.6510506223427523, "learning_rate": 9.678507704785076e-06, "loss": 0.5795, "step": 19334 }, { "epoch": 0.5645090654287466, "grad_norm": 0.6391180382181524, "learning_rate": 9.67785888077859e-06, "loss": 0.6043, "step": 19335 }, { "epoch": 0.5645382616565939, "grad_norm": 0.6239860749298198, "learning_rate": 9.677210056772102e-06, "loss": 0.6031, "step": 19336 }, { "epoch": 0.5645674578844413, "grad_norm": 0.6711239052676473, "learning_rate": 9.676561232765613e-06, "loss": 0.6932, "step": 19337 }, { "epoch": 0.5645966541122887, "grad_norm": 0.7554206788820437, "learning_rate": 9.675912408759125e-06, "loss": 0.587, "step": 19338 }, { "epoch": 0.564625850340136, "grad_norm": 0.5832106084805265, "learning_rate": 9.675263584752637e-06, "loss": 0.5356, "step": 19339 }, { "epoch": 0.5646550465679834, "grad_norm": 0.5956618974468445, "learning_rate": 9.674614760746149e-06, "loss": 0.5106, "step": 19340 }, { "epoch": 0.5646842427958307, "grad_norm": 0.6226156726330044, "learning_rate": 9.67396593673966e-06, "loss": 0.6251, "step": 19341 }, { "epoch": 0.5647134390236781, "grad_norm": 0.6792672037436006, "learning_rate": 9.673317112733171e-06, "loss": 0.6577, "step": 19342 }, { "epoch": 0.5647426352515255, "grad_norm": 0.7119020283851778, "learning_rate": 9.672668288726683e-06, "loss": 0.6426, "step": 19343 }, { "epoch": 0.5647718314793728, "grad_norm": 0.6696678038616058, "learning_rate": 9.672019464720197e-06, "loss": 0.6803, "step": 19344 }, { "epoch": 0.5648010277072202, "grad_norm": 0.6964095000843193, "learning_rate": 9.671370640713707e-06, "loss": 0.6536, "step": 19345 }, { "epoch": 0.5648302239350675, "grad_norm": 0.6515397367497476, "learning_rate": 9.670721816707219e-06, "loss": 0.5924, "step": 19346 }, { "epoch": 0.5648594201629149, "grad_norm": 0.6785396418309294, "learning_rate": 9.670072992700731e-06, "loss": 0.667, "step": 19347 }, { "epoch": 0.5648886163907623, "grad_norm": 0.6566881460902328, "learning_rate": 9.669424168694243e-06, "loss": 0.6179, "step": 19348 }, { "epoch": 0.5649178126186096, "grad_norm": 0.6746025560734237, "learning_rate": 9.668775344687753e-06, "loss": 0.6496, "step": 19349 }, { "epoch": 0.564947008846457, "grad_norm": 0.6044540351105138, "learning_rate": 9.668126520681266e-06, "loss": 0.5672, "step": 19350 }, { "epoch": 0.5649762050743043, "grad_norm": 0.619897725835355, "learning_rate": 9.667477696674778e-06, "loss": 0.6086, "step": 19351 }, { "epoch": 0.5650054013021517, "grad_norm": 0.6810677112613454, "learning_rate": 9.66682887266829e-06, "loss": 0.5329, "step": 19352 }, { "epoch": 0.5650345975299991, "grad_norm": 0.6792888712946866, "learning_rate": 9.666180048661802e-06, "loss": 0.675, "step": 19353 }, { "epoch": 0.5650637937578464, "grad_norm": 0.6255224386573914, "learning_rate": 9.665531224655314e-06, "loss": 0.5899, "step": 19354 }, { "epoch": 0.5650929899856938, "grad_norm": 0.6661819684428354, "learning_rate": 9.664882400648826e-06, "loss": 0.6217, "step": 19355 }, { "epoch": 0.5651221862135412, "grad_norm": 0.6596771416478817, "learning_rate": 9.664233576642336e-06, "loss": 0.6256, "step": 19356 }, { "epoch": 0.5651513824413886, "grad_norm": 0.6519539773118255, "learning_rate": 9.663584752635848e-06, "loss": 0.6152, "step": 19357 }, { "epoch": 0.565180578669236, "grad_norm": 0.6939371488129394, "learning_rate": 9.66293592862936e-06, "loss": 0.6562, "step": 19358 }, { "epoch": 0.5652097748970834, "grad_norm": 0.6032994093695436, "learning_rate": 9.662287104622872e-06, "loss": 0.5458, "step": 19359 }, { "epoch": 0.5652389711249307, "grad_norm": 0.7051860364581121, "learning_rate": 9.661638280616384e-06, "loss": 0.612, "step": 19360 }, { "epoch": 0.5652681673527781, "grad_norm": 0.6093312820544343, "learning_rate": 9.660989456609894e-06, "loss": 0.589, "step": 19361 }, { "epoch": 0.5652973635806254, "grad_norm": 0.6521557908198872, "learning_rate": 9.660340632603406e-06, "loss": 0.6907, "step": 19362 }, { "epoch": 0.5653265598084728, "grad_norm": 0.693264295866189, "learning_rate": 9.65969180859692e-06, "loss": 0.651, "step": 19363 }, { "epoch": 0.5653557560363202, "grad_norm": 0.6202400082464019, "learning_rate": 9.65904298459043e-06, "loss": 0.5758, "step": 19364 }, { "epoch": 0.5653849522641675, "grad_norm": 0.6902931449820092, "learning_rate": 9.658394160583943e-06, "loss": 0.7316, "step": 19365 }, { "epoch": 0.5654141484920149, "grad_norm": 0.683905966307642, "learning_rate": 9.657745336577455e-06, "loss": 0.6327, "step": 19366 }, { "epoch": 0.5654433447198622, "grad_norm": 0.5945193983833126, "learning_rate": 9.657096512570967e-06, "loss": 0.5126, "step": 19367 }, { "epoch": 0.5654725409477096, "grad_norm": 0.6609088326294666, "learning_rate": 9.656447688564477e-06, "loss": 0.6001, "step": 19368 }, { "epoch": 0.565501737175557, "grad_norm": 0.6868233999009735, "learning_rate": 9.655798864557989e-06, "loss": 0.6673, "step": 19369 }, { "epoch": 0.5655309334034043, "grad_norm": 0.6446443626627835, "learning_rate": 9.655150040551501e-06, "loss": 0.5995, "step": 19370 }, { "epoch": 0.5655601296312517, "grad_norm": 0.6227804095842098, "learning_rate": 9.654501216545013e-06, "loss": 0.55, "step": 19371 }, { "epoch": 0.565589325859099, "grad_norm": 0.6640099200234801, "learning_rate": 9.653852392538525e-06, "loss": 0.6398, "step": 19372 }, { "epoch": 0.5656185220869464, "grad_norm": 0.6534508186542093, "learning_rate": 9.653203568532037e-06, "loss": 0.6402, "step": 19373 }, { "epoch": 0.5656477183147938, "grad_norm": 0.6703222300688533, "learning_rate": 9.652554744525549e-06, "loss": 0.6308, "step": 19374 }, { "epoch": 0.5656769145426411, "grad_norm": 0.6671998620366427, "learning_rate": 9.65190592051906e-06, "loss": 0.5836, "step": 19375 }, { "epoch": 0.5657061107704885, "grad_norm": 0.6367580008543616, "learning_rate": 9.651257096512571e-06, "loss": 0.5758, "step": 19376 }, { "epoch": 0.5657353069983359, "grad_norm": 0.6428469665970441, "learning_rate": 9.650608272506083e-06, "loss": 0.5901, "step": 19377 }, { "epoch": 0.5657645032261832, "grad_norm": 0.5804146482697033, "learning_rate": 9.649959448499595e-06, "loss": 0.5173, "step": 19378 }, { "epoch": 0.5657936994540306, "grad_norm": 0.6561250271152248, "learning_rate": 9.649310624493108e-06, "loss": 0.602, "step": 19379 }, { "epoch": 0.5658228956818779, "grad_norm": 0.7183411891778836, "learning_rate": 9.648661800486618e-06, "loss": 0.6656, "step": 19380 }, { "epoch": 0.5658520919097253, "grad_norm": 0.645255320174525, "learning_rate": 9.64801297648013e-06, "loss": 0.6308, "step": 19381 }, { "epoch": 0.5658812881375727, "grad_norm": 0.6292287516142689, "learning_rate": 9.647364152473644e-06, "loss": 0.6226, "step": 19382 }, { "epoch": 0.56591048436542, "grad_norm": 0.6343446516210489, "learning_rate": 9.646715328467154e-06, "loss": 0.584, "step": 19383 }, { "epoch": 0.5659396805932674, "grad_norm": 0.6442204899656739, "learning_rate": 9.646066504460666e-06, "loss": 0.5976, "step": 19384 }, { "epoch": 0.5659688768211147, "grad_norm": 0.6216297383139496, "learning_rate": 9.645417680454178e-06, "loss": 0.5829, "step": 19385 }, { "epoch": 0.5659980730489621, "grad_norm": 0.6537161196318593, "learning_rate": 9.64476885644769e-06, "loss": 0.5659, "step": 19386 }, { "epoch": 0.5660272692768095, "grad_norm": 0.6237451366080009, "learning_rate": 9.6441200324412e-06, "loss": 0.5627, "step": 19387 }, { "epoch": 0.5660564655046568, "grad_norm": 0.6299441362520432, "learning_rate": 9.643471208434712e-06, "loss": 0.6027, "step": 19388 }, { "epoch": 0.5660856617325042, "grad_norm": 0.6529806431397708, "learning_rate": 9.642822384428224e-06, "loss": 0.6133, "step": 19389 }, { "epoch": 0.5661148579603515, "grad_norm": 0.6697078693668228, "learning_rate": 9.642173560421736e-06, "loss": 0.5972, "step": 19390 }, { "epoch": 0.5661440541881989, "grad_norm": 0.6378213183730379, "learning_rate": 9.641524736415248e-06, "loss": 0.6284, "step": 19391 }, { "epoch": 0.5661732504160463, "grad_norm": 0.610789815729637, "learning_rate": 9.64087591240876e-06, "loss": 0.5557, "step": 19392 }, { "epoch": 0.5662024466438936, "grad_norm": 0.6331629523870338, "learning_rate": 9.640227088402273e-06, "loss": 0.581, "step": 19393 }, { "epoch": 0.566231642871741, "grad_norm": 0.5800482509157612, "learning_rate": 9.639578264395783e-06, "loss": 0.5222, "step": 19394 }, { "epoch": 0.5662608390995884, "grad_norm": 0.713514324119488, "learning_rate": 9.638929440389295e-06, "loss": 0.6337, "step": 19395 }, { "epoch": 0.5662900353274357, "grad_norm": 0.6004169735720737, "learning_rate": 9.638280616382807e-06, "loss": 0.5298, "step": 19396 }, { "epoch": 0.5663192315552831, "grad_norm": 0.6141773837274103, "learning_rate": 9.637631792376319e-06, "loss": 0.5561, "step": 19397 }, { "epoch": 0.5663484277831304, "grad_norm": 0.7022691759688022, "learning_rate": 9.636982968369831e-06, "loss": 0.708, "step": 19398 }, { "epoch": 0.5663776240109778, "grad_norm": 0.69438229929906, "learning_rate": 9.636334144363341e-06, "loss": 0.6839, "step": 19399 }, { "epoch": 0.5664068202388252, "grad_norm": 0.6891345335420922, "learning_rate": 9.635685320356853e-06, "loss": 0.6748, "step": 19400 }, { "epoch": 0.5664360164666725, "grad_norm": 0.6578241432523018, "learning_rate": 9.635036496350367e-06, "loss": 0.6574, "step": 19401 }, { "epoch": 0.5664652126945199, "grad_norm": 0.6741945730973303, "learning_rate": 9.634387672343877e-06, "loss": 0.6423, "step": 19402 }, { "epoch": 0.5664944089223672, "grad_norm": 0.6325837738620655, "learning_rate": 9.63373884833739e-06, "loss": 0.595, "step": 19403 }, { "epoch": 0.5665236051502146, "grad_norm": 0.6523934052193175, "learning_rate": 9.633090024330901e-06, "loss": 0.6012, "step": 19404 }, { "epoch": 0.566552801378062, "grad_norm": 0.6495915871723686, "learning_rate": 9.632441200324413e-06, "loss": 0.6015, "step": 19405 }, { "epoch": 0.5665819976059093, "grad_norm": 0.6316682213402526, "learning_rate": 9.631792376317924e-06, "loss": 0.5863, "step": 19406 }, { "epoch": 0.5666111938337567, "grad_norm": 0.632647170251276, "learning_rate": 9.631143552311436e-06, "loss": 0.6001, "step": 19407 }, { "epoch": 0.566640390061604, "grad_norm": 0.6408503383925538, "learning_rate": 9.630494728304948e-06, "loss": 0.5694, "step": 19408 }, { "epoch": 0.5666695862894514, "grad_norm": 0.633768385361475, "learning_rate": 9.62984590429846e-06, "loss": 0.616, "step": 19409 }, { "epoch": 0.5666987825172988, "grad_norm": 0.6585053646020633, "learning_rate": 9.629197080291972e-06, "loss": 0.6649, "step": 19410 }, { "epoch": 0.5667279787451461, "grad_norm": 0.7601889604717091, "learning_rate": 9.628548256285484e-06, "loss": 0.6581, "step": 19411 }, { "epoch": 0.5667571749729935, "grad_norm": 0.6287125246759993, "learning_rate": 9.627899432278996e-06, "loss": 0.5848, "step": 19412 }, { "epoch": 0.5667863712008409, "grad_norm": 0.6931367320353461, "learning_rate": 9.627250608272506e-06, "loss": 0.6277, "step": 19413 }, { "epoch": 0.5668155674286882, "grad_norm": 0.6418778313032052, "learning_rate": 9.626601784266018e-06, "loss": 0.5861, "step": 19414 }, { "epoch": 0.5668447636565356, "grad_norm": 0.6143304962456855, "learning_rate": 9.62595296025953e-06, "loss": 0.5302, "step": 19415 }, { "epoch": 0.5668739598843829, "grad_norm": 0.6316028712095286, "learning_rate": 9.625304136253042e-06, "loss": 0.614, "step": 19416 }, { "epoch": 0.5669031561122303, "grad_norm": 0.6552133118767947, "learning_rate": 9.624655312246554e-06, "loss": 0.6326, "step": 19417 }, { "epoch": 0.5669323523400777, "grad_norm": 0.6284270166123721, "learning_rate": 9.624006488240065e-06, "loss": 0.5221, "step": 19418 }, { "epoch": 0.566961548567925, "grad_norm": 0.7032017244920353, "learning_rate": 9.623357664233578e-06, "loss": 0.6531, "step": 19419 }, { "epoch": 0.5669907447957724, "grad_norm": 0.6160102089193701, "learning_rate": 9.622708840227089e-06, "loss": 0.5603, "step": 19420 }, { "epoch": 0.5670199410236197, "grad_norm": 0.6514727016963755, "learning_rate": 9.6220600162206e-06, "loss": 0.6324, "step": 19421 }, { "epoch": 0.5670491372514671, "grad_norm": 0.7305230599299264, "learning_rate": 9.621411192214113e-06, "loss": 0.7069, "step": 19422 }, { "epoch": 0.5670783334793145, "grad_norm": 0.6161197429888058, "learning_rate": 9.620762368207625e-06, "loss": 0.5796, "step": 19423 }, { "epoch": 0.5671075297071618, "grad_norm": 0.6257258465321467, "learning_rate": 9.620113544201137e-06, "loss": 0.5611, "step": 19424 }, { "epoch": 0.5671367259350092, "grad_norm": 0.6533016670746167, "learning_rate": 9.619464720194647e-06, "loss": 0.6152, "step": 19425 }, { "epoch": 0.5671659221628566, "grad_norm": 0.6773458197411013, "learning_rate": 9.61881589618816e-06, "loss": 0.6523, "step": 19426 }, { "epoch": 0.5671951183907039, "grad_norm": 0.5905500915034589, "learning_rate": 9.618167072181671e-06, "loss": 0.5298, "step": 19427 }, { "epoch": 0.5672243146185513, "grad_norm": 0.656887250031936, "learning_rate": 9.617518248175183e-06, "loss": 0.5923, "step": 19428 }, { "epoch": 0.5672535108463986, "grad_norm": 0.6233953877356617, "learning_rate": 9.616869424168695e-06, "loss": 0.5848, "step": 19429 }, { "epoch": 0.567282707074246, "grad_norm": 0.6450256268136815, "learning_rate": 9.616220600162207e-06, "loss": 0.635, "step": 19430 }, { "epoch": 0.5673119033020934, "grad_norm": 0.6835343781375878, "learning_rate": 9.61557177615572e-06, "loss": 0.5815, "step": 19431 }, { "epoch": 0.5673410995299407, "grad_norm": 0.664784787780908, "learning_rate": 9.61492295214923e-06, "loss": 0.6476, "step": 19432 }, { "epoch": 0.5673702957577881, "grad_norm": 0.6928556150165339, "learning_rate": 9.614274128142742e-06, "loss": 0.6386, "step": 19433 }, { "epoch": 0.5673994919856354, "grad_norm": 0.6468112804447699, "learning_rate": 9.613625304136254e-06, "loss": 0.6624, "step": 19434 }, { "epoch": 0.5674286882134828, "grad_norm": 0.6039613206327664, "learning_rate": 9.612976480129766e-06, "loss": 0.5281, "step": 19435 }, { "epoch": 0.5674578844413302, "grad_norm": 0.6265704118847407, "learning_rate": 9.612327656123278e-06, "loss": 0.5428, "step": 19436 }, { "epoch": 0.5674870806691775, "grad_norm": 0.6566189150674779, "learning_rate": 9.611678832116788e-06, "loss": 0.5908, "step": 19437 }, { "epoch": 0.5675162768970249, "grad_norm": 0.6580874177785033, "learning_rate": 9.611030008110302e-06, "loss": 0.6021, "step": 19438 }, { "epoch": 0.5675454731248722, "grad_norm": 0.6427780056944746, "learning_rate": 9.610381184103812e-06, "loss": 0.5685, "step": 19439 }, { "epoch": 0.5675746693527196, "grad_norm": 0.6635263953352515, "learning_rate": 9.609732360097324e-06, "loss": 0.6565, "step": 19440 }, { "epoch": 0.567603865580567, "grad_norm": 0.6548632425220738, "learning_rate": 9.609083536090836e-06, "loss": 0.6397, "step": 19441 }, { "epoch": 0.5676330618084143, "grad_norm": 0.5982503237324549, "learning_rate": 9.608434712084348e-06, "loss": 0.5393, "step": 19442 }, { "epoch": 0.5676622580362617, "grad_norm": 0.6536519918860969, "learning_rate": 9.60778588807786e-06, "loss": 0.6299, "step": 19443 }, { "epoch": 0.567691454264109, "grad_norm": 0.6359192288578256, "learning_rate": 9.60713706407137e-06, "loss": 0.6437, "step": 19444 }, { "epoch": 0.5677206504919564, "grad_norm": 0.6376551549175661, "learning_rate": 9.606488240064883e-06, "loss": 0.6194, "step": 19445 }, { "epoch": 0.5677498467198038, "grad_norm": 0.6385084990994861, "learning_rate": 9.605839416058395e-06, "loss": 0.5994, "step": 19446 }, { "epoch": 0.5677790429476511, "grad_norm": 0.6424307452036682, "learning_rate": 9.605190592051907e-06, "loss": 0.6321, "step": 19447 }, { "epoch": 0.5678082391754985, "grad_norm": 0.6189515560147772, "learning_rate": 9.604541768045419e-06, "loss": 0.5706, "step": 19448 }, { "epoch": 0.5678374354033459, "grad_norm": 0.6544829397755055, "learning_rate": 9.60389294403893e-06, "loss": 0.6142, "step": 19449 }, { "epoch": 0.5678666316311932, "grad_norm": 0.642143704746542, "learning_rate": 9.603244120032443e-06, "loss": 0.5741, "step": 19450 }, { "epoch": 0.5678958278590406, "grad_norm": 0.6590180531745875, "learning_rate": 9.602595296025953e-06, "loss": 0.6395, "step": 19451 }, { "epoch": 0.5679250240868879, "grad_norm": 0.597643629766204, "learning_rate": 9.601946472019465e-06, "loss": 0.5695, "step": 19452 }, { "epoch": 0.5679542203147353, "grad_norm": 0.5949336297553862, "learning_rate": 9.601297648012977e-06, "loss": 0.561, "step": 19453 }, { "epoch": 0.5679834165425827, "grad_norm": 0.6172846586545694, "learning_rate": 9.60064882400649e-06, "loss": 0.5384, "step": 19454 }, { "epoch": 0.56801261277043, "grad_norm": 0.6366712763264188, "learning_rate": 9.600000000000001e-06, "loss": 0.6174, "step": 19455 }, { "epoch": 0.5680418089982774, "grad_norm": 0.5984369687931723, "learning_rate": 9.599351175993512e-06, "loss": 0.5749, "step": 19456 }, { "epoch": 0.5680710052261247, "grad_norm": 0.6261754295708925, "learning_rate": 9.598702351987025e-06, "loss": 0.5529, "step": 19457 }, { "epoch": 0.5681002014539721, "grad_norm": 0.6556926127837951, "learning_rate": 9.598053527980536e-06, "loss": 0.6482, "step": 19458 }, { "epoch": 0.5681293976818195, "grad_norm": 0.6340062383194661, "learning_rate": 9.597404703974048e-06, "loss": 0.6137, "step": 19459 }, { "epoch": 0.5681585939096668, "grad_norm": 0.6698692204655314, "learning_rate": 9.59675587996756e-06, "loss": 0.683, "step": 19460 }, { "epoch": 0.5681877901375142, "grad_norm": 0.59854230790273, "learning_rate": 9.596107055961072e-06, "loss": 0.5015, "step": 19461 }, { "epoch": 0.5682169863653616, "grad_norm": 0.6562318627334371, "learning_rate": 9.595458231954584e-06, "loss": 0.6526, "step": 19462 }, { "epoch": 0.5682461825932089, "grad_norm": 0.6517268488369448, "learning_rate": 9.594809407948094e-06, "loss": 0.6184, "step": 19463 }, { "epoch": 0.5682753788210563, "grad_norm": 0.7005183072177117, "learning_rate": 9.594160583941606e-06, "loss": 0.616, "step": 19464 }, { "epoch": 0.5683045750489036, "grad_norm": 0.6433264319655625, "learning_rate": 9.593511759935118e-06, "loss": 0.6123, "step": 19465 }, { "epoch": 0.568333771276751, "grad_norm": 0.6571419113624425, "learning_rate": 9.59286293592863e-06, "loss": 0.6617, "step": 19466 }, { "epoch": 0.5683629675045984, "grad_norm": 0.6270644743962712, "learning_rate": 9.592214111922142e-06, "loss": 0.5991, "step": 19467 }, { "epoch": 0.5683921637324457, "grad_norm": 0.6337159359460955, "learning_rate": 9.591565287915654e-06, "loss": 0.6319, "step": 19468 }, { "epoch": 0.5684213599602931, "grad_norm": 0.6474251940868947, "learning_rate": 9.590916463909166e-06, "loss": 0.5767, "step": 19469 }, { "epoch": 0.5684505561881404, "grad_norm": 0.6635935572343222, "learning_rate": 9.590267639902677e-06, "loss": 0.6395, "step": 19470 }, { "epoch": 0.5684797524159878, "grad_norm": 0.5955381526341178, "learning_rate": 9.589618815896189e-06, "loss": 0.5641, "step": 19471 }, { "epoch": 0.5685089486438352, "grad_norm": 0.6367433457642915, "learning_rate": 9.5889699918897e-06, "loss": 0.6092, "step": 19472 }, { "epoch": 0.5685381448716825, "grad_norm": 0.6953148622985217, "learning_rate": 9.588321167883213e-06, "loss": 0.6727, "step": 19473 }, { "epoch": 0.5685673410995299, "grad_norm": 0.6554937072533309, "learning_rate": 9.587672343876725e-06, "loss": 0.6376, "step": 19474 }, { "epoch": 0.5685965373273772, "grad_norm": 0.5847288264459437, "learning_rate": 9.587023519870235e-06, "loss": 0.5714, "step": 19475 }, { "epoch": 0.5686257335552246, "grad_norm": 0.6008792648363722, "learning_rate": 9.586374695863749e-06, "loss": 0.5392, "step": 19476 }, { "epoch": 0.568654929783072, "grad_norm": 0.6200983970637443, "learning_rate": 9.585725871857259e-06, "loss": 0.5912, "step": 19477 }, { "epoch": 0.5686841260109194, "grad_norm": 0.6477373852784845, "learning_rate": 9.585077047850771e-06, "loss": 0.678, "step": 19478 }, { "epoch": 0.5687133222387668, "grad_norm": 0.6386354384051844, "learning_rate": 9.584428223844283e-06, "loss": 0.6137, "step": 19479 }, { "epoch": 0.5687425184666142, "grad_norm": 0.7037996697712235, "learning_rate": 9.583779399837795e-06, "loss": 0.6602, "step": 19480 }, { "epoch": 0.5687717146944615, "grad_norm": 0.6257666018938328, "learning_rate": 9.583130575831307e-06, "loss": 0.5682, "step": 19481 }, { "epoch": 0.5688009109223089, "grad_norm": 0.6422942721490258, "learning_rate": 9.582481751824818e-06, "loss": 0.6062, "step": 19482 }, { "epoch": 0.5688301071501563, "grad_norm": 0.6542997698539437, "learning_rate": 9.58183292781833e-06, "loss": 0.6363, "step": 19483 }, { "epoch": 0.5688593033780036, "grad_norm": 0.6393568062091963, "learning_rate": 9.581184103811842e-06, "loss": 0.6286, "step": 19484 }, { "epoch": 0.568888499605851, "grad_norm": 0.6542811006750604, "learning_rate": 9.580535279805354e-06, "loss": 0.6386, "step": 19485 }, { "epoch": 0.5689176958336983, "grad_norm": 0.653397428311538, "learning_rate": 9.579886455798866e-06, "loss": 0.6503, "step": 19486 }, { "epoch": 0.5689468920615457, "grad_norm": 0.6699999138072248, "learning_rate": 9.579237631792378e-06, "loss": 0.6529, "step": 19487 }, { "epoch": 0.5689760882893931, "grad_norm": 0.639173145721296, "learning_rate": 9.57858880778589e-06, "loss": 0.5897, "step": 19488 }, { "epoch": 0.5690052845172404, "grad_norm": 0.6422379353843723, "learning_rate": 9.5779399837794e-06, "loss": 0.6312, "step": 19489 }, { "epoch": 0.5690344807450878, "grad_norm": 0.6647491504761519, "learning_rate": 9.577291159772912e-06, "loss": 0.6282, "step": 19490 }, { "epoch": 0.5690636769729351, "grad_norm": 0.7151206224529749, "learning_rate": 9.576642335766424e-06, "loss": 0.7069, "step": 19491 }, { "epoch": 0.5690928732007825, "grad_norm": 0.6760337039361536, "learning_rate": 9.575993511759936e-06, "loss": 0.6161, "step": 19492 }, { "epoch": 0.5691220694286299, "grad_norm": 0.6336649852241166, "learning_rate": 9.575344687753446e-06, "loss": 0.6333, "step": 19493 }, { "epoch": 0.5691512656564772, "grad_norm": 0.6510708934048731, "learning_rate": 9.574695863746958e-06, "loss": 0.6338, "step": 19494 }, { "epoch": 0.5691804618843246, "grad_norm": 0.7095614012420053, "learning_rate": 9.574047039740472e-06, "loss": 0.631, "step": 19495 }, { "epoch": 0.569209658112172, "grad_norm": 0.6841786446400772, "learning_rate": 9.573398215733983e-06, "loss": 0.6527, "step": 19496 }, { "epoch": 0.5692388543400193, "grad_norm": 0.6299627549477957, "learning_rate": 9.572749391727495e-06, "loss": 0.599, "step": 19497 }, { "epoch": 0.5692680505678667, "grad_norm": 0.6677003272600631, "learning_rate": 9.572100567721007e-06, "loss": 0.617, "step": 19498 }, { "epoch": 0.569297246795714, "grad_norm": 0.605537798845167, "learning_rate": 9.571451743714519e-06, "loss": 0.5644, "step": 19499 }, { "epoch": 0.5693264430235614, "grad_norm": 0.5926467895740103, "learning_rate": 9.57080291970803e-06, "loss": 0.5525, "step": 19500 }, { "epoch": 0.5693556392514088, "grad_norm": 0.7134667984691349, "learning_rate": 9.570154095701541e-06, "loss": 0.6689, "step": 19501 }, { "epoch": 0.5693848354792561, "grad_norm": 0.6894190697645431, "learning_rate": 9.569505271695053e-06, "loss": 0.6492, "step": 19502 }, { "epoch": 0.5694140317071035, "grad_norm": 0.6617137038682922, "learning_rate": 9.568856447688565e-06, "loss": 0.6396, "step": 19503 }, { "epoch": 0.5694432279349508, "grad_norm": 0.632000121484624, "learning_rate": 9.568207623682077e-06, "loss": 0.6188, "step": 19504 }, { "epoch": 0.5694724241627982, "grad_norm": 0.6277276662697965, "learning_rate": 9.567558799675589e-06, "loss": 0.5544, "step": 19505 }, { "epoch": 0.5695016203906456, "grad_norm": 0.8583991314204444, "learning_rate": 9.566909975669101e-06, "loss": 0.5825, "step": 19506 }, { "epoch": 0.5695308166184929, "grad_norm": 0.595974766561634, "learning_rate": 9.566261151662613e-06, "loss": 0.558, "step": 19507 }, { "epoch": 0.5695600128463403, "grad_norm": 0.6469507905166691, "learning_rate": 9.565612327656123e-06, "loss": 0.6423, "step": 19508 }, { "epoch": 0.5695892090741876, "grad_norm": 0.6474368886853976, "learning_rate": 9.564963503649635e-06, "loss": 0.5621, "step": 19509 }, { "epoch": 0.569618405302035, "grad_norm": 0.6396718155277388, "learning_rate": 9.564314679643148e-06, "loss": 0.6232, "step": 19510 }, { "epoch": 0.5696476015298824, "grad_norm": 0.6336228107449029, "learning_rate": 9.56366585563666e-06, "loss": 0.6039, "step": 19511 }, { "epoch": 0.5696767977577297, "grad_norm": 0.6434059221889298, "learning_rate": 9.56301703163017e-06, "loss": 0.6244, "step": 19512 }, { "epoch": 0.5697059939855771, "grad_norm": 0.5792439078214375, "learning_rate": 9.562368207623682e-06, "loss": 0.5576, "step": 19513 }, { "epoch": 0.5697351902134244, "grad_norm": 0.5702928328022779, "learning_rate": 9.561719383617196e-06, "loss": 0.5047, "step": 19514 }, { "epoch": 0.5697643864412718, "grad_norm": 0.6626912096750399, "learning_rate": 9.561070559610706e-06, "loss": 0.6166, "step": 19515 }, { "epoch": 0.5697935826691192, "grad_norm": 0.6024094306022104, "learning_rate": 9.560421735604218e-06, "loss": 0.5686, "step": 19516 }, { "epoch": 0.5698227788969665, "grad_norm": 0.6177595582049633, "learning_rate": 9.55977291159773e-06, "loss": 0.5922, "step": 19517 }, { "epoch": 0.5698519751248139, "grad_norm": 0.6694913160695443, "learning_rate": 9.559124087591242e-06, "loss": 0.6419, "step": 19518 }, { "epoch": 0.5698811713526613, "grad_norm": 0.6308030921457699, "learning_rate": 9.558475263584754e-06, "loss": 0.6091, "step": 19519 }, { "epoch": 0.5699103675805086, "grad_norm": 0.6680978296652937, "learning_rate": 9.557826439578264e-06, "loss": 0.6437, "step": 19520 }, { "epoch": 0.569939563808356, "grad_norm": 0.6824418964825115, "learning_rate": 9.557177615571776e-06, "loss": 0.6068, "step": 19521 }, { "epoch": 0.5699687600362033, "grad_norm": 0.6842424534221606, "learning_rate": 9.556528791565288e-06, "loss": 0.6768, "step": 19522 }, { "epoch": 0.5699979562640507, "grad_norm": 0.6362734070157303, "learning_rate": 9.5558799675588e-06, "loss": 0.593, "step": 19523 }, { "epoch": 0.5700271524918981, "grad_norm": 0.6521074242026277, "learning_rate": 9.555231143552313e-06, "loss": 0.5968, "step": 19524 }, { "epoch": 0.5700563487197454, "grad_norm": 0.6833756097783995, "learning_rate": 9.554582319545825e-06, "loss": 0.672, "step": 19525 }, { "epoch": 0.5700855449475928, "grad_norm": 0.6279758207543832, "learning_rate": 9.553933495539337e-06, "loss": 0.6159, "step": 19526 }, { "epoch": 0.5701147411754401, "grad_norm": 0.659822021645464, "learning_rate": 9.553284671532847e-06, "loss": 0.6252, "step": 19527 }, { "epoch": 0.5701439374032875, "grad_norm": 0.6992796372958158, "learning_rate": 9.552635847526359e-06, "loss": 0.6449, "step": 19528 }, { "epoch": 0.5701731336311349, "grad_norm": 0.6211493613042475, "learning_rate": 9.551987023519871e-06, "loss": 0.5883, "step": 19529 }, { "epoch": 0.5702023298589822, "grad_norm": 0.7262989386663418, "learning_rate": 9.551338199513383e-06, "loss": 0.6842, "step": 19530 }, { "epoch": 0.5702315260868296, "grad_norm": 0.7068234376732543, "learning_rate": 9.550689375506893e-06, "loss": 0.7006, "step": 19531 }, { "epoch": 0.570260722314677, "grad_norm": 0.6149724191184379, "learning_rate": 9.550040551500405e-06, "loss": 0.5741, "step": 19532 }, { "epoch": 0.5702899185425243, "grad_norm": 0.6515462790109543, "learning_rate": 9.549391727493919e-06, "loss": 0.6213, "step": 19533 }, { "epoch": 0.5703191147703717, "grad_norm": 0.6982303987209626, "learning_rate": 9.54874290348743e-06, "loss": 0.6766, "step": 19534 }, { "epoch": 0.570348310998219, "grad_norm": 0.6732678788270419, "learning_rate": 9.548094079480941e-06, "loss": 0.7092, "step": 19535 }, { "epoch": 0.5703775072260664, "grad_norm": 0.5990181659649184, "learning_rate": 9.547445255474453e-06, "loss": 0.5452, "step": 19536 }, { "epoch": 0.5704067034539138, "grad_norm": 0.6454685434562287, "learning_rate": 9.546796431467965e-06, "loss": 0.6149, "step": 19537 }, { "epoch": 0.5704358996817611, "grad_norm": 0.6000987250921435, "learning_rate": 9.546147607461477e-06, "loss": 0.5221, "step": 19538 }, { "epoch": 0.5704650959096085, "grad_norm": 0.6449459833729934, "learning_rate": 9.545498783454988e-06, "loss": 0.6154, "step": 19539 }, { "epoch": 0.5704942921374558, "grad_norm": 0.6193960882116359, "learning_rate": 9.5448499594485e-06, "loss": 0.5382, "step": 19540 }, { "epoch": 0.5705234883653032, "grad_norm": 0.686486588034264, "learning_rate": 9.544201135442012e-06, "loss": 0.7008, "step": 19541 }, { "epoch": 0.5705526845931506, "grad_norm": 0.6381737773939229, "learning_rate": 9.543552311435524e-06, "loss": 0.5992, "step": 19542 }, { "epoch": 0.5705818808209979, "grad_norm": 0.7106616419454597, "learning_rate": 9.542903487429036e-06, "loss": 0.6796, "step": 19543 }, { "epoch": 0.5706110770488453, "grad_norm": 0.6657870562561241, "learning_rate": 9.542254663422548e-06, "loss": 0.6274, "step": 19544 }, { "epoch": 0.5706402732766926, "grad_norm": 0.6214454642610013, "learning_rate": 9.54160583941606e-06, "loss": 0.5922, "step": 19545 }, { "epoch": 0.57066946950454, "grad_norm": 0.6174829903372486, "learning_rate": 9.54095701540957e-06, "loss": 0.5606, "step": 19546 }, { "epoch": 0.5706986657323874, "grad_norm": 0.5860567428178555, "learning_rate": 9.540308191403082e-06, "loss": 0.539, "step": 19547 }, { "epoch": 0.5707278619602347, "grad_norm": 0.6711435770050652, "learning_rate": 9.539659367396594e-06, "loss": 0.6667, "step": 19548 }, { "epoch": 0.5707570581880821, "grad_norm": 0.6134078873058134, "learning_rate": 9.539010543390106e-06, "loss": 0.5615, "step": 19549 }, { "epoch": 0.5707862544159295, "grad_norm": 0.6328693200387185, "learning_rate": 9.538361719383617e-06, "loss": 0.5874, "step": 19550 }, { "epoch": 0.5708154506437768, "grad_norm": 0.6666264238160314, "learning_rate": 9.537712895377129e-06, "loss": 0.6539, "step": 19551 }, { "epoch": 0.5708446468716242, "grad_norm": 0.6118372103838127, "learning_rate": 9.537064071370642e-06, "loss": 0.5601, "step": 19552 }, { "epoch": 0.5708738430994715, "grad_norm": 0.6900607886202753, "learning_rate": 9.536415247364153e-06, "loss": 0.6583, "step": 19553 }, { "epoch": 0.5709030393273189, "grad_norm": 0.6573126096492522, "learning_rate": 9.535766423357665e-06, "loss": 0.5853, "step": 19554 }, { "epoch": 0.5709322355551663, "grad_norm": 0.6697804825092694, "learning_rate": 9.535117599351177e-06, "loss": 0.6298, "step": 19555 }, { "epoch": 0.5709614317830136, "grad_norm": 0.6744315767919998, "learning_rate": 9.534468775344689e-06, "loss": 0.6597, "step": 19556 }, { "epoch": 0.570990628010861, "grad_norm": 0.6288193747836134, "learning_rate": 9.533819951338201e-06, "loss": 0.5703, "step": 19557 }, { "epoch": 0.5710198242387083, "grad_norm": 0.6502388838704111, "learning_rate": 9.533171127331711e-06, "loss": 0.6158, "step": 19558 }, { "epoch": 0.5710490204665557, "grad_norm": 0.6635673951321021, "learning_rate": 9.532522303325223e-06, "loss": 0.6544, "step": 19559 }, { "epoch": 0.5710782166944031, "grad_norm": 0.6302644037458021, "learning_rate": 9.531873479318735e-06, "loss": 0.6054, "step": 19560 }, { "epoch": 0.5711074129222504, "grad_norm": 0.6599581375859032, "learning_rate": 9.531224655312247e-06, "loss": 0.5968, "step": 19561 }, { "epoch": 0.5711366091500978, "grad_norm": 0.6531757640006831, "learning_rate": 9.53057583130576e-06, "loss": 0.6439, "step": 19562 }, { "epoch": 0.5711658053779451, "grad_norm": 0.6208148193242641, "learning_rate": 9.529927007299271e-06, "loss": 0.5529, "step": 19563 }, { "epoch": 0.5711950016057925, "grad_norm": 0.625218879702242, "learning_rate": 9.529278183292783e-06, "loss": 0.6254, "step": 19564 }, { "epoch": 0.5712241978336399, "grad_norm": 0.6249375200739679, "learning_rate": 9.528629359286294e-06, "loss": 0.6144, "step": 19565 }, { "epoch": 0.5712533940614872, "grad_norm": 0.666584111445215, "learning_rate": 9.527980535279806e-06, "loss": 0.6641, "step": 19566 }, { "epoch": 0.5712825902893346, "grad_norm": 0.6633233598396525, "learning_rate": 9.527331711273318e-06, "loss": 0.682, "step": 19567 }, { "epoch": 0.571311786517182, "grad_norm": 0.6378073489763184, "learning_rate": 9.52668288726683e-06, "loss": 0.6373, "step": 19568 }, { "epoch": 0.5713409827450293, "grad_norm": 0.6471889346517662, "learning_rate": 9.52603406326034e-06, "loss": 0.6296, "step": 19569 }, { "epoch": 0.5713701789728767, "grad_norm": 0.6101685334628902, "learning_rate": 9.525385239253854e-06, "loss": 0.5824, "step": 19570 }, { "epoch": 0.571399375200724, "grad_norm": 0.6696257605015662, "learning_rate": 9.524736415247366e-06, "loss": 0.6591, "step": 19571 }, { "epoch": 0.5714285714285714, "grad_norm": 0.6393463462203071, "learning_rate": 9.524087591240876e-06, "loss": 0.6311, "step": 19572 }, { "epoch": 0.5714577676564188, "grad_norm": 0.6453735069716552, "learning_rate": 9.523438767234388e-06, "loss": 0.6444, "step": 19573 }, { "epoch": 0.5714869638842661, "grad_norm": 0.6326158828760933, "learning_rate": 9.5227899432279e-06, "loss": 0.6003, "step": 19574 }, { "epoch": 0.5715161601121135, "grad_norm": 0.6188117992511226, "learning_rate": 9.522141119221412e-06, "loss": 0.5267, "step": 19575 }, { "epoch": 0.5715453563399608, "grad_norm": 0.6678464314944195, "learning_rate": 9.521492295214924e-06, "loss": 0.6409, "step": 19576 }, { "epoch": 0.5715745525678082, "grad_norm": 0.6759429464048461, "learning_rate": 9.520843471208435e-06, "loss": 0.722, "step": 19577 }, { "epoch": 0.5716037487956556, "grad_norm": 0.6315951527873127, "learning_rate": 9.520194647201947e-06, "loss": 0.6053, "step": 19578 }, { "epoch": 0.5716329450235029, "grad_norm": 0.6604901796142728, "learning_rate": 9.519545823195459e-06, "loss": 0.6195, "step": 19579 }, { "epoch": 0.5716621412513503, "grad_norm": 0.6068949305315958, "learning_rate": 9.51889699918897e-06, "loss": 0.5814, "step": 19580 }, { "epoch": 0.5716913374791976, "grad_norm": 0.6623307816935312, "learning_rate": 9.518248175182483e-06, "loss": 0.6018, "step": 19581 }, { "epoch": 0.571720533707045, "grad_norm": 0.617997788962397, "learning_rate": 9.517599351175995e-06, "loss": 0.5703, "step": 19582 }, { "epoch": 0.5717497299348924, "grad_norm": 0.68332925240518, "learning_rate": 9.516950527169507e-06, "loss": 0.6418, "step": 19583 }, { "epoch": 0.5717789261627397, "grad_norm": 0.64959594460727, "learning_rate": 9.516301703163017e-06, "loss": 0.5921, "step": 19584 }, { "epoch": 0.5718081223905871, "grad_norm": 0.6361631602735911, "learning_rate": 9.51565287915653e-06, "loss": 0.5667, "step": 19585 }, { "epoch": 0.5718373186184345, "grad_norm": 0.6649216019691601, "learning_rate": 9.515004055150041e-06, "loss": 0.6945, "step": 19586 }, { "epoch": 0.5718665148462818, "grad_norm": 0.6673519598917476, "learning_rate": 9.514355231143553e-06, "loss": 0.6292, "step": 19587 }, { "epoch": 0.5718957110741292, "grad_norm": 0.6156391897575206, "learning_rate": 9.513706407137064e-06, "loss": 0.6071, "step": 19588 }, { "epoch": 0.5719249073019765, "grad_norm": 0.6311327764351793, "learning_rate": 9.513057583130577e-06, "loss": 0.6052, "step": 19589 }, { "epoch": 0.5719541035298239, "grad_norm": 0.6379977196667698, "learning_rate": 9.51240875912409e-06, "loss": 0.6259, "step": 19590 }, { "epoch": 0.5719832997576713, "grad_norm": 0.6924720425835041, "learning_rate": 9.5117599351176e-06, "loss": 0.7245, "step": 19591 }, { "epoch": 0.5720124959855186, "grad_norm": 0.6720215070693116, "learning_rate": 9.511111111111112e-06, "loss": 0.673, "step": 19592 }, { "epoch": 0.572041692213366, "grad_norm": 0.6832965782744584, "learning_rate": 9.510462287104624e-06, "loss": 0.6917, "step": 19593 }, { "epoch": 0.5720708884412133, "grad_norm": 0.6668902719117005, "learning_rate": 9.509813463098136e-06, "loss": 0.6612, "step": 19594 }, { "epoch": 0.5721000846690607, "grad_norm": 0.6108716436342128, "learning_rate": 9.509164639091648e-06, "loss": 0.5794, "step": 19595 }, { "epoch": 0.5721292808969081, "grad_norm": 0.6686763403207875, "learning_rate": 9.508515815085158e-06, "loss": 0.6398, "step": 19596 }, { "epoch": 0.5721584771247554, "grad_norm": 0.6193224148888237, "learning_rate": 9.50786699107867e-06, "loss": 0.5761, "step": 19597 }, { "epoch": 0.5721876733526029, "grad_norm": 0.6562267504675421, "learning_rate": 9.507218167072182e-06, "loss": 0.6103, "step": 19598 }, { "epoch": 0.5722168695804503, "grad_norm": 0.6126200940735982, "learning_rate": 9.506569343065694e-06, "loss": 0.5469, "step": 19599 }, { "epoch": 0.5722460658082976, "grad_norm": 0.6610118203782884, "learning_rate": 9.505920519059206e-06, "loss": 0.662, "step": 19600 }, { "epoch": 0.572275262036145, "grad_norm": 0.6217062589140214, "learning_rate": 9.505271695052718e-06, "loss": 0.5802, "step": 19601 }, { "epoch": 0.5723044582639923, "grad_norm": 0.6315969993783177, "learning_rate": 9.50462287104623e-06, "loss": 0.5768, "step": 19602 }, { "epoch": 0.5723336544918397, "grad_norm": 0.608003481860326, "learning_rate": 9.50397404703974e-06, "loss": 0.5786, "step": 19603 }, { "epoch": 0.5723628507196871, "grad_norm": 0.6514161915161295, "learning_rate": 9.503325223033253e-06, "loss": 0.6128, "step": 19604 }, { "epoch": 0.5723920469475344, "grad_norm": 0.7036186290087537, "learning_rate": 9.502676399026765e-06, "loss": 0.7029, "step": 19605 }, { "epoch": 0.5724212431753818, "grad_norm": 0.6434880258615667, "learning_rate": 9.502027575020277e-06, "loss": 0.6161, "step": 19606 }, { "epoch": 0.5724504394032291, "grad_norm": 0.6393615883093683, "learning_rate": 9.501378751013787e-06, "loss": 0.5825, "step": 19607 }, { "epoch": 0.5724796356310765, "grad_norm": 0.6189199448623255, "learning_rate": 9.5007299270073e-06, "loss": 0.6039, "step": 19608 }, { "epoch": 0.5725088318589239, "grad_norm": 0.6689928247149542, "learning_rate": 9.500081103000813e-06, "loss": 0.6285, "step": 19609 }, { "epoch": 0.5725380280867712, "grad_norm": 0.7198861091753366, "learning_rate": 9.499432278994323e-06, "loss": 0.6605, "step": 19610 }, { "epoch": 0.5725672243146186, "grad_norm": 0.6092626149535227, "learning_rate": 9.498783454987835e-06, "loss": 0.6007, "step": 19611 }, { "epoch": 0.572596420542466, "grad_norm": 0.619261123948143, "learning_rate": 9.498134630981347e-06, "loss": 0.6013, "step": 19612 }, { "epoch": 0.5726256167703133, "grad_norm": 0.6391938903846495, "learning_rate": 9.49748580697486e-06, "loss": 0.6022, "step": 19613 }, { "epoch": 0.5726548129981607, "grad_norm": 0.6294704260050138, "learning_rate": 9.496836982968371e-06, "loss": 0.6116, "step": 19614 }, { "epoch": 0.572684009226008, "grad_norm": 0.6924854973293011, "learning_rate": 9.496188158961882e-06, "loss": 0.6565, "step": 19615 }, { "epoch": 0.5727132054538554, "grad_norm": 0.6218615698453729, "learning_rate": 9.495539334955394e-06, "loss": 0.5844, "step": 19616 }, { "epoch": 0.5727424016817028, "grad_norm": 0.6244271400339861, "learning_rate": 9.494890510948906e-06, "loss": 0.5669, "step": 19617 }, { "epoch": 0.5727715979095501, "grad_norm": 0.6601831357543395, "learning_rate": 9.494241686942418e-06, "loss": 0.6493, "step": 19618 }, { "epoch": 0.5728007941373975, "grad_norm": 0.7862110709950612, "learning_rate": 9.49359286293593e-06, "loss": 0.6808, "step": 19619 }, { "epoch": 0.5728299903652448, "grad_norm": 0.6523406733421281, "learning_rate": 9.492944038929442e-06, "loss": 0.6182, "step": 19620 }, { "epoch": 0.5728591865930922, "grad_norm": 0.6878869201155686, "learning_rate": 9.492295214922954e-06, "loss": 0.7044, "step": 19621 }, { "epoch": 0.5728883828209396, "grad_norm": 0.6386235887038293, "learning_rate": 9.491646390916464e-06, "loss": 0.572, "step": 19622 }, { "epoch": 0.5729175790487869, "grad_norm": 0.7087756497348403, "learning_rate": 9.490997566909976e-06, "loss": 0.6265, "step": 19623 }, { "epoch": 0.5729467752766343, "grad_norm": 0.6566307384156853, "learning_rate": 9.490348742903488e-06, "loss": 0.6825, "step": 19624 }, { "epoch": 0.5729759715044817, "grad_norm": 0.6288372544886681, "learning_rate": 9.489699918897e-06, "loss": 0.6122, "step": 19625 }, { "epoch": 0.573005167732329, "grad_norm": 0.6070862368193439, "learning_rate": 9.48905109489051e-06, "loss": 0.5824, "step": 19626 }, { "epoch": 0.5730343639601764, "grad_norm": 0.6159559189938347, "learning_rate": 9.488402270884024e-06, "loss": 0.5508, "step": 19627 }, { "epoch": 0.5730635601880237, "grad_norm": 0.6569558287182763, "learning_rate": 9.487753446877536e-06, "loss": 0.6387, "step": 19628 }, { "epoch": 0.5730927564158711, "grad_norm": 0.6106576519545169, "learning_rate": 9.487104622871047e-06, "loss": 0.574, "step": 19629 }, { "epoch": 0.5731219526437185, "grad_norm": 0.6674764577990983, "learning_rate": 9.486455798864559e-06, "loss": 0.6478, "step": 19630 }, { "epoch": 0.5731511488715658, "grad_norm": 0.6408803060303531, "learning_rate": 9.48580697485807e-06, "loss": 0.6087, "step": 19631 }, { "epoch": 0.5731803450994132, "grad_norm": 0.6365743656766721, "learning_rate": 9.485158150851583e-06, "loss": 0.5817, "step": 19632 }, { "epoch": 0.5732095413272605, "grad_norm": 0.658630427373662, "learning_rate": 9.484509326845095e-06, "loss": 0.6528, "step": 19633 }, { "epoch": 0.5732387375551079, "grad_norm": 0.658038880726458, "learning_rate": 9.483860502838605e-06, "loss": 0.673, "step": 19634 }, { "epoch": 0.5732679337829553, "grad_norm": 0.6002380104944764, "learning_rate": 9.483211678832117e-06, "loss": 0.541, "step": 19635 }, { "epoch": 0.5732971300108026, "grad_norm": 0.6391222607762697, "learning_rate": 9.482562854825629e-06, "loss": 0.5925, "step": 19636 }, { "epoch": 0.57332632623865, "grad_norm": 0.6671588309179804, "learning_rate": 9.481914030819141e-06, "loss": 0.6, "step": 19637 }, { "epoch": 0.5733555224664973, "grad_norm": 0.5977309689082054, "learning_rate": 9.481265206812653e-06, "loss": 0.5743, "step": 19638 }, { "epoch": 0.5733847186943447, "grad_norm": 0.6067791222124194, "learning_rate": 9.480616382806165e-06, "loss": 0.5598, "step": 19639 }, { "epoch": 0.5734139149221921, "grad_norm": 0.630521007860691, "learning_rate": 9.479967558799677e-06, "loss": 0.6133, "step": 19640 }, { "epoch": 0.5734431111500394, "grad_norm": 0.7570374088892337, "learning_rate": 9.479318734793187e-06, "loss": 0.8009, "step": 19641 }, { "epoch": 0.5734723073778868, "grad_norm": 0.6731721285283329, "learning_rate": 9.4786699107867e-06, "loss": 0.6684, "step": 19642 }, { "epoch": 0.5735015036057342, "grad_norm": 0.6947065748014679, "learning_rate": 9.478021086780212e-06, "loss": 0.7049, "step": 19643 }, { "epoch": 0.5735306998335815, "grad_norm": 0.607882652217395, "learning_rate": 9.477372262773724e-06, "loss": 0.5562, "step": 19644 }, { "epoch": 0.5735598960614289, "grad_norm": 0.6435753053544242, "learning_rate": 9.476723438767234e-06, "loss": 0.5783, "step": 19645 }, { "epoch": 0.5735890922892762, "grad_norm": 0.6796760172613933, "learning_rate": 9.476074614760748e-06, "loss": 0.679, "step": 19646 }, { "epoch": 0.5736182885171236, "grad_norm": 0.6361360697510527, "learning_rate": 9.47542579075426e-06, "loss": 0.607, "step": 19647 }, { "epoch": 0.573647484744971, "grad_norm": 0.6485519622661031, "learning_rate": 9.47477696674777e-06, "loss": 0.6054, "step": 19648 }, { "epoch": 0.5736766809728183, "grad_norm": 0.634715893535462, "learning_rate": 9.474128142741282e-06, "loss": 0.5932, "step": 19649 }, { "epoch": 0.5737058772006657, "grad_norm": 0.6442150116095547, "learning_rate": 9.473479318734794e-06, "loss": 0.5878, "step": 19650 }, { "epoch": 0.573735073428513, "grad_norm": 0.6189375542635759, "learning_rate": 9.472830494728306e-06, "loss": 0.5999, "step": 19651 }, { "epoch": 0.5737642696563604, "grad_norm": 0.6437949399314882, "learning_rate": 9.472181670721818e-06, "loss": 0.581, "step": 19652 }, { "epoch": 0.5737934658842078, "grad_norm": 0.6219491095969648, "learning_rate": 9.471532846715328e-06, "loss": 0.5461, "step": 19653 }, { "epoch": 0.5738226621120551, "grad_norm": 0.6473394892702194, "learning_rate": 9.47088402270884e-06, "loss": 0.5584, "step": 19654 }, { "epoch": 0.5738518583399025, "grad_norm": 0.6065133825470992, "learning_rate": 9.470235198702352e-06, "loss": 0.5923, "step": 19655 }, { "epoch": 0.5738810545677498, "grad_norm": 0.6162434761831475, "learning_rate": 9.469586374695865e-06, "loss": 0.5821, "step": 19656 }, { "epoch": 0.5739102507955972, "grad_norm": 0.648657849229673, "learning_rate": 9.468937550689377e-06, "loss": 0.6196, "step": 19657 }, { "epoch": 0.5739394470234446, "grad_norm": 0.621093698100591, "learning_rate": 9.468288726682889e-06, "loss": 0.5797, "step": 19658 }, { "epoch": 0.5739686432512919, "grad_norm": 0.614805561509804, "learning_rate": 9.4676399026764e-06, "loss": 0.4999, "step": 19659 }, { "epoch": 0.5739978394791393, "grad_norm": 0.6446024239330586, "learning_rate": 9.466991078669911e-06, "loss": 0.6202, "step": 19660 }, { "epoch": 0.5740270357069867, "grad_norm": 0.6859413725635589, "learning_rate": 9.466342254663423e-06, "loss": 0.6876, "step": 19661 }, { "epoch": 0.574056231934834, "grad_norm": 0.6883296132828322, "learning_rate": 9.465693430656935e-06, "loss": 0.6823, "step": 19662 }, { "epoch": 0.5740854281626814, "grad_norm": 0.6203261438704014, "learning_rate": 9.465044606650447e-06, "loss": 0.5996, "step": 19663 }, { "epoch": 0.5741146243905287, "grad_norm": 0.604333722233943, "learning_rate": 9.464395782643957e-06, "loss": 0.5652, "step": 19664 }, { "epoch": 0.5741438206183761, "grad_norm": 0.6278488441830743, "learning_rate": 9.463746958637471e-06, "loss": 0.5985, "step": 19665 }, { "epoch": 0.5741730168462235, "grad_norm": 0.5835495070458465, "learning_rate": 9.463098134630983e-06, "loss": 0.5261, "step": 19666 }, { "epoch": 0.5742022130740708, "grad_norm": 0.6769459000055107, "learning_rate": 9.462449310624493e-06, "loss": 0.6361, "step": 19667 }, { "epoch": 0.5742314093019182, "grad_norm": 0.6885771378266236, "learning_rate": 9.461800486618005e-06, "loss": 0.7116, "step": 19668 }, { "epoch": 0.5742606055297655, "grad_norm": 0.6617105815504687, "learning_rate": 9.461151662611517e-06, "loss": 0.5995, "step": 19669 }, { "epoch": 0.5742898017576129, "grad_norm": 0.6610633369066619, "learning_rate": 9.46050283860503e-06, "loss": 0.6319, "step": 19670 }, { "epoch": 0.5743189979854603, "grad_norm": 0.6957390413241319, "learning_rate": 9.459854014598542e-06, "loss": 0.687, "step": 19671 }, { "epoch": 0.5743481942133076, "grad_norm": 0.6626885526907612, "learning_rate": 9.459205190592052e-06, "loss": 0.5918, "step": 19672 }, { "epoch": 0.574377390441155, "grad_norm": 0.6652261116796201, "learning_rate": 9.458556366585564e-06, "loss": 0.6362, "step": 19673 }, { "epoch": 0.5744065866690024, "grad_norm": 0.651290424871768, "learning_rate": 9.457907542579076e-06, "loss": 0.6048, "step": 19674 }, { "epoch": 0.5744357828968497, "grad_norm": 0.5835306220290989, "learning_rate": 9.457258718572588e-06, "loss": 0.4794, "step": 19675 }, { "epoch": 0.5744649791246971, "grad_norm": 0.624999972036044, "learning_rate": 9.4566098945661e-06, "loss": 0.5605, "step": 19676 }, { "epoch": 0.5744941753525444, "grad_norm": 0.6775810611619343, "learning_rate": 9.455961070559612e-06, "loss": 0.6265, "step": 19677 }, { "epoch": 0.5745233715803918, "grad_norm": 0.6369275942650025, "learning_rate": 9.455312246553124e-06, "loss": 0.5649, "step": 19678 }, { "epoch": 0.5745525678082392, "grad_norm": 0.6702606004824915, "learning_rate": 9.454663422546634e-06, "loss": 0.6512, "step": 19679 }, { "epoch": 0.5745817640360865, "grad_norm": 0.6467121081009745, "learning_rate": 9.454014598540146e-06, "loss": 0.6367, "step": 19680 }, { "epoch": 0.5746109602639339, "grad_norm": 0.6189527424236416, "learning_rate": 9.453365774533658e-06, "loss": 0.5902, "step": 19681 }, { "epoch": 0.5746401564917812, "grad_norm": 0.5999904020448792, "learning_rate": 9.45271695052717e-06, "loss": 0.542, "step": 19682 }, { "epoch": 0.5746693527196286, "grad_norm": 0.6447609079617226, "learning_rate": 9.45206812652068e-06, "loss": 0.594, "step": 19683 }, { "epoch": 0.574698548947476, "grad_norm": 0.6672646365661059, "learning_rate": 9.451419302514195e-06, "loss": 0.6585, "step": 19684 }, { "epoch": 0.5747277451753233, "grad_norm": 0.6834480615390449, "learning_rate": 9.450770478507707e-06, "loss": 0.6479, "step": 19685 }, { "epoch": 0.5747569414031707, "grad_norm": 0.6362983502630668, "learning_rate": 9.450121654501217e-06, "loss": 0.6034, "step": 19686 }, { "epoch": 0.574786137631018, "grad_norm": 0.5995145977998686, "learning_rate": 9.449472830494729e-06, "loss": 0.5533, "step": 19687 }, { "epoch": 0.5748153338588654, "grad_norm": 0.6584207235553127, "learning_rate": 9.448824006488241e-06, "loss": 0.6594, "step": 19688 }, { "epoch": 0.5748445300867128, "grad_norm": 0.5945179260744372, "learning_rate": 9.448175182481753e-06, "loss": 0.5384, "step": 19689 }, { "epoch": 0.5748737263145601, "grad_norm": 0.6293409676329499, "learning_rate": 9.447526358475263e-06, "loss": 0.5885, "step": 19690 }, { "epoch": 0.5749029225424075, "grad_norm": 0.6713846172201212, "learning_rate": 9.446877534468775e-06, "loss": 0.6566, "step": 19691 }, { "epoch": 0.5749321187702549, "grad_norm": 0.6457830973156465, "learning_rate": 9.446228710462287e-06, "loss": 0.6117, "step": 19692 }, { "epoch": 0.5749613149981022, "grad_norm": 0.6424900770545877, "learning_rate": 9.4455798864558e-06, "loss": 0.6554, "step": 19693 }, { "epoch": 0.5749905112259496, "grad_norm": 0.6666024068978956, "learning_rate": 9.444931062449311e-06, "loss": 0.6174, "step": 19694 }, { "epoch": 0.5750197074537969, "grad_norm": 0.6477926895720527, "learning_rate": 9.444282238442823e-06, "loss": 0.6228, "step": 19695 }, { "epoch": 0.5750489036816443, "grad_norm": 0.6872006556221467, "learning_rate": 9.443633414436335e-06, "loss": 0.676, "step": 19696 }, { "epoch": 0.5750780999094917, "grad_norm": 0.6078500411103633, "learning_rate": 9.442984590429847e-06, "loss": 0.575, "step": 19697 }, { "epoch": 0.575107296137339, "grad_norm": 0.6323428262789533, "learning_rate": 9.442335766423358e-06, "loss": 0.6088, "step": 19698 }, { "epoch": 0.5751364923651864, "grad_norm": 0.6386243819119353, "learning_rate": 9.44168694241687e-06, "loss": 0.5836, "step": 19699 }, { "epoch": 0.5751656885930337, "grad_norm": 0.6416198976749317, "learning_rate": 9.441038118410382e-06, "loss": 0.6282, "step": 19700 }, { "epoch": 0.5751948848208811, "grad_norm": 0.6177745377528984, "learning_rate": 9.440389294403894e-06, "loss": 0.5755, "step": 19701 }, { "epoch": 0.5752240810487285, "grad_norm": 0.5887423430384784, "learning_rate": 9.439740470397404e-06, "loss": 0.4964, "step": 19702 }, { "epoch": 0.5752532772765758, "grad_norm": 0.5849232170675758, "learning_rate": 9.439091646390918e-06, "loss": 0.5486, "step": 19703 }, { "epoch": 0.5752824735044232, "grad_norm": 0.6756172182524783, "learning_rate": 9.43844282238443e-06, "loss": 0.7191, "step": 19704 }, { "epoch": 0.5753116697322705, "grad_norm": 0.6759861217082246, "learning_rate": 9.43779399837794e-06, "loss": 0.6504, "step": 19705 }, { "epoch": 0.5753408659601179, "grad_norm": 0.6216459113151591, "learning_rate": 9.437145174371452e-06, "loss": 0.5743, "step": 19706 }, { "epoch": 0.5753700621879653, "grad_norm": 0.6623959463429022, "learning_rate": 9.436496350364964e-06, "loss": 0.6631, "step": 19707 }, { "epoch": 0.5753992584158126, "grad_norm": 0.6323363162276374, "learning_rate": 9.435847526358476e-06, "loss": 0.6168, "step": 19708 }, { "epoch": 0.57542845464366, "grad_norm": 0.7323676112209309, "learning_rate": 9.435198702351987e-06, "loss": 0.7524, "step": 19709 }, { "epoch": 0.5754576508715074, "grad_norm": 0.6754029610028757, "learning_rate": 9.434549878345499e-06, "loss": 0.666, "step": 19710 }, { "epoch": 0.5754868470993547, "grad_norm": 0.683291392565732, "learning_rate": 9.43390105433901e-06, "loss": 0.6606, "step": 19711 }, { "epoch": 0.5755160433272021, "grad_norm": 0.6403369033362989, "learning_rate": 9.433252230332523e-06, "loss": 0.6144, "step": 19712 }, { "epoch": 0.5755452395550494, "grad_norm": 0.6202412669028469, "learning_rate": 9.432603406326035e-06, "loss": 0.5919, "step": 19713 }, { "epoch": 0.5755744357828968, "grad_norm": 0.6615403222575545, "learning_rate": 9.431954582319547e-06, "loss": 0.6467, "step": 19714 }, { "epoch": 0.5756036320107442, "grad_norm": 0.6397185310628841, "learning_rate": 9.431305758313059e-06, "loss": 0.6389, "step": 19715 }, { "epoch": 0.5756328282385915, "grad_norm": 0.6431620555251771, "learning_rate": 9.430656934306571e-06, "loss": 0.6018, "step": 19716 }, { "epoch": 0.5756620244664389, "grad_norm": 0.6501135696895023, "learning_rate": 9.430008110300081e-06, "loss": 0.6202, "step": 19717 }, { "epoch": 0.5756912206942862, "grad_norm": 0.602674013978317, "learning_rate": 9.429359286293593e-06, "loss": 0.5753, "step": 19718 }, { "epoch": 0.5757204169221337, "grad_norm": 0.6588411497895724, "learning_rate": 9.428710462287105e-06, "loss": 0.631, "step": 19719 }, { "epoch": 0.5757496131499811, "grad_norm": 0.6556139448128451, "learning_rate": 9.428061638280617e-06, "loss": 0.6402, "step": 19720 }, { "epoch": 0.5757788093778284, "grad_norm": 0.601909001393036, "learning_rate": 9.42741281427413e-06, "loss": 0.5397, "step": 19721 }, { "epoch": 0.5758080056056758, "grad_norm": 0.6679695311679613, "learning_rate": 9.426763990267641e-06, "loss": 0.6445, "step": 19722 }, { "epoch": 0.5758372018335232, "grad_norm": 0.6670293750974026, "learning_rate": 9.426115166261153e-06, "loss": 0.6334, "step": 19723 }, { "epoch": 0.5758663980613705, "grad_norm": 0.599457759145224, "learning_rate": 9.425466342254664e-06, "loss": 0.5156, "step": 19724 }, { "epoch": 0.5758955942892179, "grad_norm": 0.6219145407682973, "learning_rate": 9.424817518248176e-06, "loss": 0.5852, "step": 19725 }, { "epoch": 0.5759247905170652, "grad_norm": 0.5841938690203852, "learning_rate": 9.424168694241688e-06, "loss": 0.5148, "step": 19726 }, { "epoch": 0.5759539867449126, "grad_norm": 0.6475824711321772, "learning_rate": 9.4235198702352e-06, "loss": 0.6067, "step": 19727 }, { "epoch": 0.57598318297276, "grad_norm": 0.6023240996996317, "learning_rate": 9.42287104622871e-06, "loss": 0.5318, "step": 19728 }, { "epoch": 0.5760123792006073, "grad_norm": 0.6321481825564202, "learning_rate": 9.422222222222222e-06, "loss": 0.5883, "step": 19729 }, { "epoch": 0.5760415754284547, "grad_norm": 0.6657445389831932, "learning_rate": 9.421573398215734e-06, "loss": 0.5413, "step": 19730 }, { "epoch": 0.576070771656302, "grad_norm": 0.5944100118712026, "learning_rate": 9.420924574209246e-06, "loss": 0.5576, "step": 19731 }, { "epoch": 0.5760999678841494, "grad_norm": 0.5790728707262646, "learning_rate": 9.420275750202758e-06, "loss": 0.5264, "step": 19732 }, { "epoch": 0.5761291641119968, "grad_norm": 0.6068071765009365, "learning_rate": 9.41962692619627e-06, "loss": 0.561, "step": 19733 }, { "epoch": 0.5761583603398441, "grad_norm": 0.6571784509214367, "learning_rate": 9.418978102189782e-06, "loss": 0.5938, "step": 19734 }, { "epoch": 0.5761875565676915, "grad_norm": 0.6722820187903505, "learning_rate": 9.418329278183294e-06, "loss": 0.6677, "step": 19735 }, { "epoch": 0.5762167527955389, "grad_norm": 0.6002861648299482, "learning_rate": 9.417680454176805e-06, "loss": 0.5888, "step": 19736 }, { "epoch": 0.5762459490233862, "grad_norm": 0.7884555605433756, "learning_rate": 9.417031630170317e-06, "loss": 0.6143, "step": 19737 }, { "epoch": 0.5762751452512336, "grad_norm": 0.5958071025499474, "learning_rate": 9.416382806163829e-06, "loss": 0.5252, "step": 19738 }, { "epoch": 0.5763043414790809, "grad_norm": 0.6862816572211685, "learning_rate": 9.41573398215734e-06, "loss": 0.6503, "step": 19739 }, { "epoch": 0.5763335377069283, "grad_norm": 0.6328014930124052, "learning_rate": 9.415085158150853e-06, "loss": 0.5706, "step": 19740 }, { "epoch": 0.5763627339347757, "grad_norm": 0.6509307273256686, "learning_rate": 9.414436334144365e-06, "loss": 0.5968, "step": 19741 }, { "epoch": 0.576391930162623, "grad_norm": 0.647390725134871, "learning_rate": 9.413787510137877e-06, "loss": 0.6088, "step": 19742 }, { "epoch": 0.5764211263904704, "grad_norm": 0.6185240358459538, "learning_rate": 9.413138686131387e-06, "loss": 0.5553, "step": 19743 }, { "epoch": 0.5764503226183177, "grad_norm": 0.6672497286702007, "learning_rate": 9.4124898621249e-06, "loss": 0.6198, "step": 19744 }, { "epoch": 0.5764795188461651, "grad_norm": 0.6531918511128517, "learning_rate": 9.411841038118411e-06, "loss": 0.6472, "step": 19745 }, { "epoch": 0.5765087150740125, "grad_norm": 0.6561152913533516, "learning_rate": 9.411192214111923e-06, "loss": 0.6738, "step": 19746 }, { "epoch": 0.5765379113018598, "grad_norm": 0.6760402798581342, "learning_rate": 9.410543390105434e-06, "loss": 0.5651, "step": 19747 }, { "epoch": 0.5765671075297072, "grad_norm": 0.644276612856239, "learning_rate": 9.409894566098946e-06, "loss": 0.64, "step": 19748 }, { "epoch": 0.5765963037575546, "grad_norm": 0.605213435192521, "learning_rate": 9.409245742092458e-06, "loss": 0.5266, "step": 19749 }, { "epoch": 0.5766254999854019, "grad_norm": 0.8684218208635277, "learning_rate": 9.40859691808597e-06, "loss": 0.6752, "step": 19750 }, { "epoch": 0.5766546962132493, "grad_norm": 0.6256891931155178, "learning_rate": 9.407948094079482e-06, "loss": 0.586, "step": 19751 }, { "epoch": 0.5766838924410966, "grad_norm": 0.5933818412204197, "learning_rate": 9.407299270072994e-06, "loss": 0.5519, "step": 19752 }, { "epoch": 0.576713088668944, "grad_norm": 0.6220238914396354, "learning_rate": 9.406650446066506e-06, "loss": 0.5834, "step": 19753 }, { "epoch": 0.5767422848967914, "grad_norm": 0.580211851750311, "learning_rate": 9.406001622060018e-06, "loss": 0.5096, "step": 19754 }, { "epoch": 0.5767714811246387, "grad_norm": 0.7122464707627157, "learning_rate": 9.405352798053528e-06, "loss": 0.7312, "step": 19755 }, { "epoch": 0.5768006773524861, "grad_norm": 0.6212890472300063, "learning_rate": 9.40470397404704e-06, "loss": 0.5839, "step": 19756 }, { "epoch": 0.5768298735803334, "grad_norm": 0.6516743190463533, "learning_rate": 9.404055150040552e-06, "loss": 0.6334, "step": 19757 }, { "epoch": 0.5768590698081808, "grad_norm": 0.5826696581170411, "learning_rate": 9.403406326034064e-06, "loss": 0.4989, "step": 19758 }, { "epoch": 0.5768882660360282, "grad_norm": 0.5974526672042676, "learning_rate": 9.402757502027576e-06, "loss": 0.5393, "step": 19759 }, { "epoch": 0.5769174622638755, "grad_norm": 0.5783661667071486, "learning_rate": 9.402108678021088e-06, "loss": 0.4775, "step": 19760 }, { "epoch": 0.5769466584917229, "grad_norm": 0.6513656567163155, "learning_rate": 9.4014598540146e-06, "loss": 0.6226, "step": 19761 }, { "epoch": 0.5769758547195702, "grad_norm": 0.6603733641166949, "learning_rate": 9.40081103000811e-06, "loss": 0.6777, "step": 19762 }, { "epoch": 0.5770050509474176, "grad_norm": 0.6207143113155719, "learning_rate": 9.400162206001623e-06, "loss": 0.5272, "step": 19763 }, { "epoch": 0.577034247175265, "grad_norm": 0.661879893637665, "learning_rate": 9.399513381995135e-06, "loss": 0.6014, "step": 19764 }, { "epoch": 0.5770634434031123, "grad_norm": 0.7002268003564968, "learning_rate": 9.398864557988647e-06, "loss": 0.6844, "step": 19765 }, { "epoch": 0.5770926396309597, "grad_norm": 0.6418320997898792, "learning_rate": 9.398215733982157e-06, "loss": 0.5942, "step": 19766 }, { "epoch": 0.577121835858807, "grad_norm": 0.6215542812030532, "learning_rate": 9.397566909975669e-06, "loss": 0.6045, "step": 19767 }, { "epoch": 0.5771510320866544, "grad_norm": 0.6600959752137773, "learning_rate": 9.396918085969181e-06, "loss": 0.7003, "step": 19768 }, { "epoch": 0.5771802283145018, "grad_norm": 0.633230571823423, "learning_rate": 9.396269261962693e-06, "loss": 0.5535, "step": 19769 }, { "epoch": 0.5772094245423491, "grad_norm": 0.5810271700479792, "learning_rate": 9.395620437956205e-06, "loss": 0.5394, "step": 19770 }, { "epoch": 0.5772386207701965, "grad_norm": 0.6548089903839099, "learning_rate": 9.394971613949717e-06, "loss": 0.6192, "step": 19771 }, { "epoch": 0.5772678169980439, "grad_norm": 0.7107773315500844, "learning_rate": 9.39432278994323e-06, "loss": 0.6264, "step": 19772 }, { "epoch": 0.5772970132258912, "grad_norm": 0.6146153695667834, "learning_rate": 9.393673965936741e-06, "loss": 0.5737, "step": 19773 }, { "epoch": 0.5773262094537386, "grad_norm": 0.643767453425803, "learning_rate": 9.393025141930252e-06, "loss": 0.5925, "step": 19774 }, { "epoch": 0.5773554056815859, "grad_norm": 0.5571257059432544, "learning_rate": 9.392376317923764e-06, "loss": 0.4608, "step": 19775 }, { "epoch": 0.5773846019094333, "grad_norm": 0.6439705612847223, "learning_rate": 9.391727493917276e-06, "loss": 0.575, "step": 19776 }, { "epoch": 0.5774137981372807, "grad_norm": 0.634954805151983, "learning_rate": 9.391078669910788e-06, "loss": 0.5296, "step": 19777 }, { "epoch": 0.577442994365128, "grad_norm": 0.6422383597683939, "learning_rate": 9.3904298459043e-06, "loss": 0.6223, "step": 19778 }, { "epoch": 0.5774721905929754, "grad_norm": 0.6388367153253983, "learning_rate": 9.389781021897812e-06, "loss": 0.6186, "step": 19779 }, { "epoch": 0.5775013868208227, "grad_norm": 0.6210020278254625, "learning_rate": 9.389132197891324e-06, "loss": 0.5916, "step": 19780 }, { "epoch": 0.5775305830486701, "grad_norm": 0.6722679859533295, "learning_rate": 9.388483373884834e-06, "loss": 0.5774, "step": 19781 }, { "epoch": 0.5775597792765175, "grad_norm": 0.6715192958101458, "learning_rate": 9.387834549878346e-06, "loss": 0.6612, "step": 19782 }, { "epoch": 0.5775889755043648, "grad_norm": 0.6446452624214588, "learning_rate": 9.387185725871858e-06, "loss": 0.5963, "step": 19783 }, { "epoch": 0.5776181717322122, "grad_norm": 0.6777382743882708, "learning_rate": 9.38653690186537e-06, "loss": 0.708, "step": 19784 }, { "epoch": 0.5776473679600596, "grad_norm": 0.6364710978762874, "learning_rate": 9.38588807785888e-06, "loss": 0.6172, "step": 19785 }, { "epoch": 0.5776765641879069, "grad_norm": 0.7347522391715492, "learning_rate": 9.385239253852392e-06, "loss": 0.5969, "step": 19786 }, { "epoch": 0.5777057604157543, "grad_norm": 0.6459268008271217, "learning_rate": 9.384590429845905e-06, "loss": 0.6204, "step": 19787 }, { "epoch": 0.5777349566436016, "grad_norm": 0.6533623775206101, "learning_rate": 9.383941605839417e-06, "loss": 0.6025, "step": 19788 }, { "epoch": 0.577764152871449, "grad_norm": 0.6748828524747736, "learning_rate": 9.383292781832929e-06, "loss": 0.6672, "step": 19789 }, { "epoch": 0.5777933490992964, "grad_norm": 0.6597574750648191, "learning_rate": 9.38264395782644e-06, "loss": 0.5853, "step": 19790 }, { "epoch": 0.5778225453271437, "grad_norm": 0.627070367650616, "learning_rate": 9.381995133819953e-06, "loss": 0.6055, "step": 19791 }, { "epoch": 0.5778517415549911, "grad_norm": 0.6679849821876357, "learning_rate": 9.381346309813465e-06, "loss": 0.6344, "step": 19792 }, { "epoch": 0.5778809377828384, "grad_norm": 0.6392455721701893, "learning_rate": 9.380697485806975e-06, "loss": 0.6189, "step": 19793 }, { "epoch": 0.5779101340106858, "grad_norm": 0.6270293130569957, "learning_rate": 9.380048661800487e-06, "loss": 0.5945, "step": 19794 }, { "epoch": 0.5779393302385332, "grad_norm": 0.6800632587980727, "learning_rate": 9.379399837793999e-06, "loss": 0.6576, "step": 19795 }, { "epoch": 0.5779685264663805, "grad_norm": 0.6328345972192223, "learning_rate": 9.378751013787511e-06, "loss": 0.5882, "step": 19796 }, { "epoch": 0.5779977226942279, "grad_norm": 0.7373233891375918, "learning_rate": 9.378102189781023e-06, "loss": 0.654, "step": 19797 }, { "epoch": 0.5780269189220753, "grad_norm": 0.6400986620129826, "learning_rate": 9.377453365774535e-06, "loss": 0.6285, "step": 19798 }, { "epoch": 0.5780561151499226, "grad_norm": 0.6142102365372301, "learning_rate": 9.376804541768047e-06, "loss": 0.5699, "step": 19799 }, { "epoch": 0.57808531137777, "grad_norm": 0.6714225403608547, "learning_rate": 9.376155717761557e-06, "loss": 0.6313, "step": 19800 }, { "epoch": 0.5781145076056173, "grad_norm": 0.6146915346814009, "learning_rate": 9.37550689375507e-06, "loss": 0.5366, "step": 19801 }, { "epoch": 0.5781437038334647, "grad_norm": 0.6706544004853814, "learning_rate": 9.374858069748582e-06, "loss": 0.6783, "step": 19802 }, { "epoch": 0.5781729000613121, "grad_norm": 0.6767698773494538, "learning_rate": 9.374209245742094e-06, "loss": 0.6238, "step": 19803 }, { "epoch": 0.5782020962891594, "grad_norm": 0.6718325372361804, "learning_rate": 9.373560421735604e-06, "loss": 0.6416, "step": 19804 }, { "epoch": 0.5782312925170068, "grad_norm": 0.6467116036833331, "learning_rate": 9.372911597729116e-06, "loss": 0.6002, "step": 19805 }, { "epoch": 0.5782604887448541, "grad_norm": 0.6438316523905925, "learning_rate": 9.37226277372263e-06, "loss": 0.6082, "step": 19806 }, { "epoch": 0.5782896849727015, "grad_norm": 0.654096348896403, "learning_rate": 9.37161394971614e-06, "loss": 0.5911, "step": 19807 }, { "epoch": 0.5783188812005489, "grad_norm": 0.6141370553445701, "learning_rate": 9.370965125709652e-06, "loss": 0.5779, "step": 19808 }, { "epoch": 0.5783480774283962, "grad_norm": 0.6759288669412168, "learning_rate": 9.370316301703164e-06, "loss": 0.6024, "step": 19809 }, { "epoch": 0.5783772736562436, "grad_norm": 0.6302123917337579, "learning_rate": 9.369667477696676e-06, "loss": 0.6015, "step": 19810 }, { "epoch": 0.578406469884091, "grad_norm": 0.6193709749998113, "learning_rate": 9.369018653690188e-06, "loss": 0.5676, "step": 19811 }, { "epoch": 0.5784356661119383, "grad_norm": 0.6610518750818892, "learning_rate": 9.368369829683698e-06, "loss": 0.6253, "step": 19812 }, { "epoch": 0.5784648623397857, "grad_norm": 0.6299039190602309, "learning_rate": 9.36772100567721e-06, "loss": 0.6105, "step": 19813 }, { "epoch": 0.578494058567633, "grad_norm": 0.5989513659816186, "learning_rate": 9.367072181670722e-06, "loss": 0.5584, "step": 19814 }, { "epoch": 0.5785232547954804, "grad_norm": 0.6388366373451361, "learning_rate": 9.366423357664234e-06, "loss": 0.5847, "step": 19815 }, { "epoch": 0.5785524510233278, "grad_norm": 0.6766682697761608, "learning_rate": 9.365774533657747e-06, "loss": 0.67, "step": 19816 }, { "epoch": 0.5785816472511751, "grad_norm": 0.6319014015027002, "learning_rate": 9.365125709651259e-06, "loss": 0.6324, "step": 19817 }, { "epoch": 0.5786108434790225, "grad_norm": 0.7362302362142015, "learning_rate": 9.36447688564477e-06, "loss": 0.8097, "step": 19818 }, { "epoch": 0.5786400397068698, "grad_norm": 0.6404181568152439, "learning_rate": 9.363828061638281e-06, "loss": 0.6209, "step": 19819 }, { "epoch": 0.5786692359347172, "grad_norm": 0.6213822295646898, "learning_rate": 9.363179237631793e-06, "loss": 0.5486, "step": 19820 }, { "epoch": 0.5786984321625646, "grad_norm": 0.6219182047929813, "learning_rate": 9.362530413625305e-06, "loss": 0.5592, "step": 19821 }, { "epoch": 0.5787276283904119, "grad_norm": 0.6407068522248229, "learning_rate": 9.361881589618817e-06, "loss": 0.6345, "step": 19822 }, { "epoch": 0.5787568246182593, "grad_norm": 0.5969622052680473, "learning_rate": 9.361232765612327e-06, "loss": 0.5072, "step": 19823 }, { "epoch": 0.5787860208461066, "grad_norm": 0.6460636553266011, "learning_rate": 9.36058394160584e-06, "loss": 0.6407, "step": 19824 }, { "epoch": 0.578815217073954, "grad_norm": 0.6341613741708352, "learning_rate": 9.359935117599353e-06, "loss": 0.5477, "step": 19825 }, { "epoch": 0.5788444133018014, "grad_norm": 0.6237054116557107, "learning_rate": 9.359286293592863e-06, "loss": 0.5493, "step": 19826 }, { "epoch": 0.5788736095296487, "grad_norm": 0.6943993823034456, "learning_rate": 9.358637469586375e-06, "loss": 0.6685, "step": 19827 }, { "epoch": 0.5789028057574961, "grad_norm": 0.6352180926839033, "learning_rate": 9.357988645579887e-06, "loss": 0.612, "step": 19828 }, { "epoch": 0.5789320019853434, "grad_norm": 0.6753108508567949, "learning_rate": 9.3573398215734e-06, "loss": 0.6563, "step": 19829 }, { "epoch": 0.5789611982131908, "grad_norm": 0.6002785287586327, "learning_rate": 9.356690997566912e-06, "loss": 0.5094, "step": 19830 }, { "epoch": 0.5789903944410382, "grad_norm": 0.6291162481670366, "learning_rate": 9.356042173560422e-06, "loss": 0.6016, "step": 19831 }, { "epoch": 0.5790195906688855, "grad_norm": 0.6721387947802537, "learning_rate": 9.355393349553934e-06, "loss": 0.6454, "step": 19832 }, { "epoch": 0.5790487868967329, "grad_norm": 0.6914390588118192, "learning_rate": 9.354744525547446e-06, "loss": 0.6782, "step": 19833 }, { "epoch": 0.5790779831245803, "grad_norm": 0.617079420091046, "learning_rate": 9.354095701540958e-06, "loss": 0.5624, "step": 19834 }, { "epoch": 0.5791071793524276, "grad_norm": 0.659373424636154, "learning_rate": 9.35344687753447e-06, "loss": 0.6667, "step": 19835 }, { "epoch": 0.579136375580275, "grad_norm": 0.668394168016586, "learning_rate": 9.352798053527982e-06, "loss": 0.6521, "step": 19836 }, { "epoch": 0.5791655718081223, "grad_norm": 0.6614453846558516, "learning_rate": 9.352149229521494e-06, "loss": 0.657, "step": 19837 }, { "epoch": 0.5791947680359697, "grad_norm": 0.6039953884800047, "learning_rate": 9.351500405515004e-06, "loss": 0.5497, "step": 19838 }, { "epoch": 0.5792239642638171, "grad_norm": 0.6812765211866277, "learning_rate": 9.350851581508516e-06, "loss": 0.6286, "step": 19839 }, { "epoch": 0.5792531604916645, "grad_norm": 0.6402130672291827, "learning_rate": 9.350202757502028e-06, "loss": 0.5934, "step": 19840 }, { "epoch": 0.5792823567195119, "grad_norm": 0.6309403028472381, "learning_rate": 9.34955393349554e-06, "loss": 0.6313, "step": 19841 }, { "epoch": 0.5793115529473593, "grad_norm": 0.6480589658890835, "learning_rate": 9.34890510948905e-06, "loss": 0.6326, "step": 19842 }, { "epoch": 0.5793407491752066, "grad_norm": 0.6821029638723889, "learning_rate": 9.348256285482563e-06, "loss": 0.6393, "step": 19843 }, { "epoch": 0.579369945403054, "grad_norm": 0.668362566331354, "learning_rate": 9.347607461476077e-06, "loss": 0.6464, "step": 19844 }, { "epoch": 0.5793991416309013, "grad_norm": 0.6500361643094729, "learning_rate": 9.346958637469587e-06, "loss": 0.6532, "step": 19845 }, { "epoch": 0.5794283378587487, "grad_norm": 0.6677976744153497, "learning_rate": 9.346309813463099e-06, "loss": 0.7033, "step": 19846 }, { "epoch": 0.5794575340865961, "grad_norm": 0.6130443640221637, "learning_rate": 9.345660989456611e-06, "loss": 0.5637, "step": 19847 }, { "epoch": 0.5794867303144434, "grad_norm": 0.6597455487220074, "learning_rate": 9.345012165450123e-06, "loss": 0.6372, "step": 19848 }, { "epoch": 0.5795159265422908, "grad_norm": 0.6522893761093971, "learning_rate": 9.344363341443635e-06, "loss": 0.5986, "step": 19849 }, { "epoch": 0.5795451227701381, "grad_norm": 0.6133368676781144, "learning_rate": 9.343714517437145e-06, "loss": 0.5382, "step": 19850 }, { "epoch": 0.5795743189979855, "grad_norm": 0.6253651448383398, "learning_rate": 9.343065693430657e-06, "loss": 0.5728, "step": 19851 }, { "epoch": 0.5796035152258329, "grad_norm": 0.6692160448010227, "learning_rate": 9.34241686942417e-06, "loss": 0.6563, "step": 19852 }, { "epoch": 0.5796327114536802, "grad_norm": 0.6112991345825213, "learning_rate": 9.341768045417681e-06, "loss": 0.5943, "step": 19853 }, { "epoch": 0.5796619076815276, "grad_norm": 0.6258814985412249, "learning_rate": 9.341119221411193e-06, "loss": 0.6111, "step": 19854 }, { "epoch": 0.579691103909375, "grad_norm": 0.6238360017559107, "learning_rate": 9.340470397404705e-06, "loss": 0.5789, "step": 19855 }, { "epoch": 0.5797203001372223, "grad_norm": 0.6283596581379105, "learning_rate": 9.339821573398217e-06, "loss": 0.6134, "step": 19856 }, { "epoch": 0.5797494963650697, "grad_norm": 0.5705675736027201, "learning_rate": 9.339172749391728e-06, "loss": 0.4825, "step": 19857 }, { "epoch": 0.579778692592917, "grad_norm": 0.6024131509347717, "learning_rate": 9.33852392538524e-06, "loss": 0.5256, "step": 19858 }, { "epoch": 0.5798078888207644, "grad_norm": 0.6091958975952998, "learning_rate": 9.337875101378752e-06, "loss": 0.551, "step": 19859 }, { "epoch": 0.5798370850486118, "grad_norm": 0.6556629530170961, "learning_rate": 9.337226277372264e-06, "loss": 0.6234, "step": 19860 }, { "epoch": 0.5798662812764591, "grad_norm": 0.5851213174168374, "learning_rate": 9.336577453365774e-06, "loss": 0.4977, "step": 19861 }, { "epoch": 0.5798954775043065, "grad_norm": 0.6847894052169777, "learning_rate": 9.335928629359286e-06, "loss": 0.6297, "step": 19862 }, { "epoch": 0.5799246737321538, "grad_norm": 0.666095325574468, "learning_rate": 9.3352798053528e-06, "loss": 0.6333, "step": 19863 }, { "epoch": 0.5799538699600012, "grad_norm": 0.6608403726007427, "learning_rate": 9.33463098134631e-06, "loss": 0.6671, "step": 19864 }, { "epoch": 0.5799830661878486, "grad_norm": 0.6631104024066375, "learning_rate": 9.333982157339822e-06, "loss": 0.6353, "step": 19865 }, { "epoch": 0.5800122624156959, "grad_norm": 0.6306849902747304, "learning_rate": 9.333333333333334e-06, "loss": 0.5596, "step": 19866 }, { "epoch": 0.5800414586435433, "grad_norm": 0.6243995628308991, "learning_rate": 9.332684509326846e-06, "loss": 0.5814, "step": 19867 }, { "epoch": 0.5800706548713906, "grad_norm": 0.6883209083991542, "learning_rate": 9.332035685320357e-06, "loss": 0.6268, "step": 19868 }, { "epoch": 0.580099851099238, "grad_norm": 0.5904720726605888, "learning_rate": 9.331386861313869e-06, "loss": 0.5572, "step": 19869 }, { "epoch": 0.5801290473270854, "grad_norm": 0.6192657433934295, "learning_rate": 9.33073803730738e-06, "loss": 0.5623, "step": 19870 }, { "epoch": 0.5801582435549327, "grad_norm": 0.6024313016229533, "learning_rate": 9.330089213300893e-06, "loss": 0.5334, "step": 19871 }, { "epoch": 0.5801874397827801, "grad_norm": 0.6426246441702723, "learning_rate": 9.329440389294405e-06, "loss": 0.6108, "step": 19872 }, { "epoch": 0.5802166360106275, "grad_norm": 0.6667279379405194, "learning_rate": 9.328791565287917e-06, "loss": 0.5933, "step": 19873 }, { "epoch": 0.5802458322384748, "grad_norm": 0.6926424431414666, "learning_rate": 9.328142741281429e-06, "loss": 0.7362, "step": 19874 }, { "epoch": 0.5802750284663222, "grad_norm": 0.6979008820921372, "learning_rate": 9.327493917274941e-06, "loss": 0.6529, "step": 19875 }, { "epoch": 0.5803042246941695, "grad_norm": 0.6066811993586958, "learning_rate": 9.326845093268451e-06, "loss": 0.5733, "step": 19876 }, { "epoch": 0.5803334209220169, "grad_norm": 0.626921489846184, "learning_rate": 9.326196269261963e-06, "loss": 0.5941, "step": 19877 }, { "epoch": 0.5803626171498643, "grad_norm": 0.6528791157236717, "learning_rate": 9.325547445255475e-06, "loss": 0.6074, "step": 19878 }, { "epoch": 0.5803918133777116, "grad_norm": 0.618099133368303, "learning_rate": 9.324898621248987e-06, "loss": 0.5855, "step": 19879 }, { "epoch": 0.580421009605559, "grad_norm": 0.6493400633351177, "learning_rate": 9.324249797242498e-06, "loss": 0.6179, "step": 19880 }, { "epoch": 0.5804502058334063, "grad_norm": 0.6409651196989588, "learning_rate": 9.32360097323601e-06, "loss": 0.5895, "step": 19881 }, { "epoch": 0.5804794020612537, "grad_norm": 0.6186002607774848, "learning_rate": 9.322952149229523e-06, "loss": 0.5848, "step": 19882 }, { "epoch": 0.5805085982891011, "grad_norm": 0.6511908131235475, "learning_rate": 9.322303325223034e-06, "loss": 0.653, "step": 19883 }, { "epoch": 0.5805377945169484, "grad_norm": 0.6067239107459987, "learning_rate": 9.321654501216546e-06, "loss": 0.5373, "step": 19884 }, { "epoch": 0.5805669907447958, "grad_norm": 0.632887080142793, "learning_rate": 9.321005677210058e-06, "loss": 0.5682, "step": 19885 }, { "epoch": 0.5805961869726431, "grad_norm": 0.6468971024175073, "learning_rate": 9.32035685320357e-06, "loss": 0.6104, "step": 19886 }, { "epoch": 0.5806253832004905, "grad_norm": 0.578985474490089, "learning_rate": 9.31970802919708e-06, "loss": 0.539, "step": 19887 }, { "epoch": 0.5806545794283379, "grad_norm": 0.6370816873688202, "learning_rate": 9.319059205190592e-06, "loss": 0.5623, "step": 19888 }, { "epoch": 0.5806837756561852, "grad_norm": 0.7122171283282224, "learning_rate": 9.318410381184104e-06, "loss": 0.6869, "step": 19889 }, { "epoch": 0.5807129718840326, "grad_norm": 0.621434742430111, "learning_rate": 9.317761557177616e-06, "loss": 0.6017, "step": 19890 }, { "epoch": 0.58074216811188, "grad_norm": 0.6316486197764867, "learning_rate": 9.317112733171128e-06, "loss": 0.5704, "step": 19891 }, { "epoch": 0.5807713643397273, "grad_norm": 0.6469242906780346, "learning_rate": 9.31646390916464e-06, "loss": 0.6094, "step": 19892 }, { "epoch": 0.5808005605675747, "grad_norm": 0.6555559979862846, "learning_rate": 9.315815085158152e-06, "loss": 0.6535, "step": 19893 }, { "epoch": 0.580829756795422, "grad_norm": 0.5940084121429883, "learning_rate": 9.315166261151664e-06, "loss": 0.5495, "step": 19894 }, { "epoch": 0.5808589530232694, "grad_norm": 0.6143440543968386, "learning_rate": 9.314517437145175e-06, "loss": 0.5806, "step": 19895 }, { "epoch": 0.5808881492511168, "grad_norm": 0.6400185054232274, "learning_rate": 9.313868613138687e-06, "loss": 0.6371, "step": 19896 }, { "epoch": 0.5809173454789641, "grad_norm": 0.6769991892198015, "learning_rate": 9.313219789132199e-06, "loss": 0.6409, "step": 19897 }, { "epoch": 0.5809465417068115, "grad_norm": 0.6477389458328072, "learning_rate": 9.31257096512571e-06, "loss": 0.6386, "step": 19898 }, { "epoch": 0.5809757379346588, "grad_norm": 0.6095069540127387, "learning_rate": 9.311922141119221e-06, "loss": 0.5863, "step": 19899 }, { "epoch": 0.5810049341625062, "grad_norm": 0.6489229635916032, "learning_rate": 9.311273317112733e-06, "loss": 0.6182, "step": 19900 }, { "epoch": 0.5810341303903536, "grad_norm": 0.6229374921223141, "learning_rate": 9.310624493106247e-06, "loss": 0.6196, "step": 19901 }, { "epoch": 0.5810633266182009, "grad_norm": 0.6311653606038569, "learning_rate": 9.309975669099757e-06, "loss": 0.6078, "step": 19902 }, { "epoch": 0.5810925228460483, "grad_norm": 0.6990771807601093, "learning_rate": 9.30932684509327e-06, "loss": 0.7106, "step": 19903 }, { "epoch": 0.5811217190738956, "grad_norm": 0.6841317474370143, "learning_rate": 9.308678021086781e-06, "loss": 0.6695, "step": 19904 }, { "epoch": 0.581150915301743, "grad_norm": 0.6497692878742882, "learning_rate": 9.308029197080293e-06, "loss": 0.6582, "step": 19905 }, { "epoch": 0.5811801115295904, "grad_norm": 0.6326498857312096, "learning_rate": 9.307380373073804e-06, "loss": 0.6277, "step": 19906 }, { "epoch": 0.5812093077574377, "grad_norm": 0.5837580244252334, "learning_rate": 9.306731549067316e-06, "loss": 0.5171, "step": 19907 }, { "epoch": 0.5812385039852851, "grad_norm": 0.6596947134743578, "learning_rate": 9.306082725060828e-06, "loss": 0.5996, "step": 19908 }, { "epoch": 0.5812677002131325, "grad_norm": 0.5746327718172287, "learning_rate": 9.30543390105434e-06, "loss": 0.4956, "step": 19909 }, { "epoch": 0.5812968964409798, "grad_norm": 0.5996040962711914, "learning_rate": 9.304785077047852e-06, "loss": 0.5966, "step": 19910 }, { "epoch": 0.5813260926688272, "grad_norm": 0.6067937087558185, "learning_rate": 9.304136253041364e-06, "loss": 0.5151, "step": 19911 }, { "epoch": 0.5813552888966745, "grad_norm": 0.5962205896889808, "learning_rate": 9.303487429034876e-06, "loss": 0.5326, "step": 19912 }, { "epoch": 0.5813844851245219, "grad_norm": 0.6361558199089452, "learning_rate": 9.302838605028388e-06, "loss": 0.6037, "step": 19913 }, { "epoch": 0.5814136813523693, "grad_norm": 0.611141656898784, "learning_rate": 9.302189781021898e-06, "loss": 0.5541, "step": 19914 }, { "epoch": 0.5814428775802166, "grad_norm": 0.6051893566781489, "learning_rate": 9.30154095701541e-06, "loss": 0.5728, "step": 19915 }, { "epoch": 0.581472073808064, "grad_norm": 0.6129812333331593, "learning_rate": 9.300892133008922e-06, "loss": 0.5703, "step": 19916 }, { "epoch": 0.5815012700359113, "grad_norm": 0.6353054817904394, "learning_rate": 9.300243309002434e-06, "loss": 0.5916, "step": 19917 }, { "epoch": 0.5815304662637587, "grad_norm": 0.6343649711452729, "learning_rate": 9.299594484995944e-06, "loss": 0.5974, "step": 19918 }, { "epoch": 0.5815596624916061, "grad_norm": 0.6204075267740342, "learning_rate": 9.298945660989457e-06, "loss": 0.5423, "step": 19919 }, { "epoch": 0.5815888587194534, "grad_norm": 0.658834823186503, "learning_rate": 9.29829683698297e-06, "loss": 0.6107, "step": 19920 }, { "epoch": 0.5816180549473008, "grad_norm": 0.6289778219702653, "learning_rate": 9.29764801297648e-06, "loss": 0.6037, "step": 19921 }, { "epoch": 0.5816472511751482, "grad_norm": 0.6404469806741374, "learning_rate": 9.296999188969993e-06, "loss": 0.581, "step": 19922 }, { "epoch": 0.5816764474029955, "grad_norm": 0.6475081347192468, "learning_rate": 9.296350364963505e-06, "loss": 0.6, "step": 19923 }, { "epoch": 0.5817056436308429, "grad_norm": 0.6450628262705216, "learning_rate": 9.295701540957017e-06, "loss": 0.596, "step": 19924 }, { "epoch": 0.5817348398586902, "grad_norm": 0.6082353932240553, "learning_rate": 9.295052716950527e-06, "loss": 0.6145, "step": 19925 }, { "epoch": 0.5817640360865376, "grad_norm": 0.6686367949142208, "learning_rate": 9.294403892944039e-06, "loss": 0.6411, "step": 19926 }, { "epoch": 0.581793232314385, "grad_norm": 0.6407694941069672, "learning_rate": 9.293755068937551e-06, "loss": 0.5917, "step": 19927 }, { "epoch": 0.5818224285422323, "grad_norm": 0.63372323322981, "learning_rate": 9.293106244931063e-06, "loss": 0.6184, "step": 19928 }, { "epoch": 0.5818516247700797, "grad_norm": 0.6501181283685981, "learning_rate": 9.292457420924575e-06, "loss": 0.6318, "step": 19929 }, { "epoch": 0.581880820997927, "grad_norm": 0.6149222830629256, "learning_rate": 9.291808596918087e-06, "loss": 0.5278, "step": 19930 }, { "epoch": 0.5819100172257744, "grad_norm": 0.6473293475310635, "learning_rate": 9.291159772911599e-06, "loss": 0.6182, "step": 19931 }, { "epoch": 0.5819392134536218, "grad_norm": 0.6623780484011674, "learning_rate": 9.290510948905111e-06, "loss": 0.6382, "step": 19932 }, { "epoch": 0.5819684096814691, "grad_norm": 0.6363556402622499, "learning_rate": 9.289862124898622e-06, "loss": 0.5918, "step": 19933 }, { "epoch": 0.5819976059093165, "grad_norm": 0.701839343375555, "learning_rate": 9.289213300892134e-06, "loss": 0.7484, "step": 19934 }, { "epoch": 0.5820268021371638, "grad_norm": 0.598280762813286, "learning_rate": 9.288564476885646e-06, "loss": 0.5406, "step": 19935 }, { "epoch": 0.5820559983650112, "grad_norm": 0.669976985637985, "learning_rate": 9.287915652879158e-06, "loss": 0.6447, "step": 19936 }, { "epoch": 0.5820851945928586, "grad_norm": 0.6381837527522315, "learning_rate": 9.287266828872668e-06, "loss": 0.6162, "step": 19937 }, { "epoch": 0.5821143908207059, "grad_norm": 0.6218722970849136, "learning_rate": 9.28661800486618e-06, "loss": 0.5849, "step": 19938 }, { "epoch": 0.5821435870485533, "grad_norm": 0.6514789924592972, "learning_rate": 9.285969180859694e-06, "loss": 0.6557, "step": 19939 }, { "epoch": 0.5821727832764007, "grad_norm": 0.677215173764008, "learning_rate": 9.285320356853204e-06, "loss": 0.6464, "step": 19940 }, { "epoch": 0.582201979504248, "grad_norm": 0.6503648969932391, "learning_rate": 9.284671532846716e-06, "loss": 0.585, "step": 19941 }, { "epoch": 0.5822311757320954, "grad_norm": 0.6785180527063971, "learning_rate": 9.284022708840228e-06, "loss": 0.6572, "step": 19942 }, { "epoch": 0.5822603719599427, "grad_norm": 0.5878200839361446, "learning_rate": 9.28337388483374e-06, "loss": 0.5151, "step": 19943 }, { "epoch": 0.5822895681877901, "grad_norm": 0.5978248272049467, "learning_rate": 9.28272506082725e-06, "loss": 0.5523, "step": 19944 }, { "epoch": 0.5823187644156375, "grad_norm": 0.6221231701481511, "learning_rate": 9.282076236820762e-06, "loss": 0.5782, "step": 19945 }, { "epoch": 0.5823479606434848, "grad_norm": 0.7711999053680519, "learning_rate": 9.281427412814274e-06, "loss": 0.6175, "step": 19946 }, { "epoch": 0.5823771568713322, "grad_norm": 0.6757211449147479, "learning_rate": 9.280778588807787e-06, "loss": 0.6125, "step": 19947 }, { "epoch": 0.5824063530991795, "grad_norm": 0.6347127980657365, "learning_rate": 9.280129764801299e-06, "loss": 0.5908, "step": 19948 }, { "epoch": 0.5824355493270269, "grad_norm": 0.7061932108452632, "learning_rate": 9.27948094079481e-06, "loss": 0.6263, "step": 19949 }, { "epoch": 0.5824647455548743, "grad_norm": 0.671448030092756, "learning_rate": 9.278832116788323e-06, "loss": 0.6526, "step": 19950 }, { "epoch": 0.5824939417827216, "grad_norm": 0.6444155508165623, "learning_rate": 9.278183292781835e-06, "loss": 0.6226, "step": 19951 }, { "epoch": 0.582523138010569, "grad_norm": 0.6586847637327099, "learning_rate": 9.277534468775345e-06, "loss": 0.6345, "step": 19952 }, { "epoch": 0.5825523342384163, "grad_norm": 0.6357214075764717, "learning_rate": 9.276885644768857e-06, "loss": 0.6212, "step": 19953 }, { "epoch": 0.5825815304662637, "grad_norm": 0.641483181452687, "learning_rate": 9.276236820762369e-06, "loss": 0.5981, "step": 19954 }, { "epoch": 0.5826107266941111, "grad_norm": 0.6041666374558783, "learning_rate": 9.275587996755881e-06, "loss": 0.5707, "step": 19955 }, { "epoch": 0.5826399229219584, "grad_norm": 0.66097535558695, "learning_rate": 9.274939172749391e-06, "loss": 0.6401, "step": 19956 }, { "epoch": 0.5826691191498058, "grad_norm": 0.6491586914211674, "learning_rate": 9.274290348742905e-06, "loss": 0.5949, "step": 19957 }, { "epoch": 0.5826983153776532, "grad_norm": 0.7126785038308696, "learning_rate": 9.273641524736417e-06, "loss": 0.7037, "step": 19958 }, { "epoch": 0.5827275116055005, "grad_norm": 0.6493893582463302, "learning_rate": 9.272992700729927e-06, "loss": 0.6037, "step": 19959 }, { "epoch": 0.582756707833348, "grad_norm": 0.638809231401046, "learning_rate": 9.27234387672344e-06, "loss": 0.5832, "step": 19960 }, { "epoch": 0.5827859040611953, "grad_norm": 0.6259862625839457, "learning_rate": 9.271695052716951e-06, "loss": 0.5938, "step": 19961 }, { "epoch": 0.5828151002890427, "grad_norm": 0.6693748149116682, "learning_rate": 9.271046228710464e-06, "loss": 0.6342, "step": 19962 }, { "epoch": 0.5828442965168901, "grad_norm": 0.5988036143609721, "learning_rate": 9.270397404703974e-06, "loss": 0.5732, "step": 19963 }, { "epoch": 0.5828734927447374, "grad_norm": 0.5941382800414049, "learning_rate": 9.269748580697486e-06, "loss": 0.5351, "step": 19964 }, { "epoch": 0.5829026889725848, "grad_norm": 0.6609220745265036, "learning_rate": 9.269099756690998e-06, "loss": 0.6992, "step": 19965 }, { "epoch": 0.5829318852004322, "grad_norm": 0.6668488377803197, "learning_rate": 9.26845093268451e-06, "loss": 0.635, "step": 19966 }, { "epoch": 0.5829610814282795, "grad_norm": 0.6991342815227727, "learning_rate": 9.267802108678022e-06, "loss": 0.6263, "step": 19967 }, { "epoch": 0.5829902776561269, "grad_norm": 0.6813366426629404, "learning_rate": 9.267153284671534e-06, "loss": 0.6399, "step": 19968 }, { "epoch": 0.5830194738839742, "grad_norm": 0.6613138477791866, "learning_rate": 9.266504460665046e-06, "loss": 0.6527, "step": 19969 }, { "epoch": 0.5830486701118216, "grad_norm": 0.696664496301138, "learning_rate": 9.265855636658558e-06, "loss": 0.703, "step": 19970 }, { "epoch": 0.583077866339669, "grad_norm": 0.6016178071982027, "learning_rate": 9.265206812652068e-06, "loss": 0.5697, "step": 19971 }, { "epoch": 0.5831070625675163, "grad_norm": 0.6147035926313137, "learning_rate": 9.26455798864558e-06, "loss": 0.5786, "step": 19972 }, { "epoch": 0.5831362587953637, "grad_norm": 0.5908983735091863, "learning_rate": 9.263909164639092e-06, "loss": 0.5391, "step": 19973 }, { "epoch": 0.583165455023211, "grad_norm": 0.6361638925121647, "learning_rate": 9.263260340632604e-06, "loss": 0.5888, "step": 19974 }, { "epoch": 0.5831946512510584, "grad_norm": 0.6859104642370936, "learning_rate": 9.262611516626115e-06, "loss": 0.6321, "step": 19975 }, { "epoch": 0.5832238474789058, "grad_norm": 0.5945172883532874, "learning_rate": 9.261962692619629e-06, "loss": 0.5512, "step": 19976 }, { "epoch": 0.5832530437067531, "grad_norm": 0.6726595970219906, "learning_rate": 9.26131386861314e-06, "loss": 0.6631, "step": 19977 }, { "epoch": 0.5832822399346005, "grad_norm": 0.6206139089090837, "learning_rate": 9.260665044606651e-06, "loss": 0.5864, "step": 19978 }, { "epoch": 0.5833114361624478, "grad_norm": 0.6518935542467303, "learning_rate": 9.260016220600163e-06, "loss": 0.6546, "step": 19979 }, { "epoch": 0.5833406323902952, "grad_norm": 0.6809827033445617, "learning_rate": 9.259367396593675e-06, "loss": 0.6376, "step": 19980 }, { "epoch": 0.5833698286181426, "grad_norm": 0.6179149223908521, "learning_rate": 9.258718572587187e-06, "loss": 0.5653, "step": 19981 }, { "epoch": 0.5833990248459899, "grad_norm": 0.6619007002056974, "learning_rate": 9.258069748580697e-06, "loss": 0.6543, "step": 19982 }, { "epoch": 0.5834282210738373, "grad_norm": 0.6116487027995986, "learning_rate": 9.25742092457421e-06, "loss": 0.5625, "step": 19983 }, { "epoch": 0.5834574173016847, "grad_norm": 0.6233620793907234, "learning_rate": 9.256772100567721e-06, "loss": 0.6095, "step": 19984 }, { "epoch": 0.583486613529532, "grad_norm": 0.669846008622328, "learning_rate": 9.256123276561233e-06, "loss": 0.6413, "step": 19985 }, { "epoch": 0.5835158097573794, "grad_norm": 0.6710344272823623, "learning_rate": 9.255474452554745e-06, "loss": 0.6592, "step": 19986 }, { "epoch": 0.5835450059852267, "grad_norm": 0.6466086352209811, "learning_rate": 9.254825628548257e-06, "loss": 0.6136, "step": 19987 }, { "epoch": 0.5835742022130741, "grad_norm": 0.6044483605816335, "learning_rate": 9.25417680454177e-06, "loss": 0.4932, "step": 19988 }, { "epoch": 0.5836033984409215, "grad_norm": 0.6059150325600173, "learning_rate": 9.253527980535281e-06, "loss": 0.5648, "step": 19989 }, { "epoch": 0.5836325946687688, "grad_norm": 0.6237710208880033, "learning_rate": 9.252879156528792e-06, "loss": 0.5582, "step": 19990 }, { "epoch": 0.5836617908966162, "grad_norm": 0.6566875875042176, "learning_rate": 9.252230332522304e-06, "loss": 0.6343, "step": 19991 }, { "epoch": 0.5836909871244635, "grad_norm": 0.7153723240352553, "learning_rate": 9.251581508515816e-06, "loss": 0.7071, "step": 19992 }, { "epoch": 0.5837201833523109, "grad_norm": 0.6426011281290349, "learning_rate": 9.250932684509328e-06, "loss": 0.6503, "step": 19993 }, { "epoch": 0.5837493795801583, "grad_norm": 0.5951452770554659, "learning_rate": 9.250283860502838e-06, "loss": 0.5031, "step": 19994 }, { "epoch": 0.5837785758080056, "grad_norm": 0.6423073392449465, "learning_rate": 9.249635036496352e-06, "loss": 0.6342, "step": 19995 }, { "epoch": 0.583807772035853, "grad_norm": 0.64054981797904, "learning_rate": 9.248986212489864e-06, "loss": 0.5898, "step": 19996 }, { "epoch": 0.5838369682637004, "grad_norm": 0.6941057986422224, "learning_rate": 9.248337388483374e-06, "loss": 0.6229, "step": 19997 }, { "epoch": 0.5838661644915477, "grad_norm": 0.676175767021432, "learning_rate": 9.247688564476886e-06, "loss": 0.6655, "step": 19998 }, { "epoch": 0.5838953607193951, "grad_norm": 0.6668768797993657, "learning_rate": 9.247039740470398e-06, "loss": 0.6527, "step": 19999 }, { "epoch": 0.5839245569472424, "grad_norm": 0.6617111142144064, "learning_rate": 9.24639091646391e-06, "loss": 0.5967, "step": 20000 }, { "epoch": 0.5839537531750898, "grad_norm": 0.6081301877353912, "learning_rate": 9.24574209245742e-06, "loss": 0.5007, "step": 20001 }, { "epoch": 0.5839829494029372, "grad_norm": 0.6068988526269511, "learning_rate": 9.245093268450933e-06, "loss": 0.5267, "step": 20002 }, { "epoch": 0.5840121456307845, "grad_norm": 0.6028848253194413, "learning_rate": 9.244444444444445e-06, "loss": 0.5757, "step": 20003 }, { "epoch": 0.5840413418586319, "grad_norm": 0.5525868999343697, "learning_rate": 9.243795620437957e-06, "loss": 0.4716, "step": 20004 }, { "epoch": 0.5840705380864792, "grad_norm": 0.6572604226044897, "learning_rate": 9.243146796431469e-06, "loss": 0.6242, "step": 20005 }, { "epoch": 0.5840997343143266, "grad_norm": 0.6537901553297031, "learning_rate": 9.242497972424981e-06, "loss": 0.6081, "step": 20006 }, { "epoch": 0.584128930542174, "grad_norm": 0.6402514698618805, "learning_rate": 9.241849148418493e-06, "loss": 0.6013, "step": 20007 }, { "epoch": 0.5841581267700213, "grad_norm": 0.6707348440209926, "learning_rate": 9.241200324412005e-06, "loss": 0.632, "step": 20008 }, { "epoch": 0.5841873229978687, "grad_norm": 0.6744728980907903, "learning_rate": 9.240551500405515e-06, "loss": 0.6002, "step": 20009 }, { "epoch": 0.584216519225716, "grad_norm": 0.6582661649789594, "learning_rate": 9.239902676399027e-06, "loss": 0.647, "step": 20010 }, { "epoch": 0.5842457154535634, "grad_norm": 0.6243871581106685, "learning_rate": 9.23925385239254e-06, "loss": 0.5746, "step": 20011 }, { "epoch": 0.5842749116814108, "grad_norm": 0.6233203667964945, "learning_rate": 9.238605028386051e-06, "loss": 0.619, "step": 20012 }, { "epoch": 0.5843041079092581, "grad_norm": 0.6082230224710352, "learning_rate": 9.237956204379562e-06, "loss": 0.5302, "step": 20013 }, { "epoch": 0.5843333041371055, "grad_norm": 0.6121771198166052, "learning_rate": 9.237307380373075e-06, "loss": 0.593, "step": 20014 }, { "epoch": 0.5843625003649529, "grad_norm": 0.6328782459888794, "learning_rate": 9.236658556366587e-06, "loss": 0.6316, "step": 20015 }, { "epoch": 0.5843916965928002, "grad_norm": 0.670459157058299, "learning_rate": 9.236009732360098e-06, "loss": 0.6299, "step": 20016 }, { "epoch": 0.5844208928206476, "grad_norm": 0.701944190881627, "learning_rate": 9.23536090835361e-06, "loss": 0.6475, "step": 20017 }, { "epoch": 0.5844500890484949, "grad_norm": 0.6640320581393474, "learning_rate": 9.234712084347122e-06, "loss": 0.6816, "step": 20018 }, { "epoch": 0.5844792852763423, "grad_norm": 0.6098718878292103, "learning_rate": 9.234063260340634e-06, "loss": 0.576, "step": 20019 }, { "epoch": 0.5845084815041897, "grad_norm": 0.7159948508137601, "learning_rate": 9.233414436334144e-06, "loss": 0.668, "step": 20020 }, { "epoch": 0.584537677732037, "grad_norm": 0.6236495708583868, "learning_rate": 9.232765612327656e-06, "loss": 0.5595, "step": 20021 }, { "epoch": 0.5845668739598844, "grad_norm": 0.7089502316501326, "learning_rate": 9.232116788321168e-06, "loss": 0.6922, "step": 20022 }, { "epoch": 0.5845960701877317, "grad_norm": 0.6029923821683617, "learning_rate": 9.23146796431468e-06, "loss": 0.5488, "step": 20023 }, { "epoch": 0.5846252664155791, "grad_norm": 0.6313879863519554, "learning_rate": 9.230819140308192e-06, "loss": 0.5421, "step": 20024 }, { "epoch": 0.5846544626434265, "grad_norm": 0.6300570357192369, "learning_rate": 9.230170316301704e-06, "loss": 0.6079, "step": 20025 }, { "epoch": 0.5846836588712738, "grad_norm": 0.6777112466637593, "learning_rate": 9.229521492295216e-06, "loss": 0.5995, "step": 20026 }, { "epoch": 0.5847128550991212, "grad_norm": 0.6491183370749843, "learning_rate": 9.228872668288728e-06, "loss": 0.5912, "step": 20027 }, { "epoch": 0.5847420513269685, "grad_norm": 0.6491863433450809, "learning_rate": 9.228223844282239e-06, "loss": 0.5528, "step": 20028 }, { "epoch": 0.5847712475548159, "grad_norm": 0.6379055454741177, "learning_rate": 9.22757502027575e-06, "loss": 0.5511, "step": 20029 }, { "epoch": 0.5848004437826633, "grad_norm": 0.6193919948855942, "learning_rate": 9.226926196269263e-06, "loss": 0.5472, "step": 20030 }, { "epoch": 0.5848296400105106, "grad_norm": 0.5906320827168248, "learning_rate": 9.226277372262775e-06, "loss": 0.5313, "step": 20031 }, { "epoch": 0.584858836238358, "grad_norm": 0.6545150989242174, "learning_rate": 9.225628548256285e-06, "loss": 0.6282, "step": 20032 }, { "epoch": 0.5848880324662054, "grad_norm": 0.6457272044828247, "learning_rate": 9.224979724249799e-06, "loss": 0.6552, "step": 20033 }, { "epoch": 0.5849172286940527, "grad_norm": 0.6897346899837444, "learning_rate": 9.22433090024331e-06, "loss": 0.6608, "step": 20034 }, { "epoch": 0.5849464249219001, "grad_norm": 0.661429310605166, "learning_rate": 9.223682076236821e-06, "loss": 0.6134, "step": 20035 }, { "epoch": 0.5849756211497474, "grad_norm": 0.6686089713350415, "learning_rate": 9.223033252230333e-06, "loss": 0.6004, "step": 20036 }, { "epoch": 0.5850048173775948, "grad_norm": 0.6710718119057238, "learning_rate": 9.222384428223845e-06, "loss": 0.6282, "step": 20037 }, { "epoch": 0.5850340136054422, "grad_norm": 0.5888131060425397, "learning_rate": 9.221735604217357e-06, "loss": 0.5098, "step": 20038 }, { "epoch": 0.5850632098332895, "grad_norm": 0.6656638945809163, "learning_rate": 9.221086780210868e-06, "loss": 0.6349, "step": 20039 }, { "epoch": 0.5850924060611369, "grad_norm": 0.6776744453837734, "learning_rate": 9.22043795620438e-06, "loss": 0.5932, "step": 20040 }, { "epoch": 0.5851216022889842, "grad_norm": 0.6455995203182955, "learning_rate": 9.219789132197892e-06, "loss": 0.6158, "step": 20041 }, { "epoch": 0.5851507985168316, "grad_norm": 0.6315527507868185, "learning_rate": 9.219140308191404e-06, "loss": 0.5573, "step": 20042 }, { "epoch": 0.585179994744679, "grad_norm": 0.6348492699419391, "learning_rate": 9.218491484184916e-06, "loss": 0.5572, "step": 20043 }, { "epoch": 0.5852091909725263, "grad_norm": 0.6412786447279012, "learning_rate": 9.217842660178428e-06, "loss": 0.6224, "step": 20044 }, { "epoch": 0.5852383872003737, "grad_norm": 0.6518412204411249, "learning_rate": 9.21719383617194e-06, "loss": 0.6434, "step": 20045 }, { "epoch": 0.585267583428221, "grad_norm": 0.63173654287034, "learning_rate": 9.216545012165452e-06, "loss": 0.607, "step": 20046 }, { "epoch": 0.5852967796560684, "grad_norm": 0.6407743853555581, "learning_rate": 9.215896188158962e-06, "loss": 0.6097, "step": 20047 }, { "epoch": 0.5853259758839158, "grad_norm": 0.6531509818487805, "learning_rate": 9.215247364152474e-06, "loss": 0.6095, "step": 20048 }, { "epoch": 0.5853551721117631, "grad_norm": 0.6091490461958686, "learning_rate": 9.214598540145986e-06, "loss": 0.5572, "step": 20049 }, { "epoch": 0.5853843683396105, "grad_norm": 0.6040641743560123, "learning_rate": 9.213949716139498e-06, "loss": 0.5508, "step": 20050 }, { "epoch": 0.5854135645674579, "grad_norm": 0.621138462407295, "learning_rate": 9.213300892133009e-06, "loss": 0.6182, "step": 20051 }, { "epoch": 0.5854427607953052, "grad_norm": 0.6415823073669737, "learning_rate": 9.212652068126522e-06, "loss": 0.5968, "step": 20052 }, { "epoch": 0.5854719570231526, "grad_norm": 0.6018279306872094, "learning_rate": 9.212003244120034e-06, "loss": 0.5515, "step": 20053 }, { "epoch": 0.5855011532509999, "grad_norm": 0.6516576323188918, "learning_rate": 9.211354420113545e-06, "loss": 0.6231, "step": 20054 }, { "epoch": 0.5855303494788473, "grad_norm": 0.6606289601107924, "learning_rate": 9.210705596107057e-06, "loss": 0.6812, "step": 20055 }, { "epoch": 0.5855595457066947, "grad_norm": 0.6123829470096573, "learning_rate": 9.210056772100569e-06, "loss": 0.5492, "step": 20056 }, { "epoch": 0.585588741934542, "grad_norm": 0.6067141432298176, "learning_rate": 9.20940794809408e-06, "loss": 0.5655, "step": 20057 }, { "epoch": 0.5856179381623894, "grad_norm": 0.7111284630267019, "learning_rate": 9.208759124087591e-06, "loss": 0.7142, "step": 20058 }, { "epoch": 0.5856471343902367, "grad_norm": 0.6755382083957454, "learning_rate": 9.208110300081103e-06, "loss": 0.726, "step": 20059 }, { "epoch": 0.5856763306180841, "grad_norm": 0.6130889949066114, "learning_rate": 9.207461476074615e-06, "loss": 0.5529, "step": 20060 }, { "epoch": 0.5857055268459315, "grad_norm": 0.5944138491405873, "learning_rate": 9.206812652068127e-06, "loss": 0.5569, "step": 20061 }, { "epoch": 0.5857347230737788, "grad_norm": 0.6346526541055789, "learning_rate": 9.206163828061639e-06, "loss": 0.636, "step": 20062 }, { "epoch": 0.5857639193016262, "grad_norm": 0.6684958331410119, "learning_rate": 9.205515004055151e-06, "loss": 0.6337, "step": 20063 }, { "epoch": 0.5857931155294736, "grad_norm": 0.6149282148883284, "learning_rate": 9.204866180048663e-06, "loss": 0.6063, "step": 20064 }, { "epoch": 0.5858223117573209, "grad_norm": 0.6267701899251824, "learning_rate": 9.204217356042174e-06, "loss": 0.5686, "step": 20065 }, { "epoch": 0.5858515079851683, "grad_norm": 0.6309358674289045, "learning_rate": 9.203568532035686e-06, "loss": 0.5564, "step": 20066 }, { "epoch": 0.5858807042130156, "grad_norm": 0.6631999060900756, "learning_rate": 9.202919708029198e-06, "loss": 0.6224, "step": 20067 }, { "epoch": 0.585909900440863, "grad_norm": 0.6525919849307416, "learning_rate": 9.20227088402271e-06, "loss": 0.6309, "step": 20068 }, { "epoch": 0.5859390966687104, "grad_norm": 0.6596596181315962, "learning_rate": 9.201622060016222e-06, "loss": 0.5756, "step": 20069 }, { "epoch": 0.5859682928965577, "grad_norm": 0.6427072695169539, "learning_rate": 9.200973236009732e-06, "loss": 0.6356, "step": 20070 }, { "epoch": 0.5859974891244051, "grad_norm": 0.6348628672634536, "learning_rate": 9.200324412003246e-06, "loss": 0.56, "step": 20071 }, { "epoch": 0.5860266853522524, "grad_norm": 0.6342394107059955, "learning_rate": 9.199675587996758e-06, "loss": 0.5979, "step": 20072 }, { "epoch": 0.5860558815800998, "grad_norm": 0.6716352563105665, "learning_rate": 9.199026763990268e-06, "loss": 0.6619, "step": 20073 }, { "epoch": 0.5860850778079472, "grad_norm": 0.6355623723376957, "learning_rate": 9.19837793998378e-06, "loss": 0.6682, "step": 20074 }, { "epoch": 0.5861142740357945, "grad_norm": 0.6726527282658687, "learning_rate": 9.197729115977292e-06, "loss": 0.6796, "step": 20075 }, { "epoch": 0.5861434702636419, "grad_norm": 0.6320708813728385, "learning_rate": 9.197080291970804e-06, "loss": 0.6057, "step": 20076 }, { "epoch": 0.5861726664914892, "grad_norm": 0.5939281258843505, "learning_rate": 9.196431467964314e-06, "loss": 0.5422, "step": 20077 }, { "epoch": 0.5862018627193366, "grad_norm": 0.6878246725374031, "learning_rate": 9.195782643957826e-06, "loss": 0.6735, "step": 20078 }, { "epoch": 0.586231058947184, "grad_norm": 0.5921007777664847, "learning_rate": 9.195133819951339e-06, "loss": 0.536, "step": 20079 }, { "epoch": 0.5862602551750313, "grad_norm": 0.624993223467527, "learning_rate": 9.19448499594485e-06, "loss": 0.5531, "step": 20080 }, { "epoch": 0.5862894514028788, "grad_norm": 0.6222836090013378, "learning_rate": 9.193836171938363e-06, "loss": 0.5939, "step": 20081 }, { "epoch": 0.5863186476307262, "grad_norm": 0.5835692906459022, "learning_rate": 9.193187347931875e-06, "loss": 0.5294, "step": 20082 }, { "epoch": 0.5863478438585735, "grad_norm": 0.6666408592773994, "learning_rate": 9.192538523925387e-06, "loss": 0.6209, "step": 20083 }, { "epoch": 0.5863770400864209, "grad_norm": 0.6445825226103309, "learning_rate": 9.191889699918897e-06, "loss": 0.6052, "step": 20084 }, { "epoch": 0.5864062363142682, "grad_norm": 0.6640646839518352, "learning_rate": 9.191240875912409e-06, "loss": 0.6331, "step": 20085 }, { "epoch": 0.5864354325421156, "grad_norm": 0.6243920448833761, "learning_rate": 9.190592051905921e-06, "loss": 0.586, "step": 20086 }, { "epoch": 0.586464628769963, "grad_norm": 0.6250870202905903, "learning_rate": 9.189943227899433e-06, "loss": 0.5453, "step": 20087 }, { "epoch": 0.5864938249978103, "grad_norm": 0.6723094162019705, "learning_rate": 9.189294403892945e-06, "loss": 0.6499, "step": 20088 }, { "epoch": 0.5865230212256577, "grad_norm": 0.686777445560653, "learning_rate": 9.188645579886455e-06, "loss": 0.6057, "step": 20089 }, { "epoch": 0.586552217453505, "grad_norm": 0.6257739068546067, "learning_rate": 9.187996755879969e-06, "loss": 0.5584, "step": 20090 }, { "epoch": 0.5865814136813524, "grad_norm": 0.705039032654032, "learning_rate": 9.187347931873481e-06, "loss": 0.649, "step": 20091 }, { "epoch": 0.5866106099091998, "grad_norm": 0.6849107492643852, "learning_rate": 9.186699107866991e-06, "loss": 0.6873, "step": 20092 }, { "epoch": 0.5866398061370471, "grad_norm": 0.7077124343490483, "learning_rate": 9.186050283860504e-06, "loss": 0.663, "step": 20093 }, { "epoch": 0.5866690023648945, "grad_norm": 0.6211606357823853, "learning_rate": 9.185401459854016e-06, "loss": 0.5649, "step": 20094 }, { "epoch": 0.5866981985927419, "grad_norm": 0.6497197631443703, "learning_rate": 9.184752635847528e-06, "loss": 0.5691, "step": 20095 }, { "epoch": 0.5867273948205892, "grad_norm": 0.5964726747563343, "learning_rate": 9.184103811841038e-06, "loss": 0.5246, "step": 20096 }, { "epoch": 0.5867565910484366, "grad_norm": 0.6065628179771013, "learning_rate": 9.18345498783455e-06, "loss": 0.5407, "step": 20097 }, { "epoch": 0.5867857872762839, "grad_norm": 0.5947111255640486, "learning_rate": 9.182806163828062e-06, "loss": 0.5037, "step": 20098 }, { "epoch": 0.5868149835041313, "grad_norm": 0.6855871919961197, "learning_rate": 9.182157339821574e-06, "loss": 0.6634, "step": 20099 }, { "epoch": 0.5868441797319787, "grad_norm": 0.66661516099372, "learning_rate": 9.181508515815086e-06, "loss": 0.6618, "step": 20100 }, { "epoch": 0.586873375959826, "grad_norm": 0.6872151960790364, "learning_rate": 9.180859691808598e-06, "loss": 0.6458, "step": 20101 }, { "epoch": 0.5869025721876734, "grad_norm": 0.6469254720410821, "learning_rate": 9.18021086780211e-06, "loss": 0.5975, "step": 20102 }, { "epoch": 0.5869317684155207, "grad_norm": 0.6008871932825113, "learning_rate": 9.17956204379562e-06, "loss": 0.5468, "step": 20103 }, { "epoch": 0.5869609646433681, "grad_norm": 0.612214890271585, "learning_rate": 9.178913219789132e-06, "loss": 0.5442, "step": 20104 }, { "epoch": 0.5869901608712155, "grad_norm": 0.6510700195081395, "learning_rate": 9.178264395782644e-06, "loss": 0.579, "step": 20105 }, { "epoch": 0.5870193570990628, "grad_norm": 0.6389210604228364, "learning_rate": 9.177615571776156e-06, "loss": 0.6049, "step": 20106 }, { "epoch": 0.5870485533269102, "grad_norm": 0.7537032067575334, "learning_rate": 9.176966747769669e-06, "loss": 0.6479, "step": 20107 }, { "epoch": 0.5870777495547576, "grad_norm": 0.6221933729582658, "learning_rate": 9.17631792376318e-06, "loss": 0.5658, "step": 20108 }, { "epoch": 0.5871069457826049, "grad_norm": 0.5855992877586762, "learning_rate": 9.175669099756693e-06, "loss": 0.5103, "step": 20109 }, { "epoch": 0.5871361420104523, "grad_norm": 0.6593074394953816, "learning_rate": 9.175020275750205e-06, "loss": 0.6516, "step": 20110 }, { "epoch": 0.5871653382382996, "grad_norm": 0.6586850386954649, "learning_rate": 9.174371451743715e-06, "loss": 0.6202, "step": 20111 }, { "epoch": 0.587194534466147, "grad_norm": 0.6356362358230876, "learning_rate": 9.173722627737227e-06, "loss": 0.6298, "step": 20112 }, { "epoch": 0.5872237306939944, "grad_norm": 0.6413357056610085, "learning_rate": 9.173073803730739e-06, "loss": 0.5599, "step": 20113 }, { "epoch": 0.5872529269218417, "grad_norm": 0.6375204664758323, "learning_rate": 9.172424979724251e-06, "loss": 0.5874, "step": 20114 }, { "epoch": 0.5872821231496891, "grad_norm": 0.6337720553112377, "learning_rate": 9.171776155717761e-06, "loss": 0.5937, "step": 20115 }, { "epoch": 0.5873113193775364, "grad_norm": 0.6561348526691873, "learning_rate": 9.171127331711273e-06, "loss": 0.6042, "step": 20116 }, { "epoch": 0.5873405156053838, "grad_norm": 0.7024388420451801, "learning_rate": 9.170478507704785e-06, "loss": 0.6631, "step": 20117 }, { "epoch": 0.5873697118332312, "grad_norm": 0.6121888759970076, "learning_rate": 9.169829683698297e-06, "loss": 0.6007, "step": 20118 }, { "epoch": 0.5873989080610785, "grad_norm": 0.6523423038682988, "learning_rate": 9.16918085969181e-06, "loss": 0.6923, "step": 20119 }, { "epoch": 0.5874281042889259, "grad_norm": 0.6272449499959846, "learning_rate": 9.168532035685321e-06, "loss": 0.5816, "step": 20120 }, { "epoch": 0.5874573005167733, "grad_norm": 0.6532065354604037, "learning_rate": 9.167883211678833e-06, "loss": 0.5682, "step": 20121 }, { "epoch": 0.5874864967446206, "grad_norm": 0.6107798953254626, "learning_rate": 9.167234387672344e-06, "loss": 0.569, "step": 20122 }, { "epoch": 0.587515692972468, "grad_norm": 0.632909258352352, "learning_rate": 9.166585563665856e-06, "loss": 0.5468, "step": 20123 }, { "epoch": 0.5875448892003153, "grad_norm": 0.6744425528485879, "learning_rate": 9.165936739659368e-06, "loss": 0.5909, "step": 20124 }, { "epoch": 0.5875740854281627, "grad_norm": 0.6007680905342386, "learning_rate": 9.16528791565288e-06, "loss": 0.5237, "step": 20125 }, { "epoch": 0.5876032816560101, "grad_norm": 0.6050538325997832, "learning_rate": 9.164639091646392e-06, "loss": 0.5614, "step": 20126 }, { "epoch": 0.5876324778838574, "grad_norm": 0.6096246446257154, "learning_rate": 9.163990267639904e-06, "loss": 0.5448, "step": 20127 }, { "epoch": 0.5876616741117048, "grad_norm": 0.6465751648931084, "learning_rate": 9.163341443633416e-06, "loss": 0.6276, "step": 20128 }, { "epoch": 0.5876908703395521, "grad_norm": 0.6106216958609413, "learning_rate": 9.162692619626928e-06, "loss": 0.5622, "step": 20129 }, { "epoch": 0.5877200665673995, "grad_norm": 0.6448087212193647, "learning_rate": 9.162043795620438e-06, "loss": 0.599, "step": 20130 }, { "epoch": 0.5877492627952469, "grad_norm": 0.6411053016377942, "learning_rate": 9.16139497161395e-06, "loss": 0.6041, "step": 20131 }, { "epoch": 0.5877784590230942, "grad_norm": 0.681828500329459, "learning_rate": 9.160746147607462e-06, "loss": 0.6901, "step": 20132 }, { "epoch": 0.5878076552509416, "grad_norm": 0.6421875446823074, "learning_rate": 9.160097323600974e-06, "loss": 0.6071, "step": 20133 }, { "epoch": 0.587836851478789, "grad_norm": 0.7314297796526178, "learning_rate": 9.159448499594485e-06, "loss": 0.6848, "step": 20134 }, { "epoch": 0.5878660477066363, "grad_norm": 0.6809163574870252, "learning_rate": 9.158799675587997e-06, "loss": 0.6729, "step": 20135 }, { "epoch": 0.5878952439344837, "grad_norm": 0.6703852287611979, "learning_rate": 9.158150851581509e-06, "loss": 0.6881, "step": 20136 }, { "epoch": 0.587924440162331, "grad_norm": 0.6084482519908714, "learning_rate": 9.157502027575021e-06, "loss": 0.5388, "step": 20137 }, { "epoch": 0.5879536363901784, "grad_norm": 0.6841074075684701, "learning_rate": 9.156853203568533e-06, "loss": 0.6603, "step": 20138 }, { "epoch": 0.5879828326180258, "grad_norm": 0.6426425078632962, "learning_rate": 9.156204379562045e-06, "loss": 0.5759, "step": 20139 }, { "epoch": 0.5880120288458731, "grad_norm": 0.6244578872998563, "learning_rate": 9.155555555555557e-06, "loss": 0.5616, "step": 20140 }, { "epoch": 0.5880412250737205, "grad_norm": 0.6361181915140817, "learning_rate": 9.154906731549067e-06, "loss": 0.5993, "step": 20141 }, { "epoch": 0.5880704213015678, "grad_norm": 0.6310484129862539, "learning_rate": 9.15425790754258e-06, "loss": 0.5909, "step": 20142 }, { "epoch": 0.5880996175294152, "grad_norm": 0.6606492081398829, "learning_rate": 9.153609083536091e-06, "loss": 0.6021, "step": 20143 }, { "epoch": 0.5881288137572626, "grad_norm": 0.6755791317172658, "learning_rate": 9.152960259529603e-06, "loss": 0.6509, "step": 20144 }, { "epoch": 0.5881580099851099, "grad_norm": 0.6380588589252625, "learning_rate": 9.152311435523115e-06, "loss": 0.6029, "step": 20145 }, { "epoch": 0.5881872062129573, "grad_norm": 0.6762148182883746, "learning_rate": 9.151662611516627e-06, "loss": 0.6628, "step": 20146 }, { "epoch": 0.5882164024408046, "grad_norm": 0.6680244062988911, "learning_rate": 9.15101378751014e-06, "loss": 0.671, "step": 20147 }, { "epoch": 0.588245598668652, "grad_norm": 0.6230050016993285, "learning_rate": 9.150364963503651e-06, "loss": 0.5019, "step": 20148 }, { "epoch": 0.5882747948964994, "grad_norm": 0.5934646956222647, "learning_rate": 9.149716139497162e-06, "loss": 0.4799, "step": 20149 }, { "epoch": 0.5883039911243467, "grad_norm": 0.6096976490443348, "learning_rate": 9.149067315490674e-06, "loss": 0.5928, "step": 20150 }, { "epoch": 0.5883331873521941, "grad_norm": 0.627901185227964, "learning_rate": 9.148418491484186e-06, "loss": 0.5503, "step": 20151 }, { "epoch": 0.5883623835800414, "grad_norm": 0.6976989746366944, "learning_rate": 9.147769667477698e-06, "loss": 0.698, "step": 20152 }, { "epoch": 0.5883915798078888, "grad_norm": 0.6500314172110098, "learning_rate": 9.147120843471208e-06, "loss": 0.6198, "step": 20153 }, { "epoch": 0.5884207760357362, "grad_norm": 0.6212343083439099, "learning_rate": 9.14647201946472e-06, "loss": 0.6011, "step": 20154 }, { "epoch": 0.5884499722635835, "grad_norm": 0.6569222475407833, "learning_rate": 9.145823195458232e-06, "loss": 0.6415, "step": 20155 }, { "epoch": 0.5884791684914309, "grad_norm": 0.6471210012291506, "learning_rate": 9.145174371451744e-06, "loss": 0.561, "step": 20156 }, { "epoch": 0.5885083647192783, "grad_norm": 0.6886390434288205, "learning_rate": 9.144525547445256e-06, "loss": 0.6626, "step": 20157 }, { "epoch": 0.5885375609471256, "grad_norm": 0.6063036821499609, "learning_rate": 9.143876723438768e-06, "loss": 0.504, "step": 20158 }, { "epoch": 0.588566757174973, "grad_norm": 0.659686105909924, "learning_rate": 9.14322789943228e-06, "loss": 0.6223, "step": 20159 }, { "epoch": 0.5885959534028203, "grad_norm": 0.6289447441828587, "learning_rate": 9.14257907542579e-06, "loss": 0.5527, "step": 20160 }, { "epoch": 0.5886251496306677, "grad_norm": 0.6590899059202034, "learning_rate": 9.141930251419303e-06, "loss": 0.666, "step": 20161 }, { "epoch": 0.5886543458585151, "grad_norm": 0.6186249126811096, "learning_rate": 9.141281427412815e-06, "loss": 0.5449, "step": 20162 }, { "epoch": 0.5886835420863624, "grad_norm": 0.6307838406341754, "learning_rate": 9.140632603406327e-06, "loss": 0.596, "step": 20163 }, { "epoch": 0.5887127383142098, "grad_norm": 0.6460431282832536, "learning_rate": 9.139983779399839e-06, "loss": 0.6219, "step": 20164 }, { "epoch": 0.5887419345420571, "grad_norm": 0.67155113448878, "learning_rate": 9.13933495539335e-06, "loss": 0.5952, "step": 20165 }, { "epoch": 0.5887711307699045, "grad_norm": 0.6426480255942976, "learning_rate": 9.138686131386863e-06, "loss": 0.5857, "step": 20166 }, { "epoch": 0.5888003269977519, "grad_norm": 0.6872932972861929, "learning_rate": 9.138037307380375e-06, "loss": 0.6736, "step": 20167 }, { "epoch": 0.5888295232255992, "grad_norm": 0.6375682135955857, "learning_rate": 9.137388483373885e-06, "loss": 0.6282, "step": 20168 }, { "epoch": 0.5888587194534466, "grad_norm": 0.6644289681889523, "learning_rate": 9.136739659367397e-06, "loss": 0.6279, "step": 20169 }, { "epoch": 0.588887915681294, "grad_norm": 0.6144663822795753, "learning_rate": 9.13609083536091e-06, "loss": 0.5749, "step": 20170 }, { "epoch": 0.5889171119091413, "grad_norm": 0.5787107720186275, "learning_rate": 9.135442011354421e-06, "loss": 0.4812, "step": 20171 }, { "epoch": 0.5889463081369887, "grad_norm": 0.6652438389344754, "learning_rate": 9.134793187347932e-06, "loss": 0.6184, "step": 20172 }, { "epoch": 0.588975504364836, "grad_norm": 0.6433224657676216, "learning_rate": 9.134144363341444e-06, "loss": 0.6243, "step": 20173 }, { "epoch": 0.5890047005926834, "grad_norm": 0.6282459761422787, "learning_rate": 9.133495539334957e-06, "loss": 0.5735, "step": 20174 }, { "epoch": 0.5890338968205308, "grad_norm": 0.6407251956060281, "learning_rate": 9.132846715328468e-06, "loss": 0.6337, "step": 20175 }, { "epoch": 0.5890630930483781, "grad_norm": 0.6028641451425802, "learning_rate": 9.13219789132198e-06, "loss": 0.5907, "step": 20176 }, { "epoch": 0.5890922892762255, "grad_norm": 0.6646656136478333, "learning_rate": 9.131549067315492e-06, "loss": 0.6886, "step": 20177 }, { "epoch": 0.5891214855040728, "grad_norm": 0.6361205392807824, "learning_rate": 9.130900243309004e-06, "loss": 0.6116, "step": 20178 }, { "epoch": 0.5891506817319202, "grad_norm": 0.6569613823608097, "learning_rate": 9.130251419302514e-06, "loss": 0.6568, "step": 20179 }, { "epoch": 0.5891798779597676, "grad_norm": 0.6237844370567924, "learning_rate": 9.129602595296026e-06, "loss": 0.5862, "step": 20180 }, { "epoch": 0.5892090741876149, "grad_norm": 0.6416054782236698, "learning_rate": 9.128953771289538e-06, "loss": 0.5942, "step": 20181 }, { "epoch": 0.5892382704154623, "grad_norm": 0.6140340277481376, "learning_rate": 9.12830494728305e-06, "loss": 0.5462, "step": 20182 }, { "epoch": 0.5892674666433096, "grad_norm": 0.6181096946034768, "learning_rate": 9.127656123276562e-06, "loss": 0.5389, "step": 20183 }, { "epoch": 0.589296662871157, "grad_norm": 0.5568631304489384, "learning_rate": 9.127007299270074e-06, "loss": 0.5127, "step": 20184 }, { "epoch": 0.5893258590990044, "grad_norm": 0.6994906072700244, "learning_rate": 9.126358475263586e-06, "loss": 0.6543, "step": 20185 }, { "epoch": 0.5893550553268517, "grad_norm": 0.6320224537267071, "learning_rate": 9.125709651257098e-06, "loss": 0.6128, "step": 20186 }, { "epoch": 0.5893842515546991, "grad_norm": 0.6458809627514958, "learning_rate": 9.125060827250609e-06, "loss": 0.6253, "step": 20187 }, { "epoch": 0.5894134477825465, "grad_norm": 0.6396441040288924, "learning_rate": 9.12441200324412e-06, "loss": 0.6174, "step": 20188 }, { "epoch": 0.5894426440103938, "grad_norm": 0.6779609636986634, "learning_rate": 9.123763179237633e-06, "loss": 0.6903, "step": 20189 }, { "epoch": 0.5894718402382412, "grad_norm": 0.6114904605005136, "learning_rate": 9.123114355231145e-06, "loss": 0.5643, "step": 20190 }, { "epoch": 0.5895010364660885, "grad_norm": 0.5776826323070149, "learning_rate": 9.122465531224655e-06, "loss": 0.5174, "step": 20191 }, { "epoch": 0.5895302326939359, "grad_norm": 0.696031534218335, "learning_rate": 9.121816707218167e-06, "loss": 0.6614, "step": 20192 }, { "epoch": 0.5895594289217833, "grad_norm": 0.6349936886081268, "learning_rate": 9.12116788321168e-06, "loss": 0.5739, "step": 20193 }, { "epoch": 0.5895886251496306, "grad_norm": 0.6655944819497186, "learning_rate": 9.120519059205191e-06, "loss": 0.6856, "step": 20194 }, { "epoch": 0.589617821377478, "grad_norm": 0.6134849952719978, "learning_rate": 9.119870235198703e-06, "loss": 0.5539, "step": 20195 }, { "epoch": 0.5896470176053253, "grad_norm": 0.6277588623213273, "learning_rate": 9.119221411192215e-06, "loss": 0.6217, "step": 20196 }, { "epoch": 0.5896762138331727, "grad_norm": 0.6882840796443266, "learning_rate": 9.118572587185727e-06, "loss": 0.6807, "step": 20197 }, { "epoch": 0.5897054100610201, "grad_norm": 0.6099337275730268, "learning_rate": 9.117923763179238e-06, "loss": 0.5852, "step": 20198 }, { "epoch": 0.5897346062888674, "grad_norm": 0.6695274804311552, "learning_rate": 9.11727493917275e-06, "loss": 0.652, "step": 20199 }, { "epoch": 0.5897638025167148, "grad_norm": 0.6086611030772553, "learning_rate": 9.116626115166262e-06, "loss": 0.5459, "step": 20200 }, { "epoch": 0.5897929987445623, "grad_norm": 0.6253640130703071, "learning_rate": 9.115977291159774e-06, "loss": 0.5794, "step": 20201 }, { "epoch": 0.5898221949724096, "grad_norm": 0.5884524560678107, "learning_rate": 9.115328467153286e-06, "loss": 0.5435, "step": 20202 }, { "epoch": 0.589851391200257, "grad_norm": 0.6818688772274863, "learning_rate": 9.114679643146798e-06, "loss": 0.6235, "step": 20203 }, { "epoch": 0.5898805874281043, "grad_norm": 0.6538888053386145, "learning_rate": 9.11403081914031e-06, "loss": 0.5604, "step": 20204 }, { "epoch": 0.5899097836559517, "grad_norm": 0.6387621190086403, "learning_rate": 9.113381995133822e-06, "loss": 0.6028, "step": 20205 }, { "epoch": 0.5899389798837991, "grad_norm": 0.6976613444183327, "learning_rate": 9.112733171127332e-06, "loss": 0.6336, "step": 20206 }, { "epoch": 0.5899681761116464, "grad_norm": 0.6647606837942212, "learning_rate": 9.112084347120844e-06, "loss": 0.5837, "step": 20207 }, { "epoch": 0.5899973723394938, "grad_norm": 0.6734698975604285, "learning_rate": 9.111435523114356e-06, "loss": 0.6591, "step": 20208 }, { "epoch": 0.5900265685673411, "grad_norm": 0.601247241524435, "learning_rate": 9.110786699107868e-06, "loss": 0.5471, "step": 20209 }, { "epoch": 0.5900557647951885, "grad_norm": 0.6243071748041845, "learning_rate": 9.110137875101379e-06, "loss": 0.5656, "step": 20210 }, { "epoch": 0.5900849610230359, "grad_norm": 0.6605804024465001, "learning_rate": 9.10948905109489e-06, "loss": 0.6237, "step": 20211 }, { "epoch": 0.5901141572508832, "grad_norm": 0.6317356498471041, "learning_rate": 9.108840227088404e-06, "loss": 0.6263, "step": 20212 }, { "epoch": 0.5901433534787306, "grad_norm": 0.7027555909077856, "learning_rate": 9.108191403081915e-06, "loss": 0.6371, "step": 20213 }, { "epoch": 0.590172549706578, "grad_norm": 0.6224871177331996, "learning_rate": 9.107542579075427e-06, "loss": 0.5502, "step": 20214 }, { "epoch": 0.5902017459344253, "grad_norm": 0.6830864820367248, "learning_rate": 9.106893755068939e-06, "loss": 0.6286, "step": 20215 }, { "epoch": 0.5902309421622727, "grad_norm": 0.593777434761453, "learning_rate": 9.10624493106245e-06, "loss": 0.551, "step": 20216 }, { "epoch": 0.59026013839012, "grad_norm": 0.6817249369789093, "learning_rate": 9.105596107055961e-06, "loss": 0.7164, "step": 20217 }, { "epoch": 0.5902893346179674, "grad_norm": 0.6356424747407381, "learning_rate": 9.104947283049473e-06, "loss": 0.5724, "step": 20218 }, { "epoch": 0.5903185308458148, "grad_norm": 0.6244545605884342, "learning_rate": 9.104298459042985e-06, "loss": 0.5519, "step": 20219 }, { "epoch": 0.5903477270736621, "grad_norm": 0.6122717864203852, "learning_rate": 9.103649635036497e-06, "loss": 0.5665, "step": 20220 }, { "epoch": 0.5903769233015095, "grad_norm": 0.7228163175805096, "learning_rate": 9.103000811030009e-06, "loss": 0.6581, "step": 20221 }, { "epoch": 0.5904061195293568, "grad_norm": 0.642760884951135, "learning_rate": 9.102351987023521e-06, "loss": 0.5858, "step": 20222 }, { "epoch": 0.5904353157572042, "grad_norm": 0.635143702290402, "learning_rate": 9.101703163017033e-06, "loss": 0.5781, "step": 20223 }, { "epoch": 0.5904645119850516, "grad_norm": 0.6298146219361007, "learning_rate": 9.101054339010545e-06, "loss": 0.5835, "step": 20224 }, { "epoch": 0.5904937082128989, "grad_norm": 0.66484203434999, "learning_rate": 9.100405515004056e-06, "loss": 0.6314, "step": 20225 }, { "epoch": 0.5905229044407463, "grad_norm": 0.6267351963150963, "learning_rate": 9.099756690997568e-06, "loss": 0.5783, "step": 20226 }, { "epoch": 0.5905521006685936, "grad_norm": 0.6179094750679947, "learning_rate": 9.09910786699108e-06, "loss": 0.5985, "step": 20227 }, { "epoch": 0.590581296896441, "grad_norm": 0.6083555553453097, "learning_rate": 9.098459042984592e-06, "loss": 0.5578, "step": 20228 }, { "epoch": 0.5906104931242884, "grad_norm": 0.6569367636656238, "learning_rate": 9.097810218978102e-06, "loss": 0.5913, "step": 20229 }, { "epoch": 0.5906396893521357, "grad_norm": 0.6298966699378128, "learning_rate": 9.097161394971614e-06, "loss": 0.5931, "step": 20230 }, { "epoch": 0.5906688855799831, "grad_norm": 0.7413104451033721, "learning_rate": 9.096512570965128e-06, "loss": 0.6628, "step": 20231 }, { "epoch": 0.5906980818078305, "grad_norm": 0.6069033991434188, "learning_rate": 9.095863746958638e-06, "loss": 0.5742, "step": 20232 }, { "epoch": 0.5907272780356778, "grad_norm": 0.6377102509550677, "learning_rate": 9.09521492295215e-06, "loss": 0.6294, "step": 20233 }, { "epoch": 0.5907564742635252, "grad_norm": 0.5790108831424863, "learning_rate": 9.094566098945662e-06, "loss": 0.5275, "step": 20234 }, { "epoch": 0.5907856704913725, "grad_norm": 0.6585606012526068, "learning_rate": 9.093917274939174e-06, "loss": 0.6305, "step": 20235 }, { "epoch": 0.5908148667192199, "grad_norm": 0.668505680679466, "learning_rate": 9.093268450932684e-06, "loss": 0.689, "step": 20236 }, { "epoch": 0.5908440629470673, "grad_norm": 0.5812800831159096, "learning_rate": 9.092619626926196e-06, "loss": 0.5121, "step": 20237 }, { "epoch": 0.5908732591749146, "grad_norm": 0.6103555667851617, "learning_rate": 9.091970802919708e-06, "loss": 0.578, "step": 20238 }, { "epoch": 0.590902455402762, "grad_norm": 0.6141165835974204, "learning_rate": 9.09132197891322e-06, "loss": 0.5293, "step": 20239 }, { "epoch": 0.5909316516306093, "grad_norm": 0.6239190456930531, "learning_rate": 9.090673154906733e-06, "loss": 0.6295, "step": 20240 }, { "epoch": 0.5909608478584567, "grad_norm": 0.6341773513141847, "learning_rate": 9.090024330900245e-06, "loss": 0.609, "step": 20241 }, { "epoch": 0.5909900440863041, "grad_norm": 0.7223864704110593, "learning_rate": 9.089375506893757e-06, "loss": 0.6151, "step": 20242 }, { "epoch": 0.5910192403141514, "grad_norm": 0.5890582850715976, "learning_rate": 9.088726682887269e-06, "loss": 0.4927, "step": 20243 }, { "epoch": 0.5910484365419988, "grad_norm": 0.7317285427254833, "learning_rate": 9.088077858880779e-06, "loss": 0.647, "step": 20244 }, { "epoch": 0.5910776327698462, "grad_norm": 0.6838045932581522, "learning_rate": 9.087429034874291e-06, "loss": 0.6629, "step": 20245 }, { "epoch": 0.5911068289976935, "grad_norm": 0.6361985880125862, "learning_rate": 9.086780210867803e-06, "loss": 0.5676, "step": 20246 }, { "epoch": 0.5911360252255409, "grad_norm": 0.5929737610493332, "learning_rate": 9.086131386861315e-06, "loss": 0.5086, "step": 20247 }, { "epoch": 0.5911652214533882, "grad_norm": 0.7048762764651134, "learning_rate": 9.085482562854825e-06, "loss": 0.6974, "step": 20248 }, { "epoch": 0.5911944176812356, "grad_norm": 0.6351049756859852, "learning_rate": 9.084833738848337e-06, "loss": 0.5979, "step": 20249 }, { "epoch": 0.591223613909083, "grad_norm": 0.6501263142604314, "learning_rate": 9.084184914841851e-06, "loss": 0.5958, "step": 20250 }, { "epoch": 0.5912528101369303, "grad_norm": 0.5940534259067636, "learning_rate": 9.083536090835361e-06, "loss": 0.5448, "step": 20251 }, { "epoch": 0.5912820063647777, "grad_norm": 0.7123979866048018, "learning_rate": 9.082887266828873e-06, "loss": 0.6625, "step": 20252 }, { "epoch": 0.591311202592625, "grad_norm": 0.6535515701593618, "learning_rate": 9.082238442822386e-06, "loss": 0.6023, "step": 20253 }, { "epoch": 0.5913403988204724, "grad_norm": 0.5926564110091115, "learning_rate": 9.081589618815898e-06, "loss": 0.5629, "step": 20254 }, { "epoch": 0.5913695950483198, "grad_norm": 0.6569968141967378, "learning_rate": 9.080940794809408e-06, "loss": 0.5963, "step": 20255 }, { "epoch": 0.5913987912761671, "grad_norm": 0.6750303783958335, "learning_rate": 9.08029197080292e-06, "loss": 0.6266, "step": 20256 }, { "epoch": 0.5914279875040145, "grad_norm": 0.6116155198521105, "learning_rate": 9.079643146796432e-06, "loss": 0.5694, "step": 20257 }, { "epoch": 0.5914571837318618, "grad_norm": 0.6189254921631294, "learning_rate": 9.078994322789944e-06, "loss": 0.5553, "step": 20258 }, { "epoch": 0.5914863799597092, "grad_norm": 0.6980783425007383, "learning_rate": 9.078345498783456e-06, "loss": 0.6306, "step": 20259 }, { "epoch": 0.5915155761875566, "grad_norm": 0.6178355231908331, "learning_rate": 9.077696674776968e-06, "loss": 0.5865, "step": 20260 }, { "epoch": 0.5915447724154039, "grad_norm": 0.5929797390954422, "learning_rate": 9.07704785077048e-06, "loss": 0.521, "step": 20261 }, { "epoch": 0.5915739686432513, "grad_norm": 0.5803041840512142, "learning_rate": 9.07639902676399e-06, "loss": 0.5251, "step": 20262 }, { "epoch": 0.5916031648710987, "grad_norm": 0.647883763400413, "learning_rate": 9.075750202757502e-06, "loss": 0.5882, "step": 20263 }, { "epoch": 0.591632361098946, "grad_norm": 0.7098169364978975, "learning_rate": 9.075101378751014e-06, "loss": 0.7407, "step": 20264 }, { "epoch": 0.5916615573267934, "grad_norm": 0.604668960884762, "learning_rate": 9.074452554744526e-06, "loss": 0.5594, "step": 20265 }, { "epoch": 0.5916907535546407, "grad_norm": 0.644186835096473, "learning_rate": 9.073803730738038e-06, "loss": 0.6245, "step": 20266 }, { "epoch": 0.5917199497824881, "grad_norm": 0.6215934109205291, "learning_rate": 9.073154906731549e-06, "loss": 0.552, "step": 20267 }, { "epoch": 0.5917491460103355, "grad_norm": 0.6723521496486766, "learning_rate": 9.07250608272506e-06, "loss": 0.6367, "step": 20268 }, { "epoch": 0.5917783422381828, "grad_norm": 0.6001075080387179, "learning_rate": 9.071857258718575e-06, "loss": 0.5421, "step": 20269 }, { "epoch": 0.5918075384660302, "grad_norm": 0.6262619304017923, "learning_rate": 9.071208434712085e-06, "loss": 0.5337, "step": 20270 }, { "epoch": 0.5918367346938775, "grad_norm": 0.6916742649473653, "learning_rate": 9.070559610705597e-06, "loss": 0.6917, "step": 20271 }, { "epoch": 0.5918659309217249, "grad_norm": 0.632359268678756, "learning_rate": 9.069910786699109e-06, "loss": 0.6151, "step": 20272 }, { "epoch": 0.5918951271495723, "grad_norm": 0.6288673813645883, "learning_rate": 9.069261962692621e-06, "loss": 0.5888, "step": 20273 }, { "epoch": 0.5919243233774196, "grad_norm": 0.631298256508935, "learning_rate": 9.068613138686131e-06, "loss": 0.5892, "step": 20274 }, { "epoch": 0.591953519605267, "grad_norm": 0.6684741500339837, "learning_rate": 9.067964314679643e-06, "loss": 0.6667, "step": 20275 }, { "epoch": 0.5919827158331143, "grad_norm": 0.5939712021720795, "learning_rate": 9.067315490673155e-06, "loss": 0.5494, "step": 20276 }, { "epoch": 0.5920119120609617, "grad_norm": 0.6260606687252285, "learning_rate": 9.066666666666667e-06, "loss": 0.5695, "step": 20277 }, { "epoch": 0.5920411082888091, "grad_norm": 0.5790046780194723, "learning_rate": 9.06601784266018e-06, "loss": 0.4846, "step": 20278 }, { "epoch": 0.5920703045166564, "grad_norm": 0.6727010495587882, "learning_rate": 9.065369018653691e-06, "loss": 0.6589, "step": 20279 }, { "epoch": 0.5920995007445038, "grad_norm": 0.6157283965880707, "learning_rate": 9.064720194647203e-06, "loss": 0.5561, "step": 20280 }, { "epoch": 0.5921286969723512, "grad_norm": 0.6662750132044235, "learning_rate": 9.064071370640714e-06, "loss": 0.5962, "step": 20281 }, { "epoch": 0.5921578932001985, "grad_norm": 0.6444316151958808, "learning_rate": 9.063422546634226e-06, "loss": 0.6212, "step": 20282 }, { "epoch": 0.5921870894280459, "grad_norm": 0.6919416526704666, "learning_rate": 9.062773722627738e-06, "loss": 0.7021, "step": 20283 }, { "epoch": 0.5922162856558932, "grad_norm": 0.6660416233600396, "learning_rate": 9.06212489862125e-06, "loss": 0.6604, "step": 20284 }, { "epoch": 0.5922454818837406, "grad_norm": 0.5803378441854536, "learning_rate": 9.061476074614762e-06, "loss": 0.5541, "step": 20285 }, { "epoch": 0.592274678111588, "grad_norm": 0.7329857665546581, "learning_rate": 9.060827250608272e-06, "loss": 0.6585, "step": 20286 }, { "epoch": 0.5923038743394353, "grad_norm": 0.6935898470730931, "learning_rate": 9.060178426601784e-06, "loss": 0.6326, "step": 20287 }, { "epoch": 0.5923330705672827, "grad_norm": 0.7237163003230943, "learning_rate": 9.059529602595298e-06, "loss": 0.6636, "step": 20288 }, { "epoch": 0.59236226679513, "grad_norm": 0.661116748876484, "learning_rate": 9.058880778588808e-06, "loss": 0.6377, "step": 20289 }, { "epoch": 0.5923914630229774, "grad_norm": 0.6388206729972564, "learning_rate": 9.05823195458232e-06, "loss": 0.6127, "step": 20290 }, { "epoch": 0.5924206592508248, "grad_norm": 0.6158729909578639, "learning_rate": 9.057583130575832e-06, "loss": 0.5673, "step": 20291 }, { "epoch": 0.5924498554786721, "grad_norm": 0.6222712928588777, "learning_rate": 9.056934306569344e-06, "loss": 0.6095, "step": 20292 }, { "epoch": 0.5924790517065195, "grad_norm": 0.6170767373500795, "learning_rate": 9.056285482562855e-06, "loss": 0.5706, "step": 20293 }, { "epoch": 0.5925082479343668, "grad_norm": 0.8267653463590061, "learning_rate": 9.055636658556367e-06, "loss": 0.6522, "step": 20294 }, { "epoch": 0.5925374441622142, "grad_norm": 0.6376688914617626, "learning_rate": 9.054987834549879e-06, "loss": 0.555, "step": 20295 }, { "epoch": 0.5925666403900616, "grad_norm": 0.6582978678256367, "learning_rate": 9.05433901054339e-06, "loss": 0.6032, "step": 20296 }, { "epoch": 0.5925958366179089, "grad_norm": 0.6524609097909676, "learning_rate": 9.053690186536903e-06, "loss": 0.6464, "step": 20297 }, { "epoch": 0.5926250328457563, "grad_norm": 0.5956296730011232, "learning_rate": 9.053041362530415e-06, "loss": 0.5397, "step": 20298 }, { "epoch": 0.5926542290736037, "grad_norm": 0.6233198192357488, "learning_rate": 9.052392538523927e-06, "loss": 0.5489, "step": 20299 }, { "epoch": 0.592683425301451, "grad_norm": 0.5953696026533952, "learning_rate": 9.051743714517437e-06, "loss": 0.5561, "step": 20300 }, { "epoch": 0.5927126215292984, "grad_norm": 0.6248585807042686, "learning_rate": 9.05109489051095e-06, "loss": 0.5268, "step": 20301 }, { "epoch": 0.5927418177571457, "grad_norm": 0.6867616381704174, "learning_rate": 9.050446066504461e-06, "loss": 0.5929, "step": 20302 }, { "epoch": 0.5927710139849931, "grad_norm": 0.9106449838785787, "learning_rate": 9.049797242497973e-06, "loss": 0.7047, "step": 20303 }, { "epoch": 0.5928002102128405, "grad_norm": 0.6546929048439762, "learning_rate": 9.049148418491485e-06, "loss": 0.6451, "step": 20304 }, { "epoch": 0.5928294064406878, "grad_norm": 0.6299052191198414, "learning_rate": 9.048499594484996e-06, "loss": 0.5534, "step": 20305 }, { "epoch": 0.5928586026685352, "grad_norm": 0.6689715667788996, "learning_rate": 9.047850770478508e-06, "loss": 0.6303, "step": 20306 }, { "epoch": 0.5928877988963825, "grad_norm": 0.6769810602220305, "learning_rate": 9.047201946472021e-06, "loss": 0.6482, "step": 20307 }, { "epoch": 0.5929169951242299, "grad_norm": 0.6224733761189322, "learning_rate": 9.046553122465532e-06, "loss": 0.5807, "step": 20308 }, { "epoch": 0.5929461913520773, "grad_norm": 0.6088597900684019, "learning_rate": 9.045904298459044e-06, "loss": 0.5683, "step": 20309 }, { "epoch": 0.5929753875799246, "grad_norm": 0.6559269433652387, "learning_rate": 9.045255474452556e-06, "loss": 0.5709, "step": 20310 }, { "epoch": 0.593004583807772, "grad_norm": 0.6780786707595077, "learning_rate": 9.044606650446068e-06, "loss": 0.6065, "step": 20311 }, { "epoch": 0.5930337800356194, "grad_norm": 0.6542343986409918, "learning_rate": 9.043957826439578e-06, "loss": 0.6291, "step": 20312 }, { "epoch": 0.5930629762634667, "grad_norm": 0.7224512648976877, "learning_rate": 9.04330900243309e-06, "loss": 0.5978, "step": 20313 }, { "epoch": 0.5930921724913141, "grad_norm": 0.6470862862290921, "learning_rate": 9.042660178426602e-06, "loss": 0.595, "step": 20314 }, { "epoch": 0.5931213687191614, "grad_norm": 0.5925206092807079, "learning_rate": 9.042011354420114e-06, "loss": 0.5168, "step": 20315 }, { "epoch": 0.5931505649470088, "grad_norm": 0.6302677453447821, "learning_rate": 9.041362530413626e-06, "loss": 0.5407, "step": 20316 }, { "epoch": 0.5931797611748562, "grad_norm": 0.6510036337255293, "learning_rate": 9.040713706407138e-06, "loss": 0.6626, "step": 20317 }, { "epoch": 0.5932089574027035, "grad_norm": 0.642959076463461, "learning_rate": 9.04006488240065e-06, "loss": 0.6065, "step": 20318 }, { "epoch": 0.5932381536305509, "grad_norm": 0.7857366191670605, "learning_rate": 9.03941605839416e-06, "loss": 0.7042, "step": 20319 }, { "epoch": 0.5932673498583982, "grad_norm": 0.6245228854722169, "learning_rate": 9.038767234387673e-06, "loss": 0.5798, "step": 20320 }, { "epoch": 0.5932965460862456, "grad_norm": 0.644080042150312, "learning_rate": 9.038118410381185e-06, "loss": 0.6502, "step": 20321 }, { "epoch": 0.5933257423140931, "grad_norm": 0.6378774562326164, "learning_rate": 9.037469586374697e-06, "loss": 0.6221, "step": 20322 }, { "epoch": 0.5933549385419404, "grad_norm": 0.6178526574807776, "learning_rate": 9.036820762368209e-06, "loss": 0.6028, "step": 20323 }, { "epoch": 0.5933841347697878, "grad_norm": 0.6303630521019298, "learning_rate": 9.036171938361719e-06, "loss": 0.554, "step": 20324 }, { "epoch": 0.5934133309976352, "grad_norm": 0.9455600593158678, "learning_rate": 9.035523114355231e-06, "loss": 0.7514, "step": 20325 }, { "epoch": 0.5934425272254825, "grad_norm": 0.6892389478741662, "learning_rate": 9.034874290348745e-06, "loss": 0.6659, "step": 20326 }, { "epoch": 0.5934717234533299, "grad_norm": 0.5808607809836321, "learning_rate": 9.034225466342255e-06, "loss": 0.534, "step": 20327 }, { "epoch": 0.5935009196811772, "grad_norm": 0.6691147338167908, "learning_rate": 9.033576642335767e-06, "loss": 0.6251, "step": 20328 }, { "epoch": 0.5935301159090246, "grad_norm": 0.6453084829686345, "learning_rate": 9.03292781832928e-06, "loss": 0.5793, "step": 20329 }, { "epoch": 0.593559312136872, "grad_norm": 0.627576323074314, "learning_rate": 9.032278994322791e-06, "loss": 0.6114, "step": 20330 }, { "epoch": 0.5935885083647193, "grad_norm": 0.6442856878604801, "learning_rate": 9.031630170316302e-06, "loss": 0.5844, "step": 20331 }, { "epoch": 0.5936177045925667, "grad_norm": 0.6166551846956092, "learning_rate": 9.030981346309814e-06, "loss": 0.5432, "step": 20332 }, { "epoch": 0.593646900820414, "grad_norm": 0.7185198616200039, "learning_rate": 9.030332522303326e-06, "loss": 0.6504, "step": 20333 }, { "epoch": 0.5936760970482614, "grad_norm": 0.6614701077121149, "learning_rate": 9.029683698296838e-06, "loss": 0.6313, "step": 20334 }, { "epoch": 0.5937052932761088, "grad_norm": 0.653568912852122, "learning_rate": 9.02903487429035e-06, "loss": 0.6265, "step": 20335 }, { "epoch": 0.5937344895039561, "grad_norm": 0.6519000268141936, "learning_rate": 9.028386050283862e-06, "loss": 0.6239, "step": 20336 }, { "epoch": 0.5937636857318035, "grad_norm": 0.6683016081385547, "learning_rate": 9.027737226277374e-06, "loss": 0.6213, "step": 20337 }, { "epoch": 0.5937928819596509, "grad_norm": 0.6599741152607235, "learning_rate": 9.027088402270884e-06, "loss": 0.6382, "step": 20338 }, { "epoch": 0.5938220781874982, "grad_norm": 0.6563247941954531, "learning_rate": 9.026439578264396e-06, "loss": 0.6651, "step": 20339 }, { "epoch": 0.5938512744153456, "grad_norm": 0.6304649203264666, "learning_rate": 9.025790754257908e-06, "loss": 0.5948, "step": 20340 }, { "epoch": 0.5938804706431929, "grad_norm": 0.6803969572164186, "learning_rate": 9.02514193025142e-06, "loss": 0.6865, "step": 20341 }, { "epoch": 0.5939096668710403, "grad_norm": 0.6513944108557563, "learning_rate": 9.024493106244932e-06, "loss": 0.5515, "step": 20342 }, { "epoch": 0.5939388630988877, "grad_norm": 0.6204791365286998, "learning_rate": 9.023844282238443e-06, "loss": 0.5688, "step": 20343 }, { "epoch": 0.593968059326735, "grad_norm": 0.6394671463318613, "learning_rate": 9.023195458231956e-06, "loss": 0.5973, "step": 20344 }, { "epoch": 0.5939972555545824, "grad_norm": 0.637149696986968, "learning_rate": 9.022546634225468e-06, "loss": 0.6224, "step": 20345 }, { "epoch": 0.5940264517824297, "grad_norm": 0.5905341459330576, "learning_rate": 9.021897810218979e-06, "loss": 0.5611, "step": 20346 }, { "epoch": 0.5940556480102771, "grad_norm": 0.6838462365210762, "learning_rate": 9.02124898621249e-06, "loss": 0.6424, "step": 20347 }, { "epoch": 0.5940848442381245, "grad_norm": 0.5961210491990717, "learning_rate": 9.020600162206003e-06, "loss": 0.5647, "step": 20348 }, { "epoch": 0.5941140404659718, "grad_norm": 0.6212596383652689, "learning_rate": 9.019951338199515e-06, "loss": 0.6046, "step": 20349 }, { "epoch": 0.5941432366938192, "grad_norm": 0.6409787070520788, "learning_rate": 9.019302514193025e-06, "loss": 0.6231, "step": 20350 }, { "epoch": 0.5941724329216665, "grad_norm": 0.6627610134133638, "learning_rate": 9.018653690186537e-06, "loss": 0.6094, "step": 20351 }, { "epoch": 0.5942016291495139, "grad_norm": 0.6497063325683062, "learning_rate": 9.018004866180049e-06, "loss": 0.6211, "step": 20352 }, { "epoch": 0.5942308253773613, "grad_norm": 0.6311040968992647, "learning_rate": 9.017356042173561e-06, "loss": 0.5809, "step": 20353 }, { "epoch": 0.5942600216052086, "grad_norm": 0.639486102547563, "learning_rate": 9.016707218167073e-06, "loss": 0.6182, "step": 20354 }, { "epoch": 0.594289217833056, "grad_norm": 0.6538337521199028, "learning_rate": 9.016058394160585e-06, "loss": 0.6432, "step": 20355 }, { "epoch": 0.5943184140609034, "grad_norm": 0.689448238301212, "learning_rate": 9.015409570154097e-06, "loss": 0.71, "step": 20356 }, { "epoch": 0.5943476102887507, "grad_norm": 0.6404351347191398, "learning_rate": 9.014760746147608e-06, "loss": 0.6333, "step": 20357 }, { "epoch": 0.5943768065165981, "grad_norm": 0.6297498013001919, "learning_rate": 9.01411192214112e-06, "loss": 0.6032, "step": 20358 }, { "epoch": 0.5944060027444454, "grad_norm": 0.6328669757903528, "learning_rate": 9.013463098134632e-06, "loss": 0.5837, "step": 20359 }, { "epoch": 0.5944351989722928, "grad_norm": 0.6297014007562147, "learning_rate": 9.012814274128144e-06, "loss": 0.6151, "step": 20360 }, { "epoch": 0.5944643952001402, "grad_norm": 0.6112737791901521, "learning_rate": 9.012165450121656e-06, "loss": 0.5472, "step": 20361 }, { "epoch": 0.5944935914279875, "grad_norm": 0.5998761302091047, "learning_rate": 9.011516626115166e-06, "loss": 0.5572, "step": 20362 }, { "epoch": 0.5945227876558349, "grad_norm": 0.6064964758149304, "learning_rate": 9.01086780210868e-06, "loss": 0.5585, "step": 20363 }, { "epoch": 0.5945519838836822, "grad_norm": 0.6608540774468336, "learning_rate": 9.010218978102192e-06, "loss": 0.6688, "step": 20364 }, { "epoch": 0.5945811801115296, "grad_norm": 0.6512118671189117, "learning_rate": 9.009570154095702e-06, "loss": 0.6105, "step": 20365 }, { "epoch": 0.594610376339377, "grad_norm": 0.6441684704020898, "learning_rate": 9.008921330089214e-06, "loss": 0.5828, "step": 20366 }, { "epoch": 0.5946395725672243, "grad_norm": 0.643957337935658, "learning_rate": 9.008272506082726e-06, "loss": 0.5652, "step": 20367 }, { "epoch": 0.5946687687950717, "grad_norm": 1.040827684735025, "learning_rate": 9.007623682076238e-06, "loss": 0.7323, "step": 20368 }, { "epoch": 0.594697965022919, "grad_norm": 0.6376988573035685, "learning_rate": 9.006974858069748e-06, "loss": 0.6471, "step": 20369 }, { "epoch": 0.5947271612507664, "grad_norm": 0.6568186220473002, "learning_rate": 9.00632603406326e-06, "loss": 0.6485, "step": 20370 }, { "epoch": 0.5947563574786138, "grad_norm": 0.6924476587457696, "learning_rate": 9.005677210056773e-06, "loss": 0.6698, "step": 20371 }, { "epoch": 0.5947855537064611, "grad_norm": 0.6854602064152603, "learning_rate": 9.005028386050285e-06, "loss": 0.6618, "step": 20372 }, { "epoch": 0.5948147499343085, "grad_norm": 0.6275890140659138, "learning_rate": 9.004379562043797e-06, "loss": 0.6146, "step": 20373 }, { "epoch": 0.5948439461621559, "grad_norm": 0.6816145694186491, "learning_rate": 9.003730738037309e-06, "loss": 0.6556, "step": 20374 }, { "epoch": 0.5948731423900032, "grad_norm": 0.5932143049878501, "learning_rate": 9.00308191403082e-06, "loss": 0.5463, "step": 20375 }, { "epoch": 0.5949023386178506, "grad_norm": 0.6569599682792979, "learning_rate": 9.002433090024331e-06, "loss": 0.6058, "step": 20376 }, { "epoch": 0.5949315348456979, "grad_norm": 0.76082424622695, "learning_rate": 9.001784266017843e-06, "loss": 0.5549, "step": 20377 }, { "epoch": 0.5949607310735453, "grad_norm": 0.6692950674088289, "learning_rate": 9.001135442011355e-06, "loss": 0.6522, "step": 20378 }, { "epoch": 0.5949899273013927, "grad_norm": 0.6566001493823151, "learning_rate": 9.000486618004867e-06, "loss": 0.6167, "step": 20379 }, { "epoch": 0.59501912352924, "grad_norm": 0.6650586400695561, "learning_rate": 8.999837793998379e-06, "loss": 0.6345, "step": 20380 }, { "epoch": 0.5950483197570874, "grad_norm": 0.6768414328279447, "learning_rate": 8.99918896999189e-06, "loss": 0.6402, "step": 20381 }, { "epoch": 0.5950775159849347, "grad_norm": 0.6340944987951632, "learning_rate": 8.998540145985403e-06, "loss": 0.5855, "step": 20382 }, { "epoch": 0.5951067122127821, "grad_norm": 0.6367168209899722, "learning_rate": 8.997891321978915e-06, "loss": 0.5945, "step": 20383 }, { "epoch": 0.5951359084406295, "grad_norm": 0.649486714175343, "learning_rate": 8.997242497972425e-06, "loss": 0.6199, "step": 20384 }, { "epoch": 0.5951651046684768, "grad_norm": 0.6045202321108093, "learning_rate": 8.996593673965938e-06, "loss": 0.5287, "step": 20385 }, { "epoch": 0.5951943008963242, "grad_norm": 0.6570428717545906, "learning_rate": 8.99594484995945e-06, "loss": 0.6108, "step": 20386 }, { "epoch": 0.5952234971241716, "grad_norm": 0.6731222257518475, "learning_rate": 8.995296025952962e-06, "loss": 0.6247, "step": 20387 }, { "epoch": 0.5952526933520189, "grad_norm": 0.6575842191074295, "learning_rate": 8.994647201946472e-06, "loss": 0.6015, "step": 20388 }, { "epoch": 0.5952818895798663, "grad_norm": 0.6182073962291013, "learning_rate": 8.993998377939984e-06, "loss": 0.5254, "step": 20389 }, { "epoch": 0.5953110858077136, "grad_norm": 0.6379580352184436, "learning_rate": 8.993349553933496e-06, "loss": 0.5915, "step": 20390 }, { "epoch": 0.595340282035561, "grad_norm": 0.6079595881270201, "learning_rate": 8.992700729927008e-06, "loss": 0.5802, "step": 20391 }, { "epoch": 0.5953694782634084, "grad_norm": 0.6546998446915905, "learning_rate": 8.99205190592052e-06, "loss": 0.6311, "step": 20392 }, { "epoch": 0.5953986744912557, "grad_norm": 0.6706295795099326, "learning_rate": 8.991403081914032e-06, "loss": 0.6751, "step": 20393 }, { "epoch": 0.5954278707191031, "grad_norm": 0.6638526904371883, "learning_rate": 8.990754257907544e-06, "loss": 0.646, "step": 20394 }, { "epoch": 0.5954570669469504, "grad_norm": 0.683811415895139, "learning_rate": 8.990105433901054e-06, "loss": 0.6413, "step": 20395 }, { "epoch": 0.5954862631747978, "grad_norm": 0.6628099236769638, "learning_rate": 8.989456609894566e-06, "loss": 0.6342, "step": 20396 }, { "epoch": 0.5955154594026452, "grad_norm": 0.5881885186745837, "learning_rate": 8.988807785888078e-06, "loss": 0.5291, "step": 20397 }, { "epoch": 0.5955446556304925, "grad_norm": 0.6650251633474924, "learning_rate": 8.98815896188159e-06, "loss": 0.6086, "step": 20398 }, { "epoch": 0.5955738518583399, "grad_norm": 0.639507727200238, "learning_rate": 8.987510137875103e-06, "loss": 0.6007, "step": 20399 }, { "epoch": 0.5956030480861872, "grad_norm": 0.7231451645657746, "learning_rate": 8.986861313868613e-06, "loss": 0.7444, "step": 20400 }, { "epoch": 0.5956322443140346, "grad_norm": 0.6662369393027641, "learning_rate": 8.986212489862127e-06, "loss": 0.5833, "step": 20401 }, { "epoch": 0.595661440541882, "grad_norm": 0.7020134535664715, "learning_rate": 8.985563665855639e-06, "loss": 0.7154, "step": 20402 }, { "epoch": 0.5956906367697293, "grad_norm": 0.6084465188361972, "learning_rate": 8.984914841849149e-06, "loss": 0.5233, "step": 20403 }, { "epoch": 0.5957198329975767, "grad_norm": 0.6913010736662327, "learning_rate": 8.984266017842661e-06, "loss": 0.6463, "step": 20404 }, { "epoch": 0.595749029225424, "grad_norm": 0.6731663308808884, "learning_rate": 8.983617193836173e-06, "loss": 0.682, "step": 20405 }, { "epoch": 0.5957782254532714, "grad_norm": 0.6959765292340832, "learning_rate": 8.982968369829685e-06, "loss": 0.6927, "step": 20406 }, { "epoch": 0.5958074216811188, "grad_norm": 0.6705359526952148, "learning_rate": 8.982319545823195e-06, "loss": 0.6415, "step": 20407 }, { "epoch": 0.5958366179089661, "grad_norm": 0.685482461170579, "learning_rate": 8.981670721816707e-06, "loss": 0.5987, "step": 20408 }, { "epoch": 0.5958658141368135, "grad_norm": 0.6368222325660111, "learning_rate": 8.98102189781022e-06, "loss": 0.5608, "step": 20409 }, { "epoch": 0.5958950103646609, "grad_norm": 0.6740959421490671, "learning_rate": 8.980373073803731e-06, "loss": 0.7351, "step": 20410 }, { "epoch": 0.5959242065925082, "grad_norm": 0.6224976577139996, "learning_rate": 8.979724249797243e-06, "loss": 0.5871, "step": 20411 }, { "epoch": 0.5959534028203556, "grad_norm": 0.6746375176794001, "learning_rate": 8.979075425790755e-06, "loss": 0.5996, "step": 20412 }, { "epoch": 0.5959825990482029, "grad_norm": 0.6136651095586966, "learning_rate": 8.978426601784268e-06, "loss": 0.505, "step": 20413 }, { "epoch": 0.5960117952760503, "grad_norm": 0.6828671580892637, "learning_rate": 8.977777777777778e-06, "loss": 0.7222, "step": 20414 }, { "epoch": 0.5960409915038977, "grad_norm": 0.6484083310270974, "learning_rate": 8.97712895377129e-06, "loss": 0.6192, "step": 20415 }, { "epoch": 0.596070187731745, "grad_norm": 0.5998520598128341, "learning_rate": 8.976480129764802e-06, "loss": 0.5314, "step": 20416 }, { "epoch": 0.5960993839595924, "grad_norm": 0.6447248432557086, "learning_rate": 8.975831305758314e-06, "loss": 0.6153, "step": 20417 }, { "epoch": 0.5961285801874397, "grad_norm": 0.6672650233278262, "learning_rate": 8.975182481751826e-06, "loss": 0.5535, "step": 20418 }, { "epoch": 0.5961577764152871, "grad_norm": 0.6618938722543671, "learning_rate": 8.974533657745336e-06, "loss": 0.6833, "step": 20419 }, { "epoch": 0.5961869726431345, "grad_norm": 0.6988340080804248, "learning_rate": 8.97388483373885e-06, "loss": 0.6559, "step": 20420 }, { "epoch": 0.5962161688709818, "grad_norm": 0.6963511580354271, "learning_rate": 8.973236009732362e-06, "loss": 0.669, "step": 20421 }, { "epoch": 0.5962453650988292, "grad_norm": 0.6561409682670828, "learning_rate": 8.972587185725872e-06, "loss": 0.6098, "step": 20422 }, { "epoch": 0.5962745613266766, "grad_norm": 0.6058598584096218, "learning_rate": 8.971938361719384e-06, "loss": 0.5283, "step": 20423 }, { "epoch": 0.5963037575545239, "grad_norm": 0.6329419962202723, "learning_rate": 8.971289537712896e-06, "loss": 0.6106, "step": 20424 }, { "epoch": 0.5963329537823713, "grad_norm": 0.6635031836726137, "learning_rate": 8.970640713706408e-06, "loss": 0.6292, "step": 20425 }, { "epoch": 0.5963621500102186, "grad_norm": 0.6629883665002986, "learning_rate": 8.969991889699919e-06, "loss": 0.6396, "step": 20426 }, { "epoch": 0.596391346238066, "grad_norm": 0.65728912947861, "learning_rate": 8.96934306569343e-06, "loss": 0.6044, "step": 20427 }, { "epoch": 0.5964205424659134, "grad_norm": 0.6315676990187191, "learning_rate": 8.968694241686943e-06, "loss": 0.628, "step": 20428 }, { "epoch": 0.5964497386937607, "grad_norm": 0.6172516088053431, "learning_rate": 8.968045417680455e-06, "loss": 0.5951, "step": 20429 }, { "epoch": 0.5964789349216081, "grad_norm": 0.6630611770522375, "learning_rate": 8.967396593673967e-06, "loss": 0.6706, "step": 20430 }, { "epoch": 0.5965081311494554, "grad_norm": 0.6231834741606239, "learning_rate": 8.966747769667479e-06, "loss": 0.5855, "step": 20431 }, { "epoch": 0.5965373273773028, "grad_norm": 0.6497459655405405, "learning_rate": 8.966098945660991e-06, "loss": 0.6176, "step": 20432 }, { "epoch": 0.5965665236051502, "grad_norm": 0.6023898590430278, "learning_rate": 8.965450121654501e-06, "loss": 0.5555, "step": 20433 }, { "epoch": 0.5965957198329975, "grad_norm": 0.6647634143724548, "learning_rate": 8.964801297648013e-06, "loss": 0.6557, "step": 20434 }, { "epoch": 0.5966249160608449, "grad_norm": 0.6356299816271742, "learning_rate": 8.964152473641525e-06, "loss": 0.617, "step": 20435 }, { "epoch": 0.5966541122886923, "grad_norm": 0.6851389129687754, "learning_rate": 8.963503649635037e-06, "loss": 0.6852, "step": 20436 }, { "epoch": 0.5966833085165396, "grad_norm": 0.6407317476771724, "learning_rate": 8.96285482562855e-06, "loss": 0.6352, "step": 20437 }, { "epoch": 0.596712504744387, "grad_norm": 0.636517012425266, "learning_rate": 8.96220600162206e-06, "loss": 0.6025, "step": 20438 }, { "epoch": 0.5967417009722343, "grad_norm": 0.6424676038194582, "learning_rate": 8.961557177615573e-06, "loss": 0.6283, "step": 20439 }, { "epoch": 0.5967708972000817, "grad_norm": 0.6605959732863631, "learning_rate": 8.960908353609085e-06, "loss": 0.667, "step": 20440 }, { "epoch": 0.5968000934279291, "grad_norm": 0.5956628539763789, "learning_rate": 8.960259529602596e-06, "loss": 0.5207, "step": 20441 }, { "epoch": 0.5968292896557764, "grad_norm": 0.6341532845018913, "learning_rate": 8.959610705596108e-06, "loss": 0.6142, "step": 20442 }, { "epoch": 0.5968584858836239, "grad_norm": 0.6389810231516423, "learning_rate": 8.95896188158962e-06, "loss": 0.6327, "step": 20443 }, { "epoch": 0.5968876821114713, "grad_norm": 0.6887772183482177, "learning_rate": 8.958313057583132e-06, "loss": 0.6912, "step": 20444 }, { "epoch": 0.5969168783393186, "grad_norm": 0.6560849542874754, "learning_rate": 8.957664233576642e-06, "loss": 0.639, "step": 20445 }, { "epoch": 0.596946074567166, "grad_norm": 0.6692143920076249, "learning_rate": 8.957015409570154e-06, "loss": 0.6696, "step": 20446 }, { "epoch": 0.5969752707950133, "grad_norm": 0.5932079954372984, "learning_rate": 8.956366585563666e-06, "loss": 0.57, "step": 20447 }, { "epoch": 0.5970044670228607, "grad_norm": 0.6450977039166226, "learning_rate": 8.955717761557178e-06, "loss": 0.6628, "step": 20448 }, { "epoch": 0.5970336632507081, "grad_norm": 0.6100209510564433, "learning_rate": 8.95506893755069e-06, "loss": 0.5371, "step": 20449 }, { "epoch": 0.5970628594785554, "grad_norm": 0.6416560740286451, "learning_rate": 8.954420113544202e-06, "loss": 0.6208, "step": 20450 }, { "epoch": 0.5970920557064028, "grad_norm": 0.6757627734219044, "learning_rate": 8.953771289537714e-06, "loss": 0.6678, "step": 20451 }, { "epoch": 0.5971212519342501, "grad_norm": 0.6198157414055797, "learning_rate": 8.953122465531225e-06, "loss": 0.5834, "step": 20452 }, { "epoch": 0.5971504481620975, "grad_norm": 0.5973843276466073, "learning_rate": 8.952473641524737e-06, "loss": 0.5325, "step": 20453 }, { "epoch": 0.5971796443899449, "grad_norm": 0.5566576012219724, "learning_rate": 8.951824817518249e-06, "loss": 0.4765, "step": 20454 }, { "epoch": 0.5972088406177922, "grad_norm": 0.649753672469761, "learning_rate": 8.95117599351176e-06, "loss": 0.5915, "step": 20455 }, { "epoch": 0.5972380368456396, "grad_norm": 0.6855645226334399, "learning_rate": 8.950527169505273e-06, "loss": 0.6596, "step": 20456 }, { "epoch": 0.597267233073487, "grad_norm": 0.6773907871948288, "learning_rate": 8.949878345498783e-06, "loss": 0.5928, "step": 20457 }, { "epoch": 0.5972964293013343, "grad_norm": 0.6668926725972234, "learning_rate": 8.949229521492297e-06, "loss": 0.6259, "step": 20458 }, { "epoch": 0.5973256255291817, "grad_norm": 0.6387867591267814, "learning_rate": 8.948580697485807e-06, "loss": 0.6057, "step": 20459 }, { "epoch": 0.597354821757029, "grad_norm": 0.6414882557340014, "learning_rate": 8.94793187347932e-06, "loss": 0.6115, "step": 20460 }, { "epoch": 0.5973840179848764, "grad_norm": 0.6758608010529369, "learning_rate": 8.947283049472831e-06, "loss": 0.6748, "step": 20461 }, { "epoch": 0.5974132142127238, "grad_norm": 0.6508120532153238, "learning_rate": 8.946634225466343e-06, "loss": 0.6388, "step": 20462 }, { "epoch": 0.5974424104405711, "grad_norm": 0.6670044400560594, "learning_rate": 8.945985401459855e-06, "loss": 0.6365, "step": 20463 }, { "epoch": 0.5974716066684185, "grad_norm": 0.6815160036658364, "learning_rate": 8.945336577453366e-06, "loss": 0.6187, "step": 20464 }, { "epoch": 0.5975008028962658, "grad_norm": 0.6360938770502825, "learning_rate": 8.944687753446878e-06, "loss": 0.569, "step": 20465 }, { "epoch": 0.5975299991241132, "grad_norm": 0.6342332557295439, "learning_rate": 8.94403892944039e-06, "loss": 0.5921, "step": 20466 }, { "epoch": 0.5975591953519606, "grad_norm": 0.60695467968696, "learning_rate": 8.943390105433902e-06, "loss": 0.5416, "step": 20467 }, { "epoch": 0.5975883915798079, "grad_norm": 0.655014764250171, "learning_rate": 8.942741281427414e-06, "loss": 0.5983, "step": 20468 }, { "epoch": 0.5976175878076553, "grad_norm": 0.6088523091744824, "learning_rate": 8.942092457420926e-06, "loss": 0.5836, "step": 20469 }, { "epoch": 0.5976467840355026, "grad_norm": 0.6726160417485906, "learning_rate": 8.941443633414438e-06, "loss": 0.6496, "step": 20470 }, { "epoch": 0.59767598026335, "grad_norm": 0.6892056209513169, "learning_rate": 8.940794809407948e-06, "loss": 0.6479, "step": 20471 }, { "epoch": 0.5977051764911974, "grad_norm": 0.6436605989282405, "learning_rate": 8.94014598540146e-06, "loss": 0.5634, "step": 20472 }, { "epoch": 0.5977343727190447, "grad_norm": 0.6483579746995786, "learning_rate": 8.939497161394972e-06, "loss": 0.6166, "step": 20473 }, { "epoch": 0.5977635689468921, "grad_norm": 0.6786053392130781, "learning_rate": 8.938848337388484e-06, "loss": 0.6341, "step": 20474 }, { "epoch": 0.5977927651747394, "grad_norm": 0.6666617401182237, "learning_rate": 8.938199513381996e-06, "loss": 0.6427, "step": 20475 }, { "epoch": 0.5978219614025868, "grad_norm": 0.6639342555230763, "learning_rate": 8.937550689375507e-06, "loss": 0.6624, "step": 20476 }, { "epoch": 0.5978511576304342, "grad_norm": 0.595210772814219, "learning_rate": 8.93690186536902e-06, "loss": 0.5257, "step": 20477 }, { "epoch": 0.5978803538582815, "grad_norm": 0.8304767796090735, "learning_rate": 8.93625304136253e-06, "loss": 0.649, "step": 20478 }, { "epoch": 0.5979095500861289, "grad_norm": 0.6490932724087837, "learning_rate": 8.935604217356043e-06, "loss": 0.6216, "step": 20479 }, { "epoch": 0.5979387463139763, "grad_norm": 0.5707134240057531, "learning_rate": 8.934955393349555e-06, "loss": 0.5477, "step": 20480 }, { "epoch": 0.5979679425418236, "grad_norm": 1.0238954833014988, "learning_rate": 8.934306569343067e-06, "loss": 0.632, "step": 20481 }, { "epoch": 0.597997138769671, "grad_norm": 0.649513093082812, "learning_rate": 8.933657745336579e-06, "loss": 0.5976, "step": 20482 }, { "epoch": 0.5980263349975183, "grad_norm": 0.6452972919406665, "learning_rate": 8.933008921330089e-06, "loss": 0.6248, "step": 20483 }, { "epoch": 0.5980555312253657, "grad_norm": 0.6283453422870237, "learning_rate": 8.932360097323601e-06, "loss": 0.5577, "step": 20484 }, { "epoch": 0.5980847274532131, "grad_norm": 0.6232481258389106, "learning_rate": 8.931711273317113e-06, "loss": 0.578, "step": 20485 }, { "epoch": 0.5981139236810604, "grad_norm": 0.6530704700519394, "learning_rate": 8.931062449310625e-06, "loss": 0.6477, "step": 20486 }, { "epoch": 0.5981431199089078, "grad_norm": 0.6236505560613009, "learning_rate": 8.930413625304137e-06, "loss": 0.5303, "step": 20487 }, { "epoch": 0.5981723161367551, "grad_norm": 0.6360236487440574, "learning_rate": 8.92976480129765e-06, "loss": 0.6138, "step": 20488 }, { "epoch": 0.5982015123646025, "grad_norm": 0.6697140337489232, "learning_rate": 8.929115977291161e-06, "loss": 0.6401, "step": 20489 }, { "epoch": 0.5982307085924499, "grad_norm": 0.6458182873284917, "learning_rate": 8.928467153284672e-06, "loss": 0.6301, "step": 20490 }, { "epoch": 0.5982599048202972, "grad_norm": 0.6673507768706093, "learning_rate": 8.927818329278184e-06, "loss": 0.6688, "step": 20491 }, { "epoch": 0.5982891010481446, "grad_norm": 0.6773445949060841, "learning_rate": 8.927169505271696e-06, "loss": 0.6271, "step": 20492 }, { "epoch": 0.598318297275992, "grad_norm": 0.6145317531922939, "learning_rate": 8.926520681265208e-06, "loss": 0.5231, "step": 20493 }, { "epoch": 0.5983474935038393, "grad_norm": 0.6200855805514746, "learning_rate": 8.92587185725872e-06, "loss": 0.5619, "step": 20494 }, { "epoch": 0.5983766897316867, "grad_norm": 0.6866858884632729, "learning_rate": 8.925223033252232e-06, "loss": 0.6883, "step": 20495 }, { "epoch": 0.598405885959534, "grad_norm": 0.7090684076830762, "learning_rate": 8.924574209245744e-06, "loss": 0.6629, "step": 20496 }, { "epoch": 0.5984350821873814, "grad_norm": 0.6311828081513619, "learning_rate": 8.923925385239254e-06, "loss": 0.6107, "step": 20497 }, { "epoch": 0.5984642784152288, "grad_norm": 0.6792384396702354, "learning_rate": 8.923276561232766e-06, "loss": 0.6825, "step": 20498 }, { "epoch": 0.5984934746430761, "grad_norm": 0.6473411714748383, "learning_rate": 8.922627737226278e-06, "loss": 0.6192, "step": 20499 }, { "epoch": 0.5985226708709235, "grad_norm": 0.6106279338938354, "learning_rate": 8.92197891321979e-06, "loss": 0.5546, "step": 20500 }, { "epoch": 0.5985518670987708, "grad_norm": 0.6720197803712904, "learning_rate": 8.921330089213302e-06, "loss": 0.6863, "step": 20501 }, { "epoch": 0.5985810633266182, "grad_norm": 0.7159653580196818, "learning_rate": 8.920681265206813e-06, "loss": 0.6345, "step": 20502 }, { "epoch": 0.5986102595544656, "grad_norm": 0.6220258155087315, "learning_rate": 8.920032441200325e-06, "loss": 0.6063, "step": 20503 }, { "epoch": 0.5986394557823129, "grad_norm": 0.6268467708378699, "learning_rate": 8.919383617193837e-06, "loss": 0.5974, "step": 20504 }, { "epoch": 0.5986686520101603, "grad_norm": 0.6506553717071786, "learning_rate": 8.918734793187349e-06, "loss": 0.572, "step": 20505 }, { "epoch": 0.5986978482380076, "grad_norm": 0.6864895278055835, "learning_rate": 8.91808596918086e-06, "loss": 0.6385, "step": 20506 }, { "epoch": 0.598727044465855, "grad_norm": 0.6563751400402839, "learning_rate": 8.917437145174373e-06, "loss": 0.6555, "step": 20507 }, { "epoch": 0.5987562406937024, "grad_norm": 0.6288554275876351, "learning_rate": 8.916788321167885e-06, "loss": 0.6029, "step": 20508 }, { "epoch": 0.5987854369215497, "grad_norm": 0.6493073165894503, "learning_rate": 8.916139497161395e-06, "loss": 0.5923, "step": 20509 }, { "epoch": 0.5988146331493971, "grad_norm": 0.626351720625328, "learning_rate": 8.915490673154907e-06, "loss": 0.5826, "step": 20510 }, { "epoch": 0.5988438293772445, "grad_norm": 0.6598304375249038, "learning_rate": 8.914841849148419e-06, "loss": 0.6171, "step": 20511 }, { "epoch": 0.5988730256050918, "grad_norm": 0.6648322650118315, "learning_rate": 8.914193025141931e-06, "loss": 0.6493, "step": 20512 }, { "epoch": 0.5989022218329392, "grad_norm": 0.6948381399643118, "learning_rate": 8.913544201135441e-06, "loss": 0.7106, "step": 20513 }, { "epoch": 0.5989314180607865, "grad_norm": 0.6126523178489053, "learning_rate": 8.912895377128955e-06, "loss": 0.5619, "step": 20514 }, { "epoch": 0.5989606142886339, "grad_norm": 0.6592886432623781, "learning_rate": 8.912246553122467e-06, "loss": 0.6863, "step": 20515 }, { "epoch": 0.5989898105164813, "grad_norm": 0.6720111441016169, "learning_rate": 8.911597729115978e-06, "loss": 0.6582, "step": 20516 }, { "epoch": 0.5990190067443286, "grad_norm": 0.6891527906484515, "learning_rate": 8.91094890510949e-06, "loss": 0.6827, "step": 20517 }, { "epoch": 0.599048202972176, "grad_norm": 0.6855584909269097, "learning_rate": 8.910300081103002e-06, "loss": 0.6882, "step": 20518 }, { "epoch": 0.5990773992000233, "grad_norm": 0.6438608921643812, "learning_rate": 8.909651257096514e-06, "loss": 0.6242, "step": 20519 }, { "epoch": 0.5991065954278707, "grad_norm": 0.6543582438411831, "learning_rate": 8.909002433090026e-06, "loss": 0.6253, "step": 20520 }, { "epoch": 0.5991357916557181, "grad_norm": 0.6369621530272578, "learning_rate": 8.908353609083536e-06, "loss": 0.5878, "step": 20521 }, { "epoch": 0.5991649878835654, "grad_norm": 0.6644265113568918, "learning_rate": 8.907704785077048e-06, "loss": 0.6044, "step": 20522 }, { "epoch": 0.5991941841114128, "grad_norm": 0.5928696687612585, "learning_rate": 8.90705596107056e-06, "loss": 0.5151, "step": 20523 }, { "epoch": 0.5992233803392601, "grad_norm": 0.6350462527052882, "learning_rate": 8.906407137064072e-06, "loss": 0.6218, "step": 20524 }, { "epoch": 0.5992525765671075, "grad_norm": 0.6533331570640658, "learning_rate": 8.905758313057584e-06, "loss": 0.6176, "step": 20525 }, { "epoch": 0.5992817727949549, "grad_norm": 0.6644378175890048, "learning_rate": 8.905109489051096e-06, "loss": 0.6555, "step": 20526 }, { "epoch": 0.5993109690228022, "grad_norm": 0.682945715668992, "learning_rate": 8.904460665044608e-06, "loss": 0.6609, "step": 20527 }, { "epoch": 0.5993401652506496, "grad_norm": 0.5984212017705507, "learning_rate": 8.903811841038118e-06, "loss": 0.5319, "step": 20528 }, { "epoch": 0.599369361478497, "grad_norm": 0.6462437877266258, "learning_rate": 8.90316301703163e-06, "loss": 0.6415, "step": 20529 }, { "epoch": 0.5993985577063443, "grad_norm": 0.6267740613915814, "learning_rate": 8.902514193025143e-06, "loss": 0.5862, "step": 20530 }, { "epoch": 0.5994277539341917, "grad_norm": 0.618483847253422, "learning_rate": 8.901865369018655e-06, "loss": 0.574, "step": 20531 }, { "epoch": 0.599456950162039, "grad_norm": 0.6669663134297906, "learning_rate": 8.901216545012165e-06, "loss": 0.6219, "step": 20532 }, { "epoch": 0.5994861463898864, "grad_norm": 0.6731350196174664, "learning_rate": 8.900567721005679e-06, "loss": 0.668, "step": 20533 }, { "epoch": 0.5995153426177338, "grad_norm": 0.5781822691756406, "learning_rate": 8.89991889699919e-06, "loss": 0.51, "step": 20534 }, { "epoch": 0.5995445388455811, "grad_norm": 0.6389560391103026, "learning_rate": 8.899270072992701e-06, "loss": 0.6142, "step": 20535 }, { "epoch": 0.5995737350734285, "grad_norm": 0.65273978234631, "learning_rate": 8.898621248986213e-06, "loss": 0.653, "step": 20536 }, { "epoch": 0.5996029313012758, "grad_norm": 0.6486902955855683, "learning_rate": 8.897972424979725e-06, "loss": 0.6312, "step": 20537 }, { "epoch": 0.5996321275291232, "grad_norm": 0.6241713338267814, "learning_rate": 8.897323600973237e-06, "loss": 0.5796, "step": 20538 }, { "epoch": 0.5996613237569706, "grad_norm": 0.6216534186741488, "learning_rate": 8.896674776966749e-06, "loss": 0.5629, "step": 20539 }, { "epoch": 0.5996905199848179, "grad_norm": 0.6357650988116327, "learning_rate": 8.89602595296026e-06, "loss": 0.6183, "step": 20540 }, { "epoch": 0.5997197162126653, "grad_norm": 0.6638911077629552, "learning_rate": 8.895377128953771e-06, "loss": 0.6164, "step": 20541 }, { "epoch": 0.5997489124405126, "grad_norm": 0.6435983230771736, "learning_rate": 8.894728304947283e-06, "loss": 0.6081, "step": 20542 }, { "epoch": 0.59977810866836, "grad_norm": 0.6786580468105994, "learning_rate": 8.894079480940795e-06, "loss": 0.6418, "step": 20543 }, { "epoch": 0.5998073048962074, "grad_norm": 0.6891577475821159, "learning_rate": 8.893430656934307e-06, "loss": 0.6532, "step": 20544 }, { "epoch": 0.5998365011240547, "grad_norm": 0.6111939694646628, "learning_rate": 8.89278183292782e-06, "loss": 0.5422, "step": 20545 }, { "epoch": 0.5998656973519021, "grad_norm": 0.6501131983826438, "learning_rate": 8.892133008921332e-06, "loss": 0.6341, "step": 20546 }, { "epoch": 0.5998948935797495, "grad_norm": 0.5912666619623524, "learning_rate": 8.891484184914842e-06, "loss": 0.5428, "step": 20547 }, { "epoch": 0.5999240898075968, "grad_norm": 0.6597821805350943, "learning_rate": 8.890835360908354e-06, "loss": 0.6451, "step": 20548 }, { "epoch": 0.5999532860354442, "grad_norm": 0.6748171820064005, "learning_rate": 8.890186536901866e-06, "loss": 0.6656, "step": 20549 }, { "epoch": 0.5999824822632915, "grad_norm": 0.6343608504630185, "learning_rate": 8.889537712895378e-06, "loss": 0.6458, "step": 20550 }, { "epoch": 0.6000116784911389, "grad_norm": 0.6116859237619751, "learning_rate": 8.888888888888888e-06, "loss": 0.5361, "step": 20551 }, { "epoch": 0.6000408747189863, "grad_norm": 0.644889112176084, "learning_rate": 8.888240064882402e-06, "loss": 0.5759, "step": 20552 }, { "epoch": 0.6000700709468336, "grad_norm": 0.6371605973858236, "learning_rate": 8.887591240875914e-06, "loss": 0.5842, "step": 20553 }, { "epoch": 0.600099267174681, "grad_norm": 0.6352955491165421, "learning_rate": 8.886942416869424e-06, "loss": 0.6297, "step": 20554 }, { "epoch": 0.6001284634025283, "grad_norm": 0.6668013089463555, "learning_rate": 8.886293592862936e-06, "loss": 0.6124, "step": 20555 }, { "epoch": 0.6001576596303757, "grad_norm": 0.6547557326943041, "learning_rate": 8.885644768856448e-06, "loss": 0.6103, "step": 20556 }, { "epoch": 0.6001868558582231, "grad_norm": 0.646172064141524, "learning_rate": 8.88499594484996e-06, "loss": 0.5981, "step": 20557 }, { "epoch": 0.6002160520860704, "grad_norm": 0.6572422211774955, "learning_rate": 8.884347120843472e-06, "loss": 0.6194, "step": 20558 }, { "epoch": 0.6002452483139178, "grad_norm": 0.6229447001023107, "learning_rate": 8.883698296836983e-06, "loss": 0.6032, "step": 20559 }, { "epoch": 0.6002744445417652, "grad_norm": 0.5733310324559128, "learning_rate": 8.883049472830495e-06, "loss": 0.5071, "step": 20560 }, { "epoch": 0.6003036407696125, "grad_norm": 0.6622027055403367, "learning_rate": 8.882400648824009e-06, "loss": 0.6114, "step": 20561 }, { "epoch": 0.6003328369974599, "grad_norm": 0.6449603575275195, "learning_rate": 8.881751824817519e-06, "loss": 0.6051, "step": 20562 }, { "epoch": 0.6003620332253073, "grad_norm": 0.6443698444133951, "learning_rate": 8.881103000811031e-06, "loss": 0.6338, "step": 20563 }, { "epoch": 0.6003912294531547, "grad_norm": 0.6961837832740508, "learning_rate": 8.880454176804543e-06, "loss": 0.6565, "step": 20564 }, { "epoch": 0.6004204256810021, "grad_norm": 0.6243676959561564, "learning_rate": 8.879805352798055e-06, "loss": 0.619, "step": 20565 }, { "epoch": 0.6004496219088494, "grad_norm": 0.6523768676584366, "learning_rate": 8.879156528791565e-06, "loss": 0.5818, "step": 20566 }, { "epoch": 0.6004788181366968, "grad_norm": 0.6496735299005891, "learning_rate": 8.878507704785077e-06, "loss": 0.6553, "step": 20567 }, { "epoch": 0.6005080143645442, "grad_norm": 0.66229394304164, "learning_rate": 8.87785888077859e-06, "loss": 0.5896, "step": 20568 }, { "epoch": 0.6005372105923915, "grad_norm": 0.6277178998829256, "learning_rate": 8.877210056772101e-06, "loss": 0.5774, "step": 20569 }, { "epoch": 0.6005664068202389, "grad_norm": 0.6602305932442162, "learning_rate": 8.876561232765612e-06, "loss": 0.6418, "step": 20570 }, { "epoch": 0.6005956030480862, "grad_norm": 0.5913391861108541, "learning_rate": 8.875912408759125e-06, "loss": 0.5289, "step": 20571 }, { "epoch": 0.6006247992759336, "grad_norm": 0.6634845735813187, "learning_rate": 8.875263584752637e-06, "loss": 0.6071, "step": 20572 }, { "epoch": 0.600653995503781, "grad_norm": 0.6792127241051721, "learning_rate": 8.874614760746148e-06, "loss": 0.6025, "step": 20573 }, { "epoch": 0.6006831917316283, "grad_norm": 0.6616824051307502, "learning_rate": 8.87396593673966e-06, "loss": 0.6058, "step": 20574 }, { "epoch": 0.6007123879594757, "grad_norm": 0.6884822864748695, "learning_rate": 8.873317112733172e-06, "loss": 0.6411, "step": 20575 }, { "epoch": 0.600741584187323, "grad_norm": 0.7686648624836168, "learning_rate": 8.872668288726684e-06, "loss": 0.6157, "step": 20576 }, { "epoch": 0.6007707804151704, "grad_norm": 0.6576480379743078, "learning_rate": 8.872019464720196e-06, "loss": 0.6101, "step": 20577 }, { "epoch": 0.6007999766430178, "grad_norm": 0.6782256566060888, "learning_rate": 8.871370640713706e-06, "loss": 0.6838, "step": 20578 }, { "epoch": 0.6008291728708651, "grad_norm": 0.6933588723789944, "learning_rate": 8.870721816707218e-06, "loss": 0.6143, "step": 20579 }, { "epoch": 0.6008583690987125, "grad_norm": 0.6941400335466229, "learning_rate": 8.870072992700732e-06, "loss": 0.7168, "step": 20580 }, { "epoch": 0.6008875653265598, "grad_norm": 0.5744377473882508, "learning_rate": 8.869424168694242e-06, "loss": 0.4994, "step": 20581 }, { "epoch": 0.6009167615544072, "grad_norm": 0.6640032257560023, "learning_rate": 8.868775344687754e-06, "loss": 0.6021, "step": 20582 }, { "epoch": 0.6009459577822546, "grad_norm": 0.6333819985031576, "learning_rate": 8.868126520681266e-06, "loss": 0.6098, "step": 20583 }, { "epoch": 0.6009751540101019, "grad_norm": 0.6738552704646175, "learning_rate": 8.867477696674778e-06, "loss": 0.6855, "step": 20584 }, { "epoch": 0.6010043502379493, "grad_norm": 0.6578223193894691, "learning_rate": 8.866828872668289e-06, "loss": 0.6235, "step": 20585 }, { "epoch": 0.6010335464657967, "grad_norm": 0.6703009256445408, "learning_rate": 8.8661800486618e-06, "loss": 0.6493, "step": 20586 }, { "epoch": 0.601062742693644, "grad_norm": 0.6457650602952565, "learning_rate": 8.865531224655313e-06, "loss": 0.6271, "step": 20587 }, { "epoch": 0.6010919389214914, "grad_norm": 0.6027237219083098, "learning_rate": 8.864882400648825e-06, "loss": 0.5418, "step": 20588 }, { "epoch": 0.6011211351493387, "grad_norm": 0.6422159798151535, "learning_rate": 8.864233576642335e-06, "loss": 0.5886, "step": 20589 }, { "epoch": 0.6011503313771861, "grad_norm": 0.6052504358074294, "learning_rate": 8.863584752635849e-06, "loss": 0.567, "step": 20590 }, { "epoch": 0.6011795276050335, "grad_norm": 0.7018749026524861, "learning_rate": 8.862935928629361e-06, "loss": 0.6523, "step": 20591 }, { "epoch": 0.6012087238328808, "grad_norm": 0.6219501597201678, "learning_rate": 8.862287104622871e-06, "loss": 0.5647, "step": 20592 }, { "epoch": 0.6012379200607282, "grad_norm": 0.6218184750562387, "learning_rate": 8.861638280616383e-06, "loss": 0.5658, "step": 20593 }, { "epoch": 0.6012671162885755, "grad_norm": 0.640108577354327, "learning_rate": 8.860989456609895e-06, "loss": 0.5704, "step": 20594 }, { "epoch": 0.6012963125164229, "grad_norm": 0.613306293029333, "learning_rate": 8.860340632603407e-06, "loss": 0.5804, "step": 20595 }, { "epoch": 0.6013255087442703, "grad_norm": 0.6735537347790503, "learning_rate": 8.85969180859692e-06, "loss": 0.5951, "step": 20596 }, { "epoch": 0.6013547049721176, "grad_norm": 0.6151459908068017, "learning_rate": 8.85904298459043e-06, "loss": 0.5606, "step": 20597 }, { "epoch": 0.601383901199965, "grad_norm": 0.6561457160677222, "learning_rate": 8.858394160583942e-06, "loss": 0.6142, "step": 20598 }, { "epoch": 0.6014130974278123, "grad_norm": 0.6343719243751184, "learning_rate": 8.857745336577455e-06, "loss": 0.5834, "step": 20599 }, { "epoch": 0.6014422936556597, "grad_norm": 0.6790281851418684, "learning_rate": 8.857096512570966e-06, "loss": 0.6555, "step": 20600 }, { "epoch": 0.6014714898835071, "grad_norm": 0.6500479870270919, "learning_rate": 8.856447688564478e-06, "loss": 0.6357, "step": 20601 }, { "epoch": 0.6015006861113544, "grad_norm": 0.6311747593649659, "learning_rate": 8.85579886455799e-06, "loss": 0.5756, "step": 20602 }, { "epoch": 0.6015298823392018, "grad_norm": 0.6668859558408949, "learning_rate": 8.855150040551502e-06, "loss": 0.6722, "step": 20603 }, { "epoch": 0.6015590785670492, "grad_norm": 0.5897491353204927, "learning_rate": 8.854501216545012e-06, "loss": 0.5267, "step": 20604 }, { "epoch": 0.6015882747948965, "grad_norm": 0.6116243713215239, "learning_rate": 8.853852392538524e-06, "loss": 0.4958, "step": 20605 }, { "epoch": 0.6016174710227439, "grad_norm": 0.6361994636801789, "learning_rate": 8.853203568532036e-06, "loss": 0.5609, "step": 20606 }, { "epoch": 0.6016466672505912, "grad_norm": 0.6748155842022439, "learning_rate": 8.852554744525548e-06, "loss": 0.6492, "step": 20607 }, { "epoch": 0.6016758634784386, "grad_norm": 0.6142211065120834, "learning_rate": 8.851905920519059e-06, "loss": 0.5807, "step": 20608 }, { "epoch": 0.601705059706286, "grad_norm": 0.6728200936314734, "learning_rate": 8.851257096512572e-06, "loss": 0.6735, "step": 20609 }, { "epoch": 0.6017342559341333, "grad_norm": 0.6332807166983931, "learning_rate": 8.850608272506084e-06, "loss": 0.6263, "step": 20610 }, { "epoch": 0.6017634521619807, "grad_norm": 0.6145483662359121, "learning_rate": 8.849959448499595e-06, "loss": 0.6025, "step": 20611 }, { "epoch": 0.601792648389828, "grad_norm": 0.66630516225107, "learning_rate": 8.849310624493107e-06, "loss": 0.6341, "step": 20612 }, { "epoch": 0.6018218446176754, "grad_norm": 0.6614288873718557, "learning_rate": 8.848661800486619e-06, "loss": 0.6751, "step": 20613 }, { "epoch": 0.6018510408455228, "grad_norm": 0.6138356146732871, "learning_rate": 8.84801297648013e-06, "loss": 0.5625, "step": 20614 }, { "epoch": 0.6018802370733701, "grad_norm": 0.6428162962487094, "learning_rate": 8.847364152473643e-06, "loss": 0.591, "step": 20615 }, { "epoch": 0.6019094333012175, "grad_norm": 0.6743745815294495, "learning_rate": 8.846715328467153e-06, "loss": 0.6496, "step": 20616 }, { "epoch": 0.6019386295290649, "grad_norm": 0.6608946094032607, "learning_rate": 8.846066504460665e-06, "loss": 0.6331, "step": 20617 }, { "epoch": 0.6019678257569122, "grad_norm": 0.6621973902509164, "learning_rate": 8.845417680454179e-06, "loss": 0.6227, "step": 20618 }, { "epoch": 0.6019970219847596, "grad_norm": 0.6675878658040759, "learning_rate": 8.84476885644769e-06, "loss": 0.6261, "step": 20619 }, { "epoch": 0.6020262182126069, "grad_norm": 0.6016391615814461, "learning_rate": 8.844120032441201e-06, "loss": 0.5675, "step": 20620 }, { "epoch": 0.6020554144404543, "grad_norm": 0.663970906242628, "learning_rate": 8.843471208434713e-06, "loss": 0.6772, "step": 20621 }, { "epoch": 0.6020846106683017, "grad_norm": 0.6508710019047045, "learning_rate": 8.842822384428225e-06, "loss": 0.6069, "step": 20622 }, { "epoch": 0.602113806896149, "grad_norm": 0.6417281538580941, "learning_rate": 8.842173560421736e-06, "loss": 0.6184, "step": 20623 }, { "epoch": 0.6021430031239964, "grad_norm": 0.6499647336675651, "learning_rate": 8.841524736415248e-06, "loss": 0.6218, "step": 20624 }, { "epoch": 0.6021721993518437, "grad_norm": 0.6601687947403558, "learning_rate": 8.84087591240876e-06, "loss": 0.6302, "step": 20625 }, { "epoch": 0.6022013955796911, "grad_norm": 0.6901179101294828, "learning_rate": 8.840227088402272e-06, "loss": 0.6299, "step": 20626 }, { "epoch": 0.6022305918075385, "grad_norm": 0.6874409345604402, "learning_rate": 8.839578264395782e-06, "loss": 0.6546, "step": 20627 }, { "epoch": 0.6022597880353858, "grad_norm": 0.6798490651185217, "learning_rate": 8.838929440389296e-06, "loss": 0.6331, "step": 20628 }, { "epoch": 0.6022889842632332, "grad_norm": 0.6481681628372473, "learning_rate": 8.838280616382808e-06, "loss": 0.5829, "step": 20629 }, { "epoch": 0.6023181804910805, "grad_norm": 0.6782096419135732, "learning_rate": 8.837631792376318e-06, "loss": 0.6294, "step": 20630 }, { "epoch": 0.6023473767189279, "grad_norm": 0.6376317693959467, "learning_rate": 8.83698296836983e-06, "loss": 0.5949, "step": 20631 }, { "epoch": 0.6023765729467753, "grad_norm": 0.6661695599535058, "learning_rate": 8.836334144363342e-06, "loss": 0.6656, "step": 20632 }, { "epoch": 0.6024057691746226, "grad_norm": 0.7057218282517465, "learning_rate": 8.835685320356854e-06, "loss": 0.6111, "step": 20633 }, { "epoch": 0.60243496540247, "grad_norm": 0.6615820619405792, "learning_rate": 8.835036496350366e-06, "loss": 0.6655, "step": 20634 }, { "epoch": 0.6024641616303174, "grad_norm": 0.6410606781030385, "learning_rate": 8.834387672343877e-06, "loss": 0.6059, "step": 20635 }, { "epoch": 0.6024933578581647, "grad_norm": 0.6604139383965657, "learning_rate": 8.833738848337389e-06, "loss": 0.6762, "step": 20636 }, { "epoch": 0.6025225540860121, "grad_norm": 0.6498292233701903, "learning_rate": 8.8330900243309e-06, "loss": 0.5841, "step": 20637 }, { "epoch": 0.6025517503138594, "grad_norm": 0.5900159695989277, "learning_rate": 8.832441200324413e-06, "loss": 0.4893, "step": 20638 }, { "epoch": 0.6025809465417068, "grad_norm": 0.660891472176585, "learning_rate": 8.831792376317925e-06, "loss": 0.6283, "step": 20639 }, { "epoch": 0.6026101427695542, "grad_norm": 0.6542567180875996, "learning_rate": 8.831143552311437e-06, "loss": 0.6264, "step": 20640 }, { "epoch": 0.6026393389974015, "grad_norm": 0.6210384246775877, "learning_rate": 8.830494728304949e-06, "loss": 0.6051, "step": 20641 }, { "epoch": 0.6026685352252489, "grad_norm": 0.6758103837027095, "learning_rate": 8.829845904298459e-06, "loss": 0.6014, "step": 20642 }, { "epoch": 0.6026977314530962, "grad_norm": 0.6822360028610558, "learning_rate": 8.829197080291971e-06, "loss": 0.6961, "step": 20643 }, { "epoch": 0.6027269276809436, "grad_norm": 0.5826780139243702, "learning_rate": 8.828548256285483e-06, "loss": 0.486, "step": 20644 }, { "epoch": 0.602756123908791, "grad_norm": 0.6036826384583652, "learning_rate": 8.827899432278995e-06, "loss": 0.5551, "step": 20645 }, { "epoch": 0.6027853201366383, "grad_norm": 0.63472386487654, "learning_rate": 8.827250608272507e-06, "loss": 0.6006, "step": 20646 }, { "epoch": 0.6028145163644857, "grad_norm": 0.6158866292072707, "learning_rate": 8.82660178426602e-06, "loss": 0.6138, "step": 20647 }, { "epoch": 0.602843712592333, "grad_norm": 0.6147490693726848, "learning_rate": 8.825952960259531e-06, "loss": 0.5657, "step": 20648 }, { "epoch": 0.6028729088201804, "grad_norm": 0.6244833983783998, "learning_rate": 8.825304136253042e-06, "loss": 0.6078, "step": 20649 }, { "epoch": 0.6029021050480278, "grad_norm": 0.6629190641561254, "learning_rate": 8.824655312246554e-06, "loss": 0.6691, "step": 20650 }, { "epoch": 0.6029313012758751, "grad_norm": 0.6580304163203903, "learning_rate": 8.824006488240066e-06, "loss": 0.6244, "step": 20651 }, { "epoch": 0.6029604975037225, "grad_norm": 0.6750692757348677, "learning_rate": 8.823357664233578e-06, "loss": 0.5863, "step": 20652 }, { "epoch": 0.6029896937315699, "grad_norm": 0.6232503499050009, "learning_rate": 8.82270884022709e-06, "loss": 0.572, "step": 20653 }, { "epoch": 0.6030188899594172, "grad_norm": 0.6758144763774485, "learning_rate": 8.8220600162206e-06, "loss": 0.6562, "step": 20654 }, { "epoch": 0.6030480861872646, "grad_norm": 0.6555208533217992, "learning_rate": 8.821411192214112e-06, "loss": 0.5826, "step": 20655 }, { "epoch": 0.6030772824151119, "grad_norm": 0.6888020205353239, "learning_rate": 8.820762368207624e-06, "loss": 0.6826, "step": 20656 }, { "epoch": 0.6031064786429593, "grad_norm": 0.7210861770657223, "learning_rate": 8.820113544201136e-06, "loss": 0.7196, "step": 20657 }, { "epoch": 0.6031356748708067, "grad_norm": 0.6682193782855866, "learning_rate": 8.819464720194648e-06, "loss": 0.6708, "step": 20658 }, { "epoch": 0.603164871098654, "grad_norm": 0.709030248848707, "learning_rate": 8.81881589618816e-06, "loss": 0.6684, "step": 20659 }, { "epoch": 0.6031940673265014, "grad_norm": 0.6350821743804367, "learning_rate": 8.818167072181672e-06, "loss": 0.5964, "step": 20660 }, { "epoch": 0.6032232635543487, "grad_norm": 0.6481617927775948, "learning_rate": 8.817518248175182e-06, "loss": 0.6302, "step": 20661 }, { "epoch": 0.6032524597821961, "grad_norm": 0.6567572089395168, "learning_rate": 8.816869424168695e-06, "loss": 0.6189, "step": 20662 }, { "epoch": 0.6032816560100435, "grad_norm": 0.5885446631974859, "learning_rate": 8.816220600162207e-06, "loss": 0.5468, "step": 20663 }, { "epoch": 0.6033108522378908, "grad_norm": 0.6409642183531011, "learning_rate": 8.815571776155719e-06, "loss": 0.5948, "step": 20664 }, { "epoch": 0.6033400484657382, "grad_norm": 0.651177329763338, "learning_rate": 8.81492295214923e-06, "loss": 0.6421, "step": 20665 }, { "epoch": 0.6033692446935855, "grad_norm": 0.6339367981447527, "learning_rate": 8.814274128142743e-06, "loss": 0.6432, "step": 20666 }, { "epoch": 0.6033984409214329, "grad_norm": 0.629645987810177, "learning_rate": 8.813625304136255e-06, "loss": 0.6113, "step": 20667 }, { "epoch": 0.6034276371492803, "grad_norm": 0.5701340628245415, "learning_rate": 8.812976480129765e-06, "loss": 0.5067, "step": 20668 }, { "epoch": 0.6034568333771276, "grad_norm": 0.6147287850396488, "learning_rate": 8.812327656123277e-06, "loss": 0.5801, "step": 20669 }, { "epoch": 0.603486029604975, "grad_norm": 0.6331141659188207, "learning_rate": 8.811678832116789e-06, "loss": 0.6087, "step": 20670 }, { "epoch": 0.6035152258328224, "grad_norm": 0.6670213269778803, "learning_rate": 8.811030008110301e-06, "loss": 0.58, "step": 20671 }, { "epoch": 0.6035444220606697, "grad_norm": 0.7140001753235284, "learning_rate": 8.810381184103813e-06, "loss": 0.7081, "step": 20672 }, { "epoch": 0.6035736182885171, "grad_norm": 0.618866703882657, "learning_rate": 8.809732360097323e-06, "loss": 0.5844, "step": 20673 }, { "epoch": 0.6036028145163644, "grad_norm": 0.6551244269503873, "learning_rate": 8.809083536090835e-06, "loss": 0.5678, "step": 20674 }, { "epoch": 0.6036320107442118, "grad_norm": 0.69168330835577, "learning_rate": 8.808434712084347e-06, "loss": 0.6255, "step": 20675 }, { "epoch": 0.6036612069720592, "grad_norm": 0.6627548961334663, "learning_rate": 8.80778588807786e-06, "loss": 0.6563, "step": 20676 }, { "epoch": 0.6036904031999065, "grad_norm": 0.6135228178458897, "learning_rate": 8.807137064071372e-06, "loss": 0.5601, "step": 20677 }, { "epoch": 0.6037195994277539, "grad_norm": 0.6560827457063448, "learning_rate": 8.806488240064884e-06, "loss": 0.6403, "step": 20678 }, { "epoch": 0.6037487956556012, "grad_norm": 0.606959494075357, "learning_rate": 8.805839416058396e-06, "loss": 0.596, "step": 20679 }, { "epoch": 0.6037779918834486, "grad_norm": 0.6588493659095007, "learning_rate": 8.805190592051906e-06, "loss": 0.6747, "step": 20680 }, { "epoch": 0.603807188111296, "grad_norm": 0.6178584397641793, "learning_rate": 8.804541768045418e-06, "loss": 0.5565, "step": 20681 }, { "epoch": 0.6038363843391433, "grad_norm": 0.6823791578695875, "learning_rate": 8.80389294403893e-06, "loss": 0.6429, "step": 20682 }, { "epoch": 0.6038655805669907, "grad_norm": 0.6127798470287767, "learning_rate": 8.803244120032442e-06, "loss": 0.5702, "step": 20683 }, { "epoch": 0.6038947767948382, "grad_norm": 0.6238312772675477, "learning_rate": 8.802595296025954e-06, "loss": 0.601, "step": 20684 }, { "epoch": 0.6039239730226855, "grad_norm": 0.6471590187621232, "learning_rate": 8.801946472019466e-06, "loss": 0.6316, "step": 20685 }, { "epoch": 0.6039531692505329, "grad_norm": 0.621491397217411, "learning_rate": 8.801297648012978e-06, "loss": 0.5873, "step": 20686 }, { "epoch": 0.6039823654783802, "grad_norm": 0.6264496169373096, "learning_rate": 8.800648824006488e-06, "loss": 0.5473, "step": 20687 }, { "epoch": 0.6040115617062276, "grad_norm": 0.6242650803091568, "learning_rate": 8.8e-06, "loss": 0.5719, "step": 20688 }, { "epoch": 0.604040757934075, "grad_norm": 0.6549400609465873, "learning_rate": 8.799351175993512e-06, "loss": 0.6083, "step": 20689 }, { "epoch": 0.6040699541619223, "grad_norm": 0.6772917370416544, "learning_rate": 8.798702351987025e-06, "loss": 0.5926, "step": 20690 }, { "epoch": 0.6040991503897697, "grad_norm": 0.6673013317391309, "learning_rate": 8.798053527980537e-06, "loss": 0.6232, "step": 20691 }, { "epoch": 0.604128346617617, "grad_norm": 0.6362869590852198, "learning_rate": 8.797404703974047e-06, "loss": 0.593, "step": 20692 }, { "epoch": 0.6041575428454644, "grad_norm": 0.6287392511475575, "learning_rate": 8.796755879967559e-06, "loss": 0.5837, "step": 20693 }, { "epoch": 0.6041867390733118, "grad_norm": 0.6120249723456006, "learning_rate": 8.796107055961071e-06, "loss": 0.5666, "step": 20694 }, { "epoch": 0.6042159353011591, "grad_norm": 0.6707913286622195, "learning_rate": 8.795458231954583e-06, "loss": 0.6259, "step": 20695 }, { "epoch": 0.6042451315290065, "grad_norm": 0.6526269605271163, "learning_rate": 8.794809407948095e-06, "loss": 0.6693, "step": 20696 }, { "epoch": 0.6042743277568539, "grad_norm": 0.618426541194172, "learning_rate": 8.794160583941607e-06, "loss": 0.6069, "step": 20697 }, { "epoch": 0.6043035239847012, "grad_norm": 0.6231311871556517, "learning_rate": 8.793511759935119e-06, "loss": 0.5563, "step": 20698 }, { "epoch": 0.6043327202125486, "grad_norm": 0.6551844904688051, "learning_rate": 8.79286293592863e-06, "loss": 0.6334, "step": 20699 }, { "epoch": 0.6043619164403959, "grad_norm": 0.632340455856682, "learning_rate": 8.792214111922141e-06, "loss": 0.5728, "step": 20700 }, { "epoch": 0.6043911126682433, "grad_norm": 0.6275118441320389, "learning_rate": 8.791565287915653e-06, "loss": 0.6088, "step": 20701 }, { "epoch": 0.6044203088960907, "grad_norm": 0.6682796938595797, "learning_rate": 8.790916463909165e-06, "loss": 0.6644, "step": 20702 }, { "epoch": 0.604449505123938, "grad_norm": 0.6182591764864803, "learning_rate": 8.790267639902677e-06, "loss": 0.515, "step": 20703 }, { "epoch": 0.6044787013517854, "grad_norm": 0.6587859526772597, "learning_rate": 8.78961881589619e-06, "loss": 0.604, "step": 20704 }, { "epoch": 0.6045078975796327, "grad_norm": 0.6076661193122679, "learning_rate": 8.788969991889702e-06, "loss": 0.5929, "step": 20705 }, { "epoch": 0.6045370938074801, "grad_norm": 0.6621428099364203, "learning_rate": 8.788321167883212e-06, "loss": 0.6186, "step": 20706 }, { "epoch": 0.6045662900353275, "grad_norm": 0.6357680246160742, "learning_rate": 8.787672343876724e-06, "loss": 0.5959, "step": 20707 }, { "epoch": 0.6045954862631748, "grad_norm": 0.5980835157172251, "learning_rate": 8.787023519870236e-06, "loss": 0.5381, "step": 20708 }, { "epoch": 0.6046246824910222, "grad_norm": 0.7476074282490653, "learning_rate": 8.786374695863748e-06, "loss": 0.7938, "step": 20709 }, { "epoch": 0.6046538787188696, "grad_norm": 0.6443222688548103, "learning_rate": 8.785725871857258e-06, "loss": 0.5969, "step": 20710 }, { "epoch": 0.6046830749467169, "grad_norm": 0.6178712347484124, "learning_rate": 8.78507704785077e-06, "loss": 0.5981, "step": 20711 }, { "epoch": 0.6047122711745643, "grad_norm": 0.6556046559592263, "learning_rate": 8.784428223844284e-06, "loss": 0.6457, "step": 20712 }, { "epoch": 0.6047414674024116, "grad_norm": 0.623665152574893, "learning_rate": 8.783779399837794e-06, "loss": 0.5644, "step": 20713 }, { "epoch": 0.604770663630259, "grad_norm": 0.6538678576067374, "learning_rate": 8.783130575831306e-06, "loss": 0.5958, "step": 20714 }, { "epoch": 0.6047998598581064, "grad_norm": 0.6101262463374156, "learning_rate": 8.782481751824818e-06, "loss": 0.5558, "step": 20715 }, { "epoch": 0.6048290560859537, "grad_norm": 0.77259655969403, "learning_rate": 8.78183292781833e-06, "loss": 0.6489, "step": 20716 }, { "epoch": 0.6048582523138011, "grad_norm": 0.667874701324128, "learning_rate": 8.781184103811842e-06, "loss": 0.6455, "step": 20717 }, { "epoch": 0.6048874485416484, "grad_norm": 0.7119355941393231, "learning_rate": 8.780535279805353e-06, "loss": 0.6668, "step": 20718 }, { "epoch": 0.6049166447694958, "grad_norm": 0.6845722574814308, "learning_rate": 8.779886455798865e-06, "loss": 0.6006, "step": 20719 }, { "epoch": 0.6049458409973432, "grad_norm": 0.6306102265296923, "learning_rate": 8.779237631792377e-06, "loss": 0.5693, "step": 20720 }, { "epoch": 0.6049750372251905, "grad_norm": 0.6343717692036984, "learning_rate": 8.778588807785889e-06, "loss": 0.577, "step": 20721 }, { "epoch": 0.6050042334530379, "grad_norm": 0.6611116312845124, "learning_rate": 8.777939983779401e-06, "loss": 0.6415, "step": 20722 }, { "epoch": 0.6050334296808852, "grad_norm": 0.6111847466527387, "learning_rate": 8.777291159772913e-06, "loss": 0.5786, "step": 20723 }, { "epoch": 0.6050626259087326, "grad_norm": 0.6563341111487291, "learning_rate": 8.776642335766425e-06, "loss": 0.6414, "step": 20724 }, { "epoch": 0.60509182213658, "grad_norm": 0.7189319806823788, "learning_rate": 8.775993511759935e-06, "loss": 0.6774, "step": 20725 }, { "epoch": 0.6051210183644273, "grad_norm": 0.5932182963630449, "learning_rate": 8.775344687753447e-06, "loss": 0.509, "step": 20726 }, { "epoch": 0.6051502145922747, "grad_norm": 0.6380684365208555, "learning_rate": 8.77469586374696e-06, "loss": 0.6085, "step": 20727 }, { "epoch": 0.605179410820122, "grad_norm": 0.6283138266030383, "learning_rate": 8.774047039740471e-06, "loss": 0.6009, "step": 20728 }, { "epoch": 0.6052086070479694, "grad_norm": 0.620308832266797, "learning_rate": 8.773398215733982e-06, "loss": 0.5779, "step": 20729 }, { "epoch": 0.6052378032758168, "grad_norm": 0.627455442659957, "learning_rate": 8.772749391727494e-06, "loss": 0.6018, "step": 20730 }, { "epoch": 0.6052669995036641, "grad_norm": 0.6852956452453776, "learning_rate": 8.772100567721007e-06, "loss": 0.6045, "step": 20731 }, { "epoch": 0.6052961957315115, "grad_norm": 0.6110246345507969, "learning_rate": 8.771451743714518e-06, "loss": 0.5836, "step": 20732 }, { "epoch": 0.6053253919593589, "grad_norm": 0.6382654104919332, "learning_rate": 8.77080291970803e-06, "loss": 0.5887, "step": 20733 }, { "epoch": 0.6053545881872062, "grad_norm": 0.6589921935384765, "learning_rate": 8.770154095701542e-06, "loss": 0.6406, "step": 20734 }, { "epoch": 0.6053837844150536, "grad_norm": 0.6278675208282553, "learning_rate": 8.769505271695054e-06, "loss": 0.5949, "step": 20735 }, { "epoch": 0.6054129806429009, "grad_norm": 0.628176750094885, "learning_rate": 8.768856447688566e-06, "loss": 0.5847, "step": 20736 }, { "epoch": 0.6054421768707483, "grad_norm": 0.6278094645486076, "learning_rate": 8.768207623682076e-06, "loss": 0.6014, "step": 20737 }, { "epoch": 0.6054713730985957, "grad_norm": 0.6161087534052051, "learning_rate": 8.767558799675588e-06, "loss": 0.5722, "step": 20738 }, { "epoch": 0.605500569326443, "grad_norm": 0.6382937543937076, "learning_rate": 8.7669099756691e-06, "loss": 0.5946, "step": 20739 }, { "epoch": 0.6055297655542904, "grad_norm": 0.6735357731130667, "learning_rate": 8.766261151662612e-06, "loss": 0.6158, "step": 20740 }, { "epoch": 0.6055589617821378, "grad_norm": 0.6749651813239832, "learning_rate": 8.765612327656124e-06, "loss": 0.6359, "step": 20741 }, { "epoch": 0.6055881580099851, "grad_norm": 0.7171592087234026, "learning_rate": 8.764963503649636e-06, "loss": 0.6174, "step": 20742 }, { "epoch": 0.6056173542378325, "grad_norm": 0.6888754691061535, "learning_rate": 8.764314679643148e-06, "loss": 0.6653, "step": 20743 }, { "epoch": 0.6056465504656798, "grad_norm": 0.6474337690559564, "learning_rate": 8.763665855636659e-06, "loss": 0.5719, "step": 20744 }, { "epoch": 0.6056757466935272, "grad_norm": 0.6285335341533729, "learning_rate": 8.76301703163017e-06, "loss": 0.5554, "step": 20745 }, { "epoch": 0.6057049429213746, "grad_norm": 0.605766322235292, "learning_rate": 8.762368207623683e-06, "loss": 0.5893, "step": 20746 }, { "epoch": 0.6057341391492219, "grad_norm": 0.7400005328894582, "learning_rate": 8.761719383617195e-06, "loss": 0.6867, "step": 20747 }, { "epoch": 0.6057633353770693, "grad_norm": 0.6419508834481032, "learning_rate": 8.761070559610705e-06, "loss": 0.6019, "step": 20748 }, { "epoch": 0.6057925316049166, "grad_norm": 0.6675525436493003, "learning_rate": 8.760421735604217e-06, "loss": 0.6966, "step": 20749 }, { "epoch": 0.605821727832764, "grad_norm": 0.6656255199751334, "learning_rate": 8.759772911597731e-06, "loss": 0.648, "step": 20750 }, { "epoch": 0.6058509240606114, "grad_norm": 0.6442738984062758, "learning_rate": 8.759124087591241e-06, "loss": 0.5926, "step": 20751 }, { "epoch": 0.6058801202884587, "grad_norm": 0.6341005298984258, "learning_rate": 8.758475263584753e-06, "loss": 0.5983, "step": 20752 }, { "epoch": 0.6059093165163061, "grad_norm": 0.6597961572775729, "learning_rate": 8.757826439578265e-06, "loss": 0.6105, "step": 20753 }, { "epoch": 0.6059385127441534, "grad_norm": 0.6362876927464968, "learning_rate": 8.757177615571777e-06, "loss": 0.5978, "step": 20754 }, { "epoch": 0.6059677089720008, "grad_norm": 0.6942295188362563, "learning_rate": 8.75652879156529e-06, "loss": 0.65, "step": 20755 }, { "epoch": 0.6059969051998482, "grad_norm": 0.6610911907621485, "learning_rate": 8.7558799675588e-06, "loss": 0.6237, "step": 20756 }, { "epoch": 0.6060261014276955, "grad_norm": 0.5742565498714745, "learning_rate": 8.755231143552312e-06, "loss": 0.48, "step": 20757 }, { "epoch": 0.6060552976555429, "grad_norm": 0.6540187654591637, "learning_rate": 8.754582319545824e-06, "loss": 0.6205, "step": 20758 }, { "epoch": 0.6060844938833903, "grad_norm": 0.6210750161041292, "learning_rate": 8.753933495539336e-06, "loss": 0.6089, "step": 20759 }, { "epoch": 0.6061136901112376, "grad_norm": 0.6608564408233899, "learning_rate": 8.753284671532848e-06, "loss": 0.6065, "step": 20760 }, { "epoch": 0.606142886339085, "grad_norm": 0.6277033149380413, "learning_rate": 8.75263584752636e-06, "loss": 0.5919, "step": 20761 }, { "epoch": 0.6061720825669323, "grad_norm": 0.6460143181724527, "learning_rate": 8.751987023519872e-06, "loss": 0.5894, "step": 20762 }, { "epoch": 0.6062012787947797, "grad_norm": 0.6666757741519028, "learning_rate": 8.751338199513382e-06, "loss": 0.6683, "step": 20763 }, { "epoch": 0.6062304750226271, "grad_norm": 0.7188475236625101, "learning_rate": 8.750689375506894e-06, "loss": 0.5469, "step": 20764 }, { "epoch": 0.6062596712504744, "grad_norm": 0.6365043267538909, "learning_rate": 8.750040551500406e-06, "loss": 0.5789, "step": 20765 }, { "epoch": 0.6062888674783218, "grad_norm": 0.6610187017461492, "learning_rate": 8.749391727493918e-06, "loss": 0.6692, "step": 20766 }, { "epoch": 0.6063180637061691, "grad_norm": 0.6903828849821999, "learning_rate": 8.748742903487429e-06, "loss": 0.6757, "step": 20767 }, { "epoch": 0.6063472599340165, "grad_norm": 0.6266948837911746, "learning_rate": 8.74809407948094e-06, "loss": 0.5693, "step": 20768 }, { "epoch": 0.6063764561618639, "grad_norm": 0.6166086239442735, "learning_rate": 8.747445255474454e-06, "loss": 0.5804, "step": 20769 }, { "epoch": 0.6064056523897112, "grad_norm": 0.6535862598851183, "learning_rate": 8.746796431467965e-06, "loss": 0.6438, "step": 20770 }, { "epoch": 0.6064348486175586, "grad_norm": 0.6610187174218725, "learning_rate": 8.746147607461477e-06, "loss": 0.631, "step": 20771 }, { "epoch": 0.606464044845406, "grad_norm": 0.6644861048168941, "learning_rate": 8.745498783454989e-06, "loss": 0.6239, "step": 20772 }, { "epoch": 0.6064932410732533, "grad_norm": 0.6273322018154301, "learning_rate": 8.7448499594485e-06, "loss": 0.5965, "step": 20773 }, { "epoch": 0.6065224373011007, "grad_norm": 0.6515416494962532, "learning_rate": 8.744201135442013e-06, "loss": 0.6027, "step": 20774 }, { "epoch": 0.606551633528948, "grad_norm": 0.6769152463243472, "learning_rate": 8.743552311435523e-06, "loss": 0.6234, "step": 20775 }, { "epoch": 0.6065808297567954, "grad_norm": 0.5876592885634779, "learning_rate": 8.742903487429035e-06, "loss": 0.5515, "step": 20776 }, { "epoch": 0.6066100259846428, "grad_norm": 0.6941749319965811, "learning_rate": 8.742254663422547e-06, "loss": 0.6035, "step": 20777 }, { "epoch": 0.6066392222124901, "grad_norm": 0.6100659587810994, "learning_rate": 8.74160583941606e-06, "loss": 0.5825, "step": 20778 }, { "epoch": 0.6066684184403375, "grad_norm": 0.6316858921803682, "learning_rate": 8.740957015409571e-06, "loss": 0.5798, "step": 20779 }, { "epoch": 0.6066976146681848, "grad_norm": 0.6873589151544771, "learning_rate": 8.740308191403083e-06, "loss": 0.6797, "step": 20780 }, { "epoch": 0.6067268108960322, "grad_norm": 0.6196764399962579, "learning_rate": 8.739659367396595e-06, "loss": 0.5926, "step": 20781 }, { "epoch": 0.6067560071238796, "grad_norm": 0.6150737591437605, "learning_rate": 8.739010543390106e-06, "loss": 0.5956, "step": 20782 }, { "epoch": 0.6067852033517269, "grad_norm": 0.6140325989016111, "learning_rate": 8.738361719383618e-06, "loss": 0.5917, "step": 20783 }, { "epoch": 0.6068143995795743, "grad_norm": 0.6386732116199437, "learning_rate": 8.73771289537713e-06, "loss": 0.5359, "step": 20784 }, { "epoch": 0.6068435958074216, "grad_norm": 0.5874245867858809, "learning_rate": 8.737064071370642e-06, "loss": 0.497, "step": 20785 }, { "epoch": 0.606872792035269, "grad_norm": 0.6747262014188654, "learning_rate": 8.736415247364152e-06, "loss": 0.6594, "step": 20786 }, { "epoch": 0.6069019882631164, "grad_norm": 0.6375287679788341, "learning_rate": 8.735766423357664e-06, "loss": 0.6073, "step": 20787 }, { "epoch": 0.6069311844909637, "grad_norm": 0.6292549438848765, "learning_rate": 8.735117599351178e-06, "loss": 0.6219, "step": 20788 }, { "epoch": 0.6069603807188111, "grad_norm": 0.6674722765606762, "learning_rate": 8.734468775344688e-06, "loss": 0.5958, "step": 20789 }, { "epoch": 0.6069895769466584, "grad_norm": 0.6691290465285749, "learning_rate": 8.7338199513382e-06, "loss": 0.6658, "step": 20790 }, { "epoch": 0.6070187731745058, "grad_norm": 0.6394730885479389, "learning_rate": 8.733171127331712e-06, "loss": 0.6238, "step": 20791 }, { "epoch": 0.6070479694023532, "grad_norm": 0.6787763041199899, "learning_rate": 8.732522303325224e-06, "loss": 0.6246, "step": 20792 }, { "epoch": 0.6070771656302005, "grad_norm": 0.6484044338646192, "learning_rate": 8.731873479318736e-06, "loss": 0.6439, "step": 20793 }, { "epoch": 0.6071063618580479, "grad_norm": 0.6337060492863573, "learning_rate": 8.731224655312247e-06, "loss": 0.6521, "step": 20794 }, { "epoch": 0.6071355580858953, "grad_norm": 0.6261243476916024, "learning_rate": 8.730575831305759e-06, "loss": 0.5559, "step": 20795 }, { "epoch": 0.6071647543137426, "grad_norm": 0.6722514231466793, "learning_rate": 8.72992700729927e-06, "loss": 0.69, "step": 20796 }, { "epoch": 0.60719395054159, "grad_norm": 0.6327556271379112, "learning_rate": 8.729278183292783e-06, "loss": 0.5519, "step": 20797 }, { "epoch": 0.6072231467694373, "grad_norm": 0.5642970079142993, "learning_rate": 8.728629359286295e-06, "loss": 0.4952, "step": 20798 }, { "epoch": 0.6072523429972847, "grad_norm": 0.6163810419953278, "learning_rate": 8.727980535279807e-06, "loss": 0.5547, "step": 20799 }, { "epoch": 0.6072815392251321, "grad_norm": 0.6746792052531234, "learning_rate": 8.727331711273319e-06, "loss": 0.6099, "step": 20800 }, { "epoch": 0.6073107354529794, "grad_norm": 0.6235219920591508, "learning_rate": 8.726682887266829e-06, "loss": 0.5811, "step": 20801 }, { "epoch": 0.6073399316808268, "grad_norm": 0.6457233840079598, "learning_rate": 8.726034063260341e-06, "loss": 0.5807, "step": 20802 }, { "epoch": 0.6073691279086741, "grad_norm": 0.6619376068321124, "learning_rate": 8.725385239253853e-06, "loss": 0.6011, "step": 20803 }, { "epoch": 0.6073983241365216, "grad_norm": 0.6326237178200531, "learning_rate": 8.724736415247365e-06, "loss": 0.609, "step": 20804 }, { "epoch": 0.607427520364369, "grad_norm": 0.6480595575314146, "learning_rate": 8.724087591240875e-06, "loss": 0.6183, "step": 20805 }, { "epoch": 0.6074567165922163, "grad_norm": 0.627061381085579, "learning_rate": 8.723438767234387e-06, "loss": 0.5641, "step": 20806 }, { "epoch": 0.6074859128200637, "grad_norm": 0.6581863654466349, "learning_rate": 8.722789943227901e-06, "loss": 0.6435, "step": 20807 }, { "epoch": 0.6075151090479111, "grad_norm": 0.6547245543310098, "learning_rate": 8.722141119221412e-06, "loss": 0.608, "step": 20808 }, { "epoch": 0.6075443052757584, "grad_norm": 0.681495007239258, "learning_rate": 8.721492295214924e-06, "loss": 0.6812, "step": 20809 }, { "epoch": 0.6075735015036058, "grad_norm": 0.6805015689036759, "learning_rate": 8.720843471208436e-06, "loss": 0.6436, "step": 20810 }, { "epoch": 0.6076026977314531, "grad_norm": 0.6043014230866036, "learning_rate": 8.720194647201948e-06, "loss": 0.5938, "step": 20811 }, { "epoch": 0.6076318939593005, "grad_norm": 0.6575372654572398, "learning_rate": 8.71954582319546e-06, "loss": 0.6305, "step": 20812 }, { "epoch": 0.6076610901871479, "grad_norm": 0.6983408535899359, "learning_rate": 8.71889699918897e-06, "loss": 0.6944, "step": 20813 }, { "epoch": 0.6076902864149952, "grad_norm": 0.6620283054866537, "learning_rate": 8.718248175182482e-06, "loss": 0.6319, "step": 20814 }, { "epoch": 0.6077194826428426, "grad_norm": 0.6428640083839763, "learning_rate": 8.717599351175994e-06, "loss": 0.6487, "step": 20815 }, { "epoch": 0.60774867887069, "grad_norm": 0.6627076237508569, "learning_rate": 8.716950527169506e-06, "loss": 0.6255, "step": 20816 }, { "epoch": 0.6077778750985373, "grad_norm": 0.6283639253528447, "learning_rate": 8.716301703163018e-06, "loss": 0.5771, "step": 20817 }, { "epoch": 0.6078070713263847, "grad_norm": 0.5808002998443801, "learning_rate": 8.71565287915653e-06, "loss": 0.4992, "step": 20818 }, { "epoch": 0.607836267554232, "grad_norm": 0.6346092842471438, "learning_rate": 8.715004055150042e-06, "loss": 0.6052, "step": 20819 }, { "epoch": 0.6078654637820794, "grad_norm": 0.6087395626463715, "learning_rate": 8.714355231143552e-06, "loss": 0.573, "step": 20820 }, { "epoch": 0.6078946600099268, "grad_norm": 0.584703958823009, "learning_rate": 8.713706407137064e-06, "loss": 0.5406, "step": 20821 }, { "epoch": 0.6079238562377741, "grad_norm": 0.6555196252117861, "learning_rate": 8.713057583130577e-06, "loss": 0.5947, "step": 20822 }, { "epoch": 0.6079530524656215, "grad_norm": 0.6309607764891255, "learning_rate": 8.712408759124089e-06, "loss": 0.6077, "step": 20823 }, { "epoch": 0.6079822486934688, "grad_norm": 0.6023963079236715, "learning_rate": 8.711759935117599e-06, "loss": 0.5651, "step": 20824 }, { "epoch": 0.6080114449213162, "grad_norm": 0.6499839169182846, "learning_rate": 8.711111111111111e-06, "loss": 0.6516, "step": 20825 }, { "epoch": 0.6080406411491636, "grad_norm": 0.6839438339171825, "learning_rate": 8.710462287104625e-06, "loss": 0.6337, "step": 20826 }, { "epoch": 0.6080698373770109, "grad_norm": 0.6649596628584599, "learning_rate": 8.709813463098135e-06, "loss": 0.5756, "step": 20827 }, { "epoch": 0.6080990336048583, "grad_norm": 0.7048182730911315, "learning_rate": 8.709164639091647e-06, "loss": 0.6703, "step": 20828 }, { "epoch": 0.6081282298327056, "grad_norm": 0.6483216081103915, "learning_rate": 8.708515815085159e-06, "loss": 0.608, "step": 20829 }, { "epoch": 0.608157426060553, "grad_norm": 0.6243028203159742, "learning_rate": 8.707866991078671e-06, "loss": 0.6187, "step": 20830 }, { "epoch": 0.6081866222884004, "grad_norm": 0.6706390112503917, "learning_rate": 8.707218167072183e-06, "loss": 0.6776, "step": 20831 }, { "epoch": 0.6082158185162477, "grad_norm": 0.6778948849647338, "learning_rate": 8.706569343065693e-06, "loss": 0.6618, "step": 20832 }, { "epoch": 0.6082450147440951, "grad_norm": 0.6900858645860672, "learning_rate": 8.705920519059205e-06, "loss": 0.6699, "step": 20833 }, { "epoch": 0.6082742109719425, "grad_norm": 0.6726434857812271, "learning_rate": 8.705271695052717e-06, "loss": 0.6723, "step": 20834 }, { "epoch": 0.6083034071997898, "grad_norm": 0.6560257818053217, "learning_rate": 8.70462287104623e-06, "loss": 0.4977, "step": 20835 }, { "epoch": 0.6083326034276372, "grad_norm": 0.6877097538625095, "learning_rate": 8.703974047039742e-06, "loss": 0.7007, "step": 20836 }, { "epoch": 0.6083617996554845, "grad_norm": 0.6702552895539864, "learning_rate": 8.703325223033254e-06, "loss": 0.6473, "step": 20837 }, { "epoch": 0.6083909958833319, "grad_norm": 0.6657178804025711, "learning_rate": 8.702676399026766e-06, "loss": 0.7078, "step": 20838 }, { "epoch": 0.6084201921111793, "grad_norm": 0.6840616644058809, "learning_rate": 8.702027575020276e-06, "loss": 0.65, "step": 20839 }, { "epoch": 0.6084493883390266, "grad_norm": 0.6259282070517382, "learning_rate": 8.701378751013788e-06, "loss": 0.5735, "step": 20840 }, { "epoch": 0.608478584566874, "grad_norm": 0.645372894560257, "learning_rate": 8.7007299270073e-06, "loss": 0.6601, "step": 20841 }, { "epoch": 0.6085077807947213, "grad_norm": 0.6172610145393168, "learning_rate": 8.700081103000812e-06, "loss": 0.5516, "step": 20842 }, { "epoch": 0.6085369770225687, "grad_norm": 0.6129291733134674, "learning_rate": 8.699432278994322e-06, "loss": 0.5622, "step": 20843 }, { "epoch": 0.6085661732504161, "grad_norm": 0.6435727669376772, "learning_rate": 8.698783454987834e-06, "loss": 0.6293, "step": 20844 }, { "epoch": 0.6085953694782634, "grad_norm": 0.6953792425526457, "learning_rate": 8.698134630981348e-06, "loss": 0.7432, "step": 20845 }, { "epoch": 0.6086245657061108, "grad_norm": 0.6512902996239577, "learning_rate": 8.697485806974858e-06, "loss": 0.6271, "step": 20846 }, { "epoch": 0.6086537619339581, "grad_norm": 0.627151098958385, "learning_rate": 8.69683698296837e-06, "loss": 0.5568, "step": 20847 }, { "epoch": 0.6086829581618055, "grad_norm": 0.7005342805091999, "learning_rate": 8.696188158961882e-06, "loss": 0.7209, "step": 20848 }, { "epoch": 0.6087121543896529, "grad_norm": 0.6987977665089067, "learning_rate": 8.695539334955394e-06, "loss": 0.6777, "step": 20849 }, { "epoch": 0.6087413506175002, "grad_norm": 0.6289639524510735, "learning_rate": 8.694890510948907e-06, "loss": 0.6291, "step": 20850 }, { "epoch": 0.6087705468453476, "grad_norm": 0.6125310430786108, "learning_rate": 8.694241686942417e-06, "loss": 0.5764, "step": 20851 }, { "epoch": 0.608799743073195, "grad_norm": 0.6165155858354555, "learning_rate": 8.693592862935929e-06, "loss": 0.5788, "step": 20852 }, { "epoch": 0.6088289393010423, "grad_norm": 0.6317709334038034, "learning_rate": 8.692944038929441e-06, "loss": 0.5823, "step": 20853 }, { "epoch": 0.6088581355288897, "grad_norm": 0.6387168171818106, "learning_rate": 8.692295214922953e-06, "loss": 0.6019, "step": 20854 }, { "epoch": 0.608887331756737, "grad_norm": 0.6202120035570717, "learning_rate": 8.691646390916465e-06, "loss": 0.5488, "step": 20855 }, { "epoch": 0.6089165279845844, "grad_norm": 0.6387664052808707, "learning_rate": 8.690997566909977e-06, "loss": 0.5794, "step": 20856 }, { "epoch": 0.6089457242124318, "grad_norm": 0.6621950323163746, "learning_rate": 8.690348742903489e-06, "loss": 0.6225, "step": 20857 }, { "epoch": 0.6089749204402791, "grad_norm": 0.6922127126745239, "learning_rate": 8.689699918897e-06, "loss": 0.6708, "step": 20858 }, { "epoch": 0.6090041166681265, "grad_norm": 0.6932899157579799, "learning_rate": 8.689051094890511e-06, "loss": 0.6779, "step": 20859 }, { "epoch": 0.6090333128959738, "grad_norm": 0.5784364167697443, "learning_rate": 8.688402270884023e-06, "loss": 0.4787, "step": 20860 }, { "epoch": 0.6090625091238212, "grad_norm": 0.6218743200904746, "learning_rate": 8.687753446877535e-06, "loss": 0.5459, "step": 20861 }, { "epoch": 0.6090917053516686, "grad_norm": 0.6629461773686496, "learning_rate": 8.687104622871046e-06, "loss": 0.652, "step": 20862 }, { "epoch": 0.6091209015795159, "grad_norm": 0.6804602940023766, "learning_rate": 8.68645579886456e-06, "loss": 0.6195, "step": 20863 }, { "epoch": 0.6091500978073633, "grad_norm": 0.6385520915271484, "learning_rate": 8.685806974858071e-06, "loss": 0.6056, "step": 20864 }, { "epoch": 0.6091792940352107, "grad_norm": 0.6927791110178593, "learning_rate": 8.685158150851582e-06, "loss": 0.6732, "step": 20865 }, { "epoch": 0.609208490263058, "grad_norm": 0.6162524242387197, "learning_rate": 8.684509326845094e-06, "loss": 0.5744, "step": 20866 }, { "epoch": 0.6092376864909054, "grad_norm": 0.5726021774536924, "learning_rate": 8.683860502838606e-06, "loss": 0.5089, "step": 20867 }, { "epoch": 0.6092668827187527, "grad_norm": 0.7059561986048196, "learning_rate": 8.683211678832118e-06, "loss": 0.6577, "step": 20868 }, { "epoch": 0.6092960789466001, "grad_norm": 0.6315376187101575, "learning_rate": 8.68256285482563e-06, "loss": 0.5994, "step": 20869 }, { "epoch": 0.6093252751744475, "grad_norm": 0.6528219891193354, "learning_rate": 8.68191403081914e-06, "loss": 0.6102, "step": 20870 }, { "epoch": 0.6093544714022948, "grad_norm": 0.6265122189452222, "learning_rate": 8.681265206812652e-06, "loss": 0.5767, "step": 20871 }, { "epoch": 0.6093836676301422, "grad_norm": 0.6286110006016401, "learning_rate": 8.680616382806164e-06, "loss": 0.5822, "step": 20872 }, { "epoch": 0.6094128638579895, "grad_norm": 0.6606120094559322, "learning_rate": 8.679967558799676e-06, "loss": 0.64, "step": 20873 }, { "epoch": 0.6094420600858369, "grad_norm": 0.618179693617935, "learning_rate": 8.679318734793188e-06, "loss": 0.5787, "step": 20874 }, { "epoch": 0.6094712563136843, "grad_norm": 0.5921572363584159, "learning_rate": 8.6786699107867e-06, "loss": 0.5709, "step": 20875 }, { "epoch": 0.6095004525415316, "grad_norm": 0.6745292120358818, "learning_rate": 8.678021086780212e-06, "loss": 0.6683, "step": 20876 }, { "epoch": 0.609529648769379, "grad_norm": 0.6144811918490477, "learning_rate": 8.677372262773723e-06, "loss": 0.5853, "step": 20877 }, { "epoch": 0.6095588449972263, "grad_norm": 0.6469886319963668, "learning_rate": 8.676723438767235e-06, "loss": 0.591, "step": 20878 }, { "epoch": 0.6095880412250737, "grad_norm": 0.612037548915798, "learning_rate": 8.676074614760747e-06, "loss": 0.5808, "step": 20879 }, { "epoch": 0.6096172374529211, "grad_norm": 0.6500430106861315, "learning_rate": 8.675425790754259e-06, "loss": 0.5608, "step": 20880 }, { "epoch": 0.6096464336807684, "grad_norm": 0.6495286632970951, "learning_rate": 8.67477696674777e-06, "loss": 0.6161, "step": 20881 }, { "epoch": 0.6096756299086158, "grad_norm": 0.6656740290934255, "learning_rate": 8.674128142741283e-06, "loss": 0.6808, "step": 20882 }, { "epoch": 0.6097048261364632, "grad_norm": 0.6488769750891987, "learning_rate": 8.673479318734795e-06, "loss": 0.5896, "step": 20883 }, { "epoch": 0.6097340223643105, "grad_norm": 0.6247803303790496, "learning_rate": 8.672830494728305e-06, "loss": 0.5592, "step": 20884 }, { "epoch": 0.6097632185921579, "grad_norm": 0.6292638322341166, "learning_rate": 8.672181670721817e-06, "loss": 0.5351, "step": 20885 }, { "epoch": 0.6097924148200052, "grad_norm": 0.6214051694325115, "learning_rate": 8.67153284671533e-06, "loss": 0.5657, "step": 20886 }, { "epoch": 0.6098216110478526, "grad_norm": 0.6811041522962936, "learning_rate": 8.670884022708841e-06, "loss": 0.6698, "step": 20887 }, { "epoch": 0.6098508072757, "grad_norm": 0.7071670570281354, "learning_rate": 8.670235198702353e-06, "loss": 0.6819, "step": 20888 }, { "epoch": 0.6098800035035473, "grad_norm": 0.6805198766397614, "learning_rate": 8.669586374695864e-06, "loss": 0.6816, "step": 20889 }, { "epoch": 0.6099091997313947, "grad_norm": 0.6666601561368108, "learning_rate": 8.668937550689376e-06, "loss": 0.6402, "step": 20890 }, { "epoch": 0.609938395959242, "grad_norm": 0.6325485643822109, "learning_rate": 8.668288726682888e-06, "loss": 0.5934, "step": 20891 }, { "epoch": 0.6099675921870894, "grad_norm": 0.6048546428299427, "learning_rate": 8.6676399026764e-06, "loss": 0.5458, "step": 20892 }, { "epoch": 0.6099967884149368, "grad_norm": 0.6541763039490492, "learning_rate": 8.666991078669912e-06, "loss": 0.6422, "step": 20893 }, { "epoch": 0.6100259846427841, "grad_norm": 0.6482106092109307, "learning_rate": 8.666342254663424e-06, "loss": 0.6128, "step": 20894 }, { "epoch": 0.6100551808706315, "grad_norm": 0.6958690650245802, "learning_rate": 8.665693430656936e-06, "loss": 0.6937, "step": 20895 }, { "epoch": 0.6100843770984788, "grad_norm": 0.6493010352991428, "learning_rate": 8.665044606650446e-06, "loss": 0.5591, "step": 20896 }, { "epoch": 0.6101135733263262, "grad_norm": 0.6274241912395614, "learning_rate": 8.664395782643958e-06, "loss": 0.5974, "step": 20897 }, { "epoch": 0.6101427695541736, "grad_norm": 0.6346305024840134, "learning_rate": 8.66374695863747e-06, "loss": 0.5839, "step": 20898 }, { "epoch": 0.6101719657820209, "grad_norm": 0.6349924758279559, "learning_rate": 8.663098134630982e-06, "loss": 0.6028, "step": 20899 }, { "epoch": 0.6102011620098683, "grad_norm": 0.6172281886964325, "learning_rate": 8.662449310624493e-06, "loss": 0.6078, "step": 20900 }, { "epoch": 0.6102303582377157, "grad_norm": 0.6551218532557764, "learning_rate": 8.661800486618006e-06, "loss": 0.6354, "step": 20901 }, { "epoch": 0.610259554465563, "grad_norm": 0.6798519683654248, "learning_rate": 8.661151662611518e-06, "loss": 0.6544, "step": 20902 }, { "epoch": 0.6102887506934104, "grad_norm": 0.6697749226907631, "learning_rate": 8.660502838605029e-06, "loss": 0.6776, "step": 20903 }, { "epoch": 0.6103179469212577, "grad_norm": 0.6372587634967324, "learning_rate": 8.65985401459854e-06, "loss": 0.5898, "step": 20904 }, { "epoch": 0.6103471431491051, "grad_norm": 0.633415810725238, "learning_rate": 8.659205190592053e-06, "loss": 0.6239, "step": 20905 }, { "epoch": 0.6103763393769525, "grad_norm": 0.6368364945228796, "learning_rate": 8.658556366585565e-06, "loss": 0.573, "step": 20906 }, { "epoch": 0.6104055356047998, "grad_norm": 0.6604459002127689, "learning_rate": 8.657907542579075e-06, "loss": 0.623, "step": 20907 }, { "epoch": 0.6104347318326472, "grad_norm": 0.6520859739366033, "learning_rate": 8.657258718572587e-06, "loss": 0.6285, "step": 20908 }, { "epoch": 0.6104639280604945, "grad_norm": 0.6789085688617725, "learning_rate": 8.6566098945661e-06, "loss": 0.67, "step": 20909 }, { "epoch": 0.6104931242883419, "grad_norm": 0.7002027221066811, "learning_rate": 8.655961070559611e-06, "loss": 0.6836, "step": 20910 }, { "epoch": 0.6105223205161893, "grad_norm": 0.6725555728605136, "learning_rate": 8.655312246553123e-06, "loss": 0.6404, "step": 20911 }, { "epoch": 0.6105515167440366, "grad_norm": 0.6720989427786677, "learning_rate": 8.654663422546635e-06, "loss": 0.6618, "step": 20912 }, { "epoch": 0.610580712971884, "grad_norm": 0.633913323813558, "learning_rate": 8.654014598540147e-06, "loss": 0.6032, "step": 20913 }, { "epoch": 0.6106099091997313, "grad_norm": 0.6620703455777933, "learning_rate": 8.65336577453366e-06, "loss": 0.6191, "step": 20914 }, { "epoch": 0.6106391054275787, "grad_norm": 0.664510627679564, "learning_rate": 8.65271695052717e-06, "loss": 0.6273, "step": 20915 }, { "epoch": 0.6106683016554261, "grad_norm": 0.6787478073180999, "learning_rate": 8.652068126520682e-06, "loss": 0.6504, "step": 20916 }, { "epoch": 0.6106974978832734, "grad_norm": 0.6248080985106063, "learning_rate": 8.651419302514194e-06, "loss": 0.5801, "step": 20917 }, { "epoch": 0.6107266941111208, "grad_norm": 0.7183990993812889, "learning_rate": 8.650770478507706e-06, "loss": 0.7345, "step": 20918 }, { "epoch": 0.6107558903389682, "grad_norm": 0.6239960244482232, "learning_rate": 8.650121654501216e-06, "loss": 0.5739, "step": 20919 }, { "epoch": 0.6107850865668155, "grad_norm": 0.6220190371062473, "learning_rate": 8.64947283049473e-06, "loss": 0.5756, "step": 20920 }, { "epoch": 0.6108142827946629, "grad_norm": 0.6467637626724372, "learning_rate": 8.648824006488242e-06, "loss": 0.6308, "step": 20921 }, { "epoch": 0.6108434790225102, "grad_norm": 0.7088687164181763, "learning_rate": 8.648175182481752e-06, "loss": 0.7266, "step": 20922 }, { "epoch": 0.6108726752503576, "grad_norm": 0.6696722095795452, "learning_rate": 8.647526358475264e-06, "loss": 0.6358, "step": 20923 }, { "epoch": 0.610901871478205, "grad_norm": 0.6580623733071781, "learning_rate": 8.646877534468776e-06, "loss": 0.6428, "step": 20924 }, { "epoch": 0.6109310677060524, "grad_norm": 0.6623077530390311, "learning_rate": 8.646228710462288e-06, "loss": 0.6279, "step": 20925 }, { "epoch": 0.6109602639338998, "grad_norm": 0.6140908323087746, "learning_rate": 8.645579886455799e-06, "loss": 0.5556, "step": 20926 }, { "epoch": 0.6109894601617472, "grad_norm": 0.5935492605775249, "learning_rate": 8.64493106244931e-06, "loss": 0.5621, "step": 20927 }, { "epoch": 0.6110186563895945, "grad_norm": 0.6708658570514306, "learning_rate": 8.644282238442823e-06, "loss": 0.6524, "step": 20928 }, { "epoch": 0.6110478526174419, "grad_norm": 0.6497815310180688, "learning_rate": 8.643633414436335e-06, "loss": 0.6202, "step": 20929 }, { "epoch": 0.6110770488452892, "grad_norm": 0.6519764617838356, "learning_rate": 8.642984590429847e-06, "loss": 0.6259, "step": 20930 }, { "epoch": 0.6111062450731366, "grad_norm": 0.6272550534886954, "learning_rate": 8.642335766423359e-06, "loss": 0.5814, "step": 20931 }, { "epoch": 0.611135441300984, "grad_norm": 0.5703921080932369, "learning_rate": 8.64168694241687e-06, "loss": 0.4624, "step": 20932 }, { "epoch": 0.6111646375288313, "grad_norm": 0.6439279309125304, "learning_rate": 8.641038118410383e-06, "loss": 0.6214, "step": 20933 }, { "epoch": 0.6111938337566787, "grad_norm": 0.6594145669728627, "learning_rate": 8.640389294403893e-06, "loss": 0.6343, "step": 20934 }, { "epoch": 0.611223029984526, "grad_norm": 0.6173952409312617, "learning_rate": 8.639740470397405e-06, "loss": 0.5974, "step": 20935 }, { "epoch": 0.6112522262123734, "grad_norm": 0.6193560518075291, "learning_rate": 8.639091646390917e-06, "loss": 0.5497, "step": 20936 }, { "epoch": 0.6112814224402208, "grad_norm": 0.6258787456187934, "learning_rate": 8.638442822384429e-06, "loss": 0.5535, "step": 20937 }, { "epoch": 0.6113106186680681, "grad_norm": 0.6542375466039451, "learning_rate": 8.63779399837794e-06, "loss": 0.6558, "step": 20938 }, { "epoch": 0.6113398148959155, "grad_norm": 0.6637893518699877, "learning_rate": 8.637145174371453e-06, "loss": 0.5868, "step": 20939 }, { "epoch": 0.6113690111237629, "grad_norm": 0.6505732221459906, "learning_rate": 8.636496350364965e-06, "loss": 0.6116, "step": 20940 }, { "epoch": 0.6113982073516102, "grad_norm": 0.6565482120279372, "learning_rate": 8.635847526358476e-06, "loss": 0.5978, "step": 20941 }, { "epoch": 0.6114274035794576, "grad_norm": 0.6097340791598869, "learning_rate": 8.635198702351988e-06, "loss": 0.5284, "step": 20942 }, { "epoch": 0.6114565998073049, "grad_norm": 0.5875951348599274, "learning_rate": 8.6345498783455e-06, "loss": 0.4891, "step": 20943 }, { "epoch": 0.6114857960351523, "grad_norm": 0.6274701380832788, "learning_rate": 8.633901054339012e-06, "loss": 0.5906, "step": 20944 }, { "epoch": 0.6115149922629997, "grad_norm": 0.6042446122086758, "learning_rate": 8.633252230332522e-06, "loss": 0.5708, "step": 20945 }, { "epoch": 0.611544188490847, "grad_norm": 0.6184213706203955, "learning_rate": 8.632603406326034e-06, "loss": 0.5624, "step": 20946 }, { "epoch": 0.6115733847186944, "grad_norm": 0.6051730293674463, "learning_rate": 8.631954582319546e-06, "loss": 0.5475, "step": 20947 }, { "epoch": 0.6116025809465417, "grad_norm": 0.6345845673685059, "learning_rate": 8.631305758313058e-06, "loss": 0.6113, "step": 20948 }, { "epoch": 0.6116317771743891, "grad_norm": 0.6771420518910362, "learning_rate": 8.63065693430657e-06, "loss": 0.6622, "step": 20949 }, { "epoch": 0.6116609734022365, "grad_norm": 0.666703402763446, "learning_rate": 8.630008110300082e-06, "loss": 0.6807, "step": 20950 }, { "epoch": 0.6116901696300838, "grad_norm": 0.632280982229534, "learning_rate": 8.629359286293594e-06, "loss": 0.5715, "step": 20951 }, { "epoch": 0.6117193658579312, "grad_norm": 0.6290599339840476, "learning_rate": 8.628710462287106e-06, "loss": 0.561, "step": 20952 }, { "epoch": 0.6117485620857785, "grad_norm": 0.6681692808058226, "learning_rate": 8.628061638280617e-06, "loss": 0.6467, "step": 20953 }, { "epoch": 0.6117777583136259, "grad_norm": 0.6423653394505514, "learning_rate": 8.627412814274129e-06, "loss": 0.6116, "step": 20954 }, { "epoch": 0.6118069545414733, "grad_norm": 0.6786469517504888, "learning_rate": 8.62676399026764e-06, "loss": 0.6465, "step": 20955 }, { "epoch": 0.6118361507693206, "grad_norm": 0.642792073852244, "learning_rate": 8.626115166261153e-06, "loss": 0.6601, "step": 20956 }, { "epoch": 0.611865346997168, "grad_norm": 0.6620616049903885, "learning_rate": 8.625466342254663e-06, "loss": 0.6452, "step": 20957 }, { "epoch": 0.6118945432250154, "grad_norm": 0.6134453879343039, "learning_rate": 8.624817518248177e-06, "loss": 0.5205, "step": 20958 }, { "epoch": 0.6119237394528627, "grad_norm": 0.6525949991299592, "learning_rate": 8.624168694241689e-06, "loss": 0.6069, "step": 20959 }, { "epoch": 0.6119529356807101, "grad_norm": 0.6337258421274277, "learning_rate": 8.623519870235199e-06, "loss": 0.5773, "step": 20960 }, { "epoch": 0.6119821319085574, "grad_norm": 0.6411735648022288, "learning_rate": 8.622871046228711e-06, "loss": 0.5816, "step": 20961 }, { "epoch": 0.6120113281364048, "grad_norm": 0.6830928478155082, "learning_rate": 8.622222222222223e-06, "loss": 0.6143, "step": 20962 }, { "epoch": 0.6120405243642522, "grad_norm": 0.6289584146723941, "learning_rate": 8.621573398215735e-06, "loss": 0.5818, "step": 20963 }, { "epoch": 0.6120697205920995, "grad_norm": 0.6262056939648677, "learning_rate": 8.620924574209245e-06, "loss": 0.5859, "step": 20964 }, { "epoch": 0.6120989168199469, "grad_norm": 0.6294012830537511, "learning_rate": 8.620275750202757e-06, "loss": 0.5686, "step": 20965 }, { "epoch": 0.6121281130477942, "grad_norm": 0.6088979540295083, "learning_rate": 8.61962692619627e-06, "loss": 0.5848, "step": 20966 }, { "epoch": 0.6121573092756416, "grad_norm": 0.6549407412292816, "learning_rate": 8.618978102189781e-06, "loss": 0.6392, "step": 20967 }, { "epoch": 0.612186505503489, "grad_norm": 0.6739880378967885, "learning_rate": 8.618329278183294e-06, "loss": 0.6037, "step": 20968 }, { "epoch": 0.6122157017313363, "grad_norm": 0.6691173368868054, "learning_rate": 8.617680454176806e-06, "loss": 0.6359, "step": 20969 }, { "epoch": 0.6122448979591837, "grad_norm": 0.6063269740556427, "learning_rate": 8.617031630170318e-06, "loss": 0.5826, "step": 20970 }, { "epoch": 0.612274094187031, "grad_norm": 0.6703007337456715, "learning_rate": 8.61638280616383e-06, "loss": 0.7044, "step": 20971 }, { "epoch": 0.6123032904148784, "grad_norm": 0.646716208848794, "learning_rate": 8.61573398215734e-06, "loss": 0.5893, "step": 20972 }, { "epoch": 0.6123324866427258, "grad_norm": 0.6488408763505863, "learning_rate": 8.615085158150852e-06, "loss": 0.6216, "step": 20973 }, { "epoch": 0.6123616828705731, "grad_norm": 0.6890749387517151, "learning_rate": 8.614436334144364e-06, "loss": 0.6067, "step": 20974 }, { "epoch": 0.6123908790984205, "grad_norm": 0.627133319391025, "learning_rate": 8.613787510137876e-06, "loss": 0.6307, "step": 20975 }, { "epoch": 0.6124200753262679, "grad_norm": 0.6636796449470062, "learning_rate": 8.613138686131386e-06, "loss": 0.6799, "step": 20976 }, { "epoch": 0.6124492715541152, "grad_norm": 0.6238339127433821, "learning_rate": 8.6124898621249e-06, "loss": 0.6122, "step": 20977 }, { "epoch": 0.6124784677819626, "grad_norm": 0.6900609358168569, "learning_rate": 8.611841038118412e-06, "loss": 0.6799, "step": 20978 }, { "epoch": 0.6125076640098099, "grad_norm": 0.6432294993739406, "learning_rate": 8.611192214111922e-06, "loss": 0.5679, "step": 20979 }, { "epoch": 0.6125368602376573, "grad_norm": 0.5978475041405442, "learning_rate": 8.610543390105434e-06, "loss": 0.5901, "step": 20980 }, { "epoch": 0.6125660564655047, "grad_norm": 0.6121115758128783, "learning_rate": 8.609894566098946e-06, "loss": 0.5415, "step": 20981 }, { "epoch": 0.612595252693352, "grad_norm": 0.7076959777757175, "learning_rate": 8.609245742092459e-06, "loss": 0.7074, "step": 20982 }, { "epoch": 0.6126244489211994, "grad_norm": 0.6641134389765909, "learning_rate": 8.608596918085969e-06, "loss": 0.6496, "step": 20983 }, { "epoch": 0.6126536451490467, "grad_norm": 0.6399960701468174, "learning_rate": 8.607948094079481e-06, "loss": 0.6103, "step": 20984 }, { "epoch": 0.6126828413768941, "grad_norm": 0.7013665990922189, "learning_rate": 8.607299270072993e-06, "loss": 0.701, "step": 20985 }, { "epoch": 0.6127120376047415, "grad_norm": 0.6623246830057274, "learning_rate": 8.606650446066505e-06, "loss": 0.6523, "step": 20986 }, { "epoch": 0.6127412338325888, "grad_norm": 0.6566241731147717, "learning_rate": 8.606001622060017e-06, "loss": 0.6397, "step": 20987 }, { "epoch": 0.6127704300604362, "grad_norm": 0.6291175029113028, "learning_rate": 8.605352798053529e-06, "loss": 0.5852, "step": 20988 }, { "epoch": 0.6127996262882835, "grad_norm": 0.6479518514021114, "learning_rate": 8.604703974047041e-06, "loss": 0.6181, "step": 20989 }, { "epoch": 0.6128288225161309, "grad_norm": 0.696610837317649, "learning_rate": 8.604055150040553e-06, "loss": 0.5766, "step": 20990 }, { "epoch": 0.6128580187439783, "grad_norm": 0.6031093443322376, "learning_rate": 8.603406326034063e-06, "loss": 0.5304, "step": 20991 }, { "epoch": 0.6128872149718256, "grad_norm": 0.7583934667875567, "learning_rate": 8.602757502027575e-06, "loss": 0.7312, "step": 20992 }, { "epoch": 0.612916411199673, "grad_norm": 0.7110877799476177, "learning_rate": 8.602108678021087e-06, "loss": 0.6494, "step": 20993 }, { "epoch": 0.6129456074275204, "grad_norm": 0.6289426846630938, "learning_rate": 8.6014598540146e-06, "loss": 0.5656, "step": 20994 }, { "epoch": 0.6129748036553677, "grad_norm": 0.6352829130353045, "learning_rate": 8.60081103000811e-06, "loss": 0.5761, "step": 20995 }, { "epoch": 0.6130039998832151, "grad_norm": 0.6571952015713067, "learning_rate": 8.600162206001624e-06, "loss": 0.6023, "step": 20996 }, { "epoch": 0.6130331961110624, "grad_norm": 0.6480027508240005, "learning_rate": 8.599513381995136e-06, "loss": 0.5717, "step": 20997 }, { "epoch": 0.6130623923389098, "grad_norm": 0.6403479843230847, "learning_rate": 8.598864557988646e-06, "loss": 0.6037, "step": 20998 }, { "epoch": 0.6130915885667572, "grad_norm": 0.640681995207834, "learning_rate": 8.598215733982158e-06, "loss": 0.6301, "step": 20999 }, { "epoch": 0.6131207847946045, "grad_norm": 0.6569542695187717, "learning_rate": 8.59756690997567e-06, "loss": 0.5985, "step": 21000 }, { "epoch": 0.6131499810224519, "grad_norm": 0.6381512865528529, "learning_rate": 8.596918085969182e-06, "loss": 0.5673, "step": 21001 }, { "epoch": 0.6131791772502992, "grad_norm": 0.6585439338972098, "learning_rate": 8.596269261962692e-06, "loss": 0.6655, "step": 21002 }, { "epoch": 0.6132083734781466, "grad_norm": 0.6539221238877236, "learning_rate": 8.595620437956204e-06, "loss": 0.6349, "step": 21003 }, { "epoch": 0.613237569705994, "grad_norm": 0.7003308833603769, "learning_rate": 8.594971613949716e-06, "loss": 0.6187, "step": 21004 }, { "epoch": 0.6132667659338413, "grad_norm": 0.5892058771736776, "learning_rate": 8.594322789943228e-06, "loss": 0.5304, "step": 21005 }, { "epoch": 0.6132959621616887, "grad_norm": 0.6492700357582106, "learning_rate": 8.59367396593674e-06, "loss": 0.6485, "step": 21006 }, { "epoch": 0.613325158389536, "grad_norm": 0.6011470067400687, "learning_rate": 8.593025141930252e-06, "loss": 0.5199, "step": 21007 }, { "epoch": 0.6133543546173834, "grad_norm": 0.6408777252618877, "learning_rate": 8.592376317923764e-06, "loss": 0.6063, "step": 21008 }, { "epoch": 0.6133835508452308, "grad_norm": 0.7355386099293324, "learning_rate": 8.591727493917276e-06, "loss": 0.6207, "step": 21009 }, { "epoch": 0.6134127470730781, "grad_norm": 0.635819470626973, "learning_rate": 8.591078669910787e-06, "loss": 0.5919, "step": 21010 }, { "epoch": 0.6134419433009255, "grad_norm": 0.6516252446957559, "learning_rate": 8.590429845904299e-06, "loss": 0.5825, "step": 21011 }, { "epoch": 0.6134711395287729, "grad_norm": 0.6696101306653912, "learning_rate": 8.589781021897811e-06, "loss": 0.6126, "step": 21012 }, { "epoch": 0.6135003357566202, "grad_norm": 0.6959583139301039, "learning_rate": 8.589132197891323e-06, "loss": 0.6825, "step": 21013 }, { "epoch": 0.6135295319844676, "grad_norm": 0.6560565732737059, "learning_rate": 8.588483373884835e-06, "loss": 0.5981, "step": 21014 }, { "epoch": 0.6135587282123149, "grad_norm": 0.630519749379965, "learning_rate": 8.587834549878347e-06, "loss": 0.6052, "step": 21015 }, { "epoch": 0.6135879244401623, "grad_norm": 0.6401803783554262, "learning_rate": 8.587185725871859e-06, "loss": 0.587, "step": 21016 }, { "epoch": 0.6136171206680097, "grad_norm": 0.6410442403956336, "learning_rate": 8.58653690186537e-06, "loss": 0.6104, "step": 21017 }, { "epoch": 0.613646316895857, "grad_norm": 0.6242188632000056, "learning_rate": 8.585888077858881e-06, "loss": 0.6069, "step": 21018 }, { "epoch": 0.6136755131237044, "grad_norm": 0.6111130039656665, "learning_rate": 8.585239253852393e-06, "loss": 0.5487, "step": 21019 }, { "epoch": 0.6137047093515517, "grad_norm": 0.6618341667453446, "learning_rate": 8.584590429845905e-06, "loss": 0.6584, "step": 21020 }, { "epoch": 0.6137339055793991, "grad_norm": 0.6516203723254025, "learning_rate": 8.583941605839416e-06, "loss": 0.6351, "step": 21021 }, { "epoch": 0.6137631018072465, "grad_norm": 0.6784460750806789, "learning_rate": 8.583292781832928e-06, "loss": 0.6563, "step": 21022 }, { "epoch": 0.6137922980350938, "grad_norm": 0.615321711588312, "learning_rate": 8.58264395782644e-06, "loss": 0.5925, "step": 21023 }, { "epoch": 0.6138214942629412, "grad_norm": 0.6628508522865972, "learning_rate": 8.581995133819952e-06, "loss": 0.6595, "step": 21024 }, { "epoch": 0.6138506904907886, "grad_norm": 0.6785991631201984, "learning_rate": 8.581346309813464e-06, "loss": 0.5809, "step": 21025 }, { "epoch": 0.6138798867186359, "grad_norm": 0.6729563249387105, "learning_rate": 8.580697485806976e-06, "loss": 0.6607, "step": 21026 }, { "epoch": 0.6139090829464833, "grad_norm": 0.645677804679317, "learning_rate": 8.580048661800488e-06, "loss": 0.634, "step": 21027 }, { "epoch": 0.6139382791743306, "grad_norm": 0.6541997472238943, "learning_rate": 8.579399837794e-06, "loss": 0.6069, "step": 21028 }, { "epoch": 0.613967475402178, "grad_norm": 0.6415810756369302, "learning_rate": 8.57875101378751e-06, "loss": 0.6114, "step": 21029 }, { "epoch": 0.6139966716300254, "grad_norm": 0.6178450003253863, "learning_rate": 8.578102189781022e-06, "loss": 0.6056, "step": 21030 }, { "epoch": 0.6140258678578727, "grad_norm": 0.6925476467559966, "learning_rate": 8.577453365774534e-06, "loss": 0.6975, "step": 21031 }, { "epoch": 0.6140550640857201, "grad_norm": 0.6125219204694525, "learning_rate": 8.576804541768046e-06, "loss": 0.6082, "step": 21032 }, { "epoch": 0.6140842603135674, "grad_norm": 0.6411274495402383, "learning_rate": 8.576155717761558e-06, "loss": 0.6215, "step": 21033 }, { "epoch": 0.6141134565414148, "grad_norm": 0.619229334877434, "learning_rate": 8.57550689375507e-06, "loss": 0.5521, "step": 21034 }, { "epoch": 0.6141426527692622, "grad_norm": 0.6923526336327508, "learning_rate": 8.574858069748582e-06, "loss": 0.6786, "step": 21035 }, { "epoch": 0.6141718489971095, "grad_norm": 0.6530989884525612, "learning_rate": 8.574209245742093e-06, "loss": 0.6629, "step": 21036 }, { "epoch": 0.6142010452249569, "grad_norm": 0.6735677229736852, "learning_rate": 8.573560421735605e-06, "loss": 0.6487, "step": 21037 }, { "epoch": 0.6142302414528042, "grad_norm": 0.6179947735226797, "learning_rate": 8.572911597729117e-06, "loss": 0.5886, "step": 21038 }, { "epoch": 0.6142594376806516, "grad_norm": 0.6271655254226173, "learning_rate": 8.572262773722629e-06, "loss": 0.5575, "step": 21039 }, { "epoch": 0.614288633908499, "grad_norm": 0.6312035610982212, "learning_rate": 8.57161394971614e-06, "loss": 0.605, "step": 21040 }, { "epoch": 0.6143178301363463, "grad_norm": 0.6222375711011846, "learning_rate": 8.570965125709651e-06, "loss": 0.5678, "step": 21041 }, { "epoch": 0.6143470263641937, "grad_norm": 0.6331497420574057, "learning_rate": 8.570316301703163e-06, "loss": 0.5783, "step": 21042 }, { "epoch": 0.614376222592041, "grad_norm": 0.6283424135005259, "learning_rate": 8.569667477696675e-06, "loss": 0.6051, "step": 21043 }, { "epoch": 0.6144054188198884, "grad_norm": 0.6703962065651287, "learning_rate": 8.569018653690187e-06, "loss": 0.6506, "step": 21044 }, { "epoch": 0.6144346150477358, "grad_norm": 0.6207946784206136, "learning_rate": 8.5683698296837e-06, "loss": 0.5944, "step": 21045 }, { "epoch": 0.6144638112755832, "grad_norm": 0.6465536556837687, "learning_rate": 8.567721005677211e-06, "loss": 0.6183, "step": 21046 }, { "epoch": 0.6144930075034306, "grad_norm": 0.6519546275701009, "learning_rate": 8.567072181670723e-06, "loss": 0.6358, "step": 21047 }, { "epoch": 0.614522203731278, "grad_norm": 0.6371770745587622, "learning_rate": 8.566423357664234e-06, "loss": 0.5413, "step": 21048 }, { "epoch": 0.6145513999591253, "grad_norm": 0.6306635014519266, "learning_rate": 8.565774533657746e-06, "loss": 0.5139, "step": 21049 }, { "epoch": 0.6145805961869727, "grad_norm": 0.6599993743299515, "learning_rate": 8.565125709651258e-06, "loss": 0.6929, "step": 21050 }, { "epoch": 0.61460979241482, "grad_norm": 0.6732530528787684, "learning_rate": 8.56447688564477e-06, "loss": 0.659, "step": 21051 }, { "epoch": 0.6146389886426674, "grad_norm": 0.603303694353651, "learning_rate": 8.563828061638282e-06, "loss": 0.554, "step": 21052 }, { "epoch": 0.6146681848705148, "grad_norm": 0.6574111875239296, "learning_rate": 8.563179237631794e-06, "loss": 0.6145, "step": 21053 }, { "epoch": 0.6146973810983621, "grad_norm": 0.6392865268992732, "learning_rate": 8.562530413625306e-06, "loss": 0.6403, "step": 21054 }, { "epoch": 0.6147265773262095, "grad_norm": 0.6403250417353009, "learning_rate": 8.561881589618816e-06, "loss": 0.5938, "step": 21055 }, { "epoch": 0.6147557735540569, "grad_norm": 0.6815707558878682, "learning_rate": 8.561232765612328e-06, "loss": 0.622, "step": 21056 }, { "epoch": 0.6147849697819042, "grad_norm": 0.6047260800606327, "learning_rate": 8.56058394160584e-06, "loss": 0.5664, "step": 21057 }, { "epoch": 0.6148141660097516, "grad_norm": 0.6519888185156888, "learning_rate": 8.559935117599352e-06, "loss": 0.6214, "step": 21058 }, { "epoch": 0.614843362237599, "grad_norm": 0.59363056287911, "learning_rate": 8.559286293592863e-06, "loss": 0.5538, "step": 21059 }, { "epoch": 0.6148725584654463, "grad_norm": 0.738866997084889, "learning_rate": 8.558637469586375e-06, "loss": 0.7046, "step": 21060 }, { "epoch": 0.6149017546932937, "grad_norm": 0.6076485572345539, "learning_rate": 8.557988645579887e-06, "loss": 0.5631, "step": 21061 }, { "epoch": 0.614930950921141, "grad_norm": 0.6517615214921461, "learning_rate": 8.557339821573399e-06, "loss": 0.6687, "step": 21062 }, { "epoch": 0.6149601471489884, "grad_norm": 0.608555986699333, "learning_rate": 8.55669099756691e-06, "loss": 0.603, "step": 21063 }, { "epoch": 0.6149893433768358, "grad_norm": 0.6747714982336156, "learning_rate": 8.556042173560423e-06, "loss": 0.6413, "step": 21064 }, { "epoch": 0.6150185396046831, "grad_norm": 0.6531514226527881, "learning_rate": 8.555393349553935e-06, "loss": 0.6071, "step": 21065 }, { "epoch": 0.6150477358325305, "grad_norm": 0.6064632140964276, "learning_rate": 8.554744525547447e-06, "loss": 0.585, "step": 21066 }, { "epoch": 0.6150769320603778, "grad_norm": 0.5941161922809457, "learning_rate": 8.554095701540957e-06, "loss": 0.5074, "step": 21067 }, { "epoch": 0.6151061282882252, "grad_norm": 0.6305588511063981, "learning_rate": 8.553446877534469e-06, "loss": 0.5954, "step": 21068 }, { "epoch": 0.6151353245160726, "grad_norm": 0.6083415940193081, "learning_rate": 8.552798053527981e-06, "loss": 0.5935, "step": 21069 }, { "epoch": 0.6151645207439199, "grad_norm": 0.6428648060383026, "learning_rate": 8.552149229521493e-06, "loss": 0.5641, "step": 21070 }, { "epoch": 0.6151937169717673, "grad_norm": 0.634965294180295, "learning_rate": 8.551500405515005e-06, "loss": 0.5816, "step": 21071 }, { "epoch": 0.6152229131996146, "grad_norm": 0.6768962160516266, "learning_rate": 8.550851581508517e-06, "loss": 0.6453, "step": 21072 }, { "epoch": 0.615252109427462, "grad_norm": 0.6159819925535559, "learning_rate": 8.55020275750203e-06, "loss": 0.6252, "step": 21073 }, { "epoch": 0.6152813056553094, "grad_norm": 0.6615883105815842, "learning_rate": 8.54955393349554e-06, "loss": 0.6453, "step": 21074 }, { "epoch": 0.6153105018831567, "grad_norm": 0.6606604001578987, "learning_rate": 8.548905109489052e-06, "loss": 0.6355, "step": 21075 }, { "epoch": 0.6153396981110041, "grad_norm": 0.6422082985060652, "learning_rate": 8.548256285482564e-06, "loss": 0.5699, "step": 21076 }, { "epoch": 0.6153688943388514, "grad_norm": 0.6707863818468691, "learning_rate": 8.547607461476076e-06, "loss": 0.6271, "step": 21077 }, { "epoch": 0.6153980905666988, "grad_norm": 0.6043131702397571, "learning_rate": 8.546958637469586e-06, "loss": 0.5881, "step": 21078 }, { "epoch": 0.6154272867945462, "grad_norm": 0.6447241329991303, "learning_rate": 8.546309813463098e-06, "loss": 0.6092, "step": 21079 }, { "epoch": 0.6154564830223935, "grad_norm": 0.6418931313663514, "learning_rate": 8.54566098945661e-06, "loss": 0.5944, "step": 21080 }, { "epoch": 0.6154856792502409, "grad_norm": 0.7024394450645025, "learning_rate": 8.545012165450122e-06, "loss": 0.708, "step": 21081 }, { "epoch": 0.6155148754780883, "grad_norm": 0.6396469784821776, "learning_rate": 8.544363341443634e-06, "loss": 0.6499, "step": 21082 }, { "epoch": 0.6155440717059356, "grad_norm": 0.6539067492323565, "learning_rate": 8.543714517437146e-06, "loss": 0.652, "step": 21083 }, { "epoch": 0.615573267933783, "grad_norm": 0.6617147899313004, "learning_rate": 8.543065693430658e-06, "loss": 0.6659, "step": 21084 }, { "epoch": 0.6156024641616303, "grad_norm": 0.619042073474912, "learning_rate": 8.54241686942417e-06, "loss": 0.5912, "step": 21085 }, { "epoch": 0.6156316603894777, "grad_norm": 0.7047653912234602, "learning_rate": 8.54176804541768e-06, "loss": 0.7263, "step": 21086 }, { "epoch": 0.6156608566173251, "grad_norm": 0.6482709171548, "learning_rate": 8.541119221411193e-06, "loss": 0.6034, "step": 21087 }, { "epoch": 0.6156900528451724, "grad_norm": 0.6524984121461633, "learning_rate": 8.540470397404705e-06, "loss": 0.6195, "step": 21088 }, { "epoch": 0.6157192490730198, "grad_norm": 0.6295978484738618, "learning_rate": 8.539821573398217e-06, "loss": 0.5627, "step": 21089 }, { "epoch": 0.6157484453008671, "grad_norm": 0.6115407780114991, "learning_rate": 8.539172749391729e-06, "loss": 0.5584, "step": 21090 }, { "epoch": 0.6157776415287145, "grad_norm": 0.6351219980661085, "learning_rate": 8.53852392538524e-06, "loss": 0.5739, "step": 21091 }, { "epoch": 0.6158068377565619, "grad_norm": 0.6134180760745948, "learning_rate": 8.537875101378753e-06, "loss": 0.5729, "step": 21092 }, { "epoch": 0.6158360339844092, "grad_norm": 0.6539409225048065, "learning_rate": 8.537226277372263e-06, "loss": 0.6332, "step": 21093 }, { "epoch": 0.6158652302122566, "grad_norm": 0.7137466053454536, "learning_rate": 8.536577453365775e-06, "loss": 0.5641, "step": 21094 }, { "epoch": 0.615894426440104, "grad_norm": 0.7061284761484004, "learning_rate": 8.535928629359287e-06, "loss": 0.6855, "step": 21095 }, { "epoch": 0.6159236226679513, "grad_norm": 0.6497828916280376, "learning_rate": 8.535279805352799e-06, "loss": 0.5673, "step": 21096 }, { "epoch": 0.6159528188957987, "grad_norm": 0.6789584317038398, "learning_rate": 8.53463098134631e-06, "loss": 0.6235, "step": 21097 }, { "epoch": 0.615982015123646, "grad_norm": 0.6943184278982862, "learning_rate": 8.533982157339821e-06, "loss": 0.6991, "step": 21098 }, { "epoch": 0.6160112113514934, "grad_norm": 0.651160888279189, "learning_rate": 8.533333333333335e-06, "loss": 0.5877, "step": 21099 }, { "epoch": 0.6160404075793408, "grad_norm": 0.6006307835529775, "learning_rate": 8.532684509326846e-06, "loss": 0.5035, "step": 21100 }, { "epoch": 0.6160696038071881, "grad_norm": 0.6610709138648224, "learning_rate": 8.532035685320358e-06, "loss": 0.686, "step": 21101 }, { "epoch": 0.6160988000350355, "grad_norm": 0.6485640121432779, "learning_rate": 8.53138686131387e-06, "loss": 0.6285, "step": 21102 }, { "epoch": 0.6161279962628828, "grad_norm": 0.8045270129026533, "learning_rate": 8.530738037307382e-06, "loss": 0.6575, "step": 21103 }, { "epoch": 0.6161571924907302, "grad_norm": 0.5719220994597048, "learning_rate": 8.530089213300892e-06, "loss": 0.5052, "step": 21104 }, { "epoch": 0.6161863887185776, "grad_norm": 0.674788259215221, "learning_rate": 8.529440389294404e-06, "loss": 0.6869, "step": 21105 }, { "epoch": 0.6162155849464249, "grad_norm": 0.706860429149634, "learning_rate": 8.528791565287916e-06, "loss": 0.64, "step": 21106 }, { "epoch": 0.6162447811742723, "grad_norm": 0.6130442137276947, "learning_rate": 8.528142741281428e-06, "loss": 0.5755, "step": 21107 }, { "epoch": 0.6162739774021196, "grad_norm": 0.64906587856795, "learning_rate": 8.52749391727494e-06, "loss": 0.5853, "step": 21108 }, { "epoch": 0.616303173629967, "grad_norm": 0.6428545124260759, "learning_rate": 8.526845093268452e-06, "loss": 0.6127, "step": 21109 }, { "epoch": 0.6163323698578144, "grad_norm": 0.694664390939099, "learning_rate": 8.526196269261964e-06, "loss": 0.6648, "step": 21110 }, { "epoch": 0.6163615660856617, "grad_norm": 0.6283842399185239, "learning_rate": 8.525547445255476e-06, "loss": 0.578, "step": 21111 }, { "epoch": 0.6163907623135091, "grad_norm": 0.6287247218104203, "learning_rate": 8.524898621248986e-06, "loss": 0.5943, "step": 21112 }, { "epoch": 0.6164199585413564, "grad_norm": 0.6268199832027389, "learning_rate": 8.524249797242499e-06, "loss": 0.6062, "step": 21113 }, { "epoch": 0.6164491547692038, "grad_norm": 0.6467224147180557, "learning_rate": 8.52360097323601e-06, "loss": 0.659, "step": 21114 }, { "epoch": 0.6164783509970512, "grad_norm": 0.651004337335557, "learning_rate": 8.522952149229523e-06, "loss": 0.6279, "step": 21115 }, { "epoch": 0.6165075472248985, "grad_norm": 0.6524039109737493, "learning_rate": 8.522303325223033e-06, "loss": 0.6295, "step": 21116 }, { "epoch": 0.6165367434527459, "grad_norm": 0.6489127587813406, "learning_rate": 8.521654501216545e-06, "loss": 0.6565, "step": 21117 }, { "epoch": 0.6165659396805933, "grad_norm": 0.6207156020802628, "learning_rate": 8.521005677210059e-06, "loss": 0.557, "step": 21118 }, { "epoch": 0.6165951359084406, "grad_norm": 0.6418187049741251, "learning_rate": 8.520356853203569e-06, "loss": 0.5785, "step": 21119 }, { "epoch": 0.616624332136288, "grad_norm": 0.6479255807795273, "learning_rate": 8.519708029197081e-06, "loss": 0.6088, "step": 21120 }, { "epoch": 0.6166535283641353, "grad_norm": 0.6046391202522541, "learning_rate": 8.519059205190593e-06, "loss": 0.5459, "step": 21121 }, { "epoch": 0.6166827245919827, "grad_norm": 0.6399520553013338, "learning_rate": 8.518410381184105e-06, "loss": 0.5704, "step": 21122 }, { "epoch": 0.6167119208198301, "grad_norm": 0.6251804840293176, "learning_rate": 8.517761557177615e-06, "loss": 0.6091, "step": 21123 }, { "epoch": 0.6167411170476774, "grad_norm": 0.637380476606731, "learning_rate": 8.517112733171127e-06, "loss": 0.5794, "step": 21124 }, { "epoch": 0.6167703132755248, "grad_norm": 0.6134671984671635, "learning_rate": 8.51646390916464e-06, "loss": 0.5647, "step": 21125 }, { "epoch": 0.6167995095033721, "grad_norm": 0.6470321799669506, "learning_rate": 8.515815085158151e-06, "loss": 0.6265, "step": 21126 }, { "epoch": 0.6168287057312195, "grad_norm": 0.6993465711398762, "learning_rate": 8.515166261151663e-06, "loss": 0.6351, "step": 21127 }, { "epoch": 0.6168579019590669, "grad_norm": 0.6700550707971232, "learning_rate": 8.514517437145176e-06, "loss": 0.6111, "step": 21128 }, { "epoch": 0.6168870981869142, "grad_norm": 0.6087057463898994, "learning_rate": 8.513868613138688e-06, "loss": 0.5668, "step": 21129 }, { "epoch": 0.6169162944147616, "grad_norm": 0.6053047168192581, "learning_rate": 8.5132197891322e-06, "loss": 0.5559, "step": 21130 }, { "epoch": 0.616945490642609, "grad_norm": 0.7251785479512668, "learning_rate": 8.51257096512571e-06, "loss": 0.6124, "step": 21131 }, { "epoch": 0.6169746868704563, "grad_norm": 0.7121654662881169, "learning_rate": 8.511922141119222e-06, "loss": 0.6324, "step": 21132 }, { "epoch": 0.6170038830983037, "grad_norm": 0.5844682939158364, "learning_rate": 8.511273317112734e-06, "loss": 0.5435, "step": 21133 }, { "epoch": 0.617033079326151, "grad_norm": 0.6686226769610424, "learning_rate": 8.510624493106246e-06, "loss": 0.6662, "step": 21134 }, { "epoch": 0.6170622755539984, "grad_norm": 0.6303480858505066, "learning_rate": 8.509975669099756e-06, "loss": 0.5589, "step": 21135 }, { "epoch": 0.6170914717818458, "grad_norm": 0.6158097819205939, "learning_rate": 8.509326845093268e-06, "loss": 0.5675, "step": 21136 }, { "epoch": 0.6171206680096931, "grad_norm": 0.6511041158477212, "learning_rate": 8.508678021086782e-06, "loss": 0.6503, "step": 21137 }, { "epoch": 0.6171498642375405, "grad_norm": 0.624953049111615, "learning_rate": 8.508029197080292e-06, "loss": 0.5945, "step": 21138 }, { "epoch": 0.6171790604653878, "grad_norm": 0.6694519663776796, "learning_rate": 8.507380373073804e-06, "loss": 0.6366, "step": 21139 }, { "epoch": 0.6172082566932352, "grad_norm": 0.6512109970794823, "learning_rate": 8.506731549067316e-06, "loss": 0.6023, "step": 21140 }, { "epoch": 0.6172374529210826, "grad_norm": 0.6749806911900443, "learning_rate": 8.506082725060828e-06, "loss": 0.6377, "step": 21141 }, { "epoch": 0.6172666491489299, "grad_norm": 0.62872397376522, "learning_rate": 8.505433901054339e-06, "loss": 0.6046, "step": 21142 }, { "epoch": 0.6172958453767773, "grad_norm": 0.66582588690694, "learning_rate": 8.504785077047851e-06, "loss": 0.6833, "step": 21143 }, { "epoch": 0.6173250416046246, "grad_norm": 0.6122702761364145, "learning_rate": 8.504136253041363e-06, "loss": 0.5588, "step": 21144 }, { "epoch": 0.617354237832472, "grad_norm": 0.5865637502372322, "learning_rate": 8.503487429034875e-06, "loss": 0.5411, "step": 21145 }, { "epoch": 0.6173834340603194, "grad_norm": 0.6626030338157498, "learning_rate": 8.502838605028387e-06, "loss": 0.6544, "step": 21146 }, { "epoch": 0.6174126302881667, "grad_norm": 0.6326024581707148, "learning_rate": 8.502189781021899e-06, "loss": 0.6422, "step": 21147 }, { "epoch": 0.6174418265160141, "grad_norm": 0.6232781110142738, "learning_rate": 8.501540957015411e-06, "loss": 0.5925, "step": 21148 }, { "epoch": 0.6174710227438615, "grad_norm": 0.6318567675571165, "learning_rate": 8.500892133008923e-06, "loss": 0.6134, "step": 21149 }, { "epoch": 0.6175002189717088, "grad_norm": 0.6664771429897337, "learning_rate": 8.500243309002433e-06, "loss": 0.6243, "step": 21150 }, { "epoch": 0.6175294151995562, "grad_norm": 0.6915864144932173, "learning_rate": 8.499594484995945e-06, "loss": 0.6659, "step": 21151 }, { "epoch": 0.6175586114274035, "grad_norm": 0.6182329160688985, "learning_rate": 8.498945660989457e-06, "loss": 0.5935, "step": 21152 }, { "epoch": 0.6175878076552509, "grad_norm": 0.6507723688331754, "learning_rate": 8.49829683698297e-06, "loss": 0.6366, "step": 21153 }, { "epoch": 0.6176170038830983, "grad_norm": 0.6125733520681512, "learning_rate": 8.49764801297648e-06, "loss": 0.5694, "step": 21154 }, { "epoch": 0.6176462001109456, "grad_norm": 0.644453410978912, "learning_rate": 8.496999188969992e-06, "loss": 0.6224, "step": 21155 }, { "epoch": 0.617675396338793, "grad_norm": 0.6093581478458786, "learning_rate": 8.496350364963506e-06, "loss": 0.5552, "step": 21156 }, { "epoch": 0.6177045925666403, "grad_norm": 0.6428995611801115, "learning_rate": 8.495701540957016e-06, "loss": 0.5816, "step": 21157 }, { "epoch": 0.6177337887944877, "grad_norm": 0.6369949233630563, "learning_rate": 8.495052716950528e-06, "loss": 0.5609, "step": 21158 }, { "epoch": 0.6177629850223351, "grad_norm": 0.6409221597929332, "learning_rate": 8.49440389294404e-06, "loss": 0.6074, "step": 21159 }, { "epoch": 0.6177921812501824, "grad_norm": 0.6295665692490925, "learning_rate": 8.493755068937552e-06, "loss": 0.6188, "step": 21160 }, { "epoch": 0.6178213774780298, "grad_norm": 0.5718466597715428, "learning_rate": 8.493106244931062e-06, "loss": 0.5221, "step": 21161 }, { "epoch": 0.6178505737058771, "grad_norm": 0.6953020782485348, "learning_rate": 8.492457420924574e-06, "loss": 0.6618, "step": 21162 }, { "epoch": 0.6178797699337245, "grad_norm": 0.6882916552849426, "learning_rate": 8.491808596918086e-06, "loss": 0.5542, "step": 21163 }, { "epoch": 0.6179089661615719, "grad_norm": 0.6147589304784219, "learning_rate": 8.491159772911598e-06, "loss": 0.5764, "step": 21164 }, { "epoch": 0.6179381623894192, "grad_norm": 0.6146045226172189, "learning_rate": 8.49051094890511e-06, "loss": 0.5368, "step": 21165 }, { "epoch": 0.6179673586172667, "grad_norm": 0.7061657920291681, "learning_rate": 8.489862124898622e-06, "loss": 0.6862, "step": 21166 }, { "epoch": 0.6179965548451141, "grad_norm": 0.6455446657854309, "learning_rate": 8.489213300892134e-06, "loss": 0.5833, "step": 21167 }, { "epoch": 0.6180257510729614, "grad_norm": 0.6730400973735187, "learning_rate": 8.488564476885646e-06, "loss": 0.6677, "step": 21168 }, { "epoch": 0.6180549473008088, "grad_norm": 0.6426187091566599, "learning_rate": 8.487915652879157e-06, "loss": 0.5954, "step": 21169 }, { "epoch": 0.6180841435286561, "grad_norm": 0.6369092580406487, "learning_rate": 8.487266828872669e-06, "loss": 0.5948, "step": 21170 }, { "epoch": 0.6181133397565035, "grad_norm": 0.6620815810590926, "learning_rate": 8.48661800486618e-06, "loss": 0.6359, "step": 21171 }, { "epoch": 0.6181425359843509, "grad_norm": 0.6335128833134963, "learning_rate": 8.485969180859693e-06, "loss": 0.563, "step": 21172 }, { "epoch": 0.6181717322121982, "grad_norm": 0.6818517766170622, "learning_rate": 8.485320356853203e-06, "loss": 0.6254, "step": 21173 }, { "epoch": 0.6182009284400456, "grad_norm": 0.605914650049086, "learning_rate": 8.484671532846715e-06, "loss": 0.5284, "step": 21174 }, { "epoch": 0.618230124667893, "grad_norm": 0.6394673264844455, "learning_rate": 8.484022708840229e-06, "loss": 0.5974, "step": 21175 }, { "epoch": 0.6182593208957403, "grad_norm": 0.6214956627109643, "learning_rate": 8.48337388483374e-06, "loss": 0.5315, "step": 21176 }, { "epoch": 0.6182885171235877, "grad_norm": 0.6555194885999389, "learning_rate": 8.482725060827251e-06, "loss": 0.6079, "step": 21177 }, { "epoch": 0.618317713351435, "grad_norm": 0.651224846219019, "learning_rate": 8.482076236820763e-06, "loss": 0.5943, "step": 21178 }, { "epoch": 0.6183469095792824, "grad_norm": 0.6024560863049556, "learning_rate": 8.481427412814275e-06, "loss": 0.5402, "step": 21179 }, { "epoch": 0.6183761058071298, "grad_norm": 0.6886787917672115, "learning_rate": 8.480778588807786e-06, "loss": 0.654, "step": 21180 }, { "epoch": 0.6184053020349771, "grad_norm": 0.6455727915485784, "learning_rate": 8.480129764801298e-06, "loss": 0.6148, "step": 21181 }, { "epoch": 0.6184344982628245, "grad_norm": 0.6321319323754447, "learning_rate": 8.47948094079481e-06, "loss": 0.5506, "step": 21182 }, { "epoch": 0.6184636944906718, "grad_norm": 0.6288334114711244, "learning_rate": 8.478832116788322e-06, "loss": 0.5776, "step": 21183 }, { "epoch": 0.6184928907185192, "grad_norm": 0.6636287036307417, "learning_rate": 8.478183292781834e-06, "loss": 0.618, "step": 21184 }, { "epoch": 0.6185220869463666, "grad_norm": 0.6019203569096095, "learning_rate": 8.477534468775346e-06, "loss": 0.5555, "step": 21185 }, { "epoch": 0.6185512831742139, "grad_norm": 0.701058879830371, "learning_rate": 8.476885644768858e-06, "loss": 0.7037, "step": 21186 }, { "epoch": 0.6185804794020613, "grad_norm": 0.6533490439550368, "learning_rate": 8.47623682076237e-06, "loss": 0.5614, "step": 21187 }, { "epoch": 0.6186096756299087, "grad_norm": 0.6558599052332507, "learning_rate": 8.47558799675588e-06, "loss": 0.676, "step": 21188 }, { "epoch": 0.618638871857756, "grad_norm": 0.6266630972164109, "learning_rate": 8.474939172749392e-06, "loss": 0.5532, "step": 21189 }, { "epoch": 0.6186680680856034, "grad_norm": 0.637786155809856, "learning_rate": 8.474290348742904e-06, "loss": 0.5622, "step": 21190 }, { "epoch": 0.6186972643134507, "grad_norm": 0.6215230557659821, "learning_rate": 8.473641524736416e-06, "loss": 0.5751, "step": 21191 }, { "epoch": 0.6187264605412981, "grad_norm": 0.659631487860127, "learning_rate": 8.472992700729927e-06, "loss": 0.5832, "step": 21192 }, { "epoch": 0.6187556567691455, "grad_norm": 0.645033068172826, "learning_rate": 8.472343876723439e-06, "loss": 0.6028, "step": 21193 }, { "epoch": 0.6187848529969928, "grad_norm": 0.6513767970086981, "learning_rate": 8.471695052716952e-06, "loss": 0.5972, "step": 21194 }, { "epoch": 0.6188140492248402, "grad_norm": 0.6617632805403865, "learning_rate": 8.471046228710463e-06, "loss": 0.6541, "step": 21195 }, { "epoch": 0.6188432454526875, "grad_norm": 0.6620412918849197, "learning_rate": 8.470397404703975e-06, "loss": 0.6689, "step": 21196 }, { "epoch": 0.6188724416805349, "grad_norm": 0.6724907129570098, "learning_rate": 8.469748580697487e-06, "loss": 0.6367, "step": 21197 }, { "epoch": 0.6189016379083823, "grad_norm": 0.6007908892878964, "learning_rate": 8.469099756690999e-06, "loss": 0.5409, "step": 21198 }, { "epoch": 0.6189308341362296, "grad_norm": 0.622089802521537, "learning_rate": 8.468450932684509e-06, "loss": 0.5878, "step": 21199 }, { "epoch": 0.618960030364077, "grad_norm": 0.6716787711779794, "learning_rate": 8.467802108678021e-06, "loss": 0.6484, "step": 21200 }, { "epoch": 0.6189892265919243, "grad_norm": 0.6734842980422485, "learning_rate": 8.467153284671533e-06, "loss": 0.6657, "step": 21201 }, { "epoch": 0.6190184228197717, "grad_norm": 0.6374539796250122, "learning_rate": 8.466504460665045e-06, "loss": 0.5592, "step": 21202 }, { "epoch": 0.6190476190476191, "grad_norm": 0.6353121824899975, "learning_rate": 8.465855636658557e-06, "loss": 0.6045, "step": 21203 }, { "epoch": 0.6190768152754664, "grad_norm": 0.6835331775918295, "learning_rate": 8.46520681265207e-06, "loss": 0.7118, "step": 21204 }, { "epoch": 0.6191060115033138, "grad_norm": 0.6019630418466416, "learning_rate": 8.464557988645581e-06, "loss": 0.5102, "step": 21205 }, { "epoch": 0.6191352077311612, "grad_norm": 0.6050755643374417, "learning_rate": 8.463909164639093e-06, "loss": 0.5671, "step": 21206 }, { "epoch": 0.6191644039590085, "grad_norm": 0.6473324929472142, "learning_rate": 8.463260340632604e-06, "loss": 0.5922, "step": 21207 }, { "epoch": 0.6191936001868559, "grad_norm": 0.6573375614466581, "learning_rate": 8.462611516626116e-06, "loss": 0.6445, "step": 21208 }, { "epoch": 0.6192227964147032, "grad_norm": 0.6151449226623976, "learning_rate": 8.461962692619628e-06, "loss": 0.5703, "step": 21209 }, { "epoch": 0.6192519926425506, "grad_norm": 0.6105822147111871, "learning_rate": 8.46131386861314e-06, "loss": 0.5735, "step": 21210 }, { "epoch": 0.619281188870398, "grad_norm": 0.6292709493449029, "learning_rate": 8.46066504460665e-06, "loss": 0.5846, "step": 21211 }, { "epoch": 0.6193103850982453, "grad_norm": 0.6024996096664392, "learning_rate": 8.460016220600162e-06, "loss": 0.539, "step": 21212 }, { "epoch": 0.6193395813260927, "grad_norm": 0.6445713532975942, "learning_rate": 8.459367396593676e-06, "loss": 0.6108, "step": 21213 }, { "epoch": 0.61936877755394, "grad_norm": 0.6647604335928089, "learning_rate": 8.458718572587186e-06, "loss": 0.5892, "step": 21214 }, { "epoch": 0.6193979737817874, "grad_norm": 0.6015667826042145, "learning_rate": 8.458069748580698e-06, "loss": 0.5554, "step": 21215 }, { "epoch": 0.6194271700096348, "grad_norm": 0.6143375289353538, "learning_rate": 8.45742092457421e-06, "loss": 0.5805, "step": 21216 }, { "epoch": 0.6194563662374821, "grad_norm": 0.6505659387052288, "learning_rate": 8.456772100567722e-06, "loss": 0.6223, "step": 21217 }, { "epoch": 0.6194855624653295, "grad_norm": 0.6344733035177448, "learning_rate": 8.456123276561233e-06, "loss": 0.6323, "step": 21218 }, { "epoch": 0.6195147586931768, "grad_norm": 0.6638329882827694, "learning_rate": 8.455474452554745e-06, "loss": 0.6225, "step": 21219 }, { "epoch": 0.6195439549210242, "grad_norm": 0.6500713829678019, "learning_rate": 8.454825628548257e-06, "loss": 0.6098, "step": 21220 }, { "epoch": 0.6195731511488716, "grad_norm": 0.6294181771824408, "learning_rate": 8.454176804541769e-06, "loss": 0.5573, "step": 21221 }, { "epoch": 0.6196023473767189, "grad_norm": 0.6537130686183226, "learning_rate": 8.45352798053528e-06, "loss": 0.6238, "step": 21222 }, { "epoch": 0.6196315436045663, "grad_norm": 0.6765439718553852, "learning_rate": 8.452879156528793e-06, "loss": 0.67, "step": 21223 }, { "epoch": 0.6196607398324137, "grad_norm": 0.6233493776431579, "learning_rate": 8.452230332522305e-06, "loss": 0.5849, "step": 21224 }, { "epoch": 0.619689936060261, "grad_norm": 0.6235145488119872, "learning_rate": 8.451581508515817e-06, "loss": 0.5978, "step": 21225 }, { "epoch": 0.6197191322881084, "grad_norm": 0.6628640560506501, "learning_rate": 8.450932684509327e-06, "loss": 0.6018, "step": 21226 }, { "epoch": 0.6197483285159557, "grad_norm": 0.7471801145142237, "learning_rate": 8.450283860502839e-06, "loss": 0.6274, "step": 21227 }, { "epoch": 0.6197775247438031, "grad_norm": 0.6088710809171436, "learning_rate": 8.449635036496351e-06, "loss": 0.6008, "step": 21228 }, { "epoch": 0.6198067209716505, "grad_norm": 0.6549167481628669, "learning_rate": 8.448986212489863e-06, "loss": 0.6264, "step": 21229 }, { "epoch": 0.6198359171994978, "grad_norm": 0.6175804770289254, "learning_rate": 8.448337388483373e-06, "loss": 0.5816, "step": 21230 }, { "epoch": 0.6198651134273452, "grad_norm": 0.6249115667035331, "learning_rate": 8.447688564476886e-06, "loss": 0.609, "step": 21231 }, { "epoch": 0.6198943096551925, "grad_norm": 0.6518801147635963, "learning_rate": 8.4470397404704e-06, "loss": 0.6217, "step": 21232 }, { "epoch": 0.6199235058830399, "grad_norm": 0.6138947966776919, "learning_rate": 8.44639091646391e-06, "loss": 0.5423, "step": 21233 }, { "epoch": 0.6199527021108873, "grad_norm": 0.6262592876932892, "learning_rate": 8.445742092457422e-06, "loss": 0.5714, "step": 21234 }, { "epoch": 0.6199818983387346, "grad_norm": 0.6391209434289481, "learning_rate": 8.445093268450934e-06, "loss": 0.603, "step": 21235 }, { "epoch": 0.620011094566582, "grad_norm": 0.6331017248140899, "learning_rate": 8.444444444444446e-06, "loss": 0.5828, "step": 21236 }, { "epoch": 0.6200402907944293, "grad_norm": 0.5965876805441921, "learning_rate": 8.443795620437956e-06, "loss": 0.5568, "step": 21237 }, { "epoch": 0.6200694870222767, "grad_norm": 0.6339804791686684, "learning_rate": 8.443146796431468e-06, "loss": 0.6335, "step": 21238 }, { "epoch": 0.6200986832501241, "grad_norm": 0.6072187995025292, "learning_rate": 8.44249797242498e-06, "loss": 0.5973, "step": 21239 }, { "epoch": 0.6201278794779714, "grad_norm": 0.6331741965585824, "learning_rate": 8.441849148418492e-06, "loss": 0.6147, "step": 21240 }, { "epoch": 0.6201570757058188, "grad_norm": 0.628450826219361, "learning_rate": 8.441200324412004e-06, "loss": 0.5878, "step": 21241 }, { "epoch": 0.6201862719336662, "grad_norm": 0.6090996527142294, "learning_rate": 8.440551500405516e-06, "loss": 0.5987, "step": 21242 }, { "epoch": 0.6202154681615135, "grad_norm": 0.6416246388342773, "learning_rate": 8.439902676399028e-06, "loss": 0.6042, "step": 21243 }, { "epoch": 0.6202446643893609, "grad_norm": 0.6211608076299989, "learning_rate": 8.43925385239254e-06, "loss": 0.5609, "step": 21244 }, { "epoch": 0.6202738606172082, "grad_norm": 0.6993194528561725, "learning_rate": 8.43860502838605e-06, "loss": 0.7221, "step": 21245 }, { "epoch": 0.6203030568450556, "grad_norm": 0.6188086856389202, "learning_rate": 8.437956204379563e-06, "loss": 0.5376, "step": 21246 }, { "epoch": 0.620332253072903, "grad_norm": 0.6169057274259901, "learning_rate": 8.437307380373075e-06, "loss": 0.5854, "step": 21247 }, { "epoch": 0.6203614493007503, "grad_norm": 0.7249417616434577, "learning_rate": 8.436658556366587e-06, "loss": 0.7058, "step": 21248 }, { "epoch": 0.6203906455285977, "grad_norm": 0.6374085612430609, "learning_rate": 8.436009732360097e-06, "loss": 0.5723, "step": 21249 }, { "epoch": 0.620419841756445, "grad_norm": 0.6474970074440133, "learning_rate": 8.43536090835361e-06, "loss": 0.6074, "step": 21250 }, { "epoch": 0.6204490379842924, "grad_norm": 0.618724531817918, "learning_rate": 8.434712084347123e-06, "loss": 0.5828, "step": 21251 }, { "epoch": 0.6204782342121398, "grad_norm": 0.5955618287744096, "learning_rate": 8.434063260340633e-06, "loss": 0.5366, "step": 21252 }, { "epoch": 0.6205074304399871, "grad_norm": 0.6225747776622212, "learning_rate": 8.433414436334145e-06, "loss": 0.5774, "step": 21253 }, { "epoch": 0.6205366266678345, "grad_norm": 0.652612740201055, "learning_rate": 8.432765612327657e-06, "loss": 0.5696, "step": 21254 }, { "epoch": 0.6205658228956819, "grad_norm": 0.6570482302324057, "learning_rate": 8.432116788321169e-06, "loss": 0.6374, "step": 21255 }, { "epoch": 0.6205950191235292, "grad_norm": 0.6231235325095553, "learning_rate": 8.43146796431468e-06, "loss": 0.5922, "step": 21256 }, { "epoch": 0.6206242153513766, "grad_norm": 0.6848506678312366, "learning_rate": 8.430819140308191e-06, "loss": 0.6247, "step": 21257 }, { "epoch": 0.6206534115792239, "grad_norm": 0.6212186727090426, "learning_rate": 8.430170316301703e-06, "loss": 0.5919, "step": 21258 }, { "epoch": 0.6206826078070713, "grad_norm": 0.6492777294436958, "learning_rate": 8.429521492295216e-06, "loss": 0.6131, "step": 21259 }, { "epoch": 0.6207118040349187, "grad_norm": 0.6764107821662799, "learning_rate": 8.428872668288728e-06, "loss": 0.6468, "step": 21260 }, { "epoch": 0.620741000262766, "grad_norm": 0.6168282254793913, "learning_rate": 8.42822384428224e-06, "loss": 0.5528, "step": 21261 }, { "epoch": 0.6207701964906134, "grad_norm": 0.6493590439342992, "learning_rate": 8.427575020275752e-06, "loss": 0.5809, "step": 21262 }, { "epoch": 0.6207993927184607, "grad_norm": 0.6484980605092948, "learning_rate": 8.426926196269264e-06, "loss": 0.6126, "step": 21263 }, { "epoch": 0.6208285889463081, "grad_norm": 0.703405747945503, "learning_rate": 8.426277372262774e-06, "loss": 0.7411, "step": 21264 }, { "epoch": 0.6208577851741555, "grad_norm": 0.6376742311206529, "learning_rate": 8.425628548256286e-06, "loss": 0.5923, "step": 21265 }, { "epoch": 0.6208869814020028, "grad_norm": 0.6787039485487778, "learning_rate": 8.424979724249798e-06, "loss": 0.642, "step": 21266 }, { "epoch": 0.6209161776298502, "grad_norm": 0.6906703440809717, "learning_rate": 8.42433090024331e-06, "loss": 0.7296, "step": 21267 }, { "epoch": 0.6209453738576975, "grad_norm": 0.6368360904651681, "learning_rate": 8.42368207623682e-06, "loss": 0.6145, "step": 21268 }, { "epoch": 0.6209745700855449, "grad_norm": 0.6210566799517122, "learning_rate": 8.423033252230334e-06, "loss": 0.5831, "step": 21269 }, { "epoch": 0.6210037663133923, "grad_norm": 0.6733456585829648, "learning_rate": 8.422384428223846e-06, "loss": 0.6624, "step": 21270 }, { "epoch": 0.6210329625412396, "grad_norm": 0.631714167227313, "learning_rate": 8.421735604217356e-06, "loss": 0.5899, "step": 21271 }, { "epoch": 0.621062158769087, "grad_norm": 0.6721143719565336, "learning_rate": 8.421086780210868e-06, "loss": 0.6259, "step": 21272 }, { "epoch": 0.6210913549969344, "grad_norm": 0.6224383028032394, "learning_rate": 8.42043795620438e-06, "loss": 0.5533, "step": 21273 }, { "epoch": 0.6211205512247817, "grad_norm": 0.6048305760042736, "learning_rate": 8.419789132197893e-06, "loss": 0.5573, "step": 21274 }, { "epoch": 0.6211497474526291, "grad_norm": 0.6524585578286298, "learning_rate": 8.419140308191403e-06, "loss": 0.6185, "step": 21275 }, { "epoch": 0.6211789436804764, "grad_norm": 0.6285298966102744, "learning_rate": 8.418491484184915e-06, "loss": 0.5505, "step": 21276 }, { "epoch": 0.6212081399083238, "grad_norm": 0.6271348405386343, "learning_rate": 8.417842660178427e-06, "loss": 0.6042, "step": 21277 }, { "epoch": 0.6212373361361712, "grad_norm": 0.6744454544020874, "learning_rate": 8.417193836171939e-06, "loss": 0.6518, "step": 21278 }, { "epoch": 0.6212665323640185, "grad_norm": 0.7452470113459705, "learning_rate": 8.416545012165451e-06, "loss": 0.6523, "step": 21279 }, { "epoch": 0.6212957285918659, "grad_norm": 0.7012739464698765, "learning_rate": 8.415896188158963e-06, "loss": 0.7511, "step": 21280 }, { "epoch": 0.6213249248197132, "grad_norm": 0.6573310733893969, "learning_rate": 8.415247364152475e-06, "loss": 0.6848, "step": 21281 }, { "epoch": 0.6213541210475606, "grad_norm": 0.6228565197909176, "learning_rate": 8.414598540145985e-06, "loss": 0.5773, "step": 21282 }, { "epoch": 0.621383317275408, "grad_norm": 0.613483660531963, "learning_rate": 8.413949716139497e-06, "loss": 0.5476, "step": 21283 }, { "epoch": 0.6214125135032553, "grad_norm": 0.608395418910807, "learning_rate": 8.41330089213301e-06, "loss": 0.5617, "step": 21284 }, { "epoch": 0.6214417097311027, "grad_norm": 0.6120317411171665, "learning_rate": 8.412652068126521e-06, "loss": 0.5567, "step": 21285 }, { "epoch": 0.62147090595895, "grad_norm": 0.6025840819460209, "learning_rate": 8.412003244120033e-06, "loss": 0.5234, "step": 21286 }, { "epoch": 0.6215001021867975, "grad_norm": 0.6597906383768543, "learning_rate": 8.411354420113544e-06, "loss": 0.5984, "step": 21287 }, { "epoch": 0.6215292984146449, "grad_norm": 0.6835070600802606, "learning_rate": 8.410705596107058e-06, "loss": 0.5688, "step": 21288 }, { "epoch": 0.6215584946424922, "grad_norm": 0.6437979940591989, "learning_rate": 8.41005677210057e-06, "loss": 0.6398, "step": 21289 }, { "epoch": 0.6215876908703396, "grad_norm": 0.6025591968561238, "learning_rate": 8.40940794809408e-06, "loss": 0.5034, "step": 21290 }, { "epoch": 0.621616887098187, "grad_norm": 0.6347090261498874, "learning_rate": 8.408759124087592e-06, "loss": 0.5857, "step": 21291 }, { "epoch": 0.6216460833260343, "grad_norm": 0.6568122402688253, "learning_rate": 8.408110300081104e-06, "loss": 0.6157, "step": 21292 }, { "epoch": 0.6216752795538817, "grad_norm": 0.6209700482138635, "learning_rate": 8.407461476074616e-06, "loss": 0.5806, "step": 21293 }, { "epoch": 0.621704475781729, "grad_norm": 0.6738150177374564, "learning_rate": 8.406812652068126e-06, "loss": 0.6165, "step": 21294 }, { "epoch": 0.6217336720095764, "grad_norm": 0.642470639867567, "learning_rate": 8.406163828061638e-06, "loss": 0.6137, "step": 21295 }, { "epoch": 0.6217628682374238, "grad_norm": 0.6034894514036863, "learning_rate": 8.40551500405515e-06, "loss": 0.5262, "step": 21296 }, { "epoch": 0.6217920644652711, "grad_norm": 0.6674606040771011, "learning_rate": 8.404866180048662e-06, "loss": 0.6347, "step": 21297 }, { "epoch": 0.6218212606931185, "grad_norm": 0.6157120640625487, "learning_rate": 8.404217356042174e-06, "loss": 0.5546, "step": 21298 }, { "epoch": 0.6218504569209659, "grad_norm": 0.7343872217328652, "learning_rate": 8.403568532035686e-06, "loss": 0.7733, "step": 21299 }, { "epoch": 0.6218796531488132, "grad_norm": 0.6012145889463351, "learning_rate": 8.402919708029198e-06, "loss": 0.5383, "step": 21300 }, { "epoch": 0.6219088493766606, "grad_norm": 0.5971483667936053, "learning_rate": 8.402270884022709e-06, "loss": 0.5302, "step": 21301 }, { "epoch": 0.6219380456045079, "grad_norm": 0.647471843817645, "learning_rate": 8.40162206001622e-06, "loss": 0.6339, "step": 21302 }, { "epoch": 0.6219672418323553, "grad_norm": 0.5922194188371368, "learning_rate": 8.400973236009733e-06, "loss": 0.5519, "step": 21303 }, { "epoch": 0.6219964380602027, "grad_norm": 0.6568685349083075, "learning_rate": 8.400324412003245e-06, "loss": 0.5801, "step": 21304 }, { "epoch": 0.62202563428805, "grad_norm": 0.6468497786759125, "learning_rate": 8.399675587996757e-06, "loss": 0.5768, "step": 21305 }, { "epoch": 0.6220548305158974, "grad_norm": 0.6135178705655345, "learning_rate": 8.399026763990267e-06, "loss": 0.5829, "step": 21306 }, { "epoch": 0.6220840267437447, "grad_norm": 0.6136874622134141, "learning_rate": 8.398377939983781e-06, "loss": 0.5611, "step": 21307 }, { "epoch": 0.6221132229715921, "grad_norm": 0.6346998038906826, "learning_rate": 8.397729115977293e-06, "loss": 0.602, "step": 21308 }, { "epoch": 0.6221424191994395, "grad_norm": 0.6661952507897658, "learning_rate": 8.397080291970803e-06, "loss": 0.567, "step": 21309 }, { "epoch": 0.6221716154272868, "grad_norm": 0.7596184028122955, "learning_rate": 8.396431467964315e-06, "loss": 0.7574, "step": 21310 }, { "epoch": 0.6222008116551342, "grad_norm": 0.6725282329550184, "learning_rate": 8.395782643957827e-06, "loss": 0.6515, "step": 21311 }, { "epoch": 0.6222300078829816, "grad_norm": 0.6570445934165042, "learning_rate": 8.39513381995134e-06, "loss": 0.6502, "step": 21312 }, { "epoch": 0.6222592041108289, "grad_norm": 0.6272161840795685, "learning_rate": 8.39448499594485e-06, "loss": 0.5993, "step": 21313 }, { "epoch": 0.6222884003386763, "grad_norm": 0.6434473334972921, "learning_rate": 8.393836171938362e-06, "loss": 0.5719, "step": 21314 }, { "epoch": 0.6223175965665236, "grad_norm": 0.691405012028488, "learning_rate": 8.393187347931874e-06, "loss": 0.6086, "step": 21315 }, { "epoch": 0.622346792794371, "grad_norm": 0.7024406827926625, "learning_rate": 8.392538523925386e-06, "loss": 0.6676, "step": 21316 }, { "epoch": 0.6223759890222184, "grad_norm": 0.6142399840676754, "learning_rate": 8.391889699918898e-06, "loss": 0.5556, "step": 21317 }, { "epoch": 0.6224051852500657, "grad_norm": 0.6434184579771257, "learning_rate": 8.39124087591241e-06, "loss": 0.6314, "step": 21318 }, { "epoch": 0.6224343814779131, "grad_norm": 0.6268608201939436, "learning_rate": 8.390592051905922e-06, "loss": 0.5988, "step": 21319 }, { "epoch": 0.6224635777057604, "grad_norm": 0.6351624274141733, "learning_rate": 8.389943227899432e-06, "loss": 0.6156, "step": 21320 }, { "epoch": 0.6224927739336078, "grad_norm": 0.598150793393895, "learning_rate": 8.389294403892944e-06, "loss": 0.5137, "step": 21321 }, { "epoch": 0.6225219701614552, "grad_norm": 0.6540473577242608, "learning_rate": 8.388645579886456e-06, "loss": 0.5567, "step": 21322 }, { "epoch": 0.6225511663893025, "grad_norm": 0.6667013669461432, "learning_rate": 8.387996755879968e-06, "loss": 0.6111, "step": 21323 }, { "epoch": 0.6225803626171499, "grad_norm": 0.6102084149327657, "learning_rate": 8.38734793187348e-06, "loss": 0.5533, "step": 21324 }, { "epoch": 0.6226095588449972, "grad_norm": 0.684630752050859, "learning_rate": 8.38669910786699e-06, "loss": 0.6919, "step": 21325 }, { "epoch": 0.6226387550728446, "grad_norm": 0.6202129856936746, "learning_rate": 8.386050283860504e-06, "loss": 0.5742, "step": 21326 }, { "epoch": 0.622667951300692, "grad_norm": 0.6391420270097253, "learning_rate": 8.385401459854016e-06, "loss": 0.6064, "step": 21327 }, { "epoch": 0.6226971475285393, "grad_norm": 0.6687256257100634, "learning_rate": 8.384752635847527e-06, "loss": 0.6433, "step": 21328 }, { "epoch": 0.6227263437563867, "grad_norm": 0.6120494124186864, "learning_rate": 8.384103811841039e-06, "loss": 0.5679, "step": 21329 }, { "epoch": 0.622755539984234, "grad_norm": 0.648267715540417, "learning_rate": 8.38345498783455e-06, "loss": 0.663, "step": 21330 }, { "epoch": 0.6227847362120814, "grad_norm": 0.6231808330283639, "learning_rate": 8.382806163828063e-06, "loss": 0.5867, "step": 21331 }, { "epoch": 0.6228139324399288, "grad_norm": 0.6347726130673271, "learning_rate": 8.382157339821573e-06, "loss": 0.6235, "step": 21332 }, { "epoch": 0.6228431286677761, "grad_norm": 0.6704813749601783, "learning_rate": 8.381508515815085e-06, "loss": 0.6472, "step": 21333 }, { "epoch": 0.6228723248956235, "grad_norm": 0.6273552294408276, "learning_rate": 8.380859691808597e-06, "loss": 0.6097, "step": 21334 }, { "epoch": 0.6229015211234709, "grad_norm": 0.6937169202732317, "learning_rate": 8.38021086780211e-06, "loss": 0.6772, "step": 21335 }, { "epoch": 0.6229307173513182, "grad_norm": 0.6589197805905803, "learning_rate": 8.379562043795621e-06, "loss": 0.6055, "step": 21336 }, { "epoch": 0.6229599135791656, "grad_norm": 0.614165980846175, "learning_rate": 8.378913219789133e-06, "loss": 0.5627, "step": 21337 }, { "epoch": 0.6229891098070129, "grad_norm": 0.6401827381375433, "learning_rate": 8.378264395782645e-06, "loss": 0.5927, "step": 21338 }, { "epoch": 0.6230183060348603, "grad_norm": 0.6523056834012931, "learning_rate": 8.377615571776156e-06, "loss": 0.6095, "step": 21339 }, { "epoch": 0.6230475022627077, "grad_norm": 0.6298427739928266, "learning_rate": 8.376966747769668e-06, "loss": 0.5643, "step": 21340 }, { "epoch": 0.623076698490555, "grad_norm": 0.6456157752803517, "learning_rate": 8.37631792376318e-06, "loss": 0.6448, "step": 21341 }, { "epoch": 0.6231058947184024, "grad_norm": 0.6188128300141466, "learning_rate": 8.375669099756692e-06, "loss": 0.5893, "step": 21342 }, { "epoch": 0.6231350909462497, "grad_norm": 0.7109583784906394, "learning_rate": 8.375020275750204e-06, "loss": 0.7108, "step": 21343 }, { "epoch": 0.6231642871740971, "grad_norm": 0.646226029096533, "learning_rate": 8.374371451743714e-06, "loss": 0.5972, "step": 21344 }, { "epoch": 0.6231934834019445, "grad_norm": 0.6510603116946894, "learning_rate": 8.373722627737228e-06, "loss": 0.5772, "step": 21345 }, { "epoch": 0.6232226796297918, "grad_norm": 0.6283395876219343, "learning_rate": 8.37307380373074e-06, "loss": 0.5563, "step": 21346 }, { "epoch": 0.6232518758576392, "grad_norm": 0.6576326765151043, "learning_rate": 8.37242497972425e-06, "loss": 0.6087, "step": 21347 }, { "epoch": 0.6232810720854866, "grad_norm": 0.6066987862541473, "learning_rate": 8.371776155717762e-06, "loss": 0.53, "step": 21348 }, { "epoch": 0.6233102683133339, "grad_norm": 0.6373541271987659, "learning_rate": 8.371127331711274e-06, "loss": 0.6031, "step": 21349 }, { "epoch": 0.6233394645411813, "grad_norm": 0.6480885335062644, "learning_rate": 8.370478507704786e-06, "loss": 0.6283, "step": 21350 }, { "epoch": 0.6233686607690286, "grad_norm": 0.6098462203373055, "learning_rate": 8.369829683698297e-06, "loss": 0.5754, "step": 21351 }, { "epoch": 0.623397856996876, "grad_norm": 0.6413553500614654, "learning_rate": 8.369180859691809e-06, "loss": 0.6001, "step": 21352 }, { "epoch": 0.6234270532247234, "grad_norm": 0.6624948616971742, "learning_rate": 8.36853203568532e-06, "loss": 0.5809, "step": 21353 }, { "epoch": 0.6234562494525707, "grad_norm": 0.6474602428412157, "learning_rate": 8.367883211678833e-06, "loss": 0.6077, "step": 21354 }, { "epoch": 0.6234854456804181, "grad_norm": 0.677453181438135, "learning_rate": 8.367234387672345e-06, "loss": 0.528, "step": 21355 }, { "epoch": 0.6235146419082654, "grad_norm": 0.6129142836414576, "learning_rate": 8.366585563665857e-06, "loss": 0.5435, "step": 21356 }, { "epoch": 0.6235438381361128, "grad_norm": 0.591033695285728, "learning_rate": 8.365936739659369e-06, "loss": 0.5611, "step": 21357 }, { "epoch": 0.6235730343639602, "grad_norm": 0.7080073760200825, "learning_rate": 8.365287915652879e-06, "loss": 0.6868, "step": 21358 }, { "epoch": 0.6236022305918075, "grad_norm": 0.6893092968542505, "learning_rate": 8.364639091646391e-06, "loss": 0.6407, "step": 21359 }, { "epoch": 0.6236314268196549, "grad_norm": 0.6245070892827537, "learning_rate": 8.363990267639903e-06, "loss": 0.5937, "step": 21360 }, { "epoch": 0.6236606230475022, "grad_norm": 0.6049918090612506, "learning_rate": 8.363341443633415e-06, "loss": 0.5437, "step": 21361 }, { "epoch": 0.6236898192753496, "grad_norm": 0.7304605504930647, "learning_rate": 8.362692619626927e-06, "loss": 0.61, "step": 21362 }, { "epoch": 0.623719015503197, "grad_norm": 0.6424037195356859, "learning_rate": 8.362043795620438e-06, "loss": 0.6305, "step": 21363 }, { "epoch": 0.6237482117310443, "grad_norm": 0.6362260417043201, "learning_rate": 8.361394971613951e-06, "loss": 0.5922, "step": 21364 }, { "epoch": 0.6237774079588917, "grad_norm": 0.6588891409254596, "learning_rate": 8.360746147607463e-06, "loss": 0.633, "step": 21365 }, { "epoch": 0.623806604186739, "grad_norm": 0.680321260806446, "learning_rate": 8.360097323600974e-06, "loss": 0.5852, "step": 21366 }, { "epoch": 0.6238358004145864, "grad_norm": 0.6598171406765648, "learning_rate": 8.359448499594486e-06, "loss": 0.6428, "step": 21367 }, { "epoch": 0.6238649966424338, "grad_norm": 0.6816261765282937, "learning_rate": 8.358799675587998e-06, "loss": 0.6674, "step": 21368 }, { "epoch": 0.6238941928702811, "grad_norm": 0.6739044159925303, "learning_rate": 8.35815085158151e-06, "loss": 0.6486, "step": 21369 }, { "epoch": 0.6239233890981285, "grad_norm": 0.6810969522590089, "learning_rate": 8.35750202757502e-06, "loss": 0.6346, "step": 21370 }, { "epoch": 0.6239525853259759, "grad_norm": 0.620348664757893, "learning_rate": 8.356853203568532e-06, "loss": 0.5575, "step": 21371 }, { "epoch": 0.6239817815538232, "grad_norm": 0.6080127749763908, "learning_rate": 8.356204379562044e-06, "loss": 0.5248, "step": 21372 }, { "epoch": 0.6240109777816706, "grad_norm": 0.5796349493523734, "learning_rate": 8.355555555555556e-06, "loss": 0.5172, "step": 21373 }, { "epoch": 0.624040174009518, "grad_norm": 0.6235501099123251, "learning_rate": 8.354906731549068e-06, "loss": 0.6051, "step": 21374 }, { "epoch": 0.6240693702373653, "grad_norm": 0.6624798437736426, "learning_rate": 8.35425790754258e-06, "loss": 0.647, "step": 21375 }, { "epoch": 0.6240985664652127, "grad_norm": 0.7227257282908307, "learning_rate": 8.353609083536092e-06, "loss": 0.6921, "step": 21376 }, { "epoch": 0.62412776269306, "grad_norm": 0.6231736833192798, "learning_rate": 8.352960259529603e-06, "loss": 0.5832, "step": 21377 }, { "epoch": 0.6241569589209074, "grad_norm": 0.6611612043905417, "learning_rate": 8.352311435523115e-06, "loss": 0.6563, "step": 21378 }, { "epoch": 0.6241861551487548, "grad_norm": 0.6527310650704736, "learning_rate": 8.351662611516627e-06, "loss": 0.6238, "step": 21379 }, { "epoch": 0.6242153513766021, "grad_norm": 0.6290005649589174, "learning_rate": 8.351013787510139e-06, "loss": 0.5752, "step": 21380 }, { "epoch": 0.6242445476044495, "grad_norm": 0.5827646434151976, "learning_rate": 8.35036496350365e-06, "loss": 0.5034, "step": 21381 }, { "epoch": 0.6242737438322968, "grad_norm": 0.6403740696068981, "learning_rate": 8.349716139497161e-06, "loss": 0.5979, "step": 21382 }, { "epoch": 0.6243029400601442, "grad_norm": 0.6400558555341509, "learning_rate": 8.349067315490675e-06, "loss": 0.6107, "step": 21383 }, { "epoch": 0.6243321362879916, "grad_norm": 0.6807669075180698, "learning_rate": 8.348418491484187e-06, "loss": 0.6598, "step": 21384 }, { "epoch": 0.6243613325158389, "grad_norm": 0.6924791908623437, "learning_rate": 8.347769667477697e-06, "loss": 0.6463, "step": 21385 }, { "epoch": 0.6243905287436863, "grad_norm": 0.6891959199626505, "learning_rate": 8.347120843471209e-06, "loss": 0.6742, "step": 21386 }, { "epoch": 0.6244197249715336, "grad_norm": 0.6479036935701198, "learning_rate": 8.346472019464721e-06, "loss": 0.6232, "step": 21387 }, { "epoch": 0.624448921199381, "grad_norm": 0.6615329371788682, "learning_rate": 8.345823195458233e-06, "loss": 0.6014, "step": 21388 }, { "epoch": 0.6244781174272284, "grad_norm": 0.7085953696866915, "learning_rate": 8.345174371451743e-06, "loss": 0.7091, "step": 21389 }, { "epoch": 0.6245073136550757, "grad_norm": 0.6380364499842764, "learning_rate": 8.344525547445255e-06, "loss": 0.6045, "step": 21390 }, { "epoch": 0.6245365098829231, "grad_norm": 0.6601036431873782, "learning_rate": 8.343876723438768e-06, "loss": 0.67, "step": 21391 }, { "epoch": 0.6245657061107704, "grad_norm": 0.710958450970904, "learning_rate": 8.34322789943228e-06, "loss": 0.726, "step": 21392 }, { "epoch": 0.6245949023386178, "grad_norm": 0.6690366802281182, "learning_rate": 8.342579075425792e-06, "loss": 0.6435, "step": 21393 }, { "epoch": 0.6246240985664652, "grad_norm": 0.6157845200502512, "learning_rate": 8.341930251419304e-06, "loss": 0.6206, "step": 21394 }, { "epoch": 0.6246532947943125, "grad_norm": 0.6321566590828045, "learning_rate": 8.341281427412816e-06, "loss": 0.6138, "step": 21395 }, { "epoch": 0.6246824910221599, "grad_norm": 0.6301319404545643, "learning_rate": 8.340632603406326e-06, "loss": 0.5635, "step": 21396 }, { "epoch": 0.6247116872500073, "grad_norm": 0.6071750651474564, "learning_rate": 8.339983779399838e-06, "loss": 0.5794, "step": 21397 }, { "epoch": 0.6247408834778546, "grad_norm": 0.6594255018390477, "learning_rate": 8.33933495539335e-06, "loss": 0.6286, "step": 21398 }, { "epoch": 0.624770079705702, "grad_norm": 0.6659149225908458, "learning_rate": 8.338686131386862e-06, "loss": 0.5673, "step": 21399 }, { "epoch": 0.6247992759335493, "grad_norm": 0.6325033037295927, "learning_rate": 8.338037307380374e-06, "loss": 0.577, "step": 21400 }, { "epoch": 0.6248284721613967, "grad_norm": 0.5753692805480131, "learning_rate": 8.337388483373886e-06, "loss": 0.4928, "step": 21401 }, { "epoch": 0.6248576683892441, "grad_norm": 0.6422438102310305, "learning_rate": 8.336739659367398e-06, "loss": 0.5946, "step": 21402 }, { "epoch": 0.6248868646170914, "grad_norm": 0.667668709454958, "learning_rate": 8.33609083536091e-06, "loss": 0.668, "step": 21403 }, { "epoch": 0.6249160608449388, "grad_norm": 0.6273929952502807, "learning_rate": 8.33544201135442e-06, "loss": 0.5866, "step": 21404 }, { "epoch": 0.6249452570727861, "grad_norm": 0.6009071690602251, "learning_rate": 8.334793187347933e-06, "loss": 0.5737, "step": 21405 }, { "epoch": 0.6249744533006335, "grad_norm": 0.6858716106214405, "learning_rate": 8.334144363341445e-06, "loss": 0.6222, "step": 21406 }, { "epoch": 0.625003649528481, "grad_norm": 0.6249188900455489, "learning_rate": 8.333495539334957e-06, "loss": 0.5907, "step": 21407 }, { "epoch": 0.6250328457563283, "grad_norm": 0.6272155348905449, "learning_rate": 8.332846715328467e-06, "loss": 0.5605, "step": 21408 }, { "epoch": 0.6250620419841757, "grad_norm": 0.578372878275031, "learning_rate": 8.332197891321979e-06, "loss": 0.5149, "step": 21409 }, { "epoch": 0.6250912382120231, "grad_norm": 0.5977314729369789, "learning_rate": 8.331549067315491e-06, "loss": 0.5476, "step": 21410 }, { "epoch": 0.6251204344398704, "grad_norm": 0.6868087040817599, "learning_rate": 8.330900243309003e-06, "loss": 0.6557, "step": 21411 }, { "epoch": 0.6251496306677178, "grad_norm": 0.6432312852105403, "learning_rate": 8.330251419302515e-06, "loss": 0.5661, "step": 21412 }, { "epoch": 0.6251788268955651, "grad_norm": 0.6175677818811388, "learning_rate": 8.329602595296027e-06, "loss": 0.561, "step": 21413 }, { "epoch": 0.6252080231234125, "grad_norm": 0.5922217261098536, "learning_rate": 8.328953771289539e-06, "loss": 0.5032, "step": 21414 }, { "epoch": 0.6252372193512599, "grad_norm": 0.6425061432081868, "learning_rate": 8.32830494728305e-06, "loss": 0.571, "step": 21415 }, { "epoch": 0.6252664155791072, "grad_norm": 0.6649063308143572, "learning_rate": 8.327656123276561e-06, "loss": 0.6501, "step": 21416 }, { "epoch": 0.6252956118069546, "grad_norm": 0.642371489255574, "learning_rate": 8.327007299270073e-06, "loss": 0.6183, "step": 21417 }, { "epoch": 0.625324808034802, "grad_norm": 0.644685388477359, "learning_rate": 8.326358475263585e-06, "loss": 0.63, "step": 21418 }, { "epoch": 0.6253540042626493, "grad_norm": 0.6929814874869127, "learning_rate": 8.325709651257098e-06, "loss": 0.6815, "step": 21419 }, { "epoch": 0.6253832004904967, "grad_norm": 0.6323996483278206, "learning_rate": 8.32506082725061e-06, "loss": 0.5764, "step": 21420 }, { "epoch": 0.625412396718344, "grad_norm": 0.66565658382755, "learning_rate": 8.324412003244122e-06, "loss": 0.6432, "step": 21421 }, { "epoch": 0.6254415929461914, "grad_norm": 0.6398634937600235, "learning_rate": 8.323763179237634e-06, "loss": 0.5896, "step": 21422 }, { "epoch": 0.6254707891740388, "grad_norm": 0.6163904606175101, "learning_rate": 8.323114355231144e-06, "loss": 0.5918, "step": 21423 }, { "epoch": 0.6254999854018861, "grad_norm": 0.6186562695744456, "learning_rate": 8.322465531224656e-06, "loss": 0.5665, "step": 21424 }, { "epoch": 0.6255291816297335, "grad_norm": 0.6422117795631084, "learning_rate": 8.321816707218168e-06, "loss": 0.593, "step": 21425 }, { "epoch": 0.6255583778575808, "grad_norm": 0.6476884213419815, "learning_rate": 8.32116788321168e-06, "loss": 0.5929, "step": 21426 }, { "epoch": 0.6255875740854282, "grad_norm": 0.6585016839253203, "learning_rate": 8.32051905920519e-06, "loss": 0.5832, "step": 21427 }, { "epoch": 0.6256167703132756, "grad_norm": 0.6354378867520025, "learning_rate": 8.319870235198702e-06, "loss": 0.5931, "step": 21428 }, { "epoch": 0.6256459665411229, "grad_norm": 0.6051074439813127, "learning_rate": 8.319221411192214e-06, "loss": 0.5073, "step": 21429 }, { "epoch": 0.6256751627689703, "grad_norm": 0.6165646315265826, "learning_rate": 8.318572587185726e-06, "loss": 0.5944, "step": 21430 }, { "epoch": 0.6257043589968176, "grad_norm": 0.7274762176078161, "learning_rate": 8.317923763179238e-06, "loss": 0.6633, "step": 21431 }, { "epoch": 0.625733555224665, "grad_norm": 0.5975626503070742, "learning_rate": 8.31727493917275e-06, "loss": 0.5269, "step": 21432 }, { "epoch": 0.6257627514525124, "grad_norm": 0.6371490471605221, "learning_rate": 8.316626115166263e-06, "loss": 0.5934, "step": 21433 }, { "epoch": 0.6257919476803597, "grad_norm": 0.6051419576971003, "learning_rate": 8.315977291159773e-06, "loss": 0.5538, "step": 21434 }, { "epoch": 0.6258211439082071, "grad_norm": 0.6130749077851946, "learning_rate": 8.315328467153285e-06, "loss": 0.6162, "step": 21435 }, { "epoch": 0.6258503401360545, "grad_norm": 0.668194470588708, "learning_rate": 8.314679643146797e-06, "loss": 0.6666, "step": 21436 }, { "epoch": 0.6258795363639018, "grad_norm": 0.6516369190206855, "learning_rate": 8.314030819140309e-06, "loss": 0.6172, "step": 21437 }, { "epoch": 0.6259087325917492, "grad_norm": 0.6473530964795366, "learning_rate": 8.313381995133821e-06, "loss": 0.6032, "step": 21438 }, { "epoch": 0.6259379288195965, "grad_norm": 0.6085419171560957, "learning_rate": 8.312733171127333e-06, "loss": 0.5581, "step": 21439 }, { "epoch": 0.6259671250474439, "grad_norm": 0.7010491082510558, "learning_rate": 8.312084347120845e-06, "loss": 0.6707, "step": 21440 }, { "epoch": 0.6259963212752913, "grad_norm": 0.6529999131665299, "learning_rate": 8.311435523114357e-06, "loss": 0.5944, "step": 21441 }, { "epoch": 0.6260255175031386, "grad_norm": 0.6609421422781318, "learning_rate": 8.310786699107867e-06, "loss": 0.6653, "step": 21442 }, { "epoch": 0.626054713730986, "grad_norm": 0.6668869714441482, "learning_rate": 8.31013787510138e-06, "loss": 0.6137, "step": 21443 }, { "epoch": 0.6260839099588333, "grad_norm": 0.67161559671806, "learning_rate": 8.309489051094891e-06, "loss": 0.6772, "step": 21444 }, { "epoch": 0.6261131061866807, "grad_norm": 0.6279142221181875, "learning_rate": 8.308840227088403e-06, "loss": 0.5961, "step": 21445 }, { "epoch": 0.6261423024145281, "grad_norm": 0.6331397989056844, "learning_rate": 8.308191403081914e-06, "loss": 0.6259, "step": 21446 }, { "epoch": 0.6261714986423754, "grad_norm": 0.6800691608644541, "learning_rate": 8.307542579075426e-06, "loss": 0.6151, "step": 21447 }, { "epoch": 0.6262006948702228, "grad_norm": 0.6523279823495884, "learning_rate": 8.306893755068938e-06, "loss": 0.637, "step": 21448 }, { "epoch": 0.6262298910980701, "grad_norm": 0.6584185995178913, "learning_rate": 8.30624493106245e-06, "loss": 0.6773, "step": 21449 }, { "epoch": 0.6262590873259175, "grad_norm": 0.6125668094425601, "learning_rate": 8.305596107055962e-06, "loss": 0.555, "step": 21450 }, { "epoch": 0.6262882835537649, "grad_norm": 0.6569442142965447, "learning_rate": 8.304947283049474e-06, "loss": 0.6498, "step": 21451 }, { "epoch": 0.6263174797816122, "grad_norm": 0.6121295810131968, "learning_rate": 8.304298459042986e-06, "loss": 0.5578, "step": 21452 }, { "epoch": 0.6263466760094596, "grad_norm": 0.592768567594757, "learning_rate": 8.303649635036496e-06, "loss": 0.5203, "step": 21453 }, { "epoch": 0.626375872237307, "grad_norm": 0.6573343995973638, "learning_rate": 8.303000811030008e-06, "loss": 0.6387, "step": 21454 }, { "epoch": 0.6264050684651543, "grad_norm": 0.6396262865935743, "learning_rate": 8.30235198702352e-06, "loss": 0.5593, "step": 21455 }, { "epoch": 0.6264342646930017, "grad_norm": 0.6103926980503324, "learning_rate": 8.301703163017032e-06, "loss": 0.5478, "step": 21456 }, { "epoch": 0.626463460920849, "grad_norm": 0.6486721376103572, "learning_rate": 8.301054339010544e-06, "loss": 0.592, "step": 21457 }, { "epoch": 0.6264926571486964, "grad_norm": 0.5978218044821603, "learning_rate": 8.300405515004056e-06, "loss": 0.5646, "step": 21458 }, { "epoch": 0.6265218533765438, "grad_norm": 0.6920645991426407, "learning_rate": 8.299756690997568e-06, "loss": 0.6305, "step": 21459 }, { "epoch": 0.6265510496043911, "grad_norm": 0.6038674284555193, "learning_rate": 8.29910786699108e-06, "loss": 0.5798, "step": 21460 }, { "epoch": 0.6265802458322385, "grad_norm": 0.665199946418214, "learning_rate": 8.29845904298459e-06, "loss": 0.6618, "step": 21461 }, { "epoch": 0.6266094420600858, "grad_norm": 0.6508404250062199, "learning_rate": 8.297810218978103e-06, "loss": 0.5782, "step": 21462 }, { "epoch": 0.6266386382879332, "grad_norm": 0.6071067572936819, "learning_rate": 8.297161394971615e-06, "loss": 0.5544, "step": 21463 }, { "epoch": 0.6266678345157806, "grad_norm": 0.6438522008958926, "learning_rate": 8.296512570965127e-06, "loss": 0.6144, "step": 21464 }, { "epoch": 0.6266970307436279, "grad_norm": 0.6057510348007278, "learning_rate": 8.295863746958637e-06, "loss": 0.5245, "step": 21465 }, { "epoch": 0.6267262269714753, "grad_norm": 0.7055933541269346, "learning_rate": 8.29521492295215e-06, "loss": 0.7226, "step": 21466 }, { "epoch": 0.6267554231993226, "grad_norm": 0.6917203614778071, "learning_rate": 8.294566098945661e-06, "loss": 0.6825, "step": 21467 }, { "epoch": 0.62678461942717, "grad_norm": 0.6472515661838544, "learning_rate": 8.293917274939173e-06, "loss": 0.5959, "step": 21468 }, { "epoch": 0.6268138156550174, "grad_norm": 0.617711792997819, "learning_rate": 8.293268450932685e-06, "loss": 0.5938, "step": 21469 }, { "epoch": 0.6268430118828647, "grad_norm": 0.6344456868385813, "learning_rate": 8.292619626926197e-06, "loss": 0.6048, "step": 21470 }, { "epoch": 0.6268722081107121, "grad_norm": 0.6297579093693326, "learning_rate": 8.29197080291971e-06, "loss": 0.5804, "step": 21471 }, { "epoch": 0.6269014043385595, "grad_norm": 0.6507256391330862, "learning_rate": 8.29132197891322e-06, "loss": 0.6383, "step": 21472 }, { "epoch": 0.6269306005664068, "grad_norm": 0.6821075747192088, "learning_rate": 8.290673154906732e-06, "loss": 0.7288, "step": 21473 }, { "epoch": 0.6269597967942542, "grad_norm": 0.6727526543479205, "learning_rate": 8.290024330900244e-06, "loss": 0.6366, "step": 21474 }, { "epoch": 0.6269889930221015, "grad_norm": 0.6292710389124195, "learning_rate": 8.289375506893756e-06, "loss": 0.6463, "step": 21475 }, { "epoch": 0.6270181892499489, "grad_norm": 0.5920682831010954, "learning_rate": 8.288726682887268e-06, "loss": 0.5465, "step": 21476 }, { "epoch": 0.6270473854777963, "grad_norm": 0.6920100943675696, "learning_rate": 8.28807785888078e-06, "loss": 0.6468, "step": 21477 }, { "epoch": 0.6270765817056436, "grad_norm": 0.6341637159184856, "learning_rate": 8.287429034874292e-06, "loss": 0.5856, "step": 21478 }, { "epoch": 0.627105777933491, "grad_norm": 0.6193927931486991, "learning_rate": 8.286780210867802e-06, "loss": 0.5886, "step": 21479 }, { "epoch": 0.6271349741613383, "grad_norm": 0.646996152309469, "learning_rate": 8.286131386861314e-06, "loss": 0.6562, "step": 21480 }, { "epoch": 0.6271641703891857, "grad_norm": 0.6358351911895446, "learning_rate": 8.285482562854826e-06, "loss": 0.6001, "step": 21481 }, { "epoch": 0.6271933666170331, "grad_norm": 0.6253309273850011, "learning_rate": 8.284833738848338e-06, "loss": 0.5642, "step": 21482 }, { "epoch": 0.6272225628448804, "grad_norm": 0.6988222267090519, "learning_rate": 8.28418491484185e-06, "loss": 0.6869, "step": 21483 }, { "epoch": 0.6272517590727278, "grad_norm": 0.7175999298920126, "learning_rate": 8.28353609083536e-06, "loss": 0.6419, "step": 21484 }, { "epoch": 0.6272809553005751, "grad_norm": 0.6701962032424938, "learning_rate": 8.282887266828873e-06, "loss": 0.6719, "step": 21485 }, { "epoch": 0.6273101515284225, "grad_norm": 0.6428879562166245, "learning_rate": 8.282238442822386e-06, "loss": 0.5969, "step": 21486 }, { "epoch": 0.6273393477562699, "grad_norm": 0.6090678518414743, "learning_rate": 8.281589618815897e-06, "loss": 0.5879, "step": 21487 }, { "epoch": 0.6273685439841172, "grad_norm": 0.6508651493275306, "learning_rate": 8.280940794809409e-06, "loss": 0.5651, "step": 21488 }, { "epoch": 0.6273977402119646, "grad_norm": 0.6543889079609102, "learning_rate": 8.28029197080292e-06, "loss": 0.5679, "step": 21489 }, { "epoch": 0.627426936439812, "grad_norm": 0.6447865415431552, "learning_rate": 8.279643146796433e-06, "loss": 0.6036, "step": 21490 }, { "epoch": 0.6274561326676593, "grad_norm": 0.6564245478183808, "learning_rate": 8.278994322789943e-06, "loss": 0.6397, "step": 21491 }, { "epoch": 0.6274853288955067, "grad_norm": 0.6776235145173155, "learning_rate": 8.278345498783455e-06, "loss": 0.6488, "step": 21492 }, { "epoch": 0.627514525123354, "grad_norm": 0.6746927880552814, "learning_rate": 8.277696674776967e-06, "loss": 0.6911, "step": 21493 }, { "epoch": 0.6275437213512014, "grad_norm": 0.6685571527142917, "learning_rate": 8.27704785077048e-06, "loss": 0.605, "step": 21494 }, { "epoch": 0.6275729175790488, "grad_norm": 0.6251975443913191, "learning_rate": 8.276399026763991e-06, "loss": 0.6004, "step": 21495 }, { "epoch": 0.6276021138068961, "grad_norm": 0.6499826757150358, "learning_rate": 8.275750202757503e-06, "loss": 0.653, "step": 21496 }, { "epoch": 0.6276313100347435, "grad_norm": 0.6213008595106851, "learning_rate": 8.275101378751015e-06, "loss": 0.5725, "step": 21497 }, { "epoch": 0.6276605062625908, "grad_norm": 0.616271770227154, "learning_rate": 8.274452554744526e-06, "loss": 0.5197, "step": 21498 }, { "epoch": 0.6276897024904382, "grad_norm": 0.5984216949726537, "learning_rate": 8.273803730738038e-06, "loss": 0.5282, "step": 21499 }, { "epoch": 0.6277188987182856, "grad_norm": 0.5919985449401274, "learning_rate": 8.27315490673155e-06, "loss": 0.5447, "step": 21500 }, { "epoch": 0.6277480949461329, "grad_norm": 0.6424426223364511, "learning_rate": 8.272506082725062e-06, "loss": 0.6632, "step": 21501 }, { "epoch": 0.6277772911739803, "grad_norm": 0.6280746160572733, "learning_rate": 8.271857258718574e-06, "loss": 0.5715, "step": 21502 }, { "epoch": 0.6278064874018277, "grad_norm": 0.643526012301984, "learning_rate": 8.271208434712084e-06, "loss": 0.5852, "step": 21503 }, { "epoch": 0.627835683629675, "grad_norm": 0.655980869756206, "learning_rate": 8.270559610705596e-06, "loss": 0.6322, "step": 21504 }, { "epoch": 0.6278648798575224, "grad_norm": 0.5977644953424679, "learning_rate": 8.26991078669911e-06, "loss": 0.5555, "step": 21505 }, { "epoch": 0.6278940760853697, "grad_norm": 0.6375925526016922, "learning_rate": 8.26926196269262e-06, "loss": 0.6018, "step": 21506 }, { "epoch": 0.6279232723132171, "grad_norm": 0.6435163464622717, "learning_rate": 8.268613138686132e-06, "loss": 0.6274, "step": 21507 }, { "epoch": 0.6279524685410645, "grad_norm": 0.6010575896424424, "learning_rate": 8.267964314679644e-06, "loss": 0.5578, "step": 21508 }, { "epoch": 0.6279816647689118, "grad_norm": 0.6524045001035773, "learning_rate": 8.267315490673156e-06, "loss": 0.6164, "step": 21509 }, { "epoch": 0.6280108609967592, "grad_norm": 0.6045809669076401, "learning_rate": 8.266666666666667e-06, "loss": 0.5343, "step": 21510 }, { "epoch": 0.6280400572246065, "grad_norm": 0.6655301554296993, "learning_rate": 8.266017842660179e-06, "loss": 0.6341, "step": 21511 }, { "epoch": 0.6280692534524539, "grad_norm": 0.6274776613379192, "learning_rate": 8.26536901865369e-06, "loss": 0.5768, "step": 21512 }, { "epoch": 0.6280984496803013, "grad_norm": 0.6559395013382577, "learning_rate": 8.264720194647203e-06, "loss": 0.6755, "step": 21513 }, { "epoch": 0.6281276459081486, "grad_norm": 0.6444885125999259, "learning_rate": 8.264071370640715e-06, "loss": 0.5684, "step": 21514 }, { "epoch": 0.628156842135996, "grad_norm": 0.654690917159265, "learning_rate": 8.263422546634227e-06, "loss": 0.6096, "step": 21515 }, { "epoch": 0.6281860383638433, "grad_norm": 0.6502453471667617, "learning_rate": 8.262773722627739e-06, "loss": 0.6165, "step": 21516 }, { "epoch": 0.6282152345916907, "grad_norm": 0.6401199127062178, "learning_rate": 8.262124898621249e-06, "loss": 0.5947, "step": 21517 }, { "epoch": 0.6282444308195381, "grad_norm": 0.6386614191886321, "learning_rate": 8.261476074614761e-06, "loss": 0.5904, "step": 21518 }, { "epoch": 0.6282736270473854, "grad_norm": 0.6853496739868589, "learning_rate": 8.260827250608273e-06, "loss": 0.6567, "step": 21519 }, { "epoch": 0.6283028232752328, "grad_norm": 0.6067701880993185, "learning_rate": 8.260178426601785e-06, "loss": 0.5534, "step": 21520 }, { "epoch": 0.6283320195030802, "grad_norm": 0.6621469037901468, "learning_rate": 8.259529602595297e-06, "loss": 0.6284, "step": 21521 }, { "epoch": 0.6283612157309275, "grad_norm": 0.6848121936398988, "learning_rate": 8.258880778588808e-06, "loss": 0.7106, "step": 21522 }, { "epoch": 0.6283904119587749, "grad_norm": 0.6645043789624328, "learning_rate": 8.25823195458232e-06, "loss": 0.6037, "step": 21523 }, { "epoch": 0.6284196081866222, "grad_norm": 0.5906731924334663, "learning_rate": 8.257583130575833e-06, "loss": 0.525, "step": 21524 }, { "epoch": 0.6284488044144696, "grad_norm": 0.6295727642636587, "learning_rate": 8.256934306569344e-06, "loss": 0.6176, "step": 21525 }, { "epoch": 0.628478000642317, "grad_norm": 0.6748441334680184, "learning_rate": 8.256285482562856e-06, "loss": 0.6072, "step": 21526 }, { "epoch": 0.6285071968701643, "grad_norm": 0.689372584734117, "learning_rate": 8.255636658556368e-06, "loss": 0.6761, "step": 21527 }, { "epoch": 0.6285363930980118, "grad_norm": 0.5998315035766068, "learning_rate": 8.25498783454988e-06, "loss": 0.5458, "step": 21528 }, { "epoch": 0.6285655893258592, "grad_norm": 0.7167356427865784, "learning_rate": 8.25433901054339e-06, "loss": 0.644, "step": 21529 }, { "epoch": 0.6285947855537065, "grad_norm": 0.6232323327526038, "learning_rate": 8.253690186536902e-06, "loss": 0.5845, "step": 21530 }, { "epoch": 0.6286239817815539, "grad_norm": 0.6608345958229608, "learning_rate": 8.253041362530414e-06, "loss": 0.6478, "step": 21531 }, { "epoch": 0.6286531780094012, "grad_norm": 0.6646850760348136, "learning_rate": 8.252392538523926e-06, "loss": 0.6131, "step": 21532 }, { "epoch": 0.6286823742372486, "grad_norm": 0.6658585585493222, "learning_rate": 8.251743714517438e-06, "loss": 0.6524, "step": 21533 }, { "epoch": 0.628711570465096, "grad_norm": 0.6407120642092702, "learning_rate": 8.25109489051095e-06, "loss": 0.6216, "step": 21534 }, { "epoch": 0.6287407666929433, "grad_norm": 0.6678826057538427, "learning_rate": 8.250446066504462e-06, "loss": 0.6229, "step": 21535 }, { "epoch": 0.6287699629207907, "grad_norm": 0.6488269451267591, "learning_rate": 8.249797242497973e-06, "loss": 0.5856, "step": 21536 }, { "epoch": 0.628799159148638, "grad_norm": 0.653148935257757, "learning_rate": 8.249148418491485e-06, "loss": 0.6192, "step": 21537 }, { "epoch": 0.6288283553764854, "grad_norm": 0.6646074517260515, "learning_rate": 8.248499594484997e-06, "loss": 0.6095, "step": 21538 }, { "epoch": 0.6288575516043328, "grad_norm": 0.6345628399025482, "learning_rate": 8.247850770478509e-06, "loss": 0.5533, "step": 21539 }, { "epoch": 0.6288867478321801, "grad_norm": 0.6294096449734401, "learning_rate": 8.24720194647202e-06, "loss": 0.6149, "step": 21540 }, { "epoch": 0.6289159440600275, "grad_norm": 0.6286933020664339, "learning_rate": 8.246553122465531e-06, "loss": 0.5661, "step": 21541 }, { "epoch": 0.6289451402878748, "grad_norm": 0.6394633778766656, "learning_rate": 8.245904298459043e-06, "loss": 0.5862, "step": 21542 }, { "epoch": 0.6289743365157222, "grad_norm": 0.6393767362760423, "learning_rate": 8.245255474452557e-06, "loss": 0.5982, "step": 21543 }, { "epoch": 0.6290035327435696, "grad_norm": 0.6401093120999515, "learning_rate": 8.244606650446067e-06, "loss": 0.6205, "step": 21544 }, { "epoch": 0.6290327289714169, "grad_norm": 0.6088278275850476, "learning_rate": 8.243957826439579e-06, "loss": 0.5313, "step": 21545 }, { "epoch": 0.6290619251992643, "grad_norm": 0.6712260476766021, "learning_rate": 8.243309002433091e-06, "loss": 0.6183, "step": 21546 }, { "epoch": 0.6290911214271117, "grad_norm": 0.640727717553206, "learning_rate": 8.242660178426603e-06, "loss": 0.5435, "step": 21547 }, { "epoch": 0.629120317654959, "grad_norm": 0.7177955682029048, "learning_rate": 8.242011354420113e-06, "loss": 0.6694, "step": 21548 }, { "epoch": 0.6291495138828064, "grad_norm": 0.6755724123270785, "learning_rate": 8.241362530413625e-06, "loss": 0.6454, "step": 21549 }, { "epoch": 0.6291787101106537, "grad_norm": 0.641972298620241, "learning_rate": 8.240713706407137e-06, "loss": 0.5933, "step": 21550 }, { "epoch": 0.6292079063385011, "grad_norm": 0.6695768033488392, "learning_rate": 8.24006488240065e-06, "loss": 0.6355, "step": 21551 }, { "epoch": 0.6292371025663485, "grad_norm": 0.6672130059388003, "learning_rate": 8.239416058394162e-06, "loss": 0.6219, "step": 21552 }, { "epoch": 0.6292662987941958, "grad_norm": 0.7163760571157265, "learning_rate": 8.238767234387674e-06, "loss": 0.6557, "step": 21553 }, { "epoch": 0.6292954950220432, "grad_norm": 0.6187241371570005, "learning_rate": 8.238118410381186e-06, "loss": 0.6031, "step": 21554 }, { "epoch": 0.6293246912498905, "grad_norm": 0.645107354011954, "learning_rate": 8.237469586374696e-06, "loss": 0.6262, "step": 21555 }, { "epoch": 0.6293538874777379, "grad_norm": 0.6639479978886287, "learning_rate": 8.236820762368208e-06, "loss": 0.6422, "step": 21556 }, { "epoch": 0.6293830837055853, "grad_norm": 0.6177376093352362, "learning_rate": 8.23617193836172e-06, "loss": 0.5781, "step": 21557 }, { "epoch": 0.6294122799334326, "grad_norm": 0.6628837717717772, "learning_rate": 8.235523114355232e-06, "loss": 0.6194, "step": 21558 }, { "epoch": 0.62944147616128, "grad_norm": 0.6751764757125477, "learning_rate": 8.234874290348744e-06, "loss": 0.6775, "step": 21559 }, { "epoch": 0.6294706723891274, "grad_norm": 0.6685222366494004, "learning_rate": 8.234225466342254e-06, "loss": 0.6815, "step": 21560 }, { "epoch": 0.6294998686169747, "grad_norm": 0.6306758386196947, "learning_rate": 8.233576642335766e-06, "loss": 0.6137, "step": 21561 }, { "epoch": 0.6295290648448221, "grad_norm": 0.681919821611512, "learning_rate": 8.23292781832928e-06, "loss": 0.6664, "step": 21562 }, { "epoch": 0.6295582610726694, "grad_norm": 0.6694832291882932, "learning_rate": 8.23227899432279e-06, "loss": 0.6461, "step": 21563 }, { "epoch": 0.6295874573005168, "grad_norm": 0.7347069603815052, "learning_rate": 8.231630170316302e-06, "loss": 0.5831, "step": 21564 }, { "epoch": 0.6296166535283642, "grad_norm": 0.6384203294389948, "learning_rate": 8.230981346309815e-06, "loss": 0.5444, "step": 21565 }, { "epoch": 0.6296458497562115, "grad_norm": 0.6259961699834558, "learning_rate": 8.230332522303327e-06, "loss": 0.5036, "step": 21566 }, { "epoch": 0.6296750459840589, "grad_norm": 0.6542165188890587, "learning_rate": 8.229683698296837e-06, "loss": 0.6433, "step": 21567 }, { "epoch": 0.6297042422119062, "grad_norm": 0.6046046964818235, "learning_rate": 8.229034874290349e-06, "loss": 0.5558, "step": 21568 }, { "epoch": 0.6297334384397536, "grad_norm": 0.6316383400428933, "learning_rate": 8.228386050283861e-06, "loss": 0.5778, "step": 21569 }, { "epoch": 0.629762634667601, "grad_norm": 0.6484954315728567, "learning_rate": 8.227737226277373e-06, "loss": 0.5861, "step": 21570 }, { "epoch": 0.6297918308954483, "grad_norm": 0.6210486799593422, "learning_rate": 8.227088402270885e-06, "loss": 0.5585, "step": 21571 }, { "epoch": 0.6298210271232957, "grad_norm": 0.6396596733002025, "learning_rate": 8.226439578264397e-06, "loss": 0.6099, "step": 21572 }, { "epoch": 0.629850223351143, "grad_norm": 0.7120491956952453, "learning_rate": 8.225790754257909e-06, "loss": 0.7515, "step": 21573 }, { "epoch": 0.6298794195789904, "grad_norm": 0.6701108983476781, "learning_rate": 8.22514193025142e-06, "loss": 0.6065, "step": 21574 }, { "epoch": 0.6299086158068378, "grad_norm": 0.7133229535675808, "learning_rate": 8.224493106244931e-06, "loss": 0.6362, "step": 21575 }, { "epoch": 0.6299378120346851, "grad_norm": 0.5986180229137774, "learning_rate": 8.223844282238443e-06, "loss": 0.5603, "step": 21576 }, { "epoch": 0.6299670082625325, "grad_norm": 0.6416369179913475, "learning_rate": 8.223195458231955e-06, "loss": 0.6255, "step": 21577 }, { "epoch": 0.6299962044903799, "grad_norm": 0.6119308502454653, "learning_rate": 8.222546634225467e-06, "loss": 0.5767, "step": 21578 }, { "epoch": 0.6300254007182272, "grad_norm": 0.7078764388341059, "learning_rate": 8.221897810218978e-06, "loss": 0.6702, "step": 21579 }, { "epoch": 0.6300545969460746, "grad_norm": 0.6372870966821008, "learning_rate": 8.22124898621249e-06, "loss": 0.5915, "step": 21580 }, { "epoch": 0.6300837931739219, "grad_norm": 0.6281922401224063, "learning_rate": 8.220600162206004e-06, "loss": 0.595, "step": 21581 }, { "epoch": 0.6301129894017693, "grad_norm": 0.6844341671266925, "learning_rate": 8.219951338199514e-06, "loss": 0.6823, "step": 21582 }, { "epoch": 0.6301421856296167, "grad_norm": 0.6138183222209974, "learning_rate": 8.219302514193026e-06, "loss": 0.5778, "step": 21583 }, { "epoch": 0.630171381857464, "grad_norm": 0.6602907850945658, "learning_rate": 8.218653690186538e-06, "loss": 0.629, "step": 21584 }, { "epoch": 0.6302005780853114, "grad_norm": 0.6810170183481338, "learning_rate": 8.21800486618005e-06, "loss": 0.6088, "step": 21585 }, { "epoch": 0.6302297743131587, "grad_norm": 0.6075633608657148, "learning_rate": 8.21735604217356e-06, "loss": 0.543, "step": 21586 }, { "epoch": 0.6302589705410061, "grad_norm": 0.7061242427915032, "learning_rate": 8.216707218167072e-06, "loss": 0.589, "step": 21587 }, { "epoch": 0.6302881667688535, "grad_norm": 0.6538500691970314, "learning_rate": 8.216058394160584e-06, "loss": 0.6202, "step": 21588 }, { "epoch": 0.6303173629967008, "grad_norm": 0.6539588026493814, "learning_rate": 8.215409570154096e-06, "loss": 0.5883, "step": 21589 }, { "epoch": 0.6303465592245482, "grad_norm": 0.6883249495474292, "learning_rate": 8.214760746147608e-06, "loss": 0.6321, "step": 21590 }, { "epoch": 0.6303757554523955, "grad_norm": 0.6748455586017726, "learning_rate": 8.21411192214112e-06, "loss": 0.6574, "step": 21591 }, { "epoch": 0.6304049516802429, "grad_norm": 0.6031822574487405, "learning_rate": 8.213463098134632e-06, "loss": 0.5314, "step": 21592 }, { "epoch": 0.6304341479080903, "grad_norm": 0.6483080089299336, "learning_rate": 8.212814274128143e-06, "loss": 0.6008, "step": 21593 }, { "epoch": 0.6304633441359376, "grad_norm": 0.6880779560783594, "learning_rate": 8.212165450121655e-06, "loss": 0.5658, "step": 21594 }, { "epoch": 0.630492540363785, "grad_norm": 0.5758681244872563, "learning_rate": 8.211516626115167e-06, "loss": 0.5406, "step": 21595 }, { "epoch": 0.6305217365916324, "grad_norm": 0.6307386863638376, "learning_rate": 8.210867802108679e-06, "loss": 0.5733, "step": 21596 }, { "epoch": 0.6305509328194797, "grad_norm": 0.6257186670572296, "learning_rate": 8.210218978102191e-06, "loss": 0.6102, "step": 21597 }, { "epoch": 0.6305801290473271, "grad_norm": 0.6511338247032593, "learning_rate": 8.209570154095701e-06, "loss": 0.6085, "step": 21598 }, { "epoch": 0.6306093252751744, "grad_norm": 0.6821331391613229, "learning_rate": 8.208921330089213e-06, "loss": 0.6272, "step": 21599 }, { "epoch": 0.6306385215030218, "grad_norm": 0.6610577141206269, "learning_rate": 8.208272506082727e-06, "loss": 0.6264, "step": 21600 }, { "epoch": 0.6306677177308692, "grad_norm": 0.6854838503556145, "learning_rate": 8.207623682076237e-06, "loss": 0.6501, "step": 21601 }, { "epoch": 0.6306969139587165, "grad_norm": 0.6917830382219643, "learning_rate": 8.20697485806975e-06, "loss": 0.6109, "step": 21602 }, { "epoch": 0.6307261101865639, "grad_norm": 0.6348527835018372, "learning_rate": 8.206326034063261e-06, "loss": 0.5826, "step": 21603 }, { "epoch": 0.6307553064144112, "grad_norm": 0.6611675456473689, "learning_rate": 8.205677210056773e-06, "loss": 0.6377, "step": 21604 }, { "epoch": 0.6307845026422586, "grad_norm": 0.6127431063353979, "learning_rate": 8.205028386050284e-06, "loss": 0.5821, "step": 21605 }, { "epoch": 0.630813698870106, "grad_norm": 0.667745488448759, "learning_rate": 8.204379562043796e-06, "loss": 0.6747, "step": 21606 }, { "epoch": 0.6308428950979533, "grad_norm": 0.6819108776465332, "learning_rate": 8.203730738037308e-06, "loss": 0.698, "step": 21607 }, { "epoch": 0.6308720913258007, "grad_norm": 0.6560028211600943, "learning_rate": 8.20308191403082e-06, "loss": 0.5488, "step": 21608 }, { "epoch": 0.630901287553648, "grad_norm": 0.6562163238849252, "learning_rate": 8.202433090024332e-06, "loss": 0.6384, "step": 21609 }, { "epoch": 0.6309304837814954, "grad_norm": 0.641411508978661, "learning_rate": 8.201784266017844e-06, "loss": 0.5828, "step": 21610 }, { "epoch": 0.6309596800093428, "grad_norm": 0.6148301519652317, "learning_rate": 8.201135442011356e-06, "loss": 0.5546, "step": 21611 }, { "epoch": 0.6309888762371901, "grad_norm": 0.674250354600401, "learning_rate": 8.200486618004866e-06, "loss": 0.6541, "step": 21612 }, { "epoch": 0.6310180724650375, "grad_norm": 0.6764720668694709, "learning_rate": 8.199837793998378e-06, "loss": 0.6247, "step": 21613 }, { "epoch": 0.6310472686928849, "grad_norm": 0.6450171545780541, "learning_rate": 8.19918896999189e-06, "loss": 0.6225, "step": 21614 }, { "epoch": 0.6310764649207322, "grad_norm": 0.6648728431624659, "learning_rate": 8.198540145985402e-06, "loss": 0.6163, "step": 21615 }, { "epoch": 0.6311056611485796, "grad_norm": 0.6709551205601012, "learning_rate": 8.197891321978914e-06, "loss": 0.61, "step": 21616 }, { "epoch": 0.6311348573764269, "grad_norm": 0.6487569493786616, "learning_rate": 8.197242497972425e-06, "loss": 0.5899, "step": 21617 }, { "epoch": 0.6311640536042743, "grad_norm": 0.6900841332069056, "learning_rate": 8.196593673965937e-06, "loss": 0.6681, "step": 21618 }, { "epoch": 0.6311932498321217, "grad_norm": 0.6315757326896704, "learning_rate": 8.19594484995945e-06, "loss": 0.5898, "step": 21619 }, { "epoch": 0.631222446059969, "grad_norm": 0.6418268676414691, "learning_rate": 8.19529602595296e-06, "loss": 0.6263, "step": 21620 }, { "epoch": 0.6312516422878164, "grad_norm": 0.6868377968833436, "learning_rate": 8.194647201946473e-06, "loss": 0.6569, "step": 21621 }, { "epoch": 0.6312808385156637, "grad_norm": 0.6779123925577397, "learning_rate": 8.193998377939985e-06, "loss": 0.6309, "step": 21622 }, { "epoch": 0.6313100347435111, "grad_norm": 0.6449109600109354, "learning_rate": 8.193349553933497e-06, "loss": 0.6125, "step": 21623 }, { "epoch": 0.6313392309713585, "grad_norm": 0.6664753508587902, "learning_rate": 8.192700729927007e-06, "loss": 0.643, "step": 21624 }, { "epoch": 0.6313684271992058, "grad_norm": 0.5853290581461085, "learning_rate": 8.19205190592052e-06, "loss": 0.5066, "step": 21625 }, { "epoch": 0.6313976234270532, "grad_norm": 0.6450722329503844, "learning_rate": 8.191403081914031e-06, "loss": 0.6159, "step": 21626 }, { "epoch": 0.6314268196549006, "grad_norm": 0.6578844417375912, "learning_rate": 8.190754257907543e-06, "loss": 0.6099, "step": 21627 }, { "epoch": 0.6314560158827479, "grad_norm": 0.6903993904299351, "learning_rate": 8.190105433901055e-06, "loss": 0.6678, "step": 21628 }, { "epoch": 0.6314852121105953, "grad_norm": 0.6296235114656425, "learning_rate": 8.189456609894567e-06, "loss": 0.5662, "step": 21629 }, { "epoch": 0.6315144083384426, "grad_norm": 0.6406784320481995, "learning_rate": 8.18880778588808e-06, "loss": 0.6001, "step": 21630 }, { "epoch": 0.63154360456629, "grad_norm": 0.6858949713916342, "learning_rate": 8.18815896188159e-06, "loss": 0.7034, "step": 21631 }, { "epoch": 0.6315728007941374, "grad_norm": 0.6797175388081833, "learning_rate": 8.187510137875102e-06, "loss": 0.6414, "step": 21632 }, { "epoch": 0.6316019970219847, "grad_norm": 0.6421571311144619, "learning_rate": 8.186861313868614e-06, "loss": 0.5656, "step": 21633 }, { "epoch": 0.6316311932498321, "grad_norm": 0.6200400120763929, "learning_rate": 8.186212489862126e-06, "loss": 0.5518, "step": 21634 }, { "epoch": 0.6316603894776794, "grad_norm": 0.6635764108072804, "learning_rate": 8.185563665855638e-06, "loss": 0.6821, "step": 21635 }, { "epoch": 0.6316895857055268, "grad_norm": 0.5938553246677644, "learning_rate": 8.184914841849148e-06, "loss": 0.5525, "step": 21636 }, { "epoch": 0.6317187819333742, "grad_norm": 0.6618900330425248, "learning_rate": 8.184266017842662e-06, "loss": 0.6404, "step": 21637 }, { "epoch": 0.6317479781612215, "grad_norm": 0.6507080127335242, "learning_rate": 8.183617193836174e-06, "loss": 0.6317, "step": 21638 }, { "epoch": 0.6317771743890689, "grad_norm": 0.686635222036498, "learning_rate": 8.182968369829684e-06, "loss": 0.6912, "step": 21639 }, { "epoch": 0.6318063706169162, "grad_norm": 0.6669108720083655, "learning_rate": 8.182319545823196e-06, "loss": 0.5814, "step": 21640 }, { "epoch": 0.6318355668447636, "grad_norm": 0.6803404078403813, "learning_rate": 8.181670721816708e-06, "loss": 0.6632, "step": 21641 }, { "epoch": 0.631864763072611, "grad_norm": 0.6293396149092171, "learning_rate": 8.18102189781022e-06, "loss": 0.5505, "step": 21642 }, { "epoch": 0.6318939593004583, "grad_norm": 0.6511321320200179, "learning_rate": 8.18037307380373e-06, "loss": 0.6132, "step": 21643 }, { "epoch": 0.6319231555283057, "grad_norm": 0.6522765091978371, "learning_rate": 8.179724249797243e-06, "loss": 0.6225, "step": 21644 }, { "epoch": 0.631952351756153, "grad_norm": 0.6686040937693186, "learning_rate": 8.179075425790755e-06, "loss": 0.6483, "step": 21645 }, { "epoch": 0.6319815479840004, "grad_norm": 0.6483297579741284, "learning_rate": 8.178426601784267e-06, "loss": 0.6333, "step": 21646 }, { "epoch": 0.6320107442118478, "grad_norm": 0.653198054498098, "learning_rate": 8.177777777777779e-06, "loss": 0.6467, "step": 21647 }, { "epoch": 0.6320399404396951, "grad_norm": 0.735572755561921, "learning_rate": 8.17712895377129e-06, "loss": 0.705, "step": 21648 }, { "epoch": 0.6320691366675426, "grad_norm": 0.6044870599907697, "learning_rate": 8.176480129764803e-06, "loss": 0.5233, "step": 21649 }, { "epoch": 0.63209833289539, "grad_norm": 0.6162581055783601, "learning_rate": 8.175831305758313e-06, "loss": 0.5695, "step": 21650 }, { "epoch": 0.6321275291232373, "grad_norm": 0.5953788567075962, "learning_rate": 8.175182481751825e-06, "loss": 0.5178, "step": 21651 }, { "epoch": 0.6321567253510847, "grad_norm": 0.6369527222707521, "learning_rate": 8.174533657745337e-06, "loss": 0.5985, "step": 21652 }, { "epoch": 0.632185921578932, "grad_norm": 0.640426979779998, "learning_rate": 8.17388483373885e-06, "loss": 0.6202, "step": 21653 }, { "epoch": 0.6322151178067794, "grad_norm": 0.6650403179692661, "learning_rate": 8.173236009732361e-06, "loss": 0.6853, "step": 21654 }, { "epoch": 0.6322443140346268, "grad_norm": 0.653203108736522, "learning_rate": 8.172587185725872e-06, "loss": 0.5538, "step": 21655 }, { "epoch": 0.6322735102624741, "grad_norm": 0.6130658746586415, "learning_rate": 8.171938361719385e-06, "loss": 0.5527, "step": 21656 }, { "epoch": 0.6323027064903215, "grad_norm": 0.64100253253951, "learning_rate": 8.171289537712897e-06, "loss": 0.607, "step": 21657 }, { "epoch": 0.6323319027181689, "grad_norm": 0.6076353579398954, "learning_rate": 8.170640713706408e-06, "loss": 0.5886, "step": 21658 }, { "epoch": 0.6323610989460162, "grad_norm": 0.633659016212039, "learning_rate": 8.16999188969992e-06, "loss": 0.6059, "step": 21659 }, { "epoch": 0.6323902951738636, "grad_norm": 0.6679541577745821, "learning_rate": 8.169343065693432e-06, "loss": 0.6148, "step": 21660 }, { "epoch": 0.6324194914017109, "grad_norm": 0.69207686371284, "learning_rate": 8.168694241686944e-06, "loss": 0.5747, "step": 21661 }, { "epoch": 0.6324486876295583, "grad_norm": 0.668824991691626, "learning_rate": 8.168045417680454e-06, "loss": 0.6696, "step": 21662 }, { "epoch": 0.6324778838574057, "grad_norm": 0.6499427092244596, "learning_rate": 8.167396593673966e-06, "loss": 0.6197, "step": 21663 }, { "epoch": 0.632507080085253, "grad_norm": 0.6868651414400461, "learning_rate": 8.166747769667478e-06, "loss": 0.6678, "step": 21664 }, { "epoch": 0.6325362763131004, "grad_norm": 0.6690746296214019, "learning_rate": 8.16609894566099e-06, "loss": 0.6432, "step": 21665 }, { "epoch": 0.6325654725409477, "grad_norm": 0.6440078928663178, "learning_rate": 8.165450121654502e-06, "loss": 0.6304, "step": 21666 }, { "epoch": 0.6325946687687951, "grad_norm": 0.6149585502789422, "learning_rate": 8.164801297648014e-06, "loss": 0.5636, "step": 21667 }, { "epoch": 0.6326238649966425, "grad_norm": 0.597687346505113, "learning_rate": 8.164152473641526e-06, "loss": 0.5283, "step": 21668 }, { "epoch": 0.6326530612244898, "grad_norm": 0.6243485916422785, "learning_rate": 8.163503649635037e-06, "loss": 0.5654, "step": 21669 }, { "epoch": 0.6326822574523372, "grad_norm": 0.73554169860815, "learning_rate": 8.162854825628549e-06, "loss": 0.7435, "step": 21670 }, { "epoch": 0.6327114536801846, "grad_norm": 0.6473385176391401, "learning_rate": 8.16220600162206e-06, "loss": 0.5597, "step": 21671 }, { "epoch": 0.6327406499080319, "grad_norm": 0.6001966850850833, "learning_rate": 8.161557177615573e-06, "loss": 0.5767, "step": 21672 }, { "epoch": 0.6327698461358793, "grad_norm": 0.6278185797717494, "learning_rate": 8.160908353609085e-06, "loss": 0.6159, "step": 21673 }, { "epoch": 0.6327990423637266, "grad_norm": 0.5646753205920428, "learning_rate": 8.160259529602595e-06, "loss": 0.4792, "step": 21674 }, { "epoch": 0.632828238591574, "grad_norm": 0.628102419028849, "learning_rate": 8.159610705596109e-06, "loss": 0.6228, "step": 21675 }, { "epoch": 0.6328574348194214, "grad_norm": 0.6184951104316957, "learning_rate": 8.158961881589619e-06, "loss": 0.5291, "step": 21676 }, { "epoch": 0.6328866310472687, "grad_norm": 0.6331098501955635, "learning_rate": 8.158313057583131e-06, "loss": 0.6071, "step": 21677 }, { "epoch": 0.6329158272751161, "grad_norm": 0.6216303580907644, "learning_rate": 8.157664233576643e-06, "loss": 0.608, "step": 21678 }, { "epoch": 0.6329450235029634, "grad_norm": 0.597113682213433, "learning_rate": 8.157015409570155e-06, "loss": 0.5236, "step": 21679 }, { "epoch": 0.6329742197308108, "grad_norm": 0.6737097112054984, "learning_rate": 8.156366585563667e-06, "loss": 0.6767, "step": 21680 }, { "epoch": 0.6330034159586582, "grad_norm": 0.68067887051523, "learning_rate": 8.155717761557177e-06, "loss": 0.6712, "step": 21681 }, { "epoch": 0.6330326121865055, "grad_norm": 0.5946433836061563, "learning_rate": 8.15506893755069e-06, "loss": 0.5846, "step": 21682 }, { "epoch": 0.6330618084143529, "grad_norm": 0.6844867098238574, "learning_rate": 8.154420113544202e-06, "loss": 0.6482, "step": 21683 }, { "epoch": 0.6330910046422003, "grad_norm": 0.613123901324483, "learning_rate": 8.153771289537714e-06, "loss": 0.5574, "step": 21684 }, { "epoch": 0.6331202008700476, "grad_norm": 0.6435634531845165, "learning_rate": 8.153122465531226e-06, "loss": 0.6202, "step": 21685 }, { "epoch": 0.633149397097895, "grad_norm": 0.5834230355343247, "learning_rate": 8.152473641524738e-06, "loss": 0.5194, "step": 21686 }, { "epoch": 0.6331785933257423, "grad_norm": 0.6317568485295795, "learning_rate": 8.15182481751825e-06, "loss": 0.5698, "step": 21687 }, { "epoch": 0.6332077895535897, "grad_norm": 0.6299522761110125, "learning_rate": 8.15117599351176e-06, "loss": 0.5529, "step": 21688 }, { "epoch": 0.6332369857814371, "grad_norm": 0.6138172189018034, "learning_rate": 8.150527169505272e-06, "loss": 0.5443, "step": 21689 }, { "epoch": 0.6332661820092844, "grad_norm": 0.6409102067074113, "learning_rate": 8.149878345498784e-06, "loss": 0.6053, "step": 21690 }, { "epoch": 0.6332953782371318, "grad_norm": 0.6573777294019969, "learning_rate": 8.149229521492296e-06, "loss": 0.5776, "step": 21691 }, { "epoch": 0.6333245744649791, "grad_norm": 0.679166747360059, "learning_rate": 8.148580697485808e-06, "loss": 0.6779, "step": 21692 }, { "epoch": 0.6333537706928265, "grad_norm": 0.6387977496262395, "learning_rate": 8.147931873479318e-06, "loss": 0.598, "step": 21693 }, { "epoch": 0.6333829669206739, "grad_norm": 0.6802314825494565, "learning_rate": 8.147283049472832e-06, "loss": 0.5677, "step": 21694 }, { "epoch": 0.6334121631485212, "grad_norm": 0.659659284129629, "learning_rate": 8.146634225466342e-06, "loss": 0.6355, "step": 21695 }, { "epoch": 0.6334413593763686, "grad_norm": 0.6039600070823951, "learning_rate": 8.145985401459855e-06, "loss": 0.5236, "step": 21696 }, { "epoch": 0.633470555604216, "grad_norm": 0.5967164708127759, "learning_rate": 8.145336577453367e-06, "loss": 0.5257, "step": 21697 }, { "epoch": 0.6334997518320633, "grad_norm": 0.6896468066529629, "learning_rate": 8.144687753446879e-06, "loss": 0.6668, "step": 21698 }, { "epoch": 0.6335289480599107, "grad_norm": 0.6560693100748838, "learning_rate": 8.14403892944039e-06, "loss": 0.5944, "step": 21699 }, { "epoch": 0.633558144287758, "grad_norm": 0.6866154709079539, "learning_rate": 8.143390105433901e-06, "loss": 0.6962, "step": 21700 }, { "epoch": 0.6335873405156054, "grad_norm": 0.624421006097594, "learning_rate": 8.142741281427413e-06, "loss": 0.6018, "step": 21701 }, { "epoch": 0.6336165367434528, "grad_norm": 0.6919786628574309, "learning_rate": 8.142092457420925e-06, "loss": 0.6958, "step": 21702 }, { "epoch": 0.6336457329713001, "grad_norm": 0.5986222880521122, "learning_rate": 8.141443633414437e-06, "loss": 0.5597, "step": 21703 }, { "epoch": 0.6336749291991475, "grad_norm": 0.6656307030166557, "learning_rate": 8.140794809407949e-06, "loss": 0.6986, "step": 21704 }, { "epoch": 0.6337041254269948, "grad_norm": 0.5934327765197482, "learning_rate": 8.140145985401461e-06, "loss": 0.5265, "step": 21705 }, { "epoch": 0.6337333216548422, "grad_norm": 0.6498547298354947, "learning_rate": 8.139497161394973e-06, "loss": 0.6401, "step": 21706 }, { "epoch": 0.6337625178826896, "grad_norm": 0.6279358294874083, "learning_rate": 8.138848337388483e-06, "loss": 0.5739, "step": 21707 }, { "epoch": 0.6337917141105369, "grad_norm": 0.6246907437554005, "learning_rate": 8.138199513381995e-06, "loss": 0.5783, "step": 21708 }, { "epoch": 0.6338209103383843, "grad_norm": 0.5960354452600385, "learning_rate": 8.137550689375507e-06, "loss": 0.5856, "step": 21709 }, { "epoch": 0.6338501065662316, "grad_norm": 0.6372761232476829, "learning_rate": 8.13690186536902e-06, "loss": 0.6247, "step": 21710 }, { "epoch": 0.633879302794079, "grad_norm": 0.6249950832300913, "learning_rate": 8.136253041362532e-06, "loss": 0.5975, "step": 21711 }, { "epoch": 0.6339084990219264, "grad_norm": 0.6236768099552521, "learning_rate": 8.135604217356042e-06, "loss": 0.574, "step": 21712 }, { "epoch": 0.6339376952497737, "grad_norm": 0.6055317626241412, "learning_rate": 8.134955393349556e-06, "loss": 0.5551, "step": 21713 }, { "epoch": 0.6339668914776211, "grad_norm": 0.6344055581686561, "learning_rate": 8.134306569343066e-06, "loss": 0.5929, "step": 21714 }, { "epoch": 0.6339960877054684, "grad_norm": 0.5894212429937943, "learning_rate": 8.133657745336578e-06, "loss": 0.5229, "step": 21715 }, { "epoch": 0.6340252839333158, "grad_norm": 0.6555807995920848, "learning_rate": 8.13300892133009e-06, "loss": 0.6288, "step": 21716 }, { "epoch": 0.6340544801611632, "grad_norm": 0.5917163356764765, "learning_rate": 8.132360097323602e-06, "loss": 0.5224, "step": 21717 }, { "epoch": 0.6340836763890105, "grad_norm": 0.6110580655991992, "learning_rate": 8.131711273317114e-06, "loss": 0.5462, "step": 21718 }, { "epoch": 0.6341128726168579, "grad_norm": 0.6388956051399339, "learning_rate": 8.131062449310624e-06, "loss": 0.5765, "step": 21719 }, { "epoch": 0.6341420688447053, "grad_norm": 0.6135950952621861, "learning_rate": 8.130413625304136e-06, "loss": 0.5995, "step": 21720 }, { "epoch": 0.6341712650725526, "grad_norm": 0.6249646263302291, "learning_rate": 8.129764801297648e-06, "loss": 0.5552, "step": 21721 }, { "epoch": 0.6342004613004, "grad_norm": 0.6864330958101188, "learning_rate": 8.12911597729116e-06, "loss": 0.6067, "step": 21722 }, { "epoch": 0.6342296575282473, "grad_norm": 0.6419621873215987, "learning_rate": 8.128467153284672e-06, "loss": 0.6355, "step": 21723 }, { "epoch": 0.6342588537560947, "grad_norm": 0.6709187366927116, "learning_rate": 8.127818329278184e-06, "loss": 0.6609, "step": 21724 }, { "epoch": 0.6342880499839421, "grad_norm": 0.6610213502927564, "learning_rate": 8.127169505271697e-06, "loss": 0.6362, "step": 21725 }, { "epoch": 0.6343172462117894, "grad_norm": 0.7764561677109229, "learning_rate": 8.126520681265207e-06, "loss": 0.7637, "step": 21726 }, { "epoch": 0.6343464424396368, "grad_norm": 0.6437190931900584, "learning_rate": 8.125871857258719e-06, "loss": 0.6105, "step": 21727 }, { "epoch": 0.6343756386674841, "grad_norm": 0.6462202335880596, "learning_rate": 8.125223033252231e-06, "loss": 0.588, "step": 21728 }, { "epoch": 0.6344048348953315, "grad_norm": 0.7073394727576103, "learning_rate": 8.124574209245743e-06, "loss": 0.6812, "step": 21729 }, { "epoch": 0.6344340311231789, "grad_norm": 0.7429295744180318, "learning_rate": 8.123925385239255e-06, "loss": 0.6451, "step": 21730 }, { "epoch": 0.6344632273510262, "grad_norm": 0.5887320549075847, "learning_rate": 8.123276561232765e-06, "loss": 0.5697, "step": 21731 }, { "epoch": 0.6344924235788736, "grad_norm": 0.6308525672373094, "learning_rate": 8.122627737226279e-06, "loss": 0.6084, "step": 21732 }, { "epoch": 0.634521619806721, "grad_norm": 0.6230220136742631, "learning_rate": 8.12197891321979e-06, "loss": 0.6043, "step": 21733 }, { "epoch": 0.6345508160345683, "grad_norm": 0.6597127865047681, "learning_rate": 8.121330089213301e-06, "loss": 0.6107, "step": 21734 }, { "epoch": 0.6345800122624157, "grad_norm": 0.6816340942043063, "learning_rate": 8.120681265206813e-06, "loss": 0.6522, "step": 21735 }, { "epoch": 0.634609208490263, "grad_norm": 0.6642346202715871, "learning_rate": 8.120032441200325e-06, "loss": 0.583, "step": 21736 }, { "epoch": 0.6346384047181104, "grad_norm": 0.6326493712002993, "learning_rate": 8.119383617193837e-06, "loss": 0.6191, "step": 21737 }, { "epoch": 0.6346676009459578, "grad_norm": 0.6610449169427719, "learning_rate": 8.118734793187348e-06, "loss": 0.6041, "step": 21738 }, { "epoch": 0.6346967971738051, "grad_norm": 0.5764605771069452, "learning_rate": 8.11808596918086e-06, "loss": 0.4676, "step": 21739 }, { "epoch": 0.6347259934016525, "grad_norm": 0.6100468259766665, "learning_rate": 8.117437145174372e-06, "loss": 0.5618, "step": 21740 }, { "epoch": 0.6347551896294998, "grad_norm": 0.6574388015002001, "learning_rate": 8.116788321167884e-06, "loss": 0.627, "step": 21741 }, { "epoch": 0.6347843858573472, "grad_norm": 0.6298388073431371, "learning_rate": 8.116139497161396e-06, "loss": 0.5849, "step": 21742 }, { "epoch": 0.6348135820851946, "grad_norm": 0.6138503349256729, "learning_rate": 8.115490673154908e-06, "loss": 0.5852, "step": 21743 }, { "epoch": 0.6348427783130419, "grad_norm": 0.6454689495182611, "learning_rate": 8.11484184914842e-06, "loss": 0.5678, "step": 21744 }, { "epoch": 0.6348719745408893, "grad_norm": 0.6407144448170949, "learning_rate": 8.11419302514193e-06, "loss": 0.5579, "step": 21745 }, { "epoch": 0.6349011707687366, "grad_norm": 0.6475175522954673, "learning_rate": 8.113544201135442e-06, "loss": 0.6494, "step": 21746 }, { "epoch": 0.634930366996584, "grad_norm": 0.6909337092844333, "learning_rate": 8.112895377128954e-06, "loss": 0.6403, "step": 21747 }, { "epoch": 0.6349595632244314, "grad_norm": 0.7164223392139659, "learning_rate": 8.112246553122466e-06, "loss": 0.6927, "step": 21748 }, { "epoch": 0.6349887594522787, "grad_norm": 0.6523801033739693, "learning_rate": 8.111597729115977e-06, "loss": 0.6215, "step": 21749 }, { "epoch": 0.6350179556801261, "grad_norm": 0.6841829652919019, "learning_rate": 8.110948905109489e-06, "loss": 0.6631, "step": 21750 }, { "epoch": 0.6350471519079735, "grad_norm": 0.6048800812855204, "learning_rate": 8.110300081103002e-06, "loss": 0.6126, "step": 21751 }, { "epoch": 0.6350763481358208, "grad_norm": 0.670231843060461, "learning_rate": 8.109651257096513e-06, "loss": 0.6135, "step": 21752 }, { "epoch": 0.6351055443636682, "grad_norm": 0.5881933377877506, "learning_rate": 8.109002433090025e-06, "loss": 0.5349, "step": 21753 }, { "epoch": 0.6351347405915155, "grad_norm": 0.6836322693083424, "learning_rate": 8.108353609083537e-06, "loss": 0.6081, "step": 21754 }, { "epoch": 0.6351639368193629, "grad_norm": 0.6257093264290651, "learning_rate": 8.107704785077049e-06, "loss": 0.5774, "step": 21755 }, { "epoch": 0.6351931330472103, "grad_norm": 0.6292635708643838, "learning_rate": 8.107055961070561e-06, "loss": 0.5594, "step": 21756 }, { "epoch": 0.6352223292750576, "grad_norm": 0.6481711550921082, "learning_rate": 8.106407137064071e-06, "loss": 0.583, "step": 21757 }, { "epoch": 0.635251525502905, "grad_norm": 0.6674272236458575, "learning_rate": 8.105758313057583e-06, "loss": 0.6632, "step": 21758 }, { "epoch": 0.6352807217307523, "grad_norm": 0.5905930417885579, "learning_rate": 8.105109489051095e-06, "loss": 0.5148, "step": 21759 }, { "epoch": 0.6353099179585997, "grad_norm": 0.6365917330569464, "learning_rate": 8.104460665044607e-06, "loss": 0.6197, "step": 21760 }, { "epoch": 0.6353391141864471, "grad_norm": 0.6660330142630025, "learning_rate": 8.10381184103812e-06, "loss": 0.5564, "step": 21761 }, { "epoch": 0.6353683104142944, "grad_norm": 0.6231429267749732, "learning_rate": 8.103163017031631e-06, "loss": 0.5711, "step": 21762 }, { "epoch": 0.6353975066421418, "grad_norm": 0.8063140706726016, "learning_rate": 8.102514193025143e-06, "loss": 0.638, "step": 21763 }, { "epoch": 0.6354267028699891, "grad_norm": 0.6581618945113267, "learning_rate": 8.101865369018654e-06, "loss": 0.5752, "step": 21764 }, { "epoch": 0.6354558990978365, "grad_norm": 0.6394052673803787, "learning_rate": 8.101216545012166e-06, "loss": 0.5682, "step": 21765 }, { "epoch": 0.6354850953256839, "grad_norm": 0.6508696929277559, "learning_rate": 8.100567721005678e-06, "loss": 0.6264, "step": 21766 }, { "epoch": 0.6355142915535312, "grad_norm": 0.6683170491793939, "learning_rate": 8.09991889699919e-06, "loss": 0.655, "step": 21767 }, { "epoch": 0.6355434877813786, "grad_norm": 0.6744987018602644, "learning_rate": 8.0992700729927e-06, "loss": 0.5956, "step": 21768 }, { "epoch": 0.6355726840092261, "grad_norm": 0.6587204476188797, "learning_rate": 8.098621248986212e-06, "loss": 0.6311, "step": 21769 }, { "epoch": 0.6356018802370734, "grad_norm": 0.6099809180843232, "learning_rate": 8.097972424979726e-06, "loss": 0.5373, "step": 21770 }, { "epoch": 0.6356310764649208, "grad_norm": 0.6246407230444654, "learning_rate": 8.097323600973236e-06, "loss": 0.5858, "step": 21771 }, { "epoch": 0.6356602726927681, "grad_norm": 0.6764678834689427, "learning_rate": 8.096674776966748e-06, "loss": 0.5413, "step": 21772 }, { "epoch": 0.6356894689206155, "grad_norm": 0.6658670299742848, "learning_rate": 8.09602595296026e-06, "loss": 0.6502, "step": 21773 }, { "epoch": 0.6357186651484629, "grad_norm": 0.6304311004521257, "learning_rate": 8.095377128953772e-06, "loss": 0.5699, "step": 21774 }, { "epoch": 0.6357478613763102, "grad_norm": 0.6099798205269553, "learning_rate": 8.094728304947284e-06, "loss": 0.5602, "step": 21775 }, { "epoch": 0.6357770576041576, "grad_norm": 0.6428942851011838, "learning_rate": 8.094079480940795e-06, "loss": 0.607, "step": 21776 }, { "epoch": 0.635806253832005, "grad_norm": 0.6324535867250095, "learning_rate": 8.093430656934307e-06, "loss": 0.6252, "step": 21777 }, { "epoch": 0.6358354500598523, "grad_norm": 0.6460100032467125, "learning_rate": 8.092781832927819e-06, "loss": 0.6226, "step": 21778 }, { "epoch": 0.6358646462876997, "grad_norm": 0.6461525977079238, "learning_rate": 8.09213300892133e-06, "loss": 0.5661, "step": 21779 }, { "epoch": 0.635893842515547, "grad_norm": 0.6243717365623837, "learning_rate": 8.091484184914843e-06, "loss": 0.5825, "step": 21780 }, { "epoch": 0.6359230387433944, "grad_norm": 0.6402733004795609, "learning_rate": 8.090835360908355e-06, "loss": 0.6065, "step": 21781 }, { "epoch": 0.6359522349712418, "grad_norm": 0.6238164613598474, "learning_rate": 8.090186536901867e-06, "loss": 0.6057, "step": 21782 }, { "epoch": 0.6359814311990891, "grad_norm": 0.6291547789583067, "learning_rate": 8.089537712895377e-06, "loss": 0.5854, "step": 21783 }, { "epoch": 0.6360106274269365, "grad_norm": 0.6691081550511989, "learning_rate": 8.08888888888889e-06, "loss": 0.6619, "step": 21784 }, { "epoch": 0.6360398236547838, "grad_norm": 0.6443232449638981, "learning_rate": 8.088240064882401e-06, "loss": 0.5961, "step": 21785 }, { "epoch": 0.6360690198826312, "grad_norm": 0.6311837503850832, "learning_rate": 8.087591240875913e-06, "loss": 0.554, "step": 21786 }, { "epoch": 0.6360982161104786, "grad_norm": 0.6804322013038472, "learning_rate": 8.086942416869424e-06, "loss": 0.6518, "step": 21787 }, { "epoch": 0.6361274123383259, "grad_norm": 0.6471013247129453, "learning_rate": 8.086293592862937e-06, "loss": 0.6451, "step": 21788 }, { "epoch": 0.6361566085661733, "grad_norm": 0.6001088599540306, "learning_rate": 8.08564476885645e-06, "loss": 0.5333, "step": 21789 }, { "epoch": 0.6361858047940206, "grad_norm": 0.5983644881838889, "learning_rate": 8.08499594484996e-06, "loss": 0.5645, "step": 21790 }, { "epoch": 0.636215001021868, "grad_norm": 0.5929117840996088, "learning_rate": 8.084347120843472e-06, "loss": 0.5281, "step": 21791 }, { "epoch": 0.6362441972497154, "grad_norm": 0.6489844510759406, "learning_rate": 8.083698296836984e-06, "loss": 0.6695, "step": 21792 }, { "epoch": 0.6362733934775627, "grad_norm": 0.6310143840276804, "learning_rate": 8.083049472830496e-06, "loss": 0.6016, "step": 21793 }, { "epoch": 0.6363025897054101, "grad_norm": 0.6219822961546728, "learning_rate": 8.082400648824008e-06, "loss": 0.5715, "step": 21794 }, { "epoch": 0.6363317859332575, "grad_norm": 0.6777086477816754, "learning_rate": 8.081751824817518e-06, "loss": 0.6816, "step": 21795 }, { "epoch": 0.6363609821611048, "grad_norm": 0.5851160004998424, "learning_rate": 8.08110300081103e-06, "loss": 0.5317, "step": 21796 }, { "epoch": 0.6363901783889522, "grad_norm": 0.6332069233899111, "learning_rate": 8.080454176804542e-06, "loss": 0.5843, "step": 21797 }, { "epoch": 0.6364193746167995, "grad_norm": 0.6701881322622839, "learning_rate": 8.079805352798054e-06, "loss": 0.6844, "step": 21798 }, { "epoch": 0.6364485708446469, "grad_norm": 0.6652829606616939, "learning_rate": 8.079156528791566e-06, "loss": 0.6583, "step": 21799 }, { "epoch": 0.6364777670724943, "grad_norm": 0.6259750045151539, "learning_rate": 8.078507704785078e-06, "loss": 0.5702, "step": 21800 }, { "epoch": 0.6365069633003416, "grad_norm": 0.651112188775757, "learning_rate": 8.07785888077859e-06, "loss": 0.6305, "step": 21801 }, { "epoch": 0.636536159528189, "grad_norm": 0.6783169729516402, "learning_rate": 8.0772100567721e-06, "loss": 0.6577, "step": 21802 }, { "epoch": 0.6365653557560363, "grad_norm": 0.6835130943191648, "learning_rate": 8.076561232765613e-06, "loss": 0.6633, "step": 21803 }, { "epoch": 0.6365945519838837, "grad_norm": 0.7085300420292616, "learning_rate": 8.075912408759125e-06, "loss": 0.6935, "step": 21804 }, { "epoch": 0.6366237482117311, "grad_norm": 0.6522841253080879, "learning_rate": 8.075263584752637e-06, "loss": 0.5558, "step": 21805 }, { "epoch": 0.6366529444395784, "grad_norm": 0.6487623551100006, "learning_rate": 8.074614760746147e-06, "loss": 0.6238, "step": 21806 }, { "epoch": 0.6366821406674258, "grad_norm": 0.7112520274235844, "learning_rate": 8.07396593673966e-06, "loss": 0.6951, "step": 21807 }, { "epoch": 0.6367113368952732, "grad_norm": 0.6788033132640434, "learning_rate": 8.073317112733173e-06, "loss": 0.6196, "step": 21808 }, { "epoch": 0.6367405331231205, "grad_norm": 0.6602925238078734, "learning_rate": 8.072668288726683e-06, "loss": 0.6385, "step": 21809 }, { "epoch": 0.6367697293509679, "grad_norm": 0.63289467267141, "learning_rate": 8.072019464720195e-06, "loss": 0.5869, "step": 21810 }, { "epoch": 0.6367989255788152, "grad_norm": 0.6504760473608237, "learning_rate": 8.071370640713707e-06, "loss": 0.6645, "step": 21811 }, { "epoch": 0.6368281218066626, "grad_norm": 0.617145564894848, "learning_rate": 8.07072181670722e-06, "loss": 0.5391, "step": 21812 }, { "epoch": 0.63685731803451, "grad_norm": 0.6284227691736968, "learning_rate": 8.070072992700731e-06, "loss": 0.5508, "step": 21813 }, { "epoch": 0.6368865142623573, "grad_norm": 0.6377939803072986, "learning_rate": 8.069424168694242e-06, "loss": 0.6483, "step": 21814 }, { "epoch": 0.6369157104902047, "grad_norm": 0.6186858193027892, "learning_rate": 8.068775344687754e-06, "loss": 0.613, "step": 21815 }, { "epoch": 0.636944906718052, "grad_norm": 0.6789672807141368, "learning_rate": 8.068126520681266e-06, "loss": 0.6458, "step": 21816 }, { "epoch": 0.6369741029458994, "grad_norm": 0.6291338695300996, "learning_rate": 8.067477696674778e-06, "loss": 0.5459, "step": 21817 }, { "epoch": 0.6370032991737468, "grad_norm": 0.606213657828733, "learning_rate": 8.06682887266829e-06, "loss": 0.5803, "step": 21818 }, { "epoch": 0.6370324954015941, "grad_norm": 0.5711952731966372, "learning_rate": 8.066180048661802e-06, "loss": 0.5117, "step": 21819 }, { "epoch": 0.6370616916294415, "grad_norm": 0.6201415126784416, "learning_rate": 8.065531224655314e-06, "loss": 0.5989, "step": 21820 }, { "epoch": 0.6370908878572888, "grad_norm": 0.6722758803316158, "learning_rate": 8.064882400648824e-06, "loss": 0.663, "step": 21821 }, { "epoch": 0.6371200840851362, "grad_norm": 0.6060575284630842, "learning_rate": 8.064233576642336e-06, "loss": 0.5706, "step": 21822 }, { "epoch": 0.6371492803129836, "grad_norm": 0.674973020936507, "learning_rate": 8.063584752635848e-06, "loss": 0.6602, "step": 21823 }, { "epoch": 0.6371784765408309, "grad_norm": 0.6252632740059695, "learning_rate": 8.06293592862936e-06, "loss": 0.587, "step": 21824 }, { "epoch": 0.6372076727686783, "grad_norm": 0.5981011933706821, "learning_rate": 8.06228710462287e-06, "loss": 0.5072, "step": 21825 }, { "epoch": 0.6372368689965257, "grad_norm": 0.6210207836127644, "learning_rate": 8.061638280616384e-06, "loss": 0.5669, "step": 21826 }, { "epoch": 0.637266065224373, "grad_norm": 0.618182124513789, "learning_rate": 8.060989456609896e-06, "loss": 0.5602, "step": 21827 }, { "epoch": 0.6372952614522204, "grad_norm": 0.6955042326496026, "learning_rate": 8.060340632603407e-06, "loss": 0.6119, "step": 21828 }, { "epoch": 0.6373244576800677, "grad_norm": 0.6425515375459334, "learning_rate": 8.059691808596919e-06, "loss": 0.6303, "step": 21829 }, { "epoch": 0.6373536539079151, "grad_norm": 0.6219469313430984, "learning_rate": 8.05904298459043e-06, "loss": 0.6227, "step": 21830 }, { "epoch": 0.6373828501357625, "grad_norm": 0.6205057120999138, "learning_rate": 8.058394160583943e-06, "loss": 0.5673, "step": 21831 }, { "epoch": 0.6374120463636098, "grad_norm": 0.6133388229991616, "learning_rate": 8.057745336577455e-06, "loss": 0.557, "step": 21832 }, { "epoch": 0.6374412425914572, "grad_norm": 0.624329690439207, "learning_rate": 8.057096512570965e-06, "loss": 0.5708, "step": 21833 }, { "epoch": 0.6374704388193045, "grad_norm": 0.6131817664046146, "learning_rate": 8.056447688564477e-06, "loss": 0.5589, "step": 21834 }, { "epoch": 0.6374996350471519, "grad_norm": 0.6232337409237126, "learning_rate": 8.055798864557989e-06, "loss": 0.5386, "step": 21835 }, { "epoch": 0.6375288312749993, "grad_norm": 0.7189865074879812, "learning_rate": 8.055150040551501e-06, "loss": 0.6452, "step": 21836 }, { "epoch": 0.6375580275028466, "grad_norm": 0.6840909276606545, "learning_rate": 8.054501216545013e-06, "loss": 0.6484, "step": 21837 }, { "epoch": 0.637587223730694, "grad_norm": 0.6634420349269171, "learning_rate": 8.053852392538525e-06, "loss": 0.6458, "step": 21838 }, { "epoch": 0.6376164199585413, "grad_norm": 0.7165254553567828, "learning_rate": 8.053203568532037e-06, "loss": 0.6604, "step": 21839 }, { "epoch": 0.6376456161863887, "grad_norm": 0.6461797263075363, "learning_rate": 8.052554744525547e-06, "loss": 0.6041, "step": 21840 }, { "epoch": 0.6376748124142361, "grad_norm": 0.6599209296296608, "learning_rate": 8.05190592051906e-06, "loss": 0.6112, "step": 21841 }, { "epoch": 0.6377040086420834, "grad_norm": 0.6172087007124767, "learning_rate": 8.051257096512572e-06, "loss": 0.5791, "step": 21842 }, { "epoch": 0.6377332048699308, "grad_norm": 0.6395836291794359, "learning_rate": 8.050608272506084e-06, "loss": 0.6476, "step": 21843 }, { "epoch": 0.6377624010977782, "grad_norm": 0.6103493470209386, "learning_rate": 8.049959448499594e-06, "loss": 0.5492, "step": 21844 }, { "epoch": 0.6377915973256255, "grad_norm": 0.6562445697976559, "learning_rate": 8.049310624493108e-06, "loss": 0.5678, "step": 21845 }, { "epoch": 0.6378207935534729, "grad_norm": 0.6427944194637422, "learning_rate": 8.04866180048662e-06, "loss": 0.6314, "step": 21846 }, { "epoch": 0.6378499897813202, "grad_norm": 0.6000114176963569, "learning_rate": 8.04801297648013e-06, "loss": 0.5372, "step": 21847 }, { "epoch": 0.6378791860091676, "grad_norm": 0.6680217670768925, "learning_rate": 8.047364152473642e-06, "loss": 0.63, "step": 21848 }, { "epoch": 0.637908382237015, "grad_norm": 0.6644057230201658, "learning_rate": 8.046715328467154e-06, "loss": 0.6631, "step": 21849 }, { "epoch": 0.6379375784648623, "grad_norm": 0.6774895879841039, "learning_rate": 8.046066504460666e-06, "loss": 0.5872, "step": 21850 }, { "epoch": 0.6379667746927097, "grad_norm": 0.669151344662235, "learning_rate": 8.045417680454178e-06, "loss": 0.6444, "step": 21851 }, { "epoch": 0.637995970920557, "grad_norm": 0.6122311259830696, "learning_rate": 8.044768856447688e-06, "loss": 0.5389, "step": 21852 }, { "epoch": 0.6380251671484044, "grad_norm": 0.6346277708752621, "learning_rate": 8.0441200324412e-06, "loss": 0.5768, "step": 21853 }, { "epoch": 0.6380543633762518, "grad_norm": 0.5889894835375499, "learning_rate": 8.043471208434712e-06, "loss": 0.5162, "step": 21854 }, { "epoch": 0.6380835596040991, "grad_norm": 0.6176763629169287, "learning_rate": 8.042822384428224e-06, "loss": 0.5617, "step": 21855 }, { "epoch": 0.6381127558319465, "grad_norm": 0.6524101459532592, "learning_rate": 8.042173560421737e-06, "loss": 0.6198, "step": 21856 }, { "epoch": 0.6381419520597938, "grad_norm": 0.6076743934960921, "learning_rate": 8.041524736415249e-06, "loss": 0.5637, "step": 21857 }, { "epoch": 0.6381711482876412, "grad_norm": 0.6816628328011969, "learning_rate": 8.04087591240876e-06, "loss": 0.6216, "step": 21858 }, { "epoch": 0.6382003445154886, "grad_norm": 0.6341623035266715, "learning_rate": 8.040227088402271e-06, "loss": 0.6017, "step": 21859 }, { "epoch": 0.6382295407433359, "grad_norm": 0.608162275478218, "learning_rate": 8.039578264395783e-06, "loss": 0.556, "step": 21860 }, { "epoch": 0.6382587369711833, "grad_norm": 0.6327539889985226, "learning_rate": 8.038929440389295e-06, "loss": 0.5886, "step": 21861 }, { "epoch": 0.6382879331990307, "grad_norm": 0.6719685319954559, "learning_rate": 8.038280616382807e-06, "loss": 0.6711, "step": 21862 }, { "epoch": 0.638317129426878, "grad_norm": 0.6070147023150382, "learning_rate": 8.037631792376317e-06, "loss": 0.559, "step": 21863 }, { "epoch": 0.6383463256547254, "grad_norm": 0.6346553696916154, "learning_rate": 8.036982968369831e-06, "loss": 0.5898, "step": 21864 }, { "epoch": 0.6383755218825727, "grad_norm": 0.6351237126911126, "learning_rate": 8.036334144363343e-06, "loss": 0.5992, "step": 21865 }, { "epoch": 0.6384047181104201, "grad_norm": 0.6014070207457025, "learning_rate": 8.035685320356853e-06, "loss": 0.5408, "step": 21866 }, { "epoch": 0.6384339143382675, "grad_norm": 0.6647069004904608, "learning_rate": 8.035036496350365e-06, "loss": 0.618, "step": 21867 }, { "epoch": 0.6384631105661148, "grad_norm": 0.6535517727505866, "learning_rate": 8.034387672343877e-06, "loss": 0.6245, "step": 21868 }, { "epoch": 0.6384923067939622, "grad_norm": 0.6556729739800978, "learning_rate": 8.03373884833739e-06, "loss": 0.633, "step": 21869 }, { "epoch": 0.6385215030218095, "grad_norm": 0.6591745453218133, "learning_rate": 8.033090024330901e-06, "loss": 0.594, "step": 21870 }, { "epoch": 0.6385506992496569, "grad_norm": 0.6809756569508182, "learning_rate": 8.032441200324412e-06, "loss": 0.5788, "step": 21871 }, { "epoch": 0.6385798954775043, "grad_norm": 0.6183144004564214, "learning_rate": 8.031792376317924e-06, "loss": 0.553, "step": 21872 }, { "epoch": 0.6386090917053516, "grad_norm": 0.6190465694071857, "learning_rate": 8.031143552311436e-06, "loss": 0.568, "step": 21873 }, { "epoch": 0.638638287933199, "grad_norm": 0.6551800751604167, "learning_rate": 8.030494728304948e-06, "loss": 0.6304, "step": 21874 }, { "epoch": 0.6386674841610464, "grad_norm": 0.6491058081839279, "learning_rate": 8.02984590429846e-06, "loss": 0.6081, "step": 21875 }, { "epoch": 0.6386966803888937, "grad_norm": 0.6903318677202003, "learning_rate": 8.029197080291972e-06, "loss": 0.6858, "step": 21876 }, { "epoch": 0.6387258766167411, "grad_norm": 0.6914938015286893, "learning_rate": 8.028548256285484e-06, "loss": 0.6701, "step": 21877 }, { "epoch": 0.6387550728445884, "grad_norm": 0.6927327322502417, "learning_rate": 8.027899432278994e-06, "loss": 0.703, "step": 21878 }, { "epoch": 0.6387842690724358, "grad_norm": 0.6259457261355622, "learning_rate": 8.027250608272506e-06, "loss": 0.5601, "step": 21879 }, { "epoch": 0.6388134653002832, "grad_norm": 0.6432963900776615, "learning_rate": 8.026601784266018e-06, "loss": 0.6028, "step": 21880 }, { "epoch": 0.6388426615281305, "grad_norm": 0.6938086870329808, "learning_rate": 8.02595296025953e-06, "loss": 0.718, "step": 21881 }, { "epoch": 0.6388718577559779, "grad_norm": 0.5967690401200496, "learning_rate": 8.02530413625304e-06, "loss": 0.5054, "step": 21882 }, { "epoch": 0.6389010539838252, "grad_norm": 0.6454318231789518, "learning_rate": 8.024655312246554e-06, "loss": 0.591, "step": 21883 }, { "epoch": 0.6389302502116726, "grad_norm": 0.870978381872921, "learning_rate": 8.024006488240066e-06, "loss": 0.7202, "step": 21884 }, { "epoch": 0.63895944643952, "grad_norm": 0.645291809832626, "learning_rate": 8.023357664233577e-06, "loss": 0.6069, "step": 21885 }, { "epoch": 0.6389886426673673, "grad_norm": 0.6675507795134722, "learning_rate": 8.022708840227089e-06, "loss": 0.6176, "step": 21886 }, { "epoch": 0.6390178388952147, "grad_norm": 0.6318994731861776, "learning_rate": 8.022060016220601e-06, "loss": 0.5881, "step": 21887 }, { "epoch": 0.639047035123062, "grad_norm": 0.639759861787559, "learning_rate": 8.021411192214113e-06, "loss": 0.6498, "step": 21888 }, { "epoch": 0.6390762313509094, "grad_norm": 0.6695330419240891, "learning_rate": 8.020762368207625e-06, "loss": 0.6628, "step": 21889 }, { "epoch": 0.6391054275787569, "grad_norm": 0.5857584071803862, "learning_rate": 8.020113544201135e-06, "loss": 0.522, "step": 21890 }, { "epoch": 0.6391346238066042, "grad_norm": 0.6521768427804292, "learning_rate": 8.019464720194647e-06, "loss": 0.5875, "step": 21891 }, { "epoch": 0.6391638200344516, "grad_norm": 0.6568672345945876, "learning_rate": 8.01881589618816e-06, "loss": 0.6501, "step": 21892 }, { "epoch": 0.639193016262299, "grad_norm": 0.6441240951185702, "learning_rate": 8.018167072181671e-06, "loss": 0.6006, "step": 21893 }, { "epoch": 0.6392222124901463, "grad_norm": 0.613807544871535, "learning_rate": 8.017518248175183e-06, "loss": 0.5721, "step": 21894 }, { "epoch": 0.6392514087179937, "grad_norm": 0.6091521347946058, "learning_rate": 8.016869424168695e-06, "loss": 0.5494, "step": 21895 }, { "epoch": 0.639280604945841, "grad_norm": 0.632883515383808, "learning_rate": 8.016220600162207e-06, "loss": 0.5423, "step": 21896 }, { "epoch": 0.6393098011736884, "grad_norm": 0.610178700156163, "learning_rate": 8.015571776155718e-06, "loss": 0.5589, "step": 21897 }, { "epoch": 0.6393389974015358, "grad_norm": 0.5740785924318308, "learning_rate": 8.01492295214923e-06, "loss": 0.5092, "step": 21898 }, { "epoch": 0.6393681936293831, "grad_norm": 0.6355192761972343, "learning_rate": 8.014274128142742e-06, "loss": 0.5738, "step": 21899 }, { "epoch": 0.6393973898572305, "grad_norm": 0.5835893356324076, "learning_rate": 8.013625304136254e-06, "loss": 0.5227, "step": 21900 }, { "epoch": 0.6394265860850779, "grad_norm": 0.6251075784725685, "learning_rate": 8.012976480129764e-06, "loss": 0.5654, "step": 21901 }, { "epoch": 0.6394557823129252, "grad_norm": 0.6442203910812224, "learning_rate": 8.012327656123278e-06, "loss": 0.6351, "step": 21902 }, { "epoch": 0.6394849785407726, "grad_norm": 0.6326531004522525, "learning_rate": 8.01167883211679e-06, "loss": 0.5742, "step": 21903 }, { "epoch": 0.6395141747686199, "grad_norm": 0.679070254526134, "learning_rate": 8.0110300081103e-06, "loss": 0.7211, "step": 21904 }, { "epoch": 0.6395433709964673, "grad_norm": 0.6143591427649603, "learning_rate": 8.010381184103812e-06, "loss": 0.5086, "step": 21905 }, { "epoch": 0.6395725672243147, "grad_norm": 0.6063953452118912, "learning_rate": 8.009732360097324e-06, "loss": 0.5944, "step": 21906 }, { "epoch": 0.639601763452162, "grad_norm": 0.7460793912228442, "learning_rate": 8.009083536090836e-06, "loss": 0.6731, "step": 21907 }, { "epoch": 0.6396309596800094, "grad_norm": 0.6546655506026077, "learning_rate": 8.008434712084348e-06, "loss": 0.6324, "step": 21908 }, { "epoch": 0.6396601559078567, "grad_norm": 0.64195407366086, "learning_rate": 8.007785888077859e-06, "loss": 0.5919, "step": 21909 }, { "epoch": 0.6396893521357041, "grad_norm": 0.6856301743978027, "learning_rate": 8.00713706407137e-06, "loss": 0.6538, "step": 21910 }, { "epoch": 0.6397185483635515, "grad_norm": 0.669290300047822, "learning_rate": 8.006488240064883e-06, "loss": 0.5981, "step": 21911 }, { "epoch": 0.6397477445913988, "grad_norm": 0.6063579481521862, "learning_rate": 8.005839416058395e-06, "loss": 0.5283, "step": 21912 }, { "epoch": 0.6397769408192462, "grad_norm": 0.5965008830361523, "learning_rate": 8.005190592051907e-06, "loss": 0.5873, "step": 21913 }, { "epoch": 0.6398061370470935, "grad_norm": 0.6295668864977899, "learning_rate": 8.004541768045419e-06, "loss": 0.5745, "step": 21914 }, { "epoch": 0.6398353332749409, "grad_norm": 0.6904639008073649, "learning_rate": 8.003892944038931e-06, "loss": 0.6737, "step": 21915 }, { "epoch": 0.6398645295027883, "grad_norm": 0.6124974469638169, "learning_rate": 8.003244120032441e-06, "loss": 0.5644, "step": 21916 }, { "epoch": 0.6398937257306356, "grad_norm": 0.8380035066796949, "learning_rate": 8.002595296025953e-06, "loss": 0.6787, "step": 21917 }, { "epoch": 0.639922921958483, "grad_norm": 0.6748474598117027, "learning_rate": 8.001946472019465e-06, "loss": 0.7026, "step": 21918 }, { "epoch": 0.6399521181863304, "grad_norm": 0.6273887463304741, "learning_rate": 8.001297648012977e-06, "loss": 0.5939, "step": 21919 }, { "epoch": 0.6399813144141777, "grad_norm": 0.6249497382765931, "learning_rate": 8.000648824006488e-06, "loss": 0.5932, "step": 21920 }, { "epoch": 0.6400105106420251, "grad_norm": 0.6339003305934214, "learning_rate": 8.000000000000001e-06, "loss": 0.5279, "step": 21921 }, { "epoch": 0.6400397068698724, "grad_norm": 0.6564064527761024, "learning_rate": 7.999351175993513e-06, "loss": 0.5937, "step": 21922 }, { "epoch": 0.6400689030977198, "grad_norm": 0.6545832067715852, "learning_rate": 7.998702351987024e-06, "loss": 0.6111, "step": 21923 }, { "epoch": 0.6400980993255672, "grad_norm": 0.5877105743030093, "learning_rate": 7.998053527980536e-06, "loss": 0.5104, "step": 21924 }, { "epoch": 0.6401272955534145, "grad_norm": 0.6951804642007987, "learning_rate": 7.997404703974048e-06, "loss": 0.6574, "step": 21925 }, { "epoch": 0.6401564917812619, "grad_norm": 0.6676633409803491, "learning_rate": 7.99675587996756e-06, "loss": 0.658, "step": 21926 }, { "epoch": 0.6401856880091092, "grad_norm": 0.677039511463208, "learning_rate": 7.99610705596107e-06, "loss": 0.6639, "step": 21927 }, { "epoch": 0.6402148842369566, "grad_norm": 0.6673656077979335, "learning_rate": 7.995458231954582e-06, "loss": 0.6913, "step": 21928 }, { "epoch": 0.640244080464804, "grad_norm": 0.6746177673975932, "learning_rate": 7.994809407948094e-06, "loss": 0.6035, "step": 21929 }, { "epoch": 0.6402732766926513, "grad_norm": 0.6486157581084213, "learning_rate": 7.994160583941606e-06, "loss": 0.6294, "step": 21930 }, { "epoch": 0.6403024729204987, "grad_norm": 0.6008241008086517, "learning_rate": 7.993511759935118e-06, "loss": 0.5555, "step": 21931 }, { "epoch": 0.640331669148346, "grad_norm": 0.6761839274287036, "learning_rate": 7.99286293592863e-06, "loss": 0.6456, "step": 21932 }, { "epoch": 0.6403608653761934, "grad_norm": 0.6005712327859931, "learning_rate": 7.992214111922142e-06, "loss": 0.5267, "step": 21933 }, { "epoch": 0.6403900616040408, "grad_norm": 0.6856326071591179, "learning_rate": 7.991565287915654e-06, "loss": 0.6648, "step": 21934 }, { "epoch": 0.6404192578318881, "grad_norm": 0.6066361886982703, "learning_rate": 7.990916463909165e-06, "loss": 0.5808, "step": 21935 }, { "epoch": 0.6404484540597355, "grad_norm": 0.6385579437554595, "learning_rate": 7.990267639902677e-06, "loss": 0.6, "step": 21936 }, { "epoch": 0.6404776502875829, "grad_norm": 0.6932997385056191, "learning_rate": 7.989618815896189e-06, "loss": 0.7125, "step": 21937 }, { "epoch": 0.6405068465154302, "grad_norm": 0.6294070975074533, "learning_rate": 7.9889699918897e-06, "loss": 0.5785, "step": 21938 }, { "epoch": 0.6405360427432776, "grad_norm": 0.6822863423237753, "learning_rate": 7.988321167883213e-06, "loss": 0.6276, "step": 21939 }, { "epoch": 0.6405652389711249, "grad_norm": 0.6302611106755409, "learning_rate": 7.987672343876725e-06, "loss": 0.6088, "step": 21940 }, { "epoch": 0.6405944351989723, "grad_norm": 0.7076442440065267, "learning_rate": 7.987023519870237e-06, "loss": 0.7169, "step": 21941 }, { "epoch": 0.6406236314268197, "grad_norm": 0.6513121285865872, "learning_rate": 7.986374695863747e-06, "loss": 0.6554, "step": 21942 }, { "epoch": 0.640652827654667, "grad_norm": 0.6025054729907077, "learning_rate": 7.98572587185726e-06, "loss": 0.538, "step": 21943 }, { "epoch": 0.6406820238825144, "grad_norm": 0.6313812625821639, "learning_rate": 7.985077047850771e-06, "loss": 0.5748, "step": 21944 }, { "epoch": 0.6407112201103617, "grad_norm": 0.6356507189999552, "learning_rate": 7.984428223844283e-06, "loss": 0.5596, "step": 21945 }, { "epoch": 0.6407404163382091, "grad_norm": 0.6097676152833783, "learning_rate": 7.983779399837794e-06, "loss": 0.568, "step": 21946 }, { "epoch": 0.6407696125660565, "grad_norm": 0.6358633572921942, "learning_rate": 7.983130575831306e-06, "loss": 0.6339, "step": 21947 }, { "epoch": 0.6407988087939038, "grad_norm": 0.6180772619027288, "learning_rate": 7.982481751824818e-06, "loss": 0.5621, "step": 21948 }, { "epoch": 0.6408280050217512, "grad_norm": 0.8797713251676114, "learning_rate": 7.98183292781833e-06, "loss": 0.6022, "step": 21949 }, { "epoch": 0.6408572012495986, "grad_norm": 0.6667008529189163, "learning_rate": 7.981184103811842e-06, "loss": 0.6184, "step": 21950 }, { "epoch": 0.6408863974774459, "grad_norm": 0.6904360834379911, "learning_rate": 7.980535279805354e-06, "loss": 0.6404, "step": 21951 }, { "epoch": 0.6409155937052933, "grad_norm": 0.611839003609428, "learning_rate": 7.979886455798866e-06, "loss": 0.5519, "step": 21952 }, { "epoch": 0.6409447899331406, "grad_norm": 0.6658742675495296, "learning_rate": 7.979237631792378e-06, "loss": 0.6551, "step": 21953 }, { "epoch": 0.640973986160988, "grad_norm": 0.6594459513983347, "learning_rate": 7.978588807785888e-06, "loss": 0.6523, "step": 21954 }, { "epoch": 0.6410031823888354, "grad_norm": 0.6466836604562431, "learning_rate": 7.9779399837794e-06, "loss": 0.6312, "step": 21955 }, { "epoch": 0.6410323786166827, "grad_norm": 0.6972541996209585, "learning_rate": 7.977291159772912e-06, "loss": 0.7265, "step": 21956 }, { "epoch": 0.6410615748445301, "grad_norm": 0.6789712523039604, "learning_rate": 7.976642335766424e-06, "loss": 0.6794, "step": 21957 }, { "epoch": 0.6410907710723774, "grad_norm": 0.683413512839426, "learning_rate": 7.975993511759936e-06, "loss": 0.659, "step": 21958 }, { "epoch": 0.6411199673002248, "grad_norm": 0.6183744600348761, "learning_rate": 7.975344687753448e-06, "loss": 0.5289, "step": 21959 }, { "epoch": 0.6411491635280722, "grad_norm": 0.6451284725713202, "learning_rate": 7.97469586374696e-06, "loss": 0.6335, "step": 21960 }, { "epoch": 0.6411783597559195, "grad_norm": 0.5824590769128593, "learning_rate": 7.97404703974047e-06, "loss": 0.5159, "step": 21961 }, { "epoch": 0.6412075559837669, "grad_norm": 0.6540520011736949, "learning_rate": 7.973398215733983e-06, "loss": 0.6589, "step": 21962 }, { "epoch": 0.6412367522116142, "grad_norm": 0.6159892144638153, "learning_rate": 7.972749391727495e-06, "loss": 0.5753, "step": 21963 }, { "epoch": 0.6412659484394616, "grad_norm": 0.6410575772598816, "learning_rate": 7.972100567721007e-06, "loss": 0.5886, "step": 21964 }, { "epoch": 0.641295144667309, "grad_norm": 0.6402861862068173, "learning_rate": 7.971451743714517e-06, "loss": 0.6041, "step": 21965 }, { "epoch": 0.6413243408951563, "grad_norm": 0.648056456550422, "learning_rate": 7.970802919708029e-06, "loss": 0.6773, "step": 21966 }, { "epoch": 0.6413535371230037, "grad_norm": 0.6597847675658086, "learning_rate": 7.970154095701541e-06, "loss": 0.6137, "step": 21967 }, { "epoch": 0.641382733350851, "grad_norm": 0.6809203962853642, "learning_rate": 7.969505271695053e-06, "loss": 0.6208, "step": 21968 }, { "epoch": 0.6414119295786984, "grad_norm": 0.6237010492329523, "learning_rate": 7.968856447688565e-06, "loss": 0.5812, "step": 21969 }, { "epoch": 0.6414411258065458, "grad_norm": 0.6098903859840364, "learning_rate": 7.968207623682077e-06, "loss": 0.5616, "step": 21970 }, { "epoch": 0.6414703220343931, "grad_norm": 0.6776601067744983, "learning_rate": 7.967558799675589e-06, "loss": 0.6852, "step": 21971 }, { "epoch": 0.6414995182622405, "grad_norm": 0.6480336642109769, "learning_rate": 7.966909975669101e-06, "loss": 0.6094, "step": 21972 }, { "epoch": 0.6415287144900879, "grad_norm": 0.6385120244261054, "learning_rate": 7.966261151662611e-06, "loss": 0.5583, "step": 21973 }, { "epoch": 0.6415579107179352, "grad_norm": 0.6101946047191684, "learning_rate": 7.965612327656124e-06, "loss": 0.5785, "step": 21974 }, { "epoch": 0.6415871069457826, "grad_norm": 0.5940018891504262, "learning_rate": 7.964963503649636e-06, "loss": 0.5248, "step": 21975 }, { "epoch": 0.6416163031736299, "grad_norm": 0.6194862109577168, "learning_rate": 7.964314679643148e-06, "loss": 0.5743, "step": 21976 }, { "epoch": 0.6416454994014773, "grad_norm": 0.6760539505625569, "learning_rate": 7.96366585563666e-06, "loss": 0.6371, "step": 21977 }, { "epoch": 0.6416746956293247, "grad_norm": 0.5751435761800301, "learning_rate": 7.963017031630172e-06, "loss": 0.5152, "step": 21978 }, { "epoch": 0.641703891857172, "grad_norm": 0.6509345689746965, "learning_rate": 7.962368207623684e-06, "loss": 0.6322, "step": 21979 }, { "epoch": 0.6417330880850194, "grad_norm": 0.7223194695606001, "learning_rate": 7.961719383617194e-06, "loss": 0.6275, "step": 21980 }, { "epoch": 0.6417622843128667, "grad_norm": 0.6426443861118876, "learning_rate": 7.961070559610706e-06, "loss": 0.5951, "step": 21981 }, { "epoch": 0.6417914805407141, "grad_norm": 0.6568935735310475, "learning_rate": 7.960421735604218e-06, "loss": 0.6337, "step": 21982 }, { "epoch": 0.6418206767685615, "grad_norm": 0.6238172987715592, "learning_rate": 7.95977291159773e-06, "loss": 0.5884, "step": 21983 }, { "epoch": 0.6418498729964088, "grad_norm": 0.684210134746161, "learning_rate": 7.95912408759124e-06, "loss": 0.6412, "step": 21984 }, { "epoch": 0.6418790692242562, "grad_norm": 0.6409112482036847, "learning_rate": 7.958475263584752e-06, "loss": 0.6279, "step": 21985 }, { "epoch": 0.6419082654521036, "grad_norm": 0.639280619712136, "learning_rate": 7.957826439578264e-06, "loss": 0.6445, "step": 21986 }, { "epoch": 0.6419374616799509, "grad_norm": 0.6467215557536171, "learning_rate": 7.957177615571776e-06, "loss": 0.6141, "step": 21987 }, { "epoch": 0.6419666579077983, "grad_norm": 0.6849902759364679, "learning_rate": 7.956528791565289e-06, "loss": 0.5989, "step": 21988 }, { "epoch": 0.6419958541356456, "grad_norm": 0.6352961973528483, "learning_rate": 7.9558799675588e-06, "loss": 0.623, "step": 21989 }, { "epoch": 0.642025050363493, "grad_norm": 0.6316800530715324, "learning_rate": 7.955231143552313e-06, "loss": 0.5726, "step": 21990 }, { "epoch": 0.6420542465913404, "grad_norm": 0.6615010360649026, "learning_rate": 7.954582319545825e-06, "loss": 0.585, "step": 21991 }, { "epoch": 0.6420834428191877, "grad_norm": 0.6783724914943987, "learning_rate": 7.953933495539335e-06, "loss": 0.6839, "step": 21992 }, { "epoch": 0.6421126390470351, "grad_norm": 0.5995607603224367, "learning_rate": 7.953284671532847e-06, "loss": 0.5256, "step": 21993 }, { "epoch": 0.6421418352748824, "grad_norm": 0.6469476327990831, "learning_rate": 7.952635847526359e-06, "loss": 0.6403, "step": 21994 }, { "epoch": 0.6421710315027298, "grad_norm": 0.6614506067370562, "learning_rate": 7.951987023519871e-06, "loss": 0.6007, "step": 21995 }, { "epoch": 0.6422002277305772, "grad_norm": 0.6131211348873987, "learning_rate": 7.951338199513383e-06, "loss": 0.5855, "step": 21996 }, { "epoch": 0.6422294239584245, "grad_norm": 0.6141756650848393, "learning_rate": 7.950689375506895e-06, "loss": 0.5654, "step": 21997 }, { "epoch": 0.6422586201862719, "grad_norm": 0.6046067105197725, "learning_rate": 7.950040551500407e-06, "loss": 0.5675, "step": 21998 }, { "epoch": 0.6422878164141193, "grad_norm": 0.6527722127077391, "learning_rate": 7.949391727493917e-06, "loss": 0.6383, "step": 21999 }, { "epoch": 0.6423170126419666, "grad_norm": 0.602849214346396, "learning_rate": 7.94874290348743e-06, "loss": 0.5134, "step": 22000 }, { "epoch": 0.642346208869814, "grad_norm": 0.6541858437612943, "learning_rate": 7.948094079480941e-06, "loss": 0.5929, "step": 22001 }, { "epoch": 0.6423754050976613, "grad_norm": 0.6178371373690307, "learning_rate": 7.947445255474454e-06, "loss": 0.5954, "step": 22002 }, { "epoch": 0.6424046013255087, "grad_norm": 0.6232929636458052, "learning_rate": 7.946796431467964e-06, "loss": 0.5926, "step": 22003 }, { "epoch": 0.6424337975533561, "grad_norm": 0.6187655587699712, "learning_rate": 7.946147607461476e-06, "loss": 0.5797, "step": 22004 }, { "epoch": 0.6424629937812034, "grad_norm": 0.6419517446766217, "learning_rate": 7.94549878345499e-06, "loss": 0.6272, "step": 22005 }, { "epoch": 0.6424921900090508, "grad_norm": 0.6823347858615847, "learning_rate": 7.9448499594485e-06, "loss": 0.635, "step": 22006 }, { "epoch": 0.6425213862368981, "grad_norm": 0.6244704335771694, "learning_rate": 7.944201135442012e-06, "loss": 0.5916, "step": 22007 }, { "epoch": 0.6425505824647455, "grad_norm": 0.5843061821403362, "learning_rate": 7.943552311435524e-06, "loss": 0.5139, "step": 22008 }, { "epoch": 0.6425797786925929, "grad_norm": 0.6594981994408522, "learning_rate": 7.942903487429036e-06, "loss": 0.652, "step": 22009 }, { "epoch": 0.6426089749204403, "grad_norm": 0.6531112109246149, "learning_rate": 7.942254663422548e-06, "loss": 0.6308, "step": 22010 }, { "epoch": 0.6426381711482877, "grad_norm": 0.6574478840517205, "learning_rate": 7.941605839416058e-06, "loss": 0.6221, "step": 22011 }, { "epoch": 0.6426673673761351, "grad_norm": 0.6324682081406751, "learning_rate": 7.94095701540957e-06, "loss": 0.5878, "step": 22012 }, { "epoch": 0.6426965636039824, "grad_norm": 0.6645406769491125, "learning_rate": 7.940308191403082e-06, "loss": 0.6442, "step": 22013 }, { "epoch": 0.6427257598318298, "grad_norm": 0.639325984562909, "learning_rate": 7.939659367396594e-06, "loss": 0.5442, "step": 22014 }, { "epoch": 0.6427549560596771, "grad_norm": 0.5828754305887947, "learning_rate": 7.939010543390106e-06, "loss": 0.5128, "step": 22015 }, { "epoch": 0.6427841522875245, "grad_norm": 0.645288611697911, "learning_rate": 7.938361719383619e-06, "loss": 0.5517, "step": 22016 }, { "epoch": 0.6428133485153719, "grad_norm": 0.6346212128405517, "learning_rate": 7.93771289537713e-06, "loss": 0.5739, "step": 22017 }, { "epoch": 0.6428425447432192, "grad_norm": 0.6644021504871912, "learning_rate": 7.937064071370641e-06, "loss": 0.6024, "step": 22018 }, { "epoch": 0.6428717409710666, "grad_norm": 0.6939922139710716, "learning_rate": 7.936415247364153e-06, "loss": 0.7156, "step": 22019 }, { "epoch": 0.642900937198914, "grad_norm": 0.6261082919097891, "learning_rate": 7.935766423357665e-06, "loss": 0.6037, "step": 22020 }, { "epoch": 0.6429301334267613, "grad_norm": 0.5902871089357882, "learning_rate": 7.935117599351177e-06, "loss": 0.5392, "step": 22021 }, { "epoch": 0.6429593296546087, "grad_norm": 0.6448684466346788, "learning_rate": 7.934468775344687e-06, "loss": 0.6248, "step": 22022 }, { "epoch": 0.642988525882456, "grad_norm": 0.6706872531019749, "learning_rate": 7.9338199513382e-06, "loss": 0.622, "step": 22023 }, { "epoch": 0.6430177221103034, "grad_norm": 0.6090540637290454, "learning_rate": 7.933171127331713e-06, "loss": 0.5714, "step": 22024 }, { "epoch": 0.6430469183381508, "grad_norm": 0.6622422988848202, "learning_rate": 7.932522303325223e-06, "loss": 0.5904, "step": 22025 }, { "epoch": 0.6430761145659981, "grad_norm": 0.6526158950155551, "learning_rate": 7.931873479318735e-06, "loss": 0.6449, "step": 22026 }, { "epoch": 0.6431053107938455, "grad_norm": 0.6701555654288501, "learning_rate": 7.931224655312247e-06, "loss": 0.6264, "step": 22027 }, { "epoch": 0.6431345070216928, "grad_norm": 0.6724025496682025, "learning_rate": 7.93057583130576e-06, "loss": 0.6243, "step": 22028 }, { "epoch": 0.6431637032495402, "grad_norm": 0.6346696646889218, "learning_rate": 7.929927007299271e-06, "loss": 0.5959, "step": 22029 }, { "epoch": 0.6431928994773876, "grad_norm": 0.678366682006271, "learning_rate": 7.929278183292782e-06, "loss": 0.6497, "step": 22030 }, { "epoch": 0.6432220957052349, "grad_norm": 0.634627948767292, "learning_rate": 7.928629359286294e-06, "loss": 0.6081, "step": 22031 }, { "epoch": 0.6432512919330823, "grad_norm": 0.6585130085422986, "learning_rate": 7.927980535279806e-06, "loss": 0.6225, "step": 22032 }, { "epoch": 0.6432804881609296, "grad_norm": 0.715772685665972, "learning_rate": 7.927331711273318e-06, "loss": 0.7217, "step": 22033 }, { "epoch": 0.643309684388777, "grad_norm": 0.6162803204094853, "learning_rate": 7.92668288726683e-06, "loss": 0.592, "step": 22034 }, { "epoch": 0.6433388806166244, "grad_norm": 0.6359480527367899, "learning_rate": 7.926034063260342e-06, "loss": 0.6188, "step": 22035 }, { "epoch": 0.6433680768444717, "grad_norm": 0.6352576365164642, "learning_rate": 7.925385239253854e-06, "loss": 0.6069, "step": 22036 }, { "epoch": 0.6433972730723191, "grad_norm": 0.6467188699077151, "learning_rate": 7.924736415247364e-06, "loss": 0.6343, "step": 22037 }, { "epoch": 0.6434264693001664, "grad_norm": 0.6177205455276982, "learning_rate": 7.924087591240876e-06, "loss": 0.5804, "step": 22038 }, { "epoch": 0.6434556655280138, "grad_norm": 0.60970421761962, "learning_rate": 7.923438767234388e-06, "loss": 0.551, "step": 22039 }, { "epoch": 0.6434848617558612, "grad_norm": 0.7084665174325786, "learning_rate": 7.9227899432279e-06, "loss": 0.6947, "step": 22040 }, { "epoch": 0.6435140579837085, "grad_norm": 0.5884911128561717, "learning_rate": 7.92214111922141e-06, "loss": 0.5463, "step": 22041 }, { "epoch": 0.6435432542115559, "grad_norm": 0.5925405614036091, "learning_rate": 7.921492295214923e-06, "loss": 0.5098, "step": 22042 }, { "epoch": 0.6435724504394033, "grad_norm": 0.6331917035688182, "learning_rate": 7.920843471208436e-06, "loss": 0.5242, "step": 22043 }, { "epoch": 0.6436016466672506, "grad_norm": 0.691280285894146, "learning_rate": 7.920194647201947e-06, "loss": 0.6498, "step": 22044 }, { "epoch": 0.643630842895098, "grad_norm": 0.5965869760016651, "learning_rate": 7.919545823195459e-06, "loss": 0.5468, "step": 22045 }, { "epoch": 0.6436600391229453, "grad_norm": 0.5988750697658834, "learning_rate": 7.918896999188971e-06, "loss": 0.5662, "step": 22046 }, { "epoch": 0.6436892353507927, "grad_norm": 0.6142202257113489, "learning_rate": 7.918248175182483e-06, "loss": 0.5787, "step": 22047 }, { "epoch": 0.6437184315786401, "grad_norm": 0.6848992138606783, "learning_rate": 7.917599351175995e-06, "loss": 0.7253, "step": 22048 }, { "epoch": 0.6437476278064874, "grad_norm": 0.6441489394772858, "learning_rate": 7.916950527169505e-06, "loss": 0.5995, "step": 22049 }, { "epoch": 0.6437768240343348, "grad_norm": 0.6651762387041095, "learning_rate": 7.916301703163017e-06, "loss": 0.6895, "step": 22050 }, { "epoch": 0.6438060202621821, "grad_norm": 0.6778451420845899, "learning_rate": 7.91565287915653e-06, "loss": 0.585, "step": 22051 }, { "epoch": 0.6438352164900295, "grad_norm": 0.6147946641822662, "learning_rate": 7.915004055150041e-06, "loss": 0.584, "step": 22052 }, { "epoch": 0.6438644127178769, "grad_norm": 0.7614434703416342, "learning_rate": 7.914355231143553e-06, "loss": 0.7637, "step": 22053 }, { "epoch": 0.6438936089457242, "grad_norm": 0.6506399027553619, "learning_rate": 7.913706407137065e-06, "loss": 0.6518, "step": 22054 }, { "epoch": 0.6439228051735716, "grad_norm": 0.6916503772455075, "learning_rate": 7.913057583130577e-06, "loss": 0.6681, "step": 22055 }, { "epoch": 0.643952001401419, "grad_norm": 0.6230727356979406, "learning_rate": 7.912408759124088e-06, "loss": 0.63, "step": 22056 }, { "epoch": 0.6439811976292663, "grad_norm": 0.6752564863974142, "learning_rate": 7.9117599351176e-06, "loss": 0.7026, "step": 22057 }, { "epoch": 0.6440103938571137, "grad_norm": 0.576817208476097, "learning_rate": 7.911111111111112e-06, "loss": 0.5214, "step": 22058 }, { "epoch": 0.644039590084961, "grad_norm": 0.667487992478905, "learning_rate": 7.910462287104624e-06, "loss": 0.5912, "step": 22059 }, { "epoch": 0.6440687863128084, "grad_norm": 0.6050778708959261, "learning_rate": 7.909813463098134e-06, "loss": 0.544, "step": 22060 }, { "epoch": 0.6440979825406558, "grad_norm": 0.6232356276679132, "learning_rate": 7.909164639091646e-06, "loss": 0.5526, "step": 22061 }, { "epoch": 0.6441271787685031, "grad_norm": 0.6673048314982051, "learning_rate": 7.90851581508516e-06, "loss": 0.6265, "step": 22062 }, { "epoch": 0.6441563749963505, "grad_norm": 0.5993608921219427, "learning_rate": 7.90786699107867e-06, "loss": 0.5575, "step": 22063 }, { "epoch": 0.6441855712241978, "grad_norm": 0.5946762729000178, "learning_rate": 7.907218167072182e-06, "loss": 0.5361, "step": 22064 }, { "epoch": 0.6442147674520452, "grad_norm": 0.617881705471675, "learning_rate": 7.906569343065694e-06, "loss": 0.61, "step": 22065 }, { "epoch": 0.6442439636798926, "grad_norm": 0.6536535901950418, "learning_rate": 7.905920519059206e-06, "loss": 0.6732, "step": 22066 }, { "epoch": 0.6442731599077399, "grad_norm": 0.6723422918175754, "learning_rate": 7.905271695052718e-06, "loss": 0.5487, "step": 22067 }, { "epoch": 0.6443023561355873, "grad_norm": 0.628867334799155, "learning_rate": 7.904622871046229e-06, "loss": 0.5877, "step": 22068 }, { "epoch": 0.6443315523634346, "grad_norm": 0.6421431323243472, "learning_rate": 7.90397404703974e-06, "loss": 0.6279, "step": 22069 }, { "epoch": 0.644360748591282, "grad_norm": 0.6689345374414074, "learning_rate": 7.903325223033253e-06, "loss": 0.5795, "step": 22070 }, { "epoch": 0.6443899448191294, "grad_norm": 0.6660863698418065, "learning_rate": 7.902676399026765e-06, "loss": 0.595, "step": 22071 }, { "epoch": 0.6444191410469767, "grad_norm": 0.5944485569407247, "learning_rate": 7.902027575020277e-06, "loss": 0.5472, "step": 22072 }, { "epoch": 0.6444483372748241, "grad_norm": 0.605179497879286, "learning_rate": 7.901378751013789e-06, "loss": 0.5834, "step": 22073 }, { "epoch": 0.6444775335026715, "grad_norm": 0.6470524812520744, "learning_rate": 7.9007299270073e-06, "loss": 0.6229, "step": 22074 }, { "epoch": 0.6445067297305188, "grad_norm": 0.6322220691612452, "learning_rate": 7.900081103000811e-06, "loss": 0.6306, "step": 22075 }, { "epoch": 0.6445359259583662, "grad_norm": 0.5467605532234121, "learning_rate": 7.899432278994323e-06, "loss": 0.4743, "step": 22076 }, { "epoch": 0.6445651221862135, "grad_norm": 0.6520140292016227, "learning_rate": 7.898783454987835e-06, "loss": 0.6456, "step": 22077 }, { "epoch": 0.6445943184140609, "grad_norm": 0.6727342318868456, "learning_rate": 7.898134630981347e-06, "loss": 0.6465, "step": 22078 }, { "epoch": 0.6446235146419083, "grad_norm": 0.6316234243539586, "learning_rate": 7.897485806974858e-06, "loss": 0.5782, "step": 22079 }, { "epoch": 0.6446527108697556, "grad_norm": 0.6402637884940281, "learning_rate": 7.89683698296837e-06, "loss": 0.5617, "step": 22080 }, { "epoch": 0.644681907097603, "grad_norm": 0.6577047709440532, "learning_rate": 7.896188158961883e-06, "loss": 0.6351, "step": 22081 }, { "epoch": 0.6447111033254503, "grad_norm": 0.6437662743063121, "learning_rate": 7.895539334955394e-06, "loss": 0.5771, "step": 22082 }, { "epoch": 0.6447402995532977, "grad_norm": 0.6320115073561546, "learning_rate": 7.894890510948906e-06, "loss": 0.5478, "step": 22083 }, { "epoch": 0.6447694957811451, "grad_norm": 0.6055559545447757, "learning_rate": 7.894241686942418e-06, "loss": 0.5474, "step": 22084 }, { "epoch": 0.6447986920089924, "grad_norm": 0.6658980127738267, "learning_rate": 7.89359286293593e-06, "loss": 0.6446, "step": 22085 }, { "epoch": 0.6448278882368398, "grad_norm": 0.6239071127199426, "learning_rate": 7.892944038929442e-06, "loss": 0.5588, "step": 22086 }, { "epoch": 0.6448570844646871, "grad_norm": 0.5988923978960816, "learning_rate": 7.892295214922952e-06, "loss": 0.5294, "step": 22087 }, { "epoch": 0.6448862806925345, "grad_norm": 0.65431527129693, "learning_rate": 7.891646390916464e-06, "loss": 0.6461, "step": 22088 }, { "epoch": 0.6449154769203819, "grad_norm": 0.6110229212228395, "learning_rate": 7.890997566909976e-06, "loss": 0.5186, "step": 22089 }, { "epoch": 0.6449446731482292, "grad_norm": 0.7256358775744495, "learning_rate": 7.890348742903488e-06, "loss": 0.7116, "step": 22090 }, { "epoch": 0.6449738693760766, "grad_norm": 0.6210810863835337, "learning_rate": 7.889699918897e-06, "loss": 0.568, "step": 22091 }, { "epoch": 0.645003065603924, "grad_norm": 0.5658314302074445, "learning_rate": 7.889051094890512e-06, "loss": 0.4725, "step": 22092 }, { "epoch": 0.6450322618317713, "grad_norm": 0.6583036338215807, "learning_rate": 7.888402270884024e-06, "loss": 0.5774, "step": 22093 }, { "epoch": 0.6450614580596187, "grad_norm": 0.6610090306050374, "learning_rate": 7.887753446877535e-06, "loss": 0.6544, "step": 22094 }, { "epoch": 0.645090654287466, "grad_norm": 0.6925703245143476, "learning_rate": 7.887104622871047e-06, "loss": 0.6911, "step": 22095 }, { "epoch": 0.6451198505153134, "grad_norm": 0.6244030842748379, "learning_rate": 7.886455798864559e-06, "loss": 0.5766, "step": 22096 }, { "epoch": 0.6451490467431608, "grad_norm": 0.6337820379348352, "learning_rate": 7.88580697485807e-06, "loss": 0.5685, "step": 22097 }, { "epoch": 0.6451782429710081, "grad_norm": 0.6437949710729686, "learning_rate": 7.885158150851581e-06, "loss": 0.6271, "step": 22098 }, { "epoch": 0.6452074391988555, "grad_norm": 0.6428390876895912, "learning_rate": 7.884509326845093e-06, "loss": 0.6001, "step": 22099 }, { "epoch": 0.6452366354267028, "grad_norm": 0.6550280817518878, "learning_rate": 7.883860502838607e-06, "loss": 0.6023, "step": 22100 }, { "epoch": 0.6452658316545502, "grad_norm": 0.6453964551648874, "learning_rate": 7.883211678832117e-06, "loss": 0.6223, "step": 22101 }, { "epoch": 0.6452950278823976, "grad_norm": 0.6476461675039533, "learning_rate": 7.882562854825629e-06, "loss": 0.6094, "step": 22102 }, { "epoch": 0.6453242241102449, "grad_norm": 0.6115273080538352, "learning_rate": 7.881914030819141e-06, "loss": 0.5355, "step": 22103 }, { "epoch": 0.6453534203380923, "grad_norm": 0.6328297756455585, "learning_rate": 7.881265206812653e-06, "loss": 0.622, "step": 22104 }, { "epoch": 0.6453826165659396, "grad_norm": 0.6297912699675332, "learning_rate": 7.880616382806165e-06, "loss": 0.6045, "step": 22105 }, { "epoch": 0.645411812793787, "grad_norm": 0.6579701876077697, "learning_rate": 7.879967558799676e-06, "loss": 0.6347, "step": 22106 }, { "epoch": 0.6454410090216344, "grad_norm": 0.6468354889209531, "learning_rate": 7.879318734793188e-06, "loss": 0.6207, "step": 22107 }, { "epoch": 0.6454702052494817, "grad_norm": 0.6555635059479096, "learning_rate": 7.8786699107867e-06, "loss": 0.5915, "step": 22108 }, { "epoch": 0.6454994014773291, "grad_norm": 0.6575182073854456, "learning_rate": 7.878021086780212e-06, "loss": 0.6438, "step": 22109 }, { "epoch": 0.6455285977051765, "grad_norm": 0.6144735310232288, "learning_rate": 7.877372262773724e-06, "loss": 0.5569, "step": 22110 }, { "epoch": 0.6455577939330238, "grad_norm": 0.6482573868846306, "learning_rate": 7.876723438767236e-06, "loss": 0.6081, "step": 22111 }, { "epoch": 0.6455869901608712, "grad_norm": 0.6808502039084181, "learning_rate": 7.876074614760748e-06, "loss": 0.6771, "step": 22112 }, { "epoch": 0.6456161863887185, "grad_norm": 0.6624921816804618, "learning_rate": 7.875425790754258e-06, "loss": 0.6277, "step": 22113 }, { "epoch": 0.6456453826165659, "grad_norm": 0.6557925319946111, "learning_rate": 7.87477696674777e-06, "loss": 0.6396, "step": 22114 }, { "epoch": 0.6456745788444133, "grad_norm": 0.6259854442171854, "learning_rate": 7.874128142741282e-06, "loss": 0.5954, "step": 22115 }, { "epoch": 0.6457037750722606, "grad_norm": 0.6501556917553262, "learning_rate": 7.873479318734794e-06, "loss": 0.6163, "step": 22116 }, { "epoch": 0.645732971300108, "grad_norm": 0.6160508298971856, "learning_rate": 7.872830494728304e-06, "loss": 0.5651, "step": 22117 }, { "epoch": 0.6457621675279553, "grad_norm": 0.6317299246790996, "learning_rate": 7.872181670721816e-06, "loss": 0.5752, "step": 22118 }, { "epoch": 0.6457913637558027, "grad_norm": 0.6604451588043604, "learning_rate": 7.87153284671533e-06, "loss": 0.6048, "step": 22119 }, { "epoch": 0.6458205599836501, "grad_norm": 0.6740838831392754, "learning_rate": 7.87088402270884e-06, "loss": 0.5829, "step": 22120 }, { "epoch": 0.6458497562114974, "grad_norm": 0.6146895868955959, "learning_rate": 7.870235198702353e-06, "loss": 0.5794, "step": 22121 }, { "epoch": 0.6458789524393448, "grad_norm": 0.6139805487002589, "learning_rate": 7.869586374695865e-06, "loss": 0.5822, "step": 22122 }, { "epoch": 0.6459081486671922, "grad_norm": 0.6658768192230708, "learning_rate": 7.868937550689377e-06, "loss": 0.5965, "step": 22123 }, { "epoch": 0.6459373448950395, "grad_norm": 0.6165673035801146, "learning_rate": 7.868288726682887e-06, "loss": 0.5485, "step": 22124 }, { "epoch": 0.6459665411228869, "grad_norm": 0.6391615929108099, "learning_rate": 7.867639902676399e-06, "loss": 0.6167, "step": 22125 }, { "epoch": 0.6459957373507342, "grad_norm": 0.6695586205899817, "learning_rate": 7.866991078669911e-06, "loss": 0.6119, "step": 22126 }, { "epoch": 0.6460249335785816, "grad_norm": 0.6232372321985696, "learning_rate": 7.866342254663423e-06, "loss": 0.6191, "step": 22127 }, { "epoch": 0.646054129806429, "grad_norm": 0.6445534216671882, "learning_rate": 7.865693430656935e-06, "loss": 0.5648, "step": 22128 }, { "epoch": 0.6460833260342763, "grad_norm": 0.6716804787243374, "learning_rate": 7.865044606650447e-06, "loss": 0.6863, "step": 22129 }, { "epoch": 0.6461125222621237, "grad_norm": 0.6443157434730975, "learning_rate": 7.864395782643959e-06, "loss": 0.6546, "step": 22130 }, { "epoch": 0.6461417184899712, "grad_norm": 0.6168025375361154, "learning_rate": 7.863746958637471e-06, "loss": 0.5664, "step": 22131 }, { "epoch": 0.6461709147178185, "grad_norm": 0.6629502986092519, "learning_rate": 7.863098134630981e-06, "loss": 0.6549, "step": 22132 }, { "epoch": 0.6462001109456659, "grad_norm": 0.6416235787107216, "learning_rate": 7.862449310624493e-06, "loss": 0.5815, "step": 22133 }, { "epoch": 0.6462293071735132, "grad_norm": 0.6444700228561142, "learning_rate": 7.861800486618006e-06, "loss": 0.5898, "step": 22134 }, { "epoch": 0.6462585034013606, "grad_norm": 0.6394521435979247, "learning_rate": 7.861151662611518e-06, "loss": 0.6133, "step": 22135 }, { "epoch": 0.646287699629208, "grad_norm": 0.653850976871317, "learning_rate": 7.860502838605028e-06, "loss": 0.6001, "step": 22136 }, { "epoch": 0.6463168958570553, "grad_norm": 0.6602964304180516, "learning_rate": 7.85985401459854e-06, "loss": 0.6351, "step": 22137 }, { "epoch": 0.6463460920849027, "grad_norm": 0.6328959981021812, "learning_rate": 7.859205190592054e-06, "loss": 0.5537, "step": 22138 }, { "epoch": 0.64637528831275, "grad_norm": 0.6230020453490118, "learning_rate": 7.858556366585564e-06, "loss": 0.6071, "step": 22139 }, { "epoch": 0.6464044845405974, "grad_norm": 0.6075074446424032, "learning_rate": 7.857907542579076e-06, "loss": 0.5714, "step": 22140 }, { "epoch": 0.6464336807684448, "grad_norm": 0.6516682615841685, "learning_rate": 7.857258718572588e-06, "loss": 0.6518, "step": 22141 }, { "epoch": 0.6464628769962921, "grad_norm": 0.5967921401126909, "learning_rate": 7.8566098945661e-06, "loss": 0.5652, "step": 22142 }, { "epoch": 0.6464920732241395, "grad_norm": 0.6682450006327518, "learning_rate": 7.85596107055961e-06, "loss": 0.6621, "step": 22143 }, { "epoch": 0.6465212694519868, "grad_norm": 0.6119636265842541, "learning_rate": 7.855312246553122e-06, "loss": 0.4993, "step": 22144 }, { "epoch": 0.6465504656798342, "grad_norm": 0.6594017020137705, "learning_rate": 7.854663422546634e-06, "loss": 0.6628, "step": 22145 }, { "epoch": 0.6465796619076816, "grad_norm": 0.787801854310741, "learning_rate": 7.854014598540146e-06, "loss": 0.6267, "step": 22146 }, { "epoch": 0.6466088581355289, "grad_norm": 0.6454599244498506, "learning_rate": 7.853365774533658e-06, "loss": 0.6061, "step": 22147 }, { "epoch": 0.6466380543633763, "grad_norm": 0.7106794429197093, "learning_rate": 7.85271695052717e-06, "loss": 0.6738, "step": 22148 }, { "epoch": 0.6466672505912237, "grad_norm": 0.6669992993201057, "learning_rate": 7.852068126520683e-06, "loss": 0.6201, "step": 22149 }, { "epoch": 0.646696446819071, "grad_norm": 0.6565151138867027, "learning_rate": 7.851419302514195e-06, "loss": 0.639, "step": 22150 }, { "epoch": 0.6467256430469184, "grad_norm": 0.6742574013797941, "learning_rate": 7.850770478507705e-06, "loss": 0.6726, "step": 22151 }, { "epoch": 0.6467548392747657, "grad_norm": 0.6547950044483327, "learning_rate": 7.850121654501217e-06, "loss": 0.5986, "step": 22152 }, { "epoch": 0.6467840355026131, "grad_norm": 0.6303840203770787, "learning_rate": 7.849472830494729e-06, "loss": 0.5717, "step": 22153 }, { "epoch": 0.6468132317304605, "grad_norm": 0.6302412024249179, "learning_rate": 7.848824006488241e-06, "loss": 0.5233, "step": 22154 }, { "epoch": 0.6468424279583078, "grad_norm": 0.6372496813601081, "learning_rate": 7.848175182481751e-06, "loss": 0.6266, "step": 22155 }, { "epoch": 0.6468716241861552, "grad_norm": 0.6334952530311118, "learning_rate": 7.847526358475263e-06, "loss": 0.6108, "step": 22156 }, { "epoch": 0.6469008204140025, "grad_norm": 0.6246280108597722, "learning_rate": 7.846877534468777e-06, "loss": 0.5785, "step": 22157 }, { "epoch": 0.6469300166418499, "grad_norm": 0.6160664169238801, "learning_rate": 7.846228710462287e-06, "loss": 0.5385, "step": 22158 }, { "epoch": 0.6469592128696973, "grad_norm": 0.6481587974834804, "learning_rate": 7.8455798864558e-06, "loss": 0.6217, "step": 22159 }, { "epoch": 0.6469884090975446, "grad_norm": 0.6303872700937848, "learning_rate": 7.844931062449311e-06, "loss": 0.6003, "step": 22160 }, { "epoch": 0.647017605325392, "grad_norm": 0.7212263204359408, "learning_rate": 7.844282238442823e-06, "loss": 0.7425, "step": 22161 }, { "epoch": 0.6470468015532393, "grad_norm": 0.6143696798411177, "learning_rate": 7.843633414436334e-06, "loss": 0.6302, "step": 22162 }, { "epoch": 0.6470759977810867, "grad_norm": 0.6342842296032369, "learning_rate": 7.842984590429846e-06, "loss": 0.6201, "step": 22163 }, { "epoch": 0.6471051940089341, "grad_norm": 0.618536790111003, "learning_rate": 7.842335766423358e-06, "loss": 0.605, "step": 22164 }, { "epoch": 0.6471343902367814, "grad_norm": 0.6300660165979712, "learning_rate": 7.84168694241687e-06, "loss": 0.5986, "step": 22165 }, { "epoch": 0.6471635864646288, "grad_norm": 0.6558611182578556, "learning_rate": 7.841038118410382e-06, "loss": 0.6136, "step": 22166 }, { "epoch": 0.6471927826924762, "grad_norm": 0.7204528055121728, "learning_rate": 7.840389294403894e-06, "loss": 0.6817, "step": 22167 }, { "epoch": 0.6472219789203235, "grad_norm": 0.6978514752769652, "learning_rate": 7.839740470397406e-06, "loss": 0.618, "step": 22168 }, { "epoch": 0.6472511751481709, "grad_norm": 0.6039737025958523, "learning_rate": 7.839091646390918e-06, "loss": 0.5662, "step": 22169 }, { "epoch": 0.6472803713760182, "grad_norm": 0.6811944094269564, "learning_rate": 7.838442822384428e-06, "loss": 0.6201, "step": 22170 }, { "epoch": 0.6473095676038656, "grad_norm": 0.6542432179051373, "learning_rate": 7.83779399837794e-06, "loss": 0.5985, "step": 22171 }, { "epoch": 0.647338763831713, "grad_norm": 0.6239378115881378, "learning_rate": 7.837145174371452e-06, "loss": 0.6073, "step": 22172 }, { "epoch": 0.6473679600595603, "grad_norm": 0.6213557938714199, "learning_rate": 7.836496350364964e-06, "loss": 0.5424, "step": 22173 }, { "epoch": 0.6473971562874077, "grad_norm": 0.6307014624857891, "learning_rate": 7.835847526358475e-06, "loss": 0.6166, "step": 22174 }, { "epoch": 0.647426352515255, "grad_norm": 0.6138779777634564, "learning_rate": 7.835198702351988e-06, "loss": 0.5722, "step": 22175 }, { "epoch": 0.6474555487431024, "grad_norm": 0.6701985953731939, "learning_rate": 7.8345498783455e-06, "loss": 0.6307, "step": 22176 }, { "epoch": 0.6474847449709498, "grad_norm": 0.6847680427394247, "learning_rate": 7.83390105433901e-06, "loss": 0.6565, "step": 22177 }, { "epoch": 0.6475139411987971, "grad_norm": 0.660665407854269, "learning_rate": 7.833252230332523e-06, "loss": 0.643, "step": 22178 }, { "epoch": 0.6475431374266445, "grad_norm": 0.618649933554992, "learning_rate": 7.832603406326035e-06, "loss": 0.6033, "step": 22179 }, { "epoch": 0.6475723336544918, "grad_norm": 0.6154745902334764, "learning_rate": 7.831954582319547e-06, "loss": 0.557, "step": 22180 }, { "epoch": 0.6476015298823392, "grad_norm": 0.6395847424894012, "learning_rate": 7.831305758313057e-06, "loss": 0.6219, "step": 22181 }, { "epoch": 0.6476307261101866, "grad_norm": 0.6819624386821381, "learning_rate": 7.83065693430657e-06, "loss": 0.6422, "step": 22182 }, { "epoch": 0.6476599223380339, "grad_norm": 0.670980830172022, "learning_rate": 7.830008110300081e-06, "loss": 0.6605, "step": 22183 }, { "epoch": 0.6476891185658813, "grad_norm": 0.5904951079580464, "learning_rate": 7.829359286293593e-06, "loss": 0.5245, "step": 22184 }, { "epoch": 0.6477183147937287, "grad_norm": 0.6730009465454985, "learning_rate": 7.828710462287105e-06, "loss": 0.6151, "step": 22185 }, { "epoch": 0.647747511021576, "grad_norm": 0.619837767237806, "learning_rate": 7.828061638280617e-06, "loss": 0.5379, "step": 22186 }, { "epoch": 0.6477767072494234, "grad_norm": 0.6214759566064121, "learning_rate": 7.82741281427413e-06, "loss": 0.5988, "step": 22187 }, { "epoch": 0.6478059034772707, "grad_norm": 0.6200855085029465, "learning_rate": 7.826763990267641e-06, "loss": 0.5544, "step": 22188 }, { "epoch": 0.6478350997051181, "grad_norm": 0.6823816206711376, "learning_rate": 7.826115166261152e-06, "loss": 0.6869, "step": 22189 }, { "epoch": 0.6478642959329655, "grad_norm": 0.635153251106979, "learning_rate": 7.825466342254664e-06, "loss": 0.5413, "step": 22190 }, { "epoch": 0.6478934921608128, "grad_norm": 0.7524942726235657, "learning_rate": 7.824817518248176e-06, "loss": 0.6083, "step": 22191 }, { "epoch": 0.6479226883886602, "grad_norm": 0.6750181251477504, "learning_rate": 7.824168694241688e-06, "loss": 0.7028, "step": 22192 }, { "epoch": 0.6479518846165075, "grad_norm": 0.6459556919265792, "learning_rate": 7.823519870235198e-06, "loss": 0.6128, "step": 22193 }, { "epoch": 0.6479810808443549, "grad_norm": 0.6897006669352008, "learning_rate": 7.822871046228712e-06, "loss": 0.626, "step": 22194 }, { "epoch": 0.6480102770722023, "grad_norm": 0.6732480556275274, "learning_rate": 7.822222222222224e-06, "loss": 0.6768, "step": 22195 }, { "epoch": 0.6480394733000496, "grad_norm": 0.668311660521437, "learning_rate": 7.821573398215734e-06, "loss": 0.6535, "step": 22196 }, { "epoch": 0.648068669527897, "grad_norm": 0.6829536623972187, "learning_rate": 7.820924574209246e-06, "loss": 0.6503, "step": 22197 }, { "epoch": 0.6480978657557444, "grad_norm": 0.6502650177503156, "learning_rate": 7.820275750202758e-06, "loss": 0.5684, "step": 22198 }, { "epoch": 0.6481270619835917, "grad_norm": 0.6275696979294627, "learning_rate": 7.81962692619627e-06, "loss": 0.5946, "step": 22199 }, { "epoch": 0.6481562582114391, "grad_norm": 0.6188632827486531, "learning_rate": 7.81897810218978e-06, "loss": 0.5548, "step": 22200 }, { "epoch": 0.6481854544392864, "grad_norm": 0.6078355327344538, "learning_rate": 7.818329278183293e-06, "loss": 0.5306, "step": 22201 }, { "epoch": 0.6482146506671338, "grad_norm": 0.6229919084139143, "learning_rate": 7.817680454176805e-06, "loss": 0.5486, "step": 22202 }, { "epoch": 0.6482438468949812, "grad_norm": 0.6090548924490318, "learning_rate": 7.817031630170317e-06, "loss": 0.5809, "step": 22203 }, { "epoch": 0.6482730431228285, "grad_norm": 0.6832303831327629, "learning_rate": 7.816382806163829e-06, "loss": 0.6334, "step": 22204 }, { "epoch": 0.6483022393506759, "grad_norm": 0.596305439097313, "learning_rate": 7.81573398215734e-06, "loss": 0.5348, "step": 22205 }, { "epoch": 0.6483314355785232, "grad_norm": 0.5906380084898484, "learning_rate": 7.815085158150853e-06, "loss": 0.5257, "step": 22206 }, { "epoch": 0.6483606318063706, "grad_norm": 0.7026619364058682, "learning_rate": 7.814436334144365e-06, "loss": 0.658, "step": 22207 }, { "epoch": 0.648389828034218, "grad_norm": 0.6136363465775133, "learning_rate": 7.813787510137875e-06, "loss": 0.5336, "step": 22208 }, { "epoch": 0.6484190242620653, "grad_norm": 0.6324204932647606, "learning_rate": 7.813138686131387e-06, "loss": 0.5861, "step": 22209 }, { "epoch": 0.6484482204899127, "grad_norm": 0.6118737016875088, "learning_rate": 7.8124898621249e-06, "loss": 0.5625, "step": 22210 }, { "epoch": 0.64847741671776, "grad_norm": 0.6519583914570728, "learning_rate": 7.811841038118411e-06, "loss": 0.6314, "step": 22211 }, { "epoch": 0.6485066129456074, "grad_norm": 0.6112444757492609, "learning_rate": 7.811192214111922e-06, "loss": 0.5518, "step": 22212 }, { "epoch": 0.6485358091734548, "grad_norm": 0.6575731241514534, "learning_rate": 7.810543390105435e-06, "loss": 0.633, "step": 22213 }, { "epoch": 0.6485650054013021, "grad_norm": 0.6660326989727963, "learning_rate": 7.809894566098947e-06, "loss": 0.6402, "step": 22214 }, { "epoch": 0.6485942016291495, "grad_norm": 0.6965170815967113, "learning_rate": 7.809245742092458e-06, "loss": 0.7163, "step": 22215 }, { "epoch": 0.6486233978569969, "grad_norm": 0.6143589941900657, "learning_rate": 7.80859691808597e-06, "loss": 0.5842, "step": 22216 }, { "epoch": 0.6486525940848442, "grad_norm": 0.7233112059223508, "learning_rate": 7.807948094079482e-06, "loss": 0.6246, "step": 22217 }, { "epoch": 0.6486817903126916, "grad_norm": 0.6587115447095884, "learning_rate": 7.807299270072994e-06, "loss": 0.6294, "step": 22218 }, { "epoch": 0.6487109865405389, "grad_norm": 0.676909889229033, "learning_rate": 7.806650446066504e-06, "loss": 0.6174, "step": 22219 }, { "epoch": 0.6487401827683863, "grad_norm": 0.6337145124322817, "learning_rate": 7.806001622060016e-06, "loss": 0.6369, "step": 22220 }, { "epoch": 0.6487693789962337, "grad_norm": 0.6532810196381044, "learning_rate": 7.805352798053528e-06, "loss": 0.5961, "step": 22221 }, { "epoch": 0.648798575224081, "grad_norm": 0.6346836768986871, "learning_rate": 7.80470397404704e-06, "loss": 0.5606, "step": 22222 }, { "epoch": 0.6488277714519284, "grad_norm": 0.6521936810772779, "learning_rate": 7.804055150040552e-06, "loss": 0.5954, "step": 22223 }, { "epoch": 0.6488569676797757, "grad_norm": 0.625250073366656, "learning_rate": 7.803406326034064e-06, "loss": 0.573, "step": 22224 }, { "epoch": 0.6488861639076231, "grad_norm": 0.6500453800308281, "learning_rate": 7.802757502027576e-06, "loss": 0.576, "step": 22225 }, { "epoch": 0.6489153601354705, "grad_norm": 0.6339342109402547, "learning_rate": 7.802108678021088e-06, "loss": 0.5728, "step": 22226 }, { "epoch": 0.6489445563633178, "grad_norm": 0.676896981908936, "learning_rate": 7.801459854014599e-06, "loss": 0.648, "step": 22227 }, { "epoch": 0.6489737525911652, "grad_norm": 0.6602114455512796, "learning_rate": 7.80081103000811e-06, "loss": 0.655, "step": 22228 }, { "epoch": 0.6490029488190125, "grad_norm": 0.6873218594786882, "learning_rate": 7.800162206001623e-06, "loss": 0.6617, "step": 22229 }, { "epoch": 0.6490321450468599, "grad_norm": 0.6678814588896114, "learning_rate": 7.799513381995135e-06, "loss": 0.6035, "step": 22230 }, { "epoch": 0.6490613412747073, "grad_norm": 0.6234374769915664, "learning_rate": 7.798864557988645e-06, "loss": 0.5557, "step": 22231 }, { "epoch": 0.6490905375025546, "grad_norm": 0.6291006588320134, "learning_rate": 7.798215733982159e-06, "loss": 0.6103, "step": 22232 }, { "epoch": 0.649119733730402, "grad_norm": 0.6609582426716074, "learning_rate": 7.79756690997567e-06, "loss": 0.5849, "step": 22233 }, { "epoch": 0.6491489299582494, "grad_norm": 0.6755446828044641, "learning_rate": 7.796918085969181e-06, "loss": 0.6212, "step": 22234 }, { "epoch": 0.6491781261860967, "grad_norm": 0.6408266708177016, "learning_rate": 7.796269261962693e-06, "loss": 0.6044, "step": 22235 }, { "epoch": 0.6492073224139441, "grad_norm": 0.6093504505458748, "learning_rate": 7.795620437956205e-06, "loss": 0.5504, "step": 22236 }, { "epoch": 0.6492365186417914, "grad_norm": 0.6619219926117907, "learning_rate": 7.794971613949717e-06, "loss": 0.6228, "step": 22237 }, { "epoch": 0.6492657148696388, "grad_norm": 0.6225963091344725, "learning_rate": 7.794322789943228e-06, "loss": 0.5849, "step": 22238 }, { "epoch": 0.6492949110974862, "grad_norm": 0.6493836532688951, "learning_rate": 7.79367396593674e-06, "loss": 0.6239, "step": 22239 }, { "epoch": 0.6493241073253335, "grad_norm": 0.6972689511277191, "learning_rate": 7.793025141930252e-06, "loss": 0.6994, "step": 22240 }, { "epoch": 0.6493533035531809, "grad_norm": 0.6602746197370108, "learning_rate": 7.792376317923764e-06, "loss": 0.6099, "step": 22241 }, { "epoch": 0.6493824997810282, "grad_norm": 0.6177503206704134, "learning_rate": 7.791727493917276e-06, "loss": 0.5364, "step": 22242 }, { "epoch": 0.6494116960088756, "grad_norm": 0.6807543483858232, "learning_rate": 7.791078669910788e-06, "loss": 0.666, "step": 22243 }, { "epoch": 0.649440892236723, "grad_norm": 0.6436579518345865, "learning_rate": 7.7904298459043e-06, "loss": 0.6263, "step": 22244 }, { "epoch": 0.6494700884645703, "grad_norm": 0.6622528878250467, "learning_rate": 7.789781021897812e-06, "loss": 0.671, "step": 22245 }, { "epoch": 0.6494992846924177, "grad_norm": 0.6180835026988497, "learning_rate": 7.789132197891322e-06, "loss": 0.5688, "step": 22246 }, { "epoch": 0.649528480920265, "grad_norm": 0.6265003304724333, "learning_rate": 7.788483373884834e-06, "loss": 0.5814, "step": 22247 }, { "epoch": 0.6495576771481124, "grad_norm": 0.6248545211264893, "learning_rate": 7.787834549878346e-06, "loss": 0.5631, "step": 22248 }, { "epoch": 0.6495868733759598, "grad_norm": 0.6388063405410648, "learning_rate": 7.787185725871858e-06, "loss": 0.6001, "step": 22249 }, { "epoch": 0.6496160696038071, "grad_norm": 0.6702944064738836, "learning_rate": 7.786536901865368e-06, "loss": 0.6401, "step": 22250 }, { "epoch": 0.6496452658316545, "grad_norm": 0.6500004649106457, "learning_rate": 7.785888077858882e-06, "loss": 0.5553, "step": 22251 }, { "epoch": 0.649674462059502, "grad_norm": 0.6336595465781154, "learning_rate": 7.785239253852394e-06, "loss": 0.6113, "step": 22252 }, { "epoch": 0.6497036582873493, "grad_norm": 0.6743683824130194, "learning_rate": 7.784590429845905e-06, "loss": 0.6452, "step": 22253 }, { "epoch": 0.6497328545151967, "grad_norm": 0.6346515793415811, "learning_rate": 7.783941605839417e-06, "loss": 0.5854, "step": 22254 }, { "epoch": 0.649762050743044, "grad_norm": 0.6371204676805624, "learning_rate": 7.783292781832929e-06, "loss": 0.5899, "step": 22255 }, { "epoch": 0.6497912469708914, "grad_norm": 0.6143333549777401, "learning_rate": 7.78264395782644e-06, "loss": 0.5946, "step": 22256 }, { "epoch": 0.6498204431987388, "grad_norm": 0.6490362045390599, "learning_rate": 7.781995133819951e-06, "loss": 0.6239, "step": 22257 }, { "epoch": 0.6498496394265861, "grad_norm": 0.6201862975423639, "learning_rate": 7.781346309813463e-06, "loss": 0.5729, "step": 22258 }, { "epoch": 0.6498788356544335, "grad_norm": 0.6054266770941047, "learning_rate": 7.780697485806975e-06, "loss": 0.5569, "step": 22259 }, { "epoch": 0.6499080318822809, "grad_norm": 0.5860291501399489, "learning_rate": 7.780048661800487e-06, "loss": 0.5333, "step": 22260 }, { "epoch": 0.6499372281101282, "grad_norm": 0.5789630323752478, "learning_rate": 7.779399837793999e-06, "loss": 0.5122, "step": 22261 }, { "epoch": 0.6499664243379756, "grad_norm": 0.5873338822635993, "learning_rate": 7.778751013787511e-06, "loss": 0.5122, "step": 22262 }, { "epoch": 0.6499956205658229, "grad_norm": 0.6914660582349577, "learning_rate": 7.778102189781023e-06, "loss": 0.6598, "step": 22263 }, { "epoch": 0.6500248167936703, "grad_norm": 0.7156173138303231, "learning_rate": 7.777453365774535e-06, "loss": 0.7036, "step": 22264 }, { "epoch": 0.6500540130215177, "grad_norm": 0.577877789470291, "learning_rate": 7.776804541768046e-06, "loss": 0.4641, "step": 22265 }, { "epoch": 0.650083209249365, "grad_norm": 0.6707311912151913, "learning_rate": 7.776155717761558e-06, "loss": 0.6391, "step": 22266 }, { "epoch": 0.6501124054772124, "grad_norm": 0.6341937673842122, "learning_rate": 7.77550689375507e-06, "loss": 0.5601, "step": 22267 }, { "epoch": 0.6501416017050597, "grad_norm": 0.6771180783481142, "learning_rate": 7.774858069748582e-06, "loss": 0.6094, "step": 22268 }, { "epoch": 0.6501707979329071, "grad_norm": 0.5955590442738177, "learning_rate": 7.774209245742092e-06, "loss": 0.5526, "step": 22269 }, { "epoch": 0.6501999941607545, "grad_norm": 0.6131697540126758, "learning_rate": 7.773560421735606e-06, "loss": 0.5374, "step": 22270 }, { "epoch": 0.6502291903886018, "grad_norm": 0.6535888828359018, "learning_rate": 7.772911597729118e-06, "loss": 0.6265, "step": 22271 }, { "epoch": 0.6502583866164492, "grad_norm": 0.6163797561550434, "learning_rate": 7.772262773722628e-06, "loss": 0.5812, "step": 22272 }, { "epoch": 0.6502875828442966, "grad_norm": 0.6827281806573084, "learning_rate": 7.77161394971614e-06, "loss": 0.6046, "step": 22273 }, { "epoch": 0.6503167790721439, "grad_norm": 0.6884791741250671, "learning_rate": 7.770965125709652e-06, "loss": 0.6455, "step": 22274 }, { "epoch": 0.6503459752999913, "grad_norm": 0.6360840531297446, "learning_rate": 7.770316301703164e-06, "loss": 0.591, "step": 22275 }, { "epoch": 0.6503751715278386, "grad_norm": 0.6518424935019136, "learning_rate": 7.769667477696674e-06, "loss": 0.6233, "step": 22276 }, { "epoch": 0.650404367755686, "grad_norm": 0.6363830184927755, "learning_rate": 7.769018653690186e-06, "loss": 0.5893, "step": 22277 }, { "epoch": 0.6504335639835334, "grad_norm": 0.7399398074127274, "learning_rate": 7.768369829683698e-06, "loss": 0.657, "step": 22278 }, { "epoch": 0.6504627602113807, "grad_norm": 0.6637617196129975, "learning_rate": 7.76772100567721e-06, "loss": 0.6292, "step": 22279 }, { "epoch": 0.6504919564392281, "grad_norm": 0.788647364203475, "learning_rate": 7.767072181670723e-06, "loss": 0.6138, "step": 22280 }, { "epoch": 0.6505211526670754, "grad_norm": 0.6572881525324857, "learning_rate": 7.766423357664235e-06, "loss": 0.6432, "step": 22281 }, { "epoch": 0.6505503488949228, "grad_norm": 0.6654574369334243, "learning_rate": 7.765774533657747e-06, "loss": 0.6467, "step": 22282 }, { "epoch": 0.6505795451227702, "grad_norm": 0.6644655767479116, "learning_rate": 7.765125709651259e-06, "loss": 0.6677, "step": 22283 }, { "epoch": 0.6506087413506175, "grad_norm": 0.6296899892759419, "learning_rate": 7.764476885644769e-06, "loss": 0.5994, "step": 22284 }, { "epoch": 0.6506379375784649, "grad_norm": 0.6461228487761314, "learning_rate": 7.763828061638281e-06, "loss": 0.5799, "step": 22285 }, { "epoch": 0.6506671338063122, "grad_norm": 0.7260299335592835, "learning_rate": 7.763179237631793e-06, "loss": 0.6455, "step": 22286 }, { "epoch": 0.6506963300341596, "grad_norm": 0.7017823240717603, "learning_rate": 7.762530413625305e-06, "loss": 0.6759, "step": 22287 }, { "epoch": 0.650725526262007, "grad_norm": 0.6604225410217462, "learning_rate": 7.761881589618815e-06, "loss": 0.6364, "step": 22288 }, { "epoch": 0.6507547224898543, "grad_norm": 0.600169447450653, "learning_rate": 7.761232765612329e-06, "loss": 0.506, "step": 22289 }, { "epoch": 0.6507839187177017, "grad_norm": 0.601037650647808, "learning_rate": 7.760583941605841e-06, "loss": 0.5492, "step": 22290 }, { "epoch": 0.650813114945549, "grad_norm": 0.616629046700795, "learning_rate": 7.759935117599351e-06, "loss": 0.5529, "step": 22291 }, { "epoch": 0.6508423111733964, "grad_norm": 0.6289764893552969, "learning_rate": 7.759286293592863e-06, "loss": 0.6072, "step": 22292 }, { "epoch": 0.6508715074012438, "grad_norm": 0.6322630536661902, "learning_rate": 7.758637469586375e-06, "loss": 0.5755, "step": 22293 }, { "epoch": 0.6509007036290911, "grad_norm": 0.6307995454973478, "learning_rate": 7.757988645579888e-06, "loss": 0.598, "step": 22294 }, { "epoch": 0.6509298998569385, "grad_norm": 0.6155488349263781, "learning_rate": 7.757339821573398e-06, "loss": 0.5658, "step": 22295 }, { "epoch": 0.6509590960847859, "grad_norm": 0.612993556773737, "learning_rate": 7.75669099756691e-06, "loss": 0.5777, "step": 22296 }, { "epoch": 0.6509882923126332, "grad_norm": 0.6411470968762742, "learning_rate": 7.756042173560422e-06, "loss": 0.6522, "step": 22297 }, { "epoch": 0.6510174885404806, "grad_norm": 0.6457403825792041, "learning_rate": 7.755393349553934e-06, "loss": 0.5687, "step": 22298 }, { "epoch": 0.6510466847683279, "grad_norm": 0.6876412716241161, "learning_rate": 7.754744525547446e-06, "loss": 0.6314, "step": 22299 }, { "epoch": 0.6510758809961753, "grad_norm": 0.641118174900825, "learning_rate": 7.754095701540958e-06, "loss": 0.6298, "step": 22300 }, { "epoch": 0.6511050772240227, "grad_norm": 0.701236443290422, "learning_rate": 7.75344687753447e-06, "loss": 0.5861, "step": 22301 }, { "epoch": 0.65113427345187, "grad_norm": 0.6193345577847493, "learning_rate": 7.752798053527982e-06, "loss": 0.5902, "step": 22302 }, { "epoch": 0.6511634696797174, "grad_norm": 0.6559169065473398, "learning_rate": 7.752149229521492e-06, "loss": 0.6761, "step": 22303 }, { "epoch": 0.6511926659075647, "grad_norm": 0.6117961788334424, "learning_rate": 7.751500405515004e-06, "loss": 0.5548, "step": 22304 }, { "epoch": 0.6512218621354121, "grad_norm": 0.6794437033130295, "learning_rate": 7.750851581508516e-06, "loss": 0.6598, "step": 22305 }, { "epoch": 0.6512510583632595, "grad_norm": 0.6872847521239526, "learning_rate": 7.750202757502028e-06, "loss": 0.6516, "step": 22306 }, { "epoch": 0.6512802545911068, "grad_norm": 0.6549085587626785, "learning_rate": 7.749553933495539e-06, "loss": 0.6477, "step": 22307 }, { "epoch": 0.6513094508189542, "grad_norm": 0.6470032972785734, "learning_rate": 7.748905109489053e-06, "loss": 0.6207, "step": 22308 }, { "epoch": 0.6513386470468016, "grad_norm": 0.7315673560624364, "learning_rate": 7.748256285482565e-06, "loss": 0.693, "step": 22309 }, { "epoch": 0.6513678432746489, "grad_norm": 0.6200302616334052, "learning_rate": 7.747607461476075e-06, "loss": 0.5579, "step": 22310 }, { "epoch": 0.6513970395024963, "grad_norm": 0.6398081221961898, "learning_rate": 7.746958637469587e-06, "loss": 0.6029, "step": 22311 }, { "epoch": 0.6514262357303436, "grad_norm": 0.6190051067116646, "learning_rate": 7.746309813463099e-06, "loss": 0.5528, "step": 22312 }, { "epoch": 0.651455431958191, "grad_norm": 0.6427508581787141, "learning_rate": 7.745660989456611e-06, "loss": 0.6323, "step": 22313 }, { "epoch": 0.6514846281860384, "grad_norm": 0.6658975640974298, "learning_rate": 7.745012165450121e-06, "loss": 0.6242, "step": 22314 }, { "epoch": 0.6515138244138857, "grad_norm": 0.6457615239877802, "learning_rate": 7.744363341443633e-06, "loss": 0.5929, "step": 22315 }, { "epoch": 0.6515430206417331, "grad_norm": 0.6160566807040405, "learning_rate": 7.743714517437145e-06, "loss": 0.5691, "step": 22316 }, { "epoch": 0.6515722168695804, "grad_norm": 0.6584481703343338, "learning_rate": 7.743065693430657e-06, "loss": 0.6251, "step": 22317 }, { "epoch": 0.6516014130974278, "grad_norm": 0.6406367668290843, "learning_rate": 7.74241686942417e-06, "loss": 0.6322, "step": 22318 }, { "epoch": 0.6516306093252752, "grad_norm": 0.6671855933731364, "learning_rate": 7.741768045417681e-06, "loss": 0.6513, "step": 22319 }, { "epoch": 0.6516598055531225, "grad_norm": 0.6770355824998681, "learning_rate": 7.741119221411193e-06, "loss": 0.701, "step": 22320 }, { "epoch": 0.6516890017809699, "grad_norm": 0.6790417712095139, "learning_rate": 7.740470397404704e-06, "loss": 0.6875, "step": 22321 }, { "epoch": 0.6517181980088173, "grad_norm": 0.6810566081128493, "learning_rate": 7.739821573398216e-06, "loss": 0.6657, "step": 22322 }, { "epoch": 0.6517473942366646, "grad_norm": 0.6114910925399234, "learning_rate": 7.739172749391728e-06, "loss": 0.5302, "step": 22323 }, { "epoch": 0.651776590464512, "grad_norm": 0.6427461455870165, "learning_rate": 7.73852392538524e-06, "loss": 0.6056, "step": 22324 }, { "epoch": 0.6518057866923593, "grad_norm": 0.7253039980234924, "learning_rate": 7.737875101378752e-06, "loss": 0.6136, "step": 22325 }, { "epoch": 0.6518349829202067, "grad_norm": 0.6368301854385954, "learning_rate": 7.737226277372264e-06, "loss": 0.6167, "step": 22326 }, { "epoch": 0.6518641791480541, "grad_norm": 0.6702706444475774, "learning_rate": 7.736577453365776e-06, "loss": 0.6617, "step": 22327 }, { "epoch": 0.6518933753759014, "grad_norm": 0.6639959471511105, "learning_rate": 7.735928629359288e-06, "loss": 0.6667, "step": 22328 }, { "epoch": 0.6519225716037488, "grad_norm": 0.6599045076313759, "learning_rate": 7.735279805352798e-06, "loss": 0.6351, "step": 22329 }, { "epoch": 0.6519517678315961, "grad_norm": 0.6627167331780544, "learning_rate": 7.73463098134631e-06, "loss": 0.6453, "step": 22330 }, { "epoch": 0.6519809640594435, "grad_norm": 0.6128859171695101, "learning_rate": 7.733982157339822e-06, "loss": 0.548, "step": 22331 }, { "epoch": 0.6520101602872909, "grad_norm": 0.640746134841982, "learning_rate": 7.733333333333334e-06, "loss": 0.6226, "step": 22332 }, { "epoch": 0.6520393565151382, "grad_norm": 0.5852485805525494, "learning_rate": 7.732684509326845e-06, "loss": 0.5078, "step": 22333 }, { "epoch": 0.6520685527429856, "grad_norm": 0.64452283051024, "learning_rate": 7.732035685320357e-06, "loss": 0.6204, "step": 22334 }, { "epoch": 0.652097748970833, "grad_norm": 0.6796183732820364, "learning_rate": 7.731386861313869e-06, "loss": 0.6404, "step": 22335 }, { "epoch": 0.6521269451986803, "grad_norm": 0.6751474754663407, "learning_rate": 7.73073803730738e-06, "loss": 0.5729, "step": 22336 }, { "epoch": 0.6521561414265277, "grad_norm": 0.6383244719154437, "learning_rate": 7.730089213300893e-06, "loss": 0.6286, "step": 22337 }, { "epoch": 0.652185337654375, "grad_norm": 0.6568855868969683, "learning_rate": 7.729440389294405e-06, "loss": 0.6368, "step": 22338 }, { "epoch": 0.6522145338822224, "grad_norm": 0.6717307631712169, "learning_rate": 7.728791565287917e-06, "loss": 0.6129, "step": 22339 }, { "epoch": 0.6522437301100698, "grad_norm": 0.6902795003913401, "learning_rate": 7.728142741281427e-06, "loss": 0.673, "step": 22340 }, { "epoch": 0.6522729263379171, "grad_norm": 0.6474264046698722, "learning_rate": 7.72749391727494e-06, "loss": 0.6262, "step": 22341 }, { "epoch": 0.6523021225657645, "grad_norm": 0.6535890011588611, "learning_rate": 7.726845093268451e-06, "loss": 0.5726, "step": 22342 }, { "epoch": 0.6523313187936118, "grad_norm": 0.5771402174395431, "learning_rate": 7.726196269261963e-06, "loss": 0.5226, "step": 22343 }, { "epoch": 0.6523605150214592, "grad_norm": 0.6639306029655239, "learning_rate": 7.725547445255475e-06, "loss": 0.6091, "step": 22344 }, { "epoch": 0.6523897112493066, "grad_norm": 0.6285958989735153, "learning_rate": 7.724898621248987e-06, "loss": 0.5675, "step": 22345 }, { "epoch": 0.6524189074771539, "grad_norm": 0.6791886321203099, "learning_rate": 7.7242497972425e-06, "loss": 0.685, "step": 22346 }, { "epoch": 0.6524481037050013, "grad_norm": 0.6349772697539356, "learning_rate": 7.723600973236011e-06, "loss": 0.5895, "step": 22347 }, { "epoch": 0.6524772999328486, "grad_norm": 0.6493377432965866, "learning_rate": 7.722952149229522e-06, "loss": 0.5797, "step": 22348 }, { "epoch": 0.652506496160696, "grad_norm": 0.6718924614768802, "learning_rate": 7.722303325223034e-06, "loss": 0.63, "step": 22349 }, { "epoch": 0.6525356923885434, "grad_norm": 0.5840213939055025, "learning_rate": 7.721654501216546e-06, "loss": 0.5351, "step": 22350 }, { "epoch": 0.6525648886163907, "grad_norm": 0.6356888113893451, "learning_rate": 7.721005677210058e-06, "loss": 0.6043, "step": 22351 }, { "epoch": 0.6525940848442381, "grad_norm": 0.5983645349494301, "learning_rate": 7.720356853203568e-06, "loss": 0.5224, "step": 22352 }, { "epoch": 0.6526232810720854, "grad_norm": 0.6520704316392146, "learning_rate": 7.71970802919708e-06, "loss": 0.6012, "step": 22353 }, { "epoch": 0.6526524772999328, "grad_norm": 0.71015618937684, "learning_rate": 7.719059205190592e-06, "loss": 0.7136, "step": 22354 }, { "epoch": 0.6526816735277802, "grad_norm": 0.6298891268451321, "learning_rate": 7.718410381184104e-06, "loss": 0.5549, "step": 22355 }, { "epoch": 0.6527108697556275, "grad_norm": 0.6448777879747991, "learning_rate": 7.717761557177616e-06, "loss": 0.5719, "step": 22356 }, { "epoch": 0.6527400659834749, "grad_norm": 0.6326816208333259, "learning_rate": 7.717112733171128e-06, "loss": 0.6003, "step": 22357 }, { "epoch": 0.6527692622113223, "grad_norm": 0.6448594029247926, "learning_rate": 7.71646390916464e-06, "loss": 0.5896, "step": 22358 }, { "epoch": 0.6527984584391696, "grad_norm": 0.6541602898361744, "learning_rate": 7.71581508515815e-06, "loss": 0.6206, "step": 22359 }, { "epoch": 0.652827654667017, "grad_norm": 0.5922816326102338, "learning_rate": 7.715166261151663e-06, "loss": 0.5506, "step": 22360 }, { "epoch": 0.6528568508948643, "grad_norm": 0.6293810072143434, "learning_rate": 7.714517437145175e-06, "loss": 0.6411, "step": 22361 }, { "epoch": 0.6528860471227117, "grad_norm": 0.6623293924388451, "learning_rate": 7.713868613138687e-06, "loss": 0.5636, "step": 22362 }, { "epoch": 0.6529152433505591, "grad_norm": 0.6925161218219112, "learning_rate": 7.713219789132199e-06, "loss": 0.6924, "step": 22363 }, { "epoch": 0.6529444395784064, "grad_norm": 0.6488689641464688, "learning_rate": 7.71257096512571e-06, "loss": 0.572, "step": 22364 }, { "epoch": 0.6529736358062538, "grad_norm": 0.6913162543711332, "learning_rate": 7.711922141119223e-06, "loss": 0.6561, "step": 22365 }, { "epoch": 0.6530028320341011, "grad_norm": 0.6382653047430136, "learning_rate": 7.711273317112735e-06, "loss": 0.5668, "step": 22366 }, { "epoch": 0.6530320282619485, "grad_norm": 0.634823942764686, "learning_rate": 7.710624493106245e-06, "loss": 0.5925, "step": 22367 }, { "epoch": 0.6530612244897959, "grad_norm": 0.6796481845348031, "learning_rate": 7.709975669099757e-06, "loss": 0.6283, "step": 22368 }, { "epoch": 0.6530904207176432, "grad_norm": 0.5910863904801982, "learning_rate": 7.70932684509327e-06, "loss": 0.5419, "step": 22369 }, { "epoch": 0.6531196169454906, "grad_norm": 0.6502786443864259, "learning_rate": 7.708678021086781e-06, "loss": 0.6488, "step": 22370 }, { "epoch": 0.653148813173338, "grad_norm": 0.6045424611524968, "learning_rate": 7.708029197080292e-06, "loss": 0.5634, "step": 22371 }, { "epoch": 0.6531780094011854, "grad_norm": 0.6516384101889865, "learning_rate": 7.707380373073804e-06, "loss": 0.6436, "step": 22372 }, { "epoch": 0.6532072056290328, "grad_norm": 0.6922022763466138, "learning_rate": 7.706731549067316e-06, "loss": 0.6983, "step": 22373 }, { "epoch": 0.6532364018568801, "grad_norm": 0.637270597894051, "learning_rate": 7.706082725060828e-06, "loss": 0.6297, "step": 22374 }, { "epoch": 0.6532655980847275, "grad_norm": 0.6629969331406638, "learning_rate": 7.70543390105434e-06, "loss": 0.624, "step": 22375 }, { "epoch": 0.6532947943125749, "grad_norm": 0.6637514020839257, "learning_rate": 7.704785077047852e-06, "loss": 0.6367, "step": 22376 }, { "epoch": 0.6533239905404222, "grad_norm": 0.646705556548345, "learning_rate": 7.704136253041364e-06, "loss": 0.5795, "step": 22377 }, { "epoch": 0.6533531867682696, "grad_norm": 0.6247896857629032, "learning_rate": 7.703487429034874e-06, "loss": 0.5702, "step": 22378 }, { "epoch": 0.653382382996117, "grad_norm": 0.6316092257979224, "learning_rate": 7.702838605028386e-06, "loss": 0.6188, "step": 22379 }, { "epoch": 0.6534115792239643, "grad_norm": 0.6978634548568794, "learning_rate": 7.702189781021898e-06, "loss": 0.6717, "step": 22380 }, { "epoch": 0.6534407754518117, "grad_norm": 0.6521715504898292, "learning_rate": 7.70154095701541e-06, "loss": 0.6353, "step": 22381 }, { "epoch": 0.653469971679659, "grad_norm": 0.6366655781642855, "learning_rate": 7.700892133008922e-06, "loss": 0.5728, "step": 22382 }, { "epoch": 0.6534991679075064, "grad_norm": 0.6208400121944122, "learning_rate": 7.700243309002434e-06, "loss": 0.5774, "step": 22383 }, { "epoch": 0.6535283641353538, "grad_norm": 0.6340411632458908, "learning_rate": 7.699594484995946e-06, "loss": 0.6102, "step": 22384 }, { "epoch": 0.6535575603632011, "grad_norm": 0.6433095276756869, "learning_rate": 7.698945660989458e-06, "loss": 0.6399, "step": 22385 }, { "epoch": 0.6535867565910485, "grad_norm": 0.6623059871735351, "learning_rate": 7.698296836982969e-06, "loss": 0.6716, "step": 22386 }, { "epoch": 0.6536159528188958, "grad_norm": 0.7028868991294513, "learning_rate": 7.69764801297648e-06, "loss": 0.6549, "step": 22387 }, { "epoch": 0.6536451490467432, "grad_norm": 0.6083780648215491, "learning_rate": 7.696999188969993e-06, "loss": 0.563, "step": 22388 }, { "epoch": 0.6536743452745906, "grad_norm": 0.6016563688977226, "learning_rate": 7.696350364963505e-06, "loss": 0.5601, "step": 22389 }, { "epoch": 0.6537035415024379, "grad_norm": 0.6588719079431442, "learning_rate": 7.695701540957015e-06, "loss": 0.6223, "step": 22390 }, { "epoch": 0.6537327377302853, "grad_norm": 0.637648713760685, "learning_rate": 7.695052716950527e-06, "loss": 0.593, "step": 22391 }, { "epoch": 0.6537619339581326, "grad_norm": 0.6671845576115831, "learning_rate": 7.69440389294404e-06, "loss": 0.6159, "step": 22392 }, { "epoch": 0.65379113018598, "grad_norm": 0.664860493303258, "learning_rate": 7.693755068937551e-06, "loss": 0.6304, "step": 22393 }, { "epoch": 0.6538203264138274, "grad_norm": 0.6770038212826591, "learning_rate": 7.693106244931063e-06, "loss": 0.6482, "step": 22394 }, { "epoch": 0.6538495226416747, "grad_norm": 0.543901871133439, "learning_rate": 7.692457420924575e-06, "loss": 0.4593, "step": 22395 }, { "epoch": 0.6538787188695221, "grad_norm": 0.5911637406076758, "learning_rate": 7.691808596918087e-06, "loss": 0.5116, "step": 22396 }, { "epoch": 0.6539079150973695, "grad_norm": 0.713440170003614, "learning_rate": 7.691159772911598e-06, "loss": 0.6266, "step": 22397 }, { "epoch": 0.6539371113252168, "grad_norm": 0.7370175440870851, "learning_rate": 7.69051094890511e-06, "loss": 0.6325, "step": 22398 }, { "epoch": 0.6539663075530642, "grad_norm": 0.6234178411780202, "learning_rate": 7.689862124898622e-06, "loss": 0.5914, "step": 22399 }, { "epoch": 0.6539955037809115, "grad_norm": 0.627364073155844, "learning_rate": 7.689213300892134e-06, "loss": 0.5903, "step": 22400 }, { "epoch": 0.6540247000087589, "grad_norm": 0.627222278652171, "learning_rate": 7.688564476885646e-06, "loss": 0.583, "step": 22401 }, { "epoch": 0.6540538962366063, "grad_norm": 0.6484349301234517, "learning_rate": 7.687915652879158e-06, "loss": 0.6086, "step": 22402 }, { "epoch": 0.6540830924644536, "grad_norm": 0.6597145686574213, "learning_rate": 7.68726682887267e-06, "loss": 0.6645, "step": 22403 }, { "epoch": 0.654112288692301, "grad_norm": 0.6155863460296447, "learning_rate": 7.686618004866182e-06, "loss": 0.548, "step": 22404 }, { "epoch": 0.6541414849201483, "grad_norm": 0.6691813036525511, "learning_rate": 7.685969180859692e-06, "loss": 0.6778, "step": 22405 }, { "epoch": 0.6541706811479957, "grad_norm": 0.6487686265846617, "learning_rate": 7.685320356853204e-06, "loss": 0.5679, "step": 22406 }, { "epoch": 0.6541998773758431, "grad_norm": 0.646303500870888, "learning_rate": 7.684671532846716e-06, "loss": 0.5937, "step": 22407 }, { "epoch": 0.6542290736036904, "grad_norm": 0.6383777929739167, "learning_rate": 7.684022708840228e-06, "loss": 0.5879, "step": 22408 }, { "epoch": 0.6542582698315378, "grad_norm": 0.6150685479436935, "learning_rate": 7.683373884833738e-06, "loss": 0.5136, "step": 22409 }, { "epoch": 0.6542874660593851, "grad_norm": 0.7120598562205773, "learning_rate": 7.68272506082725e-06, "loss": 0.7011, "step": 22410 }, { "epoch": 0.6543166622872325, "grad_norm": 0.6265763620566139, "learning_rate": 7.682076236820764e-06, "loss": 0.5858, "step": 22411 }, { "epoch": 0.6543458585150799, "grad_norm": 0.6674673837513361, "learning_rate": 7.681427412814275e-06, "loss": 0.5997, "step": 22412 }, { "epoch": 0.6543750547429272, "grad_norm": 0.6488533704651686, "learning_rate": 7.680778588807787e-06, "loss": 0.5773, "step": 22413 }, { "epoch": 0.6544042509707746, "grad_norm": 0.6317943975132855, "learning_rate": 7.680129764801299e-06, "loss": 0.6274, "step": 22414 }, { "epoch": 0.654433447198622, "grad_norm": 0.6070391972198813, "learning_rate": 7.67948094079481e-06, "loss": 0.5518, "step": 22415 }, { "epoch": 0.6544626434264693, "grad_norm": 0.5753015175281072, "learning_rate": 7.678832116788321e-06, "loss": 0.5197, "step": 22416 }, { "epoch": 0.6544918396543167, "grad_norm": 0.6050497999308638, "learning_rate": 7.678183292781833e-06, "loss": 0.5586, "step": 22417 }, { "epoch": 0.654521035882164, "grad_norm": 0.6016821805452132, "learning_rate": 7.677534468775345e-06, "loss": 0.5394, "step": 22418 }, { "epoch": 0.6545502321100114, "grad_norm": 0.6313684002298375, "learning_rate": 7.676885644768857e-06, "loss": 0.6139, "step": 22419 }, { "epoch": 0.6545794283378588, "grad_norm": 0.7006447394137264, "learning_rate": 7.676236820762369e-06, "loss": 0.6768, "step": 22420 }, { "epoch": 0.6546086245657061, "grad_norm": 0.6665079169643063, "learning_rate": 7.675587996755881e-06, "loss": 0.6207, "step": 22421 }, { "epoch": 0.6546378207935535, "grad_norm": 0.5730845669822415, "learning_rate": 7.674939172749393e-06, "loss": 0.5166, "step": 22422 }, { "epoch": 0.6546670170214008, "grad_norm": 0.6385113012165464, "learning_rate": 7.674290348742905e-06, "loss": 0.6156, "step": 22423 }, { "epoch": 0.6546962132492482, "grad_norm": 0.5996595382059154, "learning_rate": 7.673641524736415e-06, "loss": 0.5664, "step": 22424 }, { "epoch": 0.6547254094770956, "grad_norm": 0.6003435837776218, "learning_rate": 7.672992700729928e-06, "loss": 0.5704, "step": 22425 }, { "epoch": 0.6547546057049429, "grad_norm": 0.6553027105545853, "learning_rate": 7.67234387672344e-06, "loss": 0.5893, "step": 22426 }, { "epoch": 0.6547838019327903, "grad_norm": 0.7015740109225089, "learning_rate": 7.671695052716952e-06, "loss": 0.6538, "step": 22427 }, { "epoch": 0.6548129981606376, "grad_norm": 0.6072057875528007, "learning_rate": 7.671046228710462e-06, "loss": 0.5314, "step": 22428 }, { "epoch": 0.654842194388485, "grad_norm": 0.6994616606946203, "learning_rate": 7.670397404703974e-06, "loss": 0.696, "step": 22429 }, { "epoch": 0.6548713906163324, "grad_norm": 0.6372526259102437, "learning_rate": 7.669748580697488e-06, "loss": 0.6088, "step": 22430 }, { "epoch": 0.6549005868441797, "grad_norm": 0.6946764209432161, "learning_rate": 7.669099756690998e-06, "loss": 0.6769, "step": 22431 }, { "epoch": 0.6549297830720271, "grad_norm": 0.6609455388606679, "learning_rate": 7.66845093268451e-06, "loss": 0.6086, "step": 22432 }, { "epoch": 0.6549589792998745, "grad_norm": 0.6602001559540484, "learning_rate": 7.667802108678022e-06, "loss": 0.637, "step": 22433 }, { "epoch": 0.6549881755277218, "grad_norm": 0.6297076212101621, "learning_rate": 7.667153284671534e-06, "loss": 0.5798, "step": 22434 }, { "epoch": 0.6550173717555692, "grad_norm": 0.6254426937407748, "learning_rate": 7.666504460665044e-06, "loss": 0.5944, "step": 22435 }, { "epoch": 0.6550465679834165, "grad_norm": 0.643233152649651, "learning_rate": 7.665855636658556e-06, "loss": 0.5982, "step": 22436 }, { "epoch": 0.6550757642112639, "grad_norm": 0.6576950805510762, "learning_rate": 7.665206812652068e-06, "loss": 0.6473, "step": 22437 }, { "epoch": 0.6551049604391113, "grad_norm": 0.6056809928053681, "learning_rate": 7.66455798864558e-06, "loss": 0.5757, "step": 22438 }, { "epoch": 0.6551341566669586, "grad_norm": 0.6914615084722097, "learning_rate": 7.663909164639093e-06, "loss": 0.6828, "step": 22439 }, { "epoch": 0.655163352894806, "grad_norm": 0.6624363123897369, "learning_rate": 7.663260340632605e-06, "loss": 0.6198, "step": 22440 }, { "epoch": 0.6551925491226533, "grad_norm": 0.6335256154384129, "learning_rate": 7.662611516626117e-06, "loss": 0.573, "step": 22441 }, { "epoch": 0.6552217453505007, "grad_norm": 0.6329115957740814, "learning_rate": 7.661962692619629e-06, "loss": 0.5673, "step": 22442 }, { "epoch": 0.6552509415783481, "grad_norm": 0.6735048430517808, "learning_rate": 7.661313868613139e-06, "loss": 0.5794, "step": 22443 }, { "epoch": 0.6552801378061954, "grad_norm": 0.6387397524341062, "learning_rate": 7.660665044606651e-06, "loss": 0.5876, "step": 22444 }, { "epoch": 0.6553093340340428, "grad_norm": 0.6723749294407936, "learning_rate": 7.660016220600163e-06, "loss": 0.6312, "step": 22445 }, { "epoch": 0.6553385302618902, "grad_norm": 0.6458416442612337, "learning_rate": 7.659367396593675e-06, "loss": 0.5911, "step": 22446 }, { "epoch": 0.6553677264897375, "grad_norm": 0.6290207256013401, "learning_rate": 7.658718572587185e-06, "loss": 0.5793, "step": 22447 }, { "epoch": 0.6553969227175849, "grad_norm": 0.6068732267481639, "learning_rate": 7.658069748580697e-06, "loss": 0.5858, "step": 22448 }, { "epoch": 0.6554261189454322, "grad_norm": 0.5938622153217903, "learning_rate": 7.657420924574211e-06, "loss": 0.5465, "step": 22449 }, { "epoch": 0.6554553151732796, "grad_norm": 0.625770994823601, "learning_rate": 7.656772100567721e-06, "loss": 0.5696, "step": 22450 }, { "epoch": 0.655484511401127, "grad_norm": 0.685524431122082, "learning_rate": 7.656123276561233e-06, "loss": 0.6114, "step": 22451 }, { "epoch": 0.6555137076289743, "grad_norm": 0.61496257200848, "learning_rate": 7.655474452554745e-06, "loss": 0.5656, "step": 22452 }, { "epoch": 0.6555429038568217, "grad_norm": 0.633261561454991, "learning_rate": 7.654825628548257e-06, "loss": 0.584, "step": 22453 }, { "epoch": 0.655572100084669, "grad_norm": 0.6506848484241538, "learning_rate": 7.654176804541768e-06, "loss": 0.5738, "step": 22454 }, { "epoch": 0.6556012963125164, "grad_norm": 0.6742377360161719, "learning_rate": 7.65352798053528e-06, "loss": 0.6881, "step": 22455 }, { "epoch": 0.6556304925403638, "grad_norm": 0.6516716877289738, "learning_rate": 7.652879156528792e-06, "loss": 0.6179, "step": 22456 }, { "epoch": 0.6556596887682111, "grad_norm": 0.644003958315679, "learning_rate": 7.652230332522304e-06, "loss": 0.6019, "step": 22457 }, { "epoch": 0.6556888849960585, "grad_norm": 0.6292352497002885, "learning_rate": 7.651581508515816e-06, "loss": 0.6057, "step": 22458 }, { "epoch": 0.6557180812239058, "grad_norm": 0.6522878322138255, "learning_rate": 7.650932684509328e-06, "loss": 0.5624, "step": 22459 }, { "epoch": 0.6557472774517532, "grad_norm": 0.5996820312100616, "learning_rate": 7.65028386050284e-06, "loss": 0.525, "step": 22460 }, { "epoch": 0.6557764736796006, "grad_norm": 0.6460328061682966, "learning_rate": 7.649635036496352e-06, "loss": 0.5989, "step": 22461 }, { "epoch": 0.6558056699074479, "grad_norm": 0.622342660875891, "learning_rate": 7.648986212489862e-06, "loss": 0.6111, "step": 22462 }, { "epoch": 0.6558348661352953, "grad_norm": 0.6446502442996994, "learning_rate": 7.648337388483374e-06, "loss": 0.6367, "step": 22463 }, { "epoch": 0.6558640623631427, "grad_norm": 0.7196293211025497, "learning_rate": 7.647688564476886e-06, "loss": 0.6787, "step": 22464 }, { "epoch": 0.65589325859099, "grad_norm": 0.5953804647929105, "learning_rate": 7.647039740470398e-06, "loss": 0.5582, "step": 22465 }, { "epoch": 0.6559224548188374, "grad_norm": 0.6416276696323536, "learning_rate": 7.646390916463909e-06, "loss": 0.612, "step": 22466 }, { "epoch": 0.6559516510466847, "grad_norm": 0.6652119808756273, "learning_rate": 7.64574209245742e-06, "loss": 0.5593, "step": 22467 }, { "epoch": 0.6559808472745321, "grad_norm": 0.6509569255266131, "learning_rate": 7.645093268450935e-06, "loss": 0.5753, "step": 22468 }, { "epoch": 0.6560100435023795, "grad_norm": 0.5700634268401854, "learning_rate": 7.644444444444445e-06, "loss": 0.5072, "step": 22469 }, { "epoch": 0.6560392397302268, "grad_norm": 0.6482906925911208, "learning_rate": 7.643795620437957e-06, "loss": 0.6049, "step": 22470 }, { "epoch": 0.6560684359580742, "grad_norm": 0.6381625029347238, "learning_rate": 7.643146796431469e-06, "loss": 0.5579, "step": 22471 }, { "epoch": 0.6560976321859215, "grad_norm": 0.6539618158398524, "learning_rate": 7.642497972424981e-06, "loss": 0.6661, "step": 22472 }, { "epoch": 0.6561268284137689, "grad_norm": 0.6319100057212724, "learning_rate": 7.641849148418491e-06, "loss": 0.5759, "step": 22473 }, { "epoch": 0.6561560246416163, "grad_norm": 0.6489410784523182, "learning_rate": 7.641200324412003e-06, "loss": 0.6084, "step": 22474 }, { "epoch": 0.6561852208694636, "grad_norm": 0.6052446056710183, "learning_rate": 7.640551500405515e-06, "loss": 0.5383, "step": 22475 }, { "epoch": 0.656214417097311, "grad_norm": 0.6196698260555157, "learning_rate": 7.639902676399027e-06, "loss": 0.5707, "step": 22476 }, { "epoch": 0.6562436133251583, "grad_norm": 0.6437923299353383, "learning_rate": 7.63925385239254e-06, "loss": 0.5691, "step": 22477 }, { "epoch": 0.6562728095530057, "grad_norm": 0.6205638753475932, "learning_rate": 7.638605028386051e-06, "loss": 0.6083, "step": 22478 }, { "epoch": 0.6563020057808531, "grad_norm": 0.6664526727590845, "learning_rate": 7.637956204379563e-06, "loss": 0.6164, "step": 22479 }, { "epoch": 0.6563312020087004, "grad_norm": 0.5843140802040304, "learning_rate": 7.637307380373075e-06, "loss": 0.5433, "step": 22480 }, { "epoch": 0.6563603982365478, "grad_norm": 0.6447516364577539, "learning_rate": 7.636658556366586e-06, "loss": 0.6393, "step": 22481 }, { "epoch": 0.6563895944643952, "grad_norm": 0.6702026164814044, "learning_rate": 7.636009732360098e-06, "loss": 0.6265, "step": 22482 }, { "epoch": 0.6564187906922425, "grad_norm": 0.5726641118333553, "learning_rate": 7.63536090835361e-06, "loss": 0.4987, "step": 22483 }, { "epoch": 0.6564479869200899, "grad_norm": 0.624450628015729, "learning_rate": 7.634712084347122e-06, "loss": 0.6098, "step": 22484 }, { "epoch": 0.6564771831479372, "grad_norm": 0.6452264608296833, "learning_rate": 7.634063260340632e-06, "loss": 0.574, "step": 22485 }, { "epoch": 0.6565063793757846, "grad_norm": 0.680124862837905, "learning_rate": 7.633414436334144e-06, "loss": 0.694, "step": 22486 }, { "epoch": 0.656535575603632, "grad_norm": 0.6088226890918926, "learning_rate": 7.632765612327658e-06, "loss": 0.5517, "step": 22487 }, { "epoch": 0.6565647718314793, "grad_norm": 0.6554627975157931, "learning_rate": 7.632116788321168e-06, "loss": 0.6222, "step": 22488 }, { "epoch": 0.6565939680593267, "grad_norm": 0.6258301748387766, "learning_rate": 7.63146796431468e-06, "loss": 0.5883, "step": 22489 }, { "epoch": 0.656623164287174, "grad_norm": 0.6106467176383579, "learning_rate": 7.630819140308192e-06, "loss": 0.5201, "step": 22490 }, { "epoch": 0.6566523605150214, "grad_norm": 0.620005265830839, "learning_rate": 7.630170316301704e-06, "loss": 0.5263, "step": 22491 }, { "epoch": 0.6566815567428688, "grad_norm": 0.6774674782340578, "learning_rate": 7.629521492295215e-06, "loss": 0.652, "step": 22492 }, { "epoch": 0.6567107529707162, "grad_norm": 0.6175529733684656, "learning_rate": 7.628872668288727e-06, "loss": 0.6032, "step": 22493 }, { "epoch": 0.6567399491985636, "grad_norm": 0.6498365813985242, "learning_rate": 7.628223844282239e-06, "loss": 0.5931, "step": 22494 }, { "epoch": 0.656769145426411, "grad_norm": 0.6526627449845265, "learning_rate": 7.627575020275751e-06, "loss": 0.6057, "step": 22495 }, { "epoch": 0.6567983416542583, "grad_norm": 0.616306311158254, "learning_rate": 7.626926196269263e-06, "loss": 0.4935, "step": 22496 }, { "epoch": 0.6568275378821057, "grad_norm": 0.6337137614642141, "learning_rate": 7.626277372262775e-06, "loss": 0.5549, "step": 22497 }, { "epoch": 0.656856734109953, "grad_norm": 0.6445750119051732, "learning_rate": 7.625628548256286e-06, "loss": 0.6088, "step": 22498 }, { "epoch": 0.6568859303378004, "grad_norm": 0.6246740134072429, "learning_rate": 7.624979724249798e-06, "loss": 0.5835, "step": 22499 }, { "epoch": 0.6569151265656478, "grad_norm": 0.6403256147386918, "learning_rate": 7.62433090024331e-06, "loss": 0.6039, "step": 22500 }, { "epoch": 0.6569443227934951, "grad_norm": 0.6530765748620269, "learning_rate": 7.623682076236821e-06, "loss": 0.6253, "step": 22501 }, { "epoch": 0.6569735190213425, "grad_norm": 0.6589810157560221, "learning_rate": 7.623033252230333e-06, "loss": 0.6055, "step": 22502 }, { "epoch": 0.6570027152491899, "grad_norm": 0.606444484497838, "learning_rate": 7.6223844282238445e-06, "loss": 0.5406, "step": 22503 }, { "epoch": 0.6570319114770372, "grad_norm": 0.6071945759256622, "learning_rate": 7.6217356042173565e-06, "loss": 0.5205, "step": 22504 }, { "epoch": 0.6570611077048846, "grad_norm": 0.7206913966484838, "learning_rate": 7.621086780210868e-06, "loss": 0.6828, "step": 22505 }, { "epoch": 0.6570903039327319, "grad_norm": 0.6879223241217995, "learning_rate": 7.6204379562043805e-06, "loss": 0.6184, "step": 22506 }, { "epoch": 0.6571195001605793, "grad_norm": 0.6256589886573849, "learning_rate": 7.6197891321978926e-06, "loss": 0.536, "step": 22507 }, { "epoch": 0.6571486963884267, "grad_norm": 0.6311139436127035, "learning_rate": 7.619140308191404e-06, "loss": 0.5801, "step": 22508 }, { "epoch": 0.657177892616274, "grad_norm": 0.6423551817085852, "learning_rate": 7.618491484184916e-06, "loss": 0.5911, "step": 22509 }, { "epoch": 0.6572070888441214, "grad_norm": 0.6272766014767395, "learning_rate": 7.617842660178427e-06, "loss": 0.5708, "step": 22510 }, { "epoch": 0.6572362850719687, "grad_norm": 0.5973690751878264, "learning_rate": 7.617193836171939e-06, "loss": 0.5372, "step": 22511 }, { "epoch": 0.6572654812998161, "grad_norm": 0.6360024953207816, "learning_rate": 7.61654501216545e-06, "loss": 0.5854, "step": 22512 }, { "epoch": 0.6572946775276635, "grad_norm": 0.6757809427442226, "learning_rate": 7.615896188158962e-06, "loss": 0.5882, "step": 22513 }, { "epoch": 0.6573238737555108, "grad_norm": 0.6107488502679598, "learning_rate": 7.615247364152473e-06, "loss": 0.5226, "step": 22514 }, { "epoch": 0.6573530699833582, "grad_norm": 0.670822307178729, "learning_rate": 7.614598540145986e-06, "loss": 0.6407, "step": 22515 }, { "epoch": 0.6573822662112055, "grad_norm": 0.6910385670822846, "learning_rate": 7.613949716139498e-06, "loss": 0.6693, "step": 22516 }, { "epoch": 0.6574114624390529, "grad_norm": 0.603330560234004, "learning_rate": 7.6133008921330095e-06, "loss": 0.5019, "step": 22517 }, { "epoch": 0.6574406586669003, "grad_norm": 0.5955016944981577, "learning_rate": 7.6126520681265215e-06, "loss": 0.5317, "step": 22518 }, { "epoch": 0.6574698548947476, "grad_norm": 0.6253236090422012, "learning_rate": 7.6120032441200335e-06, "loss": 0.5969, "step": 22519 }, { "epoch": 0.657499051122595, "grad_norm": 0.5914642240099544, "learning_rate": 7.611354420113545e-06, "loss": 0.5519, "step": 22520 }, { "epoch": 0.6575282473504424, "grad_norm": 0.7291233095325265, "learning_rate": 7.610705596107057e-06, "loss": 0.6558, "step": 22521 }, { "epoch": 0.6575574435782897, "grad_norm": 0.6827482957517497, "learning_rate": 7.610056772100568e-06, "loss": 0.6939, "step": 22522 }, { "epoch": 0.6575866398061371, "grad_norm": 0.6575535252508493, "learning_rate": 7.60940794809408e-06, "loss": 0.6004, "step": 22523 }, { "epoch": 0.6576158360339844, "grad_norm": 0.659359286879242, "learning_rate": 7.608759124087591e-06, "loss": 0.6058, "step": 22524 }, { "epoch": 0.6576450322618318, "grad_norm": 0.6700123048021139, "learning_rate": 7.608110300081104e-06, "loss": 0.6006, "step": 22525 }, { "epoch": 0.6576742284896792, "grad_norm": 0.6546774211884159, "learning_rate": 7.607461476074616e-06, "loss": 0.5965, "step": 22526 }, { "epoch": 0.6577034247175265, "grad_norm": 0.6429994918160586, "learning_rate": 7.606812652068127e-06, "loss": 0.5981, "step": 22527 }, { "epoch": 0.6577326209453739, "grad_norm": 0.5991196624009847, "learning_rate": 7.606163828061639e-06, "loss": 0.5632, "step": 22528 }, { "epoch": 0.6577618171732212, "grad_norm": 0.6996550940363354, "learning_rate": 7.60551500405515e-06, "loss": 0.6744, "step": 22529 }, { "epoch": 0.6577910134010686, "grad_norm": 0.6574135423870334, "learning_rate": 7.604866180048662e-06, "loss": 0.6222, "step": 22530 }, { "epoch": 0.657820209628916, "grad_norm": 0.6424834060823842, "learning_rate": 7.604217356042174e-06, "loss": 0.598, "step": 22531 }, { "epoch": 0.6578494058567633, "grad_norm": 0.6673360819435614, "learning_rate": 7.603568532035686e-06, "loss": 0.6529, "step": 22532 }, { "epoch": 0.6578786020846107, "grad_norm": 0.6608404209644109, "learning_rate": 7.602919708029197e-06, "loss": 0.6097, "step": 22533 }, { "epoch": 0.657907798312458, "grad_norm": 0.6046363476846575, "learning_rate": 7.60227088402271e-06, "loss": 0.5694, "step": 22534 }, { "epoch": 0.6579369945403054, "grad_norm": 0.6539024150237597, "learning_rate": 7.601622060016222e-06, "loss": 0.6061, "step": 22535 }, { "epoch": 0.6579661907681528, "grad_norm": 0.6519415667667718, "learning_rate": 7.600973236009733e-06, "loss": 0.5928, "step": 22536 }, { "epoch": 0.6579953869960001, "grad_norm": 0.6129108668755332, "learning_rate": 7.600324412003245e-06, "loss": 0.5407, "step": 22537 }, { "epoch": 0.6580245832238475, "grad_norm": 0.6629094951408551, "learning_rate": 7.599675587996757e-06, "loss": 0.631, "step": 22538 }, { "epoch": 0.6580537794516949, "grad_norm": 0.6999936568825833, "learning_rate": 7.599026763990268e-06, "loss": 0.6528, "step": 22539 }, { "epoch": 0.6580829756795422, "grad_norm": 0.6161664239110796, "learning_rate": 7.59837793998378e-06, "loss": 0.5293, "step": 22540 }, { "epoch": 0.6581121719073896, "grad_norm": 0.6574900156462299, "learning_rate": 7.597729115977291e-06, "loss": 0.6359, "step": 22541 }, { "epoch": 0.6581413681352369, "grad_norm": 0.6695277075820376, "learning_rate": 7.597080291970803e-06, "loss": 0.5918, "step": 22542 }, { "epoch": 0.6581705643630843, "grad_norm": 0.6781475009694551, "learning_rate": 7.596431467964316e-06, "loss": 0.5576, "step": 22543 }, { "epoch": 0.6581997605909317, "grad_norm": 0.6388668556985081, "learning_rate": 7.595782643957827e-06, "loss": 0.5976, "step": 22544 }, { "epoch": 0.658228956818779, "grad_norm": 0.6692780478187408, "learning_rate": 7.5951338199513394e-06, "loss": 0.6113, "step": 22545 }, { "epoch": 0.6582581530466264, "grad_norm": 0.6311473062448155, "learning_rate": 7.594484995944851e-06, "loss": 0.5644, "step": 22546 }, { "epoch": 0.6582873492744737, "grad_norm": 0.6543325517575034, "learning_rate": 7.593836171938363e-06, "loss": 0.6114, "step": 22547 }, { "epoch": 0.6583165455023211, "grad_norm": 0.6232317521164904, "learning_rate": 7.593187347931874e-06, "loss": 0.584, "step": 22548 }, { "epoch": 0.6583457417301685, "grad_norm": 0.6471846909245084, "learning_rate": 7.592538523925386e-06, "loss": 0.6141, "step": 22549 }, { "epoch": 0.6583749379580158, "grad_norm": 0.6460899872728223, "learning_rate": 7.591889699918897e-06, "loss": 0.6001, "step": 22550 }, { "epoch": 0.6584041341858632, "grad_norm": 0.6359068300128184, "learning_rate": 7.591240875912409e-06, "loss": 0.5786, "step": 22551 }, { "epoch": 0.6584333304137105, "grad_norm": 0.5760443051939065, "learning_rate": 7.59059205190592e-06, "loss": 0.5158, "step": 22552 }, { "epoch": 0.6584625266415579, "grad_norm": 0.586142121572263, "learning_rate": 7.589943227899433e-06, "loss": 0.537, "step": 22553 }, { "epoch": 0.6584917228694053, "grad_norm": 0.6544556298314713, "learning_rate": 7.589294403892945e-06, "loss": 0.6407, "step": 22554 }, { "epoch": 0.6585209190972526, "grad_norm": 0.6462476615004261, "learning_rate": 7.588645579886456e-06, "loss": 0.6162, "step": 22555 }, { "epoch": 0.6585501153251, "grad_norm": 0.6628568600119658, "learning_rate": 7.587996755879968e-06, "loss": 0.635, "step": 22556 }, { "epoch": 0.6585793115529474, "grad_norm": 0.689402814235438, "learning_rate": 7.58734793187348e-06, "loss": 0.5744, "step": 22557 }, { "epoch": 0.6586085077807947, "grad_norm": 0.6306901390747214, "learning_rate": 7.5866991078669916e-06, "loss": 0.5647, "step": 22558 }, { "epoch": 0.6586377040086421, "grad_norm": 0.6586905187879235, "learning_rate": 7.586050283860504e-06, "loss": 0.6324, "step": 22559 }, { "epoch": 0.6586669002364894, "grad_norm": 0.6267271101749871, "learning_rate": 7.585401459854015e-06, "loss": 0.5107, "step": 22560 }, { "epoch": 0.6586960964643368, "grad_norm": 0.6688777997217489, "learning_rate": 7.584752635847527e-06, "loss": 0.6711, "step": 22561 }, { "epoch": 0.6587252926921842, "grad_norm": 0.6363538164503131, "learning_rate": 7.58410381184104e-06, "loss": 0.5802, "step": 22562 }, { "epoch": 0.6587544889200315, "grad_norm": 0.6485569675245108, "learning_rate": 7.583454987834551e-06, "loss": 0.6228, "step": 22563 }, { "epoch": 0.6587836851478789, "grad_norm": 0.6254109653974447, "learning_rate": 7.582806163828063e-06, "loss": 0.5794, "step": 22564 }, { "epoch": 0.6588128813757262, "grad_norm": 0.6706625929709166, "learning_rate": 7.582157339821574e-06, "loss": 0.6849, "step": 22565 }, { "epoch": 0.6588420776035736, "grad_norm": 0.614646100796519, "learning_rate": 7.581508515815086e-06, "loss": 0.5505, "step": 22566 }, { "epoch": 0.658871273831421, "grad_norm": 0.6873433661174273, "learning_rate": 7.580859691808597e-06, "loss": 0.6341, "step": 22567 }, { "epoch": 0.6589004700592683, "grad_norm": 0.5952175077864249, "learning_rate": 7.580210867802109e-06, "loss": 0.5392, "step": 22568 }, { "epoch": 0.6589296662871157, "grad_norm": 0.628258992599524, "learning_rate": 7.5795620437956205e-06, "loss": 0.5678, "step": 22569 }, { "epoch": 0.658958862514963, "grad_norm": 0.6117051260017274, "learning_rate": 7.5789132197891325e-06, "loss": 0.5502, "step": 22570 }, { "epoch": 0.6589880587428104, "grad_norm": 0.686731530444182, "learning_rate": 7.578264395782644e-06, "loss": 0.6806, "step": 22571 }, { "epoch": 0.6590172549706578, "grad_norm": 0.6447731041271593, "learning_rate": 7.5776155717761565e-06, "loss": 0.6443, "step": 22572 }, { "epoch": 0.6590464511985051, "grad_norm": 0.7050378964915899, "learning_rate": 7.5769667477696686e-06, "loss": 0.6485, "step": 22573 }, { "epoch": 0.6590756474263525, "grad_norm": 0.6205075546272665, "learning_rate": 7.57631792376318e-06, "loss": 0.5659, "step": 22574 }, { "epoch": 0.6591048436541999, "grad_norm": 0.6682250122813836, "learning_rate": 7.575669099756692e-06, "loss": 0.6006, "step": 22575 }, { "epoch": 0.6591340398820472, "grad_norm": 0.6378705400067387, "learning_rate": 7.575020275750203e-06, "loss": 0.5954, "step": 22576 }, { "epoch": 0.6591632361098946, "grad_norm": 0.6626433283219755, "learning_rate": 7.574371451743715e-06, "loss": 0.647, "step": 22577 }, { "epoch": 0.6591924323377419, "grad_norm": 0.6669913212849922, "learning_rate": 7.573722627737227e-06, "loss": 0.6063, "step": 22578 }, { "epoch": 0.6592216285655893, "grad_norm": 0.6452029371109717, "learning_rate": 7.573073803730738e-06, "loss": 0.5849, "step": 22579 }, { "epoch": 0.6592508247934367, "grad_norm": 0.6370513761902885, "learning_rate": 7.57242497972425e-06, "loss": 0.6137, "step": 22580 }, { "epoch": 0.659280021021284, "grad_norm": 0.6231599058463163, "learning_rate": 7.571776155717763e-06, "loss": 0.6072, "step": 22581 }, { "epoch": 0.6593092172491314, "grad_norm": 0.7052265297864404, "learning_rate": 7.571127331711274e-06, "loss": 0.5668, "step": 22582 }, { "epoch": 0.6593384134769787, "grad_norm": 0.574216179187129, "learning_rate": 7.570478507704786e-06, "loss": 0.441, "step": 22583 }, { "epoch": 0.6593676097048261, "grad_norm": 0.5952593776045593, "learning_rate": 7.5698296836982975e-06, "loss": 0.5113, "step": 22584 }, { "epoch": 0.6593968059326735, "grad_norm": 0.6122392922195199, "learning_rate": 7.5691808596918095e-06, "loss": 0.5479, "step": 22585 }, { "epoch": 0.6594260021605208, "grad_norm": 0.6814502531491397, "learning_rate": 7.568532035685321e-06, "loss": 0.6545, "step": 22586 }, { "epoch": 0.6594551983883682, "grad_norm": 0.6838591323311242, "learning_rate": 7.567883211678833e-06, "loss": 0.6318, "step": 22587 }, { "epoch": 0.6594843946162156, "grad_norm": 0.6377714164874904, "learning_rate": 7.567234387672344e-06, "loss": 0.6029, "step": 22588 }, { "epoch": 0.6595135908440629, "grad_norm": 0.683870087714161, "learning_rate": 7.566585563665856e-06, "loss": 0.6662, "step": 22589 }, { "epoch": 0.6595427870719103, "grad_norm": 0.6986947158265198, "learning_rate": 7.565936739659367e-06, "loss": 0.7278, "step": 22590 }, { "epoch": 0.6595719832997576, "grad_norm": 0.6167453840231005, "learning_rate": 7.56528791565288e-06, "loss": 0.5664, "step": 22591 }, { "epoch": 0.659601179527605, "grad_norm": 0.6036399921964685, "learning_rate": 7.564639091646392e-06, "loss": 0.5532, "step": 22592 }, { "epoch": 0.6596303757554524, "grad_norm": 0.6065296341839399, "learning_rate": 7.563990267639903e-06, "loss": 0.553, "step": 22593 }, { "epoch": 0.6596595719832997, "grad_norm": 0.6592439073939713, "learning_rate": 7.563341443633415e-06, "loss": 0.6399, "step": 22594 }, { "epoch": 0.6596887682111471, "grad_norm": 0.6340617383860391, "learning_rate": 7.562692619626926e-06, "loss": 0.5947, "step": 22595 }, { "epoch": 0.6597179644389944, "grad_norm": 0.6220279967213771, "learning_rate": 7.5620437956204384e-06, "loss": 0.5953, "step": 22596 }, { "epoch": 0.6597471606668418, "grad_norm": 0.6740293247196044, "learning_rate": 7.5613949716139505e-06, "loss": 0.6269, "step": 22597 }, { "epoch": 0.6597763568946892, "grad_norm": 0.6275702768605608, "learning_rate": 7.560746147607462e-06, "loss": 0.6127, "step": 22598 }, { "epoch": 0.6598055531225365, "grad_norm": 0.6853397062108808, "learning_rate": 7.560097323600974e-06, "loss": 0.6648, "step": 22599 }, { "epoch": 0.6598347493503839, "grad_norm": 0.6563586402143927, "learning_rate": 7.5594484995944865e-06, "loss": 0.6151, "step": 22600 }, { "epoch": 0.6598639455782312, "grad_norm": 0.6513234870063765, "learning_rate": 7.558799675587998e-06, "loss": 0.6058, "step": 22601 }, { "epoch": 0.6598931418060786, "grad_norm": 0.6567605863812641, "learning_rate": 7.55815085158151e-06, "loss": 0.6151, "step": 22602 }, { "epoch": 0.659922338033926, "grad_norm": 0.6478124663838212, "learning_rate": 7.557502027575021e-06, "loss": 0.5853, "step": 22603 }, { "epoch": 0.6599515342617733, "grad_norm": 0.7104214987759595, "learning_rate": 7.556853203568533e-06, "loss": 0.6657, "step": 22604 }, { "epoch": 0.6599807304896207, "grad_norm": 0.637858422973144, "learning_rate": 7.556204379562044e-06, "loss": 0.5837, "step": 22605 }, { "epoch": 0.660009926717468, "grad_norm": 0.681923939094577, "learning_rate": 7.555555555555556e-06, "loss": 0.6341, "step": 22606 }, { "epoch": 0.6600391229453154, "grad_norm": 0.6542551217274704, "learning_rate": 7.554906731549067e-06, "loss": 0.6063, "step": 22607 }, { "epoch": 0.6600683191731628, "grad_norm": 0.6532346500551268, "learning_rate": 7.554257907542579e-06, "loss": 0.6352, "step": 22608 }, { "epoch": 0.6600975154010101, "grad_norm": 0.6615993984092842, "learning_rate": 7.5536090835360905e-06, "loss": 0.6787, "step": 22609 }, { "epoch": 0.6601267116288575, "grad_norm": 0.6731831189980575, "learning_rate": 7.552960259529603e-06, "loss": 0.6519, "step": 22610 }, { "epoch": 0.6601559078567049, "grad_norm": 0.5966621151886525, "learning_rate": 7.5523114355231154e-06, "loss": 0.5388, "step": 22611 }, { "epoch": 0.6601851040845522, "grad_norm": 0.5997510379344002, "learning_rate": 7.551662611516627e-06, "loss": 0.547, "step": 22612 }, { "epoch": 0.6602143003123997, "grad_norm": 0.6669954869282861, "learning_rate": 7.551013787510139e-06, "loss": 0.6566, "step": 22613 }, { "epoch": 0.660243496540247, "grad_norm": 0.6719603938976785, "learning_rate": 7.55036496350365e-06, "loss": 0.623, "step": 22614 }, { "epoch": 0.6602726927680944, "grad_norm": 0.6652055871486405, "learning_rate": 7.549716139497162e-06, "loss": 0.6422, "step": 22615 }, { "epoch": 0.6603018889959418, "grad_norm": 0.6070503629798338, "learning_rate": 7.549067315490674e-06, "loss": 0.5182, "step": 22616 }, { "epoch": 0.6603310852237891, "grad_norm": 0.6092356356068296, "learning_rate": 7.548418491484185e-06, "loss": 0.5692, "step": 22617 }, { "epoch": 0.6603602814516365, "grad_norm": 0.6185092364551792, "learning_rate": 7.547769667477697e-06, "loss": 0.5642, "step": 22618 }, { "epoch": 0.6603894776794839, "grad_norm": 0.6559517352528441, "learning_rate": 7.54712084347121e-06, "loss": 0.6229, "step": 22619 }, { "epoch": 0.6604186739073312, "grad_norm": 0.701433354212712, "learning_rate": 7.546472019464721e-06, "loss": 0.6604, "step": 22620 }, { "epoch": 0.6604478701351786, "grad_norm": 0.631469104020101, "learning_rate": 7.545823195458233e-06, "loss": 0.5442, "step": 22621 }, { "epoch": 0.6604770663630259, "grad_norm": 0.6565948190851881, "learning_rate": 7.545174371451744e-06, "loss": 0.587, "step": 22622 }, { "epoch": 0.6605062625908733, "grad_norm": 0.6307904327738051, "learning_rate": 7.544525547445256e-06, "loss": 0.5936, "step": 22623 }, { "epoch": 0.6605354588187207, "grad_norm": 0.6455017400581161, "learning_rate": 7.5438767234387676e-06, "loss": 0.6262, "step": 22624 }, { "epoch": 0.660564655046568, "grad_norm": 0.6778618039950376, "learning_rate": 7.54322789943228e-06, "loss": 0.6297, "step": 22625 }, { "epoch": 0.6605938512744154, "grad_norm": 0.5954101410003119, "learning_rate": 7.542579075425791e-06, "loss": 0.5493, "step": 22626 }, { "epoch": 0.6606230475022628, "grad_norm": 0.6707991789335107, "learning_rate": 7.541930251419303e-06, "loss": 0.6292, "step": 22627 }, { "epoch": 0.6606522437301101, "grad_norm": 0.7324364539211813, "learning_rate": 7.541281427412816e-06, "loss": 0.7053, "step": 22628 }, { "epoch": 0.6606814399579575, "grad_norm": 0.7029064564708464, "learning_rate": 7.540632603406327e-06, "loss": 0.6919, "step": 22629 }, { "epoch": 0.6607106361858048, "grad_norm": 0.659655198076234, "learning_rate": 7.539983779399839e-06, "loss": 0.5749, "step": 22630 }, { "epoch": 0.6607398324136522, "grad_norm": 0.6280939698888398, "learning_rate": 7.53933495539335e-06, "loss": 0.6086, "step": 22631 }, { "epoch": 0.6607690286414996, "grad_norm": 0.6146785654583032, "learning_rate": 7.538686131386862e-06, "loss": 0.5957, "step": 22632 }, { "epoch": 0.6607982248693469, "grad_norm": 0.7075249963695684, "learning_rate": 7.538037307380373e-06, "loss": 0.6534, "step": 22633 }, { "epoch": 0.6608274210971943, "grad_norm": 0.9651779908185842, "learning_rate": 7.537388483373885e-06, "loss": 0.64, "step": 22634 }, { "epoch": 0.6608566173250416, "grad_norm": 0.6336530662820294, "learning_rate": 7.536739659367397e-06, "loss": 0.5836, "step": 22635 }, { "epoch": 0.660885813552889, "grad_norm": 0.6833774157254824, "learning_rate": 7.5360908353609085e-06, "loss": 0.6643, "step": 22636 }, { "epoch": 0.6609150097807364, "grad_norm": 0.6017529263258375, "learning_rate": 7.5354420113544205e-06, "loss": 0.5212, "step": 22637 }, { "epoch": 0.6609442060085837, "grad_norm": 0.6087081974353632, "learning_rate": 7.5347931873479326e-06, "loss": 0.5572, "step": 22638 }, { "epoch": 0.6609734022364311, "grad_norm": 0.678501031380377, "learning_rate": 7.534144363341445e-06, "loss": 0.6689, "step": 22639 }, { "epoch": 0.6610025984642784, "grad_norm": 0.7184862404082352, "learning_rate": 7.533495539334957e-06, "loss": 0.6552, "step": 22640 }, { "epoch": 0.6610317946921258, "grad_norm": 0.6352827981155864, "learning_rate": 7.532846715328468e-06, "loss": 0.587, "step": 22641 }, { "epoch": 0.6610609909199732, "grad_norm": 0.6238538896276395, "learning_rate": 7.53219789132198e-06, "loss": 0.5617, "step": 22642 }, { "epoch": 0.6610901871478205, "grad_norm": 0.6517185351582842, "learning_rate": 7.531549067315491e-06, "loss": 0.5911, "step": 22643 }, { "epoch": 0.6611193833756679, "grad_norm": 0.6676609757785168, "learning_rate": 7.530900243309003e-06, "loss": 0.6237, "step": 22644 }, { "epoch": 0.6611485796035153, "grad_norm": 0.6445907948506485, "learning_rate": 7.530251419302514e-06, "loss": 0.5923, "step": 22645 }, { "epoch": 0.6611777758313626, "grad_norm": 0.6737714154716753, "learning_rate": 7.529602595296026e-06, "loss": 0.6497, "step": 22646 }, { "epoch": 0.66120697205921, "grad_norm": 0.6417007212810176, "learning_rate": 7.528953771289539e-06, "loss": 0.6047, "step": 22647 }, { "epoch": 0.6612361682870573, "grad_norm": 0.5913598810754839, "learning_rate": 7.52830494728305e-06, "loss": 0.5102, "step": 22648 }, { "epoch": 0.6612653645149047, "grad_norm": 0.6505763241426071, "learning_rate": 7.527656123276562e-06, "loss": 0.5915, "step": 22649 }, { "epoch": 0.6612945607427521, "grad_norm": 0.6438552543122662, "learning_rate": 7.5270072992700735e-06, "loss": 0.6293, "step": 22650 }, { "epoch": 0.6613237569705994, "grad_norm": 0.6647033600527741, "learning_rate": 7.5263584752635855e-06, "loss": 0.6053, "step": 22651 }, { "epoch": 0.6613529531984468, "grad_norm": 0.6352711866515173, "learning_rate": 7.525709651257097e-06, "loss": 0.5701, "step": 22652 }, { "epoch": 0.6613821494262941, "grad_norm": 0.6211963316750742, "learning_rate": 7.525060827250609e-06, "loss": 0.5438, "step": 22653 }, { "epoch": 0.6614113456541415, "grad_norm": 0.6412915853988805, "learning_rate": 7.524412003244121e-06, "loss": 0.6234, "step": 22654 }, { "epoch": 0.6614405418819889, "grad_norm": 0.7331262482696178, "learning_rate": 7.523763179237632e-06, "loss": 0.7015, "step": 22655 }, { "epoch": 0.6614697381098362, "grad_norm": 0.6338153527756077, "learning_rate": 7.523114355231144e-06, "loss": 0.6175, "step": 22656 }, { "epoch": 0.6614989343376836, "grad_norm": 0.6091553489000506, "learning_rate": 7.522465531224656e-06, "loss": 0.5597, "step": 22657 }, { "epoch": 0.661528130565531, "grad_norm": 0.6252014054116498, "learning_rate": 7.521816707218168e-06, "loss": 0.6318, "step": 22658 }, { "epoch": 0.6615573267933783, "grad_norm": 0.6246849152076686, "learning_rate": 7.52116788321168e-06, "loss": 0.6011, "step": 22659 }, { "epoch": 0.6615865230212257, "grad_norm": 0.6878539495364481, "learning_rate": 7.520519059205191e-06, "loss": 0.5959, "step": 22660 }, { "epoch": 0.661615719249073, "grad_norm": 0.6700606434311354, "learning_rate": 7.519870235198703e-06, "loss": 0.652, "step": 22661 }, { "epoch": 0.6616449154769204, "grad_norm": 0.6738548728846636, "learning_rate": 7.5192214111922144e-06, "loss": 0.6303, "step": 22662 }, { "epoch": 0.6616741117047678, "grad_norm": 0.714504660046049, "learning_rate": 7.5185725871857265e-06, "loss": 0.6585, "step": 22663 }, { "epoch": 0.6617033079326151, "grad_norm": 0.7411265506740032, "learning_rate": 7.517923763179238e-06, "loss": 0.7147, "step": 22664 }, { "epoch": 0.6617325041604625, "grad_norm": 0.6393218510941658, "learning_rate": 7.51727493917275e-06, "loss": 0.6007, "step": 22665 }, { "epoch": 0.6617617003883098, "grad_norm": 0.6850717192418091, "learning_rate": 7.5166261151662625e-06, "loss": 0.6584, "step": 22666 }, { "epoch": 0.6617908966161572, "grad_norm": 0.6494907562140573, "learning_rate": 7.515977291159774e-06, "loss": 0.6469, "step": 22667 }, { "epoch": 0.6618200928440046, "grad_norm": 0.635195503316975, "learning_rate": 7.515328467153286e-06, "loss": 0.58, "step": 22668 }, { "epoch": 0.6618492890718519, "grad_norm": 0.601490755063999, "learning_rate": 7.514679643146797e-06, "loss": 0.5572, "step": 22669 }, { "epoch": 0.6618784852996993, "grad_norm": 0.6349023552232443, "learning_rate": 7.514030819140309e-06, "loss": 0.547, "step": 22670 }, { "epoch": 0.6619076815275466, "grad_norm": 0.5576365826591592, "learning_rate": 7.51338199513382e-06, "loss": 0.4873, "step": 22671 }, { "epoch": 0.661936877755394, "grad_norm": 0.5933644020365488, "learning_rate": 7.512733171127332e-06, "loss": 0.5327, "step": 22672 }, { "epoch": 0.6619660739832414, "grad_norm": 0.6099891336988398, "learning_rate": 7.512084347120844e-06, "loss": 0.5652, "step": 22673 }, { "epoch": 0.6619952702110887, "grad_norm": 0.6683662287533221, "learning_rate": 7.511435523114355e-06, "loss": 0.6518, "step": 22674 }, { "epoch": 0.6620244664389361, "grad_norm": 0.6158174655724231, "learning_rate": 7.510786699107867e-06, "loss": 0.5823, "step": 22675 }, { "epoch": 0.6620536626667834, "grad_norm": 0.6626114211004014, "learning_rate": 7.5101378751013794e-06, "loss": 0.588, "step": 22676 }, { "epoch": 0.6620828588946308, "grad_norm": 0.6326274503588802, "learning_rate": 7.5094890510948915e-06, "loss": 0.6335, "step": 22677 }, { "epoch": 0.6621120551224782, "grad_norm": 0.6565687182744517, "learning_rate": 7.5088402270884035e-06, "loss": 0.5878, "step": 22678 }, { "epoch": 0.6621412513503255, "grad_norm": 0.591671158427845, "learning_rate": 7.508191403081915e-06, "loss": 0.5212, "step": 22679 }, { "epoch": 0.6621704475781729, "grad_norm": 0.6664815062982358, "learning_rate": 7.507542579075427e-06, "loss": 0.6513, "step": 22680 }, { "epoch": 0.6621996438060203, "grad_norm": 0.6624468562971019, "learning_rate": 7.506893755068938e-06, "loss": 0.5959, "step": 22681 }, { "epoch": 0.6622288400338676, "grad_norm": 0.6551796900669923, "learning_rate": 7.50624493106245e-06, "loss": 0.6874, "step": 22682 }, { "epoch": 0.662258036261715, "grad_norm": 0.6502582674177515, "learning_rate": 7.505596107055961e-06, "loss": 0.6081, "step": 22683 }, { "epoch": 0.6622872324895623, "grad_norm": 0.5587202480092583, "learning_rate": 7.504947283049473e-06, "loss": 0.4879, "step": 22684 }, { "epoch": 0.6623164287174097, "grad_norm": 0.7713239123023268, "learning_rate": 7.504298459042986e-06, "loss": 0.7303, "step": 22685 }, { "epoch": 0.6623456249452571, "grad_norm": 0.59934523228116, "learning_rate": 7.503649635036497e-06, "loss": 0.5243, "step": 22686 }, { "epoch": 0.6623748211731044, "grad_norm": 0.6729254844945831, "learning_rate": 7.503000811030009e-06, "loss": 0.6497, "step": 22687 }, { "epoch": 0.6624040174009518, "grad_norm": 0.6619624321324583, "learning_rate": 7.50235198702352e-06, "loss": 0.5965, "step": 22688 }, { "epoch": 0.6624332136287991, "grad_norm": 0.7316077563700754, "learning_rate": 7.501703163017032e-06, "loss": 0.6921, "step": 22689 }, { "epoch": 0.6624624098566465, "grad_norm": 0.6622372954818279, "learning_rate": 7.5010543390105436e-06, "loss": 0.6183, "step": 22690 }, { "epoch": 0.6624916060844939, "grad_norm": 0.6564677189873882, "learning_rate": 7.500405515004056e-06, "loss": 0.646, "step": 22691 }, { "epoch": 0.6625208023123412, "grad_norm": 0.6768016868446728, "learning_rate": 7.499756690997568e-06, "loss": 0.5842, "step": 22692 }, { "epoch": 0.6625499985401886, "grad_norm": 0.6750394360826238, "learning_rate": 7.499107866991079e-06, "loss": 0.6283, "step": 22693 }, { "epoch": 0.662579194768036, "grad_norm": 0.6593788733121159, "learning_rate": 7.498459042984592e-06, "loss": 0.6243, "step": 22694 }, { "epoch": 0.6626083909958833, "grad_norm": 0.6851431392050739, "learning_rate": 7.497810218978103e-06, "loss": 0.6807, "step": 22695 }, { "epoch": 0.6626375872237307, "grad_norm": 0.6572348323113498, "learning_rate": 7.497161394971615e-06, "loss": 0.5727, "step": 22696 }, { "epoch": 0.662666783451578, "grad_norm": 0.6410026451371625, "learning_rate": 7.496512570965127e-06, "loss": 0.6243, "step": 22697 }, { "epoch": 0.6626959796794254, "grad_norm": 0.685263974858832, "learning_rate": 7.495863746958638e-06, "loss": 0.6334, "step": 22698 }, { "epoch": 0.6627251759072728, "grad_norm": 0.6370401496555522, "learning_rate": 7.49521492295215e-06, "loss": 0.611, "step": 22699 }, { "epoch": 0.6627543721351201, "grad_norm": 0.6590524815334532, "learning_rate": 7.494566098945661e-06, "loss": 0.645, "step": 22700 }, { "epoch": 0.6627835683629675, "grad_norm": 0.7085899428434476, "learning_rate": 7.493917274939173e-06, "loss": 0.6712, "step": 22701 }, { "epoch": 0.6628127645908148, "grad_norm": 0.6159644570982407, "learning_rate": 7.4932684509326845e-06, "loss": 0.5095, "step": 22702 }, { "epoch": 0.6628419608186622, "grad_norm": 0.6386403877451018, "learning_rate": 7.4926196269261965e-06, "loss": 0.5945, "step": 22703 }, { "epoch": 0.6628711570465096, "grad_norm": 0.6400545476611527, "learning_rate": 7.491970802919709e-06, "loss": 0.5616, "step": 22704 }, { "epoch": 0.6629003532743569, "grad_norm": 0.6477919356422895, "learning_rate": 7.491321978913221e-06, "loss": 0.6342, "step": 22705 }, { "epoch": 0.6629295495022043, "grad_norm": 0.6561104191077423, "learning_rate": 7.490673154906733e-06, "loss": 0.6343, "step": 22706 }, { "epoch": 0.6629587457300516, "grad_norm": 0.6427572025930531, "learning_rate": 7.490024330900244e-06, "loss": 0.6215, "step": 22707 }, { "epoch": 0.662987941957899, "grad_norm": 0.7494187258223851, "learning_rate": 7.489375506893756e-06, "loss": 0.6793, "step": 22708 }, { "epoch": 0.6630171381857464, "grad_norm": 0.5759144616861636, "learning_rate": 7.488726682887267e-06, "loss": 0.5042, "step": 22709 }, { "epoch": 0.6630463344135937, "grad_norm": 0.6406393039384168, "learning_rate": 7.488077858880779e-06, "loss": 0.6123, "step": 22710 }, { "epoch": 0.6630755306414411, "grad_norm": 0.6681892921710698, "learning_rate": 7.48742903487429e-06, "loss": 0.6474, "step": 22711 }, { "epoch": 0.6631047268692885, "grad_norm": 0.6249526458424852, "learning_rate": 7.486780210867802e-06, "loss": 0.5675, "step": 22712 }, { "epoch": 0.6631339230971358, "grad_norm": 0.6688390144252896, "learning_rate": 7.486131386861315e-06, "loss": 0.6792, "step": 22713 }, { "epoch": 0.6631631193249832, "grad_norm": 0.6518965340958656, "learning_rate": 7.485482562854826e-06, "loss": 0.6226, "step": 22714 }, { "epoch": 0.6631923155528305, "grad_norm": 0.6543304796351992, "learning_rate": 7.484833738848338e-06, "loss": 0.6294, "step": 22715 }, { "epoch": 0.6632215117806779, "grad_norm": 0.6456512200306199, "learning_rate": 7.48418491484185e-06, "loss": 0.5657, "step": 22716 }, { "epoch": 0.6632507080085253, "grad_norm": 0.6624571064830946, "learning_rate": 7.4835360908353615e-06, "loss": 0.6176, "step": 22717 }, { "epoch": 0.6632799042363726, "grad_norm": 0.6153627123477783, "learning_rate": 7.4828872668288736e-06, "loss": 0.5091, "step": 22718 }, { "epoch": 0.66330910046422, "grad_norm": 0.6622702760995692, "learning_rate": 7.482238442822385e-06, "loss": 0.614, "step": 22719 }, { "epoch": 0.6633382966920673, "grad_norm": 0.7042803040944833, "learning_rate": 7.481589618815897e-06, "loss": 0.6647, "step": 22720 }, { "epoch": 0.6633674929199147, "grad_norm": 0.7443650494598762, "learning_rate": 7.480940794809408e-06, "loss": 0.6095, "step": 22721 }, { "epoch": 0.6633966891477621, "grad_norm": 0.6377472477495814, "learning_rate": 7.48029197080292e-06, "loss": 0.6074, "step": 22722 }, { "epoch": 0.6634258853756094, "grad_norm": 0.6263035779366766, "learning_rate": 7.479643146796433e-06, "loss": 0.5908, "step": 22723 }, { "epoch": 0.6634550816034568, "grad_norm": 0.58675534086795, "learning_rate": 7.478994322789944e-06, "loss": 0.512, "step": 22724 }, { "epoch": 0.6634842778313041, "grad_norm": 0.5860201255923474, "learning_rate": 7.478345498783456e-06, "loss": 0.5634, "step": 22725 }, { "epoch": 0.6635134740591515, "grad_norm": 0.644436856149861, "learning_rate": 7.477696674776967e-06, "loss": 0.5986, "step": 22726 }, { "epoch": 0.6635426702869989, "grad_norm": 0.624309075864882, "learning_rate": 7.477047850770479e-06, "loss": 0.5689, "step": 22727 }, { "epoch": 0.6635718665148462, "grad_norm": 0.6325488811691569, "learning_rate": 7.4763990267639904e-06, "loss": 0.5778, "step": 22728 }, { "epoch": 0.6636010627426936, "grad_norm": 0.7117187871129816, "learning_rate": 7.4757502027575025e-06, "loss": 0.622, "step": 22729 }, { "epoch": 0.663630258970541, "grad_norm": 0.6294882442416417, "learning_rate": 7.475101378751014e-06, "loss": 0.5784, "step": 22730 }, { "epoch": 0.6636594551983883, "grad_norm": 0.6327178402874463, "learning_rate": 7.474452554744526e-06, "loss": 0.5695, "step": 22731 }, { "epoch": 0.6636886514262357, "grad_norm": 0.6234366215436298, "learning_rate": 7.4738037307380385e-06, "loss": 0.6004, "step": 22732 }, { "epoch": 0.663717847654083, "grad_norm": 0.6110805071348363, "learning_rate": 7.47315490673155e-06, "loss": 0.5629, "step": 22733 }, { "epoch": 0.6637470438819305, "grad_norm": 0.6471195789849393, "learning_rate": 7.472506082725062e-06, "loss": 0.5923, "step": 22734 }, { "epoch": 0.6637762401097779, "grad_norm": 0.6602480850802822, "learning_rate": 7.471857258718574e-06, "loss": 0.6559, "step": 22735 }, { "epoch": 0.6638054363376252, "grad_norm": 0.6919224535325023, "learning_rate": 7.471208434712085e-06, "loss": 0.6174, "step": 22736 }, { "epoch": 0.6638346325654726, "grad_norm": 0.604774098249069, "learning_rate": 7.470559610705597e-06, "loss": 0.5837, "step": 22737 }, { "epoch": 0.66386382879332, "grad_norm": 0.6248603644830916, "learning_rate": 7.469910786699108e-06, "loss": 0.5569, "step": 22738 }, { "epoch": 0.6638930250211673, "grad_norm": 0.630710784465688, "learning_rate": 7.46926196269262e-06, "loss": 0.612, "step": 22739 }, { "epoch": 0.6639222212490147, "grad_norm": 0.596278831916978, "learning_rate": 7.468613138686131e-06, "loss": 0.5222, "step": 22740 }, { "epoch": 0.663951417476862, "grad_norm": 0.6058556896438473, "learning_rate": 7.467964314679643e-06, "loss": 0.5507, "step": 22741 }, { "epoch": 0.6639806137047094, "grad_norm": 0.6448196539787732, "learning_rate": 7.467315490673156e-06, "loss": 0.618, "step": 22742 }, { "epoch": 0.6640098099325568, "grad_norm": 0.681770507738818, "learning_rate": 7.4666666666666675e-06, "loss": 0.6315, "step": 22743 }, { "epoch": 0.6640390061604041, "grad_norm": 0.6829175529396737, "learning_rate": 7.4660178426601795e-06, "loss": 0.7133, "step": 22744 }, { "epoch": 0.6640682023882515, "grad_norm": 0.6649640516188927, "learning_rate": 7.465369018653691e-06, "loss": 0.583, "step": 22745 }, { "epoch": 0.6640973986160988, "grad_norm": 0.6469623099475467, "learning_rate": 7.464720194647203e-06, "loss": 0.6366, "step": 22746 }, { "epoch": 0.6641265948439462, "grad_norm": 0.6559379948809575, "learning_rate": 7.464071370640714e-06, "loss": 0.6182, "step": 22747 }, { "epoch": 0.6641557910717936, "grad_norm": 0.7675514575594483, "learning_rate": 7.463422546634226e-06, "loss": 0.7151, "step": 22748 }, { "epoch": 0.6641849872996409, "grad_norm": 0.6341799804340853, "learning_rate": 7.462773722627737e-06, "loss": 0.5758, "step": 22749 }, { "epoch": 0.6642141835274883, "grad_norm": 0.6077974726610463, "learning_rate": 7.462124898621249e-06, "loss": 0.5331, "step": 22750 }, { "epoch": 0.6642433797553357, "grad_norm": 0.6507707981937846, "learning_rate": 7.461476074614762e-06, "loss": 0.6259, "step": 22751 }, { "epoch": 0.664272575983183, "grad_norm": 0.6503899341563997, "learning_rate": 7.460827250608273e-06, "loss": 0.6152, "step": 22752 }, { "epoch": 0.6643017722110304, "grad_norm": 0.6394746901439808, "learning_rate": 7.460178426601785e-06, "loss": 0.6548, "step": 22753 }, { "epoch": 0.6643309684388777, "grad_norm": 0.6822643736056403, "learning_rate": 7.459529602595297e-06, "loss": 0.6035, "step": 22754 }, { "epoch": 0.6643601646667251, "grad_norm": 0.6242698645165136, "learning_rate": 7.458880778588808e-06, "loss": 0.5681, "step": 22755 }, { "epoch": 0.6643893608945725, "grad_norm": 0.6723089692123501, "learning_rate": 7.4582319545823204e-06, "loss": 0.6393, "step": 22756 }, { "epoch": 0.6644185571224198, "grad_norm": 0.6145807223286391, "learning_rate": 7.457583130575832e-06, "loss": 0.5624, "step": 22757 }, { "epoch": 0.6644477533502672, "grad_norm": 0.6317405672580274, "learning_rate": 7.456934306569344e-06, "loss": 0.564, "step": 22758 }, { "epoch": 0.6644769495781145, "grad_norm": 0.5830985781393536, "learning_rate": 7.456285482562855e-06, "loss": 0.5151, "step": 22759 }, { "epoch": 0.6645061458059619, "grad_norm": 0.6569671186448824, "learning_rate": 7.455636658556367e-06, "loss": 0.6474, "step": 22760 }, { "epoch": 0.6645353420338093, "grad_norm": 0.6500821997291846, "learning_rate": 7.45498783454988e-06, "loss": 0.6391, "step": 22761 }, { "epoch": 0.6645645382616566, "grad_norm": 0.5813456484930254, "learning_rate": 7.454339010543391e-06, "loss": 0.5276, "step": 22762 }, { "epoch": 0.664593734489504, "grad_norm": 0.6686141796374745, "learning_rate": 7.453690186536903e-06, "loss": 0.6553, "step": 22763 }, { "epoch": 0.6646229307173513, "grad_norm": 0.6296521673315055, "learning_rate": 7.453041362530414e-06, "loss": 0.5812, "step": 22764 }, { "epoch": 0.6646521269451987, "grad_norm": 0.6585631432532757, "learning_rate": 7.452392538523926e-06, "loss": 0.6775, "step": 22765 }, { "epoch": 0.6646813231730461, "grad_norm": 0.6471492722236787, "learning_rate": 7.451743714517437e-06, "loss": 0.6172, "step": 22766 }, { "epoch": 0.6647105194008934, "grad_norm": 0.7180448176217933, "learning_rate": 7.451094890510949e-06, "loss": 0.756, "step": 22767 }, { "epoch": 0.6647397156287408, "grad_norm": 0.6519513663587211, "learning_rate": 7.4504460665044605e-06, "loss": 0.6218, "step": 22768 }, { "epoch": 0.6647689118565882, "grad_norm": 0.654826040783102, "learning_rate": 7.4497972424979725e-06, "loss": 0.5636, "step": 22769 }, { "epoch": 0.6647981080844355, "grad_norm": 0.6847316518086384, "learning_rate": 7.449148418491485e-06, "loss": 0.6557, "step": 22770 }, { "epoch": 0.6648273043122829, "grad_norm": 0.6748942751466883, "learning_rate": 7.448499594484997e-06, "loss": 0.6236, "step": 22771 }, { "epoch": 0.6648565005401302, "grad_norm": 0.6140495611350466, "learning_rate": 7.447850770478509e-06, "loss": 0.5383, "step": 22772 }, { "epoch": 0.6648856967679776, "grad_norm": 0.6461990806214755, "learning_rate": 7.44720194647202e-06, "loss": 0.6208, "step": 22773 }, { "epoch": 0.664914892995825, "grad_norm": 0.6401948115004692, "learning_rate": 7.446553122465532e-06, "loss": 0.592, "step": 22774 }, { "epoch": 0.6649440892236723, "grad_norm": 0.674513520167398, "learning_rate": 7.445904298459044e-06, "loss": 0.6668, "step": 22775 }, { "epoch": 0.6649732854515197, "grad_norm": 0.6001078305427824, "learning_rate": 7.445255474452555e-06, "loss": 0.5252, "step": 22776 }, { "epoch": 0.665002481679367, "grad_norm": 0.5862818689438154, "learning_rate": 7.444606650446067e-06, "loss": 0.5227, "step": 22777 }, { "epoch": 0.6650316779072144, "grad_norm": 0.6137678640408443, "learning_rate": 7.443957826439578e-06, "loss": 0.5672, "step": 22778 }, { "epoch": 0.6650608741350618, "grad_norm": 0.6602659366337086, "learning_rate": 7.443309002433091e-06, "loss": 0.5389, "step": 22779 }, { "epoch": 0.6650900703629091, "grad_norm": 0.6636035194948398, "learning_rate": 7.442660178426603e-06, "loss": 0.6216, "step": 22780 }, { "epoch": 0.6651192665907565, "grad_norm": 0.5671028389776719, "learning_rate": 7.442011354420114e-06, "loss": 0.4956, "step": 22781 }, { "epoch": 0.6651484628186038, "grad_norm": 0.6384248414664001, "learning_rate": 7.441362530413626e-06, "loss": 0.6084, "step": 22782 }, { "epoch": 0.6651776590464512, "grad_norm": 0.5981266747419623, "learning_rate": 7.4407137064071375e-06, "loss": 0.5847, "step": 22783 }, { "epoch": 0.6652068552742986, "grad_norm": 0.6011062390807719, "learning_rate": 7.4400648824006496e-06, "loss": 0.5075, "step": 22784 }, { "epoch": 0.6652360515021459, "grad_norm": 0.6708368852794786, "learning_rate": 7.439416058394161e-06, "loss": 0.6424, "step": 22785 }, { "epoch": 0.6652652477299933, "grad_norm": 0.6444543241704718, "learning_rate": 7.438767234387673e-06, "loss": 0.645, "step": 22786 }, { "epoch": 0.6652944439578407, "grad_norm": 0.6500739288276186, "learning_rate": 7.438118410381184e-06, "loss": 0.5663, "step": 22787 }, { "epoch": 0.665323640185688, "grad_norm": 0.6732838854378681, "learning_rate": 7.437469586374696e-06, "loss": 0.6553, "step": 22788 }, { "epoch": 0.6653528364135354, "grad_norm": 0.650904255137961, "learning_rate": 7.436820762368209e-06, "loss": 0.6395, "step": 22789 }, { "epoch": 0.6653820326413827, "grad_norm": 0.600646513807585, "learning_rate": 7.43617193836172e-06, "loss": 0.5589, "step": 22790 }, { "epoch": 0.6654112288692301, "grad_norm": 0.6093850120724298, "learning_rate": 7.435523114355232e-06, "loss": 0.5475, "step": 22791 }, { "epoch": 0.6654404250970775, "grad_norm": 0.6060080015667119, "learning_rate": 7.434874290348743e-06, "loss": 0.5639, "step": 22792 }, { "epoch": 0.6654696213249248, "grad_norm": 0.6528068047371397, "learning_rate": 7.434225466342255e-06, "loss": 0.5881, "step": 22793 }, { "epoch": 0.6654988175527722, "grad_norm": 0.6154642402289329, "learning_rate": 7.433576642335767e-06, "loss": 0.5824, "step": 22794 }, { "epoch": 0.6655280137806195, "grad_norm": 0.6344945078321436, "learning_rate": 7.4329278183292785e-06, "loss": 0.5935, "step": 22795 }, { "epoch": 0.6655572100084669, "grad_norm": 0.601398938465726, "learning_rate": 7.4322789943227905e-06, "loss": 0.5519, "step": 22796 }, { "epoch": 0.6655864062363143, "grad_norm": 0.657070463056526, "learning_rate": 7.431630170316302e-06, "loss": 0.6413, "step": 22797 }, { "epoch": 0.6656156024641616, "grad_norm": 0.649398680841377, "learning_rate": 7.4309813463098146e-06, "loss": 0.6407, "step": 22798 }, { "epoch": 0.665644798692009, "grad_norm": 0.6003059521256581, "learning_rate": 7.430332522303327e-06, "loss": 0.5069, "step": 22799 }, { "epoch": 0.6656739949198563, "grad_norm": 0.5756593085615059, "learning_rate": 7.429683698296838e-06, "loss": 0.4672, "step": 22800 }, { "epoch": 0.6657031911477037, "grad_norm": 0.6418151006267085, "learning_rate": 7.42903487429035e-06, "loss": 0.6156, "step": 22801 }, { "epoch": 0.6657323873755511, "grad_norm": 0.6073818826665852, "learning_rate": 7.428386050283861e-06, "loss": 0.5276, "step": 22802 }, { "epoch": 0.6657615836033984, "grad_norm": 0.651554275195987, "learning_rate": 7.427737226277373e-06, "loss": 0.6165, "step": 22803 }, { "epoch": 0.6657907798312458, "grad_norm": 0.644157313739303, "learning_rate": 7.427088402270884e-06, "loss": 0.6486, "step": 22804 }, { "epoch": 0.6658199760590932, "grad_norm": 0.6140173066874024, "learning_rate": 7.426439578264396e-06, "loss": 0.5926, "step": 22805 }, { "epoch": 0.6658491722869405, "grad_norm": 0.6384678072482282, "learning_rate": 7.425790754257907e-06, "loss": 0.5872, "step": 22806 }, { "epoch": 0.6658783685147879, "grad_norm": 0.633632619251783, "learning_rate": 7.425141930251419e-06, "loss": 0.62, "step": 22807 }, { "epoch": 0.6659075647426352, "grad_norm": 0.5850959096423392, "learning_rate": 7.424493106244932e-06, "loss": 0.5215, "step": 22808 }, { "epoch": 0.6659367609704826, "grad_norm": 0.5508945062083669, "learning_rate": 7.4238442822384435e-06, "loss": 0.455, "step": 22809 }, { "epoch": 0.66596595719833, "grad_norm": 0.6742680520684168, "learning_rate": 7.4231954582319555e-06, "loss": 0.5808, "step": 22810 }, { "epoch": 0.6659951534261773, "grad_norm": 0.6619826869514975, "learning_rate": 7.422546634225467e-06, "loss": 0.6338, "step": 22811 }, { "epoch": 0.6660243496540247, "grad_norm": 0.6640500564119651, "learning_rate": 7.421897810218979e-06, "loss": 0.6277, "step": 22812 }, { "epoch": 0.666053545881872, "grad_norm": 0.6672931425572868, "learning_rate": 7.421248986212491e-06, "loss": 0.6078, "step": 22813 }, { "epoch": 0.6660827421097194, "grad_norm": 0.6002099028650256, "learning_rate": 7.420600162206002e-06, "loss": 0.5637, "step": 22814 }, { "epoch": 0.6661119383375668, "grad_norm": 0.6413497725868721, "learning_rate": 7.419951338199514e-06, "loss": 0.591, "step": 22815 }, { "epoch": 0.6661411345654141, "grad_norm": 0.6076583429594272, "learning_rate": 7.419302514193025e-06, "loss": 0.5893, "step": 22816 }, { "epoch": 0.6661703307932615, "grad_norm": 0.6547564714118389, "learning_rate": 7.418653690186538e-06, "loss": 0.6307, "step": 22817 }, { "epoch": 0.6661995270211089, "grad_norm": 0.597655964390657, "learning_rate": 7.41800486618005e-06, "loss": 0.5346, "step": 22818 }, { "epoch": 0.6662287232489562, "grad_norm": 0.6062460609276772, "learning_rate": 7.417356042173561e-06, "loss": 0.523, "step": 22819 }, { "epoch": 0.6662579194768036, "grad_norm": 0.6212777848400565, "learning_rate": 7.416707218167073e-06, "loss": 0.5621, "step": 22820 }, { "epoch": 0.6662871157046509, "grad_norm": 0.6438281354525344, "learning_rate": 7.416058394160584e-06, "loss": 0.56, "step": 22821 }, { "epoch": 0.6663163119324983, "grad_norm": 0.5803652483568229, "learning_rate": 7.4154095701540964e-06, "loss": 0.4912, "step": 22822 }, { "epoch": 0.6663455081603457, "grad_norm": 0.6939933501030898, "learning_rate": 7.414760746147608e-06, "loss": 0.6239, "step": 22823 }, { "epoch": 0.666374704388193, "grad_norm": 0.6687387808153634, "learning_rate": 7.41411192214112e-06, "loss": 0.6283, "step": 22824 }, { "epoch": 0.6664039006160404, "grad_norm": 0.670946010308793, "learning_rate": 7.413463098134631e-06, "loss": 0.6452, "step": 22825 }, { "epoch": 0.6664330968438877, "grad_norm": 0.6147807710943224, "learning_rate": 7.412814274128143e-06, "loss": 0.5354, "step": 22826 }, { "epoch": 0.6664622930717351, "grad_norm": 0.6932305347267163, "learning_rate": 7.412165450121656e-06, "loss": 0.661, "step": 22827 }, { "epoch": 0.6664914892995825, "grad_norm": 0.6489595885573873, "learning_rate": 7.411516626115167e-06, "loss": 0.5982, "step": 22828 }, { "epoch": 0.6665206855274298, "grad_norm": 0.6597505110424832, "learning_rate": 7.410867802108679e-06, "loss": 0.6434, "step": 22829 }, { "epoch": 0.6665498817552772, "grad_norm": 0.6087096341150459, "learning_rate": 7.41021897810219e-06, "loss": 0.5825, "step": 22830 }, { "epoch": 0.6665790779831245, "grad_norm": 0.6791784622669672, "learning_rate": 7.409570154095702e-06, "loss": 0.7171, "step": 22831 }, { "epoch": 0.6666082742109719, "grad_norm": 0.6559869712507789, "learning_rate": 7.408921330089214e-06, "loss": 0.6024, "step": 22832 }, { "epoch": 0.6666374704388193, "grad_norm": 0.7208560733966716, "learning_rate": 7.408272506082725e-06, "loss": 0.6598, "step": 22833 }, { "epoch": 0.6666666666666666, "grad_norm": 0.6152032655139531, "learning_rate": 7.407623682076237e-06, "loss": 0.5973, "step": 22834 }, { "epoch": 0.666695862894514, "grad_norm": 0.6915025207698089, "learning_rate": 7.4069748580697485e-06, "loss": 0.6671, "step": 22835 }, { "epoch": 0.6667250591223614, "grad_norm": 0.6263606771981329, "learning_rate": 7.406326034063261e-06, "loss": 0.5633, "step": 22836 }, { "epoch": 0.6667542553502087, "grad_norm": 0.5858829317255286, "learning_rate": 7.4056772100567735e-06, "loss": 0.5242, "step": 22837 }, { "epoch": 0.6667834515780561, "grad_norm": 0.6099877322105428, "learning_rate": 7.405028386050285e-06, "loss": 0.5241, "step": 22838 }, { "epoch": 0.6668126478059034, "grad_norm": 0.6626019389736925, "learning_rate": 7.404379562043797e-06, "loss": 0.6185, "step": 22839 }, { "epoch": 0.6668418440337508, "grad_norm": 0.6419165638669256, "learning_rate": 7.403730738037308e-06, "loss": 0.5867, "step": 22840 }, { "epoch": 0.6668710402615982, "grad_norm": 0.6647872699742065, "learning_rate": 7.40308191403082e-06, "loss": 0.6294, "step": 22841 }, { "epoch": 0.6669002364894455, "grad_norm": 0.674086430719137, "learning_rate": 7.402433090024331e-06, "loss": 0.6146, "step": 22842 }, { "epoch": 0.6669294327172929, "grad_norm": 0.6493298799610981, "learning_rate": 7.401784266017843e-06, "loss": 0.5901, "step": 22843 }, { "epoch": 0.6669586289451402, "grad_norm": 0.6878486068340175, "learning_rate": 7.401135442011354e-06, "loss": 0.7222, "step": 22844 }, { "epoch": 0.6669878251729876, "grad_norm": 0.68911341932444, "learning_rate": 7.400486618004867e-06, "loss": 0.6304, "step": 22845 }, { "epoch": 0.667017021400835, "grad_norm": 0.6726149273546872, "learning_rate": 7.399837793998379e-06, "loss": 0.6807, "step": 22846 }, { "epoch": 0.6670462176286823, "grad_norm": 0.7050401274540645, "learning_rate": 7.39918896999189e-06, "loss": 0.7189, "step": 22847 }, { "epoch": 0.6670754138565297, "grad_norm": 0.6343462165777344, "learning_rate": 7.398540145985402e-06, "loss": 0.6063, "step": 22848 }, { "epoch": 0.667104610084377, "grad_norm": 0.6208074277665325, "learning_rate": 7.3978913219789135e-06, "loss": 0.6038, "step": 22849 }, { "epoch": 0.6671338063122244, "grad_norm": 0.6966588774297966, "learning_rate": 7.3972424979724256e-06, "loss": 0.726, "step": 22850 }, { "epoch": 0.6671630025400718, "grad_norm": 0.5784928143635621, "learning_rate": 7.396593673965938e-06, "loss": 0.4875, "step": 22851 }, { "epoch": 0.6671921987679191, "grad_norm": 0.6886215498365272, "learning_rate": 7.395944849959449e-06, "loss": 0.6581, "step": 22852 }, { "epoch": 0.6672213949957665, "grad_norm": 0.6441319009254226, "learning_rate": 7.395296025952961e-06, "loss": 0.6102, "step": 22853 }, { "epoch": 0.6672505912236139, "grad_norm": 0.685872194222608, "learning_rate": 7.394647201946472e-06, "loss": 0.6576, "step": 22854 }, { "epoch": 0.6672797874514613, "grad_norm": 0.6331413220640659, "learning_rate": 7.393998377939985e-06, "loss": 0.5897, "step": 22855 }, { "epoch": 0.6673089836793087, "grad_norm": 0.628287914887019, "learning_rate": 7.393349553933497e-06, "loss": 0.6164, "step": 22856 }, { "epoch": 0.667338179907156, "grad_norm": 0.6703257520786865, "learning_rate": 7.392700729927008e-06, "loss": 0.6768, "step": 22857 }, { "epoch": 0.6673673761350034, "grad_norm": 0.6784187022491008, "learning_rate": 7.39205190592052e-06, "loss": 0.6087, "step": 22858 }, { "epoch": 0.6673965723628508, "grad_norm": 0.6543090452562975, "learning_rate": 7.391403081914031e-06, "loss": 0.6099, "step": 22859 }, { "epoch": 0.6674257685906981, "grad_norm": 0.6471492811406134, "learning_rate": 7.390754257907543e-06, "loss": 0.6228, "step": 22860 }, { "epoch": 0.6674549648185455, "grad_norm": 0.6622876893564029, "learning_rate": 7.3901054339010545e-06, "loss": 0.6137, "step": 22861 }, { "epoch": 0.6674841610463929, "grad_norm": 0.6209466504280282, "learning_rate": 7.3894566098945665e-06, "loss": 0.5493, "step": 22862 }, { "epoch": 0.6675133572742402, "grad_norm": 0.6744962935365354, "learning_rate": 7.388807785888078e-06, "loss": 0.6439, "step": 22863 }, { "epoch": 0.6675425535020876, "grad_norm": 0.7004816585151171, "learning_rate": 7.3881589618815906e-06, "loss": 0.6847, "step": 22864 }, { "epoch": 0.6675717497299349, "grad_norm": 0.5967548631595004, "learning_rate": 7.387510137875103e-06, "loss": 0.5534, "step": 22865 }, { "epoch": 0.6676009459577823, "grad_norm": 0.6578155910212985, "learning_rate": 7.386861313868614e-06, "loss": 0.5973, "step": 22866 }, { "epoch": 0.6676301421856297, "grad_norm": 0.6336821910433638, "learning_rate": 7.386212489862126e-06, "loss": 0.5905, "step": 22867 }, { "epoch": 0.667659338413477, "grad_norm": 0.6418009426097813, "learning_rate": 7.385563665855637e-06, "loss": 0.6042, "step": 22868 }, { "epoch": 0.6676885346413244, "grad_norm": 0.6558184161470141, "learning_rate": 7.384914841849149e-06, "loss": 0.6135, "step": 22869 }, { "epoch": 0.6677177308691717, "grad_norm": 0.61744178442127, "learning_rate": 7.384266017842661e-06, "loss": 0.573, "step": 22870 }, { "epoch": 0.6677469270970191, "grad_norm": 0.6372093702324594, "learning_rate": 7.383617193836172e-06, "loss": 0.5887, "step": 22871 }, { "epoch": 0.6677761233248665, "grad_norm": 0.62137613258747, "learning_rate": 7.382968369829684e-06, "loss": 0.5613, "step": 22872 }, { "epoch": 0.6678053195527138, "grad_norm": 0.6227307543793432, "learning_rate": 7.382319545823195e-06, "loss": 0.6252, "step": 22873 }, { "epoch": 0.6678345157805612, "grad_norm": 0.663209728961902, "learning_rate": 7.381670721816708e-06, "loss": 0.6248, "step": 22874 }, { "epoch": 0.6678637120084085, "grad_norm": 0.6644496047432965, "learning_rate": 7.38102189781022e-06, "loss": 0.6322, "step": 22875 }, { "epoch": 0.6678929082362559, "grad_norm": 0.6547480173654865, "learning_rate": 7.3803730738037315e-06, "loss": 0.6402, "step": 22876 }, { "epoch": 0.6679221044641033, "grad_norm": 0.6024156781327317, "learning_rate": 7.3797242497972435e-06, "loss": 0.5471, "step": 22877 }, { "epoch": 0.6679513006919506, "grad_norm": 0.6130807530599389, "learning_rate": 7.379075425790755e-06, "loss": 0.5282, "step": 22878 }, { "epoch": 0.667980496919798, "grad_norm": 0.5701053402091693, "learning_rate": 7.378426601784267e-06, "loss": 0.5011, "step": 22879 }, { "epoch": 0.6680096931476454, "grad_norm": 0.6514795189157778, "learning_rate": 7.377777777777778e-06, "loss": 0.591, "step": 22880 }, { "epoch": 0.6680388893754927, "grad_norm": 0.6248723648430559, "learning_rate": 7.37712895377129e-06, "loss": 0.5849, "step": 22881 }, { "epoch": 0.6680680856033401, "grad_norm": 0.6693731301384269, "learning_rate": 7.376480129764801e-06, "loss": 0.674, "step": 22882 }, { "epoch": 0.6680972818311874, "grad_norm": 0.6194084434027564, "learning_rate": 7.375831305758314e-06, "loss": 0.6142, "step": 22883 }, { "epoch": 0.6681264780590348, "grad_norm": 0.6408225649791268, "learning_rate": 7.375182481751826e-06, "loss": 0.5857, "step": 22884 }, { "epoch": 0.6681556742868822, "grad_norm": 0.622285074302738, "learning_rate": 7.374533657745337e-06, "loss": 0.572, "step": 22885 }, { "epoch": 0.6681848705147295, "grad_norm": 0.637832913584643, "learning_rate": 7.373884833738849e-06, "loss": 0.6172, "step": 22886 }, { "epoch": 0.6682140667425769, "grad_norm": 0.7011693908126835, "learning_rate": 7.37323600973236e-06, "loss": 0.68, "step": 22887 }, { "epoch": 0.6682432629704242, "grad_norm": 0.6376038612809648, "learning_rate": 7.3725871857258724e-06, "loss": 0.6195, "step": 22888 }, { "epoch": 0.6682724591982716, "grad_norm": 0.6710204913020946, "learning_rate": 7.371938361719384e-06, "loss": 0.6293, "step": 22889 }, { "epoch": 0.668301655426119, "grad_norm": 0.66382562963079, "learning_rate": 7.371289537712896e-06, "loss": 0.5942, "step": 22890 }, { "epoch": 0.6683308516539663, "grad_norm": 0.6495913129298203, "learning_rate": 7.370640713706408e-06, "loss": 0.6113, "step": 22891 }, { "epoch": 0.6683600478818137, "grad_norm": 0.6203882988562343, "learning_rate": 7.369991889699919e-06, "loss": 0.6138, "step": 22892 }, { "epoch": 0.668389244109661, "grad_norm": 0.6225978106087041, "learning_rate": 7.369343065693432e-06, "loss": 0.5763, "step": 22893 }, { "epoch": 0.6684184403375084, "grad_norm": 0.6539353321296254, "learning_rate": 7.368694241686944e-06, "loss": 0.6188, "step": 22894 }, { "epoch": 0.6684476365653558, "grad_norm": 0.5701756225960389, "learning_rate": 7.368045417680455e-06, "loss": 0.4937, "step": 22895 }, { "epoch": 0.6684768327932031, "grad_norm": 0.6298888760241578, "learning_rate": 7.367396593673967e-06, "loss": 0.5581, "step": 22896 }, { "epoch": 0.6685060290210505, "grad_norm": 0.6365628511309628, "learning_rate": 7.366747769667478e-06, "loss": 0.5954, "step": 22897 }, { "epoch": 0.6685352252488979, "grad_norm": 0.6975656416917112, "learning_rate": 7.36609894566099e-06, "loss": 0.6645, "step": 22898 }, { "epoch": 0.6685644214767452, "grad_norm": 0.5875545411845794, "learning_rate": 7.365450121654501e-06, "loss": 0.541, "step": 22899 }, { "epoch": 0.6685936177045926, "grad_norm": 0.6371948193999452, "learning_rate": 7.364801297648013e-06, "loss": 0.583, "step": 22900 }, { "epoch": 0.6686228139324399, "grad_norm": 0.6323622554862262, "learning_rate": 7.3641524736415246e-06, "loss": 0.5768, "step": 22901 }, { "epoch": 0.6686520101602873, "grad_norm": 0.6419193924752472, "learning_rate": 7.3635036496350374e-06, "loss": 0.5391, "step": 22902 }, { "epoch": 0.6686812063881347, "grad_norm": 0.5889343129598861, "learning_rate": 7.3628548256285495e-06, "loss": 0.4836, "step": 22903 }, { "epoch": 0.668710402615982, "grad_norm": 0.6441689054530717, "learning_rate": 7.362206001622061e-06, "loss": 0.5891, "step": 22904 }, { "epoch": 0.6687395988438294, "grad_norm": 0.6042965449679603, "learning_rate": 7.361557177615573e-06, "loss": 0.5118, "step": 22905 }, { "epoch": 0.6687687950716767, "grad_norm": 0.632926273438873, "learning_rate": 7.360908353609084e-06, "loss": 0.6137, "step": 22906 }, { "epoch": 0.6687979912995241, "grad_norm": 0.6775328576091608, "learning_rate": 7.360259529602596e-06, "loss": 0.6377, "step": 22907 }, { "epoch": 0.6688271875273715, "grad_norm": 0.655007683824894, "learning_rate": 7.359610705596107e-06, "loss": 0.6409, "step": 22908 }, { "epoch": 0.6688563837552188, "grad_norm": 0.6689538403743163, "learning_rate": 7.358961881589619e-06, "loss": 0.608, "step": 22909 }, { "epoch": 0.6688855799830662, "grad_norm": 0.6306841371700388, "learning_rate": 7.358313057583131e-06, "loss": 0.5659, "step": 22910 }, { "epoch": 0.6689147762109136, "grad_norm": 0.6723946397838269, "learning_rate": 7.357664233576642e-06, "loss": 0.6502, "step": 22911 }, { "epoch": 0.6689439724387609, "grad_norm": 0.7477865267742824, "learning_rate": 7.357015409570155e-06, "loss": 0.6894, "step": 22912 }, { "epoch": 0.6689731686666083, "grad_norm": 0.6078038468291509, "learning_rate": 7.356366585563667e-06, "loss": 0.5585, "step": 22913 }, { "epoch": 0.6690023648944556, "grad_norm": 0.6387727758322407, "learning_rate": 7.355717761557178e-06, "loss": 0.5775, "step": 22914 }, { "epoch": 0.669031561122303, "grad_norm": 0.6439298399452787, "learning_rate": 7.35506893755069e-06, "loss": 0.5654, "step": 22915 }, { "epoch": 0.6690607573501504, "grad_norm": 0.6835115947681575, "learning_rate": 7.3544201135442016e-06, "loss": 0.6353, "step": 22916 }, { "epoch": 0.6690899535779977, "grad_norm": 0.645274506110792, "learning_rate": 7.353771289537714e-06, "loss": 0.5688, "step": 22917 }, { "epoch": 0.6691191498058451, "grad_norm": 0.6714463841600611, "learning_rate": 7.353122465531225e-06, "loss": 0.6342, "step": 22918 }, { "epoch": 0.6691483460336924, "grad_norm": 0.626396324798465, "learning_rate": 7.352473641524737e-06, "loss": 0.6108, "step": 22919 }, { "epoch": 0.6691775422615398, "grad_norm": 0.595262755567002, "learning_rate": 7.351824817518248e-06, "loss": 0.4993, "step": 22920 }, { "epoch": 0.6692067384893872, "grad_norm": 0.6235766525165628, "learning_rate": 7.351175993511761e-06, "loss": 0.5883, "step": 22921 }, { "epoch": 0.6692359347172345, "grad_norm": 0.6449887124924182, "learning_rate": 7.350527169505273e-06, "loss": 0.58, "step": 22922 }, { "epoch": 0.6692651309450819, "grad_norm": 0.6653874028817766, "learning_rate": 7.349878345498784e-06, "loss": 0.6423, "step": 22923 }, { "epoch": 0.6692943271729292, "grad_norm": 0.6505264369632545, "learning_rate": 7.349229521492296e-06, "loss": 0.5885, "step": 22924 }, { "epoch": 0.6693235234007766, "grad_norm": 0.6569069459994237, "learning_rate": 7.348580697485807e-06, "loss": 0.6254, "step": 22925 }, { "epoch": 0.669352719628624, "grad_norm": 0.6858487342752918, "learning_rate": 7.347931873479319e-06, "loss": 0.6682, "step": 22926 }, { "epoch": 0.6693819158564713, "grad_norm": 0.6404313200523463, "learning_rate": 7.3472830494728305e-06, "loss": 0.5566, "step": 22927 }, { "epoch": 0.6694111120843187, "grad_norm": 0.6430016045698583, "learning_rate": 7.3466342254663425e-06, "loss": 0.6474, "step": 22928 }, { "epoch": 0.669440308312166, "grad_norm": 0.6073711375767068, "learning_rate": 7.3459854014598545e-06, "loss": 0.5292, "step": 22929 }, { "epoch": 0.6694695045400134, "grad_norm": 0.6295710197184574, "learning_rate": 7.3453365774533666e-06, "loss": 0.6096, "step": 22930 }, { "epoch": 0.6694987007678608, "grad_norm": 0.6149515366417161, "learning_rate": 7.344687753446879e-06, "loss": 0.57, "step": 22931 }, { "epoch": 0.6695278969957081, "grad_norm": 0.615406896688007, "learning_rate": 7.344038929440391e-06, "loss": 0.586, "step": 22932 }, { "epoch": 0.6695570932235555, "grad_norm": 0.6021124504003195, "learning_rate": 7.343390105433902e-06, "loss": 0.4844, "step": 22933 }, { "epoch": 0.6695862894514029, "grad_norm": 0.6321405440989437, "learning_rate": 7.342741281427414e-06, "loss": 0.6247, "step": 22934 }, { "epoch": 0.6696154856792502, "grad_norm": 0.570767991975217, "learning_rate": 7.342092457420925e-06, "loss": 0.4784, "step": 22935 }, { "epoch": 0.6696446819070976, "grad_norm": 0.6289202458796513, "learning_rate": 7.341443633414437e-06, "loss": 0.5741, "step": 22936 }, { "epoch": 0.6696738781349449, "grad_norm": 0.694787467550918, "learning_rate": 7.340794809407948e-06, "loss": 0.7315, "step": 22937 }, { "epoch": 0.6697030743627923, "grad_norm": 0.6742306061015368, "learning_rate": 7.34014598540146e-06, "loss": 0.6875, "step": 22938 }, { "epoch": 0.6697322705906397, "grad_norm": 0.5850191588857138, "learning_rate": 7.3394971613949714e-06, "loss": 0.5039, "step": 22939 }, { "epoch": 0.669761466818487, "grad_norm": 0.6229059888157645, "learning_rate": 7.338848337388484e-06, "loss": 0.59, "step": 22940 }, { "epoch": 0.6697906630463344, "grad_norm": 0.7384531956521801, "learning_rate": 7.338199513381996e-06, "loss": 0.647, "step": 22941 }, { "epoch": 0.6698198592741818, "grad_norm": 0.6513888497114934, "learning_rate": 7.3375506893755075e-06, "loss": 0.5875, "step": 22942 }, { "epoch": 0.6698490555020291, "grad_norm": 0.6456096037328094, "learning_rate": 7.3369018653690195e-06, "loss": 0.5742, "step": 22943 }, { "epoch": 0.6698782517298765, "grad_norm": 0.5986761033778486, "learning_rate": 7.336253041362531e-06, "loss": 0.5314, "step": 22944 }, { "epoch": 0.6699074479577238, "grad_norm": 0.6518319704790598, "learning_rate": 7.335604217356043e-06, "loss": 0.6926, "step": 22945 }, { "epoch": 0.6699366441855712, "grad_norm": 0.6304302441110761, "learning_rate": 7.334955393349554e-06, "loss": 0.6003, "step": 22946 }, { "epoch": 0.6699658404134186, "grad_norm": 0.5944402295794737, "learning_rate": 7.334306569343066e-06, "loss": 0.5089, "step": 22947 }, { "epoch": 0.6699950366412659, "grad_norm": 0.6607802741811858, "learning_rate": 7.333657745336578e-06, "loss": 0.6602, "step": 22948 }, { "epoch": 0.6700242328691133, "grad_norm": 0.6480259851141997, "learning_rate": 7.33300892133009e-06, "loss": 0.5488, "step": 22949 }, { "epoch": 0.6700534290969606, "grad_norm": 0.5972446084180496, "learning_rate": 7.332360097323602e-06, "loss": 0.5311, "step": 22950 }, { "epoch": 0.670082625324808, "grad_norm": 0.630118174033793, "learning_rate": 7.331711273317113e-06, "loss": 0.6103, "step": 22951 }, { "epoch": 0.6701118215526554, "grad_norm": 0.5909227752460728, "learning_rate": 7.331062449310625e-06, "loss": 0.5435, "step": 22952 }, { "epoch": 0.6701410177805027, "grad_norm": 0.5844514990618566, "learning_rate": 7.330413625304137e-06, "loss": 0.5177, "step": 22953 }, { "epoch": 0.6701702140083501, "grad_norm": 0.6574169840128382, "learning_rate": 7.3297648012976484e-06, "loss": 0.6423, "step": 22954 }, { "epoch": 0.6701994102361974, "grad_norm": 0.6626554670052391, "learning_rate": 7.3291159772911605e-06, "loss": 0.6341, "step": 22955 }, { "epoch": 0.6702286064640448, "grad_norm": 0.6562429933407348, "learning_rate": 7.328467153284672e-06, "loss": 0.6297, "step": 22956 }, { "epoch": 0.6702578026918922, "grad_norm": 0.6332693991680538, "learning_rate": 7.327818329278184e-06, "loss": 0.5894, "step": 22957 }, { "epoch": 0.6702869989197395, "grad_norm": 0.6259487861294613, "learning_rate": 7.327169505271695e-06, "loss": 0.5764, "step": 22958 }, { "epoch": 0.6703161951475869, "grad_norm": 0.6829414122829894, "learning_rate": 7.326520681265208e-06, "loss": 0.6248, "step": 22959 }, { "epoch": 0.6703453913754343, "grad_norm": 0.6321703694473049, "learning_rate": 7.32587185725872e-06, "loss": 0.5627, "step": 22960 }, { "epoch": 0.6703745876032816, "grad_norm": 0.6559394271675665, "learning_rate": 7.325223033252231e-06, "loss": 0.5296, "step": 22961 }, { "epoch": 0.670403783831129, "grad_norm": 0.6422634565138595, "learning_rate": 7.324574209245743e-06, "loss": 0.5823, "step": 22962 }, { "epoch": 0.6704329800589763, "grad_norm": 0.6346584065621037, "learning_rate": 7.323925385239254e-06, "loss": 0.5676, "step": 22963 }, { "epoch": 0.6704621762868237, "grad_norm": 0.6403357566099984, "learning_rate": 7.323276561232766e-06, "loss": 0.5602, "step": 22964 }, { "epoch": 0.6704913725146711, "grad_norm": 0.6676786515275795, "learning_rate": 7.322627737226277e-06, "loss": 0.661, "step": 22965 }, { "epoch": 0.6705205687425184, "grad_norm": 0.6303386400512535, "learning_rate": 7.321978913219789e-06, "loss": 0.6127, "step": 22966 }, { "epoch": 0.6705497649703658, "grad_norm": 0.6245678146840405, "learning_rate": 7.321330089213301e-06, "loss": 0.5537, "step": 22967 }, { "epoch": 0.6705789611982131, "grad_norm": 0.6540437918398182, "learning_rate": 7.3206812652068134e-06, "loss": 0.6229, "step": 22968 }, { "epoch": 0.6706081574260605, "grad_norm": 0.6849463819588172, "learning_rate": 7.3200324412003255e-06, "loss": 0.6027, "step": 22969 }, { "epoch": 0.6706373536539079, "grad_norm": 0.5627326341442576, "learning_rate": 7.319383617193837e-06, "loss": 0.5052, "step": 22970 }, { "epoch": 0.6706665498817552, "grad_norm": 0.6341294077780794, "learning_rate": 7.318734793187349e-06, "loss": 0.5772, "step": 22971 }, { "epoch": 0.6706957461096026, "grad_norm": 0.6068940356700794, "learning_rate": 7.318085969180861e-06, "loss": 0.5525, "step": 22972 }, { "epoch": 0.67072494233745, "grad_norm": 0.6328041435690684, "learning_rate": 7.317437145174372e-06, "loss": 0.5546, "step": 22973 }, { "epoch": 0.6707541385652973, "grad_norm": 0.6499230477613563, "learning_rate": 7.316788321167884e-06, "loss": 0.6368, "step": 22974 }, { "epoch": 0.6707833347931448, "grad_norm": 0.6271477208562498, "learning_rate": 7.316139497161395e-06, "loss": 0.6188, "step": 22975 }, { "epoch": 0.6708125310209921, "grad_norm": 0.7145542867929212, "learning_rate": 7.315490673154907e-06, "loss": 0.6523, "step": 22976 }, { "epoch": 0.6708417272488395, "grad_norm": 0.6797488177119043, "learning_rate": 7.314841849148418e-06, "loss": 0.681, "step": 22977 }, { "epoch": 0.6708709234766869, "grad_norm": 0.6629669951928966, "learning_rate": 7.314193025141931e-06, "loss": 0.633, "step": 22978 }, { "epoch": 0.6709001197045342, "grad_norm": 0.6139749009818081, "learning_rate": 7.313544201135443e-06, "loss": 0.5295, "step": 22979 }, { "epoch": 0.6709293159323816, "grad_norm": 0.6361769477694657, "learning_rate": 7.312895377128954e-06, "loss": 0.5685, "step": 22980 }, { "epoch": 0.670958512160229, "grad_norm": 0.6363447928560131, "learning_rate": 7.312246553122466e-06, "loss": 0.5971, "step": 22981 }, { "epoch": 0.6709877083880763, "grad_norm": 0.7128994169913908, "learning_rate": 7.311597729115978e-06, "loss": 0.6839, "step": 22982 }, { "epoch": 0.6710169046159237, "grad_norm": 0.6695065382126514, "learning_rate": 7.31094890510949e-06, "loss": 0.652, "step": 22983 }, { "epoch": 0.671046100843771, "grad_norm": 0.6472509813943569, "learning_rate": 7.310300081103001e-06, "loss": 0.6295, "step": 22984 }, { "epoch": 0.6710752970716184, "grad_norm": 0.6741632773215213, "learning_rate": 7.309651257096513e-06, "loss": 0.6321, "step": 22985 }, { "epoch": 0.6711044932994658, "grad_norm": 0.6248894170863398, "learning_rate": 7.309002433090025e-06, "loss": 0.5603, "step": 22986 }, { "epoch": 0.6711336895273131, "grad_norm": 0.6390734474169585, "learning_rate": 7.308353609083537e-06, "loss": 0.6012, "step": 22987 }, { "epoch": 0.6711628857551605, "grad_norm": 0.6092057262636776, "learning_rate": 7.307704785077049e-06, "loss": 0.5809, "step": 22988 }, { "epoch": 0.6711920819830078, "grad_norm": 0.6648533979586511, "learning_rate": 7.30705596107056e-06, "loss": 0.6201, "step": 22989 }, { "epoch": 0.6712212782108552, "grad_norm": 0.6847975702387417, "learning_rate": 7.306407137064072e-06, "loss": 0.6521, "step": 22990 }, { "epoch": 0.6712504744387026, "grad_norm": 0.6613502681142269, "learning_rate": 7.305758313057584e-06, "loss": 0.6421, "step": 22991 }, { "epoch": 0.6712796706665499, "grad_norm": 0.6121704982183128, "learning_rate": 7.305109489051095e-06, "loss": 0.5649, "step": 22992 }, { "epoch": 0.6713088668943973, "grad_norm": 0.6411902981253425, "learning_rate": 7.304460665044607e-06, "loss": 0.6315, "step": 22993 }, { "epoch": 0.6713380631222446, "grad_norm": 0.682354249698879, "learning_rate": 7.3038118410381185e-06, "loss": 0.6102, "step": 22994 }, { "epoch": 0.671367259350092, "grad_norm": 0.631054866734238, "learning_rate": 7.3031630170316305e-06, "loss": 0.5839, "step": 22995 }, { "epoch": 0.6713964555779394, "grad_norm": 0.6402521995994993, "learning_rate": 7.302514193025142e-06, "loss": 0.6127, "step": 22996 }, { "epoch": 0.6714256518057867, "grad_norm": 0.658414872941456, "learning_rate": 7.301865369018655e-06, "loss": 0.6549, "step": 22997 }, { "epoch": 0.6714548480336341, "grad_norm": 0.6585935759680172, "learning_rate": 7.301216545012167e-06, "loss": 0.6399, "step": 22998 }, { "epoch": 0.6714840442614814, "grad_norm": 0.6117729720525235, "learning_rate": 7.300567721005678e-06, "loss": 0.56, "step": 22999 }, { "epoch": 0.6715132404893288, "grad_norm": 0.6809308448689775, "learning_rate": 7.29991889699919e-06, "loss": 0.6199, "step": 23000 }, { "epoch": 0.6715424367171762, "grad_norm": 0.6421324436431138, "learning_rate": 7.299270072992701e-06, "loss": 0.5657, "step": 23001 }, { "epoch": 0.6715716329450235, "grad_norm": 0.6639542775683082, "learning_rate": 7.298621248986213e-06, "loss": 0.6417, "step": 23002 }, { "epoch": 0.6716008291728709, "grad_norm": 0.6325851913547246, "learning_rate": 7.297972424979724e-06, "loss": 0.5921, "step": 23003 }, { "epoch": 0.6716300254007183, "grad_norm": 0.6364409092234882, "learning_rate": 7.297323600973236e-06, "loss": 0.6038, "step": 23004 }, { "epoch": 0.6716592216285656, "grad_norm": 0.6678232630590993, "learning_rate": 7.296674776966748e-06, "loss": 0.6198, "step": 23005 }, { "epoch": 0.671688417856413, "grad_norm": 0.6279125215865009, "learning_rate": 7.29602595296026e-06, "loss": 0.6142, "step": 23006 }, { "epoch": 0.6717176140842603, "grad_norm": 0.646064089778763, "learning_rate": 7.295377128953772e-06, "loss": 0.6271, "step": 23007 }, { "epoch": 0.6717468103121077, "grad_norm": 0.6461360140837653, "learning_rate": 7.2947283049472835e-06, "loss": 0.6551, "step": 23008 }, { "epoch": 0.6717760065399551, "grad_norm": 0.6940106687779501, "learning_rate": 7.2940794809407955e-06, "loss": 0.6732, "step": 23009 }, { "epoch": 0.6718052027678024, "grad_norm": 0.5606410321856286, "learning_rate": 7.2934306569343076e-06, "loss": 0.4474, "step": 23010 }, { "epoch": 0.6718343989956498, "grad_norm": 0.6914330511484486, "learning_rate": 7.292781832927819e-06, "loss": 0.6183, "step": 23011 }, { "epoch": 0.6718635952234971, "grad_norm": 0.676259797388905, "learning_rate": 7.292133008921331e-06, "loss": 0.6655, "step": 23012 }, { "epoch": 0.6718927914513445, "grad_norm": 0.6958602686729405, "learning_rate": 7.291484184914842e-06, "loss": 0.7024, "step": 23013 }, { "epoch": 0.6719219876791919, "grad_norm": 0.6608897886912205, "learning_rate": 7.290835360908354e-06, "loss": 0.6513, "step": 23014 }, { "epoch": 0.6719511839070392, "grad_norm": 0.6461737206239879, "learning_rate": 7.290186536901867e-06, "loss": 0.5967, "step": 23015 }, { "epoch": 0.6719803801348866, "grad_norm": 0.6083392904426582, "learning_rate": 7.289537712895378e-06, "loss": 0.5541, "step": 23016 }, { "epoch": 0.672009576362734, "grad_norm": 0.6008317023436647, "learning_rate": 7.28888888888889e-06, "loss": 0.5368, "step": 23017 }, { "epoch": 0.6720387725905813, "grad_norm": 0.6570956145709224, "learning_rate": 7.288240064882401e-06, "loss": 0.5609, "step": 23018 }, { "epoch": 0.6720679688184287, "grad_norm": 0.6776248066452769, "learning_rate": 7.287591240875913e-06, "loss": 0.6469, "step": 23019 }, { "epoch": 0.672097165046276, "grad_norm": 0.6645719452148944, "learning_rate": 7.2869424168694245e-06, "loss": 0.6075, "step": 23020 }, { "epoch": 0.6721263612741234, "grad_norm": 0.6621622664886415, "learning_rate": 7.2862935928629365e-06, "loss": 0.6385, "step": 23021 }, { "epoch": 0.6721555575019708, "grad_norm": 0.6292962206625945, "learning_rate": 7.285644768856448e-06, "loss": 0.567, "step": 23022 }, { "epoch": 0.6721847537298181, "grad_norm": 0.6074539899779227, "learning_rate": 7.28499594484996e-06, "loss": 0.5366, "step": 23023 }, { "epoch": 0.6722139499576655, "grad_norm": 0.6266427780515539, "learning_rate": 7.284347120843471e-06, "loss": 0.5858, "step": 23024 }, { "epoch": 0.6722431461855128, "grad_norm": 0.6375161103349724, "learning_rate": 7.283698296836984e-06, "loss": 0.6219, "step": 23025 }, { "epoch": 0.6722723424133602, "grad_norm": 0.610466269830671, "learning_rate": 7.283049472830496e-06, "loss": 0.5432, "step": 23026 }, { "epoch": 0.6723015386412076, "grad_norm": 0.6320296743337307, "learning_rate": 7.282400648824007e-06, "loss": 0.5718, "step": 23027 }, { "epoch": 0.6723307348690549, "grad_norm": 0.6336745775464018, "learning_rate": 7.281751824817519e-06, "loss": 0.585, "step": 23028 }, { "epoch": 0.6723599310969023, "grad_norm": 0.6524441125361813, "learning_rate": 7.281103000811031e-06, "loss": 0.6233, "step": 23029 }, { "epoch": 0.6723891273247496, "grad_norm": 0.6346474694693467, "learning_rate": 7.280454176804542e-06, "loss": 0.6051, "step": 23030 }, { "epoch": 0.672418323552597, "grad_norm": 0.6785797188923606, "learning_rate": 7.279805352798054e-06, "loss": 0.6291, "step": 23031 }, { "epoch": 0.6724475197804444, "grad_norm": 0.5962707942638968, "learning_rate": 7.279156528791565e-06, "loss": 0.5205, "step": 23032 }, { "epoch": 0.6724767160082917, "grad_norm": 0.660330744522734, "learning_rate": 7.278507704785077e-06, "loss": 0.642, "step": 23033 }, { "epoch": 0.6725059122361391, "grad_norm": 0.5805214926961633, "learning_rate": 7.27785888077859e-06, "loss": 0.5249, "step": 23034 }, { "epoch": 0.6725351084639865, "grad_norm": 0.6605963904360859, "learning_rate": 7.2772100567721015e-06, "loss": 0.6566, "step": 23035 }, { "epoch": 0.6725643046918338, "grad_norm": 0.6420956598796695, "learning_rate": 7.2765612327656135e-06, "loss": 0.6103, "step": 23036 }, { "epoch": 0.6725935009196812, "grad_norm": 0.6439750553985514, "learning_rate": 7.275912408759125e-06, "loss": 0.5713, "step": 23037 }, { "epoch": 0.6726226971475285, "grad_norm": 0.6422760199320146, "learning_rate": 7.275263584752637e-06, "loss": 0.6022, "step": 23038 }, { "epoch": 0.6726518933753759, "grad_norm": 0.6031729296041012, "learning_rate": 7.274614760746148e-06, "loss": 0.5712, "step": 23039 }, { "epoch": 0.6726810896032233, "grad_norm": 0.646994209604644, "learning_rate": 7.27396593673966e-06, "loss": 0.5747, "step": 23040 }, { "epoch": 0.6727102858310706, "grad_norm": 0.6932842949179071, "learning_rate": 7.273317112733171e-06, "loss": 0.6626, "step": 23041 }, { "epoch": 0.672739482058918, "grad_norm": 0.6474251634550856, "learning_rate": 7.272668288726683e-06, "loss": 0.5646, "step": 23042 }, { "epoch": 0.6727686782867653, "grad_norm": 0.5780341983212515, "learning_rate": 7.272019464720194e-06, "loss": 0.501, "step": 23043 }, { "epoch": 0.6727978745146127, "grad_norm": 0.6749505287578925, "learning_rate": 7.271370640713707e-06, "loss": 0.6404, "step": 23044 }, { "epoch": 0.6728270707424601, "grad_norm": 0.682285581564408, "learning_rate": 7.270721816707219e-06, "loss": 0.6616, "step": 23045 }, { "epoch": 0.6728562669703074, "grad_norm": 0.6160703003208208, "learning_rate": 7.27007299270073e-06, "loss": 0.5207, "step": 23046 }, { "epoch": 0.6728854631981548, "grad_norm": 0.5884736768999542, "learning_rate": 7.269424168694242e-06, "loss": 0.5162, "step": 23047 }, { "epoch": 0.6729146594260021, "grad_norm": 0.6269354498783435, "learning_rate": 7.2687753446877544e-06, "loss": 0.5906, "step": 23048 }, { "epoch": 0.6729438556538495, "grad_norm": 0.649640776881519, "learning_rate": 7.268126520681266e-06, "loss": 0.6268, "step": 23049 }, { "epoch": 0.6729730518816969, "grad_norm": 0.6354212289516369, "learning_rate": 7.267477696674778e-06, "loss": 0.5765, "step": 23050 }, { "epoch": 0.6730022481095442, "grad_norm": 0.6341318119721878, "learning_rate": 7.266828872668289e-06, "loss": 0.6094, "step": 23051 }, { "epoch": 0.6730314443373916, "grad_norm": 0.6682614235018076, "learning_rate": 7.266180048661801e-06, "loss": 0.6052, "step": 23052 }, { "epoch": 0.673060640565239, "grad_norm": 0.6202993532644016, "learning_rate": 7.265531224655314e-06, "loss": 0.5753, "step": 23053 }, { "epoch": 0.6730898367930863, "grad_norm": 0.6184571666476998, "learning_rate": 7.264882400648825e-06, "loss": 0.594, "step": 23054 }, { "epoch": 0.6731190330209337, "grad_norm": 0.6820097970225868, "learning_rate": 7.264233576642337e-06, "loss": 0.6263, "step": 23055 }, { "epoch": 0.673148229248781, "grad_norm": 0.6209644378887546, "learning_rate": 7.263584752635848e-06, "loss": 0.5915, "step": 23056 }, { "epoch": 0.6731774254766284, "grad_norm": 0.6159692232486791, "learning_rate": 7.26293592862936e-06, "loss": 0.5633, "step": 23057 }, { "epoch": 0.6732066217044758, "grad_norm": 0.6379288920077135, "learning_rate": 7.262287104622871e-06, "loss": 0.601, "step": 23058 }, { "epoch": 0.6732358179323231, "grad_norm": 0.6436532554314155, "learning_rate": 7.261638280616383e-06, "loss": 0.6079, "step": 23059 }, { "epoch": 0.6732650141601705, "grad_norm": 0.6124875125652647, "learning_rate": 7.2609894566098945e-06, "loss": 0.5665, "step": 23060 }, { "epoch": 0.6732942103880178, "grad_norm": 0.5601052872465988, "learning_rate": 7.2603406326034066e-06, "loss": 0.4788, "step": 23061 }, { "epoch": 0.6733234066158652, "grad_norm": 0.6784870389842429, "learning_rate": 7.259691808596918e-06, "loss": 0.6511, "step": 23062 }, { "epoch": 0.6733526028437126, "grad_norm": 0.6329316173281324, "learning_rate": 7.259042984590431e-06, "loss": 0.5758, "step": 23063 }, { "epoch": 0.6733817990715599, "grad_norm": 0.618845996016019, "learning_rate": 7.258394160583943e-06, "loss": 0.5732, "step": 23064 }, { "epoch": 0.6734109952994073, "grad_norm": 0.6101988948455732, "learning_rate": 7.257745336577454e-06, "loss": 0.5523, "step": 23065 }, { "epoch": 0.6734401915272547, "grad_norm": 0.632893654981307, "learning_rate": 7.257096512570966e-06, "loss": 0.607, "step": 23066 }, { "epoch": 0.673469387755102, "grad_norm": 0.6012757505475713, "learning_rate": 7.256447688564478e-06, "loss": 0.5036, "step": 23067 }, { "epoch": 0.6734985839829494, "grad_norm": 0.6436208382221271, "learning_rate": 7.255798864557989e-06, "loss": 0.5954, "step": 23068 }, { "epoch": 0.6735277802107967, "grad_norm": 0.641533876787321, "learning_rate": 7.255150040551501e-06, "loss": 0.5527, "step": 23069 }, { "epoch": 0.6735569764386441, "grad_norm": 0.7173438055298825, "learning_rate": 7.254501216545012e-06, "loss": 0.7008, "step": 23070 }, { "epoch": 0.6735861726664915, "grad_norm": 0.687153730079126, "learning_rate": 7.253852392538524e-06, "loss": 0.6478, "step": 23071 }, { "epoch": 0.6736153688943388, "grad_norm": 0.6247816656821424, "learning_rate": 7.253203568532037e-06, "loss": 0.5666, "step": 23072 }, { "epoch": 0.6736445651221862, "grad_norm": 0.6524486822853837, "learning_rate": 7.252554744525548e-06, "loss": 0.5916, "step": 23073 }, { "epoch": 0.6736737613500335, "grad_norm": 0.6464802253931837, "learning_rate": 7.25190592051906e-06, "loss": 0.5677, "step": 23074 }, { "epoch": 0.6737029575778809, "grad_norm": 0.6363036735520438, "learning_rate": 7.2512570965125715e-06, "loss": 0.5855, "step": 23075 }, { "epoch": 0.6737321538057283, "grad_norm": 0.6399188476917012, "learning_rate": 7.2506082725060836e-06, "loss": 0.5786, "step": 23076 }, { "epoch": 0.6737613500335756, "grad_norm": 0.6096847425615101, "learning_rate": 7.249959448499595e-06, "loss": 0.5723, "step": 23077 }, { "epoch": 0.673790546261423, "grad_norm": 0.5808243394912496, "learning_rate": 7.249310624493107e-06, "loss": 0.5072, "step": 23078 }, { "epoch": 0.6738197424892703, "grad_norm": 0.672550073666022, "learning_rate": 7.248661800486618e-06, "loss": 0.6618, "step": 23079 }, { "epoch": 0.6738489387171177, "grad_norm": 0.6165173967302551, "learning_rate": 7.24801297648013e-06, "loss": 0.5716, "step": 23080 }, { "epoch": 0.6738781349449651, "grad_norm": 0.6535040230105622, "learning_rate": 7.247364152473643e-06, "loss": 0.6015, "step": 23081 }, { "epoch": 0.6739073311728124, "grad_norm": 0.667034601239468, "learning_rate": 7.246715328467154e-06, "loss": 0.6048, "step": 23082 }, { "epoch": 0.6739365274006598, "grad_norm": 0.6123801231477753, "learning_rate": 7.246066504460666e-06, "loss": 0.5469, "step": 23083 }, { "epoch": 0.6739657236285072, "grad_norm": 0.6592348544856983, "learning_rate": 7.245417680454177e-06, "loss": 0.5615, "step": 23084 }, { "epoch": 0.6739949198563545, "grad_norm": 0.6362756625970367, "learning_rate": 7.244768856447689e-06, "loss": 0.6039, "step": 23085 }, { "epoch": 0.6740241160842019, "grad_norm": 0.6858993947294535, "learning_rate": 7.2441200324412005e-06, "loss": 0.6174, "step": 23086 }, { "epoch": 0.6740533123120492, "grad_norm": 0.6836457540931045, "learning_rate": 7.2434712084347125e-06, "loss": 0.6554, "step": 23087 }, { "epoch": 0.6740825085398966, "grad_norm": 0.6440054199077793, "learning_rate": 7.2428223844282245e-06, "loss": 0.5791, "step": 23088 }, { "epoch": 0.674111704767744, "grad_norm": 0.6155746164534915, "learning_rate": 7.242173560421736e-06, "loss": 0.5009, "step": 23089 }, { "epoch": 0.6741409009955913, "grad_norm": 0.6429024484060898, "learning_rate": 7.241524736415248e-06, "loss": 0.6107, "step": 23090 }, { "epoch": 0.6741700972234387, "grad_norm": 0.6837496229880006, "learning_rate": 7.240875912408761e-06, "loss": 0.6418, "step": 23091 }, { "epoch": 0.674199293451286, "grad_norm": 0.6192992249495406, "learning_rate": 7.240227088402272e-06, "loss": 0.5719, "step": 23092 }, { "epoch": 0.6742284896791334, "grad_norm": 0.6845201118744815, "learning_rate": 7.239578264395784e-06, "loss": 0.6907, "step": 23093 }, { "epoch": 0.6742576859069808, "grad_norm": 0.6599385712652442, "learning_rate": 7.238929440389295e-06, "loss": 0.65, "step": 23094 }, { "epoch": 0.6742868821348281, "grad_norm": 0.6000471949940935, "learning_rate": 7.238280616382807e-06, "loss": 0.5161, "step": 23095 }, { "epoch": 0.6743160783626756, "grad_norm": 0.6490500371943312, "learning_rate": 7.237631792376318e-06, "loss": 0.6073, "step": 23096 }, { "epoch": 0.674345274590523, "grad_norm": 0.6655883601315371, "learning_rate": 7.23698296836983e-06, "loss": 0.641, "step": 23097 }, { "epoch": 0.6743744708183703, "grad_norm": 0.6315457139787546, "learning_rate": 7.236334144363341e-06, "loss": 0.5508, "step": 23098 }, { "epoch": 0.6744036670462177, "grad_norm": 0.6388565831056695, "learning_rate": 7.2356853203568534e-06, "loss": 0.5831, "step": 23099 }, { "epoch": 0.674432863274065, "grad_norm": 0.6561035531687791, "learning_rate": 7.235036496350366e-06, "loss": 0.6253, "step": 23100 }, { "epoch": 0.6744620595019124, "grad_norm": 0.622219373521345, "learning_rate": 7.2343876723438775e-06, "loss": 0.6015, "step": 23101 }, { "epoch": 0.6744912557297598, "grad_norm": 0.6726551637466935, "learning_rate": 7.2337388483373895e-06, "loss": 0.6469, "step": 23102 }, { "epoch": 0.6745204519576071, "grad_norm": 0.6858067259991162, "learning_rate": 7.233090024330901e-06, "loss": 0.645, "step": 23103 }, { "epoch": 0.6745496481854545, "grad_norm": 0.6569293112854716, "learning_rate": 7.232441200324413e-06, "loss": 0.6442, "step": 23104 }, { "epoch": 0.6745788444133018, "grad_norm": 0.6497570231435845, "learning_rate": 7.231792376317924e-06, "loss": 0.6244, "step": 23105 }, { "epoch": 0.6746080406411492, "grad_norm": 0.6222410498627787, "learning_rate": 7.231143552311436e-06, "loss": 0.5131, "step": 23106 }, { "epoch": 0.6746372368689966, "grad_norm": 0.6669329916959831, "learning_rate": 7.230494728304948e-06, "loss": 0.6532, "step": 23107 }, { "epoch": 0.6746664330968439, "grad_norm": 0.6164163288909509, "learning_rate": 7.229845904298459e-06, "loss": 0.5765, "step": 23108 }, { "epoch": 0.6746956293246913, "grad_norm": 0.678172491175662, "learning_rate": 7.229197080291971e-06, "loss": 0.6733, "step": 23109 }, { "epoch": 0.6747248255525387, "grad_norm": 0.5809593991504167, "learning_rate": 7.228548256285484e-06, "loss": 0.527, "step": 23110 }, { "epoch": 0.674754021780386, "grad_norm": 0.6360268124895692, "learning_rate": 7.227899432278995e-06, "loss": 0.5535, "step": 23111 }, { "epoch": 0.6747832180082334, "grad_norm": 0.6326072609769439, "learning_rate": 7.227250608272507e-06, "loss": 0.6042, "step": 23112 }, { "epoch": 0.6748124142360807, "grad_norm": 0.6316861423766301, "learning_rate": 7.226601784266018e-06, "loss": 0.5403, "step": 23113 }, { "epoch": 0.6748416104639281, "grad_norm": 0.618039759674634, "learning_rate": 7.2259529602595304e-06, "loss": 0.5835, "step": 23114 }, { "epoch": 0.6748708066917755, "grad_norm": 0.6539076110045691, "learning_rate": 7.225304136253042e-06, "loss": 0.6378, "step": 23115 }, { "epoch": 0.6749000029196228, "grad_norm": 0.6462673291791239, "learning_rate": 7.224655312246554e-06, "loss": 0.548, "step": 23116 }, { "epoch": 0.6749291991474702, "grad_norm": 0.6361401597902305, "learning_rate": 7.224006488240065e-06, "loss": 0.6091, "step": 23117 }, { "epoch": 0.6749583953753175, "grad_norm": 0.6327682758491121, "learning_rate": 7.223357664233577e-06, "loss": 0.5682, "step": 23118 }, { "epoch": 0.6749875916031649, "grad_norm": 0.6704528111957808, "learning_rate": 7.22270884022709e-06, "loss": 0.6411, "step": 23119 }, { "epoch": 0.6750167878310123, "grad_norm": 0.6305451227548621, "learning_rate": 7.222060016220601e-06, "loss": 0.5331, "step": 23120 }, { "epoch": 0.6750459840588596, "grad_norm": 0.628587907344662, "learning_rate": 7.221411192214113e-06, "loss": 0.5155, "step": 23121 }, { "epoch": 0.675075180286707, "grad_norm": 0.6752365895079094, "learning_rate": 7.220762368207624e-06, "loss": 0.6619, "step": 23122 }, { "epoch": 0.6751043765145543, "grad_norm": 0.6821847438198684, "learning_rate": 7.220113544201136e-06, "loss": 0.607, "step": 23123 }, { "epoch": 0.6751335727424017, "grad_norm": 0.6015004470815054, "learning_rate": 7.219464720194647e-06, "loss": 0.5504, "step": 23124 }, { "epoch": 0.6751627689702491, "grad_norm": 0.6426767241679605, "learning_rate": 7.218815896188159e-06, "loss": 0.6062, "step": 23125 }, { "epoch": 0.6751919651980964, "grad_norm": 0.6715223076774307, "learning_rate": 7.218167072181671e-06, "loss": 0.6574, "step": 23126 }, { "epoch": 0.6752211614259438, "grad_norm": 0.66995647343752, "learning_rate": 7.2175182481751826e-06, "loss": 0.6007, "step": 23127 }, { "epoch": 0.6752503576537912, "grad_norm": 0.7145741546931246, "learning_rate": 7.216869424168695e-06, "loss": 0.7144, "step": 23128 }, { "epoch": 0.6752795538816385, "grad_norm": 0.6371078493099442, "learning_rate": 7.2162206001622075e-06, "loss": 0.6096, "step": 23129 }, { "epoch": 0.6753087501094859, "grad_norm": 0.6195446407361093, "learning_rate": 7.215571776155719e-06, "loss": 0.5564, "step": 23130 }, { "epoch": 0.6753379463373332, "grad_norm": 0.6465490112956919, "learning_rate": 7.214922952149231e-06, "loss": 0.5511, "step": 23131 }, { "epoch": 0.6753671425651806, "grad_norm": 0.6466967343166544, "learning_rate": 7.214274128142742e-06, "loss": 0.5597, "step": 23132 }, { "epoch": 0.675396338793028, "grad_norm": 0.6012898764856035, "learning_rate": 7.213625304136254e-06, "loss": 0.5612, "step": 23133 }, { "epoch": 0.6754255350208753, "grad_norm": 0.6264657654209442, "learning_rate": 7.212976480129765e-06, "loss": 0.5872, "step": 23134 }, { "epoch": 0.6754547312487227, "grad_norm": 0.6032523677238933, "learning_rate": 7.212327656123277e-06, "loss": 0.511, "step": 23135 }, { "epoch": 0.67548392747657, "grad_norm": 0.6941577425000199, "learning_rate": 7.211678832116788e-06, "loss": 0.6627, "step": 23136 }, { "epoch": 0.6755131237044174, "grad_norm": 0.604457129364393, "learning_rate": 7.2110300081103e-06, "loss": 0.5437, "step": 23137 }, { "epoch": 0.6755423199322648, "grad_norm": 0.6063899227794466, "learning_rate": 7.210381184103813e-06, "loss": 0.5062, "step": 23138 }, { "epoch": 0.6755715161601121, "grad_norm": 0.6381943171721833, "learning_rate": 7.209732360097324e-06, "loss": 0.5975, "step": 23139 }, { "epoch": 0.6756007123879595, "grad_norm": 0.6410694697125975, "learning_rate": 7.209083536090836e-06, "loss": 0.6025, "step": 23140 }, { "epoch": 0.6756299086158069, "grad_norm": 0.6001952087717054, "learning_rate": 7.2084347120843476e-06, "loss": 0.5356, "step": 23141 }, { "epoch": 0.6756591048436542, "grad_norm": 0.6232702903187767, "learning_rate": 7.20778588807786e-06, "loss": 0.5958, "step": 23142 }, { "epoch": 0.6756883010715016, "grad_norm": 0.6950956373849801, "learning_rate": 7.207137064071371e-06, "loss": 0.6491, "step": 23143 }, { "epoch": 0.6757174972993489, "grad_norm": 0.6594901841834181, "learning_rate": 7.206488240064883e-06, "loss": 0.5902, "step": 23144 }, { "epoch": 0.6757466935271963, "grad_norm": 0.6544899235028345, "learning_rate": 7.205839416058395e-06, "loss": 0.6049, "step": 23145 }, { "epoch": 0.6757758897550437, "grad_norm": 0.6718662976037464, "learning_rate": 7.205190592051906e-06, "loss": 0.6536, "step": 23146 }, { "epoch": 0.675805085982891, "grad_norm": 0.6259764950536342, "learning_rate": 7.204541768045418e-06, "loss": 0.5575, "step": 23147 }, { "epoch": 0.6758342822107384, "grad_norm": 0.6469178700850228, "learning_rate": 7.20389294403893e-06, "loss": 0.6039, "step": 23148 }, { "epoch": 0.6758634784385857, "grad_norm": 0.6181080040341591, "learning_rate": 7.203244120032442e-06, "loss": 0.545, "step": 23149 }, { "epoch": 0.6758926746664331, "grad_norm": 0.6023579535704868, "learning_rate": 7.202595296025954e-06, "loss": 0.5291, "step": 23150 }, { "epoch": 0.6759218708942805, "grad_norm": 0.6122248021598674, "learning_rate": 7.201946472019465e-06, "loss": 0.5848, "step": 23151 }, { "epoch": 0.6759510671221278, "grad_norm": 0.6362987794786775, "learning_rate": 7.201297648012977e-06, "loss": 0.582, "step": 23152 }, { "epoch": 0.6759802633499752, "grad_norm": 0.6447357555909234, "learning_rate": 7.2006488240064885e-06, "loss": 0.586, "step": 23153 }, { "epoch": 0.6760094595778225, "grad_norm": 0.6981022532023098, "learning_rate": 7.2000000000000005e-06, "loss": 0.7201, "step": 23154 }, { "epoch": 0.6760386558056699, "grad_norm": 0.8556309368425413, "learning_rate": 7.199351175993512e-06, "loss": 0.6318, "step": 23155 }, { "epoch": 0.6760678520335173, "grad_norm": 0.6327162473934298, "learning_rate": 7.198702351987024e-06, "loss": 0.5466, "step": 23156 }, { "epoch": 0.6760970482613646, "grad_norm": 0.6434187219597101, "learning_rate": 7.198053527980537e-06, "loss": 0.6031, "step": 23157 }, { "epoch": 0.676126244489212, "grad_norm": 0.6478682816025465, "learning_rate": 7.197404703974048e-06, "loss": 0.6112, "step": 23158 }, { "epoch": 0.6761554407170594, "grad_norm": 0.6061415550068272, "learning_rate": 7.19675587996756e-06, "loss": 0.5489, "step": 23159 }, { "epoch": 0.6761846369449067, "grad_norm": 0.6339806010918981, "learning_rate": 7.196107055961071e-06, "loss": 0.5854, "step": 23160 }, { "epoch": 0.6762138331727541, "grad_norm": 0.6538008359823442, "learning_rate": 7.195458231954583e-06, "loss": 0.6316, "step": 23161 }, { "epoch": 0.6762430294006014, "grad_norm": 0.6621185626091168, "learning_rate": 7.194809407948094e-06, "loss": 0.6401, "step": 23162 }, { "epoch": 0.6762722256284488, "grad_norm": 0.6534214900653976, "learning_rate": 7.194160583941606e-06, "loss": 0.613, "step": 23163 }, { "epoch": 0.6763014218562962, "grad_norm": 0.6323860724423361, "learning_rate": 7.193511759935118e-06, "loss": 0.5764, "step": 23164 }, { "epoch": 0.6763306180841435, "grad_norm": 0.6474962212589638, "learning_rate": 7.1928629359286294e-06, "loss": 0.5994, "step": 23165 }, { "epoch": 0.6763598143119909, "grad_norm": 0.6870852930633902, "learning_rate": 7.192214111922142e-06, "loss": 0.6562, "step": 23166 }, { "epoch": 0.6763890105398382, "grad_norm": 0.6538410813603589, "learning_rate": 7.1915652879156535e-06, "loss": 0.6492, "step": 23167 }, { "epoch": 0.6764182067676856, "grad_norm": 0.7028574081430518, "learning_rate": 7.1909164639091655e-06, "loss": 0.6474, "step": 23168 }, { "epoch": 0.676447402995533, "grad_norm": 0.626998525093092, "learning_rate": 7.1902676399026775e-06, "loss": 0.5941, "step": 23169 }, { "epoch": 0.6764765992233803, "grad_norm": 0.5888697650536093, "learning_rate": 7.189618815896189e-06, "loss": 0.5102, "step": 23170 }, { "epoch": 0.6765057954512277, "grad_norm": 0.6868730397468237, "learning_rate": 7.188969991889701e-06, "loss": 0.6147, "step": 23171 }, { "epoch": 0.676534991679075, "grad_norm": 0.6466429335969922, "learning_rate": 7.188321167883212e-06, "loss": 0.5603, "step": 23172 }, { "epoch": 0.6765641879069224, "grad_norm": 0.6845390086636141, "learning_rate": 7.187672343876724e-06, "loss": 0.6362, "step": 23173 }, { "epoch": 0.6765933841347698, "grad_norm": 0.681875127132336, "learning_rate": 7.187023519870235e-06, "loss": 0.6078, "step": 23174 }, { "epoch": 0.6766225803626171, "grad_norm": 0.6348677124252072, "learning_rate": 7.186374695863747e-06, "loss": 0.5969, "step": 23175 }, { "epoch": 0.6766517765904645, "grad_norm": 0.7140012873508551, "learning_rate": 7.18572587185726e-06, "loss": 0.6604, "step": 23176 }, { "epoch": 0.6766809728183119, "grad_norm": 0.6454328314303615, "learning_rate": 7.185077047850771e-06, "loss": 0.5856, "step": 23177 }, { "epoch": 0.6767101690461592, "grad_norm": 0.5972976258155919, "learning_rate": 7.184428223844283e-06, "loss": 0.508, "step": 23178 }, { "epoch": 0.6767393652740066, "grad_norm": 0.6374327992041513, "learning_rate": 7.1837793998377944e-06, "loss": 0.5721, "step": 23179 }, { "epoch": 0.6767685615018539, "grad_norm": 0.6309362920566906, "learning_rate": 7.1831305758313065e-06, "loss": 0.5988, "step": 23180 }, { "epoch": 0.6767977577297013, "grad_norm": 0.6552318950640476, "learning_rate": 7.182481751824818e-06, "loss": 0.5895, "step": 23181 }, { "epoch": 0.6768269539575487, "grad_norm": 0.6540954279059863, "learning_rate": 7.18183292781833e-06, "loss": 0.5937, "step": 23182 }, { "epoch": 0.676856150185396, "grad_norm": 0.7369512084570335, "learning_rate": 7.181184103811842e-06, "loss": 0.7372, "step": 23183 }, { "epoch": 0.6768853464132434, "grad_norm": 0.6128257304788214, "learning_rate": 7.180535279805353e-06, "loss": 0.5311, "step": 23184 }, { "epoch": 0.6769145426410907, "grad_norm": 0.5895665258172705, "learning_rate": 7.179886455798866e-06, "loss": 0.5367, "step": 23185 }, { "epoch": 0.6769437388689381, "grad_norm": 0.6435090585008626, "learning_rate": 7.179237631792377e-06, "loss": 0.595, "step": 23186 }, { "epoch": 0.6769729350967855, "grad_norm": 0.6181380444287178, "learning_rate": 7.178588807785889e-06, "loss": 0.4978, "step": 23187 }, { "epoch": 0.6770021313246328, "grad_norm": 0.5997342765153246, "learning_rate": 7.177939983779401e-06, "loss": 0.5262, "step": 23188 }, { "epoch": 0.6770313275524802, "grad_norm": 0.6760627635789275, "learning_rate": 7.177291159772912e-06, "loss": 0.6634, "step": 23189 }, { "epoch": 0.6770605237803276, "grad_norm": 0.646424344199556, "learning_rate": 7.176642335766424e-06, "loss": 0.5767, "step": 23190 }, { "epoch": 0.6770897200081749, "grad_norm": 0.6002664241395772, "learning_rate": 7.175993511759935e-06, "loss": 0.5497, "step": 23191 }, { "epoch": 0.6771189162360223, "grad_norm": 0.6310323634289694, "learning_rate": 7.175344687753447e-06, "loss": 0.5955, "step": 23192 }, { "epoch": 0.6771481124638696, "grad_norm": 0.6647802805767227, "learning_rate": 7.1746958637469586e-06, "loss": 0.6843, "step": 23193 }, { "epoch": 0.677177308691717, "grad_norm": 0.6490647889823187, "learning_rate": 7.174047039740471e-06, "loss": 0.609, "step": 23194 }, { "epoch": 0.6772065049195644, "grad_norm": 0.6258452266302402, "learning_rate": 7.1733982157339835e-06, "loss": 0.6037, "step": 23195 }, { "epoch": 0.6772357011474117, "grad_norm": 0.6292086324597761, "learning_rate": 7.172749391727495e-06, "loss": 0.6036, "step": 23196 }, { "epoch": 0.6772648973752591, "grad_norm": 0.644098069614331, "learning_rate": 7.172100567721007e-06, "loss": 0.6203, "step": 23197 }, { "epoch": 0.6772940936031064, "grad_norm": 0.6848367957251172, "learning_rate": 7.171451743714518e-06, "loss": 0.6776, "step": 23198 }, { "epoch": 0.6773232898309538, "grad_norm": 0.6309728305719163, "learning_rate": 7.17080291970803e-06, "loss": 0.6143, "step": 23199 }, { "epoch": 0.6773524860588012, "grad_norm": 0.6383980969676221, "learning_rate": 7.170154095701541e-06, "loss": 0.6503, "step": 23200 }, { "epoch": 0.6773816822866485, "grad_norm": 0.6839770692665206, "learning_rate": 7.169505271695053e-06, "loss": 0.6281, "step": 23201 }, { "epoch": 0.6774108785144959, "grad_norm": 0.6857324968696978, "learning_rate": 7.168856447688565e-06, "loss": 0.6243, "step": 23202 }, { "epoch": 0.6774400747423432, "grad_norm": 0.590643451568127, "learning_rate": 7.168207623682076e-06, "loss": 0.5304, "step": 23203 }, { "epoch": 0.6774692709701906, "grad_norm": 0.6870118288980179, "learning_rate": 7.167558799675589e-06, "loss": 0.5462, "step": 23204 }, { "epoch": 0.677498467198038, "grad_norm": 0.6046975689509236, "learning_rate": 7.1669099756691e-06, "loss": 0.5543, "step": 23205 }, { "epoch": 0.6775276634258853, "grad_norm": 0.6231285465316855, "learning_rate": 7.166261151662612e-06, "loss": 0.5369, "step": 23206 }, { "epoch": 0.6775568596537327, "grad_norm": 0.6598707819694063, "learning_rate": 7.165612327656124e-06, "loss": 0.6267, "step": 23207 }, { "epoch": 0.67758605588158, "grad_norm": 0.6471039250978046, "learning_rate": 7.164963503649636e-06, "loss": 0.5956, "step": 23208 }, { "epoch": 0.6776152521094274, "grad_norm": 0.6215601135091687, "learning_rate": 7.164314679643148e-06, "loss": 0.5644, "step": 23209 }, { "epoch": 0.6776444483372748, "grad_norm": 0.6541105810426099, "learning_rate": 7.163665855636659e-06, "loss": 0.5924, "step": 23210 }, { "epoch": 0.6776736445651221, "grad_norm": 0.6986544517889844, "learning_rate": 7.163017031630171e-06, "loss": 0.6562, "step": 23211 }, { "epoch": 0.6777028407929695, "grad_norm": 0.6646174066688721, "learning_rate": 7.162368207623682e-06, "loss": 0.5623, "step": 23212 }, { "epoch": 0.6777320370208169, "grad_norm": 0.5836135138074738, "learning_rate": 7.161719383617194e-06, "loss": 0.5215, "step": 23213 }, { "epoch": 0.6777612332486642, "grad_norm": 0.6280038622581396, "learning_rate": 7.161070559610707e-06, "loss": 0.5614, "step": 23214 }, { "epoch": 0.6777904294765116, "grad_norm": 0.6056774816613189, "learning_rate": 7.160421735604218e-06, "loss": 0.524, "step": 23215 }, { "epoch": 0.677819625704359, "grad_norm": 0.6591415538663571, "learning_rate": 7.15977291159773e-06, "loss": 0.6292, "step": 23216 }, { "epoch": 0.6778488219322064, "grad_norm": 0.6126241858324861, "learning_rate": 7.159124087591241e-06, "loss": 0.5813, "step": 23217 }, { "epoch": 0.6778780181600538, "grad_norm": 0.632658023878574, "learning_rate": 7.158475263584753e-06, "loss": 0.5897, "step": 23218 }, { "epoch": 0.6779072143879011, "grad_norm": 0.6727828252697732, "learning_rate": 7.1578264395782645e-06, "loss": 0.6169, "step": 23219 }, { "epoch": 0.6779364106157485, "grad_norm": 0.6536055332326326, "learning_rate": 7.1571776155717765e-06, "loss": 0.6587, "step": 23220 }, { "epoch": 0.6779656068435959, "grad_norm": 0.592310887598804, "learning_rate": 7.156528791565288e-06, "loss": 0.5335, "step": 23221 }, { "epoch": 0.6779948030714432, "grad_norm": 0.6359424358171036, "learning_rate": 7.1558799675588e-06, "loss": 0.5862, "step": 23222 }, { "epoch": 0.6780239992992906, "grad_norm": 0.7795501113041493, "learning_rate": 7.155231143552313e-06, "loss": 0.6675, "step": 23223 }, { "epoch": 0.6780531955271379, "grad_norm": 0.623840244465001, "learning_rate": 7.154582319545824e-06, "loss": 0.5617, "step": 23224 }, { "epoch": 0.6780823917549853, "grad_norm": 0.6647763870243456, "learning_rate": 7.153933495539336e-06, "loss": 0.6209, "step": 23225 }, { "epoch": 0.6781115879828327, "grad_norm": 0.6746150598297964, "learning_rate": 7.153284671532848e-06, "loss": 0.6991, "step": 23226 }, { "epoch": 0.67814078421068, "grad_norm": 0.6594252856507571, "learning_rate": 7.152635847526359e-06, "loss": 0.6352, "step": 23227 }, { "epoch": 0.6781699804385274, "grad_norm": 0.6137649404511714, "learning_rate": 7.151987023519871e-06, "loss": 0.5738, "step": 23228 }, { "epoch": 0.6781991766663747, "grad_norm": 0.6916277001742653, "learning_rate": 7.151338199513382e-06, "loss": 0.6352, "step": 23229 }, { "epoch": 0.6782283728942221, "grad_norm": 0.6064224431358466, "learning_rate": 7.150689375506894e-06, "loss": 0.5793, "step": 23230 }, { "epoch": 0.6782575691220695, "grad_norm": 0.6408573759079278, "learning_rate": 7.1500405515004054e-06, "loss": 0.6147, "step": 23231 }, { "epoch": 0.6782867653499168, "grad_norm": 0.6357587491628679, "learning_rate": 7.149391727493918e-06, "loss": 0.6095, "step": 23232 }, { "epoch": 0.6783159615777642, "grad_norm": 0.693080353775421, "learning_rate": 7.14874290348743e-06, "loss": 0.6461, "step": 23233 }, { "epoch": 0.6783451578056116, "grad_norm": 0.6729025355650153, "learning_rate": 7.1480940794809415e-06, "loss": 0.6769, "step": 23234 }, { "epoch": 0.6783743540334589, "grad_norm": 0.6311063043459905, "learning_rate": 7.1474452554744535e-06, "loss": 0.6075, "step": 23235 }, { "epoch": 0.6784035502613063, "grad_norm": 0.5989574218260553, "learning_rate": 7.146796431467965e-06, "loss": 0.5455, "step": 23236 }, { "epoch": 0.6784327464891536, "grad_norm": 0.6470824658791792, "learning_rate": 7.146147607461477e-06, "loss": 0.6375, "step": 23237 }, { "epoch": 0.678461942717001, "grad_norm": 0.6198118521365515, "learning_rate": 7.145498783454988e-06, "loss": 0.5789, "step": 23238 }, { "epoch": 0.6784911389448484, "grad_norm": 0.6211206919705776, "learning_rate": 7.1448499594485e-06, "loss": 0.5914, "step": 23239 }, { "epoch": 0.6785203351726957, "grad_norm": 0.6680410690340206, "learning_rate": 7.144201135442011e-06, "loss": 0.6583, "step": 23240 }, { "epoch": 0.6785495314005431, "grad_norm": 0.590186866581426, "learning_rate": 7.143552311435523e-06, "loss": 0.5326, "step": 23241 }, { "epoch": 0.6785787276283904, "grad_norm": 0.6241569595820488, "learning_rate": 7.142903487429036e-06, "loss": 0.5889, "step": 23242 }, { "epoch": 0.6786079238562378, "grad_norm": 0.6118399887320516, "learning_rate": 7.142254663422547e-06, "loss": 0.5692, "step": 23243 }, { "epoch": 0.6786371200840852, "grad_norm": 0.6470723097589665, "learning_rate": 7.141605839416059e-06, "loss": 0.5788, "step": 23244 }, { "epoch": 0.6786663163119325, "grad_norm": 0.6783168328330773, "learning_rate": 7.140957015409571e-06, "loss": 0.6929, "step": 23245 }, { "epoch": 0.6786955125397799, "grad_norm": 0.6142885962909714, "learning_rate": 7.1403081914030825e-06, "loss": 0.5495, "step": 23246 }, { "epoch": 0.6787247087676272, "grad_norm": 0.589247140788537, "learning_rate": 7.1396593673965945e-06, "loss": 0.5003, "step": 23247 }, { "epoch": 0.6787539049954746, "grad_norm": 0.6640241363378786, "learning_rate": 7.139010543390106e-06, "loss": 0.5919, "step": 23248 }, { "epoch": 0.678783101223322, "grad_norm": 0.6470948638920143, "learning_rate": 7.138361719383618e-06, "loss": 0.5908, "step": 23249 }, { "epoch": 0.6788122974511693, "grad_norm": 0.6714283501654087, "learning_rate": 7.137712895377129e-06, "loss": 0.6585, "step": 23250 }, { "epoch": 0.6788414936790167, "grad_norm": 0.6709232710314554, "learning_rate": 7.137064071370642e-06, "loss": 0.6604, "step": 23251 }, { "epoch": 0.678870689906864, "grad_norm": 0.6250420247282783, "learning_rate": 7.136415247364154e-06, "loss": 0.5792, "step": 23252 }, { "epoch": 0.6788998861347114, "grad_norm": 0.650136044385148, "learning_rate": 7.135766423357665e-06, "loss": 0.6203, "step": 23253 }, { "epoch": 0.6789290823625588, "grad_norm": 0.6370250728639817, "learning_rate": 7.135117599351177e-06, "loss": 0.5809, "step": 23254 }, { "epoch": 0.6789582785904061, "grad_norm": 0.6541995296647451, "learning_rate": 7.134468775344688e-06, "loss": 0.6182, "step": 23255 }, { "epoch": 0.6789874748182535, "grad_norm": 0.6507509696064657, "learning_rate": 7.1338199513382e-06, "loss": 0.6145, "step": 23256 }, { "epoch": 0.6790166710461009, "grad_norm": 0.616424719033218, "learning_rate": 7.133171127331711e-06, "loss": 0.6145, "step": 23257 }, { "epoch": 0.6790458672739482, "grad_norm": 0.6668310544972872, "learning_rate": 7.132522303325223e-06, "loss": 0.6729, "step": 23258 }, { "epoch": 0.6790750635017956, "grad_norm": 0.6113781210015009, "learning_rate": 7.131873479318735e-06, "loss": 0.5848, "step": 23259 }, { "epoch": 0.679104259729643, "grad_norm": 0.6640260978660556, "learning_rate": 7.131224655312247e-06, "loss": 0.6206, "step": 23260 }, { "epoch": 0.6791334559574903, "grad_norm": 0.6664558888803693, "learning_rate": 7.1305758313057595e-06, "loss": 0.6335, "step": 23261 }, { "epoch": 0.6791626521853377, "grad_norm": 0.6393946030866986, "learning_rate": 7.129927007299271e-06, "loss": 0.6113, "step": 23262 }, { "epoch": 0.679191848413185, "grad_norm": 0.6242835741737652, "learning_rate": 7.129278183292783e-06, "loss": 0.5654, "step": 23263 }, { "epoch": 0.6792210446410324, "grad_norm": 0.63451700853104, "learning_rate": 7.128629359286295e-06, "loss": 0.5743, "step": 23264 }, { "epoch": 0.6792502408688798, "grad_norm": 0.601452145346431, "learning_rate": 7.127980535279806e-06, "loss": 0.5572, "step": 23265 }, { "epoch": 0.6792794370967271, "grad_norm": 0.669082430488162, "learning_rate": 7.127331711273318e-06, "loss": 0.6026, "step": 23266 }, { "epoch": 0.6793086333245745, "grad_norm": 0.6353149850452037, "learning_rate": 7.126682887266829e-06, "loss": 0.5827, "step": 23267 }, { "epoch": 0.6793378295524218, "grad_norm": 0.6993048767007295, "learning_rate": 7.126034063260341e-06, "loss": 0.708, "step": 23268 }, { "epoch": 0.6793670257802692, "grad_norm": 0.6538887499963734, "learning_rate": 7.125385239253852e-06, "loss": 0.6261, "step": 23269 }, { "epoch": 0.6793962220081166, "grad_norm": 0.6656621166662221, "learning_rate": 7.124736415247365e-06, "loss": 0.6141, "step": 23270 }, { "epoch": 0.6794254182359639, "grad_norm": 0.631948733357162, "learning_rate": 7.124087591240877e-06, "loss": 0.6256, "step": 23271 }, { "epoch": 0.6794546144638113, "grad_norm": 0.6563869692505769, "learning_rate": 7.123438767234388e-06, "loss": 0.5913, "step": 23272 }, { "epoch": 0.6794838106916586, "grad_norm": 0.6461989631690741, "learning_rate": 7.1227899432279e-06, "loss": 0.6064, "step": 23273 }, { "epoch": 0.679513006919506, "grad_norm": 0.6441822683890748, "learning_rate": 7.122141119221412e-06, "loss": 0.6445, "step": 23274 }, { "epoch": 0.6795422031473534, "grad_norm": 0.6393466887647868, "learning_rate": 7.121492295214924e-06, "loss": 0.5976, "step": 23275 }, { "epoch": 0.6795713993752007, "grad_norm": 0.6714287739288041, "learning_rate": 7.120843471208435e-06, "loss": 0.6315, "step": 23276 }, { "epoch": 0.6796005956030481, "grad_norm": 0.6268564982651115, "learning_rate": 7.120194647201947e-06, "loss": 0.5898, "step": 23277 }, { "epoch": 0.6796297918308954, "grad_norm": 0.6329001675325593, "learning_rate": 7.119545823195458e-06, "loss": 0.5843, "step": 23278 }, { "epoch": 0.6796589880587428, "grad_norm": 0.6416409777347069, "learning_rate": 7.11889699918897e-06, "loss": 0.6407, "step": 23279 }, { "epoch": 0.6796881842865902, "grad_norm": 0.6719075779172136, "learning_rate": 7.118248175182483e-06, "loss": 0.6571, "step": 23280 }, { "epoch": 0.6797173805144375, "grad_norm": 0.612764485017027, "learning_rate": 7.117599351175994e-06, "loss": 0.587, "step": 23281 }, { "epoch": 0.6797465767422849, "grad_norm": 0.6620010061139043, "learning_rate": 7.116950527169506e-06, "loss": 0.6377, "step": 23282 }, { "epoch": 0.6797757729701323, "grad_norm": 0.682818315811152, "learning_rate": 7.116301703163017e-06, "loss": 0.6012, "step": 23283 }, { "epoch": 0.6798049691979796, "grad_norm": 0.6260263339846841, "learning_rate": 7.115652879156529e-06, "loss": 0.5553, "step": 23284 }, { "epoch": 0.679834165425827, "grad_norm": 0.5561115059328425, "learning_rate": 7.115004055150041e-06, "loss": 0.4677, "step": 23285 }, { "epoch": 0.6798633616536743, "grad_norm": 0.6553458073391076, "learning_rate": 7.1143552311435525e-06, "loss": 0.5604, "step": 23286 }, { "epoch": 0.6798925578815217, "grad_norm": 0.6097396799262557, "learning_rate": 7.1137064071370646e-06, "loss": 0.553, "step": 23287 }, { "epoch": 0.6799217541093691, "grad_norm": 0.5972305143516399, "learning_rate": 7.113057583130576e-06, "loss": 0.5163, "step": 23288 }, { "epoch": 0.6799509503372164, "grad_norm": 0.6733294603192009, "learning_rate": 7.112408759124089e-06, "loss": 0.6086, "step": 23289 }, { "epoch": 0.6799801465650638, "grad_norm": 0.6498839997605604, "learning_rate": 7.111759935117601e-06, "loss": 0.5869, "step": 23290 }, { "epoch": 0.6800093427929111, "grad_norm": 0.6547651988720572, "learning_rate": 7.111111111111112e-06, "loss": 0.6412, "step": 23291 }, { "epoch": 0.6800385390207585, "grad_norm": 0.6530436879921757, "learning_rate": 7.110462287104624e-06, "loss": 0.6101, "step": 23292 }, { "epoch": 0.6800677352486059, "grad_norm": 0.6523986457472355, "learning_rate": 7.109813463098135e-06, "loss": 0.6079, "step": 23293 }, { "epoch": 0.6800969314764532, "grad_norm": 0.6664038745355935, "learning_rate": 7.109164639091647e-06, "loss": 0.6184, "step": 23294 }, { "epoch": 0.6801261277043006, "grad_norm": 0.6076410425828498, "learning_rate": 7.108515815085158e-06, "loss": 0.5564, "step": 23295 }, { "epoch": 0.680155323932148, "grad_norm": 0.673254318598457, "learning_rate": 7.10786699107867e-06, "loss": 0.6441, "step": 23296 }, { "epoch": 0.6801845201599953, "grad_norm": 0.6088301824532177, "learning_rate": 7.1072181670721814e-06, "loss": 0.5596, "step": 23297 }, { "epoch": 0.6802137163878427, "grad_norm": 0.6320852241988831, "learning_rate": 7.1065693430656935e-06, "loss": 0.5382, "step": 23298 }, { "epoch": 0.68024291261569, "grad_norm": 0.6460967698185265, "learning_rate": 7.105920519059206e-06, "loss": 0.5884, "step": 23299 }, { "epoch": 0.6802721088435374, "grad_norm": 0.7108309254134292, "learning_rate": 7.1052716950527175e-06, "loss": 0.7563, "step": 23300 }, { "epoch": 0.6803013050713848, "grad_norm": 0.6527622256924623, "learning_rate": 7.1046228710462296e-06, "loss": 0.6034, "step": 23301 }, { "epoch": 0.6803305012992321, "grad_norm": 0.6450436748763156, "learning_rate": 7.103974047039741e-06, "loss": 0.6113, "step": 23302 }, { "epoch": 0.6803596975270795, "grad_norm": 1.0394083931412936, "learning_rate": 7.103325223033253e-06, "loss": 0.6089, "step": 23303 }, { "epoch": 0.6803888937549268, "grad_norm": 0.6241986186693168, "learning_rate": 7.102676399026765e-06, "loss": 0.5804, "step": 23304 }, { "epoch": 0.6804180899827742, "grad_norm": 0.7136086426061122, "learning_rate": 7.102027575020276e-06, "loss": 0.7327, "step": 23305 }, { "epoch": 0.6804472862106216, "grad_norm": 0.6234636950397999, "learning_rate": 7.101378751013788e-06, "loss": 0.5524, "step": 23306 }, { "epoch": 0.6804764824384689, "grad_norm": 0.6805042633494136, "learning_rate": 7.100729927007299e-06, "loss": 0.6803, "step": 23307 }, { "epoch": 0.6805056786663163, "grad_norm": 0.6519936078417741, "learning_rate": 7.100081103000812e-06, "loss": 0.5895, "step": 23308 }, { "epoch": 0.6805348748941636, "grad_norm": 0.6254816150284446, "learning_rate": 7.099432278994324e-06, "loss": 0.6042, "step": 23309 }, { "epoch": 0.680564071122011, "grad_norm": 0.6260368261319431, "learning_rate": 7.098783454987835e-06, "loss": 0.6083, "step": 23310 }, { "epoch": 0.6805932673498584, "grad_norm": 0.6238761040993502, "learning_rate": 7.098134630981347e-06, "loss": 0.6091, "step": 23311 }, { "epoch": 0.6806224635777057, "grad_norm": 0.6309346150439018, "learning_rate": 7.0974858069748585e-06, "loss": 0.578, "step": 23312 }, { "epoch": 0.6806516598055531, "grad_norm": 0.5926627605265181, "learning_rate": 7.0968369829683705e-06, "loss": 0.5356, "step": 23313 }, { "epoch": 0.6806808560334004, "grad_norm": 0.5917704609758986, "learning_rate": 7.096188158961882e-06, "loss": 0.5504, "step": 23314 }, { "epoch": 0.6807100522612478, "grad_norm": 0.6104248119807323, "learning_rate": 7.095539334955394e-06, "loss": 0.5546, "step": 23315 }, { "epoch": 0.6807392484890952, "grad_norm": 0.620481458485018, "learning_rate": 7.094890510948905e-06, "loss": 0.5528, "step": 23316 }, { "epoch": 0.6807684447169425, "grad_norm": 0.6562611334173423, "learning_rate": 7.094241686942418e-06, "loss": 0.6244, "step": 23317 }, { "epoch": 0.6807976409447899, "grad_norm": 0.6561935211849133, "learning_rate": 7.09359286293593e-06, "loss": 0.6296, "step": 23318 }, { "epoch": 0.6808268371726373, "grad_norm": 0.645253720454153, "learning_rate": 7.092944038929441e-06, "loss": 0.6309, "step": 23319 }, { "epoch": 0.6808560334004846, "grad_norm": 0.6813112599903469, "learning_rate": 7.092295214922953e-06, "loss": 0.6532, "step": 23320 }, { "epoch": 0.680885229628332, "grad_norm": 0.6124885817009111, "learning_rate": 7.091646390916464e-06, "loss": 0.5731, "step": 23321 }, { "epoch": 0.6809144258561793, "grad_norm": 0.6866906864007223, "learning_rate": 7.090997566909976e-06, "loss": 0.7067, "step": 23322 }, { "epoch": 0.6809436220840267, "grad_norm": 0.5854080797983301, "learning_rate": 7.090348742903488e-06, "loss": 0.557, "step": 23323 }, { "epoch": 0.6809728183118741, "grad_norm": 0.6360848313692394, "learning_rate": 7.089699918896999e-06, "loss": 0.6145, "step": 23324 }, { "epoch": 0.6810020145397214, "grad_norm": 0.5764941030274543, "learning_rate": 7.0890510948905114e-06, "loss": 0.5236, "step": 23325 }, { "epoch": 0.6810312107675688, "grad_norm": 0.6420652857479617, "learning_rate": 7.088402270884023e-06, "loss": 0.6021, "step": 23326 }, { "epoch": 0.6810604069954161, "grad_norm": 0.6742693544920075, "learning_rate": 7.0877534468775355e-06, "loss": 0.6275, "step": 23327 }, { "epoch": 0.6810896032232635, "grad_norm": 0.6693131978707645, "learning_rate": 7.0871046228710475e-06, "loss": 0.6533, "step": 23328 }, { "epoch": 0.6811187994511109, "grad_norm": 0.6508442885965653, "learning_rate": 7.086455798864559e-06, "loss": 0.5976, "step": 23329 }, { "epoch": 0.6811479956789582, "grad_norm": 0.6444763813354573, "learning_rate": 7.085806974858071e-06, "loss": 0.6006, "step": 23330 }, { "epoch": 0.6811771919068056, "grad_norm": 0.6470367174453724, "learning_rate": 7.085158150851582e-06, "loss": 0.6259, "step": 23331 }, { "epoch": 0.681206388134653, "grad_norm": 0.6537303507387884, "learning_rate": 7.084509326845094e-06, "loss": 0.6147, "step": 23332 }, { "epoch": 0.6812355843625003, "grad_norm": 0.6747575405219556, "learning_rate": 7.083860502838605e-06, "loss": 0.6411, "step": 23333 }, { "epoch": 0.6812647805903477, "grad_norm": 0.6245309248421301, "learning_rate": 7.083211678832117e-06, "loss": 0.6056, "step": 23334 }, { "epoch": 0.681293976818195, "grad_norm": 0.6742484459515643, "learning_rate": 7.082562854825628e-06, "loss": 0.6722, "step": 23335 }, { "epoch": 0.6813231730460424, "grad_norm": 0.6350257350911483, "learning_rate": 7.081914030819141e-06, "loss": 0.5752, "step": 23336 }, { "epoch": 0.6813523692738899, "grad_norm": 0.627754031767318, "learning_rate": 7.081265206812653e-06, "loss": 0.5777, "step": 23337 }, { "epoch": 0.6813815655017372, "grad_norm": 0.6131893734758825, "learning_rate": 7.080616382806164e-06, "loss": 0.5353, "step": 23338 }, { "epoch": 0.6814107617295846, "grad_norm": 0.6291530178166498, "learning_rate": 7.0799675587996764e-06, "loss": 0.6266, "step": 23339 }, { "epoch": 0.681439957957432, "grad_norm": 0.6479162355196912, "learning_rate": 7.079318734793188e-06, "loss": 0.5826, "step": 23340 }, { "epoch": 0.6814691541852793, "grad_norm": 0.729614911403424, "learning_rate": 7.0786699107867e-06, "loss": 0.5524, "step": 23341 }, { "epoch": 0.6814983504131267, "grad_norm": 0.6754245180562002, "learning_rate": 7.078021086780212e-06, "loss": 0.5975, "step": 23342 }, { "epoch": 0.681527546640974, "grad_norm": 0.6972527243231231, "learning_rate": 7.077372262773723e-06, "loss": 0.6851, "step": 23343 }, { "epoch": 0.6815567428688214, "grad_norm": 0.6512320780029082, "learning_rate": 7.076723438767235e-06, "loss": 0.5943, "step": 23344 }, { "epoch": 0.6815859390966688, "grad_norm": 0.6379967131321956, "learning_rate": 7.076074614760746e-06, "loss": 0.667, "step": 23345 }, { "epoch": 0.6816151353245161, "grad_norm": 0.6410702057556535, "learning_rate": 7.075425790754259e-06, "loss": 0.5705, "step": 23346 }, { "epoch": 0.6816443315523635, "grad_norm": 0.6597629043654626, "learning_rate": 7.074776966747771e-06, "loss": 0.5929, "step": 23347 }, { "epoch": 0.6816735277802108, "grad_norm": 0.6076695900140456, "learning_rate": 7.074128142741282e-06, "loss": 0.5921, "step": 23348 }, { "epoch": 0.6817027240080582, "grad_norm": 0.6180121561405633, "learning_rate": 7.073479318734794e-06, "loss": 0.5742, "step": 23349 }, { "epoch": 0.6817319202359056, "grad_norm": 0.675127093852188, "learning_rate": 7.072830494728305e-06, "loss": 0.6983, "step": 23350 }, { "epoch": 0.6817611164637529, "grad_norm": 0.6734153469831733, "learning_rate": 7.072181670721817e-06, "loss": 0.7116, "step": 23351 }, { "epoch": 0.6817903126916003, "grad_norm": 0.6673586936918086, "learning_rate": 7.0715328467153285e-06, "loss": 0.7062, "step": 23352 }, { "epoch": 0.6818195089194476, "grad_norm": 0.6471229556715832, "learning_rate": 7.0708840227088406e-06, "loss": 0.5964, "step": 23353 }, { "epoch": 0.681848705147295, "grad_norm": 0.6709775908887475, "learning_rate": 7.070235198702352e-06, "loss": 0.6505, "step": 23354 }, { "epoch": 0.6818779013751424, "grad_norm": 0.6161654769803946, "learning_rate": 7.069586374695865e-06, "loss": 0.552, "step": 23355 }, { "epoch": 0.6819070976029897, "grad_norm": 0.6419207877466879, "learning_rate": 7.068937550689377e-06, "loss": 0.6331, "step": 23356 }, { "epoch": 0.6819362938308371, "grad_norm": 0.5876098819527442, "learning_rate": 7.068288726682888e-06, "loss": 0.5431, "step": 23357 }, { "epoch": 0.6819654900586845, "grad_norm": 0.6797676486019921, "learning_rate": 7.0676399026764e-06, "loss": 0.6493, "step": 23358 }, { "epoch": 0.6819946862865318, "grad_norm": 0.5856168185569686, "learning_rate": 7.066991078669911e-06, "loss": 0.5093, "step": 23359 }, { "epoch": 0.6820238825143792, "grad_norm": 0.6384614793568948, "learning_rate": 7.066342254663423e-06, "loss": 0.5528, "step": 23360 }, { "epoch": 0.6820530787422265, "grad_norm": 0.6549421785514572, "learning_rate": 7.065693430656935e-06, "loss": 0.6235, "step": 23361 }, { "epoch": 0.6820822749700739, "grad_norm": 0.6142981474512359, "learning_rate": 7.065044606650446e-06, "loss": 0.5669, "step": 23362 }, { "epoch": 0.6821114711979213, "grad_norm": 0.6363646402068621, "learning_rate": 7.064395782643958e-06, "loss": 0.5747, "step": 23363 }, { "epoch": 0.6821406674257686, "grad_norm": 0.6454329001947875, "learning_rate": 7.0637469586374695e-06, "loss": 0.6022, "step": 23364 }, { "epoch": 0.682169863653616, "grad_norm": 0.6302212141038186, "learning_rate": 7.063098134630982e-06, "loss": 0.6001, "step": 23365 }, { "epoch": 0.6821990598814633, "grad_norm": 0.6386230590614526, "learning_rate": 7.062449310624494e-06, "loss": 0.5695, "step": 23366 }, { "epoch": 0.6822282561093107, "grad_norm": 0.6051476528002675, "learning_rate": 7.0618004866180056e-06, "loss": 0.5065, "step": 23367 }, { "epoch": 0.6822574523371581, "grad_norm": 0.6610590294416522, "learning_rate": 7.061151662611518e-06, "loss": 0.6277, "step": 23368 }, { "epoch": 0.6822866485650054, "grad_norm": 0.647765485421372, "learning_rate": 7.060502838605029e-06, "loss": 0.5999, "step": 23369 }, { "epoch": 0.6823158447928528, "grad_norm": 0.6651978144021132, "learning_rate": 7.059854014598541e-06, "loss": 0.6212, "step": 23370 }, { "epoch": 0.6823450410207001, "grad_norm": 0.6076693064136984, "learning_rate": 7.059205190592052e-06, "loss": 0.5533, "step": 23371 }, { "epoch": 0.6823742372485475, "grad_norm": 0.6927562276952385, "learning_rate": 7.058556366585564e-06, "loss": 0.7277, "step": 23372 }, { "epoch": 0.6824034334763949, "grad_norm": 0.7124949486505151, "learning_rate": 7.057907542579075e-06, "loss": 0.6829, "step": 23373 }, { "epoch": 0.6824326297042422, "grad_norm": 0.639368652241448, "learning_rate": 7.057258718572588e-06, "loss": 0.5811, "step": 23374 }, { "epoch": 0.6824618259320896, "grad_norm": 0.6753063615472301, "learning_rate": 7.0566098945661e-06, "loss": 0.6746, "step": 23375 }, { "epoch": 0.682491022159937, "grad_norm": 0.6409001432943939, "learning_rate": 7.055961070559611e-06, "loss": 0.599, "step": 23376 }, { "epoch": 0.6825202183877843, "grad_norm": 0.6415100261758256, "learning_rate": 7.055312246553123e-06, "loss": 0.5828, "step": 23377 }, { "epoch": 0.6825494146156317, "grad_norm": 0.6448750992113462, "learning_rate": 7.0546634225466345e-06, "loss": 0.5851, "step": 23378 }, { "epoch": 0.682578610843479, "grad_norm": 0.6573889868826588, "learning_rate": 7.0540145985401465e-06, "loss": 0.6409, "step": 23379 }, { "epoch": 0.6826078070713264, "grad_norm": 0.6391258181322956, "learning_rate": 7.0533657745336585e-06, "loss": 0.62, "step": 23380 }, { "epoch": 0.6826370032991738, "grad_norm": 0.6608712500503323, "learning_rate": 7.05271695052717e-06, "loss": 0.6303, "step": 23381 }, { "epoch": 0.6826661995270211, "grad_norm": 0.6238682717121948, "learning_rate": 7.052068126520682e-06, "loss": 0.5316, "step": 23382 }, { "epoch": 0.6826953957548685, "grad_norm": 0.6450271486866395, "learning_rate": 7.051419302514194e-06, "loss": 0.5624, "step": 23383 }, { "epoch": 0.6827245919827158, "grad_norm": 0.5865973479194252, "learning_rate": 7.050770478507706e-06, "loss": 0.5337, "step": 23384 }, { "epoch": 0.6827537882105632, "grad_norm": 0.639973821583825, "learning_rate": 7.050121654501218e-06, "loss": 0.5627, "step": 23385 }, { "epoch": 0.6827829844384106, "grad_norm": 0.6406431244650056, "learning_rate": 7.049472830494729e-06, "loss": 0.5951, "step": 23386 }, { "epoch": 0.6828121806662579, "grad_norm": 0.6395625678963663, "learning_rate": 7.048824006488241e-06, "loss": 0.6269, "step": 23387 }, { "epoch": 0.6828413768941053, "grad_norm": 0.6202455100391263, "learning_rate": 7.048175182481752e-06, "loss": 0.6056, "step": 23388 }, { "epoch": 0.6828705731219527, "grad_norm": 0.633815275925188, "learning_rate": 7.047526358475264e-06, "loss": 0.5814, "step": 23389 }, { "epoch": 0.6828997693498, "grad_norm": 0.69444786916106, "learning_rate": 7.046877534468775e-06, "loss": 0.6887, "step": 23390 }, { "epoch": 0.6829289655776474, "grad_norm": 0.6695555915239146, "learning_rate": 7.0462287104622874e-06, "loss": 0.6362, "step": 23391 }, { "epoch": 0.6829581618054947, "grad_norm": 0.6699295621502602, "learning_rate": 7.045579886455799e-06, "loss": 0.6338, "step": 23392 }, { "epoch": 0.6829873580333421, "grad_norm": 0.6969546672533702, "learning_rate": 7.0449310624493115e-06, "loss": 0.7213, "step": 23393 }, { "epoch": 0.6830165542611895, "grad_norm": 0.6476663602233012, "learning_rate": 7.0442822384428235e-06, "loss": 0.6069, "step": 23394 }, { "epoch": 0.6830457504890368, "grad_norm": 0.5913022476777673, "learning_rate": 7.043633414436335e-06, "loss": 0.5026, "step": 23395 }, { "epoch": 0.6830749467168842, "grad_norm": 0.5855157829198663, "learning_rate": 7.042984590429847e-06, "loss": 0.5352, "step": 23396 }, { "epoch": 0.6831041429447315, "grad_norm": 0.6342354270786633, "learning_rate": 7.042335766423358e-06, "loss": 0.5795, "step": 23397 }, { "epoch": 0.6831333391725789, "grad_norm": 0.6830640019412445, "learning_rate": 7.04168694241687e-06, "loss": 0.722, "step": 23398 }, { "epoch": 0.6831625354004263, "grad_norm": 0.6272781416407216, "learning_rate": 7.041038118410382e-06, "loss": 0.5437, "step": 23399 }, { "epoch": 0.6831917316282736, "grad_norm": 0.6449532712930752, "learning_rate": 7.040389294403893e-06, "loss": 0.5799, "step": 23400 }, { "epoch": 0.683220927856121, "grad_norm": 0.6379371385558041, "learning_rate": 7.039740470397405e-06, "loss": 0.6001, "step": 23401 }, { "epoch": 0.6832501240839683, "grad_norm": 0.6858889425140181, "learning_rate": 7.039091646390917e-06, "loss": 0.6078, "step": 23402 }, { "epoch": 0.6832793203118157, "grad_norm": 0.6836239956921348, "learning_rate": 7.038442822384429e-06, "loss": 0.629, "step": 23403 }, { "epoch": 0.6833085165396631, "grad_norm": 0.6192118478584463, "learning_rate": 7.037793998377941e-06, "loss": 0.593, "step": 23404 }, { "epoch": 0.6833377127675104, "grad_norm": 0.6634097329076147, "learning_rate": 7.0371451743714524e-06, "loss": 0.6307, "step": 23405 }, { "epoch": 0.6833669089953578, "grad_norm": 0.664964215752718, "learning_rate": 7.0364963503649645e-06, "loss": 0.6606, "step": 23406 }, { "epoch": 0.6833961052232052, "grad_norm": 0.6888154165704545, "learning_rate": 7.035847526358476e-06, "loss": 0.6325, "step": 23407 }, { "epoch": 0.6834253014510525, "grad_norm": 0.6737289628513965, "learning_rate": 7.035198702351988e-06, "loss": 0.6494, "step": 23408 }, { "epoch": 0.6834544976788999, "grad_norm": 0.6482703427845309, "learning_rate": 7.034549878345499e-06, "loss": 0.5921, "step": 23409 }, { "epoch": 0.6834836939067472, "grad_norm": 0.5969657968522665, "learning_rate": 7.033901054339011e-06, "loss": 0.5482, "step": 23410 }, { "epoch": 0.6835128901345946, "grad_norm": 0.6776569372094053, "learning_rate": 7.033252230332522e-06, "loss": 0.7086, "step": 23411 }, { "epoch": 0.683542086362442, "grad_norm": 0.6512939415738122, "learning_rate": 7.032603406326035e-06, "loss": 0.605, "step": 23412 }, { "epoch": 0.6835712825902893, "grad_norm": 0.6279137086770612, "learning_rate": 7.031954582319547e-06, "loss": 0.5698, "step": 23413 }, { "epoch": 0.6836004788181367, "grad_norm": 0.665246447873302, "learning_rate": 7.031305758313058e-06, "loss": 0.632, "step": 23414 }, { "epoch": 0.683629675045984, "grad_norm": 0.681145845046463, "learning_rate": 7.03065693430657e-06, "loss": 0.6214, "step": 23415 }, { "epoch": 0.6836588712738314, "grad_norm": 0.6701660068197635, "learning_rate": 7.030008110300081e-06, "loss": 0.7123, "step": 23416 }, { "epoch": 0.6836880675016788, "grad_norm": 0.6390418682780086, "learning_rate": 7.029359286293593e-06, "loss": 0.6145, "step": 23417 }, { "epoch": 0.6837172637295261, "grad_norm": 0.6043729943593729, "learning_rate": 7.0287104622871046e-06, "loss": 0.5238, "step": 23418 }, { "epoch": 0.6837464599573735, "grad_norm": 0.6091566156471205, "learning_rate": 7.028061638280617e-06, "loss": 0.5528, "step": 23419 }, { "epoch": 0.6837756561852208, "grad_norm": 0.6766888748499591, "learning_rate": 7.027412814274129e-06, "loss": 0.632, "step": 23420 }, { "epoch": 0.6838048524130682, "grad_norm": 0.666071575655537, "learning_rate": 7.026763990267641e-06, "loss": 0.5836, "step": 23421 }, { "epoch": 0.6838340486409156, "grad_norm": 0.6500714794203999, "learning_rate": 7.026115166261153e-06, "loss": 0.6431, "step": 23422 }, { "epoch": 0.6838632448687629, "grad_norm": 0.6162202173584226, "learning_rate": 7.025466342254665e-06, "loss": 0.5443, "step": 23423 }, { "epoch": 0.6838924410966103, "grad_norm": 0.647517019962965, "learning_rate": 7.024817518248176e-06, "loss": 0.632, "step": 23424 }, { "epoch": 0.6839216373244577, "grad_norm": 0.6489823139895867, "learning_rate": 7.024168694241688e-06, "loss": 0.6303, "step": 23425 }, { "epoch": 0.683950833552305, "grad_norm": 0.660652650078922, "learning_rate": 7.023519870235199e-06, "loss": 0.6565, "step": 23426 }, { "epoch": 0.6839800297801524, "grad_norm": 0.610965344447392, "learning_rate": 7.022871046228711e-06, "loss": 0.5521, "step": 23427 }, { "epoch": 0.6840092260079997, "grad_norm": 0.6682505677121282, "learning_rate": 7.022222222222222e-06, "loss": 0.6952, "step": 23428 }, { "epoch": 0.6840384222358471, "grad_norm": 0.6775552629682672, "learning_rate": 7.021573398215734e-06, "loss": 0.6637, "step": 23429 }, { "epoch": 0.6840676184636945, "grad_norm": 0.6424797834518579, "learning_rate": 7.0209245742092455e-06, "loss": 0.5892, "step": 23430 }, { "epoch": 0.6840968146915418, "grad_norm": 0.6459383782384468, "learning_rate": 7.020275750202758e-06, "loss": 0.5842, "step": 23431 }, { "epoch": 0.6841260109193892, "grad_norm": 0.62414691036253, "learning_rate": 7.01962692619627e-06, "loss": 0.5487, "step": 23432 }, { "epoch": 0.6841552071472365, "grad_norm": 0.6429647463858787, "learning_rate": 7.0189781021897816e-06, "loss": 0.6428, "step": 23433 }, { "epoch": 0.6841844033750839, "grad_norm": 0.5803710703908704, "learning_rate": 7.018329278183294e-06, "loss": 0.5412, "step": 23434 }, { "epoch": 0.6842135996029313, "grad_norm": 0.5960067334760571, "learning_rate": 7.017680454176805e-06, "loss": 0.5522, "step": 23435 }, { "epoch": 0.6842427958307786, "grad_norm": 0.6022986546751209, "learning_rate": 7.017031630170317e-06, "loss": 0.5665, "step": 23436 }, { "epoch": 0.684271992058626, "grad_norm": 0.6403308689284057, "learning_rate": 7.016382806163828e-06, "loss": 0.5867, "step": 23437 }, { "epoch": 0.6843011882864733, "grad_norm": 0.6292569719157215, "learning_rate": 7.01573398215734e-06, "loss": 0.5593, "step": 23438 }, { "epoch": 0.6843303845143207, "grad_norm": 0.8095859026285613, "learning_rate": 7.015085158150852e-06, "loss": 0.7698, "step": 23439 }, { "epoch": 0.6843595807421681, "grad_norm": 0.6536775033440838, "learning_rate": 7.014436334144364e-06, "loss": 0.667, "step": 23440 }, { "epoch": 0.6843887769700154, "grad_norm": 0.643867241826248, "learning_rate": 7.013787510137876e-06, "loss": 0.5871, "step": 23441 }, { "epoch": 0.6844179731978628, "grad_norm": 0.6321568046278403, "learning_rate": 7.013138686131388e-06, "loss": 0.6039, "step": 23442 }, { "epoch": 0.6844471694257102, "grad_norm": 0.5899514497762753, "learning_rate": 7.012489862124899e-06, "loss": 0.5122, "step": 23443 }, { "epoch": 0.6844763656535575, "grad_norm": 0.6114280945508174, "learning_rate": 7.011841038118411e-06, "loss": 0.5571, "step": 23444 }, { "epoch": 0.6845055618814049, "grad_norm": 0.6280124451593023, "learning_rate": 7.0111922141119225e-06, "loss": 0.5743, "step": 23445 }, { "epoch": 0.6845347581092522, "grad_norm": 0.6592929943933673, "learning_rate": 7.0105433901054345e-06, "loss": 0.6117, "step": 23446 }, { "epoch": 0.6845639543370996, "grad_norm": 0.6649427916929569, "learning_rate": 7.009894566098946e-06, "loss": 0.6461, "step": 23447 }, { "epoch": 0.684593150564947, "grad_norm": 0.6765765675752646, "learning_rate": 7.009245742092458e-06, "loss": 0.6402, "step": 23448 }, { "epoch": 0.6846223467927943, "grad_norm": 0.6189784494070113, "learning_rate": 7.008596918085969e-06, "loss": 0.5553, "step": 23449 }, { "epoch": 0.6846515430206417, "grad_norm": 0.6301221102880237, "learning_rate": 7.007948094079482e-06, "loss": 0.5955, "step": 23450 }, { "epoch": 0.684680739248489, "grad_norm": 0.6220307767487064, "learning_rate": 7.007299270072994e-06, "loss": 0.6032, "step": 23451 }, { "epoch": 0.6847099354763364, "grad_norm": 0.6933612546039389, "learning_rate": 7.006650446066505e-06, "loss": 0.6413, "step": 23452 }, { "epoch": 0.6847391317041838, "grad_norm": 0.5934805275260999, "learning_rate": 7.006001622060017e-06, "loss": 0.5397, "step": 23453 }, { "epoch": 0.6847683279320311, "grad_norm": 0.6800348121645808, "learning_rate": 7.005352798053528e-06, "loss": 0.6794, "step": 23454 }, { "epoch": 0.6847975241598785, "grad_norm": 0.6100235645000858, "learning_rate": 7.00470397404704e-06, "loss": 0.5851, "step": 23455 }, { "epoch": 0.6848267203877259, "grad_norm": 0.647142016497874, "learning_rate": 7.004055150040551e-06, "loss": 0.6127, "step": 23456 }, { "epoch": 0.6848559166155732, "grad_norm": 0.655222613903383, "learning_rate": 7.0034063260340634e-06, "loss": 0.6425, "step": 23457 }, { "epoch": 0.6848851128434207, "grad_norm": 0.6220198362487313, "learning_rate": 7.0027575020275755e-06, "loss": 0.5713, "step": 23458 }, { "epoch": 0.684914309071268, "grad_norm": 0.6584110612515718, "learning_rate": 7.0021086780210875e-06, "loss": 0.6109, "step": 23459 }, { "epoch": 0.6849435052991154, "grad_norm": 0.6491585954340066, "learning_rate": 7.0014598540145995e-06, "loss": 0.6248, "step": 23460 }, { "epoch": 0.6849727015269628, "grad_norm": 0.6447529927378607, "learning_rate": 7.000811030008111e-06, "loss": 0.5618, "step": 23461 }, { "epoch": 0.6850018977548101, "grad_norm": 0.6912651774473483, "learning_rate": 7.000162206001623e-06, "loss": 0.6616, "step": 23462 }, { "epoch": 0.6850310939826575, "grad_norm": 0.65559983058584, "learning_rate": 6.999513381995135e-06, "loss": 0.616, "step": 23463 }, { "epoch": 0.6850602902105049, "grad_norm": 0.6725551805928783, "learning_rate": 6.998864557988646e-06, "loss": 0.6344, "step": 23464 }, { "epoch": 0.6850894864383522, "grad_norm": 0.6592072080679902, "learning_rate": 6.998215733982158e-06, "loss": 0.6364, "step": 23465 }, { "epoch": 0.6851186826661996, "grad_norm": 0.6321385678664057, "learning_rate": 6.997566909975669e-06, "loss": 0.5699, "step": 23466 }, { "epoch": 0.6851478788940469, "grad_norm": 0.6535294964309937, "learning_rate": 6.996918085969181e-06, "loss": 0.6443, "step": 23467 }, { "epoch": 0.6851770751218943, "grad_norm": 0.6109812275247204, "learning_rate": 6.996269261962694e-06, "loss": 0.6064, "step": 23468 }, { "epoch": 0.6852062713497417, "grad_norm": 0.6498181802140138, "learning_rate": 6.995620437956205e-06, "loss": 0.6008, "step": 23469 }, { "epoch": 0.685235467577589, "grad_norm": 0.6667683499256912, "learning_rate": 6.994971613949717e-06, "loss": 0.6218, "step": 23470 }, { "epoch": 0.6852646638054364, "grad_norm": 0.6571273086036402, "learning_rate": 6.9943227899432284e-06, "loss": 0.6484, "step": 23471 }, { "epoch": 0.6852938600332837, "grad_norm": 0.6663814505908805, "learning_rate": 6.9936739659367405e-06, "loss": 0.7122, "step": 23472 }, { "epoch": 0.6853230562611311, "grad_norm": 0.6408530416271723, "learning_rate": 6.993025141930252e-06, "loss": 0.6091, "step": 23473 }, { "epoch": 0.6853522524889785, "grad_norm": 0.6341155433743407, "learning_rate": 6.992376317923764e-06, "loss": 0.5702, "step": 23474 }, { "epoch": 0.6853814487168258, "grad_norm": 0.5904666267727198, "learning_rate": 6.991727493917275e-06, "loss": 0.5191, "step": 23475 }, { "epoch": 0.6854106449446732, "grad_norm": 0.6491646986839107, "learning_rate": 6.991078669910787e-06, "loss": 0.6048, "step": 23476 }, { "epoch": 0.6854398411725205, "grad_norm": 0.6655094541364095, "learning_rate": 6.990429845904299e-06, "loss": 0.6066, "step": 23477 }, { "epoch": 0.6854690374003679, "grad_norm": 0.677376046644468, "learning_rate": 6.989781021897811e-06, "loss": 0.6623, "step": 23478 }, { "epoch": 0.6854982336282153, "grad_norm": 0.6079349919321009, "learning_rate": 6.989132197891323e-06, "loss": 0.5563, "step": 23479 }, { "epoch": 0.6855274298560626, "grad_norm": 0.6356110717835929, "learning_rate": 6.988483373884834e-06, "loss": 0.5566, "step": 23480 }, { "epoch": 0.68555662608391, "grad_norm": 0.630720805502403, "learning_rate": 6.987834549878346e-06, "loss": 0.5506, "step": 23481 }, { "epoch": 0.6855858223117574, "grad_norm": 0.6584874751784932, "learning_rate": 6.987185725871858e-06, "loss": 0.6365, "step": 23482 }, { "epoch": 0.6856150185396047, "grad_norm": 0.6821356157183038, "learning_rate": 6.986536901865369e-06, "loss": 0.652, "step": 23483 }, { "epoch": 0.6856442147674521, "grad_norm": 0.5958735543271962, "learning_rate": 6.985888077858881e-06, "loss": 0.5599, "step": 23484 }, { "epoch": 0.6856734109952994, "grad_norm": 0.6114188821078679, "learning_rate": 6.985239253852393e-06, "loss": 0.5573, "step": 23485 }, { "epoch": 0.6857026072231468, "grad_norm": 0.6065542661135016, "learning_rate": 6.984590429845905e-06, "loss": 0.5378, "step": 23486 }, { "epoch": 0.6857318034509942, "grad_norm": 0.6235035368333264, "learning_rate": 6.9839416058394175e-06, "loss": 0.5583, "step": 23487 }, { "epoch": 0.6857609996788415, "grad_norm": 0.6824521337056311, "learning_rate": 6.983292781832929e-06, "loss": 0.6372, "step": 23488 }, { "epoch": 0.6857901959066889, "grad_norm": 0.6741977000001703, "learning_rate": 6.982643957826441e-06, "loss": 0.6157, "step": 23489 }, { "epoch": 0.6858193921345362, "grad_norm": 0.609589153476648, "learning_rate": 6.981995133819952e-06, "loss": 0.554, "step": 23490 }, { "epoch": 0.6858485883623836, "grad_norm": 0.6473205518230283, "learning_rate": 6.981346309813464e-06, "loss": 0.5864, "step": 23491 }, { "epoch": 0.685877784590231, "grad_norm": 0.6633295971542975, "learning_rate": 6.980697485806975e-06, "loss": 0.6042, "step": 23492 }, { "epoch": 0.6859069808180783, "grad_norm": 0.6333482510456278, "learning_rate": 6.980048661800487e-06, "loss": 0.6065, "step": 23493 }, { "epoch": 0.6859361770459257, "grad_norm": 0.6421866448502125, "learning_rate": 6.979399837793998e-06, "loss": 0.6253, "step": 23494 }, { "epoch": 0.685965373273773, "grad_norm": 0.6052933395719999, "learning_rate": 6.97875101378751e-06, "loss": 0.5622, "step": 23495 }, { "epoch": 0.6859945695016204, "grad_norm": 0.6608430937838511, "learning_rate": 6.978102189781022e-06, "loss": 0.5998, "step": 23496 }, { "epoch": 0.6860237657294678, "grad_norm": 0.6712415130818877, "learning_rate": 6.977453365774534e-06, "loss": 0.6226, "step": 23497 }, { "epoch": 0.6860529619573151, "grad_norm": 0.5971686696989617, "learning_rate": 6.976804541768046e-06, "loss": 0.5362, "step": 23498 }, { "epoch": 0.6860821581851625, "grad_norm": 0.7076098052343959, "learning_rate": 6.976155717761558e-06, "loss": 0.6602, "step": 23499 }, { "epoch": 0.6861113544130099, "grad_norm": 0.6487224944164338, "learning_rate": 6.97550689375507e-06, "loss": 0.5765, "step": 23500 }, { "epoch": 0.6861405506408572, "grad_norm": 1.2160233097541349, "learning_rate": 6.974858069748582e-06, "loss": 0.5765, "step": 23501 }, { "epoch": 0.6861697468687046, "grad_norm": 0.6742505743228288, "learning_rate": 6.974209245742093e-06, "loss": 0.6034, "step": 23502 }, { "epoch": 0.6861989430965519, "grad_norm": 0.5807559670465126, "learning_rate": 6.973560421735605e-06, "loss": 0.4719, "step": 23503 }, { "epoch": 0.6862281393243993, "grad_norm": 0.6298381216854962, "learning_rate": 6.972911597729116e-06, "loss": 0.5976, "step": 23504 }, { "epoch": 0.6862573355522467, "grad_norm": 0.6835486516424046, "learning_rate": 6.972262773722628e-06, "loss": 0.6685, "step": 23505 }, { "epoch": 0.686286531780094, "grad_norm": 0.6864526196610091, "learning_rate": 6.971613949716141e-06, "loss": 0.6614, "step": 23506 }, { "epoch": 0.6863157280079414, "grad_norm": 0.6766625395716571, "learning_rate": 6.970965125709652e-06, "loss": 0.631, "step": 23507 }, { "epoch": 0.6863449242357887, "grad_norm": 0.6876952080801859, "learning_rate": 6.970316301703164e-06, "loss": 0.6608, "step": 23508 }, { "epoch": 0.6863741204636361, "grad_norm": 0.6520748908030928, "learning_rate": 6.969667477696675e-06, "loss": 0.6484, "step": 23509 }, { "epoch": 0.6864033166914835, "grad_norm": 0.6319363770417956, "learning_rate": 6.969018653690187e-06, "loss": 0.5832, "step": 23510 }, { "epoch": 0.6864325129193308, "grad_norm": 0.6180513794032008, "learning_rate": 6.9683698296836985e-06, "loss": 0.5908, "step": 23511 }, { "epoch": 0.6864617091471782, "grad_norm": 0.6419162044452875, "learning_rate": 6.9677210056772105e-06, "loss": 0.6173, "step": 23512 }, { "epoch": 0.6864909053750256, "grad_norm": 0.621157200970733, "learning_rate": 6.967072181670722e-06, "loss": 0.5553, "step": 23513 }, { "epoch": 0.6865201016028729, "grad_norm": 0.6084491825344713, "learning_rate": 6.966423357664234e-06, "loss": 0.5564, "step": 23514 }, { "epoch": 0.6865492978307203, "grad_norm": 0.6484356257453946, "learning_rate": 6.965774533657746e-06, "loss": 0.5795, "step": 23515 }, { "epoch": 0.6865784940585676, "grad_norm": 0.6284217609610718, "learning_rate": 6.965125709651258e-06, "loss": 0.5392, "step": 23516 }, { "epoch": 0.686607690286415, "grad_norm": 0.6961185182091822, "learning_rate": 6.96447688564477e-06, "loss": 0.7147, "step": 23517 }, { "epoch": 0.6866368865142624, "grad_norm": 0.6168115070403161, "learning_rate": 6.963828061638281e-06, "loss": 0.6166, "step": 23518 }, { "epoch": 0.6866660827421097, "grad_norm": 0.6454452174559082, "learning_rate": 6.963179237631793e-06, "loss": 0.5883, "step": 23519 }, { "epoch": 0.6866952789699571, "grad_norm": 0.6271266005772637, "learning_rate": 6.962530413625305e-06, "loss": 0.5724, "step": 23520 }, { "epoch": 0.6867244751978044, "grad_norm": 0.7015321149831556, "learning_rate": 6.961881589618816e-06, "loss": 0.6414, "step": 23521 }, { "epoch": 0.6867536714256518, "grad_norm": 0.6225814216366958, "learning_rate": 6.961232765612328e-06, "loss": 0.5426, "step": 23522 }, { "epoch": 0.6867828676534992, "grad_norm": 0.6891860405220316, "learning_rate": 6.9605839416058395e-06, "loss": 0.6861, "step": 23523 }, { "epoch": 0.6868120638813465, "grad_norm": 0.6759525631071174, "learning_rate": 6.9599351175993515e-06, "loss": 0.6551, "step": 23524 }, { "epoch": 0.6868412601091939, "grad_norm": 0.6455548965072669, "learning_rate": 6.959286293592864e-06, "loss": 0.6415, "step": 23525 }, { "epoch": 0.6868704563370412, "grad_norm": 0.6654568474077472, "learning_rate": 6.9586374695863755e-06, "loss": 0.6387, "step": 23526 }, { "epoch": 0.6868996525648886, "grad_norm": 0.6583616403091519, "learning_rate": 6.9579886455798876e-06, "loss": 0.6094, "step": 23527 }, { "epoch": 0.686928848792736, "grad_norm": 0.6574131165201177, "learning_rate": 6.957339821573399e-06, "loss": 0.6344, "step": 23528 }, { "epoch": 0.6869580450205833, "grad_norm": 0.6271411408065705, "learning_rate": 6.956690997566911e-06, "loss": 0.5817, "step": 23529 }, { "epoch": 0.6869872412484307, "grad_norm": 0.6517178593461506, "learning_rate": 6.956042173560422e-06, "loss": 0.6111, "step": 23530 }, { "epoch": 0.687016437476278, "grad_norm": 0.6278656190739325, "learning_rate": 6.955393349553934e-06, "loss": 0.5616, "step": 23531 }, { "epoch": 0.6870456337041254, "grad_norm": 0.6129053069242323, "learning_rate": 6.954744525547445e-06, "loss": 0.559, "step": 23532 }, { "epoch": 0.6870748299319728, "grad_norm": 0.6548093607994255, "learning_rate": 6.954095701540957e-06, "loss": 0.6293, "step": 23533 }, { "epoch": 0.6871040261598201, "grad_norm": 0.6349706124918532, "learning_rate": 6.95344687753447e-06, "loss": 0.5757, "step": 23534 }, { "epoch": 0.6871332223876675, "grad_norm": 0.6168036698770919, "learning_rate": 6.952798053527981e-06, "loss": 0.5415, "step": 23535 }, { "epoch": 0.6871624186155149, "grad_norm": 0.661711114863874, "learning_rate": 6.952149229521493e-06, "loss": 0.6217, "step": 23536 }, { "epoch": 0.6871916148433622, "grad_norm": 0.6820193832258085, "learning_rate": 6.9515004055150044e-06, "loss": 0.686, "step": 23537 }, { "epoch": 0.6872208110712096, "grad_norm": 0.6222676409402289, "learning_rate": 6.9508515815085165e-06, "loss": 0.5417, "step": 23538 }, { "epoch": 0.6872500072990569, "grad_norm": 0.7063664036945355, "learning_rate": 6.9502027575020285e-06, "loss": 0.6926, "step": 23539 }, { "epoch": 0.6872792035269043, "grad_norm": 0.6077070909844747, "learning_rate": 6.94955393349554e-06, "loss": 0.5769, "step": 23540 }, { "epoch": 0.6873083997547517, "grad_norm": 0.6242071736023469, "learning_rate": 6.948905109489052e-06, "loss": 0.5835, "step": 23541 }, { "epoch": 0.687337595982599, "grad_norm": 0.6557741020998462, "learning_rate": 6.948256285482563e-06, "loss": 0.6051, "step": 23542 }, { "epoch": 0.6873667922104464, "grad_norm": 0.6363253689187394, "learning_rate": 6.947607461476075e-06, "loss": 0.6205, "step": 23543 }, { "epoch": 0.6873959884382937, "grad_norm": 0.6421449559007463, "learning_rate": 6.946958637469588e-06, "loss": 0.6184, "step": 23544 }, { "epoch": 0.6874251846661411, "grad_norm": 0.6770865326835546, "learning_rate": 6.946309813463099e-06, "loss": 0.6634, "step": 23545 }, { "epoch": 0.6874543808939885, "grad_norm": 0.6402604009288303, "learning_rate": 6.945660989456611e-06, "loss": 0.581, "step": 23546 }, { "epoch": 0.6874835771218358, "grad_norm": 0.6369300510538266, "learning_rate": 6.945012165450122e-06, "loss": 0.6054, "step": 23547 }, { "epoch": 0.6875127733496832, "grad_norm": 0.6319177447785352, "learning_rate": 6.944363341443634e-06, "loss": 0.6127, "step": 23548 }, { "epoch": 0.6875419695775306, "grad_norm": 0.6144946808612457, "learning_rate": 6.943714517437145e-06, "loss": 0.5718, "step": 23549 }, { "epoch": 0.6875711658053779, "grad_norm": 0.6382454157104187, "learning_rate": 6.943065693430657e-06, "loss": 0.5873, "step": 23550 }, { "epoch": 0.6876003620332253, "grad_norm": 0.6397906205993682, "learning_rate": 6.942416869424169e-06, "loss": 0.5914, "step": 23551 }, { "epoch": 0.6876295582610726, "grad_norm": 0.6252188374027242, "learning_rate": 6.941768045417681e-06, "loss": 0.5934, "step": 23552 }, { "epoch": 0.68765875448892, "grad_norm": 0.6816209577114914, "learning_rate": 6.9411192214111935e-06, "loss": 0.6319, "step": 23553 }, { "epoch": 0.6876879507167674, "grad_norm": 0.6512389893018783, "learning_rate": 6.940470397404705e-06, "loss": 0.5653, "step": 23554 }, { "epoch": 0.6877171469446147, "grad_norm": 0.5955931847172666, "learning_rate": 6.939821573398217e-06, "loss": 0.5597, "step": 23555 }, { "epoch": 0.6877463431724621, "grad_norm": 0.6100883922519535, "learning_rate": 6.939172749391728e-06, "loss": 0.5791, "step": 23556 }, { "epoch": 0.6877755394003094, "grad_norm": 0.6704742603711022, "learning_rate": 6.93852392538524e-06, "loss": 0.6348, "step": 23557 }, { "epoch": 0.6878047356281568, "grad_norm": 0.6811731402232607, "learning_rate": 6.937875101378752e-06, "loss": 0.6823, "step": 23558 }, { "epoch": 0.6878339318560042, "grad_norm": 0.6510643235669024, "learning_rate": 6.937226277372263e-06, "loss": 0.6161, "step": 23559 }, { "epoch": 0.6878631280838515, "grad_norm": 0.6734098520970322, "learning_rate": 6.936577453365775e-06, "loss": 0.5916, "step": 23560 }, { "epoch": 0.6878923243116989, "grad_norm": 0.6592372667438453, "learning_rate": 6.935928629359286e-06, "loss": 0.6532, "step": 23561 }, { "epoch": 0.6879215205395462, "grad_norm": 0.6659176904176181, "learning_rate": 6.935279805352798e-06, "loss": 0.6602, "step": 23562 }, { "epoch": 0.6879507167673936, "grad_norm": 0.6405328429451153, "learning_rate": 6.934630981346311e-06, "loss": 0.5479, "step": 23563 }, { "epoch": 0.687979912995241, "grad_norm": 0.6527353417632752, "learning_rate": 6.933982157339822e-06, "loss": 0.5921, "step": 23564 }, { "epoch": 0.6880091092230883, "grad_norm": 0.7615533326581102, "learning_rate": 6.9333333333333344e-06, "loss": 0.5819, "step": 23565 }, { "epoch": 0.6880383054509357, "grad_norm": 0.6368554190461887, "learning_rate": 6.932684509326846e-06, "loss": 0.5887, "step": 23566 }, { "epoch": 0.6880675016787831, "grad_norm": 0.6524106435564693, "learning_rate": 6.932035685320358e-06, "loss": 0.6075, "step": 23567 }, { "epoch": 0.6880966979066304, "grad_norm": 0.6385090983450842, "learning_rate": 6.931386861313869e-06, "loss": 0.6109, "step": 23568 }, { "epoch": 0.6881258941344778, "grad_norm": 0.6953142919731611, "learning_rate": 6.930738037307381e-06, "loss": 0.7141, "step": 23569 }, { "epoch": 0.6881550903623251, "grad_norm": 0.6572342476822242, "learning_rate": 6.930089213300892e-06, "loss": 0.6367, "step": 23570 }, { "epoch": 0.6881842865901725, "grad_norm": 0.6310864907840998, "learning_rate": 6.929440389294404e-06, "loss": 0.5635, "step": 23571 }, { "epoch": 0.6882134828180199, "grad_norm": 0.6620401866815889, "learning_rate": 6.928791565287917e-06, "loss": 0.6248, "step": 23572 }, { "epoch": 0.6882426790458672, "grad_norm": 0.62179383291801, "learning_rate": 6.928142741281428e-06, "loss": 0.6089, "step": 23573 }, { "epoch": 0.6882718752737146, "grad_norm": 0.6710175006533968, "learning_rate": 6.92749391727494e-06, "loss": 0.6065, "step": 23574 }, { "epoch": 0.688301071501562, "grad_norm": 0.694313279887741, "learning_rate": 6.926845093268451e-06, "loss": 0.6323, "step": 23575 }, { "epoch": 0.6883302677294093, "grad_norm": 0.6615283733242541, "learning_rate": 6.926196269261963e-06, "loss": 0.6213, "step": 23576 }, { "epoch": 0.6883594639572567, "grad_norm": 0.6399280369952084, "learning_rate": 6.925547445255475e-06, "loss": 0.6537, "step": 23577 }, { "epoch": 0.6883886601851041, "grad_norm": 0.6380232040147882, "learning_rate": 6.9248986212489865e-06, "loss": 0.6079, "step": 23578 }, { "epoch": 0.6884178564129515, "grad_norm": 0.671066621456783, "learning_rate": 6.924249797242499e-06, "loss": 0.5866, "step": 23579 }, { "epoch": 0.6884470526407989, "grad_norm": 0.6704628900254347, "learning_rate": 6.92360097323601e-06, "loss": 0.6373, "step": 23580 }, { "epoch": 0.6884762488686462, "grad_norm": 0.6392385666903465, "learning_rate": 6.922952149229522e-06, "loss": 0.5893, "step": 23581 }, { "epoch": 0.6885054450964936, "grad_norm": 0.6536371607829778, "learning_rate": 6.922303325223035e-06, "loss": 0.6314, "step": 23582 }, { "epoch": 0.688534641324341, "grad_norm": 0.6467235872176835, "learning_rate": 6.921654501216546e-06, "loss": 0.5951, "step": 23583 }, { "epoch": 0.6885638375521883, "grad_norm": 0.7045209034369364, "learning_rate": 6.921005677210058e-06, "loss": 0.6191, "step": 23584 }, { "epoch": 0.6885930337800357, "grad_norm": 0.6863750057521105, "learning_rate": 6.920356853203569e-06, "loss": 0.6879, "step": 23585 }, { "epoch": 0.688622230007883, "grad_norm": 0.6309601743317396, "learning_rate": 6.919708029197081e-06, "loss": 0.5378, "step": 23586 }, { "epoch": 0.6886514262357304, "grad_norm": 0.6818202376003593, "learning_rate": 6.919059205190592e-06, "loss": 0.7037, "step": 23587 }, { "epoch": 0.6886806224635778, "grad_norm": 0.6541347163380108, "learning_rate": 6.918410381184104e-06, "loss": 0.5956, "step": 23588 }, { "epoch": 0.6887098186914251, "grad_norm": 0.667203241079892, "learning_rate": 6.9177615571776155e-06, "loss": 0.6019, "step": 23589 }, { "epoch": 0.6887390149192725, "grad_norm": 0.6647394989948886, "learning_rate": 6.9171127331711275e-06, "loss": 0.5697, "step": 23590 }, { "epoch": 0.6887682111471198, "grad_norm": 0.6906049617231156, "learning_rate": 6.91646390916464e-06, "loss": 0.6698, "step": 23591 }, { "epoch": 0.6887974073749672, "grad_norm": 0.6913557305044326, "learning_rate": 6.9158150851581515e-06, "loss": 0.6659, "step": 23592 }, { "epoch": 0.6888266036028146, "grad_norm": 0.6430367009985769, "learning_rate": 6.9151662611516636e-06, "loss": 0.5691, "step": 23593 }, { "epoch": 0.6888557998306619, "grad_norm": 0.6405563144839932, "learning_rate": 6.914517437145175e-06, "loss": 0.5563, "step": 23594 }, { "epoch": 0.6888849960585093, "grad_norm": 0.6805830781932617, "learning_rate": 6.913868613138687e-06, "loss": 0.604, "step": 23595 }, { "epoch": 0.6889141922863566, "grad_norm": 0.6438709308966908, "learning_rate": 6.913219789132198e-06, "loss": 0.5748, "step": 23596 }, { "epoch": 0.688943388514204, "grad_norm": 0.5547958801654136, "learning_rate": 6.91257096512571e-06, "loss": 0.4343, "step": 23597 }, { "epoch": 0.6889725847420514, "grad_norm": 0.6279569001542681, "learning_rate": 6.911922141119222e-06, "loss": 0.5593, "step": 23598 }, { "epoch": 0.6890017809698987, "grad_norm": 0.6407848090926149, "learning_rate": 6.911273317112733e-06, "loss": 0.5939, "step": 23599 }, { "epoch": 0.6890309771977461, "grad_norm": 0.6696634058832068, "learning_rate": 6.910624493106245e-06, "loss": 0.6539, "step": 23600 }, { "epoch": 0.6890601734255934, "grad_norm": 0.6690922700272299, "learning_rate": 6.909975669099758e-06, "loss": 0.6796, "step": 23601 }, { "epoch": 0.6890893696534408, "grad_norm": 0.5515849444790116, "learning_rate": 6.909326845093269e-06, "loss": 0.4887, "step": 23602 }, { "epoch": 0.6891185658812882, "grad_norm": 0.6719179721159418, "learning_rate": 6.908678021086781e-06, "loss": 0.616, "step": 23603 }, { "epoch": 0.6891477621091355, "grad_norm": 0.6706235870575576, "learning_rate": 6.9080291970802925e-06, "loss": 0.6697, "step": 23604 }, { "epoch": 0.6891769583369829, "grad_norm": 0.6291944895997735, "learning_rate": 6.9073803730738045e-06, "loss": 0.6332, "step": 23605 }, { "epoch": 0.6892061545648303, "grad_norm": 0.6321294936140283, "learning_rate": 6.906731549067316e-06, "loss": 0.5663, "step": 23606 }, { "epoch": 0.6892353507926776, "grad_norm": 0.6496625755493304, "learning_rate": 6.906082725060828e-06, "loss": 0.6216, "step": 23607 }, { "epoch": 0.689264547020525, "grad_norm": 0.601522581882348, "learning_rate": 6.905433901054339e-06, "loss": 0.508, "step": 23608 }, { "epoch": 0.6892937432483723, "grad_norm": 0.6585482749709101, "learning_rate": 6.904785077047851e-06, "loss": 0.6052, "step": 23609 }, { "epoch": 0.6893229394762197, "grad_norm": 0.6700640042526164, "learning_rate": 6.904136253041364e-06, "loss": 0.6545, "step": 23610 }, { "epoch": 0.6893521357040671, "grad_norm": 0.5835179357782994, "learning_rate": 6.903487429034875e-06, "loss": 0.4748, "step": 23611 }, { "epoch": 0.6893813319319144, "grad_norm": 0.7255246849799045, "learning_rate": 6.902838605028387e-06, "loss": 0.5987, "step": 23612 }, { "epoch": 0.6894105281597618, "grad_norm": 0.6313620073396676, "learning_rate": 6.902189781021898e-06, "loss": 0.6007, "step": 23613 }, { "epoch": 0.6894397243876091, "grad_norm": 0.6467656940762807, "learning_rate": 6.90154095701541e-06, "loss": 0.5923, "step": 23614 }, { "epoch": 0.6894689206154565, "grad_norm": 0.6528995159511333, "learning_rate": 6.900892133008921e-06, "loss": 0.6094, "step": 23615 }, { "epoch": 0.6894981168433039, "grad_norm": 0.8285735211998517, "learning_rate": 6.900243309002433e-06, "loss": 0.5873, "step": 23616 }, { "epoch": 0.6895273130711512, "grad_norm": 0.647874048100921, "learning_rate": 6.8995944849959454e-06, "loss": 0.6178, "step": 23617 }, { "epoch": 0.6895565092989986, "grad_norm": 0.6815426437695263, "learning_rate": 6.898945660989457e-06, "loss": 0.6513, "step": 23618 }, { "epoch": 0.689585705526846, "grad_norm": 0.640331822550009, "learning_rate": 6.8982968369829695e-06, "loss": 0.5882, "step": 23619 }, { "epoch": 0.6896149017546933, "grad_norm": 0.606546834099471, "learning_rate": 6.8976480129764815e-06, "loss": 0.5008, "step": 23620 }, { "epoch": 0.6896440979825407, "grad_norm": 0.6097658806560508, "learning_rate": 6.896999188969993e-06, "loss": 0.5759, "step": 23621 }, { "epoch": 0.689673294210388, "grad_norm": 0.6821433107474684, "learning_rate": 6.896350364963505e-06, "loss": 0.6276, "step": 23622 }, { "epoch": 0.6897024904382354, "grad_norm": 0.638687438312034, "learning_rate": 6.895701540957016e-06, "loss": 0.587, "step": 23623 }, { "epoch": 0.6897316866660828, "grad_norm": 0.7230277424637995, "learning_rate": 6.895052716950528e-06, "loss": 0.6604, "step": 23624 }, { "epoch": 0.6897608828939301, "grad_norm": 0.6281127490278652, "learning_rate": 6.894403892944039e-06, "loss": 0.5819, "step": 23625 }, { "epoch": 0.6897900791217775, "grad_norm": 0.6683099973943438, "learning_rate": 6.893755068937551e-06, "loss": 0.6599, "step": 23626 }, { "epoch": 0.6898192753496248, "grad_norm": 0.652664871394363, "learning_rate": 6.893106244931062e-06, "loss": 0.658, "step": 23627 }, { "epoch": 0.6898484715774722, "grad_norm": 0.5935799879151435, "learning_rate": 6.892457420924574e-06, "loss": 0.5431, "step": 23628 }, { "epoch": 0.6898776678053196, "grad_norm": 0.6733166018803619, "learning_rate": 6.891808596918087e-06, "loss": 0.6699, "step": 23629 }, { "epoch": 0.6899068640331669, "grad_norm": 0.6790356359115352, "learning_rate": 6.891159772911598e-06, "loss": 0.648, "step": 23630 }, { "epoch": 0.6899360602610143, "grad_norm": 0.6407626976293258, "learning_rate": 6.8905109489051104e-06, "loss": 0.6261, "step": 23631 }, { "epoch": 0.6899652564888616, "grad_norm": 0.6961606033396497, "learning_rate": 6.889862124898622e-06, "loss": 0.6829, "step": 23632 }, { "epoch": 0.689994452716709, "grad_norm": 0.7054189962625979, "learning_rate": 6.889213300892134e-06, "loss": 0.7149, "step": 23633 }, { "epoch": 0.6900236489445564, "grad_norm": 0.6336458111070621, "learning_rate": 6.888564476885645e-06, "loss": 0.5983, "step": 23634 }, { "epoch": 0.6900528451724037, "grad_norm": 0.6306941498886323, "learning_rate": 6.887915652879157e-06, "loss": 0.5888, "step": 23635 }, { "epoch": 0.6900820414002511, "grad_norm": 0.6608453174035261, "learning_rate": 6.887266828872669e-06, "loss": 0.6039, "step": 23636 }, { "epoch": 0.6901112376280985, "grad_norm": 0.6700606870767309, "learning_rate": 6.88661800486618e-06, "loss": 0.627, "step": 23637 }, { "epoch": 0.6901404338559458, "grad_norm": 0.5625459919273138, "learning_rate": 6.885969180859693e-06, "loss": 0.4709, "step": 23638 }, { "epoch": 0.6901696300837932, "grad_norm": 0.6572912292458261, "learning_rate": 6.885320356853205e-06, "loss": 0.6392, "step": 23639 }, { "epoch": 0.6901988263116405, "grad_norm": 0.6681899073294946, "learning_rate": 6.884671532846716e-06, "loss": 0.6403, "step": 23640 }, { "epoch": 0.6902280225394879, "grad_norm": 0.6055526305695902, "learning_rate": 6.884022708840228e-06, "loss": 0.5359, "step": 23641 }, { "epoch": 0.6902572187673353, "grad_norm": 0.6726956440973688, "learning_rate": 6.883373884833739e-06, "loss": 0.6018, "step": 23642 }, { "epoch": 0.6902864149951826, "grad_norm": 0.6190398442756527, "learning_rate": 6.882725060827251e-06, "loss": 0.5918, "step": 23643 }, { "epoch": 0.69031561122303, "grad_norm": 0.6434365448330281, "learning_rate": 6.8820762368207626e-06, "loss": 0.6157, "step": 23644 }, { "epoch": 0.6903448074508773, "grad_norm": 0.6440490646619607, "learning_rate": 6.881427412814275e-06, "loss": 0.627, "step": 23645 }, { "epoch": 0.6903740036787247, "grad_norm": 0.6218028760718323, "learning_rate": 6.880778588807786e-06, "loss": 0.5579, "step": 23646 }, { "epoch": 0.6904031999065721, "grad_norm": 0.6356832335561916, "learning_rate": 6.880129764801298e-06, "loss": 0.6595, "step": 23647 }, { "epoch": 0.6904323961344194, "grad_norm": 0.6621725091699406, "learning_rate": 6.879480940794811e-06, "loss": 0.5945, "step": 23648 }, { "epoch": 0.6904615923622668, "grad_norm": 0.5890598637926788, "learning_rate": 6.878832116788322e-06, "loss": 0.5255, "step": 23649 }, { "epoch": 0.6904907885901141, "grad_norm": 0.7000872746347414, "learning_rate": 6.878183292781834e-06, "loss": 0.6781, "step": 23650 }, { "epoch": 0.6905199848179615, "grad_norm": 0.6822726505486184, "learning_rate": 6.877534468775345e-06, "loss": 0.5872, "step": 23651 }, { "epoch": 0.6905491810458089, "grad_norm": 0.626099098496877, "learning_rate": 6.876885644768857e-06, "loss": 0.572, "step": 23652 }, { "epoch": 0.6905783772736562, "grad_norm": 0.7019178009262087, "learning_rate": 6.876236820762368e-06, "loss": 0.6087, "step": 23653 }, { "epoch": 0.6906075735015036, "grad_norm": 0.6017931218929035, "learning_rate": 6.87558799675588e-06, "loss": 0.5323, "step": 23654 }, { "epoch": 0.690636769729351, "grad_norm": 0.6483441305578698, "learning_rate": 6.874939172749392e-06, "loss": 0.6275, "step": 23655 }, { "epoch": 0.6906659659571983, "grad_norm": 0.6496909053301012, "learning_rate": 6.8742903487429035e-06, "loss": 0.6272, "step": 23656 }, { "epoch": 0.6906951621850457, "grad_norm": 0.6223092980438782, "learning_rate": 6.873641524736416e-06, "loss": 0.5794, "step": 23657 }, { "epoch": 0.690724358412893, "grad_norm": 0.6630407925785873, "learning_rate": 6.8729927007299275e-06, "loss": 0.6079, "step": 23658 }, { "epoch": 0.6907535546407404, "grad_norm": 0.6335894027586115, "learning_rate": 6.87234387672344e-06, "loss": 0.6112, "step": 23659 }, { "epoch": 0.6907827508685878, "grad_norm": 0.6174505227530347, "learning_rate": 6.871695052716952e-06, "loss": 0.6033, "step": 23660 }, { "epoch": 0.6908119470964351, "grad_norm": 0.6582666358483804, "learning_rate": 6.871046228710463e-06, "loss": 0.5862, "step": 23661 }, { "epoch": 0.6908411433242825, "grad_norm": 0.6499510284876177, "learning_rate": 6.870397404703975e-06, "loss": 0.6035, "step": 23662 }, { "epoch": 0.6908703395521298, "grad_norm": 0.6641048658886047, "learning_rate": 6.869748580697486e-06, "loss": 0.6895, "step": 23663 }, { "epoch": 0.6908995357799772, "grad_norm": 0.6623500521949832, "learning_rate": 6.869099756690998e-06, "loss": 0.633, "step": 23664 }, { "epoch": 0.6909287320078246, "grad_norm": 0.6108859413615032, "learning_rate": 6.868450932684509e-06, "loss": 0.5372, "step": 23665 }, { "epoch": 0.6909579282356719, "grad_norm": 0.6248941730903047, "learning_rate": 6.867802108678021e-06, "loss": 0.5592, "step": 23666 }, { "epoch": 0.6909871244635193, "grad_norm": 0.6376816388680007, "learning_rate": 6.867153284671534e-06, "loss": 0.6045, "step": 23667 }, { "epoch": 0.6910163206913666, "grad_norm": 0.6484710416447873, "learning_rate": 6.866504460665045e-06, "loss": 0.5969, "step": 23668 }, { "epoch": 0.691045516919214, "grad_norm": 0.6264780044141909, "learning_rate": 6.865855636658557e-06, "loss": 0.5393, "step": 23669 }, { "epoch": 0.6910747131470614, "grad_norm": 0.6039361383655643, "learning_rate": 6.8652068126520685e-06, "loss": 0.558, "step": 23670 }, { "epoch": 0.6911039093749087, "grad_norm": 0.6364800481774746, "learning_rate": 6.8645579886455805e-06, "loss": 0.5594, "step": 23671 }, { "epoch": 0.6911331056027561, "grad_norm": 0.6569724800802675, "learning_rate": 6.863909164639092e-06, "loss": 0.604, "step": 23672 }, { "epoch": 0.6911623018306035, "grad_norm": 0.6743469077454499, "learning_rate": 6.863260340632604e-06, "loss": 0.6359, "step": 23673 }, { "epoch": 0.6911914980584508, "grad_norm": 0.5952758301625979, "learning_rate": 6.862611516626116e-06, "loss": 0.5335, "step": 23674 }, { "epoch": 0.6912206942862982, "grad_norm": 0.619069687269647, "learning_rate": 6.861962692619627e-06, "loss": 0.5806, "step": 23675 }, { "epoch": 0.6912498905141455, "grad_norm": 0.6668507919982232, "learning_rate": 6.86131386861314e-06, "loss": 0.6382, "step": 23676 }, { "epoch": 0.6912790867419929, "grad_norm": 0.6729935586689603, "learning_rate": 6.860665044606651e-06, "loss": 0.6581, "step": 23677 }, { "epoch": 0.6913082829698403, "grad_norm": 0.6404064657446794, "learning_rate": 6.860016220600163e-06, "loss": 0.6276, "step": 23678 }, { "epoch": 0.6913374791976876, "grad_norm": 0.6276026020911732, "learning_rate": 6.859367396593675e-06, "loss": 0.5937, "step": 23679 }, { "epoch": 0.691366675425535, "grad_norm": 0.6477917080520669, "learning_rate": 6.858718572587186e-06, "loss": 0.5977, "step": 23680 }, { "epoch": 0.6913958716533823, "grad_norm": 0.625529964502887, "learning_rate": 6.858069748580698e-06, "loss": 0.5876, "step": 23681 }, { "epoch": 0.6914250678812297, "grad_norm": 0.6473660908202831, "learning_rate": 6.8574209245742094e-06, "loss": 0.5871, "step": 23682 }, { "epoch": 0.6914542641090771, "grad_norm": 0.6458089971717341, "learning_rate": 6.8567721005677215e-06, "loss": 0.5926, "step": 23683 }, { "epoch": 0.6914834603369244, "grad_norm": 0.6105922052496519, "learning_rate": 6.856123276561233e-06, "loss": 0.581, "step": 23684 }, { "epoch": 0.6915126565647718, "grad_norm": 0.6801061682848027, "learning_rate": 6.8554744525547455e-06, "loss": 0.5837, "step": 23685 }, { "epoch": 0.6915418527926191, "grad_norm": 0.6106783104516494, "learning_rate": 6.8548256285482575e-06, "loss": 0.5321, "step": 23686 }, { "epoch": 0.6915710490204665, "grad_norm": 20.81991752395924, "learning_rate": 6.854176804541769e-06, "loss": 1.1792, "step": 23687 }, { "epoch": 0.6916002452483139, "grad_norm": 0.6399259901956034, "learning_rate": 6.853527980535281e-06, "loss": 0.6026, "step": 23688 }, { "epoch": 0.6916294414761612, "grad_norm": 0.6170181293295849, "learning_rate": 6.852879156528792e-06, "loss": 0.5343, "step": 23689 }, { "epoch": 0.6916586377040086, "grad_norm": 0.6715054961970832, "learning_rate": 6.852230332522304e-06, "loss": 0.5592, "step": 23690 }, { "epoch": 0.691687833931856, "grad_norm": 0.6270338907201517, "learning_rate": 6.851581508515815e-06, "loss": 0.5195, "step": 23691 }, { "epoch": 0.6917170301597033, "grad_norm": 0.6815502121891887, "learning_rate": 6.850932684509327e-06, "loss": 0.7187, "step": 23692 }, { "epoch": 0.6917462263875507, "grad_norm": 0.6496183840533776, "learning_rate": 6.850283860502839e-06, "loss": 0.6032, "step": 23693 }, { "epoch": 0.691775422615398, "grad_norm": 0.62169177928595, "learning_rate": 6.84963503649635e-06, "loss": 0.56, "step": 23694 }, { "epoch": 0.6918046188432454, "grad_norm": 0.656095475314084, "learning_rate": 6.848986212489863e-06, "loss": 0.5995, "step": 23695 }, { "epoch": 0.6918338150710928, "grad_norm": 0.6655389294652289, "learning_rate": 6.848337388483374e-06, "loss": 0.6243, "step": 23696 }, { "epoch": 0.6918630112989401, "grad_norm": 0.6353748406308577, "learning_rate": 6.8476885644768864e-06, "loss": 0.5558, "step": 23697 }, { "epoch": 0.6918922075267875, "grad_norm": 0.6173758683903061, "learning_rate": 6.8470397404703985e-06, "loss": 0.5575, "step": 23698 }, { "epoch": 0.691921403754635, "grad_norm": 0.6188251392571134, "learning_rate": 6.84639091646391e-06, "loss": 0.5635, "step": 23699 }, { "epoch": 0.6919505999824823, "grad_norm": 0.6406586131307851, "learning_rate": 6.845742092457422e-06, "loss": 0.5637, "step": 23700 }, { "epoch": 0.6919797962103297, "grad_norm": 0.6205380741842478, "learning_rate": 6.845093268450933e-06, "loss": 0.5616, "step": 23701 }, { "epoch": 0.692008992438177, "grad_norm": 0.7008262825659837, "learning_rate": 6.844444444444445e-06, "loss": 0.605, "step": 23702 }, { "epoch": 0.6920381886660244, "grad_norm": 0.6083082838383329, "learning_rate": 6.843795620437956e-06, "loss": 0.5487, "step": 23703 }, { "epoch": 0.6920673848938718, "grad_norm": 0.6985403666646491, "learning_rate": 6.843146796431469e-06, "loss": 0.6375, "step": 23704 }, { "epoch": 0.6920965811217191, "grad_norm": 0.6687873579643395, "learning_rate": 6.842497972424981e-06, "loss": 0.5838, "step": 23705 }, { "epoch": 0.6921257773495665, "grad_norm": 0.6122541804711309, "learning_rate": 6.841849148418492e-06, "loss": 0.5363, "step": 23706 }, { "epoch": 0.6921549735774138, "grad_norm": 0.660747777337776, "learning_rate": 6.841200324412004e-06, "loss": 0.5497, "step": 23707 }, { "epoch": 0.6921841698052612, "grad_norm": 0.6084611806919177, "learning_rate": 6.840551500405515e-06, "loss": 0.532, "step": 23708 }, { "epoch": 0.6922133660331086, "grad_norm": 0.5892498154078, "learning_rate": 6.839902676399027e-06, "loss": 0.4914, "step": 23709 }, { "epoch": 0.6922425622609559, "grad_norm": 0.6487156921412129, "learning_rate": 6.8392538523925386e-06, "loss": 0.5995, "step": 23710 }, { "epoch": 0.6922717584888033, "grad_norm": 0.6681826742300262, "learning_rate": 6.838605028386051e-06, "loss": 0.6356, "step": 23711 }, { "epoch": 0.6923009547166507, "grad_norm": 0.6826761263362111, "learning_rate": 6.837956204379563e-06, "loss": 0.6341, "step": 23712 }, { "epoch": 0.692330150944498, "grad_norm": 0.695959554044596, "learning_rate": 6.837307380373074e-06, "loss": 0.5845, "step": 23713 }, { "epoch": 0.6923593471723454, "grad_norm": 0.6408724417692043, "learning_rate": 6.836658556366587e-06, "loss": 0.6117, "step": 23714 }, { "epoch": 0.6923885434001927, "grad_norm": 0.6716105899405903, "learning_rate": 6.836009732360098e-06, "loss": 0.625, "step": 23715 }, { "epoch": 0.6924177396280401, "grad_norm": 0.7093193560612034, "learning_rate": 6.83536090835361e-06, "loss": 0.696, "step": 23716 }, { "epoch": 0.6924469358558875, "grad_norm": 0.6124138280201211, "learning_rate": 6.834712084347122e-06, "loss": 0.5473, "step": 23717 }, { "epoch": 0.6924761320837348, "grad_norm": 0.6718693939155258, "learning_rate": 6.834063260340633e-06, "loss": 0.6474, "step": 23718 }, { "epoch": 0.6925053283115822, "grad_norm": 0.6408697340512951, "learning_rate": 6.833414436334145e-06, "loss": 0.5935, "step": 23719 }, { "epoch": 0.6925345245394295, "grad_norm": 0.6441285725973637, "learning_rate": 6.832765612327656e-06, "loss": 0.626, "step": 23720 }, { "epoch": 0.6925637207672769, "grad_norm": 0.6080592601700353, "learning_rate": 6.832116788321168e-06, "loss": 0.5735, "step": 23721 }, { "epoch": 0.6925929169951243, "grad_norm": 0.7043813201273419, "learning_rate": 6.8314679643146795e-06, "loss": 0.6688, "step": 23722 }, { "epoch": 0.6926221132229716, "grad_norm": 0.5949054701402029, "learning_rate": 6.830819140308192e-06, "loss": 0.5244, "step": 23723 }, { "epoch": 0.692651309450819, "grad_norm": 0.6327698303936081, "learning_rate": 6.830170316301704e-06, "loss": 0.6034, "step": 23724 }, { "epoch": 0.6926805056786663, "grad_norm": 0.6900193382376506, "learning_rate": 6.829521492295216e-06, "loss": 0.6989, "step": 23725 }, { "epoch": 0.6927097019065137, "grad_norm": 0.6389469575845377, "learning_rate": 6.828872668288728e-06, "loss": 0.5998, "step": 23726 }, { "epoch": 0.6927388981343611, "grad_norm": 0.6676353166290246, "learning_rate": 6.828223844282239e-06, "loss": 0.6497, "step": 23727 }, { "epoch": 0.6927680943622084, "grad_norm": 0.6407866115638695, "learning_rate": 6.827575020275751e-06, "loss": 0.5764, "step": 23728 }, { "epoch": 0.6927972905900558, "grad_norm": 0.6980320133397196, "learning_rate": 6.826926196269262e-06, "loss": 0.6853, "step": 23729 }, { "epoch": 0.6928264868179032, "grad_norm": 0.666022381376422, "learning_rate": 6.826277372262774e-06, "loss": 0.6124, "step": 23730 }, { "epoch": 0.6928556830457505, "grad_norm": 0.586864607057208, "learning_rate": 6.825628548256285e-06, "loss": 0.5345, "step": 23731 }, { "epoch": 0.6928848792735979, "grad_norm": 0.684498296350969, "learning_rate": 6.824979724249797e-06, "loss": 0.6073, "step": 23732 }, { "epoch": 0.6929140755014452, "grad_norm": 0.6888590221303144, "learning_rate": 6.82433090024331e-06, "loss": 0.7167, "step": 23733 }, { "epoch": 0.6929432717292926, "grad_norm": 0.6344586700708578, "learning_rate": 6.823682076236821e-06, "loss": 0.5978, "step": 23734 }, { "epoch": 0.69297246795714, "grad_norm": 0.6609826439479559, "learning_rate": 6.823033252230333e-06, "loss": 0.5779, "step": 23735 }, { "epoch": 0.6930016641849873, "grad_norm": 0.6285716573723513, "learning_rate": 6.822384428223845e-06, "loss": 0.5725, "step": 23736 }, { "epoch": 0.6930308604128347, "grad_norm": 0.6789905340996742, "learning_rate": 6.8217356042173565e-06, "loss": 0.6083, "step": 23737 }, { "epoch": 0.693060056640682, "grad_norm": 0.5976765064114076, "learning_rate": 6.8210867802108685e-06, "loss": 0.5518, "step": 23738 }, { "epoch": 0.6930892528685294, "grad_norm": 0.665918332461675, "learning_rate": 6.82043795620438e-06, "loss": 0.6101, "step": 23739 }, { "epoch": 0.6931184490963768, "grad_norm": 0.6135268321851117, "learning_rate": 6.819789132197892e-06, "loss": 0.5719, "step": 23740 }, { "epoch": 0.6931476453242241, "grad_norm": 0.6101011977039117, "learning_rate": 6.819140308191403e-06, "loss": 0.5799, "step": 23741 }, { "epoch": 0.6931768415520715, "grad_norm": 0.6213516073598089, "learning_rate": 6.818491484184916e-06, "loss": 0.5553, "step": 23742 }, { "epoch": 0.6932060377799188, "grad_norm": 0.6466611604393245, "learning_rate": 6.817842660178428e-06, "loss": 0.5892, "step": 23743 }, { "epoch": 0.6932352340077662, "grad_norm": 0.6635490351526797, "learning_rate": 6.817193836171939e-06, "loss": 0.5219, "step": 23744 }, { "epoch": 0.6932644302356136, "grad_norm": 0.6623509666534335, "learning_rate": 6.816545012165451e-06, "loss": 0.6203, "step": 23745 }, { "epoch": 0.6932936264634609, "grad_norm": 0.6606155440100802, "learning_rate": 6.815896188158962e-06, "loss": 0.6674, "step": 23746 }, { "epoch": 0.6933228226913083, "grad_norm": 0.6707622779515018, "learning_rate": 6.815247364152474e-06, "loss": 0.6701, "step": 23747 }, { "epoch": 0.6933520189191557, "grad_norm": 0.6423581634522023, "learning_rate": 6.8145985401459854e-06, "loss": 0.5908, "step": 23748 }, { "epoch": 0.693381215147003, "grad_norm": 0.6911897246645154, "learning_rate": 6.8139497161394975e-06, "loss": 0.6251, "step": 23749 }, { "epoch": 0.6934104113748504, "grad_norm": 0.6520915654532614, "learning_rate": 6.813300892133009e-06, "loss": 0.6434, "step": 23750 }, { "epoch": 0.6934396076026977, "grad_norm": 0.7312767701877, "learning_rate": 6.812652068126521e-06, "loss": 0.7287, "step": 23751 }, { "epoch": 0.6934688038305451, "grad_norm": 0.6221105911313348, "learning_rate": 6.8120032441200335e-06, "loss": 0.5709, "step": 23752 }, { "epoch": 0.6934980000583925, "grad_norm": 0.7091426914792895, "learning_rate": 6.811354420113545e-06, "loss": 0.5732, "step": 23753 }, { "epoch": 0.6935271962862398, "grad_norm": 0.639929143086945, "learning_rate": 6.810705596107057e-06, "loss": 0.6092, "step": 23754 }, { "epoch": 0.6935563925140872, "grad_norm": 0.5997127969186122, "learning_rate": 6.810056772100569e-06, "loss": 0.5485, "step": 23755 }, { "epoch": 0.6935855887419345, "grad_norm": 0.6962620099543728, "learning_rate": 6.80940794809408e-06, "loss": 0.6778, "step": 23756 }, { "epoch": 0.6936147849697819, "grad_norm": 0.6381110845276166, "learning_rate": 6.808759124087592e-06, "loss": 0.6012, "step": 23757 }, { "epoch": 0.6936439811976293, "grad_norm": 0.6104111657111979, "learning_rate": 6.808110300081103e-06, "loss": 0.5061, "step": 23758 }, { "epoch": 0.6936731774254766, "grad_norm": 0.6683671810408862, "learning_rate": 6.807461476074615e-06, "loss": 0.6063, "step": 23759 }, { "epoch": 0.693702373653324, "grad_norm": 0.6793655589341457, "learning_rate": 6.806812652068126e-06, "loss": 0.6673, "step": 23760 }, { "epoch": 0.6937315698811714, "grad_norm": 0.7151955988297314, "learning_rate": 6.806163828061639e-06, "loss": 0.759, "step": 23761 }, { "epoch": 0.6937607661090187, "grad_norm": 0.6549410328286862, "learning_rate": 6.805515004055151e-06, "loss": 0.6915, "step": 23762 }, { "epoch": 0.6937899623368661, "grad_norm": 0.6831141174666705, "learning_rate": 6.8048661800486625e-06, "loss": 0.6407, "step": 23763 }, { "epoch": 0.6938191585647134, "grad_norm": 0.6355223549570755, "learning_rate": 6.8042173560421745e-06, "loss": 0.558, "step": 23764 }, { "epoch": 0.6938483547925608, "grad_norm": 0.6613114943794609, "learning_rate": 6.803568532035686e-06, "loss": 0.6324, "step": 23765 }, { "epoch": 0.6938775510204082, "grad_norm": 0.7054959371978264, "learning_rate": 6.802919708029198e-06, "loss": 0.6498, "step": 23766 }, { "epoch": 0.6939067472482555, "grad_norm": 0.6246312224734546, "learning_rate": 6.802270884022709e-06, "loss": 0.5707, "step": 23767 }, { "epoch": 0.6939359434761029, "grad_norm": 0.6254476784805434, "learning_rate": 6.801622060016221e-06, "loss": 0.5841, "step": 23768 }, { "epoch": 0.6939651397039502, "grad_norm": 0.6655405791542941, "learning_rate": 6.800973236009732e-06, "loss": 0.6386, "step": 23769 }, { "epoch": 0.6939943359317976, "grad_norm": 0.6378099969322746, "learning_rate": 6.800324412003245e-06, "loss": 0.5357, "step": 23770 }, { "epoch": 0.694023532159645, "grad_norm": 0.6617038188403418, "learning_rate": 6.799675587996757e-06, "loss": 0.6405, "step": 23771 }, { "epoch": 0.6940527283874923, "grad_norm": 0.6222832303280071, "learning_rate": 6.799026763990268e-06, "loss": 0.6309, "step": 23772 }, { "epoch": 0.6940819246153397, "grad_norm": 0.6527638949719884, "learning_rate": 6.79837793998378e-06, "loss": 0.6239, "step": 23773 }, { "epoch": 0.694111120843187, "grad_norm": 0.6250109425516988, "learning_rate": 6.797729115977292e-06, "loss": 0.5715, "step": 23774 }, { "epoch": 0.6941403170710344, "grad_norm": 0.7075789062923264, "learning_rate": 6.797080291970803e-06, "loss": 0.6344, "step": 23775 }, { "epoch": 0.6941695132988818, "grad_norm": 0.6397371765864713, "learning_rate": 6.796431467964315e-06, "loss": 0.6144, "step": 23776 }, { "epoch": 0.6941987095267291, "grad_norm": 0.6412094679047777, "learning_rate": 6.795782643957827e-06, "loss": 0.5918, "step": 23777 }, { "epoch": 0.6942279057545765, "grad_norm": 0.6021141969025748, "learning_rate": 6.795133819951339e-06, "loss": 0.5334, "step": 23778 }, { "epoch": 0.6942571019824239, "grad_norm": 0.6591998494091936, "learning_rate": 6.79448499594485e-06, "loss": 0.6201, "step": 23779 }, { "epoch": 0.6942862982102712, "grad_norm": 0.6296240293944178, "learning_rate": 6.793836171938363e-06, "loss": 0.6033, "step": 23780 }, { "epoch": 0.6943154944381186, "grad_norm": 0.6953880193069153, "learning_rate": 6.793187347931875e-06, "loss": 0.6398, "step": 23781 }, { "epoch": 0.6943446906659659, "grad_norm": 0.6092127031970807, "learning_rate": 6.792538523925386e-06, "loss": 0.5709, "step": 23782 }, { "epoch": 0.6943738868938133, "grad_norm": 0.646195500632654, "learning_rate": 6.791889699918898e-06, "loss": 0.6141, "step": 23783 }, { "epoch": 0.6944030831216607, "grad_norm": 0.6341747487255512, "learning_rate": 6.791240875912409e-06, "loss": 0.5855, "step": 23784 }, { "epoch": 0.694432279349508, "grad_norm": 0.7937999840160183, "learning_rate": 6.790592051905921e-06, "loss": 0.7115, "step": 23785 }, { "epoch": 0.6944614755773554, "grad_norm": 0.6284080579035435, "learning_rate": 6.789943227899432e-06, "loss": 0.588, "step": 23786 }, { "epoch": 0.6944906718052027, "grad_norm": 0.6668606987599068, "learning_rate": 6.789294403892944e-06, "loss": 0.6033, "step": 23787 }, { "epoch": 0.6945198680330501, "grad_norm": 0.6449549233950846, "learning_rate": 6.7886455798864555e-06, "loss": 0.6099, "step": 23788 }, { "epoch": 0.6945490642608975, "grad_norm": 0.678805304735552, "learning_rate": 6.787996755879968e-06, "loss": 0.6906, "step": 23789 }, { "epoch": 0.6945782604887448, "grad_norm": 0.6891764782420147, "learning_rate": 6.78734793187348e-06, "loss": 0.5896, "step": 23790 }, { "epoch": 0.6946074567165922, "grad_norm": 0.6153552609312161, "learning_rate": 6.786699107866992e-06, "loss": 0.5639, "step": 23791 }, { "epoch": 0.6946366529444395, "grad_norm": 0.6495748468530493, "learning_rate": 6.786050283860504e-06, "loss": 0.6222, "step": 23792 }, { "epoch": 0.6946658491722869, "grad_norm": 0.6329160926588268, "learning_rate": 6.785401459854015e-06, "loss": 0.6229, "step": 23793 }, { "epoch": 0.6946950454001343, "grad_norm": 0.599342188172154, "learning_rate": 6.784752635847527e-06, "loss": 0.532, "step": 23794 }, { "epoch": 0.6947242416279816, "grad_norm": 0.627220477616941, "learning_rate": 6.784103811841039e-06, "loss": 0.5059, "step": 23795 }, { "epoch": 0.694753437855829, "grad_norm": 0.6048567376267295, "learning_rate": 6.78345498783455e-06, "loss": 0.5385, "step": 23796 }, { "epoch": 0.6947826340836764, "grad_norm": 0.6660237151496904, "learning_rate": 6.782806163828062e-06, "loss": 0.6563, "step": 23797 }, { "epoch": 0.6948118303115237, "grad_norm": 0.6335036709443029, "learning_rate": 6.782157339821573e-06, "loss": 0.554, "step": 23798 }, { "epoch": 0.6948410265393711, "grad_norm": 0.6333989438178974, "learning_rate": 6.781508515815086e-06, "loss": 0.6205, "step": 23799 }, { "epoch": 0.6948702227672184, "grad_norm": 0.5930184515425084, "learning_rate": 6.780859691808598e-06, "loss": 0.5036, "step": 23800 }, { "epoch": 0.6948994189950658, "grad_norm": 0.60320343415928, "learning_rate": 6.780210867802109e-06, "loss": 0.5373, "step": 23801 }, { "epoch": 0.6949286152229132, "grad_norm": 0.6702086000113152, "learning_rate": 6.779562043795621e-06, "loss": 0.6779, "step": 23802 }, { "epoch": 0.6949578114507605, "grad_norm": 0.6850478584568659, "learning_rate": 6.7789132197891325e-06, "loss": 0.6152, "step": 23803 }, { "epoch": 0.6949870076786079, "grad_norm": 0.5972172921053894, "learning_rate": 6.7782643957826446e-06, "loss": 0.5402, "step": 23804 }, { "epoch": 0.6950162039064552, "grad_norm": 0.5930189586184932, "learning_rate": 6.777615571776156e-06, "loss": 0.498, "step": 23805 }, { "epoch": 0.6950454001343026, "grad_norm": 0.6787171213479619, "learning_rate": 6.776966747769668e-06, "loss": 0.6655, "step": 23806 }, { "epoch": 0.69507459636215, "grad_norm": 0.6439267220994548, "learning_rate": 6.776317923763179e-06, "loss": 0.6167, "step": 23807 }, { "epoch": 0.6951037925899973, "grad_norm": 0.6423097465815516, "learning_rate": 6.775669099756692e-06, "loss": 0.6081, "step": 23808 }, { "epoch": 0.6951329888178447, "grad_norm": 0.7128350276569196, "learning_rate": 6.775020275750204e-06, "loss": 0.6931, "step": 23809 }, { "epoch": 0.695162185045692, "grad_norm": 0.6498646135206518, "learning_rate": 6.774371451743715e-06, "loss": 0.5918, "step": 23810 }, { "epoch": 0.6951913812735394, "grad_norm": 0.707394905841378, "learning_rate": 6.773722627737227e-06, "loss": 0.657, "step": 23811 }, { "epoch": 0.6952205775013868, "grad_norm": 0.5567585398999787, "learning_rate": 6.773073803730738e-06, "loss": 0.4502, "step": 23812 }, { "epoch": 0.6952497737292341, "grad_norm": 0.6482424434560938, "learning_rate": 6.77242497972425e-06, "loss": 0.5941, "step": 23813 }, { "epoch": 0.6952789699570815, "grad_norm": 0.673814468277948, "learning_rate": 6.771776155717762e-06, "loss": 0.6608, "step": 23814 }, { "epoch": 0.6953081661849289, "grad_norm": 0.660234620497304, "learning_rate": 6.7711273317112735e-06, "loss": 0.6688, "step": 23815 }, { "epoch": 0.6953373624127762, "grad_norm": 0.6351117607350577, "learning_rate": 6.7704785077047855e-06, "loss": 0.5929, "step": 23816 }, { "epoch": 0.6953665586406236, "grad_norm": 0.6855075637247194, "learning_rate": 6.769829683698297e-06, "loss": 0.7176, "step": 23817 }, { "epoch": 0.6953957548684709, "grad_norm": 0.7019967952254915, "learning_rate": 6.7691808596918095e-06, "loss": 0.6627, "step": 23818 }, { "epoch": 0.6954249510963184, "grad_norm": 0.6697375129563895, "learning_rate": 6.7685320356853216e-06, "loss": 0.5856, "step": 23819 }, { "epoch": 0.6954541473241658, "grad_norm": 0.6675232668595851, "learning_rate": 6.767883211678833e-06, "loss": 0.6336, "step": 23820 }, { "epoch": 0.6954833435520131, "grad_norm": 0.6450993182245267, "learning_rate": 6.767234387672345e-06, "loss": 0.5622, "step": 23821 }, { "epoch": 0.6955125397798605, "grad_norm": 0.6072258792265541, "learning_rate": 6.766585563665856e-06, "loss": 0.5441, "step": 23822 }, { "epoch": 0.6955417360077079, "grad_norm": 0.6525601611977206, "learning_rate": 6.765936739659368e-06, "loss": 0.6021, "step": 23823 }, { "epoch": 0.6955709322355552, "grad_norm": 0.707216572405314, "learning_rate": 6.765287915652879e-06, "loss": 0.6777, "step": 23824 }, { "epoch": 0.6956001284634026, "grad_norm": 0.672505373433396, "learning_rate": 6.764639091646391e-06, "loss": 0.6111, "step": 23825 }, { "epoch": 0.6956293246912499, "grad_norm": 0.6313944528581643, "learning_rate": 6.763990267639902e-06, "loss": 0.5821, "step": 23826 }, { "epoch": 0.6956585209190973, "grad_norm": 0.6460776744565183, "learning_rate": 6.763341443633415e-06, "loss": 0.6117, "step": 23827 }, { "epoch": 0.6956877171469447, "grad_norm": 0.6671336955572923, "learning_rate": 6.762692619626927e-06, "loss": 0.594, "step": 23828 }, { "epoch": 0.695716913374792, "grad_norm": 0.6304706412483331, "learning_rate": 6.7620437956204385e-06, "loss": 0.5852, "step": 23829 }, { "epoch": 0.6957461096026394, "grad_norm": 0.6475818320757624, "learning_rate": 6.7613949716139505e-06, "loss": 0.5564, "step": 23830 }, { "epoch": 0.6957753058304867, "grad_norm": 0.7089752962884248, "learning_rate": 6.760746147607462e-06, "loss": 0.6726, "step": 23831 }, { "epoch": 0.6958045020583341, "grad_norm": 0.6343151489421518, "learning_rate": 6.760097323600974e-06, "loss": 0.5623, "step": 23832 }, { "epoch": 0.6958336982861815, "grad_norm": 0.6532396976196142, "learning_rate": 6.759448499594486e-06, "loss": 0.6109, "step": 23833 }, { "epoch": 0.6958628945140288, "grad_norm": 0.5448029970244261, "learning_rate": 6.758799675587997e-06, "loss": 0.4558, "step": 23834 }, { "epoch": 0.6958920907418762, "grad_norm": 0.661335028322183, "learning_rate": 6.758150851581509e-06, "loss": 0.662, "step": 23835 }, { "epoch": 0.6959212869697236, "grad_norm": 0.6568857459674373, "learning_rate": 6.75750202757502e-06, "loss": 0.6079, "step": 23836 }, { "epoch": 0.6959504831975709, "grad_norm": 0.6037625834956759, "learning_rate": 6.756853203568533e-06, "loss": 0.5576, "step": 23837 }, { "epoch": 0.6959796794254183, "grad_norm": 0.6315302895525485, "learning_rate": 6.756204379562045e-06, "loss": 0.5696, "step": 23838 }, { "epoch": 0.6960088756532656, "grad_norm": 0.6393058755153882, "learning_rate": 6.755555555555556e-06, "loss": 0.6071, "step": 23839 }, { "epoch": 0.696038071881113, "grad_norm": 0.6652069123318506, "learning_rate": 6.754906731549068e-06, "loss": 0.6323, "step": 23840 }, { "epoch": 0.6960672681089604, "grad_norm": 0.6004446475296292, "learning_rate": 6.754257907542579e-06, "loss": 0.5841, "step": 23841 }, { "epoch": 0.6960964643368077, "grad_norm": 0.6529633928060081, "learning_rate": 6.7536090835360914e-06, "loss": 0.6029, "step": 23842 }, { "epoch": 0.6961256605646551, "grad_norm": 0.6534776589516037, "learning_rate": 6.752960259529603e-06, "loss": 0.6123, "step": 23843 }, { "epoch": 0.6961548567925024, "grad_norm": 0.6611283216704356, "learning_rate": 6.752311435523115e-06, "loss": 0.6435, "step": 23844 }, { "epoch": 0.6961840530203498, "grad_norm": 0.5889092789181641, "learning_rate": 6.751662611516626e-06, "loss": 0.5359, "step": 23845 }, { "epoch": 0.6962132492481972, "grad_norm": 0.6013847748407273, "learning_rate": 6.751013787510139e-06, "loss": 0.5356, "step": 23846 }, { "epoch": 0.6962424454760445, "grad_norm": 0.7060815707264084, "learning_rate": 6.750364963503651e-06, "loss": 0.6899, "step": 23847 }, { "epoch": 0.6962716417038919, "grad_norm": 0.6050815634647336, "learning_rate": 6.749716139497162e-06, "loss": 0.5857, "step": 23848 }, { "epoch": 0.6963008379317392, "grad_norm": 0.6203317918007316, "learning_rate": 6.749067315490674e-06, "loss": 0.5455, "step": 23849 }, { "epoch": 0.6963300341595866, "grad_norm": 0.6741683515487725, "learning_rate": 6.748418491484185e-06, "loss": 0.6275, "step": 23850 }, { "epoch": 0.696359230387434, "grad_norm": 0.7681174551839615, "learning_rate": 6.747769667477697e-06, "loss": 0.5373, "step": 23851 }, { "epoch": 0.6963884266152813, "grad_norm": 0.6853494506933124, "learning_rate": 6.747120843471209e-06, "loss": 0.6339, "step": 23852 }, { "epoch": 0.6964176228431287, "grad_norm": 0.6179724810181721, "learning_rate": 6.74647201946472e-06, "loss": 0.5704, "step": 23853 }, { "epoch": 0.696446819070976, "grad_norm": 0.5924939784279833, "learning_rate": 6.745823195458232e-06, "loss": 0.5658, "step": 23854 }, { "epoch": 0.6964760152988234, "grad_norm": 0.6169128319617031, "learning_rate": 6.745174371451744e-06, "loss": 0.581, "step": 23855 }, { "epoch": 0.6965052115266708, "grad_norm": 0.6892394767121335, "learning_rate": 6.744525547445256e-06, "loss": 0.7163, "step": 23856 }, { "epoch": 0.6965344077545181, "grad_norm": 0.5949067630144358, "learning_rate": 6.7438767234387684e-06, "loss": 0.5618, "step": 23857 }, { "epoch": 0.6965636039823655, "grad_norm": 0.5920633288347725, "learning_rate": 6.74322789943228e-06, "loss": 0.548, "step": 23858 }, { "epoch": 0.6965928002102129, "grad_norm": 0.6707377817004527, "learning_rate": 6.742579075425792e-06, "loss": 0.6622, "step": 23859 }, { "epoch": 0.6966219964380602, "grad_norm": 0.6651060944276104, "learning_rate": 6.741930251419303e-06, "loss": 0.626, "step": 23860 }, { "epoch": 0.6966511926659076, "grad_norm": 0.6607520469867041, "learning_rate": 6.741281427412815e-06, "loss": 0.6314, "step": 23861 }, { "epoch": 0.6966803888937549, "grad_norm": 0.5802286127794707, "learning_rate": 6.740632603406326e-06, "loss": 0.5166, "step": 23862 }, { "epoch": 0.6967095851216023, "grad_norm": 0.6388298454361849, "learning_rate": 6.739983779399838e-06, "loss": 0.6006, "step": 23863 }, { "epoch": 0.6967387813494497, "grad_norm": 0.6611033541815078, "learning_rate": 6.739334955393349e-06, "loss": 0.6132, "step": 23864 }, { "epoch": 0.696767977577297, "grad_norm": 0.6876406981901285, "learning_rate": 6.738686131386862e-06, "loss": 0.6483, "step": 23865 }, { "epoch": 0.6967971738051444, "grad_norm": 0.6544501951773962, "learning_rate": 6.738037307380374e-06, "loss": 0.59, "step": 23866 }, { "epoch": 0.6968263700329917, "grad_norm": 0.6037796738549471, "learning_rate": 6.737388483373885e-06, "loss": 0.5395, "step": 23867 }, { "epoch": 0.6968555662608391, "grad_norm": 0.6613948829078766, "learning_rate": 6.736739659367397e-06, "loss": 0.6482, "step": 23868 }, { "epoch": 0.6968847624886865, "grad_norm": 0.630470130998796, "learning_rate": 6.7360908353609085e-06, "loss": 0.559, "step": 23869 }, { "epoch": 0.6969139587165338, "grad_norm": 0.7066491120506678, "learning_rate": 6.7354420113544206e-06, "loss": 0.5903, "step": 23870 }, { "epoch": 0.6969431549443812, "grad_norm": 0.6731309630091952, "learning_rate": 6.734793187347933e-06, "loss": 0.6877, "step": 23871 }, { "epoch": 0.6969723511722286, "grad_norm": 0.6491608359137047, "learning_rate": 6.734144363341444e-06, "loss": 0.5989, "step": 23872 }, { "epoch": 0.6970015474000759, "grad_norm": 0.6297211388924194, "learning_rate": 6.733495539334956e-06, "loss": 0.5927, "step": 23873 }, { "epoch": 0.6970307436279233, "grad_norm": 0.634712304251494, "learning_rate": 6.732846715328468e-06, "loss": 0.5842, "step": 23874 }, { "epoch": 0.6970599398557706, "grad_norm": 0.6489328180377805, "learning_rate": 6.73219789132198e-06, "loss": 0.609, "step": 23875 }, { "epoch": 0.697089136083618, "grad_norm": 0.6117144035960196, "learning_rate": 6.731549067315492e-06, "loss": 0.5668, "step": 23876 }, { "epoch": 0.6971183323114654, "grad_norm": 0.6157317900420216, "learning_rate": 6.730900243309003e-06, "loss": 0.5371, "step": 23877 }, { "epoch": 0.6971475285393127, "grad_norm": 0.6486919807474899, "learning_rate": 6.730251419302515e-06, "loss": 0.5713, "step": 23878 }, { "epoch": 0.6971767247671601, "grad_norm": 0.5829436390286117, "learning_rate": 6.729602595296026e-06, "loss": 0.5349, "step": 23879 }, { "epoch": 0.6972059209950074, "grad_norm": 0.6633849516730539, "learning_rate": 6.728953771289538e-06, "loss": 0.6556, "step": 23880 }, { "epoch": 0.6972351172228548, "grad_norm": 0.613037453359749, "learning_rate": 6.7283049472830495e-06, "loss": 0.5653, "step": 23881 }, { "epoch": 0.6972643134507022, "grad_norm": 0.5980336368528256, "learning_rate": 6.7276561232765615e-06, "loss": 0.4987, "step": 23882 }, { "epoch": 0.6972935096785495, "grad_norm": 0.6234022562780814, "learning_rate": 6.727007299270073e-06, "loss": 0.5928, "step": 23883 }, { "epoch": 0.6973227059063969, "grad_norm": 0.5960703557587662, "learning_rate": 6.7263584752635856e-06, "loss": 0.5006, "step": 23884 }, { "epoch": 0.6973519021342443, "grad_norm": 0.6447309404221337, "learning_rate": 6.725709651257098e-06, "loss": 0.5594, "step": 23885 }, { "epoch": 0.6973810983620916, "grad_norm": 0.5602495823069905, "learning_rate": 6.725060827250609e-06, "loss": 0.4969, "step": 23886 }, { "epoch": 0.697410294589939, "grad_norm": 0.6123044662591299, "learning_rate": 6.724412003244121e-06, "loss": 0.5419, "step": 23887 }, { "epoch": 0.6974394908177863, "grad_norm": 0.6340058074941424, "learning_rate": 6.723763179237632e-06, "loss": 0.5895, "step": 23888 }, { "epoch": 0.6974686870456337, "grad_norm": 0.6554664651447211, "learning_rate": 6.723114355231144e-06, "loss": 0.602, "step": 23889 }, { "epoch": 0.6974978832734811, "grad_norm": 0.621928996525913, "learning_rate": 6.722465531224656e-06, "loss": 0.5767, "step": 23890 }, { "epoch": 0.6975270795013284, "grad_norm": 0.6223647607131689, "learning_rate": 6.721816707218167e-06, "loss": 0.5803, "step": 23891 }, { "epoch": 0.6975562757291758, "grad_norm": 0.649479960500075, "learning_rate": 6.721167883211679e-06, "loss": 0.6118, "step": 23892 }, { "epoch": 0.6975854719570231, "grad_norm": 0.677336792521023, "learning_rate": 6.720519059205191e-06, "loss": 0.6455, "step": 23893 }, { "epoch": 0.6976146681848705, "grad_norm": 0.6221831090387593, "learning_rate": 6.719870235198703e-06, "loss": 0.5832, "step": 23894 }, { "epoch": 0.6976438644127179, "grad_norm": 0.6583801340564293, "learning_rate": 6.719221411192215e-06, "loss": 0.5896, "step": 23895 }, { "epoch": 0.6976730606405652, "grad_norm": 0.6275758765249945, "learning_rate": 6.7185725871857265e-06, "loss": 0.6012, "step": 23896 }, { "epoch": 0.6977022568684126, "grad_norm": 0.6411321382197219, "learning_rate": 6.7179237631792385e-06, "loss": 0.6238, "step": 23897 }, { "epoch": 0.69773145309626, "grad_norm": 0.6221625157416159, "learning_rate": 6.71727493917275e-06, "loss": 0.5863, "step": 23898 }, { "epoch": 0.6977606493241073, "grad_norm": 0.6577885460411351, "learning_rate": 6.716626115166262e-06, "loss": 0.6458, "step": 23899 }, { "epoch": 0.6977898455519547, "grad_norm": 0.6642383202387999, "learning_rate": 6.715977291159773e-06, "loss": 0.6002, "step": 23900 }, { "epoch": 0.697819041779802, "grad_norm": 0.6763746994323854, "learning_rate": 6.715328467153285e-06, "loss": 0.6466, "step": 23901 }, { "epoch": 0.6978482380076494, "grad_norm": 0.666104696097874, "learning_rate": 6.714679643146796e-06, "loss": 0.6095, "step": 23902 }, { "epoch": 0.6978774342354968, "grad_norm": 0.6975579409571275, "learning_rate": 6.714030819140309e-06, "loss": 0.6646, "step": 23903 }, { "epoch": 0.6979066304633441, "grad_norm": 0.687659173636951, "learning_rate": 6.713381995133821e-06, "loss": 0.6363, "step": 23904 }, { "epoch": 0.6979358266911915, "grad_norm": 0.6747496333700762, "learning_rate": 6.712733171127332e-06, "loss": 0.6238, "step": 23905 }, { "epoch": 0.6979650229190388, "grad_norm": 0.5968520197384459, "learning_rate": 6.712084347120844e-06, "loss": 0.4924, "step": 23906 }, { "epoch": 0.6979942191468862, "grad_norm": 0.6461757309164051, "learning_rate": 6.711435523114355e-06, "loss": 0.5841, "step": 23907 }, { "epoch": 0.6980234153747336, "grad_norm": 0.6551761203711897, "learning_rate": 6.7107866991078674e-06, "loss": 0.6409, "step": 23908 }, { "epoch": 0.6980526116025809, "grad_norm": 0.6714857705386992, "learning_rate": 6.7101378751013795e-06, "loss": 0.6293, "step": 23909 }, { "epoch": 0.6980818078304283, "grad_norm": 0.5879355179276579, "learning_rate": 6.709489051094891e-06, "loss": 0.5188, "step": 23910 }, { "epoch": 0.6981110040582756, "grad_norm": 0.6272735798505887, "learning_rate": 6.708840227088403e-06, "loss": 0.5828, "step": 23911 }, { "epoch": 0.698140200286123, "grad_norm": 0.6872463040151988, "learning_rate": 6.708191403081915e-06, "loss": 0.6664, "step": 23912 }, { "epoch": 0.6981693965139704, "grad_norm": 0.6194659640371748, "learning_rate": 6.707542579075427e-06, "loss": 0.5445, "step": 23913 }, { "epoch": 0.6981985927418177, "grad_norm": 0.6296428953826194, "learning_rate": 6.706893755068939e-06, "loss": 0.5451, "step": 23914 }, { "epoch": 0.6982277889696651, "grad_norm": 0.6593319422554575, "learning_rate": 6.70624493106245e-06, "loss": 0.6002, "step": 23915 }, { "epoch": 0.6982569851975124, "grad_norm": 0.6443920932720597, "learning_rate": 6.705596107055962e-06, "loss": 0.5724, "step": 23916 }, { "epoch": 0.6982861814253598, "grad_norm": 0.651101003291672, "learning_rate": 6.704947283049473e-06, "loss": 0.5777, "step": 23917 }, { "epoch": 0.6983153776532072, "grad_norm": 0.6729433893016846, "learning_rate": 6.704298459042985e-06, "loss": 0.6139, "step": 23918 }, { "epoch": 0.6983445738810545, "grad_norm": 0.6401138398419619, "learning_rate": 6.703649635036496e-06, "loss": 0.6322, "step": 23919 }, { "epoch": 0.6983737701089019, "grad_norm": 0.5998171064512059, "learning_rate": 6.703000811030008e-06, "loss": 0.5669, "step": 23920 }, { "epoch": 0.6984029663367493, "grad_norm": 0.6953828788799036, "learning_rate": 6.702351987023521e-06, "loss": 0.6856, "step": 23921 }, { "epoch": 0.6984321625645966, "grad_norm": 0.6008957983933501, "learning_rate": 6.7017031630170324e-06, "loss": 0.5034, "step": 23922 }, { "epoch": 0.698461358792444, "grad_norm": 0.6829437978733445, "learning_rate": 6.7010543390105445e-06, "loss": 0.6087, "step": 23923 }, { "epoch": 0.6984905550202913, "grad_norm": 0.6184944972613505, "learning_rate": 6.700405515004056e-06, "loss": 0.5913, "step": 23924 }, { "epoch": 0.6985197512481387, "grad_norm": 0.6296575914210075, "learning_rate": 6.699756690997568e-06, "loss": 0.6096, "step": 23925 }, { "epoch": 0.6985489474759861, "grad_norm": 0.6532823956917652, "learning_rate": 6.699107866991079e-06, "loss": 0.6116, "step": 23926 }, { "epoch": 0.6985781437038334, "grad_norm": 0.6245071098147389, "learning_rate": 6.698459042984591e-06, "loss": 0.5532, "step": 23927 }, { "epoch": 0.6986073399316808, "grad_norm": 0.6647979502841467, "learning_rate": 6.697810218978102e-06, "loss": 0.6331, "step": 23928 }, { "epoch": 0.6986365361595281, "grad_norm": 0.6454726303820363, "learning_rate": 6.697161394971614e-06, "loss": 0.5707, "step": 23929 }, { "epoch": 0.6986657323873755, "grad_norm": 0.6188596335333497, "learning_rate": 6.696512570965126e-06, "loss": 0.5677, "step": 23930 }, { "epoch": 0.6986949286152229, "grad_norm": 0.692747848685737, "learning_rate": 6.695863746958638e-06, "loss": 0.6029, "step": 23931 }, { "epoch": 0.6987241248430702, "grad_norm": 0.6294801721100274, "learning_rate": 6.69521492295215e-06, "loss": 0.5631, "step": 23932 }, { "epoch": 0.6987533210709176, "grad_norm": 0.6197657503778402, "learning_rate": 6.694566098945662e-06, "loss": 0.5399, "step": 23933 }, { "epoch": 0.698782517298765, "grad_norm": 0.6987636918921137, "learning_rate": 6.693917274939173e-06, "loss": 0.6459, "step": 23934 }, { "epoch": 0.6988117135266123, "grad_norm": 0.5773671785177592, "learning_rate": 6.693268450932685e-06, "loss": 0.4921, "step": 23935 }, { "epoch": 0.6988409097544597, "grad_norm": 0.6626601636612985, "learning_rate": 6.6926196269261966e-06, "loss": 0.6211, "step": 23936 }, { "epoch": 0.698870105982307, "grad_norm": 0.6688469170991346, "learning_rate": 6.691970802919709e-06, "loss": 0.637, "step": 23937 }, { "epoch": 0.6988993022101544, "grad_norm": 0.6328314507946575, "learning_rate": 6.69132197891322e-06, "loss": 0.5988, "step": 23938 }, { "epoch": 0.6989284984380018, "grad_norm": 0.6130640627308195, "learning_rate": 6.690673154906732e-06, "loss": 0.6003, "step": 23939 }, { "epoch": 0.6989576946658492, "grad_norm": 0.6924320809774278, "learning_rate": 6.690024330900245e-06, "loss": 0.6331, "step": 23940 }, { "epoch": 0.6989868908936966, "grad_norm": 0.700961775590804, "learning_rate": 6.689375506893756e-06, "loss": 0.6763, "step": 23941 }, { "epoch": 0.699016087121544, "grad_norm": 0.6155606460431766, "learning_rate": 6.688726682887268e-06, "loss": 0.575, "step": 23942 }, { "epoch": 0.6990452833493913, "grad_norm": 0.6828229799004097, "learning_rate": 6.688077858880779e-06, "loss": 0.5663, "step": 23943 }, { "epoch": 0.6990744795772387, "grad_norm": 0.6850649247412929, "learning_rate": 6.687429034874291e-06, "loss": 0.6467, "step": 23944 }, { "epoch": 0.699103675805086, "grad_norm": 0.6599320681928353, "learning_rate": 6.686780210867802e-06, "loss": 0.6341, "step": 23945 }, { "epoch": 0.6991328720329334, "grad_norm": 0.6148768451537788, "learning_rate": 6.686131386861314e-06, "loss": 0.5613, "step": 23946 }, { "epoch": 0.6991620682607808, "grad_norm": 0.6854216677120539, "learning_rate": 6.6854825628548255e-06, "loss": 0.6954, "step": 23947 }, { "epoch": 0.6991912644886281, "grad_norm": 0.6387532474099673, "learning_rate": 6.6848337388483375e-06, "loss": 0.5798, "step": 23948 }, { "epoch": 0.6992204607164755, "grad_norm": 0.6436532310030513, "learning_rate": 6.6841849148418495e-06, "loss": 0.6385, "step": 23949 }, { "epoch": 0.6992496569443228, "grad_norm": 0.6356512529561894, "learning_rate": 6.6835360908353616e-06, "loss": 0.5776, "step": 23950 }, { "epoch": 0.6992788531721702, "grad_norm": 0.6143262249929354, "learning_rate": 6.682887266828874e-06, "loss": 0.5617, "step": 23951 }, { "epoch": 0.6993080494000176, "grad_norm": 0.604915275177137, "learning_rate": 6.682238442822386e-06, "loss": 0.5758, "step": 23952 }, { "epoch": 0.6993372456278649, "grad_norm": 0.6519923650587645, "learning_rate": 6.681589618815897e-06, "loss": 0.539, "step": 23953 }, { "epoch": 0.6993664418557123, "grad_norm": 0.6162742388769714, "learning_rate": 6.680940794809409e-06, "loss": 0.5467, "step": 23954 }, { "epoch": 0.6993956380835596, "grad_norm": 0.6073572754197661, "learning_rate": 6.68029197080292e-06, "loss": 0.5535, "step": 23955 }, { "epoch": 0.699424834311407, "grad_norm": 0.6057580242326934, "learning_rate": 6.679643146796432e-06, "loss": 0.5219, "step": 23956 }, { "epoch": 0.6994540305392544, "grad_norm": 0.6220298159215655, "learning_rate": 6.678994322789943e-06, "loss": 0.5935, "step": 23957 }, { "epoch": 0.6994832267671017, "grad_norm": 0.6483178339764428, "learning_rate": 6.678345498783455e-06, "loss": 0.59, "step": 23958 }, { "epoch": 0.6995124229949491, "grad_norm": 0.7468340711173472, "learning_rate": 6.677696674776968e-06, "loss": 0.7196, "step": 23959 }, { "epoch": 0.6995416192227965, "grad_norm": 0.6571291486315116, "learning_rate": 6.677047850770479e-06, "loss": 0.5716, "step": 23960 }, { "epoch": 0.6995708154506438, "grad_norm": 0.703645684750105, "learning_rate": 6.676399026763991e-06, "loss": 0.6903, "step": 23961 }, { "epoch": 0.6996000116784912, "grad_norm": 0.6360373042605232, "learning_rate": 6.6757502027575025e-06, "loss": 0.6085, "step": 23962 }, { "epoch": 0.6996292079063385, "grad_norm": 0.6086685745905509, "learning_rate": 6.6751013787510145e-06, "loss": 0.5104, "step": 23963 }, { "epoch": 0.6996584041341859, "grad_norm": 0.6204657738284319, "learning_rate": 6.674452554744526e-06, "loss": 0.5226, "step": 23964 }, { "epoch": 0.6996876003620333, "grad_norm": 0.677312539775277, "learning_rate": 6.673803730738038e-06, "loss": 0.6452, "step": 23965 }, { "epoch": 0.6997167965898806, "grad_norm": 0.6894681160861247, "learning_rate": 6.673154906731549e-06, "loss": 0.6954, "step": 23966 }, { "epoch": 0.699745992817728, "grad_norm": 0.6594068121334434, "learning_rate": 6.672506082725061e-06, "loss": 0.6147, "step": 23967 }, { "epoch": 0.6997751890455753, "grad_norm": 0.5996561494093194, "learning_rate": 6.671857258718573e-06, "loss": 0.5507, "step": 23968 }, { "epoch": 0.6998043852734227, "grad_norm": 0.6592677294773082, "learning_rate": 6.671208434712085e-06, "loss": 0.6236, "step": 23969 }, { "epoch": 0.6998335815012701, "grad_norm": 0.6531685524846578, "learning_rate": 6.670559610705597e-06, "loss": 0.596, "step": 23970 }, { "epoch": 0.6998627777291174, "grad_norm": 0.6930046572862181, "learning_rate": 6.669910786699109e-06, "loss": 0.6652, "step": 23971 }, { "epoch": 0.6998919739569648, "grad_norm": 0.6431864603719853, "learning_rate": 6.66926196269262e-06, "loss": 0.6086, "step": 23972 }, { "epoch": 0.6999211701848121, "grad_norm": 0.6322499578460863, "learning_rate": 6.668613138686132e-06, "loss": 0.5742, "step": 23973 }, { "epoch": 0.6999503664126595, "grad_norm": 0.6880141983493391, "learning_rate": 6.6679643146796434e-06, "loss": 0.6502, "step": 23974 }, { "epoch": 0.6999795626405069, "grad_norm": 0.6500124981124418, "learning_rate": 6.6673154906731555e-06, "loss": 0.6559, "step": 23975 }, { "epoch": 0.7000087588683542, "grad_norm": 0.6014437618104636, "learning_rate": 6.666666666666667e-06, "loss": 0.5258, "step": 23976 }, { "epoch": 0.7000379550962016, "grad_norm": 0.6425117617130367, "learning_rate": 6.666017842660179e-06, "loss": 0.5846, "step": 23977 }, { "epoch": 0.700067151324049, "grad_norm": 0.6392848592832546, "learning_rate": 6.6653690186536915e-06, "loss": 0.538, "step": 23978 }, { "epoch": 0.7000963475518963, "grad_norm": 0.6494459677924188, "learning_rate": 6.664720194647203e-06, "loss": 0.588, "step": 23979 }, { "epoch": 0.7001255437797437, "grad_norm": 0.639886341750865, "learning_rate": 6.664071370640715e-06, "loss": 0.6124, "step": 23980 }, { "epoch": 0.700154740007591, "grad_norm": 0.6676798620196451, "learning_rate": 6.663422546634226e-06, "loss": 0.6106, "step": 23981 }, { "epoch": 0.7001839362354384, "grad_norm": 0.6565074317691704, "learning_rate": 6.662773722627738e-06, "loss": 0.6374, "step": 23982 }, { "epoch": 0.7002131324632858, "grad_norm": 0.6080834716061226, "learning_rate": 6.662124898621249e-06, "loss": 0.5761, "step": 23983 }, { "epoch": 0.7002423286911331, "grad_norm": 0.6304764152140885, "learning_rate": 6.661476074614761e-06, "loss": 0.5843, "step": 23984 }, { "epoch": 0.7002715249189805, "grad_norm": 0.6590299559423437, "learning_rate": 6.660827250608272e-06, "loss": 0.6596, "step": 23985 }, { "epoch": 0.7003007211468278, "grad_norm": 0.6303335807992937, "learning_rate": 6.660178426601784e-06, "loss": 0.5884, "step": 23986 }, { "epoch": 0.7003299173746752, "grad_norm": 0.6263399900414087, "learning_rate": 6.659529602595296e-06, "loss": 0.5702, "step": 23987 }, { "epoch": 0.7003591136025226, "grad_norm": 0.611848834053407, "learning_rate": 6.6588807785888084e-06, "loss": 0.5438, "step": 23988 }, { "epoch": 0.7003883098303699, "grad_norm": 0.7066492434901176, "learning_rate": 6.6582319545823205e-06, "loss": 0.6287, "step": 23989 }, { "epoch": 0.7004175060582173, "grad_norm": 0.6027097583334439, "learning_rate": 6.657583130575832e-06, "loss": 0.5392, "step": 23990 }, { "epoch": 0.7004467022860646, "grad_norm": 0.6459396574570336, "learning_rate": 6.656934306569344e-06, "loss": 0.5702, "step": 23991 }, { "epoch": 0.700475898513912, "grad_norm": 0.6721113937737014, "learning_rate": 6.656285482562856e-06, "loss": 0.6138, "step": 23992 }, { "epoch": 0.7005050947417594, "grad_norm": 0.6419265150314633, "learning_rate": 6.655636658556367e-06, "loss": 0.6058, "step": 23993 }, { "epoch": 0.7005342909696067, "grad_norm": 0.6151131741378782, "learning_rate": 6.654987834549879e-06, "loss": 0.5325, "step": 23994 }, { "epoch": 0.7005634871974541, "grad_norm": 0.6651805432475172, "learning_rate": 6.65433901054339e-06, "loss": 0.6429, "step": 23995 }, { "epoch": 0.7005926834253015, "grad_norm": 0.6750130870596172, "learning_rate": 6.653690186536902e-06, "loss": 0.6447, "step": 23996 }, { "epoch": 0.7006218796531488, "grad_norm": 0.6792881394769551, "learning_rate": 6.653041362530415e-06, "loss": 0.6383, "step": 23997 }, { "epoch": 0.7006510758809962, "grad_norm": 0.6544043408658677, "learning_rate": 6.652392538523926e-06, "loss": 0.6325, "step": 23998 }, { "epoch": 0.7006802721088435, "grad_norm": 0.6409528266723749, "learning_rate": 6.651743714517438e-06, "loss": 0.5982, "step": 23999 }, { "epoch": 0.7007094683366909, "grad_norm": 0.651168782648822, "learning_rate": 6.651094890510949e-06, "loss": 0.6123, "step": 24000 }, { "epoch": 0.7007386645645383, "grad_norm": 0.6634987551715482, "learning_rate": 6.650446066504461e-06, "loss": 0.6144, "step": 24001 }, { "epoch": 0.7007678607923856, "grad_norm": 0.6844132798517265, "learning_rate": 6.649797242497973e-06, "loss": 0.6549, "step": 24002 }, { "epoch": 0.700797057020233, "grad_norm": 0.6138365351612358, "learning_rate": 6.649148418491485e-06, "loss": 0.5064, "step": 24003 }, { "epoch": 0.7008262532480803, "grad_norm": 0.6449506629543568, "learning_rate": 6.648499594484996e-06, "loss": 0.6356, "step": 24004 }, { "epoch": 0.7008554494759277, "grad_norm": 0.6197519028810431, "learning_rate": 6.647850770478508e-06, "loss": 0.5392, "step": 24005 }, { "epoch": 0.7008846457037751, "grad_norm": 0.6732936036258966, "learning_rate": 6.647201946472021e-06, "loss": 0.6841, "step": 24006 }, { "epoch": 0.7009138419316224, "grad_norm": 0.6374955538880845, "learning_rate": 6.646553122465532e-06, "loss": 0.6069, "step": 24007 }, { "epoch": 0.7009430381594698, "grad_norm": 0.6111004228672229, "learning_rate": 6.645904298459044e-06, "loss": 0.5676, "step": 24008 }, { "epoch": 0.7009722343873172, "grad_norm": 0.6145651011655134, "learning_rate": 6.645255474452555e-06, "loss": 0.6002, "step": 24009 }, { "epoch": 0.7010014306151645, "grad_norm": 0.6430313323616069, "learning_rate": 6.644606650446067e-06, "loss": 0.6047, "step": 24010 }, { "epoch": 0.7010306268430119, "grad_norm": 0.6222734095045231, "learning_rate": 6.643957826439579e-06, "loss": 0.5582, "step": 24011 }, { "epoch": 0.7010598230708592, "grad_norm": 0.6374601274251627, "learning_rate": 6.64330900243309e-06, "loss": 0.5726, "step": 24012 }, { "epoch": 0.7010890192987066, "grad_norm": 0.7275529736019133, "learning_rate": 6.642660178426602e-06, "loss": 0.6557, "step": 24013 }, { "epoch": 0.701118215526554, "grad_norm": 0.6123012784693064, "learning_rate": 6.6420113544201135e-06, "loss": 0.5697, "step": 24014 }, { "epoch": 0.7011474117544013, "grad_norm": 0.6005842617670929, "learning_rate": 6.6413625304136255e-06, "loss": 0.5324, "step": 24015 }, { "epoch": 0.7011766079822487, "grad_norm": 0.6557903486671828, "learning_rate": 6.640713706407138e-06, "loss": 0.6361, "step": 24016 }, { "epoch": 0.701205804210096, "grad_norm": 0.6470404263089393, "learning_rate": 6.64006488240065e-06, "loss": 0.6383, "step": 24017 }, { "epoch": 0.7012350004379434, "grad_norm": 0.6175141584155744, "learning_rate": 6.639416058394162e-06, "loss": 0.5957, "step": 24018 }, { "epoch": 0.7012641966657908, "grad_norm": 0.6280035776950427, "learning_rate": 6.638767234387673e-06, "loss": 0.5916, "step": 24019 }, { "epoch": 0.7012933928936381, "grad_norm": 0.6519858061644958, "learning_rate": 6.638118410381185e-06, "loss": 0.6358, "step": 24020 }, { "epoch": 0.7013225891214855, "grad_norm": 0.6140247975009414, "learning_rate": 6.637469586374696e-06, "loss": 0.5733, "step": 24021 }, { "epoch": 0.7013517853493328, "grad_norm": 0.5986651549388533, "learning_rate": 6.636820762368208e-06, "loss": 0.5278, "step": 24022 }, { "epoch": 0.7013809815771802, "grad_norm": 0.6113587294575974, "learning_rate": 6.636171938361719e-06, "loss": 0.5355, "step": 24023 }, { "epoch": 0.7014101778050276, "grad_norm": 0.6578101155052308, "learning_rate": 6.635523114355231e-06, "loss": 0.6633, "step": 24024 }, { "epoch": 0.7014393740328749, "grad_norm": 0.6109735865873348, "learning_rate": 6.634874290348744e-06, "loss": 0.5112, "step": 24025 }, { "epoch": 0.7014685702607223, "grad_norm": 0.5708052578528617, "learning_rate": 6.634225466342255e-06, "loss": 0.5077, "step": 24026 }, { "epoch": 0.7014977664885697, "grad_norm": 0.6490343556815809, "learning_rate": 6.633576642335767e-06, "loss": 0.6016, "step": 24027 }, { "epoch": 0.701526962716417, "grad_norm": 0.587117604757887, "learning_rate": 6.6329278183292785e-06, "loss": 0.5129, "step": 24028 }, { "epoch": 0.7015561589442644, "grad_norm": 0.6332862297122483, "learning_rate": 6.6322789943227905e-06, "loss": 0.6092, "step": 24029 }, { "epoch": 0.7015853551721117, "grad_norm": 0.6472307226743971, "learning_rate": 6.6316301703163026e-06, "loss": 0.651, "step": 24030 }, { "epoch": 0.7016145513999591, "grad_norm": 0.7237803887257473, "learning_rate": 6.630981346309814e-06, "loss": 0.6301, "step": 24031 }, { "epoch": 0.7016437476278065, "grad_norm": 0.6216776561011506, "learning_rate": 6.630332522303326e-06, "loss": 0.5656, "step": 24032 }, { "epoch": 0.7016729438556538, "grad_norm": 0.6056013662904671, "learning_rate": 6.629683698296837e-06, "loss": 0.519, "step": 24033 }, { "epoch": 0.7017021400835012, "grad_norm": 0.6568559005920037, "learning_rate": 6.629034874290349e-06, "loss": 0.6023, "step": 24034 }, { "epoch": 0.7017313363113485, "grad_norm": 0.613772536212806, "learning_rate": 6.628386050283862e-06, "loss": 0.5774, "step": 24035 }, { "epoch": 0.7017605325391959, "grad_norm": 0.6545235783089602, "learning_rate": 6.627737226277373e-06, "loss": 0.6272, "step": 24036 }, { "epoch": 0.7017897287670433, "grad_norm": 0.63850984932262, "learning_rate": 6.627088402270885e-06, "loss": 0.5587, "step": 24037 }, { "epoch": 0.7018189249948906, "grad_norm": 0.6872015858442302, "learning_rate": 6.626439578264396e-06, "loss": 0.6653, "step": 24038 }, { "epoch": 0.701848121222738, "grad_norm": 0.6397810634645675, "learning_rate": 6.625790754257908e-06, "loss": 0.5716, "step": 24039 }, { "epoch": 0.7018773174505853, "grad_norm": 0.6338061478841186, "learning_rate": 6.6251419302514194e-06, "loss": 0.6113, "step": 24040 }, { "epoch": 0.7019065136784327, "grad_norm": 0.7749189522322573, "learning_rate": 6.6244931062449315e-06, "loss": 0.6387, "step": 24041 }, { "epoch": 0.7019357099062801, "grad_norm": 0.7343541838298394, "learning_rate": 6.623844282238443e-06, "loss": 0.6954, "step": 24042 }, { "epoch": 0.7019649061341274, "grad_norm": 0.682204120527423, "learning_rate": 6.623195458231955e-06, "loss": 0.6866, "step": 24043 }, { "epoch": 0.7019941023619748, "grad_norm": 0.6092092316155975, "learning_rate": 6.6225466342254676e-06, "loss": 0.5671, "step": 24044 }, { "epoch": 0.7020232985898222, "grad_norm": 0.6024808245680703, "learning_rate": 6.621897810218979e-06, "loss": 0.5318, "step": 24045 }, { "epoch": 0.7020524948176695, "grad_norm": 0.6834328960884734, "learning_rate": 6.621248986212491e-06, "loss": 0.6887, "step": 24046 }, { "epoch": 0.7020816910455169, "grad_norm": 0.6891692945688941, "learning_rate": 6.620600162206002e-06, "loss": 0.7027, "step": 24047 }, { "epoch": 0.7021108872733642, "grad_norm": 0.6262487821398918, "learning_rate": 6.619951338199514e-06, "loss": 0.5721, "step": 24048 }, { "epoch": 0.7021400835012116, "grad_norm": 0.6098189320866607, "learning_rate": 6.619302514193026e-06, "loss": 0.5507, "step": 24049 }, { "epoch": 0.702169279729059, "grad_norm": 0.624514726222322, "learning_rate": 6.618653690186537e-06, "loss": 0.5734, "step": 24050 }, { "epoch": 0.7021984759569063, "grad_norm": 0.672824821908131, "learning_rate": 6.618004866180049e-06, "loss": 0.6408, "step": 24051 }, { "epoch": 0.7022276721847537, "grad_norm": 0.6205943412057782, "learning_rate": 6.61735604217356e-06, "loss": 0.5472, "step": 24052 }, { "epoch": 0.702256868412601, "grad_norm": 0.6224973543634422, "learning_rate": 6.616707218167072e-06, "loss": 0.5645, "step": 24053 }, { "epoch": 0.7022860646404484, "grad_norm": 0.6812245398418121, "learning_rate": 6.616058394160585e-06, "loss": 0.6324, "step": 24054 }, { "epoch": 0.7023152608682958, "grad_norm": 0.6493340444580417, "learning_rate": 6.6154095701540965e-06, "loss": 0.6233, "step": 24055 }, { "epoch": 0.7023444570961431, "grad_norm": 0.6438840003464326, "learning_rate": 6.6147607461476085e-06, "loss": 0.6301, "step": 24056 }, { "epoch": 0.7023736533239905, "grad_norm": 0.6176080124251797, "learning_rate": 6.61411192214112e-06, "loss": 0.5458, "step": 24057 }, { "epoch": 0.7024028495518378, "grad_norm": 0.6116862169781675, "learning_rate": 6.613463098134632e-06, "loss": 0.5555, "step": 24058 }, { "epoch": 0.7024320457796852, "grad_norm": 0.5989053277911668, "learning_rate": 6.612814274128143e-06, "loss": 0.5393, "step": 24059 }, { "epoch": 0.7024612420075326, "grad_norm": 0.606582380340971, "learning_rate": 6.612165450121655e-06, "loss": 0.5194, "step": 24060 }, { "epoch": 0.70249043823538, "grad_norm": 0.6963848895396655, "learning_rate": 6.611516626115166e-06, "loss": 0.6756, "step": 24061 }, { "epoch": 0.7025196344632274, "grad_norm": 0.6096420827877835, "learning_rate": 6.610867802108678e-06, "loss": 0.552, "step": 24062 }, { "epoch": 0.7025488306910748, "grad_norm": 0.7485124372404373, "learning_rate": 6.610218978102191e-06, "loss": 0.6844, "step": 24063 }, { "epoch": 0.7025780269189221, "grad_norm": 0.6614161388032084, "learning_rate": 6.609570154095702e-06, "loss": 0.6647, "step": 24064 }, { "epoch": 0.7026072231467695, "grad_norm": 0.5815467269285098, "learning_rate": 6.608921330089214e-06, "loss": 0.5344, "step": 24065 }, { "epoch": 0.7026364193746168, "grad_norm": 0.6383996748539598, "learning_rate": 6.608272506082725e-06, "loss": 0.5806, "step": 24066 }, { "epoch": 0.7026656156024642, "grad_norm": 0.6588885995031982, "learning_rate": 6.607623682076237e-06, "loss": 0.6091, "step": 24067 }, { "epoch": 0.7026948118303116, "grad_norm": 0.6190015000527861, "learning_rate": 6.6069748580697494e-06, "loss": 0.5687, "step": 24068 }, { "epoch": 0.7027240080581589, "grad_norm": 0.664389063773579, "learning_rate": 6.606326034063261e-06, "loss": 0.6181, "step": 24069 }, { "epoch": 0.7027532042860063, "grad_norm": 0.6382696918784145, "learning_rate": 6.605677210056773e-06, "loss": 0.5747, "step": 24070 }, { "epoch": 0.7027824005138537, "grad_norm": 0.6990705149186391, "learning_rate": 6.605028386050284e-06, "loss": 0.6736, "step": 24071 }, { "epoch": 0.702811596741701, "grad_norm": 0.6389142744492203, "learning_rate": 6.604379562043797e-06, "loss": 0.5893, "step": 24072 }, { "epoch": 0.7028407929695484, "grad_norm": 0.6315332450480707, "learning_rate": 6.603730738037309e-06, "loss": 0.5852, "step": 24073 }, { "epoch": 0.7028699891973957, "grad_norm": 0.6130110105279563, "learning_rate": 6.60308191403082e-06, "loss": 0.5762, "step": 24074 }, { "epoch": 0.7028991854252431, "grad_norm": 0.6839030145936706, "learning_rate": 6.602433090024332e-06, "loss": 0.7004, "step": 24075 }, { "epoch": 0.7029283816530905, "grad_norm": 0.6226771969485367, "learning_rate": 6.601784266017843e-06, "loss": 0.5447, "step": 24076 }, { "epoch": 0.7029575778809378, "grad_norm": 0.6783960541581732, "learning_rate": 6.601135442011355e-06, "loss": 0.6743, "step": 24077 }, { "epoch": 0.7029867741087852, "grad_norm": 0.6107611805060059, "learning_rate": 6.600486618004866e-06, "loss": 0.5795, "step": 24078 }, { "epoch": 0.7030159703366325, "grad_norm": 0.5996080839693937, "learning_rate": 6.599837793998378e-06, "loss": 0.5263, "step": 24079 }, { "epoch": 0.7030451665644799, "grad_norm": 0.6271102648338595, "learning_rate": 6.5991889699918895e-06, "loss": 0.5799, "step": 24080 }, { "epoch": 0.7030743627923273, "grad_norm": 0.8065065948633298, "learning_rate": 6.5985401459854016e-06, "loss": 0.6316, "step": 24081 }, { "epoch": 0.7031035590201746, "grad_norm": 0.6335030696821372, "learning_rate": 6.5978913219789144e-06, "loss": 0.5881, "step": 24082 }, { "epoch": 0.703132755248022, "grad_norm": 0.6337515494825786, "learning_rate": 6.597242497972426e-06, "loss": 0.5963, "step": 24083 }, { "epoch": 0.7031619514758694, "grad_norm": 0.659879330393546, "learning_rate": 6.596593673965938e-06, "loss": 0.5886, "step": 24084 }, { "epoch": 0.7031911477037167, "grad_norm": 0.605060879197405, "learning_rate": 6.595944849959449e-06, "loss": 0.5441, "step": 24085 }, { "epoch": 0.7032203439315641, "grad_norm": 0.6648306503561713, "learning_rate": 6.595296025952961e-06, "loss": 0.6089, "step": 24086 }, { "epoch": 0.7032495401594114, "grad_norm": 0.6573695455392788, "learning_rate": 6.594647201946473e-06, "loss": 0.5451, "step": 24087 }, { "epoch": 0.7032787363872588, "grad_norm": 0.6336845603560383, "learning_rate": 6.593998377939984e-06, "loss": 0.5087, "step": 24088 }, { "epoch": 0.7033079326151062, "grad_norm": 0.6739565783619902, "learning_rate": 6.593349553933496e-06, "loss": 0.6441, "step": 24089 }, { "epoch": 0.7033371288429535, "grad_norm": 0.7307376460712423, "learning_rate": 6.592700729927007e-06, "loss": 0.6827, "step": 24090 }, { "epoch": 0.7033663250708009, "grad_norm": 0.7087064204866734, "learning_rate": 6.59205190592052e-06, "loss": 0.6758, "step": 24091 }, { "epoch": 0.7033955212986482, "grad_norm": 0.6445126170219696, "learning_rate": 6.591403081914032e-06, "loss": 0.6274, "step": 24092 }, { "epoch": 0.7034247175264956, "grad_norm": 0.6398550781106732, "learning_rate": 6.590754257907543e-06, "loss": 0.5825, "step": 24093 }, { "epoch": 0.703453913754343, "grad_norm": 0.6372194679075461, "learning_rate": 6.590105433901055e-06, "loss": 0.6009, "step": 24094 }, { "epoch": 0.7034831099821903, "grad_norm": 0.6206003842157221, "learning_rate": 6.5894566098945665e-06, "loss": 0.5973, "step": 24095 }, { "epoch": 0.7035123062100377, "grad_norm": 0.6180783294222487, "learning_rate": 6.5888077858880786e-06, "loss": 0.5826, "step": 24096 }, { "epoch": 0.703541502437885, "grad_norm": 0.660277953716799, "learning_rate": 6.58815896188159e-06, "loss": 0.5955, "step": 24097 }, { "epoch": 0.7035706986657324, "grad_norm": 0.6433315715074046, "learning_rate": 6.587510137875102e-06, "loss": 0.5583, "step": 24098 }, { "epoch": 0.7035998948935798, "grad_norm": 0.6688333652784448, "learning_rate": 6.586861313868613e-06, "loss": 0.6535, "step": 24099 }, { "epoch": 0.7036290911214271, "grad_norm": 0.6593381975619599, "learning_rate": 6.586212489862125e-06, "loss": 0.6162, "step": 24100 }, { "epoch": 0.7036582873492745, "grad_norm": 0.6742257560616286, "learning_rate": 6.585563665855638e-06, "loss": 0.6682, "step": 24101 }, { "epoch": 0.7036874835771219, "grad_norm": 0.6085264278895252, "learning_rate": 6.584914841849149e-06, "loss": 0.53, "step": 24102 }, { "epoch": 0.7037166798049692, "grad_norm": 0.7014427632631073, "learning_rate": 6.584266017842661e-06, "loss": 0.7436, "step": 24103 }, { "epoch": 0.7037458760328166, "grad_norm": 0.6326490269028137, "learning_rate": 6.583617193836172e-06, "loss": 0.616, "step": 24104 }, { "epoch": 0.7037750722606639, "grad_norm": 0.6700146980527382, "learning_rate": 6.582968369829684e-06, "loss": 0.6279, "step": 24105 }, { "epoch": 0.7038042684885113, "grad_norm": 0.6268054616021924, "learning_rate": 6.5823195458231955e-06, "loss": 0.5746, "step": 24106 }, { "epoch": 0.7038334647163587, "grad_norm": 0.6266586289761946, "learning_rate": 6.5816707218167075e-06, "loss": 0.5658, "step": 24107 }, { "epoch": 0.703862660944206, "grad_norm": 0.6648967467567186, "learning_rate": 6.5810218978102195e-06, "loss": 0.6452, "step": 24108 }, { "epoch": 0.7038918571720534, "grad_norm": 0.6870006456110017, "learning_rate": 6.580373073803731e-06, "loss": 0.6737, "step": 24109 }, { "epoch": 0.7039210533999007, "grad_norm": 0.6452963977902095, "learning_rate": 6.5797242497972436e-06, "loss": 0.6181, "step": 24110 }, { "epoch": 0.7039502496277481, "grad_norm": 0.6463226178004541, "learning_rate": 6.579075425790756e-06, "loss": 0.638, "step": 24111 }, { "epoch": 0.7039794458555955, "grad_norm": 0.6770182066771286, "learning_rate": 6.578426601784267e-06, "loss": 0.6599, "step": 24112 }, { "epoch": 0.7040086420834428, "grad_norm": 0.6744288729896866, "learning_rate": 6.577777777777779e-06, "loss": 0.6721, "step": 24113 }, { "epoch": 0.7040378383112902, "grad_norm": 0.6914065209582343, "learning_rate": 6.57712895377129e-06, "loss": 0.5889, "step": 24114 }, { "epoch": 0.7040670345391375, "grad_norm": 0.6880318619047764, "learning_rate": 6.576480129764802e-06, "loss": 0.669, "step": 24115 }, { "epoch": 0.7040962307669849, "grad_norm": 0.6016654590461462, "learning_rate": 6.575831305758313e-06, "loss": 0.4879, "step": 24116 }, { "epoch": 0.7041254269948323, "grad_norm": 0.5716620861074388, "learning_rate": 6.575182481751825e-06, "loss": 0.4951, "step": 24117 }, { "epoch": 0.7041546232226796, "grad_norm": 0.6264638194032298, "learning_rate": 6.574533657745336e-06, "loss": 0.6316, "step": 24118 }, { "epoch": 0.704183819450527, "grad_norm": 0.6342892093637112, "learning_rate": 6.573884833738848e-06, "loss": 0.6221, "step": 24119 }, { "epoch": 0.7042130156783744, "grad_norm": 0.6263510664228504, "learning_rate": 6.573236009732361e-06, "loss": 0.5353, "step": 24120 }, { "epoch": 0.7042422119062217, "grad_norm": 0.6427535056931917, "learning_rate": 6.5725871857258725e-06, "loss": 0.5772, "step": 24121 }, { "epoch": 0.7042714081340691, "grad_norm": 0.5795010510930718, "learning_rate": 6.5719383617193845e-06, "loss": 0.5088, "step": 24122 }, { "epoch": 0.7043006043619164, "grad_norm": 0.6648347089106248, "learning_rate": 6.571289537712896e-06, "loss": 0.6101, "step": 24123 }, { "epoch": 0.7043298005897638, "grad_norm": 0.6471802361136983, "learning_rate": 6.570640713706408e-06, "loss": 0.6026, "step": 24124 }, { "epoch": 0.7043589968176112, "grad_norm": 0.6648080913850707, "learning_rate": 6.569991889699919e-06, "loss": 0.582, "step": 24125 }, { "epoch": 0.7043881930454585, "grad_norm": 0.6586840308996956, "learning_rate": 6.569343065693431e-06, "loss": 0.6139, "step": 24126 }, { "epoch": 0.7044173892733059, "grad_norm": 0.6238140039750385, "learning_rate": 6.568694241686943e-06, "loss": 0.552, "step": 24127 }, { "epoch": 0.7044465855011532, "grad_norm": 0.6919764382055593, "learning_rate": 6.568045417680454e-06, "loss": 0.6482, "step": 24128 }, { "epoch": 0.7044757817290006, "grad_norm": 0.6059878484850569, "learning_rate": 6.567396593673967e-06, "loss": 0.5679, "step": 24129 }, { "epoch": 0.704504977956848, "grad_norm": 0.6480546957254394, "learning_rate": 6.566747769667479e-06, "loss": 0.641, "step": 24130 }, { "epoch": 0.7045341741846953, "grad_norm": 0.5937014723117152, "learning_rate": 6.56609894566099e-06, "loss": 0.5358, "step": 24131 }, { "epoch": 0.7045633704125427, "grad_norm": 0.6981102500397638, "learning_rate": 6.565450121654502e-06, "loss": 0.6778, "step": 24132 }, { "epoch": 0.70459256664039, "grad_norm": 0.6412136882042198, "learning_rate": 6.564801297648013e-06, "loss": 0.5861, "step": 24133 }, { "epoch": 0.7046217628682374, "grad_norm": 0.6788868153193285, "learning_rate": 6.5641524736415254e-06, "loss": 0.6357, "step": 24134 }, { "epoch": 0.7046509590960848, "grad_norm": 0.6459107145984071, "learning_rate": 6.563503649635037e-06, "loss": 0.5715, "step": 24135 }, { "epoch": 0.7046801553239321, "grad_norm": 0.6759089508222915, "learning_rate": 6.562854825628549e-06, "loss": 0.7001, "step": 24136 }, { "epoch": 0.7047093515517795, "grad_norm": 0.6435046422319642, "learning_rate": 6.56220600162206e-06, "loss": 0.6045, "step": 24137 }, { "epoch": 0.7047385477796269, "grad_norm": 0.6292609859472474, "learning_rate": 6.561557177615572e-06, "loss": 0.5871, "step": 24138 }, { "epoch": 0.7047677440074742, "grad_norm": 0.5788857397163291, "learning_rate": 6.560908353609085e-06, "loss": 0.5127, "step": 24139 }, { "epoch": 0.7047969402353216, "grad_norm": 0.6616398274798038, "learning_rate": 6.560259529602596e-06, "loss": 0.6661, "step": 24140 }, { "epoch": 0.7048261364631689, "grad_norm": 0.6523065590556577, "learning_rate": 6.559610705596108e-06, "loss": 0.6517, "step": 24141 }, { "epoch": 0.7048553326910163, "grad_norm": 0.6501542469146973, "learning_rate": 6.558961881589619e-06, "loss": 0.6455, "step": 24142 }, { "epoch": 0.7048845289188637, "grad_norm": 0.6419689621528071, "learning_rate": 6.558313057583131e-06, "loss": 0.5978, "step": 24143 }, { "epoch": 0.704913725146711, "grad_norm": 0.6525844166436373, "learning_rate": 6.557664233576642e-06, "loss": 0.5959, "step": 24144 }, { "epoch": 0.7049429213745584, "grad_norm": 0.620482727789478, "learning_rate": 6.557015409570154e-06, "loss": 0.5449, "step": 24145 }, { "epoch": 0.7049721176024057, "grad_norm": 0.6744138700049703, "learning_rate": 6.556366585563666e-06, "loss": 0.6708, "step": 24146 }, { "epoch": 0.7050013138302531, "grad_norm": 0.6165573920032111, "learning_rate": 6.5557177615571776e-06, "loss": 0.5632, "step": 24147 }, { "epoch": 0.7050305100581005, "grad_norm": 0.6522573452289506, "learning_rate": 6.5550689375506904e-06, "loss": 0.6451, "step": 24148 }, { "epoch": 0.7050597062859478, "grad_norm": 0.6269640194758048, "learning_rate": 6.5544201135442025e-06, "loss": 0.6317, "step": 24149 }, { "epoch": 0.7050889025137952, "grad_norm": 0.6385329057603166, "learning_rate": 6.553771289537714e-06, "loss": 0.6004, "step": 24150 }, { "epoch": 0.7051180987416426, "grad_norm": 0.6560191132683002, "learning_rate": 6.553122465531226e-06, "loss": 0.6382, "step": 24151 }, { "epoch": 0.7051472949694899, "grad_norm": 0.6797541435942659, "learning_rate": 6.552473641524737e-06, "loss": 0.6135, "step": 24152 }, { "epoch": 0.7051764911973373, "grad_norm": 0.6707223426894112, "learning_rate": 6.551824817518249e-06, "loss": 0.6416, "step": 24153 }, { "epoch": 0.7052056874251846, "grad_norm": 0.6387622398854801, "learning_rate": 6.55117599351176e-06, "loss": 0.6093, "step": 24154 }, { "epoch": 0.705234883653032, "grad_norm": 0.6604054301555491, "learning_rate": 6.550527169505272e-06, "loss": 0.6147, "step": 24155 }, { "epoch": 0.7052640798808794, "grad_norm": 0.6251160716603885, "learning_rate": 6.549878345498783e-06, "loss": 0.5515, "step": 24156 }, { "epoch": 0.7052932761087267, "grad_norm": 0.625819765032382, "learning_rate": 6.549229521492296e-06, "loss": 0.537, "step": 24157 }, { "epoch": 0.7053224723365741, "grad_norm": 0.6399524375286944, "learning_rate": 6.548580697485808e-06, "loss": 0.6166, "step": 24158 }, { "epoch": 0.7053516685644214, "grad_norm": 0.6289516314840553, "learning_rate": 6.547931873479319e-06, "loss": 0.5545, "step": 24159 }, { "epoch": 0.7053808647922688, "grad_norm": 0.6459070785972079, "learning_rate": 6.547283049472831e-06, "loss": 0.5684, "step": 24160 }, { "epoch": 0.7054100610201162, "grad_norm": 0.7126967498930618, "learning_rate": 6.5466342254663426e-06, "loss": 0.6908, "step": 24161 }, { "epoch": 0.7054392572479635, "grad_norm": 0.8657853727703201, "learning_rate": 6.545985401459855e-06, "loss": 0.6287, "step": 24162 }, { "epoch": 0.7054684534758109, "grad_norm": 0.617178856319482, "learning_rate": 6.545336577453366e-06, "loss": 0.5668, "step": 24163 }, { "epoch": 0.7054976497036582, "grad_norm": 0.6366359265047871, "learning_rate": 6.544687753446878e-06, "loss": 0.5696, "step": 24164 }, { "epoch": 0.7055268459315056, "grad_norm": 0.6680518878772208, "learning_rate": 6.54403892944039e-06, "loss": 0.6474, "step": 24165 }, { "epoch": 0.705556042159353, "grad_norm": 0.6618480045774261, "learning_rate": 6.543390105433901e-06, "loss": 0.6039, "step": 24166 }, { "epoch": 0.7055852383872003, "grad_norm": 0.7248530671271491, "learning_rate": 6.542741281427414e-06, "loss": 0.6599, "step": 24167 }, { "epoch": 0.7056144346150477, "grad_norm": 0.6307910910210385, "learning_rate": 6.542092457420925e-06, "loss": 0.5741, "step": 24168 }, { "epoch": 0.705643630842895, "grad_norm": 0.6489574262695719, "learning_rate": 6.541443633414437e-06, "loss": 0.5883, "step": 24169 }, { "epoch": 0.7056728270707424, "grad_norm": 0.6356301649541717, "learning_rate": 6.540794809407949e-06, "loss": 0.6011, "step": 24170 }, { "epoch": 0.7057020232985898, "grad_norm": 0.6732020509358815, "learning_rate": 6.54014598540146e-06, "loss": 0.6031, "step": 24171 }, { "epoch": 0.7057312195264371, "grad_norm": 0.6515246416869122, "learning_rate": 6.539497161394972e-06, "loss": 0.5941, "step": 24172 }, { "epoch": 0.7057604157542845, "grad_norm": 0.6258606253403024, "learning_rate": 6.5388483373884835e-06, "loss": 0.5885, "step": 24173 }, { "epoch": 0.7057896119821319, "grad_norm": 0.6000126041754367, "learning_rate": 6.5381995133819955e-06, "loss": 0.5354, "step": 24174 }, { "epoch": 0.7058188082099792, "grad_norm": 0.6810791567363786, "learning_rate": 6.537550689375507e-06, "loss": 0.6802, "step": 24175 }, { "epoch": 0.7058480044378266, "grad_norm": 0.6887684434047047, "learning_rate": 6.5369018653690196e-06, "loss": 0.646, "step": 24176 }, { "epoch": 0.7058772006656739, "grad_norm": 0.6923350605472982, "learning_rate": 6.536253041362532e-06, "loss": 0.6779, "step": 24177 }, { "epoch": 0.7059063968935213, "grad_norm": 0.6214164672140972, "learning_rate": 6.535604217356043e-06, "loss": 0.5773, "step": 24178 }, { "epoch": 0.7059355931213687, "grad_norm": 0.6620661327752113, "learning_rate": 6.534955393349555e-06, "loss": 0.6346, "step": 24179 }, { "epoch": 0.705964789349216, "grad_norm": 0.66776027815683, "learning_rate": 6.534306569343066e-06, "loss": 0.647, "step": 24180 }, { "epoch": 0.7059939855770635, "grad_norm": 0.6678835426756305, "learning_rate": 6.533657745336578e-06, "loss": 0.6257, "step": 24181 }, { "epoch": 0.7060231818049109, "grad_norm": 0.6514898356561172, "learning_rate": 6.533008921330089e-06, "loss": 0.598, "step": 24182 }, { "epoch": 0.7060523780327582, "grad_norm": 0.6978364996963147, "learning_rate": 6.532360097323601e-06, "loss": 0.6883, "step": 24183 }, { "epoch": 0.7060815742606056, "grad_norm": 0.6514769626252338, "learning_rate": 6.531711273317113e-06, "loss": 0.6313, "step": 24184 }, { "epoch": 0.7061107704884529, "grad_norm": 0.6506314179200727, "learning_rate": 6.5310624493106244e-06, "loss": 0.6268, "step": 24185 }, { "epoch": 0.7061399667163003, "grad_norm": 0.6556158393017554, "learning_rate": 6.530413625304137e-06, "loss": 0.6076, "step": 24186 }, { "epoch": 0.7061691629441477, "grad_norm": 0.6203868414184605, "learning_rate": 6.5297648012976485e-06, "loss": 0.6004, "step": 24187 }, { "epoch": 0.706198359171995, "grad_norm": 0.6867729402480088, "learning_rate": 6.5291159772911605e-06, "loss": 0.6041, "step": 24188 }, { "epoch": 0.7062275553998424, "grad_norm": 0.6114519534598177, "learning_rate": 6.5284671532846725e-06, "loss": 0.5555, "step": 24189 }, { "epoch": 0.7062567516276897, "grad_norm": 0.7036921702952353, "learning_rate": 6.527818329278184e-06, "loss": 0.6857, "step": 24190 }, { "epoch": 0.7062859478555371, "grad_norm": 0.647208734494003, "learning_rate": 6.527169505271696e-06, "loss": 0.6197, "step": 24191 }, { "epoch": 0.7063151440833845, "grad_norm": 0.6489136464298297, "learning_rate": 6.526520681265207e-06, "loss": 0.6427, "step": 24192 }, { "epoch": 0.7063443403112318, "grad_norm": 0.6310776437740755, "learning_rate": 6.525871857258719e-06, "loss": 0.5894, "step": 24193 }, { "epoch": 0.7063735365390792, "grad_norm": 0.6156284719340133, "learning_rate": 6.52522303325223e-06, "loss": 0.5998, "step": 24194 }, { "epoch": 0.7064027327669266, "grad_norm": 0.6599957084094387, "learning_rate": 6.524574209245743e-06, "loss": 0.5755, "step": 24195 }, { "epoch": 0.7064319289947739, "grad_norm": 0.6582222267444214, "learning_rate": 6.523925385239255e-06, "loss": 0.626, "step": 24196 }, { "epoch": 0.7064611252226213, "grad_norm": 0.6237515580805701, "learning_rate": 6.523276561232766e-06, "loss": 0.5706, "step": 24197 }, { "epoch": 0.7064903214504686, "grad_norm": 0.6456084431694902, "learning_rate": 6.522627737226278e-06, "loss": 0.6254, "step": 24198 }, { "epoch": 0.706519517678316, "grad_norm": 0.7103331337702475, "learning_rate": 6.521978913219789e-06, "loss": 0.6539, "step": 24199 }, { "epoch": 0.7065487139061634, "grad_norm": 0.6755556148233901, "learning_rate": 6.5213300892133014e-06, "loss": 0.6628, "step": 24200 }, { "epoch": 0.7065779101340107, "grad_norm": 0.6267042597794319, "learning_rate": 6.520681265206813e-06, "loss": 0.5616, "step": 24201 }, { "epoch": 0.7066071063618581, "grad_norm": 0.6364003120295213, "learning_rate": 6.520032441200325e-06, "loss": 0.6267, "step": 24202 }, { "epoch": 0.7066363025897054, "grad_norm": 0.6742743154528316, "learning_rate": 6.519383617193837e-06, "loss": 0.677, "step": 24203 }, { "epoch": 0.7066654988175528, "grad_norm": 0.6362928068315686, "learning_rate": 6.518734793187348e-06, "loss": 0.5504, "step": 24204 }, { "epoch": 0.7066946950454002, "grad_norm": 0.6118760787093472, "learning_rate": 6.518085969180861e-06, "loss": 0.5706, "step": 24205 }, { "epoch": 0.7067238912732475, "grad_norm": 0.6686320304962764, "learning_rate": 6.517437145174372e-06, "loss": 0.6351, "step": 24206 }, { "epoch": 0.7067530875010949, "grad_norm": 0.6369400435690405, "learning_rate": 6.516788321167884e-06, "loss": 0.6211, "step": 24207 }, { "epoch": 0.7067822837289423, "grad_norm": 0.6222126491311347, "learning_rate": 6.516139497161396e-06, "loss": 0.596, "step": 24208 }, { "epoch": 0.7068114799567896, "grad_norm": 0.6826925712444301, "learning_rate": 6.515490673154907e-06, "loss": 0.5866, "step": 24209 }, { "epoch": 0.706840676184637, "grad_norm": 0.6720355036150751, "learning_rate": 6.514841849148419e-06, "loss": 0.6812, "step": 24210 }, { "epoch": 0.7068698724124843, "grad_norm": 0.6475526750954528, "learning_rate": 6.51419302514193e-06, "loss": 0.6387, "step": 24211 }, { "epoch": 0.7068990686403317, "grad_norm": 0.6221241409273097, "learning_rate": 6.513544201135442e-06, "loss": 0.5454, "step": 24212 }, { "epoch": 0.7069282648681791, "grad_norm": 0.6899715160163612, "learning_rate": 6.5128953771289536e-06, "loss": 0.6929, "step": 24213 }, { "epoch": 0.7069574610960264, "grad_norm": 0.6569173973801209, "learning_rate": 6.5122465531224664e-06, "loss": 0.6493, "step": 24214 }, { "epoch": 0.7069866573238738, "grad_norm": 0.6298992221695902, "learning_rate": 6.5115977291159785e-06, "loss": 0.5943, "step": 24215 }, { "epoch": 0.7070158535517211, "grad_norm": 0.7119115046525758, "learning_rate": 6.51094890510949e-06, "loss": 0.6162, "step": 24216 }, { "epoch": 0.7070450497795685, "grad_norm": 0.6759479738721611, "learning_rate": 6.510300081103002e-06, "loss": 0.6155, "step": 24217 }, { "epoch": 0.7070742460074159, "grad_norm": 0.7184528985802269, "learning_rate": 6.509651257096513e-06, "loss": 0.6997, "step": 24218 }, { "epoch": 0.7071034422352632, "grad_norm": 0.6367505912070321, "learning_rate": 6.509002433090025e-06, "loss": 0.6001, "step": 24219 }, { "epoch": 0.7071326384631106, "grad_norm": 0.6554599772572219, "learning_rate": 6.508353609083536e-06, "loss": 0.614, "step": 24220 }, { "epoch": 0.707161834690958, "grad_norm": 0.7238249288537877, "learning_rate": 6.507704785077048e-06, "loss": 0.7182, "step": 24221 }, { "epoch": 0.7071910309188053, "grad_norm": 0.6616452362703349, "learning_rate": 6.50705596107056e-06, "loss": 0.6415, "step": 24222 }, { "epoch": 0.7072202271466527, "grad_norm": 0.5803884733402485, "learning_rate": 6.506407137064072e-06, "loss": 0.4822, "step": 24223 }, { "epoch": 0.7072494233745, "grad_norm": 0.608232736084781, "learning_rate": 6.505758313057584e-06, "loss": 0.5367, "step": 24224 }, { "epoch": 0.7072786196023474, "grad_norm": 0.6938188606390576, "learning_rate": 6.505109489051095e-06, "loss": 0.6672, "step": 24225 }, { "epoch": 0.7073078158301948, "grad_norm": 0.6150421520936168, "learning_rate": 6.504460665044607e-06, "loss": 0.5658, "step": 24226 }, { "epoch": 0.7073370120580421, "grad_norm": 0.6642260051444733, "learning_rate": 6.503811841038119e-06, "loss": 0.5654, "step": 24227 }, { "epoch": 0.7073662082858895, "grad_norm": 0.6572657257877949, "learning_rate": 6.503163017031631e-06, "loss": 0.673, "step": 24228 }, { "epoch": 0.7073954045137368, "grad_norm": 0.6398441320944677, "learning_rate": 6.502514193025143e-06, "loss": 0.5619, "step": 24229 }, { "epoch": 0.7074246007415842, "grad_norm": 0.6339527932898443, "learning_rate": 6.501865369018654e-06, "loss": 0.5859, "step": 24230 }, { "epoch": 0.7074537969694316, "grad_norm": 0.6561649313486678, "learning_rate": 6.501216545012166e-06, "loss": 0.6179, "step": 24231 }, { "epoch": 0.7074829931972789, "grad_norm": 0.6836613302065548, "learning_rate": 6.500567721005677e-06, "loss": 0.7163, "step": 24232 }, { "epoch": 0.7075121894251263, "grad_norm": 0.6686162465342784, "learning_rate": 6.49991889699919e-06, "loss": 0.6201, "step": 24233 }, { "epoch": 0.7075413856529736, "grad_norm": 0.5953005821812909, "learning_rate": 6.499270072992702e-06, "loss": 0.5108, "step": 24234 }, { "epoch": 0.707570581880821, "grad_norm": 0.7280166522562281, "learning_rate": 6.498621248986213e-06, "loss": 0.7045, "step": 24235 }, { "epoch": 0.7075997781086684, "grad_norm": 0.6446430046769687, "learning_rate": 6.497972424979725e-06, "loss": 0.5741, "step": 24236 }, { "epoch": 0.7076289743365157, "grad_norm": 0.6761470844858469, "learning_rate": 6.497323600973236e-06, "loss": 0.6431, "step": 24237 }, { "epoch": 0.7076581705643631, "grad_norm": 0.6252557135904174, "learning_rate": 6.496674776966748e-06, "loss": 0.5792, "step": 24238 }, { "epoch": 0.7076873667922104, "grad_norm": 0.6502825849288085, "learning_rate": 6.4960259529602595e-06, "loss": 0.5499, "step": 24239 }, { "epoch": 0.7077165630200578, "grad_norm": 0.6127879247428389, "learning_rate": 6.4953771289537715e-06, "loss": 0.5575, "step": 24240 }, { "epoch": 0.7077457592479052, "grad_norm": 0.651092312058407, "learning_rate": 6.494728304947283e-06, "loss": 0.6466, "step": 24241 }, { "epoch": 0.7077749554757525, "grad_norm": 0.6357110659833407, "learning_rate": 6.494079480940796e-06, "loss": 0.5878, "step": 24242 }, { "epoch": 0.7078041517035999, "grad_norm": 0.6312997070063318, "learning_rate": 6.493430656934308e-06, "loss": 0.5654, "step": 24243 }, { "epoch": 0.7078333479314473, "grad_norm": 0.5869687284423626, "learning_rate": 6.492781832927819e-06, "loss": 0.5339, "step": 24244 }, { "epoch": 0.7078625441592946, "grad_norm": 0.6490533470247556, "learning_rate": 6.492133008921331e-06, "loss": 0.6064, "step": 24245 }, { "epoch": 0.707891740387142, "grad_norm": 0.6341995685091183, "learning_rate": 6.491484184914843e-06, "loss": 0.5652, "step": 24246 }, { "epoch": 0.7079209366149893, "grad_norm": 0.6520918798318035, "learning_rate": 6.490835360908354e-06, "loss": 0.5881, "step": 24247 }, { "epoch": 0.7079501328428367, "grad_norm": 0.6690325288914762, "learning_rate": 6.490186536901866e-06, "loss": 0.6064, "step": 24248 }, { "epoch": 0.7079793290706841, "grad_norm": 0.6155436089719646, "learning_rate": 6.489537712895377e-06, "loss": 0.5245, "step": 24249 }, { "epoch": 0.7080085252985314, "grad_norm": 0.65468806722715, "learning_rate": 6.488888888888889e-06, "loss": 0.5811, "step": 24250 }, { "epoch": 0.7080377215263788, "grad_norm": 0.7353381128339976, "learning_rate": 6.4882400648824004e-06, "loss": 0.7532, "step": 24251 }, { "epoch": 0.7080669177542261, "grad_norm": 0.6284606767399946, "learning_rate": 6.487591240875913e-06, "loss": 0.5517, "step": 24252 }, { "epoch": 0.7080961139820735, "grad_norm": 0.6123512513553823, "learning_rate": 6.486942416869425e-06, "loss": 0.5736, "step": 24253 }, { "epoch": 0.7081253102099209, "grad_norm": 0.6561833311271016, "learning_rate": 6.4862935928629365e-06, "loss": 0.6217, "step": 24254 }, { "epoch": 0.7081545064377682, "grad_norm": 0.7016217953220701, "learning_rate": 6.4856447688564485e-06, "loss": 0.7174, "step": 24255 }, { "epoch": 0.7081837026656156, "grad_norm": 0.6424794429901337, "learning_rate": 6.48499594484996e-06, "loss": 0.5824, "step": 24256 }, { "epoch": 0.708212898893463, "grad_norm": 0.653766045646195, "learning_rate": 6.484347120843472e-06, "loss": 0.6614, "step": 24257 }, { "epoch": 0.7082420951213103, "grad_norm": 0.6549317237370081, "learning_rate": 6.483698296836983e-06, "loss": 0.6135, "step": 24258 }, { "epoch": 0.7082712913491577, "grad_norm": 0.6147468928469777, "learning_rate": 6.483049472830495e-06, "loss": 0.519, "step": 24259 }, { "epoch": 0.708300487577005, "grad_norm": 0.6305604591297894, "learning_rate": 6.482400648824006e-06, "loss": 0.5587, "step": 24260 }, { "epoch": 0.7083296838048524, "grad_norm": 0.6373226426391045, "learning_rate": 6.481751824817519e-06, "loss": 0.5967, "step": 24261 }, { "epoch": 0.7083588800326998, "grad_norm": 0.6128734421039993, "learning_rate": 6.481103000811031e-06, "loss": 0.5725, "step": 24262 }, { "epoch": 0.7083880762605471, "grad_norm": 0.6495526229441725, "learning_rate": 6.480454176804542e-06, "loss": 0.5685, "step": 24263 }, { "epoch": 0.7084172724883945, "grad_norm": 0.6725695929527793, "learning_rate": 6.479805352798054e-06, "loss": 0.64, "step": 24264 }, { "epoch": 0.7084464687162418, "grad_norm": 0.6389283795769806, "learning_rate": 6.479156528791566e-06, "loss": 0.6231, "step": 24265 }, { "epoch": 0.7084756649440892, "grad_norm": 0.6177638236741279, "learning_rate": 6.4785077047850775e-06, "loss": 0.6002, "step": 24266 }, { "epoch": 0.7085048611719366, "grad_norm": 0.762291760066353, "learning_rate": 6.4778588807785895e-06, "loss": 0.6542, "step": 24267 }, { "epoch": 0.7085340573997839, "grad_norm": 0.6242463081673764, "learning_rate": 6.477210056772101e-06, "loss": 0.5514, "step": 24268 }, { "epoch": 0.7085632536276313, "grad_norm": 0.6483031465862236, "learning_rate": 6.476561232765613e-06, "loss": 0.6012, "step": 24269 }, { "epoch": 0.7085924498554786, "grad_norm": 0.6804583427619143, "learning_rate": 6.475912408759124e-06, "loss": 0.6417, "step": 24270 }, { "epoch": 0.708621646083326, "grad_norm": 0.6108023777632866, "learning_rate": 6.475263584752637e-06, "loss": 0.543, "step": 24271 }, { "epoch": 0.7086508423111734, "grad_norm": 0.6443476218714697, "learning_rate": 6.474614760746149e-06, "loss": 0.6249, "step": 24272 }, { "epoch": 0.7086800385390207, "grad_norm": 0.6181783210860715, "learning_rate": 6.47396593673966e-06, "loss": 0.5929, "step": 24273 }, { "epoch": 0.7087092347668681, "grad_norm": 0.6818363320056071, "learning_rate": 6.473317112733172e-06, "loss": 0.5995, "step": 24274 }, { "epoch": 0.7087384309947155, "grad_norm": 0.6220808479101031, "learning_rate": 6.472668288726683e-06, "loss": 0.5495, "step": 24275 }, { "epoch": 0.7087676272225628, "grad_norm": 0.702191750152006, "learning_rate": 6.472019464720195e-06, "loss": 0.5986, "step": 24276 }, { "epoch": 0.7087968234504102, "grad_norm": 0.6990479540213232, "learning_rate": 6.471370640713706e-06, "loss": 0.6685, "step": 24277 }, { "epoch": 0.7088260196782575, "grad_norm": 0.6243125639107469, "learning_rate": 6.470721816707218e-06, "loss": 0.5756, "step": 24278 }, { "epoch": 0.7088552159061049, "grad_norm": 0.6889229461561648, "learning_rate": 6.4700729927007296e-06, "loss": 0.6849, "step": 24279 }, { "epoch": 0.7088844121339523, "grad_norm": 0.6664063945705319, "learning_rate": 6.4694241686942424e-06, "loss": 0.6359, "step": 24280 }, { "epoch": 0.7089136083617996, "grad_norm": 0.6640929175032265, "learning_rate": 6.4687753446877545e-06, "loss": 0.6202, "step": 24281 }, { "epoch": 0.708942804589647, "grad_norm": 0.6364477741630477, "learning_rate": 6.468126520681266e-06, "loss": 0.578, "step": 24282 }, { "epoch": 0.7089720008174943, "grad_norm": 0.6257162510457324, "learning_rate": 6.467477696674778e-06, "loss": 0.5812, "step": 24283 }, { "epoch": 0.7090011970453417, "grad_norm": 0.6178743245015615, "learning_rate": 6.46682887266829e-06, "loss": 0.5554, "step": 24284 }, { "epoch": 0.7090303932731891, "grad_norm": 0.6071630719176803, "learning_rate": 6.466180048661801e-06, "loss": 0.5572, "step": 24285 }, { "epoch": 0.7090595895010364, "grad_norm": 0.6772323399768948, "learning_rate": 6.465531224655313e-06, "loss": 0.6293, "step": 24286 }, { "epoch": 0.7090887857288838, "grad_norm": 0.6350986711413151, "learning_rate": 6.464882400648824e-06, "loss": 0.5929, "step": 24287 }, { "epoch": 0.7091179819567311, "grad_norm": 0.6989392630950162, "learning_rate": 6.464233576642336e-06, "loss": 0.6922, "step": 24288 }, { "epoch": 0.7091471781845785, "grad_norm": 0.635717360021076, "learning_rate": 6.463584752635847e-06, "loss": 0.578, "step": 24289 }, { "epoch": 0.7091763744124259, "grad_norm": 0.6365926547212591, "learning_rate": 6.46293592862936e-06, "loss": 0.6138, "step": 24290 }, { "epoch": 0.7092055706402732, "grad_norm": 0.6765133542231171, "learning_rate": 6.462287104622872e-06, "loss": 0.6612, "step": 24291 }, { "epoch": 0.7092347668681206, "grad_norm": 0.6611116881615754, "learning_rate": 6.461638280616383e-06, "loss": 0.5728, "step": 24292 }, { "epoch": 0.709263963095968, "grad_norm": 0.6977586820696263, "learning_rate": 6.460989456609895e-06, "loss": 0.6796, "step": 24293 }, { "epoch": 0.7092931593238153, "grad_norm": 0.6609236975851931, "learning_rate": 6.460340632603407e-06, "loss": 0.5871, "step": 24294 }, { "epoch": 0.7093223555516627, "grad_norm": 0.6392978504073562, "learning_rate": 6.459691808596919e-06, "loss": 0.5745, "step": 24295 }, { "epoch": 0.70935155177951, "grad_norm": 0.7341821394107327, "learning_rate": 6.45904298459043e-06, "loss": 0.7613, "step": 24296 }, { "epoch": 0.7093807480073574, "grad_norm": 0.6107582960811945, "learning_rate": 6.458394160583942e-06, "loss": 0.551, "step": 24297 }, { "epoch": 0.7094099442352048, "grad_norm": 0.6335840929789143, "learning_rate": 6.457745336577453e-06, "loss": 0.5884, "step": 24298 }, { "epoch": 0.7094391404630521, "grad_norm": 0.6885717711347973, "learning_rate": 6.457096512570966e-06, "loss": 0.6162, "step": 24299 }, { "epoch": 0.7094683366908995, "grad_norm": 0.6073141678589391, "learning_rate": 6.456447688564478e-06, "loss": 0.5618, "step": 24300 }, { "epoch": 0.7094975329187468, "grad_norm": 0.6161369650662787, "learning_rate": 6.455798864557989e-06, "loss": 0.5475, "step": 24301 }, { "epoch": 0.7095267291465943, "grad_norm": 0.5973044196274179, "learning_rate": 6.455150040551501e-06, "loss": 0.5015, "step": 24302 }, { "epoch": 0.7095559253744417, "grad_norm": 0.6681292325729637, "learning_rate": 6.454501216545012e-06, "loss": 0.6235, "step": 24303 }, { "epoch": 0.709585121602289, "grad_norm": 0.6534265637535951, "learning_rate": 6.453852392538524e-06, "loss": 0.6076, "step": 24304 }, { "epoch": 0.7096143178301364, "grad_norm": 0.6356776476387221, "learning_rate": 6.453203568532036e-06, "loss": 0.5718, "step": 24305 }, { "epoch": 0.7096435140579838, "grad_norm": 0.6631547952909812, "learning_rate": 6.4525547445255475e-06, "loss": 0.6117, "step": 24306 }, { "epoch": 0.7096727102858311, "grad_norm": 0.6556538536424276, "learning_rate": 6.4519059205190596e-06, "loss": 0.6207, "step": 24307 }, { "epoch": 0.7097019065136785, "grad_norm": 0.6541671934341557, "learning_rate": 6.4512570965125724e-06, "loss": 0.6225, "step": 24308 }, { "epoch": 0.7097311027415258, "grad_norm": 0.6509889012839105, "learning_rate": 6.450608272506084e-06, "loss": 0.5802, "step": 24309 }, { "epoch": 0.7097602989693732, "grad_norm": 0.6455905047864441, "learning_rate": 6.449959448499596e-06, "loss": 0.5916, "step": 24310 }, { "epoch": 0.7097894951972206, "grad_norm": 0.6334218467033065, "learning_rate": 6.449310624493107e-06, "loss": 0.5917, "step": 24311 }, { "epoch": 0.7098186914250679, "grad_norm": 0.6533037992295794, "learning_rate": 6.448661800486619e-06, "loss": 0.5677, "step": 24312 }, { "epoch": 0.7098478876529153, "grad_norm": 0.6448210900403789, "learning_rate": 6.44801297648013e-06, "loss": 0.5834, "step": 24313 }, { "epoch": 0.7098770838807626, "grad_norm": 0.6518052585182368, "learning_rate": 6.447364152473642e-06, "loss": 0.6302, "step": 24314 }, { "epoch": 0.70990628010861, "grad_norm": 0.6262456000230127, "learning_rate": 6.446715328467153e-06, "loss": 0.5668, "step": 24315 }, { "epoch": 0.7099354763364574, "grad_norm": 0.6574472334812527, "learning_rate": 6.446066504460665e-06, "loss": 0.6532, "step": 24316 }, { "epoch": 0.7099646725643047, "grad_norm": 0.6556915938520695, "learning_rate": 6.4454176804541764e-06, "loss": 0.6149, "step": 24317 }, { "epoch": 0.7099938687921521, "grad_norm": 0.6238295337448473, "learning_rate": 6.444768856447689e-06, "loss": 0.5359, "step": 24318 }, { "epoch": 0.7100230650199995, "grad_norm": 0.6498779871124413, "learning_rate": 6.444120032441201e-06, "loss": 0.6152, "step": 24319 }, { "epoch": 0.7100522612478468, "grad_norm": 0.6523504972032295, "learning_rate": 6.4434712084347125e-06, "loss": 0.5545, "step": 24320 }, { "epoch": 0.7100814574756942, "grad_norm": 0.5837471844884634, "learning_rate": 6.4428223844282246e-06, "loss": 0.4975, "step": 24321 }, { "epoch": 0.7101106537035415, "grad_norm": 0.6908316123584872, "learning_rate": 6.442173560421736e-06, "loss": 0.684, "step": 24322 }, { "epoch": 0.7101398499313889, "grad_norm": 0.623044439128021, "learning_rate": 6.441524736415248e-06, "loss": 0.5528, "step": 24323 }, { "epoch": 0.7101690461592363, "grad_norm": 0.6527242196130145, "learning_rate": 6.44087591240876e-06, "loss": 0.5934, "step": 24324 }, { "epoch": 0.7101982423870836, "grad_norm": 0.6386840432392796, "learning_rate": 6.440227088402271e-06, "loss": 0.6113, "step": 24325 }, { "epoch": 0.710227438614931, "grad_norm": 0.6215717360521376, "learning_rate": 6.439578264395783e-06, "loss": 0.6156, "step": 24326 }, { "epoch": 0.7102566348427783, "grad_norm": 0.6052445377804428, "learning_rate": 6.438929440389296e-06, "loss": 0.5554, "step": 24327 }, { "epoch": 0.7102858310706257, "grad_norm": 0.6953825735780578, "learning_rate": 6.438280616382807e-06, "loss": 0.6654, "step": 24328 }, { "epoch": 0.7103150272984731, "grad_norm": 0.637014875960748, "learning_rate": 6.437631792376319e-06, "loss": 0.5947, "step": 24329 }, { "epoch": 0.7103442235263204, "grad_norm": 0.6405100652135499, "learning_rate": 6.43698296836983e-06, "loss": 0.5825, "step": 24330 }, { "epoch": 0.7103734197541678, "grad_norm": 0.6485469748823204, "learning_rate": 6.436334144363342e-06, "loss": 0.582, "step": 24331 }, { "epoch": 0.7104026159820152, "grad_norm": 0.6291303779040125, "learning_rate": 6.4356853203568535e-06, "loss": 0.5815, "step": 24332 }, { "epoch": 0.7104318122098625, "grad_norm": 0.662276021279004, "learning_rate": 6.4350364963503655e-06, "loss": 0.6214, "step": 24333 }, { "epoch": 0.7104610084377099, "grad_norm": 0.6550985713034149, "learning_rate": 6.434387672343877e-06, "loss": 0.6512, "step": 24334 }, { "epoch": 0.7104902046655572, "grad_norm": 0.5977823960727687, "learning_rate": 6.433738848337389e-06, "loss": 0.5359, "step": 24335 }, { "epoch": 0.7105194008934046, "grad_norm": 0.6182669199501525, "learning_rate": 6.4330900243309e-06, "loss": 0.5282, "step": 24336 }, { "epoch": 0.710548597121252, "grad_norm": 0.6595040534227093, "learning_rate": 6.432441200324413e-06, "loss": 0.6501, "step": 24337 }, { "epoch": 0.7105777933490993, "grad_norm": 0.6281412508678269, "learning_rate": 6.431792376317925e-06, "loss": 0.5942, "step": 24338 }, { "epoch": 0.7106069895769467, "grad_norm": 0.7067864516074369, "learning_rate": 6.431143552311436e-06, "loss": 0.6358, "step": 24339 }, { "epoch": 0.710636185804794, "grad_norm": 0.6393424276861243, "learning_rate": 6.430494728304948e-06, "loss": 0.5722, "step": 24340 }, { "epoch": 0.7106653820326414, "grad_norm": 0.6108612793874447, "learning_rate": 6.429845904298459e-06, "loss": 0.5572, "step": 24341 }, { "epoch": 0.7106945782604888, "grad_norm": 0.6591769463490688, "learning_rate": 6.429197080291971e-06, "loss": 0.6234, "step": 24342 }, { "epoch": 0.7107237744883361, "grad_norm": 0.7009157678629606, "learning_rate": 6.428548256285483e-06, "loss": 0.6919, "step": 24343 }, { "epoch": 0.7107529707161835, "grad_norm": 0.6736935451714646, "learning_rate": 6.427899432278994e-06, "loss": 0.6756, "step": 24344 }, { "epoch": 0.7107821669440308, "grad_norm": 0.6363941266725122, "learning_rate": 6.4272506082725064e-06, "loss": 0.6221, "step": 24345 }, { "epoch": 0.7108113631718782, "grad_norm": 0.6544236691925435, "learning_rate": 6.426601784266019e-06, "loss": 0.5952, "step": 24346 }, { "epoch": 0.7108405593997256, "grad_norm": 0.6569795501632594, "learning_rate": 6.4259529602595305e-06, "loss": 0.6187, "step": 24347 }, { "epoch": 0.7108697556275729, "grad_norm": 0.6835625126722525, "learning_rate": 6.4253041362530425e-06, "loss": 0.6661, "step": 24348 }, { "epoch": 0.7108989518554203, "grad_norm": 0.6487038876128833, "learning_rate": 6.424655312246554e-06, "loss": 0.6173, "step": 24349 }, { "epoch": 0.7109281480832677, "grad_norm": 0.5956214510003012, "learning_rate": 6.424006488240066e-06, "loss": 0.5395, "step": 24350 }, { "epoch": 0.710957344311115, "grad_norm": 0.6326379096684933, "learning_rate": 6.423357664233577e-06, "loss": 0.5972, "step": 24351 }, { "epoch": 0.7109865405389624, "grad_norm": 0.5979226567776404, "learning_rate": 6.422708840227089e-06, "loss": 0.5151, "step": 24352 }, { "epoch": 0.7110157367668097, "grad_norm": 0.6172351111029504, "learning_rate": 6.4220600162206e-06, "loss": 0.6039, "step": 24353 }, { "epoch": 0.7110449329946571, "grad_norm": 0.6081508148623956, "learning_rate": 6.421411192214112e-06, "loss": 0.5396, "step": 24354 }, { "epoch": 0.7110741292225045, "grad_norm": 0.7051303136531675, "learning_rate": 6.420762368207623e-06, "loss": 0.6432, "step": 24355 }, { "epoch": 0.7111033254503518, "grad_norm": 0.6468394483896656, "learning_rate": 6.420113544201136e-06, "loss": 0.6178, "step": 24356 }, { "epoch": 0.7111325216781992, "grad_norm": 0.6666197442539927, "learning_rate": 6.419464720194648e-06, "loss": 0.5839, "step": 24357 }, { "epoch": 0.7111617179060465, "grad_norm": 0.5921885116205591, "learning_rate": 6.418815896188159e-06, "loss": 0.5332, "step": 24358 }, { "epoch": 0.7111909141338939, "grad_norm": 0.6737286227411754, "learning_rate": 6.418167072181671e-06, "loss": 0.5931, "step": 24359 }, { "epoch": 0.7112201103617413, "grad_norm": 0.6368459649408009, "learning_rate": 6.417518248175183e-06, "loss": 0.598, "step": 24360 }, { "epoch": 0.7112493065895886, "grad_norm": 0.6693410978127803, "learning_rate": 6.416869424168695e-06, "loss": 0.642, "step": 24361 }, { "epoch": 0.711278502817436, "grad_norm": 0.644578648852437, "learning_rate": 6.416220600162207e-06, "loss": 0.5782, "step": 24362 }, { "epoch": 0.7113076990452833, "grad_norm": 0.6273550945806902, "learning_rate": 6.415571776155718e-06, "loss": 0.5831, "step": 24363 }, { "epoch": 0.7113368952731307, "grad_norm": 0.6617034170507013, "learning_rate": 6.41492295214923e-06, "loss": 0.6378, "step": 24364 }, { "epoch": 0.7113660915009781, "grad_norm": 0.6527688762204563, "learning_rate": 6.414274128142742e-06, "loss": 0.6335, "step": 24365 }, { "epoch": 0.7113952877288254, "grad_norm": 0.6358553964099032, "learning_rate": 6.413625304136254e-06, "loss": 0.584, "step": 24366 }, { "epoch": 0.7114244839566728, "grad_norm": 0.6577437667403165, "learning_rate": 6.412976480129766e-06, "loss": 0.6462, "step": 24367 }, { "epoch": 0.7114536801845202, "grad_norm": 0.6331439817143536, "learning_rate": 6.412327656123277e-06, "loss": 0.5885, "step": 24368 }, { "epoch": 0.7114828764123675, "grad_norm": 0.6117146120738356, "learning_rate": 6.411678832116789e-06, "loss": 0.4978, "step": 24369 }, { "epoch": 0.7115120726402149, "grad_norm": 0.6808948876410805, "learning_rate": 6.4110300081103e-06, "loss": 0.6659, "step": 24370 }, { "epoch": 0.7115412688680622, "grad_norm": 0.608884394984378, "learning_rate": 6.410381184103812e-06, "loss": 0.5514, "step": 24371 }, { "epoch": 0.7115704650959096, "grad_norm": 0.6676731269253259, "learning_rate": 6.4097323600973235e-06, "loss": 0.628, "step": 24372 }, { "epoch": 0.711599661323757, "grad_norm": 0.6430337379722199, "learning_rate": 6.4090835360908356e-06, "loss": 0.5726, "step": 24373 }, { "epoch": 0.7116288575516043, "grad_norm": 0.6531073713755761, "learning_rate": 6.4084347120843484e-06, "loss": 0.6011, "step": 24374 }, { "epoch": 0.7116580537794517, "grad_norm": 0.690152693650515, "learning_rate": 6.40778588807786e-06, "loss": 0.6603, "step": 24375 }, { "epoch": 0.711687250007299, "grad_norm": 0.6578517593180924, "learning_rate": 6.407137064071372e-06, "loss": 0.6392, "step": 24376 }, { "epoch": 0.7117164462351464, "grad_norm": 0.6975079020723713, "learning_rate": 6.406488240064883e-06, "loss": 0.7207, "step": 24377 }, { "epoch": 0.7117456424629938, "grad_norm": 0.6508746682903577, "learning_rate": 6.405839416058395e-06, "loss": 0.6006, "step": 24378 }, { "epoch": 0.7117748386908411, "grad_norm": 0.6641156836565723, "learning_rate": 6.405190592051906e-06, "loss": 0.6231, "step": 24379 }, { "epoch": 0.7118040349186885, "grad_norm": 0.6353225595761834, "learning_rate": 6.404541768045418e-06, "loss": 0.5876, "step": 24380 }, { "epoch": 0.7118332311465358, "grad_norm": 0.6487724717210074, "learning_rate": 6.40389294403893e-06, "loss": 0.6206, "step": 24381 }, { "epoch": 0.7118624273743832, "grad_norm": 0.6551740383489804, "learning_rate": 6.403244120032441e-06, "loss": 0.6209, "step": 24382 }, { "epoch": 0.7118916236022306, "grad_norm": 0.6279909916926643, "learning_rate": 6.402595296025953e-06, "loss": 0.5567, "step": 24383 }, { "epoch": 0.7119208198300779, "grad_norm": 0.6662993232455787, "learning_rate": 6.401946472019465e-06, "loss": 0.6468, "step": 24384 }, { "epoch": 0.7119500160579253, "grad_norm": 0.6612139384710034, "learning_rate": 6.401297648012977e-06, "loss": 0.6239, "step": 24385 }, { "epoch": 0.7119792122857727, "grad_norm": 0.7194657768871797, "learning_rate": 6.400648824006489e-06, "loss": 0.6716, "step": 24386 }, { "epoch": 0.71200840851362, "grad_norm": 0.6673023899753783, "learning_rate": 6.4000000000000006e-06, "loss": 0.6204, "step": 24387 }, { "epoch": 0.7120376047414674, "grad_norm": 0.652558789105623, "learning_rate": 6.399351175993513e-06, "loss": 0.6031, "step": 24388 }, { "epoch": 0.7120668009693147, "grad_norm": 0.6496919335056344, "learning_rate": 6.398702351987024e-06, "loss": 0.6054, "step": 24389 }, { "epoch": 0.7120959971971621, "grad_norm": 0.6356874933564245, "learning_rate": 6.398053527980536e-06, "loss": 0.5989, "step": 24390 }, { "epoch": 0.7121251934250095, "grad_norm": 0.6937774596672852, "learning_rate": 6.397404703974047e-06, "loss": 0.5756, "step": 24391 }, { "epoch": 0.7121543896528568, "grad_norm": 0.6448961122131452, "learning_rate": 6.396755879967559e-06, "loss": 0.6062, "step": 24392 }, { "epoch": 0.7121835858807042, "grad_norm": 0.539725362877181, "learning_rate": 6.396107055961072e-06, "loss": 0.4137, "step": 24393 }, { "epoch": 0.7122127821085515, "grad_norm": 0.5891007208403833, "learning_rate": 6.395458231954583e-06, "loss": 0.5025, "step": 24394 }, { "epoch": 0.7122419783363989, "grad_norm": 0.6471673110255788, "learning_rate": 6.394809407948095e-06, "loss": 0.5441, "step": 24395 }, { "epoch": 0.7122711745642463, "grad_norm": 0.6456884809477589, "learning_rate": 6.394160583941606e-06, "loss": 0.5987, "step": 24396 }, { "epoch": 0.7123003707920936, "grad_norm": 0.6359356111417608, "learning_rate": 6.393511759935118e-06, "loss": 0.6342, "step": 24397 }, { "epoch": 0.712329567019941, "grad_norm": 0.6686093685043545, "learning_rate": 6.3928629359286295e-06, "loss": 0.6628, "step": 24398 }, { "epoch": 0.7123587632477884, "grad_norm": 0.5705380029644488, "learning_rate": 6.3922141119221415e-06, "loss": 0.4926, "step": 24399 }, { "epoch": 0.7123879594756357, "grad_norm": 0.6832694109053178, "learning_rate": 6.3915652879156535e-06, "loss": 0.6485, "step": 24400 }, { "epoch": 0.7124171557034831, "grad_norm": 0.6423936306411072, "learning_rate": 6.390916463909165e-06, "loss": 0.5984, "step": 24401 }, { "epoch": 0.7124463519313304, "grad_norm": 0.585364797944385, "learning_rate": 6.390267639902677e-06, "loss": 0.5203, "step": 24402 }, { "epoch": 0.7124755481591778, "grad_norm": 0.6003239979934905, "learning_rate": 6.389618815896189e-06, "loss": 0.535, "step": 24403 }, { "epoch": 0.7125047443870252, "grad_norm": 0.6706524116573293, "learning_rate": 6.388969991889701e-06, "loss": 0.6349, "step": 24404 }, { "epoch": 0.7125339406148725, "grad_norm": 0.6535230778863603, "learning_rate": 6.388321167883213e-06, "loss": 0.6543, "step": 24405 }, { "epoch": 0.7125631368427199, "grad_norm": 0.6885463619660382, "learning_rate": 6.387672343876724e-06, "loss": 0.7204, "step": 24406 }, { "epoch": 0.7125923330705672, "grad_norm": 0.6278803221967834, "learning_rate": 6.387023519870236e-06, "loss": 0.5791, "step": 24407 }, { "epoch": 0.7126215292984146, "grad_norm": 0.6417090689031706, "learning_rate": 6.386374695863747e-06, "loss": 0.5312, "step": 24408 }, { "epoch": 0.712650725526262, "grad_norm": 0.6775547856563509, "learning_rate": 6.385725871857259e-06, "loss": 0.643, "step": 24409 }, { "epoch": 0.7126799217541093, "grad_norm": 0.6083323743325492, "learning_rate": 6.38507704785077e-06, "loss": 0.5571, "step": 24410 }, { "epoch": 0.7127091179819567, "grad_norm": 0.644044425679842, "learning_rate": 6.3844282238442824e-06, "loss": 0.5722, "step": 24411 }, { "epoch": 0.712738314209804, "grad_norm": 0.6266753995404177, "learning_rate": 6.383779399837795e-06, "loss": 0.6149, "step": 24412 }, { "epoch": 0.7127675104376514, "grad_norm": 0.6403279964714095, "learning_rate": 6.3831305758313065e-06, "loss": 0.6309, "step": 24413 }, { "epoch": 0.7127967066654988, "grad_norm": 0.6616137789557232, "learning_rate": 6.3824817518248185e-06, "loss": 0.6052, "step": 24414 }, { "epoch": 0.7128259028933461, "grad_norm": 0.6116743310950034, "learning_rate": 6.38183292781833e-06, "loss": 0.5094, "step": 24415 }, { "epoch": 0.7128550991211935, "grad_norm": 0.6422042727322332, "learning_rate": 6.381184103811842e-06, "loss": 0.5665, "step": 24416 }, { "epoch": 0.7128842953490409, "grad_norm": 0.6566463750357436, "learning_rate": 6.380535279805353e-06, "loss": 0.5959, "step": 24417 }, { "epoch": 0.7129134915768882, "grad_norm": 0.6128241135307789, "learning_rate": 6.379886455798865e-06, "loss": 0.5489, "step": 24418 }, { "epoch": 0.7129426878047356, "grad_norm": 0.6449762090836914, "learning_rate": 6.379237631792377e-06, "loss": 0.6376, "step": 24419 }, { "epoch": 0.7129718840325829, "grad_norm": 0.6002591575340613, "learning_rate": 6.378588807785888e-06, "loss": 0.532, "step": 24420 }, { "epoch": 0.7130010802604303, "grad_norm": 0.6258758665571266, "learning_rate": 6.3779399837794e-06, "loss": 0.5835, "step": 24421 }, { "epoch": 0.7130302764882778, "grad_norm": 0.7006122095759071, "learning_rate": 6.377291159772912e-06, "loss": 0.6912, "step": 24422 }, { "epoch": 0.7130594727161251, "grad_norm": 0.679215256524647, "learning_rate": 6.376642335766424e-06, "loss": 0.6541, "step": 24423 }, { "epoch": 0.7130886689439725, "grad_norm": 0.6181774063400197, "learning_rate": 6.375993511759936e-06, "loss": 0.588, "step": 24424 }, { "epoch": 0.7131178651718199, "grad_norm": 0.6342037650968011, "learning_rate": 6.3753446877534474e-06, "loss": 0.5548, "step": 24425 }, { "epoch": 0.7131470613996672, "grad_norm": 0.666772573490292, "learning_rate": 6.3746958637469595e-06, "loss": 0.6442, "step": 24426 }, { "epoch": 0.7131762576275146, "grad_norm": 0.6903513192508024, "learning_rate": 6.374047039740471e-06, "loss": 0.6483, "step": 24427 }, { "epoch": 0.7132054538553619, "grad_norm": 0.6159349437474136, "learning_rate": 6.373398215733983e-06, "loss": 0.5519, "step": 24428 }, { "epoch": 0.7132346500832093, "grad_norm": 0.6296724384494508, "learning_rate": 6.372749391727494e-06, "loss": 0.5545, "step": 24429 }, { "epoch": 0.7132638463110567, "grad_norm": 0.6336751658668465, "learning_rate": 6.372100567721006e-06, "loss": 0.574, "step": 24430 }, { "epoch": 0.713293042538904, "grad_norm": 0.6459926558580028, "learning_rate": 6.371451743714519e-06, "loss": 0.5939, "step": 24431 }, { "epoch": 0.7133222387667514, "grad_norm": 0.6287780812445926, "learning_rate": 6.37080291970803e-06, "loss": 0.5791, "step": 24432 }, { "epoch": 0.7133514349945987, "grad_norm": 0.6623851453740186, "learning_rate": 6.370154095701542e-06, "loss": 0.6196, "step": 24433 }, { "epoch": 0.7133806312224461, "grad_norm": 0.6565240502133425, "learning_rate": 6.369505271695053e-06, "loss": 0.6727, "step": 24434 }, { "epoch": 0.7134098274502935, "grad_norm": 0.7104138133702592, "learning_rate": 6.368856447688565e-06, "loss": 0.6261, "step": 24435 }, { "epoch": 0.7134390236781408, "grad_norm": 0.6084326717199465, "learning_rate": 6.368207623682076e-06, "loss": 0.5968, "step": 24436 }, { "epoch": 0.7134682199059882, "grad_norm": 0.6200940754537495, "learning_rate": 6.367558799675588e-06, "loss": 0.5378, "step": 24437 }, { "epoch": 0.7134974161338355, "grad_norm": 0.6032340599113274, "learning_rate": 6.3669099756690995e-06, "loss": 0.5136, "step": 24438 }, { "epoch": 0.7135266123616829, "grad_norm": 0.6413277748236266, "learning_rate": 6.3662611516626116e-06, "loss": 0.6098, "step": 24439 }, { "epoch": 0.7135558085895303, "grad_norm": 0.6549711972084132, "learning_rate": 6.365612327656124e-06, "loss": 0.6305, "step": 24440 }, { "epoch": 0.7135850048173776, "grad_norm": 0.6221715834777867, "learning_rate": 6.364963503649636e-06, "loss": 0.5564, "step": 24441 }, { "epoch": 0.713614201045225, "grad_norm": 0.5705318108990263, "learning_rate": 6.364314679643148e-06, "loss": 0.513, "step": 24442 }, { "epoch": 0.7136433972730724, "grad_norm": 0.6399337438671615, "learning_rate": 6.36366585563666e-06, "loss": 0.5842, "step": 24443 }, { "epoch": 0.7136725935009197, "grad_norm": 0.6856526423073714, "learning_rate": 6.363017031630171e-06, "loss": 0.6337, "step": 24444 }, { "epoch": 0.7137017897287671, "grad_norm": 0.6050636361383425, "learning_rate": 6.362368207623683e-06, "loss": 0.5494, "step": 24445 }, { "epoch": 0.7137309859566144, "grad_norm": 0.6359896618642622, "learning_rate": 6.361719383617194e-06, "loss": 0.5803, "step": 24446 }, { "epoch": 0.7137601821844618, "grad_norm": 0.6088291226227484, "learning_rate": 6.361070559610706e-06, "loss": 0.5391, "step": 24447 }, { "epoch": 0.7137893784123092, "grad_norm": 0.6341338076236702, "learning_rate": 6.360421735604217e-06, "loss": 0.5901, "step": 24448 }, { "epoch": 0.7138185746401565, "grad_norm": 0.6658714223254193, "learning_rate": 6.359772911597729e-06, "loss": 0.6104, "step": 24449 }, { "epoch": 0.7138477708680039, "grad_norm": 0.6828070231825843, "learning_rate": 6.359124087591242e-06, "loss": 0.5962, "step": 24450 }, { "epoch": 0.7138769670958512, "grad_norm": 0.6568852659022281, "learning_rate": 6.358475263584753e-06, "loss": 0.6261, "step": 24451 }, { "epoch": 0.7139061633236986, "grad_norm": 0.6832460609096787, "learning_rate": 6.357826439578265e-06, "loss": 0.6324, "step": 24452 }, { "epoch": 0.713935359551546, "grad_norm": 0.6442471624733046, "learning_rate": 6.3571776155717766e-06, "loss": 0.6248, "step": 24453 }, { "epoch": 0.7139645557793933, "grad_norm": 0.6621326941014609, "learning_rate": 6.356528791565289e-06, "loss": 0.6498, "step": 24454 }, { "epoch": 0.7139937520072407, "grad_norm": 0.6297804982030493, "learning_rate": 6.3558799675588e-06, "loss": 0.5973, "step": 24455 }, { "epoch": 0.714022948235088, "grad_norm": 0.6570781827483416, "learning_rate": 6.355231143552312e-06, "loss": 0.6086, "step": 24456 }, { "epoch": 0.7140521444629354, "grad_norm": 0.6958957566192535, "learning_rate": 6.354582319545823e-06, "loss": 0.6806, "step": 24457 }, { "epoch": 0.7140813406907828, "grad_norm": 0.6653767356225258, "learning_rate": 6.353933495539335e-06, "loss": 0.6221, "step": 24458 }, { "epoch": 0.7141105369186301, "grad_norm": 0.6440325045918116, "learning_rate": 6.353284671532848e-06, "loss": 0.5434, "step": 24459 }, { "epoch": 0.7141397331464775, "grad_norm": 0.6491775133884174, "learning_rate": 6.352635847526359e-06, "loss": 0.5809, "step": 24460 }, { "epoch": 0.7141689293743249, "grad_norm": 0.639024801559817, "learning_rate": 6.351987023519871e-06, "loss": 0.5992, "step": 24461 }, { "epoch": 0.7141981256021722, "grad_norm": 0.6913426383723589, "learning_rate": 6.351338199513383e-06, "loss": 0.6073, "step": 24462 }, { "epoch": 0.7142273218300196, "grad_norm": 0.6312436035253581, "learning_rate": 6.350689375506894e-06, "loss": 0.6101, "step": 24463 }, { "epoch": 0.7142565180578669, "grad_norm": 0.6822173885834638, "learning_rate": 6.350040551500406e-06, "loss": 0.6783, "step": 24464 }, { "epoch": 0.7142857142857143, "grad_norm": 0.6095930324811579, "learning_rate": 6.3493917274939175e-06, "loss": 0.5174, "step": 24465 }, { "epoch": 0.7143149105135617, "grad_norm": 0.5984139402561129, "learning_rate": 6.3487429034874295e-06, "loss": 0.5511, "step": 24466 }, { "epoch": 0.714344106741409, "grad_norm": 0.6417775912713015, "learning_rate": 6.348094079480941e-06, "loss": 0.596, "step": 24467 }, { "epoch": 0.7143733029692564, "grad_norm": 0.6239059201216341, "learning_rate": 6.347445255474453e-06, "loss": 0.5673, "step": 24468 }, { "epoch": 0.7144024991971037, "grad_norm": 0.7519432730537764, "learning_rate": 6.346796431467966e-06, "loss": 0.661, "step": 24469 }, { "epoch": 0.7144316954249511, "grad_norm": 0.6129176231733788, "learning_rate": 6.346147607461477e-06, "loss": 0.5707, "step": 24470 }, { "epoch": 0.7144608916527985, "grad_norm": 0.6343541249496106, "learning_rate": 6.345498783454989e-06, "loss": 0.5697, "step": 24471 }, { "epoch": 0.7144900878806458, "grad_norm": 0.7164735312309022, "learning_rate": 6.3448499594485e-06, "loss": 0.5755, "step": 24472 }, { "epoch": 0.7145192841084932, "grad_norm": 0.6561937470655675, "learning_rate": 6.344201135442012e-06, "loss": 0.5886, "step": 24473 }, { "epoch": 0.7145484803363406, "grad_norm": 0.6451134015144243, "learning_rate": 6.343552311435523e-06, "loss": 0.6219, "step": 24474 }, { "epoch": 0.7145776765641879, "grad_norm": 0.6579456043617355, "learning_rate": 6.342903487429035e-06, "loss": 0.6371, "step": 24475 }, { "epoch": 0.7146068727920353, "grad_norm": 0.6311614269371503, "learning_rate": 6.342254663422546e-06, "loss": 0.5751, "step": 24476 }, { "epoch": 0.7146360690198826, "grad_norm": 0.6550010019059298, "learning_rate": 6.3416058394160584e-06, "loss": 0.6243, "step": 24477 }, { "epoch": 0.71466526524773, "grad_norm": 0.6964377521899505, "learning_rate": 6.340957015409571e-06, "loss": 0.6772, "step": 24478 }, { "epoch": 0.7146944614755774, "grad_norm": 0.7028656298723761, "learning_rate": 6.3403081914030825e-06, "loss": 0.6057, "step": 24479 }, { "epoch": 0.7147236577034247, "grad_norm": 0.6872200218408606, "learning_rate": 6.3396593673965945e-06, "loss": 0.6644, "step": 24480 }, { "epoch": 0.7147528539312721, "grad_norm": 0.6227323933803279, "learning_rate": 6.3390105433901065e-06, "loss": 0.5472, "step": 24481 }, { "epoch": 0.7147820501591194, "grad_norm": 0.5850699703152502, "learning_rate": 6.338361719383618e-06, "loss": 0.551, "step": 24482 }, { "epoch": 0.7148112463869668, "grad_norm": 0.6423447333601042, "learning_rate": 6.33771289537713e-06, "loss": 0.6295, "step": 24483 }, { "epoch": 0.7148404426148142, "grad_norm": 0.6219056908728845, "learning_rate": 6.337064071370641e-06, "loss": 0.5878, "step": 24484 }, { "epoch": 0.7148696388426615, "grad_norm": 0.6403128463400347, "learning_rate": 6.336415247364153e-06, "loss": 0.5898, "step": 24485 }, { "epoch": 0.7148988350705089, "grad_norm": 0.5955649930534107, "learning_rate": 6.335766423357664e-06, "loss": 0.5317, "step": 24486 }, { "epoch": 0.7149280312983562, "grad_norm": 0.6776063736543092, "learning_rate": 6.335117599351176e-06, "loss": 0.6594, "step": 24487 }, { "epoch": 0.7149572275262036, "grad_norm": 0.6547918448434492, "learning_rate": 6.334468775344689e-06, "loss": 0.6462, "step": 24488 }, { "epoch": 0.714986423754051, "grad_norm": 0.675600064404614, "learning_rate": 6.3338199513382e-06, "loss": 0.6926, "step": 24489 }, { "epoch": 0.7150156199818983, "grad_norm": 0.6100883242938983, "learning_rate": 6.333171127331712e-06, "loss": 0.5105, "step": 24490 }, { "epoch": 0.7150448162097457, "grad_norm": 0.6833466398198326, "learning_rate": 6.3325223033252234e-06, "loss": 0.6321, "step": 24491 }, { "epoch": 0.715074012437593, "grad_norm": 0.6234509048715368, "learning_rate": 6.3318734793187355e-06, "loss": 0.5587, "step": 24492 }, { "epoch": 0.7151032086654404, "grad_norm": 0.7002653631163996, "learning_rate": 6.331224655312247e-06, "loss": 0.6937, "step": 24493 }, { "epoch": 0.7151324048932878, "grad_norm": 0.6310627091546853, "learning_rate": 6.330575831305759e-06, "loss": 0.527, "step": 24494 }, { "epoch": 0.7151616011211351, "grad_norm": 0.66673903348836, "learning_rate": 6.32992700729927e-06, "loss": 0.609, "step": 24495 }, { "epoch": 0.7151907973489825, "grad_norm": 0.6463028549873084, "learning_rate": 6.329278183292782e-06, "loss": 0.5862, "step": 24496 }, { "epoch": 0.7152199935768299, "grad_norm": 0.667040100802705, "learning_rate": 6.328629359286295e-06, "loss": 0.6132, "step": 24497 }, { "epoch": 0.7152491898046772, "grad_norm": 0.6754638734044786, "learning_rate": 6.327980535279806e-06, "loss": 0.6187, "step": 24498 }, { "epoch": 0.7152783860325246, "grad_norm": 0.6561025210267194, "learning_rate": 6.327331711273318e-06, "loss": 0.6219, "step": 24499 }, { "epoch": 0.7153075822603719, "grad_norm": 0.6032960670370452, "learning_rate": 6.326682887266829e-06, "loss": 0.5432, "step": 24500 }, { "epoch": 0.7153367784882193, "grad_norm": 0.6823670120225322, "learning_rate": 6.326034063260341e-06, "loss": 0.6786, "step": 24501 }, { "epoch": 0.7153659747160667, "grad_norm": 0.573530403648387, "learning_rate": 6.325385239253853e-06, "loss": 0.4994, "step": 24502 }, { "epoch": 0.715395170943914, "grad_norm": 0.6352128277310243, "learning_rate": 6.324736415247364e-06, "loss": 0.5994, "step": 24503 }, { "epoch": 0.7154243671717614, "grad_norm": 0.6379858725824937, "learning_rate": 6.324087591240876e-06, "loss": 0.6404, "step": 24504 }, { "epoch": 0.7154535633996087, "grad_norm": 0.6046412694783105, "learning_rate": 6.323438767234388e-06, "loss": 0.5488, "step": 24505 }, { "epoch": 0.7154827596274561, "grad_norm": 0.6093137817597389, "learning_rate": 6.3227899432279e-06, "loss": 0.5586, "step": 24506 }, { "epoch": 0.7155119558553035, "grad_norm": 0.7499256050975691, "learning_rate": 6.3221411192214125e-06, "loss": 0.6652, "step": 24507 }, { "epoch": 0.7155411520831508, "grad_norm": 0.6842571481160068, "learning_rate": 6.321492295214924e-06, "loss": 0.6773, "step": 24508 }, { "epoch": 0.7155703483109982, "grad_norm": 0.588963774183341, "learning_rate": 6.320843471208436e-06, "loss": 0.5317, "step": 24509 }, { "epoch": 0.7155995445388456, "grad_norm": 0.6216118389662071, "learning_rate": 6.320194647201947e-06, "loss": 0.5592, "step": 24510 }, { "epoch": 0.7156287407666929, "grad_norm": 0.6195289421010378, "learning_rate": 6.319545823195459e-06, "loss": 0.5687, "step": 24511 }, { "epoch": 0.7156579369945403, "grad_norm": 0.6418380920453371, "learning_rate": 6.31889699918897e-06, "loss": 0.6073, "step": 24512 }, { "epoch": 0.7156871332223876, "grad_norm": 0.6539374192630769, "learning_rate": 6.318248175182482e-06, "loss": 0.6689, "step": 24513 }, { "epoch": 0.715716329450235, "grad_norm": 0.6468713321177351, "learning_rate": 6.317599351175993e-06, "loss": 0.6436, "step": 24514 }, { "epoch": 0.7157455256780824, "grad_norm": 0.7027464622648428, "learning_rate": 6.316950527169505e-06, "loss": 0.6763, "step": 24515 }, { "epoch": 0.7157747219059297, "grad_norm": 0.6611051708717381, "learning_rate": 6.316301703163018e-06, "loss": 0.6649, "step": 24516 }, { "epoch": 0.7158039181337771, "grad_norm": 0.6016967669624651, "learning_rate": 6.315652879156529e-06, "loss": 0.5553, "step": 24517 }, { "epoch": 0.7158331143616244, "grad_norm": 0.6820365521867517, "learning_rate": 6.315004055150041e-06, "loss": 0.5844, "step": 24518 }, { "epoch": 0.7158623105894718, "grad_norm": 0.6506641526445531, "learning_rate": 6.3143552311435526e-06, "loss": 0.6119, "step": 24519 }, { "epoch": 0.7158915068173192, "grad_norm": 0.6474155211630991, "learning_rate": 6.313706407137065e-06, "loss": 0.5768, "step": 24520 }, { "epoch": 0.7159207030451665, "grad_norm": 0.6480738275663925, "learning_rate": 6.313057583130577e-06, "loss": 0.6231, "step": 24521 }, { "epoch": 0.7159498992730139, "grad_norm": 0.6337165611529341, "learning_rate": 6.312408759124088e-06, "loss": 0.5863, "step": 24522 }, { "epoch": 0.7159790955008613, "grad_norm": 0.5882986383792355, "learning_rate": 6.3117599351176e-06, "loss": 0.5179, "step": 24523 }, { "epoch": 0.7160082917287086, "grad_norm": 0.6189332494139761, "learning_rate": 6.311111111111111e-06, "loss": 0.6045, "step": 24524 }, { "epoch": 0.716037487956556, "grad_norm": 0.5947073698947213, "learning_rate": 6.310462287104624e-06, "loss": 0.5318, "step": 24525 }, { "epoch": 0.7160666841844033, "grad_norm": 0.6566669990974571, "learning_rate": 6.309813463098136e-06, "loss": 0.6139, "step": 24526 }, { "epoch": 0.7160958804122507, "grad_norm": 0.6256577319275392, "learning_rate": 6.309164639091647e-06, "loss": 0.6081, "step": 24527 }, { "epoch": 0.7161250766400981, "grad_norm": 0.5790868958145108, "learning_rate": 6.308515815085159e-06, "loss": 0.5122, "step": 24528 }, { "epoch": 0.7161542728679454, "grad_norm": 0.5971211462057339, "learning_rate": 6.30786699107867e-06, "loss": 0.5314, "step": 24529 }, { "epoch": 0.7161834690957928, "grad_norm": 0.6552866207992423, "learning_rate": 6.307218167072182e-06, "loss": 0.5984, "step": 24530 }, { "epoch": 0.7162126653236401, "grad_norm": 0.6295752613303661, "learning_rate": 6.3065693430656935e-06, "loss": 0.5762, "step": 24531 }, { "epoch": 0.7162418615514875, "grad_norm": 0.620479444458859, "learning_rate": 6.3059205190592055e-06, "loss": 0.566, "step": 24532 }, { "epoch": 0.7162710577793349, "grad_norm": 0.6704649870679911, "learning_rate": 6.305271695052717e-06, "loss": 0.6542, "step": 24533 }, { "epoch": 0.7163002540071822, "grad_norm": 0.6319696117462227, "learning_rate": 6.304622871046229e-06, "loss": 0.6284, "step": 24534 }, { "epoch": 0.7163294502350296, "grad_norm": 0.6474467269044858, "learning_rate": 6.303974047039742e-06, "loss": 0.5616, "step": 24535 }, { "epoch": 0.716358646462877, "grad_norm": 0.6822854824511749, "learning_rate": 6.303325223033253e-06, "loss": 0.621, "step": 24536 }, { "epoch": 0.7163878426907243, "grad_norm": 0.6533830377362615, "learning_rate": 6.302676399026765e-06, "loss": 0.5672, "step": 24537 }, { "epoch": 0.7164170389185717, "grad_norm": 0.6397453449751078, "learning_rate": 6.302027575020276e-06, "loss": 0.6119, "step": 24538 }, { "epoch": 0.716446235146419, "grad_norm": 0.6910349929463211, "learning_rate": 6.301378751013788e-06, "loss": 0.7052, "step": 24539 }, { "epoch": 0.7164754313742664, "grad_norm": 0.645201546496912, "learning_rate": 6.3007299270073e-06, "loss": 0.6243, "step": 24540 }, { "epoch": 0.7165046276021138, "grad_norm": 0.6471274269912901, "learning_rate": 6.300081103000811e-06, "loss": 0.6434, "step": 24541 }, { "epoch": 0.7165338238299611, "grad_norm": 0.6657705207683567, "learning_rate": 6.299432278994323e-06, "loss": 0.5863, "step": 24542 }, { "epoch": 0.7165630200578086, "grad_norm": 0.6363177858057874, "learning_rate": 6.2987834549878344e-06, "loss": 0.5953, "step": 24543 }, { "epoch": 0.716592216285656, "grad_norm": 0.6216296106707807, "learning_rate": 6.298134630981347e-06, "loss": 0.6125, "step": 24544 }, { "epoch": 0.7166214125135033, "grad_norm": 0.6590766258582167, "learning_rate": 6.297485806974859e-06, "loss": 0.6018, "step": 24545 }, { "epoch": 0.7166506087413507, "grad_norm": 0.6249921782603822, "learning_rate": 6.2968369829683705e-06, "loss": 0.5565, "step": 24546 }, { "epoch": 0.716679804969198, "grad_norm": 0.6339433879560126, "learning_rate": 6.2961881589618826e-06, "loss": 0.6103, "step": 24547 }, { "epoch": 0.7167090011970454, "grad_norm": 0.69156942977175, "learning_rate": 6.295539334955394e-06, "loss": 0.6638, "step": 24548 }, { "epoch": 0.7167381974248928, "grad_norm": 0.6445432543254828, "learning_rate": 6.294890510948906e-06, "loss": 0.5901, "step": 24549 }, { "epoch": 0.7167673936527401, "grad_norm": 0.6452054565434767, "learning_rate": 6.294241686942417e-06, "loss": 0.5707, "step": 24550 }, { "epoch": 0.7167965898805875, "grad_norm": 0.6249463915107359, "learning_rate": 6.293592862935929e-06, "loss": 0.6224, "step": 24551 }, { "epoch": 0.7168257861084348, "grad_norm": 0.6847700796844854, "learning_rate": 6.29294403892944e-06, "loss": 0.6576, "step": 24552 }, { "epoch": 0.7168549823362822, "grad_norm": 0.5849193941451835, "learning_rate": 6.292295214922952e-06, "loss": 0.5134, "step": 24553 }, { "epoch": 0.7168841785641296, "grad_norm": 0.6212537803375975, "learning_rate": 6.291646390916465e-06, "loss": 0.5602, "step": 24554 }, { "epoch": 0.7169133747919769, "grad_norm": 0.5864152868084138, "learning_rate": 6.290997566909976e-06, "loss": 0.4761, "step": 24555 }, { "epoch": 0.7169425710198243, "grad_norm": 0.7471558111967402, "learning_rate": 6.290348742903488e-06, "loss": 0.8165, "step": 24556 }, { "epoch": 0.7169717672476716, "grad_norm": 0.6248377980608325, "learning_rate": 6.2896999188969994e-06, "loss": 0.5904, "step": 24557 }, { "epoch": 0.717000963475519, "grad_norm": 0.577785633286315, "learning_rate": 6.2890510948905115e-06, "loss": 0.5301, "step": 24558 }, { "epoch": 0.7170301597033664, "grad_norm": 0.694072792339982, "learning_rate": 6.2884022708840235e-06, "loss": 0.6612, "step": 24559 }, { "epoch": 0.7170593559312137, "grad_norm": 0.643669928603438, "learning_rate": 6.287753446877535e-06, "loss": 0.5966, "step": 24560 }, { "epoch": 0.7170885521590611, "grad_norm": 0.6646812723114891, "learning_rate": 6.287104622871047e-06, "loss": 0.6241, "step": 24561 }, { "epoch": 0.7171177483869084, "grad_norm": 0.6182729440791229, "learning_rate": 6.286455798864558e-06, "loss": 0.5577, "step": 24562 }, { "epoch": 0.7171469446147558, "grad_norm": 0.6212960572280996, "learning_rate": 6.285806974858071e-06, "loss": 0.5641, "step": 24563 }, { "epoch": 0.7171761408426032, "grad_norm": 0.6223613407854726, "learning_rate": 6.285158150851583e-06, "loss": 0.5178, "step": 24564 }, { "epoch": 0.7172053370704505, "grad_norm": 0.7146283367864348, "learning_rate": 6.284509326845094e-06, "loss": 0.7162, "step": 24565 }, { "epoch": 0.7172345332982979, "grad_norm": 0.5893520662049275, "learning_rate": 6.283860502838606e-06, "loss": 0.5115, "step": 24566 }, { "epoch": 0.7172637295261453, "grad_norm": 0.6586236000107464, "learning_rate": 6.283211678832117e-06, "loss": 0.6727, "step": 24567 }, { "epoch": 0.7172929257539926, "grad_norm": 0.6254357763429684, "learning_rate": 6.282562854825629e-06, "loss": 0.5714, "step": 24568 }, { "epoch": 0.71732212198184, "grad_norm": 0.6907498629687651, "learning_rate": 6.28191403081914e-06, "loss": 0.666, "step": 24569 }, { "epoch": 0.7173513182096873, "grad_norm": 0.6272828885985927, "learning_rate": 6.281265206812652e-06, "loss": 0.5629, "step": 24570 }, { "epoch": 0.7173805144375347, "grad_norm": 0.6522946167634384, "learning_rate": 6.280616382806164e-06, "loss": 0.6015, "step": 24571 }, { "epoch": 0.7174097106653821, "grad_norm": 0.6572031357456285, "learning_rate": 6.279967558799676e-06, "loss": 0.6152, "step": 24572 }, { "epoch": 0.7174389068932294, "grad_norm": 0.632544861315353, "learning_rate": 6.2793187347931885e-06, "loss": 0.5901, "step": 24573 }, { "epoch": 0.7174681031210768, "grad_norm": 0.6506871316765531, "learning_rate": 6.2786699107867e-06, "loss": 0.6014, "step": 24574 }, { "epoch": 0.7174972993489241, "grad_norm": 0.6484992099267778, "learning_rate": 6.278021086780212e-06, "loss": 0.598, "step": 24575 }, { "epoch": 0.7175264955767715, "grad_norm": 0.6625798262252989, "learning_rate": 6.277372262773723e-06, "loss": 0.6014, "step": 24576 }, { "epoch": 0.7175556918046189, "grad_norm": 0.6102761736224215, "learning_rate": 6.276723438767235e-06, "loss": 0.5084, "step": 24577 }, { "epoch": 0.7175848880324662, "grad_norm": 0.6897679189033992, "learning_rate": 6.276074614760747e-06, "loss": 0.6872, "step": 24578 }, { "epoch": 0.7176140842603136, "grad_norm": 0.6698681399719318, "learning_rate": 6.275425790754258e-06, "loss": 0.6761, "step": 24579 }, { "epoch": 0.717643280488161, "grad_norm": 0.6681875532774003, "learning_rate": 6.27477696674777e-06, "loss": 0.5971, "step": 24580 }, { "epoch": 0.7176724767160083, "grad_norm": 0.68590744354932, "learning_rate": 6.274128142741281e-06, "loss": 0.7073, "step": 24581 }, { "epoch": 0.7177016729438557, "grad_norm": 0.6231342320388563, "learning_rate": 6.273479318734794e-06, "loss": 0.5412, "step": 24582 }, { "epoch": 0.717730869171703, "grad_norm": 0.6736366377896688, "learning_rate": 6.272830494728306e-06, "loss": 0.6179, "step": 24583 }, { "epoch": 0.7177600653995504, "grad_norm": 0.6212675953076471, "learning_rate": 6.272181670721817e-06, "loss": 0.6143, "step": 24584 }, { "epoch": 0.7177892616273978, "grad_norm": 0.673557805760713, "learning_rate": 6.2715328467153294e-06, "loss": 0.6371, "step": 24585 }, { "epoch": 0.7178184578552451, "grad_norm": 0.6694185652845394, "learning_rate": 6.270884022708841e-06, "loss": 0.6149, "step": 24586 }, { "epoch": 0.7178476540830925, "grad_norm": 0.6208905931434738, "learning_rate": 6.270235198702353e-06, "loss": 0.559, "step": 24587 }, { "epoch": 0.7178768503109398, "grad_norm": 0.6116839835871148, "learning_rate": 6.269586374695864e-06, "loss": 0.5749, "step": 24588 }, { "epoch": 0.7179060465387872, "grad_norm": 0.6221303861029439, "learning_rate": 6.268937550689376e-06, "loss": 0.5738, "step": 24589 }, { "epoch": 0.7179352427666346, "grad_norm": 0.6611269858590474, "learning_rate": 6.268288726682887e-06, "loss": 0.6518, "step": 24590 }, { "epoch": 0.7179644389944819, "grad_norm": 0.6419219168229775, "learning_rate": 6.267639902676399e-06, "loss": 0.5804, "step": 24591 }, { "epoch": 0.7179936352223293, "grad_norm": 0.6958327405775664, "learning_rate": 6.266991078669912e-06, "loss": 0.6906, "step": 24592 }, { "epoch": 0.7180228314501766, "grad_norm": 0.665556407510506, "learning_rate": 6.266342254663423e-06, "loss": 0.6815, "step": 24593 }, { "epoch": 0.718052027678024, "grad_norm": 0.683238903610334, "learning_rate": 6.265693430656935e-06, "loss": 0.6741, "step": 24594 }, { "epoch": 0.7180812239058714, "grad_norm": 0.5878642062667518, "learning_rate": 6.265044606650446e-06, "loss": 0.5377, "step": 24595 }, { "epoch": 0.7181104201337187, "grad_norm": 0.6785802501542881, "learning_rate": 6.264395782643958e-06, "loss": 0.667, "step": 24596 }, { "epoch": 0.7181396163615661, "grad_norm": 0.7083546319816573, "learning_rate": 6.26374695863747e-06, "loss": 0.6603, "step": 24597 }, { "epoch": 0.7181688125894135, "grad_norm": 0.6025974537421616, "learning_rate": 6.2630981346309815e-06, "loss": 0.5484, "step": 24598 }, { "epoch": 0.7181980088172608, "grad_norm": 0.6179903559362946, "learning_rate": 6.2624493106244936e-06, "loss": 0.5813, "step": 24599 }, { "epoch": 0.7182272050451082, "grad_norm": 0.6568217055742871, "learning_rate": 6.261800486618005e-06, "loss": 0.6058, "step": 24600 }, { "epoch": 0.7182564012729555, "grad_norm": 0.6713560685717539, "learning_rate": 6.261151662611518e-06, "loss": 0.6573, "step": 24601 }, { "epoch": 0.7182855975008029, "grad_norm": 0.6252823849343991, "learning_rate": 6.26050283860503e-06, "loss": 0.5737, "step": 24602 }, { "epoch": 0.7183147937286503, "grad_norm": 0.730571885639293, "learning_rate": 6.259854014598541e-06, "loss": 0.7082, "step": 24603 }, { "epoch": 0.7183439899564976, "grad_norm": 0.63509417037104, "learning_rate": 6.259205190592053e-06, "loss": 0.559, "step": 24604 }, { "epoch": 0.718373186184345, "grad_norm": 0.6365860680799105, "learning_rate": 6.258556366585564e-06, "loss": 0.5862, "step": 24605 }, { "epoch": 0.7184023824121923, "grad_norm": 0.6097350482952933, "learning_rate": 6.257907542579076e-06, "loss": 0.5279, "step": 24606 }, { "epoch": 0.7184315786400397, "grad_norm": 0.6329176532199722, "learning_rate": 6.257258718572587e-06, "loss": 0.5728, "step": 24607 }, { "epoch": 0.7184607748678871, "grad_norm": 0.6934843568511825, "learning_rate": 6.256609894566099e-06, "loss": 0.6749, "step": 24608 }, { "epoch": 0.7184899710957344, "grad_norm": 0.6297475164746019, "learning_rate": 6.2559610705596105e-06, "loss": 0.5829, "step": 24609 }, { "epoch": 0.7185191673235818, "grad_norm": 0.6571752546044881, "learning_rate": 6.255312246553123e-06, "loss": 0.665, "step": 24610 }, { "epoch": 0.7185483635514291, "grad_norm": 0.7177047018717213, "learning_rate": 6.254663422546635e-06, "loss": 0.6146, "step": 24611 }, { "epoch": 0.7185775597792765, "grad_norm": 0.6507358634529852, "learning_rate": 6.2540145985401465e-06, "loss": 0.6084, "step": 24612 }, { "epoch": 0.7186067560071239, "grad_norm": 0.6873214579878694, "learning_rate": 6.2533657745336586e-06, "loss": 0.6256, "step": 24613 }, { "epoch": 0.7186359522349712, "grad_norm": 0.6911818149051299, "learning_rate": 6.25271695052717e-06, "loss": 0.6421, "step": 24614 }, { "epoch": 0.7186651484628186, "grad_norm": 0.5759019029886431, "learning_rate": 6.252068126520682e-06, "loss": 0.4719, "step": 24615 }, { "epoch": 0.718694344690666, "grad_norm": 0.7013644995440067, "learning_rate": 6.251419302514194e-06, "loss": 0.6949, "step": 24616 }, { "epoch": 0.7187235409185133, "grad_norm": 0.6873903024666433, "learning_rate": 6.250770478507705e-06, "loss": 0.7117, "step": 24617 }, { "epoch": 0.7187527371463607, "grad_norm": 0.6766534544955418, "learning_rate": 6.250121654501217e-06, "loss": 0.6804, "step": 24618 }, { "epoch": 0.718781933374208, "grad_norm": 0.6397511167981886, "learning_rate": 6.249472830494728e-06, "loss": 0.5919, "step": 24619 }, { "epoch": 0.7188111296020554, "grad_norm": 0.6066740222808136, "learning_rate": 6.248824006488241e-06, "loss": 0.5264, "step": 24620 }, { "epoch": 0.7188403258299028, "grad_norm": 0.6145082284625283, "learning_rate": 6.248175182481753e-06, "loss": 0.5978, "step": 24621 }, { "epoch": 0.7188695220577501, "grad_norm": 0.6612630220639732, "learning_rate": 6.247526358475264e-06, "loss": 0.6632, "step": 24622 }, { "epoch": 0.7188987182855975, "grad_norm": 0.651030854126349, "learning_rate": 6.246877534468776e-06, "loss": 0.5718, "step": 24623 }, { "epoch": 0.7189279145134448, "grad_norm": 0.6246494269023489, "learning_rate": 6.2462287104622875e-06, "loss": 0.5422, "step": 24624 }, { "epoch": 0.7189571107412922, "grad_norm": 0.6138349137249559, "learning_rate": 6.2455798864557995e-06, "loss": 0.5346, "step": 24625 }, { "epoch": 0.7189863069691396, "grad_norm": 0.6409214601367704, "learning_rate": 6.244931062449311e-06, "loss": 0.6075, "step": 24626 }, { "epoch": 0.7190155031969869, "grad_norm": 0.6334863829657136, "learning_rate": 6.244282238442823e-06, "loss": 0.5816, "step": 24627 }, { "epoch": 0.7190446994248343, "grad_norm": 0.6609746849101812, "learning_rate": 6.243633414436334e-06, "loss": 0.6123, "step": 24628 }, { "epoch": 0.7190738956526816, "grad_norm": 0.7237968582184131, "learning_rate": 6.242984590429847e-06, "loss": 0.7267, "step": 24629 }, { "epoch": 0.719103091880529, "grad_norm": 0.6372590879985166, "learning_rate": 6.242335766423359e-06, "loss": 0.5813, "step": 24630 }, { "epoch": 0.7191322881083764, "grad_norm": 0.6455640649372569, "learning_rate": 6.24168694241687e-06, "loss": 0.5834, "step": 24631 }, { "epoch": 0.7191614843362237, "grad_norm": 0.6245905573822175, "learning_rate": 6.241038118410382e-06, "loss": 0.5585, "step": 24632 }, { "epoch": 0.7191906805640711, "grad_norm": 0.6258821432738277, "learning_rate": 6.240389294403893e-06, "loss": 0.5762, "step": 24633 }, { "epoch": 0.7192198767919185, "grad_norm": 0.6472276196260551, "learning_rate": 6.239740470397405e-06, "loss": 0.591, "step": 24634 }, { "epoch": 0.7192490730197658, "grad_norm": 0.6046064732978188, "learning_rate": 6.239091646390916e-06, "loss": 0.518, "step": 24635 }, { "epoch": 0.7192782692476132, "grad_norm": 0.6881575795884796, "learning_rate": 6.238442822384428e-06, "loss": 0.6742, "step": 24636 }, { "epoch": 0.7193074654754605, "grad_norm": 0.6940027163067588, "learning_rate": 6.2377939983779404e-06, "loss": 0.6788, "step": 24637 }, { "epoch": 0.7193366617033079, "grad_norm": 0.6189902621182259, "learning_rate": 6.237145174371452e-06, "loss": 0.5597, "step": 24638 }, { "epoch": 0.7193658579311553, "grad_norm": 0.6406857904905102, "learning_rate": 6.2364963503649645e-06, "loss": 0.5887, "step": 24639 }, { "epoch": 0.7193950541590026, "grad_norm": 0.6330163217230678, "learning_rate": 6.2358475263584765e-06, "loss": 0.5827, "step": 24640 }, { "epoch": 0.71942425038685, "grad_norm": 0.6616370001728605, "learning_rate": 6.235198702351988e-06, "loss": 0.6155, "step": 24641 }, { "epoch": 0.7194534466146973, "grad_norm": 0.6529854121840296, "learning_rate": 6.2345498783455e-06, "loss": 0.5979, "step": 24642 }, { "epoch": 0.7194826428425447, "grad_norm": 0.6406542680069176, "learning_rate": 6.233901054339011e-06, "loss": 0.6083, "step": 24643 }, { "epoch": 0.7195118390703921, "grad_norm": 0.7255890074206103, "learning_rate": 6.233252230332523e-06, "loss": 0.6967, "step": 24644 }, { "epoch": 0.7195410352982394, "grad_norm": 0.6854324875000952, "learning_rate": 6.232603406326034e-06, "loss": 0.6612, "step": 24645 }, { "epoch": 0.7195702315260868, "grad_norm": 0.6527889172103633, "learning_rate": 6.231954582319546e-06, "loss": 0.6436, "step": 24646 }, { "epoch": 0.7195994277539342, "grad_norm": 0.6061586844813518, "learning_rate": 6.231305758313057e-06, "loss": 0.5681, "step": 24647 }, { "epoch": 0.7196286239817815, "grad_norm": 0.6177384901224688, "learning_rate": 6.23065693430657e-06, "loss": 0.5836, "step": 24648 }, { "epoch": 0.7196578202096289, "grad_norm": 0.6592904520539581, "learning_rate": 6.230008110300082e-06, "loss": 0.6516, "step": 24649 }, { "epoch": 0.7196870164374762, "grad_norm": 0.6557410020176012, "learning_rate": 6.229359286293593e-06, "loss": 0.6431, "step": 24650 }, { "epoch": 0.7197162126653236, "grad_norm": 0.6621876870064828, "learning_rate": 6.2287104622871054e-06, "loss": 0.5494, "step": 24651 }, { "epoch": 0.719745408893171, "grad_norm": 0.6401393815053925, "learning_rate": 6.228061638280617e-06, "loss": 0.5897, "step": 24652 }, { "epoch": 0.7197746051210183, "grad_norm": 0.6051131413434329, "learning_rate": 6.227412814274129e-06, "loss": 0.5419, "step": 24653 }, { "epoch": 0.7198038013488657, "grad_norm": 0.5819361064667766, "learning_rate": 6.22676399026764e-06, "loss": 0.5035, "step": 24654 }, { "epoch": 0.719832997576713, "grad_norm": 0.6213997255853501, "learning_rate": 6.226115166261152e-06, "loss": 0.6244, "step": 24655 }, { "epoch": 0.7198621938045604, "grad_norm": 0.686455238591767, "learning_rate": 6.225466342254664e-06, "loss": 0.6534, "step": 24656 }, { "epoch": 0.7198913900324078, "grad_norm": 0.6434789129740442, "learning_rate": 6.224817518248175e-06, "loss": 0.5949, "step": 24657 }, { "epoch": 0.7199205862602551, "grad_norm": 0.6704293544110731, "learning_rate": 6.224168694241688e-06, "loss": 0.6225, "step": 24658 }, { "epoch": 0.7199497824881025, "grad_norm": 0.6595279767061771, "learning_rate": 6.2235198702352e-06, "loss": 0.6415, "step": 24659 }, { "epoch": 0.7199789787159498, "grad_norm": 0.6795561200382029, "learning_rate": 6.222871046228711e-06, "loss": 0.6194, "step": 24660 }, { "epoch": 0.7200081749437972, "grad_norm": 0.6341671059431425, "learning_rate": 6.222222222222223e-06, "loss": 0.589, "step": 24661 }, { "epoch": 0.7200373711716446, "grad_norm": 0.6684942635831085, "learning_rate": 6.221573398215734e-06, "loss": 0.6691, "step": 24662 }, { "epoch": 0.7200665673994919, "grad_norm": 0.6246030614200266, "learning_rate": 6.220924574209246e-06, "loss": 0.5119, "step": 24663 }, { "epoch": 0.7200957636273394, "grad_norm": 0.6402436798014582, "learning_rate": 6.2202757502027576e-06, "loss": 0.5526, "step": 24664 }, { "epoch": 0.7201249598551868, "grad_norm": 0.6183477311613266, "learning_rate": 6.21962692619627e-06, "loss": 0.5849, "step": 24665 }, { "epoch": 0.7201541560830341, "grad_norm": 0.690205723003642, "learning_rate": 6.218978102189781e-06, "loss": 0.6536, "step": 24666 }, { "epoch": 0.7201833523108815, "grad_norm": 0.624239730475222, "learning_rate": 6.218329278183294e-06, "loss": 0.5745, "step": 24667 }, { "epoch": 0.7202125485387288, "grad_norm": 0.6912755817884273, "learning_rate": 6.217680454176806e-06, "loss": 0.7053, "step": 24668 }, { "epoch": 0.7202417447665762, "grad_norm": 0.6373715613724434, "learning_rate": 6.217031630170317e-06, "loss": 0.636, "step": 24669 }, { "epoch": 0.7202709409944236, "grad_norm": 0.6231787439306902, "learning_rate": 6.216382806163829e-06, "loss": 0.5878, "step": 24670 }, { "epoch": 0.7203001372222709, "grad_norm": 0.6563602551737291, "learning_rate": 6.21573398215734e-06, "loss": 0.6424, "step": 24671 }, { "epoch": 0.7203293334501183, "grad_norm": 0.8237589853805792, "learning_rate": 6.215085158150852e-06, "loss": 0.6847, "step": 24672 }, { "epoch": 0.7203585296779657, "grad_norm": 0.6567392298460468, "learning_rate": 6.214436334144363e-06, "loss": 0.6364, "step": 24673 }, { "epoch": 0.720387725905813, "grad_norm": 0.6764715411131281, "learning_rate": 6.213787510137875e-06, "loss": 0.6704, "step": 24674 }, { "epoch": 0.7204169221336604, "grad_norm": 0.6295087476509094, "learning_rate": 6.213138686131387e-06, "loss": 0.5611, "step": 24675 }, { "epoch": 0.7204461183615077, "grad_norm": 0.6295482875246539, "learning_rate": 6.212489862124899e-06, "loss": 0.5289, "step": 24676 }, { "epoch": 0.7204753145893551, "grad_norm": 0.661358282935688, "learning_rate": 6.211841038118411e-06, "loss": 0.5972, "step": 24677 }, { "epoch": 0.7205045108172025, "grad_norm": 0.6156498914362525, "learning_rate": 6.211192214111923e-06, "loss": 0.5486, "step": 24678 }, { "epoch": 0.7205337070450498, "grad_norm": 0.6246788024224973, "learning_rate": 6.2105433901054346e-06, "loss": 0.6113, "step": 24679 }, { "epoch": 0.7205629032728972, "grad_norm": 0.6247032978824195, "learning_rate": 6.209894566098947e-06, "loss": 0.5862, "step": 24680 }, { "epoch": 0.7205920995007445, "grad_norm": 0.6914912384607869, "learning_rate": 6.209245742092458e-06, "loss": 0.6118, "step": 24681 }, { "epoch": 0.7206212957285919, "grad_norm": 0.6721758471277044, "learning_rate": 6.20859691808597e-06, "loss": 0.6222, "step": 24682 }, { "epoch": 0.7206504919564393, "grad_norm": 0.6466827323955557, "learning_rate": 6.207948094079481e-06, "loss": 0.6033, "step": 24683 }, { "epoch": 0.7206796881842866, "grad_norm": 0.6855328153284381, "learning_rate": 6.207299270072993e-06, "loss": 0.6471, "step": 24684 }, { "epoch": 0.720708884412134, "grad_norm": 0.6869633941153723, "learning_rate": 6.206650446066504e-06, "loss": 0.7261, "step": 24685 }, { "epoch": 0.7207380806399813, "grad_norm": 0.6923350151615469, "learning_rate": 6.206001622060017e-06, "loss": 0.6669, "step": 24686 }, { "epoch": 0.7207672768678287, "grad_norm": 0.6673997954982527, "learning_rate": 6.205352798053529e-06, "loss": 0.6569, "step": 24687 }, { "epoch": 0.7207964730956761, "grad_norm": 0.6318160492705776, "learning_rate": 6.20470397404704e-06, "loss": 0.5932, "step": 24688 }, { "epoch": 0.7208256693235234, "grad_norm": 0.660486050007025, "learning_rate": 6.204055150040552e-06, "loss": 0.6376, "step": 24689 }, { "epoch": 0.7208548655513708, "grad_norm": 0.6253764441793594, "learning_rate": 6.2034063260340635e-06, "loss": 0.6041, "step": 24690 }, { "epoch": 0.7208840617792182, "grad_norm": 0.7327534131036193, "learning_rate": 6.2027575020275755e-06, "loss": 0.6591, "step": 24691 }, { "epoch": 0.7209132580070655, "grad_norm": 0.650975840372582, "learning_rate": 6.202108678021087e-06, "loss": 0.6163, "step": 24692 }, { "epoch": 0.7209424542349129, "grad_norm": 0.6170336283224894, "learning_rate": 6.201459854014599e-06, "loss": 0.5856, "step": 24693 }, { "epoch": 0.7209716504627602, "grad_norm": 0.6510576275271079, "learning_rate": 6.200811030008111e-06, "loss": 0.6303, "step": 24694 }, { "epoch": 0.7210008466906076, "grad_norm": 0.6223348926882432, "learning_rate": 6.200162206001623e-06, "loss": 0.5579, "step": 24695 }, { "epoch": 0.721030042918455, "grad_norm": 0.5933931085086321, "learning_rate": 6.199513381995135e-06, "loss": 0.5328, "step": 24696 }, { "epoch": 0.7210592391463023, "grad_norm": 0.6360352309568221, "learning_rate": 6.198864557988646e-06, "loss": 0.6059, "step": 24697 }, { "epoch": 0.7210884353741497, "grad_norm": 0.6612719560517449, "learning_rate": 6.198215733982158e-06, "loss": 0.5653, "step": 24698 }, { "epoch": 0.721117631601997, "grad_norm": 0.6649494119898478, "learning_rate": 6.19756690997567e-06, "loss": 0.6709, "step": 24699 }, { "epoch": 0.7211468278298444, "grad_norm": 0.6211077439755562, "learning_rate": 6.196918085969181e-06, "loss": 0.5751, "step": 24700 }, { "epoch": 0.7211760240576918, "grad_norm": 0.6542888109723645, "learning_rate": 6.196269261962693e-06, "loss": 0.6309, "step": 24701 }, { "epoch": 0.7212052202855391, "grad_norm": 0.6432945849273739, "learning_rate": 6.195620437956204e-06, "loss": 0.6413, "step": 24702 }, { "epoch": 0.7212344165133865, "grad_norm": 0.6442414775551788, "learning_rate": 6.1949716139497164e-06, "loss": 0.5811, "step": 24703 }, { "epoch": 0.7212636127412339, "grad_norm": 0.6355144374181876, "learning_rate": 6.194322789943228e-06, "loss": 0.6178, "step": 24704 }, { "epoch": 0.7212928089690812, "grad_norm": 0.6862927207971252, "learning_rate": 6.1936739659367405e-06, "loss": 0.6819, "step": 24705 }, { "epoch": 0.7213220051969286, "grad_norm": 0.600319986818769, "learning_rate": 6.1930251419302525e-06, "loss": 0.5514, "step": 24706 }, { "epoch": 0.7213512014247759, "grad_norm": 0.6116761264117404, "learning_rate": 6.192376317923764e-06, "loss": 0.5705, "step": 24707 }, { "epoch": 0.7213803976526233, "grad_norm": 0.6541718117116908, "learning_rate": 6.191727493917276e-06, "loss": 0.6355, "step": 24708 }, { "epoch": 0.7214095938804707, "grad_norm": 0.57105479277425, "learning_rate": 6.191078669910787e-06, "loss": 0.522, "step": 24709 }, { "epoch": 0.721438790108318, "grad_norm": 0.6193456280038929, "learning_rate": 6.190429845904299e-06, "loss": 0.5377, "step": 24710 }, { "epoch": 0.7214679863361654, "grad_norm": 0.6686675478220844, "learning_rate": 6.18978102189781e-06, "loss": 0.6232, "step": 24711 }, { "epoch": 0.7214971825640127, "grad_norm": 0.6294283340612248, "learning_rate": 6.189132197891322e-06, "loss": 0.5923, "step": 24712 }, { "epoch": 0.7215263787918601, "grad_norm": 0.6187152942780364, "learning_rate": 6.188483373884834e-06, "loss": 0.5631, "step": 24713 }, { "epoch": 0.7215555750197075, "grad_norm": 0.6397533823474764, "learning_rate": 6.187834549878346e-06, "loss": 0.6232, "step": 24714 }, { "epoch": 0.7215847712475548, "grad_norm": 0.6502082297860992, "learning_rate": 6.187185725871858e-06, "loss": 0.5979, "step": 24715 }, { "epoch": 0.7216139674754022, "grad_norm": 0.6349839959960712, "learning_rate": 6.186536901865369e-06, "loss": 0.5623, "step": 24716 }, { "epoch": 0.7216431637032495, "grad_norm": 0.6552725079837812, "learning_rate": 6.1858880778588814e-06, "loss": 0.6106, "step": 24717 }, { "epoch": 0.7216723599310969, "grad_norm": 0.6202476437076788, "learning_rate": 6.1852392538523935e-06, "loss": 0.5835, "step": 24718 }, { "epoch": 0.7217015561589443, "grad_norm": 0.6764568988737263, "learning_rate": 6.184590429845905e-06, "loss": 0.6385, "step": 24719 }, { "epoch": 0.7217307523867916, "grad_norm": 0.6420436567752729, "learning_rate": 6.183941605839417e-06, "loss": 0.5669, "step": 24720 }, { "epoch": 0.721759948614639, "grad_norm": 0.6664770419296916, "learning_rate": 6.183292781832928e-06, "loss": 0.6606, "step": 24721 }, { "epoch": 0.7217891448424864, "grad_norm": 0.6206959125010334, "learning_rate": 6.18264395782644e-06, "loss": 0.5309, "step": 24722 }, { "epoch": 0.7218183410703337, "grad_norm": 0.6254567851767492, "learning_rate": 6.181995133819951e-06, "loss": 0.5718, "step": 24723 }, { "epoch": 0.7218475372981811, "grad_norm": 0.6982271411254852, "learning_rate": 6.181346309813464e-06, "loss": 0.6743, "step": 24724 }, { "epoch": 0.7218767335260284, "grad_norm": 0.6695547638139429, "learning_rate": 6.180697485806976e-06, "loss": 0.6632, "step": 24725 }, { "epoch": 0.7219059297538758, "grad_norm": 0.7067592999031872, "learning_rate": 6.180048661800487e-06, "loss": 0.7098, "step": 24726 }, { "epoch": 0.7219351259817232, "grad_norm": 0.6669192944062021, "learning_rate": 6.179399837793999e-06, "loss": 0.647, "step": 24727 }, { "epoch": 0.7219643222095705, "grad_norm": 0.6468055671228455, "learning_rate": 6.17875101378751e-06, "loss": 0.6351, "step": 24728 }, { "epoch": 0.7219935184374179, "grad_norm": 0.6745515287067582, "learning_rate": 6.178102189781022e-06, "loss": 0.6357, "step": 24729 }, { "epoch": 0.7220227146652652, "grad_norm": 0.6638033216009981, "learning_rate": 6.1774533657745336e-06, "loss": 0.5578, "step": 24730 }, { "epoch": 0.7220519108931126, "grad_norm": 0.593342061959199, "learning_rate": 6.176804541768046e-06, "loss": 0.513, "step": 24731 }, { "epoch": 0.72208110712096, "grad_norm": 0.6897825928811715, "learning_rate": 6.176155717761558e-06, "loss": 0.686, "step": 24732 }, { "epoch": 0.7221103033488073, "grad_norm": 0.6659753937533915, "learning_rate": 6.17550689375507e-06, "loss": 0.6585, "step": 24733 }, { "epoch": 0.7221394995766547, "grad_norm": 0.6250756758026988, "learning_rate": 6.174858069748582e-06, "loss": 0.5788, "step": 24734 }, { "epoch": 0.722168695804502, "grad_norm": 0.5954896267324603, "learning_rate": 6.174209245742093e-06, "loss": 0.5525, "step": 24735 }, { "epoch": 0.7221978920323494, "grad_norm": 0.6476246413127078, "learning_rate": 6.173560421735605e-06, "loss": 0.6209, "step": 24736 }, { "epoch": 0.7222270882601968, "grad_norm": 0.6464403411160974, "learning_rate": 6.172911597729117e-06, "loss": 0.5913, "step": 24737 }, { "epoch": 0.7222562844880441, "grad_norm": 0.6738425871261214, "learning_rate": 6.172262773722628e-06, "loss": 0.7073, "step": 24738 }, { "epoch": 0.7222854807158915, "grad_norm": 0.6145982861951407, "learning_rate": 6.17161394971614e-06, "loss": 0.5725, "step": 24739 }, { "epoch": 0.7223146769437389, "grad_norm": 0.6194058792919608, "learning_rate": 6.170965125709651e-06, "loss": 0.555, "step": 24740 }, { "epoch": 0.7223438731715862, "grad_norm": 0.6266839732564243, "learning_rate": 6.170316301703163e-06, "loss": 0.5781, "step": 24741 }, { "epoch": 0.7223730693994336, "grad_norm": 0.7067955782621468, "learning_rate": 6.1696674776966745e-06, "loss": 0.6585, "step": 24742 }, { "epoch": 0.7224022656272809, "grad_norm": 0.6415867820662956, "learning_rate": 6.169018653690187e-06, "loss": 0.5915, "step": 24743 }, { "epoch": 0.7224314618551283, "grad_norm": 0.6316337510875396, "learning_rate": 6.168369829683699e-06, "loss": 0.5894, "step": 24744 }, { "epoch": 0.7224606580829757, "grad_norm": 0.6937421404346772, "learning_rate": 6.167721005677211e-06, "loss": 0.6817, "step": 24745 }, { "epoch": 0.722489854310823, "grad_norm": 0.6491187052300736, "learning_rate": 6.167072181670723e-06, "loss": 0.5972, "step": 24746 }, { "epoch": 0.7225190505386704, "grad_norm": 0.6767290378254496, "learning_rate": 6.166423357664234e-06, "loss": 0.6144, "step": 24747 }, { "epoch": 0.7225482467665177, "grad_norm": 0.6307160256823436, "learning_rate": 6.165774533657746e-06, "loss": 0.5847, "step": 24748 }, { "epoch": 0.7225774429943651, "grad_norm": 0.6845955552148781, "learning_rate": 6.165125709651257e-06, "loss": 0.6849, "step": 24749 }, { "epoch": 0.7226066392222125, "grad_norm": 0.6573075971380519, "learning_rate": 6.164476885644769e-06, "loss": 0.639, "step": 24750 }, { "epoch": 0.7226358354500598, "grad_norm": 0.6328905936907384, "learning_rate": 6.163828061638281e-06, "loss": 0.6337, "step": 24751 }, { "epoch": 0.7226650316779072, "grad_norm": 0.7589075941860317, "learning_rate": 6.163179237631793e-06, "loss": 0.6636, "step": 24752 }, { "epoch": 0.7226942279057545, "grad_norm": 0.7051253580358294, "learning_rate": 6.162530413625305e-06, "loss": 0.7176, "step": 24753 }, { "epoch": 0.7227234241336019, "grad_norm": 0.6379077567163633, "learning_rate": 6.161881589618816e-06, "loss": 0.6061, "step": 24754 }, { "epoch": 0.7227526203614493, "grad_norm": 0.6578612328191572, "learning_rate": 6.161232765612328e-06, "loss": 0.6417, "step": 24755 }, { "epoch": 0.7227818165892966, "grad_norm": 0.641394410834343, "learning_rate": 6.16058394160584e-06, "loss": 0.6403, "step": 24756 }, { "epoch": 0.722811012817144, "grad_norm": 0.6288932182930101, "learning_rate": 6.1599351175993515e-06, "loss": 0.5705, "step": 24757 }, { "epoch": 0.7228402090449914, "grad_norm": 0.6205874293228884, "learning_rate": 6.1592862935928635e-06, "loss": 0.5053, "step": 24758 }, { "epoch": 0.7228694052728387, "grad_norm": 0.6552307235946264, "learning_rate": 6.158637469586375e-06, "loss": 0.6226, "step": 24759 }, { "epoch": 0.7228986015006861, "grad_norm": 0.6671834214275, "learning_rate": 6.157988645579887e-06, "loss": 0.6388, "step": 24760 }, { "epoch": 0.7229277977285334, "grad_norm": 0.6679846219829993, "learning_rate": 6.1573398215734e-06, "loss": 0.6415, "step": 24761 }, { "epoch": 0.7229569939563808, "grad_norm": 0.623388342357767, "learning_rate": 6.156690997566911e-06, "loss": 0.5256, "step": 24762 }, { "epoch": 0.7229861901842282, "grad_norm": 0.6536761080566469, "learning_rate": 6.156042173560423e-06, "loss": 0.6138, "step": 24763 }, { "epoch": 0.7230153864120755, "grad_norm": 0.6114320972456428, "learning_rate": 6.155393349553934e-06, "loss": 0.5516, "step": 24764 }, { "epoch": 0.7230445826399229, "grad_norm": 0.6832328380398687, "learning_rate": 6.154744525547446e-06, "loss": 0.5372, "step": 24765 }, { "epoch": 0.7230737788677702, "grad_norm": 0.6882377268216685, "learning_rate": 6.154095701540957e-06, "loss": 0.6584, "step": 24766 }, { "epoch": 0.7231029750956176, "grad_norm": 0.6357164589132028, "learning_rate": 6.153446877534469e-06, "loss": 0.6006, "step": 24767 }, { "epoch": 0.723132171323465, "grad_norm": 0.6445957905034622, "learning_rate": 6.1527980535279804e-06, "loss": 0.5988, "step": 24768 }, { "epoch": 0.7231613675513123, "grad_norm": 0.6417055578931531, "learning_rate": 6.1521492295214925e-06, "loss": 0.6325, "step": 24769 }, { "epoch": 0.7231905637791597, "grad_norm": 0.6048240659389147, "learning_rate": 6.151500405515004e-06, "loss": 0.557, "step": 24770 }, { "epoch": 0.723219760007007, "grad_norm": 0.6575972010505224, "learning_rate": 6.1508515815085165e-06, "loss": 0.6331, "step": 24771 }, { "epoch": 0.7232489562348544, "grad_norm": 0.6394327354020057, "learning_rate": 6.1502027575020285e-06, "loss": 0.5835, "step": 24772 }, { "epoch": 0.7232781524627018, "grad_norm": 0.6348328089394009, "learning_rate": 6.14955393349554e-06, "loss": 0.5326, "step": 24773 }, { "epoch": 0.7233073486905491, "grad_norm": 0.6675293178098766, "learning_rate": 6.148905109489052e-06, "loss": 0.6072, "step": 24774 }, { "epoch": 0.7233365449183965, "grad_norm": 0.6259469941822077, "learning_rate": 6.148256285482564e-06, "loss": 0.5896, "step": 24775 }, { "epoch": 0.7233657411462439, "grad_norm": 0.6726929912793851, "learning_rate": 6.147607461476075e-06, "loss": 0.681, "step": 24776 }, { "epoch": 0.7233949373740912, "grad_norm": 0.6279206313176319, "learning_rate": 6.146958637469587e-06, "loss": 0.5909, "step": 24777 }, { "epoch": 0.7234241336019386, "grad_norm": 0.6221790215487045, "learning_rate": 6.146309813463098e-06, "loss": 0.5938, "step": 24778 }, { "epoch": 0.7234533298297859, "grad_norm": 0.7187096681063774, "learning_rate": 6.14566098945661e-06, "loss": 0.6129, "step": 24779 }, { "epoch": 0.7234825260576333, "grad_norm": 0.6526120908064814, "learning_rate": 6.145012165450123e-06, "loss": 0.6064, "step": 24780 }, { "epoch": 0.7235117222854807, "grad_norm": 0.6472344999397371, "learning_rate": 6.144363341443634e-06, "loss": 0.6068, "step": 24781 }, { "epoch": 0.723540918513328, "grad_norm": 0.6600012004924488, "learning_rate": 6.143714517437146e-06, "loss": 0.5994, "step": 24782 }, { "epoch": 0.7235701147411754, "grad_norm": 0.6393788816613992, "learning_rate": 6.1430656934306574e-06, "loss": 0.6227, "step": 24783 }, { "epoch": 0.7235993109690229, "grad_norm": 0.5954327898031853, "learning_rate": 6.1424168694241695e-06, "loss": 0.5337, "step": 24784 }, { "epoch": 0.7236285071968702, "grad_norm": 0.6273710214132541, "learning_rate": 6.141768045417681e-06, "loss": 0.5862, "step": 24785 }, { "epoch": 0.7236577034247176, "grad_norm": 0.6285934607405768, "learning_rate": 6.141119221411193e-06, "loss": 0.5528, "step": 24786 }, { "epoch": 0.7236868996525649, "grad_norm": 0.6005644581205981, "learning_rate": 6.140470397404704e-06, "loss": 0.5053, "step": 24787 }, { "epoch": 0.7237160958804123, "grad_norm": 0.6105283755512685, "learning_rate": 6.139821573398216e-06, "loss": 0.5215, "step": 24788 }, { "epoch": 0.7237452921082597, "grad_norm": 0.620673449886315, "learning_rate": 6.139172749391727e-06, "loss": 0.5797, "step": 24789 }, { "epoch": 0.723774488336107, "grad_norm": 0.6233503220159791, "learning_rate": 6.13852392538524e-06, "loss": 0.5372, "step": 24790 }, { "epoch": 0.7238036845639544, "grad_norm": 0.6881132264965218, "learning_rate": 6.137875101378752e-06, "loss": 0.6373, "step": 24791 }, { "epoch": 0.7238328807918017, "grad_norm": 0.6596887732959764, "learning_rate": 6.137226277372263e-06, "loss": 0.6114, "step": 24792 }, { "epoch": 0.7238620770196491, "grad_norm": 0.6359326296004765, "learning_rate": 6.136577453365775e-06, "loss": 0.5837, "step": 24793 }, { "epoch": 0.7238912732474965, "grad_norm": 0.6327127879120625, "learning_rate": 6.135928629359287e-06, "loss": 0.5483, "step": 24794 }, { "epoch": 0.7239204694753438, "grad_norm": 0.7238524944564582, "learning_rate": 6.135279805352798e-06, "loss": 0.7583, "step": 24795 }, { "epoch": 0.7239496657031912, "grad_norm": 0.6619657365642897, "learning_rate": 6.13463098134631e-06, "loss": 0.6191, "step": 24796 }, { "epoch": 0.7239788619310386, "grad_norm": 0.661865430353979, "learning_rate": 6.133982157339822e-06, "loss": 0.6089, "step": 24797 }, { "epoch": 0.7240080581588859, "grad_norm": 0.6559387131957621, "learning_rate": 6.133333333333334e-06, "loss": 0.6284, "step": 24798 }, { "epoch": 0.7240372543867333, "grad_norm": 0.6602966348519662, "learning_rate": 6.1326845093268465e-06, "loss": 0.6325, "step": 24799 }, { "epoch": 0.7240664506145806, "grad_norm": 0.7267974619817755, "learning_rate": 6.132035685320358e-06, "loss": 0.6981, "step": 24800 }, { "epoch": 0.724095646842428, "grad_norm": 0.6825774147947027, "learning_rate": 6.13138686131387e-06, "loss": 0.6203, "step": 24801 }, { "epoch": 0.7241248430702754, "grad_norm": 0.6299274065642639, "learning_rate": 6.130738037307381e-06, "loss": 0.5429, "step": 24802 }, { "epoch": 0.7241540392981227, "grad_norm": 0.5927459878126348, "learning_rate": 6.130089213300893e-06, "loss": 0.5285, "step": 24803 }, { "epoch": 0.7241832355259701, "grad_norm": 0.6169148129347481, "learning_rate": 6.129440389294404e-06, "loss": 0.5857, "step": 24804 }, { "epoch": 0.7242124317538174, "grad_norm": 0.6651353026764746, "learning_rate": 6.128791565287916e-06, "loss": 0.6037, "step": 24805 }, { "epoch": 0.7242416279816648, "grad_norm": 0.6805002906591895, "learning_rate": 6.128142741281427e-06, "loss": 0.6538, "step": 24806 }, { "epoch": 0.7242708242095122, "grad_norm": 0.6530580411360882, "learning_rate": 6.127493917274939e-06, "loss": 0.6024, "step": 24807 }, { "epoch": 0.7243000204373595, "grad_norm": 0.6208698714672, "learning_rate": 6.1268450932684505e-06, "loss": 0.5949, "step": 24808 }, { "epoch": 0.7243292166652069, "grad_norm": 0.6775310725646698, "learning_rate": 6.126196269261963e-06, "loss": 0.6581, "step": 24809 }, { "epoch": 0.7243584128930542, "grad_norm": 0.6433932352396556, "learning_rate": 6.125547445255475e-06, "loss": 0.5781, "step": 24810 }, { "epoch": 0.7243876091209016, "grad_norm": 0.6714617365665495, "learning_rate": 6.124898621248987e-06, "loss": 0.6652, "step": 24811 }, { "epoch": 0.724416805348749, "grad_norm": 0.6997829682504509, "learning_rate": 6.124249797242499e-06, "loss": 0.6625, "step": 24812 }, { "epoch": 0.7244460015765963, "grad_norm": 0.6313584542947926, "learning_rate": 6.12360097323601e-06, "loss": 0.6037, "step": 24813 }, { "epoch": 0.7244751978044437, "grad_norm": 0.6424293073483125, "learning_rate": 6.122952149229522e-06, "loss": 0.6023, "step": 24814 }, { "epoch": 0.724504394032291, "grad_norm": 0.7604677550962547, "learning_rate": 6.122303325223034e-06, "loss": 0.6697, "step": 24815 }, { "epoch": 0.7245335902601384, "grad_norm": 0.6350337765025745, "learning_rate": 6.121654501216545e-06, "loss": 0.5836, "step": 24816 }, { "epoch": 0.7245627864879858, "grad_norm": 0.655882173699884, "learning_rate": 6.121005677210057e-06, "loss": 0.6351, "step": 24817 }, { "epoch": 0.7245919827158331, "grad_norm": 0.6518437501472907, "learning_rate": 6.12035685320357e-06, "loss": 0.6116, "step": 24818 }, { "epoch": 0.7246211789436805, "grad_norm": 0.6235441844120754, "learning_rate": 6.119708029197081e-06, "loss": 0.5921, "step": 24819 }, { "epoch": 0.7246503751715279, "grad_norm": 0.7188110617467662, "learning_rate": 6.119059205190593e-06, "loss": 0.6947, "step": 24820 }, { "epoch": 0.7246795713993752, "grad_norm": 0.6270821572173985, "learning_rate": 6.118410381184104e-06, "loss": 0.5807, "step": 24821 }, { "epoch": 0.7247087676272226, "grad_norm": 0.6703104688856953, "learning_rate": 6.117761557177616e-06, "loss": 0.5919, "step": 24822 }, { "epoch": 0.7247379638550699, "grad_norm": 0.647007762755447, "learning_rate": 6.1171127331711275e-06, "loss": 0.6181, "step": 24823 }, { "epoch": 0.7247671600829173, "grad_norm": 0.6486301908814935, "learning_rate": 6.1164639091646396e-06, "loss": 0.6369, "step": 24824 }, { "epoch": 0.7247963563107647, "grad_norm": 0.8083960109107946, "learning_rate": 6.115815085158151e-06, "loss": 0.5892, "step": 24825 }, { "epoch": 0.724825552538612, "grad_norm": 0.6617051451197908, "learning_rate": 6.115166261151663e-06, "loss": 0.6288, "step": 24826 }, { "epoch": 0.7248547487664594, "grad_norm": 0.6642492846289914, "learning_rate": 6.114517437145174e-06, "loss": 0.6362, "step": 24827 }, { "epoch": 0.7248839449943068, "grad_norm": 0.61754244368212, "learning_rate": 6.113868613138687e-06, "loss": 0.5603, "step": 24828 }, { "epoch": 0.7249131412221541, "grad_norm": 0.6590332307791489, "learning_rate": 6.113219789132199e-06, "loss": 0.5881, "step": 24829 }, { "epoch": 0.7249423374500015, "grad_norm": 0.6144060879474733, "learning_rate": 6.11257096512571e-06, "loss": 0.5666, "step": 24830 }, { "epoch": 0.7249715336778488, "grad_norm": 0.6658872706464437, "learning_rate": 6.111922141119222e-06, "loss": 0.6221, "step": 24831 }, { "epoch": 0.7250007299056962, "grad_norm": 0.690682067293841, "learning_rate": 6.111273317112733e-06, "loss": 0.6252, "step": 24832 }, { "epoch": 0.7250299261335436, "grad_norm": 0.6291742821262577, "learning_rate": 6.110624493106245e-06, "loss": 0.5744, "step": 24833 }, { "epoch": 0.7250591223613909, "grad_norm": 0.6435495927315933, "learning_rate": 6.109975669099757e-06, "loss": 0.6069, "step": 24834 }, { "epoch": 0.7250883185892383, "grad_norm": 0.6056594559979293, "learning_rate": 6.1093268450932685e-06, "loss": 0.538, "step": 24835 }, { "epoch": 0.7251175148170856, "grad_norm": 0.6057465817925, "learning_rate": 6.1086780210867805e-06, "loss": 0.5338, "step": 24836 }, { "epoch": 0.725146711044933, "grad_norm": 0.5973482931351148, "learning_rate": 6.108029197080293e-06, "loss": 0.497, "step": 24837 }, { "epoch": 0.7251759072727804, "grad_norm": 0.7084499212301648, "learning_rate": 6.1073803730738045e-06, "loss": 0.6486, "step": 24838 }, { "epoch": 0.7252051035006277, "grad_norm": 0.6472618454283353, "learning_rate": 6.1067315490673166e-06, "loss": 0.6416, "step": 24839 }, { "epoch": 0.7252342997284751, "grad_norm": 0.6877102373318837, "learning_rate": 6.106082725060828e-06, "loss": 0.6355, "step": 24840 }, { "epoch": 0.7252634959563224, "grad_norm": 0.7140848138281463, "learning_rate": 6.10543390105434e-06, "loss": 0.6601, "step": 24841 }, { "epoch": 0.7252926921841698, "grad_norm": 0.6539695832038613, "learning_rate": 6.104785077047851e-06, "loss": 0.6593, "step": 24842 }, { "epoch": 0.7253218884120172, "grad_norm": 0.6652049812836044, "learning_rate": 6.104136253041363e-06, "loss": 0.6035, "step": 24843 }, { "epoch": 0.7253510846398645, "grad_norm": 0.6165381146661603, "learning_rate": 6.103487429034874e-06, "loss": 0.5613, "step": 24844 }, { "epoch": 0.7253802808677119, "grad_norm": 0.6430954358287414, "learning_rate": 6.102838605028386e-06, "loss": 0.6417, "step": 24845 }, { "epoch": 0.7254094770955593, "grad_norm": 0.6278911320926646, "learning_rate": 6.102189781021899e-06, "loss": 0.5838, "step": 24846 }, { "epoch": 0.7254386733234066, "grad_norm": 0.646935828489615, "learning_rate": 6.10154095701541e-06, "loss": 0.6184, "step": 24847 }, { "epoch": 0.725467869551254, "grad_norm": 0.6204749418373523, "learning_rate": 6.100892133008922e-06, "loss": 0.5635, "step": 24848 }, { "epoch": 0.7254970657791013, "grad_norm": 0.6554515893147438, "learning_rate": 6.1002433090024335e-06, "loss": 0.616, "step": 24849 }, { "epoch": 0.7255262620069487, "grad_norm": 0.6308925906333679, "learning_rate": 6.0995944849959455e-06, "loss": 0.5665, "step": 24850 }, { "epoch": 0.7255554582347961, "grad_norm": 0.6534102535206463, "learning_rate": 6.098945660989457e-06, "loss": 0.5669, "step": 24851 }, { "epoch": 0.7255846544626434, "grad_norm": 0.6945497006019984, "learning_rate": 6.098296836982969e-06, "loss": 0.5898, "step": 24852 }, { "epoch": 0.7256138506904908, "grad_norm": 0.6343782079183404, "learning_rate": 6.097648012976481e-06, "loss": 0.6211, "step": 24853 }, { "epoch": 0.7256430469183381, "grad_norm": 0.6662221005768719, "learning_rate": 6.096999188969992e-06, "loss": 0.6336, "step": 24854 }, { "epoch": 0.7256722431461855, "grad_norm": 0.6160749872278986, "learning_rate": 6.096350364963504e-06, "loss": 0.5443, "step": 24855 }, { "epoch": 0.7257014393740329, "grad_norm": 0.6304675991506833, "learning_rate": 6.095701540957017e-06, "loss": 0.5881, "step": 24856 }, { "epoch": 0.7257306356018802, "grad_norm": 0.6163565611053056, "learning_rate": 6.095052716950528e-06, "loss": 0.5887, "step": 24857 }, { "epoch": 0.7257598318297276, "grad_norm": 0.6406436615048076, "learning_rate": 6.09440389294404e-06, "loss": 0.6123, "step": 24858 }, { "epoch": 0.725789028057575, "grad_norm": 0.6418532807603953, "learning_rate": 6.093755068937551e-06, "loss": 0.5598, "step": 24859 }, { "epoch": 0.7258182242854223, "grad_norm": 0.6421951511656676, "learning_rate": 6.093106244931063e-06, "loss": 0.6174, "step": 24860 }, { "epoch": 0.7258474205132697, "grad_norm": 0.6660961660096483, "learning_rate": 6.092457420924574e-06, "loss": 0.5881, "step": 24861 }, { "epoch": 0.725876616741117, "grad_norm": 0.6274487671778803, "learning_rate": 6.091808596918086e-06, "loss": 0.597, "step": 24862 }, { "epoch": 0.7259058129689644, "grad_norm": 0.6877055468477541, "learning_rate": 6.091159772911598e-06, "loss": 0.6601, "step": 24863 }, { "epoch": 0.7259350091968118, "grad_norm": 0.56776311195531, "learning_rate": 6.09051094890511e-06, "loss": 0.4454, "step": 24864 }, { "epoch": 0.7259642054246591, "grad_norm": 0.7145634728780162, "learning_rate": 6.0898621248986225e-06, "loss": 0.6855, "step": 24865 }, { "epoch": 0.7259934016525065, "grad_norm": 0.6641282383581354, "learning_rate": 6.089213300892134e-06, "loss": 0.5973, "step": 24866 }, { "epoch": 0.7260225978803538, "grad_norm": 0.6342278706107858, "learning_rate": 6.088564476885646e-06, "loss": 0.6218, "step": 24867 }, { "epoch": 0.7260517941082012, "grad_norm": 0.6726746719974268, "learning_rate": 6.087915652879157e-06, "loss": 0.6042, "step": 24868 }, { "epoch": 0.7260809903360486, "grad_norm": 0.6536690479403485, "learning_rate": 6.087266828872669e-06, "loss": 0.5337, "step": 24869 }, { "epoch": 0.7261101865638959, "grad_norm": 0.5939930207063686, "learning_rate": 6.08661800486618e-06, "loss": 0.495, "step": 24870 }, { "epoch": 0.7261393827917433, "grad_norm": 0.5953935389569897, "learning_rate": 6.085969180859692e-06, "loss": 0.5657, "step": 24871 }, { "epoch": 0.7261685790195906, "grad_norm": 0.6085957113473457, "learning_rate": 6.085320356853204e-06, "loss": 0.5395, "step": 24872 }, { "epoch": 0.726197775247438, "grad_norm": 0.5895967964936246, "learning_rate": 6.084671532846715e-06, "loss": 0.5359, "step": 24873 }, { "epoch": 0.7262269714752854, "grad_norm": 0.6131993171423726, "learning_rate": 6.084022708840227e-06, "loss": 0.5336, "step": 24874 }, { "epoch": 0.7262561677031327, "grad_norm": 0.6785577158044844, "learning_rate": 6.083373884833739e-06, "loss": 0.6444, "step": 24875 }, { "epoch": 0.7262853639309801, "grad_norm": 0.6366900201009843, "learning_rate": 6.082725060827251e-06, "loss": 0.5862, "step": 24876 }, { "epoch": 0.7263145601588274, "grad_norm": 0.6486944132126905, "learning_rate": 6.0820762368207634e-06, "loss": 0.5452, "step": 24877 }, { "epoch": 0.7263437563866748, "grad_norm": 0.6198116991980492, "learning_rate": 6.081427412814275e-06, "loss": 0.5609, "step": 24878 }, { "epoch": 0.7263729526145222, "grad_norm": 0.6383296907301439, "learning_rate": 6.080778588807787e-06, "loss": 0.6062, "step": 24879 }, { "epoch": 0.7264021488423695, "grad_norm": 0.6860215054309987, "learning_rate": 6.080129764801298e-06, "loss": 0.6806, "step": 24880 }, { "epoch": 0.7264313450702169, "grad_norm": 0.6147045986341412, "learning_rate": 6.07948094079481e-06, "loss": 0.5506, "step": 24881 }, { "epoch": 0.7264605412980643, "grad_norm": 0.6549784687878667, "learning_rate": 6.078832116788321e-06, "loss": 0.6456, "step": 24882 }, { "epoch": 0.7264897375259116, "grad_norm": 0.6036743118980212, "learning_rate": 6.078183292781833e-06, "loss": 0.5513, "step": 24883 }, { "epoch": 0.726518933753759, "grad_norm": 0.6191933711019306, "learning_rate": 6.077534468775346e-06, "loss": 0.6007, "step": 24884 }, { "epoch": 0.7265481299816063, "grad_norm": 0.6319843919430159, "learning_rate": 6.076885644768857e-06, "loss": 0.5813, "step": 24885 }, { "epoch": 0.7265773262094537, "grad_norm": 0.6749313360112491, "learning_rate": 6.076236820762369e-06, "loss": 0.6342, "step": 24886 }, { "epoch": 0.7266065224373011, "grad_norm": 0.6697088986881767, "learning_rate": 6.07558799675588e-06, "loss": 0.6418, "step": 24887 }, { "epoch": 0.7266357186651484, "grad_norm": 0.6107985382308442, "learning_rate": 6.074939172749392e-06, "loss": 0.5602, "step": 24888 }, { "epoch": 0.7266649148929958, "grad_norm": 0.6157211351090985, "learning_rate": 6.0742903487429035e-06, "loss": 0.5672, "step": 24889 }, { "epoch": 0.7266941111208431, "grad_norm": 0.7157098948575179, "learning_rate": 6.0736415247364156e-06, "loss": 0.6485, "step": 24890 }, { "epoch": 0.7267233073486905, "grad_norm": 0.6783650659224334, "learning_rate": 6.072992700729928e-06, "loss": 0.6334, "step": 24891 }, { "epoch": 0.7267525035765379, "grad_norm": 0.7123548223214258, "learning_rate": 6.072343876723439e-06, "loss": 0.6744, "step": 24892 }, { "epoch": 0.7267816998043852, "grad_norm": 0.6889604175698457, "learning_rate": 6.071695052716951e-06, "loss": 0.6894, "step": 24893 }, { "epoch": 0.7268108960322326, "grad_norm": 0.6512343391138672, "learning_rate": 6.071046228710463e-06, "loss": 0.6503, "step": 24894 }, { "epoch": 0.72684009226008, "grad_norm": 0.6217579675266313, "learning_rate": 6.070397404703975e-06, "loss": 0.5801, "step": 24895 }, { "epoch": 0.7268692884879273, "grad_norm": 0.6647402544019041, "learning_rate": 6.069748580697487e-06, "loss": 0.6285, "step": 24896 }, { "epoch": 0.7268984847157747, "grad_norm": 0.6055852427960335, "learning_rate": 6.069099756690998e-06, "loss": 0.5209, "step": 24897 }, { "epoch": 0.726927680943622, "grad_norm": 0.6029435356879256, "learning_rate": 6.06845093268451e-06, "loss": 0.5454, "step": 24898 }, { "epoch": 0.7269568771714694, "grad_norm": 0.639669879111815, "learning_rate": 6.067802108678021e-06, "loss": 0.5784, "step": 24899 }, { "epoch": 0.7269860733993168, "grad_norm": 0.6410112113587487, "learning_rate": 6.067153284671533e-06, "loss": 0.593, "step": 24900 }, { "epoch": 0.7270152696271641, "grad_norm": 0.6261297800522195, "learning_rate": 6.0665044606650445e-06, "loss": 0.554, "step": 24901 }, { "epoch": 0.7270444658550115, "grad_norm": 0.6271833137392271, "learning_rate": 6.0658556366585565e-06, "loss": 0.5561, "step": 24902 }, { "epoch": 0.7270736620828588, "grad_norm": 0.6647327445138496, "learning_rate": 6.065206812652069e-06, "loss": 0.6274, "step": 24903 }, { "epoch": 0.7271028583107062, "grad_norm": 0.6667812931856629, "learning_rate": 6.0645579886455806e-06, "loss": 0.6487, "step": 24904 }, { "epoch": 0.7271320545385537, "grad_norm": 0.639484126075396, "learning_rate": 6.063909164639093e-06, "loss": 0.6097, "step": 24905 }, { "epoch": 0.727161250766401, "grad_norm": 0.6914928623218146, "learning_rate": 6.063260340632604e-06, "loss": 0.6763, "step": 24906 }, { "epoch": 0.7271904469942484, "grad_norm": 0.6697901921733598, "learning_rate": 6.062611516626116e-06, "loss": 0.6549, "step": 24907 }, { "epoch": 0.7272196432220958, "grad_norm": 0.5945993522642997, "learning_rate": 6.061962692619627e-06, "loss": 0.541, "step": 24908 }, { "epoch": 0.7272488394499431, "grad_norm": 0.6103393086636612, "learning_rate": 6.061313868613139e-06, "loss": 0.5306, "step": 24909 }, { "epoch": 0.7272780356777905, "grad_norm": 0.6239617337320735, "learning_rate": 6.060665044606651e-06, "loss": 0.5459, "step": 24910 }, { "epoch": 0.7273072319056378, "grad_norm": 0.5917042276409964, "learning_rate": 6.060016220600162e-06, "loss": 0.5394, "step": 24911 }, { "epoch": 0.7273364281334852, "grad_norm": 0.6604574045230276, "learning_rate": 6.059367396593675e-06, "loss": 0.621, "step": 24912 }, { "epoch": 0.7273656243613326, "grad_norm": 0.6699296650450908, "learning_rate": 6.058718572587186e-06, "loss": 0.6264, "step": 24913 }, { "epoch": 0.7273948205891799, "grad_norm": 0.6692847321565669, "learning_rate": 6.058069748580698e-06, "loss": 0.6501, "step": 24914 }, { "epoch": 0.7274240168170273, "grad_norm": 0.7044510220553609, "learning_rate": 6.05742092457421e-06, "loss": 0.5823, "step": 24915 }, { "epoch": 0.7274532130448746, "grad_norm": 0.611592838593064, "learning_rate": 6.0567721005677215e-06, "loss": 0.567, "step": 24916 }, { "epoch": 0.727482409272722, "grad_norm": 0.6345516229014888, "learning_rate": 6.0561232765612335e-06, "loss": 0.515, "step": 24917 }, { "epoch": 0.7275116055005694, "grad_norm": 0.6405821711599506, "learning_rate": 6.055474452554745e-06, "loss": 0.5623, "step": 24918 }, { "epoch": 0.7275408017284167, "grad_norm": 0.6435796268771884, "learning_rate": 6.054825628548257e-06, "loss": 0.6163, "step": 24919 }, { "epoch": 0.7275699979562641, "grad_norm": 0.683813745798957, "learning_rate": 6.054176804541768e-06, "loss": 0.6841, "step": 24920 }, { "epoch": 0.7275991941841115, "grad_norm": 0.6105110382525969, "learning_rate": 6.05352798053528e-06, "loss": 0.57, "step": 24921 }, { "epoch": 0.7276283904119588, "grad_norm": 0.6086769816721724, "learning_rate": 6.052879156528793e-06, "loss": 0.5688, "step": 24922 }, { "epoch": 0.7276575866398062, "grad_norm": 0.6393435750733065, "learning_rate": 6.052230332522304e-06, "loss": 0.5837, "step": 24923 }, { "epoch": 0.7276867828676535, "grad_norm": 0.6658395253147495, "learning_rate": 6.051581508515816e-06, "loss": 0.6709, "step": 24924 }, { "epoch": 0.7277159790955009, "grad_norm": 0.6346842443462565, "learning_rate": 6.050932684509327e-06, "loss": 0.6342, "step": 24925 }, { "epoch": 0.7277451753233483, "grad_norm": 0.6859251391380146, "learning_rate": 6.050283860502839e-06, "loss": 0.6074, "step": 24926 }, { "epoch": 0.7277743715511956, "grad_norm": 0.6033906794401395, "learning_rate": 6.04963503649635e-06, "loss": 0.5468, "step": 24927 }, { "epoch": 0.727803567779043, "grad_norm": 0.6406577770507881, "learning_rate": 6.0489862124898624e-06, "loss": 0.6119, "step": 24928 }, { "epoch": 0.7278327640068903, "grad_norm": 0.6259894201160412, "learning_rate": 6.0483373884833745e-06, "loss": 0.6033, "step": 24929 }, { "epoch": 0.7278619602347377, "grad_norm": 0.6745971560127142, "learning_rate": 6.047688564476886e-06, "loss": 0.6631, "step": 24930 }, { "epoch": 0.7278911564625851, "grad_norm": 0.5929083845781974, "learning_rate": 6.0470397404703985e-06, "loss": 0.5022, "step": 24931 }, { "epoch": 0.7279203526904324, "grad_norm": 0.6774390540337729, "learning_rate": 6.04639091646391e-06, "loss": 0.6108, "step": 24932 }, { "epoch": 0.7279495489182798, "grad_norm": 0.6314790516466686, "learning_rate": 6.045742092457422e-06, "loss": 0.5836, "step": 24933 }, { "epoch": 0.7279787451461271, "grad_norm": 0.6349815328372664, "learning_rate": 6.045093268450934e-06, "loss": 0.5962, "step": 24934 }, { "epoch": 0.7280079413739745, "grad_norm": 0.6350681310862291, "learning_rate": 6.044444444444445e-06, "loss": 0.5905, "step": 24935 }, { "epoch": 0.7280371376018219, "grad_norm": 0.6980186006864048, "learning_rate": 6.043795620437957e-06, "loss": 0.6353, "step": 24936 }, { "epoch": 0.7280663338296692, "grad_norm": 0.6048975681029354, "learning_rate": 6.043146796431468e-06, "loss": 0.5465, "step": 24937 }, { "epoch": 0.7280955300575166, "grad_norm": 0.6211123136542116, "learning_rate": 6.04249797242498e-06, "loss": 0.5777, "step": 24938 }, { "epoch": 0.728124726285364, "grad_norm": 0.5930469846513879, "learning_rate": 6.041849148418491e-06, "loss": 0.4949, "step": 24939 }, { "epoch": 0.7281539225132113, "grad_norm": 0.644290766613654, "learning_rate": 6.041200324412003e-06, "loss": 0.6034, "step": 24940 }, { "epoch": 0.7281831187410587, "grad_norm": 0.6543016637089499, "learning_rate": 6.040551500405516e-06, "loss": 0.6592, "step": 24941 }, { "epoch": 0.728212314968906, "grad_norm": 0.7089469810587108, "learning_rate": 6.039902676399027e-06, "loss": 0.6125, "step": 24942 }, { "epoch": 0.7282415111967534, "grad_norm": 0.707934597240652, "learning_rate": 6.0392538523925394e-06, "loss": 0.7412, "step": 24943 }, { "epoch": 0.7282707074246008, "grad_norm": 0.6190740673692305, "learning_rate": 6.038605028386051e-06, "loss": 0.5735, "step": 24944 }, { "epoch": 0.7282999036524481, "grad_norm": 0.5774558045104189, "learning_rate": 6.037956204379563e-06, "loss": 0.4996, "step": 24945 }, { "epoch": 0.7283290998802955, "grad_norm": 0.7297195781152114, "learning_rate": 6.037307380373074e-06, "loss": 0.711, "step": 24946 }, { "epoch": 0.7283582961081428, "grad_norm": 0.6219616340767798, "learning_rate": 6.036658556366586e-06, "loss": 0.5339, "step": 24947 }, { "epoch": 0.7283874923359902, "grad_norm": 0.6472417167997321, "learning_rate": 6.036009732360097e-06, "loss": 0.6311, "step": 24948 }, { "epoch": 0.7284166885638376, "grad_norm": 0.7318817255406181, "learning_rate": 6.035360908353609e-06, "loss": 0.6656, "step": 24949 }, { "epoch": 0.7284458847916849, "grad_norm": 0.6592575866960119, "learning_rate": 6.034712084347122e-06, "loss": 0.6028, "step": 24950 }, { "epoch": 0.7284750810195323, "grad_norm": 0.7003854783181708, "learning_rate": 6.034063260340633e-06, "loss": 0.6962, "step": 24951 }, { "epoch": 0.7285042772473797, "grad_norm": 0.6863143390361942, "learning_rate": 6.033414436334145e-06, "loss": 0.6664, "step": 24952 }, { "epoch": 0.728533473475227, "grad_norm": 0.5905969485975269, "learning_rate": 6.032765612327657e-06, "loss": 0.5387, "step": 24953 }, { "epoch": 0.7285626697030744, "grad_norm": 0.6470633491118182, "learning_rate": 6.032116788321168e-06, "loss": 0.5951, "step": 24954 }, { "epoch": 0.7285918659309217, "grad_norm": 0.6125585142224655, "learning_rate": 6.03146796431468e-06, "loss": 0.5721, "step": 24955 }, { "epoch": 0.7286210621587691, "grad_norm": 0.645041517702136, "learning_rate": 6.0308191403081916e-06, "loss": 0.5993, "step": 24956 }, { "epoch": 0.7286502583866165, "grad_norm": 0.6309788438931322, "learning_rate": 6.030170316301704e-06, "loss": 0.576, "step": 24957 }, { "epoch": 0.7286794546144638, "grad_norm": 0.7193908666819342, "learning_rate": 6.029521492295215e-06, "loss": 0.6255, "step": 24958 }, { "epoch": 0.7287086508423112, "grad_norm": 0.6533156680701009, "learning_rate": 6.028872668288727e-06, "loss": 0.6181, "step": 24959 }, { "epoch": 0.7287378470701585, "grad_norm": 0.6006749159305109, "learning_rate": 6.02822384428224e-06, "loss": 0.5382, "step": 24960 }, { "epoch": 0.7287670432980059, "grad_norm": 0.7013070736482193, "learning_rate": 6.027575020275751e-06, "loss": 0.6419, "step": 24961 }, { "epoch": 0.7287962395258533, "grad_norm": 0.6370945789170289, "learning_rate": 6.026926196269263e-06, "loss": 0.6298, "step": 24962 }, { "epoch": 0.7288254357537006, "grad_norm": 0.6495658556744991, "learning_rate": 6.026277372262774e-06, "loss": 0.5659, "step": 24963 }, { "epoch": 0.728854631981548, "grad_norm": 0.6345263657750883, "learning_rate": 6.025628548256286e-06, "loss": 0.6184, "step": 24964 }, { "epoch": 0.7288838282093953, "grad_norm": 0.6483381784950197, "learning_rate": 6.024979724249797e-06, "loss": 0.6278, "step": 24965 }, { "epoch": 0.7289130244372427, "grad_norm": 0.6581887483147968, "learning_rate": 6.024330900243309e-06, "loss": 0.6568, "step": 24966 }, { "epoch": 0.7289422206650901, "grad_norm": 0.6286788630996281, "learning_rate": 6.0236820762368205e-06, "loss": 0.58, "step": 24967 }, { "epoch": 0.7289714168929374, "grad_norm": 0.6709650019025364, "learning_rate": 6.0230332522303325e-06, "loss": 0.6424, "step": 24968 }, { "epoch": 0.7290006131207848, "grad_norm": 0.6871230129478716, "learning_rate": 6.022384428223845e-06, "loss": 0.6749, "step": 24969 }, { "epoch": 0.7290298093486322, "grad_norm": 0.6525845965941018, "learning_rate": 6.0217356042173566e-06, "loss": 0.6574, "step": 24970 }, { "epoch": 0.7290590055764795, "grad_norm": 0.6207024415124323, "learning_rate": 6.021086780210869e-06, "loss": 0.5345, "step": 24971 }, { "epoch": 0.7290882018043269, "grad_norm": 0.6187375945714206, "learning_rate": 6.020437956204381e-06, "loss": 0.5211, "step": 24972 }, { "epoch": 0.7291173980321742, "grad_norm": 0.6169294703196028, "learning_rate": 6.019789132197892e-06, "loss": 0.5544, "step": 24973 }, { "epoch": 0.7291465942600216, "grad_norm": 0.6288386428161798, "learning_rate": 6.019140308191404e-06, "loss": 0.5572, "step": 24974 }, { "epoch": 0.729175790487869, "grad_norm": 0.6275844287576386, "learning_rate": 6.018491484184915e-06, "loss": 0.5599, "step": 24975 }, { "epoch": 0.7292049867157163, "grad_norm": 0.6583459454486598, "learning_rate": 6.017842660178427e-06, "loss": 0.6213, "step": 24976 }, { "epoch": 0.7292341829435637, "grad_norm": 0.5896020118228739, "learning_rate": 6.017193836171938e-06, "loss": 0.4885, "step": 24977 }, { "epoch": 0.729263379171411, "grad_norm": 0.6883828378820581, "learning_rate": 6.01654501216545e-06, "loss": 0.6761, "step": 24978 }, { "epoch": 0.7292925753992584, "grad_norm": 0.6231463514800304, "learning_rate": 6.015896188158963e-06, "loss": 0.6099, "step": 24979 }, { "epoch": 0.7293217716271058, "grad_norm": 0.5959649603883189, "learning_rate": 6.015247364152474e-06, "loss": 0.5155, "step": 24980 }, { "epoch": 0.7293509678549531, "grad_norm": 0.6100723082381426, "learning_rate": 6.014598540145986e-06, "loss": 0.554, "step": 24981 }, { "epoch": 0.7293801640828005, "grad_norm": 0.591449194317502, "learning_rate": 6.0139497161394975e-06, "loss": 0.5082, "step": 24982 }, { "epoch": 0.7294093603106478, "grad_norm": 0.6247799856111709, "learning_rate": 6.0133008921330095e-06, "loss": 0.584, "step": 24983 }, { "epoch": 0.7294385565384952, "grad_norm": 0.6641076453749359, "learning_rate": 6.012652068126521e-06, "loss": 0.6075, "step": 24984 }, { "epoch": 0.7294677527663426, "grad_norm": 0.5999793703206107, "learning_rate": 6.012003244120033e-06, "loss": 0.5395, "step": 24985 }, { "epoch": 0.7294969489941899, "grad_norm": 0.6260093203458773, "learning_rate": 6.011354420113544e-06, "loss": 0.5805, "step": 24986 }, { "epoch": 0.7295261452220373, "grad_norm": 0.6529198828801427, "learning_rate": 6.010705596107056e-06, "loss": 0.6125, "step": 24987 }, { "epoch": 0.7295553414498847, "grad_norm": 0.667095868905292, "learning_rate": 6.010056772100569e-06, "loss": 0.5883, "step": 24988 }, { "epoch": 0.729584537677732, "grad_norm": 0.6751934044263722, "learning_rate": 6.00940794809408e-06, "loss": 0.6774, "step": 24989 }, { "epoch": 0.7296137339055794, "grad_norm": 0.6812183615647333, "learning_rate": 6.008759124087592e-06, "loss": 0.652, "step": 24990 }, { "epoch": 0.7296429301334267, "grad_norm": 0.6335219346835085, "learning_rate": 6.008110300081104e-06, "loss": 0.6036, "step": 24991 }, { "epoch": 0.7296721263612741, "grad_norm": 0.6649294598727518, "learning_rate": 6.007461476074615e-06, "loss": 0.6398, "step": 24992 }, { "epoch": 0.7297013225891215, "grad_norm": 0.6333961977637212, "learning_rate": 6.006812652068127e-06, "loss": 0.5858, "step": 24993 }, { "epoch": 0.7297305188169688, "grad_norm": 0.6702635376085314, "learning_rate": 6.0061638280616384e-06, "loss": 0.6217, "step": 24994 }, { "epoch": 0.7297597150448162, "grad_norm": 0.6439599111020023, "learning_rate": 6.0055150040551505e-06, "loss": 0.5939, "step": 24995 }, { "epoch": 0.7297889112726635, "grad_norm": 0.6660607938492583, "learning_rate": 6.004866180048662e-06, "loss": 0.6142, "step": 24996 }, { "epoch": 0.7298181075005109, "grad_norm": 0.6513372216720855, "learning_rate": 6.0042173560421745e-06, "loss": 0.6062, "step": 24997 }, { "epoch": 0.7298473037283583, "grad_norm": 0.6343718793317932, "learning_rate": 6.0035685320356865e-06, "loss": 0.5195, "step": 24998 }, { "epoch": 0.7298764999562056, "grad_norm": 0.665527831680332, "learning_rate": 6.002919708029198e-06, "loss": 0.6014, "step": 24999 }, { "epoch": 0.729905696184053, "grad_norm": 0.6772030555588331, "learning_rate": 6.00227088402271e-06, "loss": 0.6698, "step": 25000 }, { "epoch": 0.7299348924119003, "grad_norm": 0.6155584558454354, "learning_rate": 6.001622060016221e-06, "loss": 0.5616, "step": 25001 }, { "epoch": 0.7299640886397477, "grad_norm": 0.6311116022962238, "learning_rate": 6.000973236009733e-06, "loss": 0.6036, "step": 25002 }, { "epoch": 0.7299932848675951, "grad_norm": 0.6031340402854138, "learning_rate": 6.000324412003244e-06, "loss": 0.5498, "step": 25003 }, { "epoch": 0.7300224810954424, "grad_norm": 0.6714642854778369, "learning_rate": 5.999675587996756e-06, "loss": 0.6316, "step": 25004 }, { "epoch": 0.7300516773232898, "grad_norm": 0.6360639327581444, "learning_rate": 5.999026763990267e-06, "loss": 0.5707, "step": 25005 }, { "epoch": 0.7300808735511372, "grad_norm": 0.646622176765761, "learning_rate": 5.998377939983779e-06, "loss": 0.6456, "step": 25006 }, { "epoch": 0.7301100697789845, "grad_norm": 0.656072823281976, "learning_rate": 5.997729115977292e-06, "loss": 0.646, "step": 25007 }, { "epoch": 0.7301392660068319, "grad_norm": 0.6207992242406087, "learning_rate": 5.9970802919708034e-06, "loss": 0.5913, "step": 25008 }, { "epoch": 0.7301684622346792, "grad_norm": 0.6409644013052503, "learning_rate": 5.9964314679643155e-06, "loss": 0.5713, "step": 25009 }, { "epoch": 0.7301976584625266, "grad_norm": 0.7152733896106961, "learning_rate": 5.995782643957827e-06, "loss": 0.7034, "step": 25010 }, { "epoch": 0.730226854690374, "grad_norm": 0.5873038586984715, "learning_rate": 5.995133819951339e-06, "loss": 0.4832, "step": 25011 }, { "epoch": 0.7302560509182213, "grad_norm": 0.7042725792957417, "learning_rate": 5.994484995944851e-06, "loss": 0.7006, "step": 25012 }, { "epoch": 0.7302852471460687, "grad_norm": 0.6485355944006764, "learning_rate": 5.993836171938362e-06, "loss": 0.6444, "step": 25013 }, { "epoch": 0.730314443373916, "grad_norm": 0.6606048497855209, "learning_rate": 5.993187347931874e-06, "loss": 0.5996, "step": 25014 }, { "epoch": 0.7303436396017634, "grad_norm": 0.6525889393421772, "learning_rate": 5.992538523925385e-06, "loss": 0.6193, "step": 25015 }, { "epoch": 0.7303728358296108, "grad_norm": 0.5605726455523533, "learning_rate": 5.991889699918898e-06, "loss": 0.4576, "step": 25016 }, { "epoch": 0.7304020320574581, "grad_norm": 0.6303525253505217, "learning_rate": 5.99124087591241e-06, "loss": 0.5828, "step": 25017 }, { "epoch": 0.7304312282853055, "grad_norm": 0.6951398687559941, "learning_rate": 5.990592051905921e-06, "loss": 0.686, "step": 25018 }, { "epoch": 0.7304604245131529, "grad_norm": 0.6332541303645565, "learning_rate": 5.989943227899433e-06, "loss": 0.5667, "step": 25019 }, { "epoch": 0.7304896207410002, "grad_norm": 0.6849388318417254, "learning_rate": 5.989294403892944e-06, "loss": 0.616, "step": 25020 }, { "epoch": 0.7305188169688476, "grad_norm": 0.6366552896869921, "learning_rate": 5.988645579886456e-06, "loss": 0.5608, "step": 25021 }, { "epoch": 0.7305480131966949, "grad_norm": 0.6591252656131686, "learning_rate": 5.9879967558799676e-06, "loss": 0.686, "step": 25022 }, { "epoch": 0.7305772094245423, "grad_norm": 0.6195082409028229, "learning_rate": 5.98734793187348e-06, "loss": 0.5443, "step": 25023 }, { "epoch": 0.7306064056523897, "grad_norm": 0.6621463129137256, "learning_rate": 5.986699107866991e-06, "loss": 0.6327, "step": 25024 }, { "epoch": 0.7306356018802371, "grad_norm": 0.626737452396479, "learning_rate": 5.986050283860503e-06, "loss": 0.5907, "step": 25025 }, { "epoch": 0.7306647981080845, "grad_norm": 0.6309491299462543, "learning_rate": 5.985401459854016e-06, "loss": 0.6023, "step": 25026 }, { "epoch": 0.7306939943359319, "grad_norm": 0.6154203629017597, "learning_rate": 5.984752635847527e-06, "loss": 0.5405, "step": 25027 }, { "epoch": 0.7307231905637792, "grad_norm": 0.6561646470471747, "learning_rate": 5.984103811841039e-06, "loss": 0.5925, "step": 25028 }, { "epoch": 0.7307523867916266, "grad_norm": 0.6458149884386608, "learning_rate": 5.98345498783455e-06, "loss": 0.6242, "step": 25029 }, { "epoch": 0.7307815830194739, "grad_norm": 0.612229684089857, "learning_rate": 5.982806163828062e-06, "loss": 0.5563, "step": 25030 }, { "epoch": 0.7308107792473213, "grad_norm": 0.6714595216080499, "learning_rate": 5.982157339821574e-06, "loss": 0.6615, "step": 25031 }, { "epoch": 0.7308399754751687, "grad_norm": 0.6575580877621741, "learning_rate": 5.981508515815085e-06, "loss": 0.6076, "step": 25032 }, { "epoch": 0.730869171703016, "grad_norm": 0.6562980860936863, "learning_rate": 5.980859691808597e-06, "loss": 0.6186, "step": 25033 }, { "epoch": 0.7308983679308634, "grad_norm": 0.787819540267694, "learning_rate": 5.9802108678021085e-06, "loss": 0.5956, "step": 25034 }, { "epoch": 0.7309275641587107, "grad_norm": 0.6573252231160694, "learning_rate": 5.979562043795621e-06, "loss": 0.6095, "step": 25035 }, { "epoch": 0.7309567603865581, "grad_norm": 0.5891119256181422, "learning_rate": 5.978913219789133e-06, "loss": 0.5469, "step": 25036 }, { "epoch": 0.7309859566144055, "grad_norm": 0.7204971180706984, "learning_rate": 5.978264395782645e-06, "loss": 0.6492, "step": 25037 }, { "epoch": 0.7310151528422528, "grad_norm": 0.6692062409822978, "learning_rate": 5.977615571776157e-06, "loss": 0.6527, "step": 25038 }, { "epoch": 0.7310443490701002, "grad_norm": 0.6581337328777779, "learning_rate": 5.976966747769668e-06, "loss": 0.6057, "step": 25039 }, { "epoch": 0.7310735452979475, "grad_norm": 0.6988398134983703, "learning_rate": 5.97631792376318e-06, "loss": 0.6909, "step": 25040 }, { "epoch": 0.7311027415257949, "grad_norm": 0.6115098211103304, "learning_rate": 5.975669099756691e-06, "loss": 0.5762, "step": 25041 }, { "epoch": 0.7311319377536423, "grad_norm": 0.6893878666689023, "learning_rate": 5.975020275750203e-06, "loss": 0.6871, "step": 25042 }, { "epoch": 0.7311611339814896, "grad_norm": 0.64707577572041, "learning_rate": 5.974371451743714e-06, "loss": 0.5941, "step": 25043 }, { "epoch": 0.731190330209337, "grad_norm": 0.6730408575556793, "learning_rate": 5.973722627737226e-06, "loss": 0.6635, "step": 25044 }, { "epoch": 0.7312195264371844, "grad_norm": 0.6150623127954973, "learning_rate": 5.973073803730739e-06, "loss": 0.5451, "step": 25045 }, { "epoch": 0.7312487226650317, "grad_norm": 0.63049438226984, "learning_rate": 5.97242497972425e-06, "loss": 0.6078, "step": 25046 }, { "epoch": 0.7312779188928791, "grad_norm": 0.6021299176352611, "learning_rate": 5.971776155717762e-06, "loss": 0.5335, "step": 25047 }, { "epoch": 0.7313071151207264, "grad_norm": 0.6449530993274595, "learning_rate": 5.9711273317112735e-06, "loss": 0.5765, "step": 25048 }, { "epoch": 0.7313363113485738, "grad_norm": 0.6994388265105559, "learning_rate": 5.9704785077047855e-06, "loss": 0.6885, "step": 25049 }, { "epoch": 0.7313655075764212, "grad_norm": 0.5930357231257433, "learning_rate": 5.9698296836982976e-06, "loss": 0.5292, "step": 25050 }, { "epoch": 0.7313947038042685, "grad_norm": 0.636929171898109, "learning_rate": 5.969180859691809e-06, "loss": 0.5568, "step": 25051 }, { "epoch": 0.7314239000321159, "grad_norm": 0.6894359534138007, "learning_rate": 5.968532035685321e-06, "loss": 0.6653, "step": 25052 }, { "epoch": 0.7314530962599632, "grad_norm": 0.6210686571539046, "learning_rate": 5.967883211678832e-06, "loss": 0.5794, "step": 25053 }, { "epoch": 0.7314822924878106, "grad_norm": 0.583969295664323, "learning_rate": 5.967234387672345e-06, "loss": 0.5323, "step": 25054 }, { "epoch": 0.731511488715658, "grad_norm": 0.6286887755588688, "learning_rate": 5.966585563665857e-06, "loss": 0.5701, "step": 25055 }, { "epoch": 0.7315406849435053, "grad_norm": 0.6063664439175714, "learning_rate": 5.965936739659368e-06, "loss": 0.5804, "step": 25056 }, { "epoch": 0.7315698811713527, "grad_norm": 0.6411354510003043, "learning_rate": 5.96528791565288e-06, "loss": 0.6241, "step": 25057 }, { "epoch": 0.7315990773992, "grad_norm": 0.6484604240759286, "learning_rate": 5.964639091646391e-06, "loss": 0.5557, "step": 25058 }, { "epoch": 0.7316282736270474, "grad_norm": 0.6186927340813767, "learning_rate": 5.963990267639903e-06, "loss": 0.5708, "step": 25059 }, { "epoch": 0.7316574698548948, "grad_norm": 0.627266475935472, "learning_rate": 5.9633414436334144e-06, "loss": 0.5688, "step": 25060 }, { "epoch": 0.7316866660827421, "grad_norm": 0.6486817279718717, "learning_rate": 5.9626926196269265e-06, "loss": 0.657, "step": 25061 }, { "epoch": 0.7317158623105895, "grad_norm": 0.6394424482806743, "learning_rate": 5.962043795620438e-06, "loss": 0.6346, "step": 25062 }, { "epoch": 0.7317450585384369, "grad_norm": 0.6309428075451339, "learning_rate": 5.9613949716139505e-06, "loss": 0.5024, "step": 25063 }, { "epoch": 0.7317742547662842, "grad_norm": 0.6386658697878133, "learning_rate": 5.9607461476074626e-06, "loss": 0.5646, "step": 25064 }, { "epoch": 0.7318034509941316, "grad_norm": 0.6043020988380134, "learning_rate": 5.960097323600974e-06, "loss": 0.5249, "step": 25065 }, { "epoch": 0.7318326472219789, "grad_norm": 0.6043532234495498, "learning_rate": 5.959448499594486e-06, "loss": 0.5484, "step": 25066 }, { "epoch": 0.7318618434498263, "grad_norm": 0.6516488299762453, "learning_rate": 5.958799675587997e-06, "loss": 0.603, "step": 25067 }, { "epoch": 0.7318910396776737, "grad_norm": 0.6122817669981098, "learning_rate": 5.958150851581509e-06, "loss": 0.5281, "step": 25068 }, { "epoch": 0.731920235905521, "grad_norm": 0.6329343125596985, "learning_rate": 5.957502027575021e-06, "loss": 0.6057, "step": 25069 }, { "epoch": 0.7319494321333684, "grad_norm": 0.6832192672608299, "learning_rate": 5.956853203568532e-06, "loss": 0.6388, "step": 25070 }, { "epoch": 0.7319786283612157, "grad_norm": 0.6707870734784214, "learning_rate": 5.956204379562044e-06, "loss": 0.6792, "step": 25071 }, { "epoch": 0.7320078245890631, "grad_norm": 0.625808007622142, "learning_rate": 5.955555555555555e-06, "loss": 0.5985, "step": 25072 }, { "epoch": 0.7320370208169105, "grad_norm": 0.6071679615812436, "learning_rate": 5.954906731549068e-06, "loss": 0.5335, "step": 25073 }, { "epoch": 0.7320662170447578, "grad_norm": 0.6285951666977984, "learning_rate": 5.95425790754258e-06, "loss": 0.5847, "step": 25074 }, { "epoch": 0.7320954132726052, "grad_norm": 0.6117302019623977, "learning_rate": 5.9536090835360915e-06, "loss": 0.5475, "step": 25075 }, { "epoch": 0.7321246095004526, "grad_norm": 0.7140927881766405, "learning_rate": 5.9529602595296035e-06, "loss": 0.7151, "step": 25076 }, { "epoch": 0.7321538057282999, "grad_norm": 0.6170886928561485, "learning_rate": 5.952311435523115e-06, "loss": 0.5344, "step": 25077 }, { "epoch": 0.7321830019561473, "grad_norm": 0.6888561561187159, "learning_rate": 5.951662611516627e-06, "loss": 0.6533, "step": 25078 }, { "epoch": 0.7322121981839946, "grad_norm": 0.6682544290276632, "learning_rate": 5.951013787510138e-06, "loss": 0.6665, "step": 25079 }, { "epoch": 0.732241394411842, "grad_norm": 0.6238883930946498, "learning_rate": 5.95036496350365e-06, "loss": 0.5906, "step": 25080 }, { "epoch": 0.7322705906396894, "grad_norm": 0.6588899550654671, "learning_rate": 5.949716139497161e-06, "loss": 0.6105, "step": 25081 }, { "epoch": 0.7322997868675367, "grad_norm": 0.6407680128822256, "learning_rate": 5.949067315490674e-06, "loss": 0.5583, "step": 25082 }, { "epoch": 0.7323289830953841, "grad_norm": 0.6671407147007902, "learning_rate": 5.948418491484186e-06, "loss": 0.6153, "step": 25083 }, { "epoch": 0.7323581793232314, "grad_norm": 0.6935818374445113, "learning_rate": 5.947769667477697e-06, "loss": 0.6786, "step": 25084 }, { "epoch": 0.7323873755510788, "grad_norm": 0.6112533646366721, "learning_rate": 5.947120843471209e-06, "loss": 0.5562, "step": 25085 }, { "epoch": 0.7324165717789262, "grad_norm": 0.6705125394159519, "learning_rate": 5.94647201946472e-06, "loss": 0.6208, "step": 25086 }, { "epoch": 0.7324457680067735, "grad_norm": 0.6579007222457586, "learning_rate": 5.945823195458232e-06, "loss": 0.6191, "step": 25087 }, { "epoch": 0.7324749642346209, "grad_norm": 0.7012495274269728, "learning_rate": 5.9451743714517444e-06, "loss": 0.66, "step": 25088 }, { "epoch": 0.7325041604624682, "grad_norm": 0.6201620353022982, "learning_rate": 5.944525547445256e-06, "loss": 0.5719, "step": 25089 }, { "epoch": 0.7325333566903156, "grad_norm": 0.6192290839265635, "learning_rate": 5.943876723438768e-06, "loss": 0.5566, "step": 25090 }, { "epoch": 0.732562552918163, "grad_norm": 0.6238329342969504, "learning_rate": 5.943227899432279e-06, "loss": 0.5791, "step": 25091 }, { "epoch": 0.7325917491460103, "grad_norm": 0.6356937633369184, "learning_rate": 5.942579075425792e-06, "loss": 0.6095, "step": 25092 }, { "epoch": 0.7326209453738577, "grad_norm": 0.5874548117092357, "learning_rate": 5.941930251419304e-06, "loss": 0.5006, "step": 25093 }, { "epoch": 0.732650141601705, "grad_norm": 0.6042991679402446, "learning_rate": 5.941281427412815e-06, "loss": 0.5119, "step": 25094 }, { "epoch": 0.7326793378295524, "grad_norm": 0.6326447866296994, "learning_rate": 5.940632603406327e-06, "loss": 0.56, "step": 25095 }, { "epoch": 0.7327085340573998, "grad_norm": 0.6596759631374272, "learning_rate": 5.939983779399838e-06, "loss": 0.6881, "step": 25096 }, { "epoch": 0.7327377302852471, "grad_norm": 0.6028126452836013, "learning_rate": 5.93933495539335e-06, "loss": 0.5295, "step": 25097 }, { "epoch": 0.7327669265130945, "grad_norm": 0.6573019317659001, "learning_rate": 5.938686131386861e-06, "loss": 0.6381, "step": 25098 }, { "epoch": 0.7327961227409419, "grad_norm": 0.6992670803084867, "learning_rate": 5.938037307380373e-06, "loss": 0.6796, "step": 25099 }, { "epoch": 0.7328253189687892, "grad_norm": 0.6272654524328045, "learning_rate": 5.9373884833738845e-06, "loss": 0.5846, "step": 25100 }, { "epoch": 0.7328545151966366, "grad_norm": 0.6594138075952765, "learning_rate": 5.936739659367397e-06, "loss": 0.6705, "step": 25101 }, { "epoch": 0.7328837114244839, "grad_norm": 0.6693024123035429, "learning_rate": 5.936090835360909e-06, "loss": 0.6203, "step": 25102 }, { "epoch": 0.7329129076523313, "grad_norm": 0.5754904652069862, "learning_rate": 5.935442011354421e-06, "loss": 0.4911, "step": 25103 }, { "epoch": 0.7329421038801787, "grad_norm": 0.6464099529344404, "learning_rate": 5.934793187347933e-06, "loss": 0.6285, "step": 25104 }, { "epoch": 0.732971300108026, "grad_norm": 0.6388782175421919, "learning_rate": 5.934144363341444e-06, "loss": 0.5992, "step": 25105 }, { "epoch": 0.7330004963358734, "grad_norm": 0.6526027850214168, "learning_rate": 5.933495539334956e-06, "loss": 0.6803, "step": 25106 }, { "epoch": 0.7330296925637207, "grad_norm": 0.6499924787378631, "learning_rate": 5.932846715328468e-06, "loss": 0.6308, "step": 25107 }, { "epoch": 0.7330588887915681, "grad_norm": 0.5980145725939805, "learning_rate": 5.932197891321979e-06, "loss": 0.5575, "step": 25108 }, { "epoch": 0.7330880850194155, "grad_norm": 0.6431140757785347, "learning_rate": 5.931549067315491e-06, "loss": 0.5899, "step": 25109 }, { "epoch": 0.7331172812472628, "grad_norm": 0.7107807452384262, "learning_rate": 5.930900243309002e-06, "loss": 0.6961, "step": 25110 }, { "epoch": 0.7331464774751102, "grad_norm": 0.6567789368758952, "learning_rate": 5.930251419302515e-06, "loss": 0.6402, "step": 25111 }, { "epoch": 0.7331756737029576, "grad_norm": 0.6497498501291276, "learning_rate": 5.929602595296027e-06, "loss": 0.5789, "step": 25112 }, { "epoch": 0.7332048699308049, "grad_norm": 0.6438358218333262, "learning_rate": 5.928953771289538e-06, "loss": 0.5596, "step": 25113 }, { "epoch": 0.7332340661586523, "grad_norm": 0.6403986640864097, "learning_rate": 5.92830494728305e-06, "loss": 0.5941, "step": 25114 }, { "epoch": 0.7332632623864996, "grad_norm": 0.6456431156141537, "learning_rate": 5.9276561232765615e-06, "loss": 0.5986, "step": 25115 }, { "epoch": 0.733292458614347, "grad_norm": 0.6654067731507599, "learning_rate": 5.9270072992700736e-06, "loss": 0.639, "step": 25116 }, { "epoch": 0.7333216548421944, "grad_norm": 0.6415340841650669, "learning_rate": 5.926358475263585e-06, "loss": 0.6017, "step": 25117 }, { "epoch": 0.7333508510700417, "grad_norm": 0.6318182685185132, "learning_rate": 5.925709651257097e-06, "loss": 0.6003, "step": 25118 }, { "epoch": 0.7333800472978891, "grad_norm": 0.6250893906797961, "learning_rate": 5.925060827250608e-06, "loss": 0.5868, "step": 25119 }, { "epoch": 0.7334092435257364, "grad_norm": 0.6224905541955462, "learning_rate": 5.924412003244121e-06, "loss": 0.5968, "step": 25120 }, { "epoch": 0.7334384397535838, "grad_norm": 0.6487304110167629, "learning_rate": 5.923763179237633e-06, "loss": 0.6368, "step": 25121 }, { "epoch": 0.7334676359814312, "grad_norm": 0.6738705904123052, "learning_rate": 5.923114355231144e-06, "loss": 0.6658, "step": 25122 }, { "epoch": 0.7334968322092785, "grad_norm": 0.6474286610913248, "learning_rate": 5.922465531224656e-06, "loss": 0.6232, "step": 25123 }, { "epoch": 0.7335260284371259, "grad_norm": 0.6550454304291132, "learning_rate": 5.921816707218167e-06, "loss": 0.6154, "step": 25124 }, { "epoch": 0.7335552246649732, "grad_norm": 0.6275568193965446, "learning_rate": 5.921167883211679e-06, "loss": 0.5793, "step": 25125 }, { "epoch": 0.7335844208928206, "grad_norm": 0.6331834389616725, "learning_rate": 5.920519059205191e-06, "loss": 0.6, "step": 25126 }, { "epoch": 0.733613617120668, "grad_norm": 0.6195318869345423, "learning_rate": 5.9198702351987025e-06, "loss": 0.599, "step": 25127 }, { "epoch": 0.7336428133485153, "grad_norm": 0.6231648845982296, "learning_rate": 5.9192214111922145e-06, "loss": 0.5325, "step": 25128 }, { "epoch": 0.7336720095763627, "grad_norm": 0.5755832844984159, "learning_rate": 5.918572587185726e-06, "loss": 0.4694, "step": 25129 }, { "epoch": 0.73370120580421, "grad_norm": 0.6449821236395247, "learning_rate": 5.9179237631792386e-06, "loss": 0.6232, "step": 25130 }, { "epoch": 0.7337304020320574, "grad_norm": 0.6021013629336907, "learning_rate": 5.917274939172751e-06, "loss": 0.5038, "step": 25131 }, { "epoch": 0.7337595982599048, "grad_norm": 0.6714730009987135, "learning_rate": 5.916626115166262e-06, "loss": 0.6285, "step": 25132 }, { "epoch": 0.7337887944877521, "grad_norm": 0.6482602287363716, "learning_rate": 5.915977291159774e-06, "loss": 0.6259, "step": 25133 }, { "epoch": 0.7338179907155995, "grad_norm": 0.6900206968734756, "learning_rate": 5.915328467153285e-06, "loss": 0.7087, "step": 25134 }, { "epoch": 0.7338471869434469, "grad_norm": 0.6491024191668361, "learning_rate": 5.914679643146797e-06, "loss": 0.6003, "step": 25135 }, { "epoch": 0.7338763831712942, "grad_norm": 0.6714259721807666, "learning_rate": 5.914030819140308e-06, "loss": 0.6675, "step": 25136 }, { "epoch": 0.7339055793991416, "grad_norm": 0.6390308621099955, "learning_rate": 5.91338199513382e-06, "loss": 0.6259, "step": 25137 }, { "epoch": 0.7339347756269889, "grad_norm": 0.6303510310510612, "learning_rate": 5.912733171127331e-06, "loss": 0.5979, "step": 25138 }, { "epoch": 0.7339639718548363, "grad_norm": 0.6177469785054545, "learning_rate": 5.912084347120844e-06, "loss": 0.6042, "step": 25139 }, { "epoch": 0.7339931680826837, "grad_norm": 0.5993343666830695, "learning_rate": 5.911435523114356e-06, "loss": 0.5166, "step": 25140 }, { "epoch": 0.734022364310531, "grad_norm": 0.6684814889409015, "learning_rate": 5.9107866991078675e-06, "loss": 0.6471, "step": 25141 }, { "epoch": 0.7340515605383784, "grad_norm": 0.6616429626034637, "learning_rate": 5.9101378751013795e-06, "loss": 0.5831, "step": 25142 }, { "epoch": 0.7340807567662258, "grad_norm": 0.6464279396691659, "learning_rate": 5.909489051094891e-06, "loss": 0.6005, "step": 25143 }, { "epoch": 0.7341099529940731, "grad_norm": 0.6328235169511539, "learning_rate": 5.908840227088403e-06, "loss": 0.5806, "step": 25144 }, { "epoch": 0.7341391492219205, "grad_norm": 0.6427459685377659, "learning_rate": 5.908191403081914e-06, "loss": 0.5312, "step": 25145 }, { "epoch": 0.734168345449768, "grad_norm": 0.6357818600397981, "learning_rate": 5.907542579075426e-06, "loss": 0.6177, "step": 25146 }, { "epoch": 0.7341975416776153, "grad_norm": 0.6943322582156513, "learning_rate": 5.906893755068938e-06, "loss": 0.6357, "step": 25147 }, { "epoch": 0.7342267379054627, "grad_norm": 0.6200002057834598, "learning_rate": 5.90624493106245e-06, "loss": 0.5231, "step": 25148 }, { "epoch": 0.73425593413331, "grad_norm": 0.6071338887128508, "learning_rate": 5.905596107055962e-06, "loss": 0.5075, "step": 25149 }, { "epoch": 0.7342851303611574, "grad_norm": 0.6926085034959133, "learning_rate": 5.904947283049474e-06, "loss": 0.6727, "step": 25150 }, { "epoch": 0.7343143265890048, "grad_norm": 0.648066605896862, "learning_rate": 5.904298459042985e-06, "loss": 0.6056, "step": 25151 }, { "epoch": 0.7343435228168521, "grad_norm": 0.6988082940439716, "learning_rate": 5.903649635036497e-06, "loss": 0.6875, "step": 25152 }, { "epoch": 0.7343727190446995, "grad_norm": 0.712726991778327, "learning_rate": 5.903000811030008e-06, "loss": 0.7238, "step": 25153 }, { "epoch": 0.7344019152725468, "grad_norm": 0.6319895704341955, "learning_rate": 5.9023519870235204e-06, "loss": 0.5574, "step": 25154 }, { "epoch": 0.7344311115003942, "grad_norm": 0.6039188923725737, "learning_rate": 5.901703163017032e-06, "loss": 0.5526, "step": 25155 }, { "epoch": 0.7344603077282416, "grad_norm": 0.6879709285227076, "learning_rate": 5.901054339010544e-06, "loss": 0.5724, "step": 25156 }, { "epoch": 0.7344895039560889, "grad_norm": 0.6624855092536901, "learning_rate": 5.900405515004055e-06, "loss": 0.6536, "step": 25157 }, { "epoch": 0.7345187001839363, "grad_norm": 0.680238056622046, "learning_rate": 5.899756690997568e-06, "loss": 0.5732, "step": 25158 }, { "epoch": 0.7345478964117836, "grad_norm": 0.6906718594339996, "learning_rate": 5.89910786699108e-06, "loss": 0.6042, "step": 25159 }, { "epoch": 0.734577092639631, "grad_norm": 0.6820715447903105, "learning_rate": 5.898459042984591e-06, "loss": 0.6545, "step": 25160 }, { "epoch": 0.7346062888674784, "grad_norm": 0.635603662838823, "learning_rate": 5.897810218978103e-06, "loss": 0.5957, "step": 25161 }, { "epoch": 0.7346354850953257, "grad_norm": 0.651339038975742, "learning_rate": 5.897161394971614e-06, "loss": 0.6281, "step": 25162 }, { "epoch": 0.7346646813231731, "grad_norm": 0.6498218202612318, "learning_rate": 5.896512570965126e-06, "loss": 0.6133, "step": 25163 }, { "epoch": 0.7346938775510204, "grad_norm": 0.6082787437903004, "learning_rate": 5.895863746958637e-06, "loss": 0.5608, "step": 25164 }, { "epoch": 0.7347230737788678, "grad_norm": 0.6532348065767545, "learning_rate": 5.895214922952149e-06, "loss": 0.5923, "step": 25165 }, { "epoch": 0.7347522700067152, "grad_norm": 0.6416901586168511, "learning_rate": 5.894566098945661e-06, "loss": 0.6272, "step": 25166 }, { "epoch": 0.7347814662345625, "grad_norm": 0.6159498585097254, "learning_rate": 5.893917274939173e-06, "loss": 0.5237, "step": 25167 }, { "epoch": 0.7348106624624099, "grad_norm": 0.6819317842783882, "learning_rate": 5.8932684509326854e-06, "loss": 0.6222, "step": 25168 }, { "epoch": 0.7348398586902573, "grad_norm": 0.63955806212344, "learning_rate": 5.8926196269261975e-06, "loss": 0.5739, "step": 25169 }, { "epoch": 0.7348690549181046, "grad_norm": 0.6607485511120476, "learning_rate": 5.891970802919709e-06, "loss": 0.6011, "step": 25170 }, { "epoch": 0.734898251145952, "grad_norm": 0.6754232098530855, "learning_rate": 5.891321978913221e-06, "loss": 0.6178, "step": 25171 }, { "epoch": 0.7349274473737993, "grad_norm": 0.669084978566819, "learning_rate": 5.890673154906732e-06, "loss": 0.6469, "step": 25172 }, { "epoch": 0.7349566436016467, "grad_norm": 0.6578672392508045, "learning_rate": 5.890024330900244e-06, "loss": 0.6092, "step": 25173 }, { "epoch": 0.7349858398294941, "grad_norm": 0.6291997359934216, "learning_rate": 5.889375506893755e-06, "loss": 0.5895, "step": 25174 }, { "epoch": 0.7350150360573414, "grad_norm": 0.6106112796065184, "learning_rate": 5.888726682887267e-06, "loss": 0.5345, "step": 25175 }, { "epoch": 0.7350442322851888, "grad_norm": 0.695218191248612, "learning_rate": 5.888077858880778e-06, "loss": 0.6857, "step": 25176 }, { "epoch": 0.7350734285130361, "grad_norm": 0.6629007789963866, "learning_rate": 5.887429034874291e-06, "loss": 0.6273, "step": 25177 }, { "epoch": 0.7351026247408835, "grad_norm": 0.6826891273164802, "learning_rate": 5.886780210867803e-06, "loss": 0.6343, "step": 25178 }, { "epoch": 0.7351318209687309, "grad_norm": 0.7532542531765963, "learning_rate": 5.886131386861314e-06, "loss": 0.6273, "step": 25179 }, { "epoch": 0.7351610171965782, "grad_norm": 0.7172548865377818, "learning_rate": 5.885482562854826e-06, "loss": 0.6854, "step": 25180 }, { "epoch": 0.7351902134244256, "grad_norm": 0.6692549377757485, "learning_rate": 5.8848337388483375e-06, "loss": 0.6525, "step": 25181 }, { "epoch": 0.735219409652273, "grad_norm": 0.6124433079800534, "learning_rate": 5.8841849148418496e-06, "loss": 0.5752, "step": 25182 }, { "epoch": 0.7352486058801203, "grad_norm": 0.6748610865950482, "learning_rate": 5.883536090835361e-06, "loss": 0.6683, "step": 25183 }, { "epoch": 0.7352778021079677, "grad_norm": 0.6024095490978155, "learning_rate": 5.882887266828873e-06, "loss": 0.5264, "step": 25184 }, { "epoch": 0.735306998335815, "grad_norm": 0.6907694012935001, "learning_rate": 5.882238442822385e-06, "loss": 0.6365, "step": 25185 }, { "epoch": 0.7353361945636624, "grad_norm": 0.6946941081390012, "learning_rate": 5.881589618815897e-06, "loss": 0.5384, "step": 25186 }, { "epoch": 0.7353653907915098, "grad_norm": 0.6582403843052229, "learning_rate": 5.880940794809409e-06, "loss": 0.6283, "step": 25187 }, { "epoch": 0.7353945870193571, "grad_norm": 0.6975355889176958, "learning_rate": 5.880291970802921e-06, "loss": 0.6776, "step": 25188 }, { "epoch": 0.7354237832472045, "grad_norm": 0.6639149227580072, "learning_rate": 5.879643146796432e-06, "loss": 0.6029, "step": 25189 }, { "epoch": 0.7354529794750518, "grad_norm": 0.6255613304231875, "learning_rate": 5.878994322789944e-06, "loss": 0.5858, "step": 25190 }, { "epoch": 0.7354821757028992, "grad_norm": 0.703634512021137, "learning_rate": 5.878345498783455e-06, "loss": 0.662, "step": 25191 }, { "epoch": 0.7355113719307466, "grad_norm": 0.6601046078747912, "learning_rate": 5.877696674776967e-06, "loss": 0.5949, "step": 25192 }, { "epoch": 0.7355405681585939, "grad_norm": 0.6664734421151135, "learning_rate": 5.8770478507704785e-06, "loss": 0.6784, "step": 25193 }, { "epoch": 0.7355697643864413, "grad_norm": 0.6007657423658843, "learning_rate": 5.8763990267639905e-06, "loss": 0.5919, "step": 25194 }, { "epoch": 0.7355989606142886, "grad_norm": 0.7313990509719291, "learning_rate": 5.875750202757502e-06, "loss": 0.6418, "step": 25195 }, { "epoch": 0.735628156842136, "grad_norm": 0.6582070272544037, "learning_rate": 5.8751013787510146e-06, "loss": 0.6389, "step": 25196 }, { "epoch": 0.7356573530699834, "grad_norm": 0.6583736693928215, "learning_rate": 5.874452554744527e-06, "loss": 0.5946, "step": 25197 }, { "epoch": 0.7356865492978307, "grad_norm": 0.63063404779935, "learning_rate": 5.873803730738038e-06, "loss": 0.5543, "step": 25198 }, { "epoch": 0.7357157455256781, "grad_norm": 0.6386428024834916, "learning_rate": 5.87315490673155e-06, "loss": 0.5828, "step": 25199 }, { "epoch": 0.7357449417535254, "grad_norm": 0.6469391294509496, "learning_rate": 5.872506082725061e-06, "loss": 0.6018, "step": 25200 }, { "epoch": 0.7357741379813728, "grad_norm": 0.7464050621146026, "learning_rate": 5.871857258718573e-06, "loss": 0.726, "step": 25201 }, { "epoch": 0.7358033342092202, "grad_norm": 0.702841127792168, "learning_rate": 5.871208434712084e-06, "loss": 0.6767, "step": 25202 }, { "epoch": 0.7358325304370675, "grad_norm": 0.6238573650439364, "learning_rate": 5.870559610705596e-06, "loss": 0.5377, "step": 25203 }, { "epoch": 0.7358617266649149, "grad_norm": 0.6539288222659997, "learning_rate": 5.869910786699108e-06, "loss": 0.6395, "step": 25204 }, { "epoch": 0.7358909228927623, "grad_norm": 0.6095947581998018, "learning_rate": 5.86926196269262e-06, "loss": 0.552, "step": 25205 }, { "epoch": 0.7359201191206096, "grad_norm": 0.6410718106733421, "learning_rate": 5.868613138686132e-06, "loss": 0.5834, "step": 25206 }, { "epoch": 0.735949315348457, "grad_norm": 0.6413378845935928, "learning_rate": 5.8679643146796435e-06, "loss": 0.5828, "step": 25207 }, { "epoch": 0.7359785115763043, "grad_norm": 0.6129257852597313, "learning_rate": 5.8673154906731555e-06, "loss": 0.5363, "step": 25208 }, { "epoch": 0.7360077078041517, "grad_norm": 0.6225692124426556, "learning_rate": 5.8666666666666675e-06, "loss": 0.5683, "step": 25209 }, { "epoch": 0.7360369040319991, "grad_norm": 0.6805729521759777, "learning_rate": 5.866017842660179e-06, "loss": 0.6471, "step": 25210 }, { "epoch": 0.7360661002598464, "grad_norm": 0.6783728211521148, "learning_rate": 5.865369018653691e-06, "loss": 0.6601, "step": 25211 }, { "epoch": 0.7360952964876938, "grad_norm": 0.6588655873766107, "learning_rate": 5.864720194647202e-06, "loss": 0.6518, "step": 25212 }, { "epoch": 0.7361244927155411, "grad_norm": 0.6202620252335177, "learning_rate": 5.864071370640714e-06, "loss": 0.5524, "step": 25213 }, { "epoch": 0.7361536889433885, "grad_norm": 0.6647665146176986, "learning_rate": 5.863422546634227e-06, "loss": 0.6721, "step": 25214 }, { "epoch": 0.7361828851712359, "grad_norm": 0.6317529922342878, "learning_rate": 5.862773722627738e-06, "loss": 0.5875, "step": 25215 }, { "epoch": 0.7362120813990832, "grad_norm": 0.6071856568718818, "learning_rate": 5.86212489862125e-06, "loss": 0.5419, "step": 25216 }, { "epoch": 0.7362412776269306, "grad_norm": 0.6594934070588894, "learning_rate": 5.861476074614761e-06, "loss": 0.6332, "step": 25217 }, { "epoch": 0.736270473854778, "grad_norm": 0.6186179578953191, "learning_rate": 5.860827250608273e-06, "loss": 0.6082, "step": 25218 }, { "epoch": 0.7362996700826253, "grad_norm": 0.6572027906168275, "learning_rate": 5.860178426601784e-06, "loss": 0.6043, "step": 25219 }, { "epoch": 0.7363288663104727, "grad_norm": 0.6388508467253428, "learning_rate": 5.8595296025952964e-06, "loss": 0.6296, "step": 25220 }, { "epoch": 0.73635806253832, "grad_norm": 0.6213514138671268, "learning_rate": 5.858880778588808e-06, "loss": 0.5623, "step": 25221 }, { "epoch": 0.7363872587661674, "grad_norm": 0.6611956612651952, "learning_rate": 5.85823195458232e-06, "loss": 0.6148, "step": 25222 }, { "epoch": 0.7364164549940148, "grad_norm": 0.6594080648766728, "learning_rate": 5.857583130575832e-06, "loss": 0.6752, "step": 25223 }, { "epoch": 0.7364456512218621, "grad_norm": 0.6154147858717454, "learning_rate": 5.856934306569344e-06, "loss": 0.5621, "step": 25224 }, { "epoch": 0.7364748474497095, "grad_norm": 0.6313143048281603, "learning_rate": 5.856285482562856e-06, "loss": 0.5902, "step": 25225 }, { "epoch": 0.7365040436775568, "grad_norm": 0.6257485098345503, "learning_rate": 5.855636658556367e-06, "loss": 0.5865, "step": 25226 }, { "epoch": 0.7365332399054042, "grad_norm": 0.651621284029119, "learning_rate": 5.854987834549879e-06, "loss": 0.5698, "step": 25227 }, { "epoch": 0.7365624361332516, "grad_norm": 0.6436734986385507, "learning_rate": 5.854339010543391e-06, "loss": 0.6367, "step": 25228 }, { "epoch": 0.7365916323610989, "grad_norm": 0.619886154568279, "learning_rate": 5.853690186536902e-06, "loss": 0.5566, "step": 25229 }, { "epoch": 0.7366208285889463, "grad_norm": 0.6995206315055358, "learning_rate": 5.853041362530414e-06, "loss": 0.718, "step": 25230 }, { "epoch": 0.7366500248167936, "grad_norm": 0.6574177807650019, "learning_rate": 5.852392538523925e-06, "loss": 0.6044, "step": 25231 }, { "epoch": 0.736679221044641, "grad_norm": 0.6392237814460509, "learning_rate": 5.851743714517437e-06, "loss": 0.6163, "step": 25232 }, { "epoch": 0.7367084172724884, "grad_norm": 0.5978432227032358, "learning_rate": 5.85109489051095e-06, "loss": 0.536, "step": 25233 }, { "epoch": 0.7367376135003357, "grad_norm": 0.6037559842100445, "learning_rate": 5.8504460665044614e-06, "loss": 0.5695, "step": 25234 }, { "epoch": 0.7367668097281831, "grad_norm": 0.6442283575842842, "learning_rate": 5.8497972424979735e-06, "loss": 0.5996, "step": 25235 }, { "epoch": 0.7367960059560305, "grad_norm": 0.6689144635438768, "learning_rate": 5.849148418491485e-06, "loss": 0.6571, "step": 25236 }, { "epoch": 0.7368252021838778, "grad_norm": 0.6499540152979099, "learning_rate": 5.848499594484997e-06, "loss": 0.6325, "step": 25237 }, { "epoch": 0.7368543984117252, "grad_norm": 0.6360905071808194, "learning_rate": 5.847850770478508e-06, "loss": 0.6155, "step": 25238 }, { "epoch": 0.7368835946395725, "grad_norm": 0.6573143898340759, "learning_rate": 5.84720194647202e-06, "loss": 0.6069, "step": 25239 }, { "epoch": 0.7369127908674199, "grad_norm": 0.6062630442419596, "learning_rate": 5.846553122465531e-06, "loss": 0.5403, "step": 25240 }, { "epoch": 0.7369419870952673, "grad_norm": 0.705791778846344, "learning_rate": 5.845904298459043e-06, "loss": 0.677, "step": 25241 }, { "epoch": 0.7369711833231146, "grad_norm": 0.6888358841609614, "learning_rate": 5.845255474452555e-06, "loss": 0.6749, "step": 25242 }, { "epoch": 0.737000379550962, "grad_norm": 0.589445075065132, "learning_rate": 5.844606650446067e-06, "loss": 0.4814, "step": 25243 }, { "epoch": 0.7370295757788093, "grad_norm": 0.6426638868265484, "learning_rate": 5.843957826439579e-06, "loss": 0.5892, "step": 25244 }, { "epoch": 0.7370587720066567, "grad_norm": 0.5766837747204572, "learning_rate": 5.84330900243309e-06, "loss": 0.5314, "step": 25245 }, { "epoch": 0.7370879682345041, "grad_norm": 0.6702524256200724, "learning_rate": 5.842660178426602e-06, "loss": 0.6685, "step": 25246 }, { "epoch": 0.7371171644623514, "grad_norm": 0.6217008459552834, "learning_rate": 5.842011354420114e-06, "loss": 0.596, "step": 25247 }, { "epoch": 0.7371463606901988, "grad_norm": 0.6172849795598827, "learning_rate": 5.841362530413626e-06, "loss": 0.6039, "step": 25248 }, { "epoch": 0.7371755569180461, "grad_norm": 0.6574901210014857, "learning_rate": 5.840713706407138e-06, "loss": 0.6041, "step": 25249 }, { "epoch": 0.7372047531458935, "grad_norm": 0.6626449910584211, "learning_rate": 5.840064882400649e-06, "loss": 0.6308, "step": 25250 }, { "epoch": 0.7372339493737409, "grad_norm": 0.6487803952177438, "learning_rate": 5.839416058394161e-06, "loss": 0.6354, "step": 25251 }, { "epoch": 0.7372631456015882, "grad_norm": 0.6609029363915522, "learning_rate": 5.838767234387674e-06, "loss": 0.6553, "step": 25252 }, { "epoch": 0.7372923418294356, "grad_norm": 0.6493650746730373, "learning_rate": 5.838118410381185e-06, "loss": 0.616, "step": 25253 }, { "epoch": 0.737321538057283, "grad_norm": 0.6027769940613382, "learning_rate": 5.837469586374697e-06, "loss": 0.493, "step": 25254 }, { "epoch": 0.7373507342851303, "grad_norm": 0.6147717740539282, "learning_rate": 5.836820762368208e-06, "loss": 0.5511, "step": 25255 }, { "epoch": 0.7373799305129777, "grad_norm": 0.7032730278068895, "learning_rate": 5.83617193836172e-06, "loss": 0.693, "step": 25256 }, { "epoch": 0.737409126740825, "grad_norm": 0.6628071267426815, "learning_rate": 5.835523114355231e-06, "loss": 0.5823, "step": 25257 }, { "epoch": 0.7374383229686724, "grad_norm": 0.7317553867992072, "learning_rate": 5.834874290348743e-06, "loss": 0.7129, "step": 25258 }, { "epoch": 0.7374675191965198, "grad_norm": 0.6499160918809994, "learning_rate": 5.8342254663422545e-06, "loss": 0.6109, "step": 25259 }, { "epoch": 0.7374967154243671, "grad_norm": 0.6385631228492981, "learning_rate": 5.8335766423357665e-06, "loss": 0.6159, "step": 25260 }, { "epoch": 0.7375259116522145, "grad_norm": 0.6898456370166282, "learning_rate": 5.8329278183292785e-06, "loss": 0.6656, "step": 25261 }, { "epoch": 0.7375551078800618, "grad_norm": 0.6183483915780948, "learning_rate": 5.8322789943227906e-06, "loss": 0.5497, "step": 25262 }, { "epoch": 0.7375843041079092, "grad_norm": 0.6505799271235286, "learning_rate": 5.831630170316303e-06, "loss": 0.6164, "step": 25263 }, { "epoch": 0.7376135003357566, "grad_norm": 0.6360167943753591, "learning_rate": 5.830981346309814e-06, "loss": 0.5905, "step": 25264 }, { "epoch": 0.7376426965636039, "grad_norm": 0.5977519335379714, "learning_rate": 5.830332522303326e-06, "loss": 0.5493, "step": 25265 }, { "epoch": 0.7376718927914513, "grad_norm": 0.645057081162908, "learning_rate": 5.829683698296838e-06, "loss": 0.6187, "step": 25266 }, { "epoch": 0.7377010890192988, "grad_norm": 0.6578286160146111, "learning_rate": 5.829034874290349e-06, "loss": 0.6491, "step": 25267 }, { "epoch": 0.7377302852471461, "grad_norm": 0.6933710891282234, "learning_rate": 5.828386050283861e-06, "loss": 0.6054, "step": 25268 }, { "epoch": 0.7377594814749935, "grad_norm": 0.6702517896944322, "learning_rate": 5.827737226277372e-06, "loss": 0.5875, "step": 25269 }, { "epoch": 0.7377886777028408, "grad_norm": 0.6420490615976525, "learning_rate": 5.827088402270884e-06, "loss": 0.6237, "step": 25270 }, { "epoch": 0.7378178739306882, "grad_norm": 0.665682321546112, "learning_rate": 5.826439578264397e-06, "loss": 0.641, "step": 25271 }, { "epoch": 0.7378470701585356, "grad_norm": 0.6586720951739132, "learning_rate": 5.825790754257908e-06, "loss": 0.6028, "step": 25272 }, { "epoch": 0.7378762663863829, "grad_norm": 0.6559226593127265, "learning_rate": 5.82514193025142e-06, "loss": 0.6034, "step": 25273 }, { "epoch": 0.7379054626142303, "grad_norm": 0.6662292804721323, "learning_rate": 5.8244931062449315e-06, "loss": 0.6448, "step": 25274 }, { "epoch": 0.7379346588420777, "grad_norm": 0.5964325840046723, "learning_rate": 5.8238442822384435e-06, "loss": 0.5389, "step": 25275 }, { "epoch": 0.737963855069925, "grad_norm": 0.613222205917898, "learning_rate": 5.823195458231955e-06, "loss": 0.5909, "step": 25276 }, { "epoch": 0.7379930512977724, "grad_norm": 0.669150430714531, "learning_rate": 5.822546634225467e-06, "loss": 0.6589, "step": 25277 }, { "epoch": 0.7380222475256197, "grad_norm": 0.6557709314015835, "learning_rate": 5.821897810218978e-06, "loss": 0.6453, "step": 25278 }, { "epoch": 0.7380514437534671, "grad_norm": 0.6335344235631564, "learning_rate": 5.82124898621249e-06, "loss": 0.577, "step": 25279 }, { "epoch": 0.7380806399813145, "grad_norm": 0.7155153132989568, "learning_rate": 5.820600162206001e-06, "loss": 0.622, "step": 25280 }, { "epoch": 0.7381098362091618, "grad_norm": 0.6882984820218669, "learning_rate": 5.819951338199514e-06, "loss": 0.6162, "step": 25281 }, { "epoch": 0.7381390324370092, "grad_norm": 0.6748063983991117, "learning_rate": 5.819302514193026e-06, "loss": 0.6243, "step": 25282 }, { "epoch": 0.7381682286648565, "grad_norm": 0.6799172148246297, "learning_rate": 5.818653690186537e-06, "loss": 0.6588, "step": 25283 }, { "epoch": 0.7381974248927039, "grad_norm": 0.6433051199225776, "learning_rate": 5.818004866180049e-06, "loss": 0.5829, "step": 25284 }, { "epoch": 0.7382266211205513, "grad_norm": 0.6175221343444105, "learning_rate": 5.817356042173561e-06, "loss": 0.5298, "step": 25285 }, { "epoch": 0.7382558173483986, "grad_norm": 0.6399486946222731, "learning_rate": 5.8167072181670724e-06, "loss": 0.6227, "step": 25286 }, { "epoch": 0.738285013576246, "grad_norm": 0.6786252132390783, "learning_rate": 5.8160583941605845e-06, "loss": 0.6745, "step": 25287 }, { "epoch": 0.7383142098040933, "grad_norm": 0.6796370912541749, "learning_rate": 5.815409570154096e-06, "loss": 0.6459, "step": 25288 }, { "epoch": 0.7383434060319407, "grad_norm": 0.6057900137345654, "learning_rate": 5.814760746147608e-06, "loss": 0.5255, "step": 25289 }, { "epoch": 0.7383726022597881, "grad_norm": 0.5891015964619104, "learning_rate": 5.8141119221411206e-06, "loss": 0.5025, "step": 25290 }, { "epoch": 0.7384017984876354, "grad_norm": 0.6538152414342362, "learning_rate": 5.813463098134632e-06, "loss": 0.6075, "step": 25291 }, { "epoch": 0.7384309947154828, "grad_norm": 0.6085049366192831, "learning_rate": 5.812814274128144e-06, "loss": 0.5648, "step": 25292 }, { "epoch": 0.7384601909433302, "grad_norm": 0.6276174665598526, "learning_rate": 5.812165450121655e-06, "loss": 0.5777, "step": 25293 }, { "epoch": 0.7384893871711775, "grad_norm": 0.584234577230515, "learning_rate": 5.811516626115167e-06, "loss": 0.5086, "step": 25294 }, { "epoch": 0.7385185833990249, "grad_norm": 0.6334632197662836, "learning_rate": 5.810867802108678e-06, "loss": 0.5874, "step": 25295 }, { "epoch": 0.7385477796268722, "grad_norm": 0.6862330323452797, "learning_rate": 5.81021897810219e-06, "loss": 0.623, "step": 25296 }, { "epoch": 0.7385769758547196, "grad_norm": 0.6300534363921877, "learning_rate": 5.809570154095701e-06, "loss": 0.589, "step": 25297 }, { "epoch": 0.738606172082567, "grad_norm": 0.6477644958160113, "learning_rate": 5.808921330089213e-06, "loss": 0.639, "step": 25298 }, { "epoch": 0.7386353683104143, "grad_norm": 0.6649017508132783, "learning_rate": 5.808272506082726e-06, "loss": 0.6438, "step": 25299 }, { "epoch": 0.7386645645382617, "grad_norm": 0.6906493374263223, "learning_rate": 5.8076236820762374e-06, "loss": 0.6065, "step": 25300 }, { "epoch": 0.738693760766109, "grad_norm": 0.6624085210708697, "learning_rate": 5.8069748580697495e-06, "loss": 0.64, "step": 25301 }, { "epoch": 0.7387229569939564, "grad_norm": 0.5884581418820788, "learning_rate": 5.806326034063261e-06, "loss": 0.5566, "step": 25302 }, { "epoch": 0.7387521532218038, "grad_norm": 0.6920374710779026, "learning_rate": 5.805677210056773e-06, "loss": 0.6355, "step": 25303 }, { "epoch": 0.7387813494496511, "grad_norm": 0.6251532248926257, "learning_rate": 5.805028386050285e-06, "loss": 0.5749, "step": 25304 }, { "epoch": 0.7388105456774985, "grad_norm": 0.6327260887561974, "learning_rate": 5.804379562043796e-06, "loss": 0.5865, "step": 25305 }, { "epoch": 0.7388397419053458, "grad_norm": 0.6365077075094088, "learning_rate": 5.803730738037308e-06, "loss": 0.583, "step": 25306 }, { "epoch": 0.7388689381331932, "grad_norm": 0.6079309046868249, "learning_rate": 5.803081914030819e-06, "loss": 0.5164, "step": 25307 }, { "epoch": 0.7388981343610406, "grad_norm": 0.630139121704981, "learning_rate": 5.802433090024331e-06, "loss": 0.5723, "step": 25308 }, { "epoch": 0.7389273305888879, "grad_norm": 0.6186650327110526, "learning_rate": 5.801784266017844e-06, "loss": 0.5841, "step": 25309 }, { "epoch": 0.7389565268167353, "grad_norm": 0.6044632635486622, "learning_rate": 5.801135442011355e-06, "loss": 0.5138, "step": 25310 }, { "epoch": 0.7389857230445827, "grad_norm": 0.6679635763025736, "learning_rate": 5.800486618004867e-06, "loss": 0.6236, "step": 25311 }, { "epoch": 0.73901491927243, "grad_norm": 0.6513067743214858, "learning_rate": 5.799837793998378e-06, "loss": 0.6093, "step": 25312 }, { "epoch": 0.7390441155002774, "grad_norm": 0.6356641475404592, "learning_rate": 5.79918896999189e-06, "loss": 0.6113, "step": 25313 }, { "epoch": 0.7390733117281247, "grad_norm": 0.6500417963121267, "learning_rate": 5.798540145985402e-06, "loss": 0.638, "step": 25314 }, { "epoch": 0.7391025079559721, "grad_norm": 0.6626270779030572, "learning_rate": 5.797891321978914e-06, "loss": 0.6432, "step": 25315 }, { "epoch": 0.7391317041838195, "grad_norm": 0.6450371323767926, "learning_rate": 5.797242497972425e-06, "loss": 0.5872, "step": 25316 }, { "epoch": 0.7391609004116668, "grad_norm": 0.6522521347461284, "learning_rate": 5.796593673965937e-06, "loss": 0.5949, "step": 25317 }, { "epoch": 0.7391900966395142, "grad_norm": 0.6428441859478653, "learning_rate": 5.79594484995945e-06, "loss": 0.5749, "step": 25318 }, { "epoch": 0.7392192928673615, "grad_norm": 0.6660322974261604, "learning_rate": 5.795296025952961e-06, "loss": 0.6239, "step": 25319 }, { "epoch": 0.7392484890952089, "grad_norm": 0.6867819359937153, "learning_rate": 5.794647201946473e-06, "loss": 0.6355, "step": 25320 }, { "epoch": 0.7392776853230563, "grad_norm": 0.6245683429765436, "learning_rate": 5.793998377939984e-06, "loss": 0.5729, "step": 25321 }, { "epoch": 0.7393068815509036, "grad_norm": 0.6386284891603384, "learning_rate": 5.793349553933496e-06, "loss": 0.6179, "step": 25322 }, { "epoch": 0.739336077778751, "grad_norm": 0.6542066406091543, "learning_rate": 5.792700729927008e-06, "loss": 0.6065, "step": 25323 }, { "epoch": 0.7393652740065983, "grad_norm": 0.6248487863986777, "learning_rate": 5.792051905920519e-06, "loss": 0.5563, "step": 25324 }, { "epoch": 0.7393944702344457, "grad_norm": 0.6603856833164171, "learning_rate": 5.791403081914031e-06, "loss": 0.591, "step": 25325 }, { "epoch": 0.7394236664622931, "grad_norm": 0.5976333357858855, "learning_rate": 5.7907542579075425e-06, "loss": 0.5523, "step": 25326 }, { "epoch": 0.7394528626901404, "grad_norm": 0.6365778769262955, "learning_rate": 5.7901054339010546e-06, "loss": 0.5797, "step": 25327 }, { "epoch": 0.7394820589179878, "grad_norm": 0.61746510115952, "learning_rate": 5.7894566098945674e-06, "loss": 0.566, "step": 25328 }, { "epoch": 0.7395112551458352, "grad_norm": 0.6345790036179862, "learning_rate": 5.788807785888079e-06, "loss": 0.5556, "step": 25329 }, { "epoch": 0.7395404513736825, "grad_norm": 0.6937824967537616, "learning_rate": 5.788158961881591e-06, "loss": 0.6196, "step": 25330 }, { "epoch": 0.7395696476015299, "grad_norm": 0.6180885021791003, "learning_rate": 5.787510137875102e-06, "loss": 0.5728, "step": 25331 }, { "epoch": 0.7395988438293772, "grad_norm": 0.6354356699395982, "learning_rate": 5.786861313868614e-06, "loss": 0.5948, "step": 25332 }, { "epoch": 0.7396280400572246, "grad_norm": 0.639423459403952, "learning_rate": 5.786212489862125e-06, "loss": 0.5818, "step": 25333 }, { "epoch": 0.739657236285072, "grad_norm": 0.6369867942574657, "learning_rate": 5.785563665855637e-06, "loss": 0.6126, "step": 25334 }, { "epoch": 0.7396864325129193, "grad_norm": 0.6777370136481148, "learning_rate": 5.784914841849148e-06, "loss": 0.6371, "step": 25335 }, { "epoch": 0.7397156287407667, "grad_norm": 0.659539079086168, "learning_rate": 5.78426601784266e-06, "loss": 0.6559, "step": 25336 }, { "epoch": 0.739744824968614, "grad_norm": 0.6094671028785031, "learning_rate": 5.783617193836173e-06, "loss": 0.5476, "step": 25337 }, { "epoch": 0.7397740211964614, "grad_norm": 0.6351160991240397, "learning_rate": 5.782968369829684e-06, "loss": 0.5832, "step": 25338 }, { "epoch": 0.7398032174243088, "grad_norm": 0.667077646284371, "learning_rate": 5.782319545823196e-06, "loss": 0.6505, "step": 25339 }, { "epoch": 0.7398324136521561, "grad_norm": 0.6690324793297994, "learning_rate": 5.7816707218167075e-06, "loss": 0.6309, "step": 25340 }, { "epoch": 0.7398616098800035, "grad_norm": 0.6577962458797227, "learning_rate": 5.7810218978102195e-06, "loss": 0.6739, "step": 25341 }, { "epoch": 0.7398908061078509, "grad_norm": 0.6629276063140945, "learning_rate": 5.780373073803731e-06, "loss": 0.632, "step": 25342 }, { "epoch": 0.7399200023356982, "grad_norm": 0.6604831908791334, "learning_rate": 5.779724249797243e-06, "loss": 0.6199, "step": 25343 }, { "epoch": 0.7399491985635456, "grad_norm": 0.6052335270280043, "learning_rate": 5.779075425790755e-06, "loss": 0.5013, "step": 25344 }, { "epoch": 0.7399783947913929, "grad_norm": 0.6708411882740853, "learning_rate": 5.778426601784266e-06, "loss": 0.5738, "step": 25345 }, { "epoch": 0.7400075910192403, "grad_norm": 0.632901741285633, "learning_rate": 5.777777777777778e-06, "loss": 0.6043, "step": 25346 }, { "epoch": 0.7400367872470877, "grad_norm": 0.607031733640398, "learning_rate": 5.777128953771291e-06, "loss": 0.5605, "step": 25347 }, { "epoch": 0.740065983474935, "grad_norm": 0.6667743486228473, "learning_rate": 5.776480129764802e-06, "loss": 0.6189, "step": 25348 }, { "epoch": 0.7400951797027824, "grad_norm": 0.6918724031390029, "learning_rate": 5.775831305758314e-06, "loss": 0.6722, "step": 25349 }, { "epoch": 0.7401243759306297, "grad_norm": 0.6169499143150096, "learning_rate": 5.775182481751825e-06, "loss": 0.5727, "step": 25350 }, { "epoch": 0.7401535721584771, "grad_norm": 0.6124599305690743, "learning_rate": 5.774533657745337e-06, "loss": 0.5329, "step": 25351 }, { "epoch": 0.7401827683863245, "grad_norm": 0.6704107324353618, "learning_rate": 5.7738848337388485e-06, "loss": 0.6333, "step": 25352 }, { "epoch": 0.7402119646141718, "grad_norm": 0.5920771239803911, "learning_rate": 5.7732360097323605e-06, "loss": 0.5628, "step": 25353 }, { "epoch": 0.7402411608420192, "grad_norm": 0.6305919362418945, "learning_rate": 5.772587185725872e-06, "loss": 0.5852, "step": 25354 }, { "epoch": 0.7402703570698665, "grad_norm": 0.6442912740100846, "learning_rate": 5.771938361719384e-06, "loss": 0.5512, "step": 25355 }, { "epoch": 0.7402995532977139, "grad_norm": 0.6893758373609336, "learning_rate": 5.7712895377128966e-06, "loss": 0.6388, "step": 25356 }, { "epoch": 0.7403287495255613, "grad_norm": 0.6357084359637697, "learning_rate": 5.770640713706408e-06, "loss": 0.5695, "step": 25357 }, { "epoch": 0.7403579457534086, "grad_norm": 0.6373866411821305, "learning_rate": 5.76999188969992e-06, "loss": 0.5737, "step": 25358 }, { "epoch": 0.740387141981256, "grad_norm": 0.6580747464363242, "learning_rate": 5.769343065693431e-06, "loss": 0.618, "step": 25359 }, { "epoch": 0.7404163382091034, "grad_norm": 0.6298167266868793, "learning_rate": 5.768694241686943e-06, "loss": 0.5638, "step": 25360 }, { "epoch": 0.7404455344369507, "grad_norm": 0.6786994086160849, "learning_rate": 5.768045417680454e-06, "loss": 0.6358, "step": 25361 }, { "epoch": 0.7404747306647981, "grad_norm": 0.6486184674949329, "learning_rate": 5.767396593673966e-06, "loss": 0.5949, "step": 25362 }, { "epoch": 0.7405039268926454, "grad_norm": 0.6119179914129147, "learning_rate": 5.766747769667478e-06, "loss": 0.5094, "step": 25363 }, { "epoch": 0.7405331231204928, "grad_norm": 0.6577099828424452, "learning_rate": 5.766098945660989e-06, "loss": 0.593, "step": 25364 }, { "epoch": 0.7405623193483402, "grad_norm": 0.6342015593091149, "learning_rate": 5.765450121654502e-06, "loss": 0.5462, "step": 25365 }, { "epoch": 0.7405915155761875, "grad_norm": 0.6544242029228344, "learning_rate": 5.764801297648014e-06, "loss": 0.6033, "step": 25366 }, { "epoch": 0.7406207118040349, "grad_norm": 0.673324790442189, "learning_rate": 5.7641524736415255e-06, "loss": 0.5918, "step": 25367 }, { "epoch": 0.7406499080318822, "grad_norm": 0.6344743740550917, "learning_rate": 5.7635036496350375e-06, "loss": 0.6043, "step": 25368 }, { "epoch": 0.7406791042597296, "grad_norm": 0.6388151888484414, "learning_rate": 5.762854825628549e-06, "loss": 0.5653, "step": 25369 }, { "epoch": 0.740708300487577, "grad_norm": 0.6496483491266538, "learning_rate": 5.762206001622061e-06, "loss": 0.6232, "step": 25370 }, { "epoch": 0.7407374967154243, "grad_norm": 0.5828060192997678, "learning_rate": 5.761557177615572e-06, "loss": 0.5302, "step": 25371 }, { "epoch": 0.7407666929432717, "grad_norm": 0.6596777061610049, "learning_rate": 5.760908353609084e-06, "loss": 0.5939, "step": 25372 }, { "epoch": 0.740795889171119, "grad_norm": 0.6354987454593588, "learning_rate": 5.760259529602595e-06, "loss": 0.5642, "step": 25373 }, { "epoch": 0.7408250853989664, "grad_norm": 0.7048943661749727, "learning_rate": 5.759610705596107e-06, "loss": 0.6706, "step": 25374 }, { "epoch": 0.7408542816268138, "grad_norm": 0.6864493115550753, "learning_rate": 5.75896188158962e-06, "loss": 0.698, "step": 25375 }, { "epoch": 0.7408834778546611, "grad_norm": 0.6691735457544901, "learning_rate": 5.758313057583131e-06, "loss": 0.6305, "step": 25376 }, { "epoch": 0.7409126740825085, "grad_norm": 0.6623679492003056, "learning_rate": 5.757664233576643e-06, "loss": 0.6396, "step": 25377 }, { "epoch": 0.7409418703103559, "grad_norm": 0.6557156877596493, "learning_rate": 5.757015409570154e-06, "loss": 0.6396, "step": 25378 }, { "epoch": 0.7409710665382032, "grad_norm": 0.6161756243349759, "learning_rate": 5.756366585563666e-06, "loss": 0.5284, "step": 25379 }, { "epoch": 0.7410002627660506, "grad_norm": 0.6255420275922757, "learning_rate": 5.755717761557178e-06, "loss": 0.585, "step": 25380 }, { "epoch": 0.7410294589938979, "grad_norm": 0.6310078360319601, "learning_rate": 5.75506893755069e-06, "loss": 0.6261, "step": 25381 }, { "epoch": 0.7410586552217453, "grad_norm": 0.6168433526105656, "learning_rate": 5.754420113544202e-06, "loss": 0.581, "step": 25382 }, { "epoch": 0.7410878514495927, "grad_norm": 0.6769518287500563, "learning_rate": 5.753771289537713e-06, "loss": 0.6307, "step": 25383 }, { "epoch": 0.74111704767744, "grad_norm": 0.6356354056526847, "learning_rate": 5.753122465531226e-06, "loss": 0.6153, "step": 25384 }, { "epoch": 0.7411462439052874, "grad_norm": 0.6474243531699793, "learning_rate": 5.752473641524737e-06, "loss": 0.595, "step": 25385 }, { "epoch": 0.7411754401331347, "grad_norm": 0.6732925852751048, "learning_rate": 5.751824817518249e-06, "loss": 0.6251, "step": 25386 }, { "epoch": 0.7412046363609822, "grad_norm": 0.626732912204898, "learning_rate": 5.751175993511761e-06, "loss": 0.5954, "step": 25387 }, { "epoch": 0.7412338325888296, "grad_norm": 0.6109951751079803, "learning_rate": 5.750527169505272e-06, "loss": 0.5543, "step": 25388 }, { "epoch": 0.7412630288166769, "grad_norm": 0.6303838639586796, "learning_rate": 5.749878345498784e-06, "loss": 0.5801, "step": 25389 }, { "epoch": 0.7412922250445243, "grad_norm": 0.6615985448042485, "learning_rate": 5.749229521492295e-06, "loss": 0.6607, "step": 25390 }, { "epoch": 0.7413214212723717, "grad_norm": 0.6645766661362952, "learning_rate": 5.748580697485807e-06, "loss": 0.6622, "step": 25391 }, { "epoch": 0.741350617500219, "grad_norm": 0.5816811662503315, "learning_rate": 5.7479318734793185e-06, "loss": 0.5102, "step": 25392 }, { "epoch": 0.7413798137280664, "grad_norm": 0.6771805861816365, "learning_rate": 5.7472830494728306e-06, "loss": 0.6378, "step": 25393 }, { "epoch": 0.7414090099559137, "grad_norm": 0.7051742084540292, "learning_rate": 5.7466342254663434e-06, "loss": 0.5208, "step": 25394 }, { "epoch": 0.7414382061837611, "grad_norm": 0.6592497332120165, "learning_rate": 5.745985401459855e-06, "loss": 0.6368, "step": 25395 }, { "epoch": 0.7414674024116085, "grad_norm": 0.6902895419576125, "learning_rate": 5.745336577453367e-06, "loss": 0.6897, "step": 25396 }, { "epoch": 0.7414965986394558, "grad_norm": 0.605512795283883, "learning_rate": 5.744687753446878e-06, "loss": 0.5331, "step": 25397 }, { "epoch": 0.7415257948673032, "grad_norm": 0.6504718764358088, "learning_rate": 5.74403892944039e-06, "loss": 0.5955, "step": 25398 }, { "epoch": 0.7415549910951506, "grad_norm": 0.6331817980851989, "learning_rate": 5.743390105433901e-06, "loss": 0.5808, "step": 25399 }, { "epoch": 0.7415841873229979, "grad_norm": 0.6058736080236587, "learning_rate": 5.742741281427413e-06, "loss": 0.5256, "step": 25400 }, { "epoch": 0.7416133835508453, "grad_norm": 0.6620563052855161, "learning_rate": 5.742092457420925e-06, "loss": 0.6237, "step": 25401 }, { "epoch": 0.7416425797786926, "grad_norm": 0.6599563886120929, "learning_rate": 5.741443633414436e-06, "loss": 0.5863, "step": 25402 }, { "epoch": 0.74167177600654, "grad_norm": 0.6892510045564446, "learning_rate": 5.740794809407949e-06, "loss": 0.623, "step": 25403 }, { "epoch": 0.7417009722343874, "grad_norm": 0.6732526859757108, "learning_rate": 5.74014598540146e-06, "loss": 0.6629, "step": 25404 }, { "epoch": 0.7417301684622347, "grad_norm": 0.6378408619401195, "learning_rate": 5.739497161394972e-06, "loss": 0.5974, "step": 25405 }, { "epoch": 0.7417593646900821, "grad_norm": 0.6275889267491125, "learning_rate": 5.738848337388484e-06, "loss": 0.6123, "step": 25406 }, { "epoch": 0.7417885609179294, "grad_norm": 0.6667355222800222, "learning_rate": 5.7381995133819956e-06, "loss": 0.6373, "step": 25407 }, { "epoch": 0.7418177571457768, "grad_norm": 0.6834281764585204, "learning_rate": 5.737550689375508e-06, "loss": 0.6406, "step": 25408 }, { "epoch": 0.7418469533736242, "grad_norm": 0.6041363362592176, "learning_rate": 5.736901865369019e-06, "loss": 0.5292, "step": 25409 }, { "epoch": 0.7418761496014715, "grad_norm": 0.704123063165692, "learning_rate": 5.736253041362531e-06, "loss": 0.6329, "step": 25410 }, { "epoch": 0.7419053458293189, "grad_norm": 0.676937796769672, "learning_rate": 5.735604217356042e-06, "loss": 0.6151, "step": 25411 }, { "epoch": 0.7419345420571662, "grad_norm": 0.6546709827827809, "learning_rate": 5.734955393349554e-06, "loss": 0.6438, "step": 25412 }, { "epoch": 0.7419637382850136, "grad_norm": 0.6588104234014808, "learning_rate": 5.734306569343067e-06, "loss": 0.5619, "step": 25413 }, { "epoch": 0.741992934512861, "grad_norm": 0.6482858807710363, "learning_rate": 5.733657745336578e-06, "loss": 0.6097, "step": 25414 }, { "epoch": 0.7420221307407083, "grad_norm": 0.707105361715009, "learning_rate": 5.73300892133009e-06, "loss": 0.6404, "step": 25415 }, { "epoch": 0.7420513269685557, "grad_norm": 0.6003375894162022, "learning_rate": 5.732360097323601e-06, "loss": 0.5389, "step": 25416 }, { "epoch": 0.742080523196403, "grad_norm": 0.6581467394202114, "learning_rate": 5.731711273317113e-06, "loss": 0.5722, "step": 25417 }, { "epoch": 0.7421097194242504, "grad_norm": 0.5975243542845108, "learning_rate": 5.7310624493106245e-06, "loss": 0.5344, "step": 25418 }, { "epoch": 0.7421389156520978, "grad_norm": 0.6343810712944109, "learning_rate": 5.7304136253041365e-06, "loss": 0.5689, "step": 25419 }, { "epoch": 0.7421681118799451, "grad_norm": 0.6621966734210295, "learning_rate": 5.7297648012976485e-06, "loss": 0.5976, "step": 25420 }, { "epoch": 0.7421973081077925, "grad_norm": 0.6480258556975924, "learning_rate": 5.72911597729116e-06, "loss": 0.6127, "step": 25421 }, { "epoch": 0.7422265043356399, "grad_norm": 0.6417508356853483, "learning_rate": 5.7284671532846726e-06, "loss": 0.6021, "step": 25422 }, { "epoch": 0.7422557005634872, "grad_norm": 0.6294591714772371, "learning_rate": 5.727818329278184e-06, "loss": 0.5859, "step": 25423 }, { "epoch": 0.7422848967913346, "grad_norm": 0.5621448764968777, "learning_rate": 5.727169505271696e-06, "loss": 0.4759, "step": 25424 }, { "epoch": 0.7423140930191819, "grad_norm": 0.5845723227020545, "learning_rate": 5.726520681265208e-06, "loss": 0.5152, "step": 25425 }, { "epoch": 0.7423432892470293, "grad_norm": 0.7047249203177504, "learning_rate": 5.725871857258719e-06, "loss": 0.5591, "step": 25426 }, { "epoch": 0.7423724854748767, "grad_norm": 0.6517295174084504, "learning_rate": 5.725223033252231e-06, "loss": 0.5939, "step": 25427 }, { "epoch": 0.742401681702724, "grad_norm": 0.6033256291651647, "learning_rate": 5.724574209245742e-06, "loss": 0.5689, "step": 25428 }, { "epoch": 0.7424308779305714, "grad_norm": 0.6197235909708624, "learning_rate": 5.723925385239254e-06, "loss": 0.5383, "step": 25429 }, { "epoch": 0.7424600741584187, "grad_norm": 0.6710378055961997, "learning_rate": 5.723276561232765e-06, "loss": 0.6506, "step": 25430 }, { "epoch": 0.7424892703862661, "grad_norm": 0.6049789650711042, "learning_rate": 5.7226277372262774e-06, "loss": 0.516, "step": 25431 }, { "epoch": 0.7425184666141135, "grad_norm": 0.6363620088991648, "learning_rate": 5.72197891321979e-06, "loss": 0.5652, "step": 25432 }, { "epoch": 0.7425476628419608, "grad_norm": 0.6302773759084022, "learning_rate": 5.7213300892133015e-06, "loss": 0.5521, "step": 25433 }, { "epoch": 0.7425768590698082, "grad_norm": 0.6673944058460123, "learning_rate": 5.7206812652068135e-06, "loss": 0.6605, "step": 25434 }, { "epoch": 0.7426060552976556, "grad_norm": 0.6751281230185807, "learning_rate": 5.720032441200325e-06, "loss": 0.668, "step": 25435 }, { "epoch": 0.7426352515255029, "grad_norm": 0.6656191029816441, "learning_rate": 5.719383617193837e-06, "loss": 0.6447, "step": 25436 }, { "epoch": 0.7426644477533503, "grad_norm": 0.6438694826131084, "learning_rate": 5.718734793187348e-06, "loss": 0.6253, "step": 25437 }, { "epoch": 0.7426936439811976, "grad_norm": 0.7485701335239902, "learning_rate": 5.71808596918086e-06, "loss": 0.631, "step": 25438 }, { "epoch": 0.742722840209045, "grad_norm": 0.6803941706510358, "learning_rate": 5.717437145174372e-06, "loss": 0.6283, "step": 25439 }, { "epoch": 0.7427520364368924, "grad_norm": 0.6343988938341196, "learning_rate": 5.716788321167883e-06, "loss": 0.5456, "step": 25440 }, { "epoch": 0.7427812326647397, "grad_norm": 0.7204291374000625, "learning_rate": 5.716139497161396e-06, "loss": 0.6804, "step": 25441 }, { "epoch": 0.7428104288925871, "grad_norm": 0.6682860771421604, "learning_rate": 5.715490673154907e-06, "loss": 0.6288, "step": 25442 }, { "epoch": 0.7428396251204344, "grad_norm": 0.6462950855371649, "learning_rate": 5.714841849148419e-06, "loss": 0.6074, "step": 25443 }, { "epoch": 0.7428688213482818, "grad_norm": 0.6552318546208882, "learning_rate": 5.714193025141931e-06, "loss": 0.6241, "step": 25444 }, { "epoch": 0.7428980175761292, "grad_norm": 0.6595762326915198, "learning_rate": 5.713544201135442e-06, "loss": 0.6218, "step": 25445 }, { "epoch": 0.7429272138039765, "grad_norm": 0.660828253233185, "learning_rate": 5.7128953771289544e-06, "loss": 0.5847, "step": 25446 }, { "epoch": 0.7429564100318239, "grad_norm": 0.609426325571169, "learning_rate": 5.712246553122466e-06, "loss": 0.5586, "step": 25447 }, { "epoch": 0.7429856062596712, "grad_norm": 0.6015808842904646, "learning_rate": 5.711597729115978e-06, "loss": 0.5405, "step": 25448 }, { "epoch": 0.7430148024875186, "grad_norm": 0.7511089374914434, "learning_rate": 5.710948905109489e-06, "loss": 0.6151, "step": 25449 }, { "epoch": 0.743043998715366, "grad_norm": 0.6380337565060297, "learning_rate": 5.710300081103002e-06, "loss": 0.5641, "step": 25450 }, { "epoch": 0.7430731949432133, "grad_norm": 0.613264732441139, "learning_rate": 5.709651257096514e-06, "loss": 0.5718, "step": 25451 }, { "epoch": 0.7431023911710607, "grad_norm": 0.6217489883136205, "learning_rate": 5.709002433090025e-06, "loss": 0.5755, "step": 25452 }, { "epoch": 0.7431315873989081, "grad_norm": 0.665356095604888, "learning_rate": 5.708353609083537e-06, "loss": 0.6532, "step": 25453 }, { "epoch": 0.7431607836267554, "grad_norm": 0.6040321154845695, "learning_rate": 5.707704785077048e-06, "loss": 0.5133, "step": 25454 }, { "epoch": 0.7431899798546028, "grad_norm": 0.67322620497533, "learning_rate": 5.70705596107056e-06, "loss": 0.6698, "step": 25455 }, { "epoch": 0.7432191760824501, "grad_norm": 0.6788436092073261, "learning_rate": 5.706407137064071e-06, "loss": 0.662, "step": 25456 }, { "epoch": 0.7432483723102975, "grad_norm": 0.6576834264999818, "learning_rate": 5.705758313057583e-06, "loss": 0.6285, "step": 25457 }, { "epoch": 0.7432775685381449, "grad_norm": 0.6355180072029987, "learning_rate": 5.7051094890510945e-06, "loss": 0.5858, "step": 25458 }, { "epoch": 0.7433067647659922, "grad_norm": 0.5892269126644286, "learning_rate": 5.7044606650446066e-06, "loss": 0.5188, "step": 25459 }, { "epoch": 0.7433359609938396, "grad_norm": 0.577802106670226, "learning_rate": 5.7038118410381194e-06, "loss": 0.4944, "step": 25460 }, { "epoch": 0.743365157221687, "grad_norm": 0.6338543427317983, "learning_rate": 5.703163017031631e-06, "loss": 0.614, "step": 25461 }, { "epoch": 0.7433943534495343, "grad_norm": 0.6845077787100871, "learning_rate": 5.702514193025143e-06, "loss": 0.6367, "step": 25462 }, { "epoch": 0.7434235496773817, "grad_norm": 0.6356477501898876, "learning_rate": 5.701865369018655e-06, "loss": 0.5697, "step": 25463 }, { "epoch": 0.743452745905229, "grad_norm": 0.7038917221581357, "learning_rate": 5.701216545012166e-06, "loss": 0.6666, "step": 25464 }, { "epoch": 0.7434819421330764, "grad_norm": 0.6374036791475973, "learning_rate": 5.700567721005678e-06, "loss": 0.5954, "step": 25465 }, { "epoch": 0.7435111383609238, "grad_norm": 0.6123591223842755, "learning_rate": 5.699918896999189e-06, "loss": 0.5556, "step": 25466 }, { "epoch": 0.7435403345887711, "grad_norm": 0.6069961055826119, "learning_rate": 5.699270072992701e-06, "loss": 0.5514, "step": 25467 }, { "epoch": 0.7435695308166185, "grad_norm": 0.6500972188252623, "learning_rate": 5.698621248986212e-06, "loss": 0.6142, "step": 25468 }, { "epoch": 0.7435987270444658, "grad_norm": 0.6443460340123655, "learning_rate": 5.697972424979725e-06, "loss": 0.6033, "step": 25469 }, { "epoch": 0.7436279232723132, "grad_norm": 0.6528500310518022, "learning_rate": 5.697323600973237e-06, "loss": 0.6145, "step": 25470 }, { "epoch": 0.7436571195001606, "grad_norm": 0.6660786000204976, "learning_rate": 5.696674776966748e-06, "loss": 0.6406, "step": 25471 }, { "epoch": 0.7436863157280079, "grad_norm": 0.6359265059847299, "learning_rate": 5.69602595296026e-06, "loss": 0.6099, "step": 25472 }, { "epoch": 0.7437155119558553, "grad_norm": 0.6794340293419047, "learning_rate": 5.6953771289537716e-06, "loss": 0.6179, "step": 25473 }, { "epoch": 0.7437447081837026, "grad_norm": 0.6659684659891705, "learning_rate": 5.694728304947284e-06, "loss": 0.6242, "step": 25474 }, { "epoch": 0.74377390441155, "grad_norm": 0.6237785181877127, "learning_rate": 5.694079480940795e-06, "loss": 0.5506, "step": 25475 }, { "epoch": 0.7438031006393974, "grad_norm": 0.6225435038227105, "learning_rate": 5.693430656934307e-06, "loss": 0.5802, "step": 25476 }, { "epoch": 0.7438322968672447, "grad_norm": 0.6302479576787992, "learning_rate": 5.692781832927818e-06, "loss": 0.5592, "step": 25477 }, { "epoch": 0.7438614930950921, "grad_norm": 0.639221972788117, "learning_rate": 5.69213300892133e-06, "loss": 0.6069, "step": 25478 }, { "epoch": 0.7438906893229394, "grad_norm": 0.6235218065875466, "learning_rate": 5.691484184914843e-06, "loss": 0.5815, "step": 25479 }, { "epoch": 0.7439198855507868, "grad_norm": 0.6966748096507234, "learning_rate": 5.690835360908354e-06, "loss": 0.6926, "step": 25480 }, { "epoch": 0.7439490817786342, "grad_norm": 0.6586307119766414, "learning_rate": 5.690186536901866e-06, "loss": 0.6456, "step": 25481 }, { "epoch": 0.7439782780064815, "grad_norm": 0.6529502425206102, "learning_rate": 5.689537712895378e-06, "loss": 0.6071, "step": 25482 }, { "epoch": 0.7440074742343289, "grad_norm": 0.6464639143206942, "learning_rate": 5.688888888888889e-06, "loss": 0.6039, "step": 25483 }, { "epoch": 0.7440366704621763, "grad_norm": 0.6887748164175207, "learning_rate": 5.688240064882401e-06, "loss": 0.6316, "step": 25484 }, { "epoch": 0.7440658666900236, "grad_norm": 0.6606678616904014, "learning_rate": 5.6875912408759125e-06, "loss": 0.5968, "step": 25485 }, { "epoch": 0.744095062917871, "grad_norm": 0.6344954275730871, "learning_rate": 5.6869424168694245e-06, "loss": 0.5973, "step": 25486 }, { "epoch": 0.7441242591457183, "grad_norm": 0.6311825440267868, "learning_rate": 5.686293592862936e-06, "loss": 0.5525, "step": 25487 }, { "epoch": 0.7441534553735657, "grad_norm": 0.6689255391798732, "learning_rate": 5.685644768856449e-06, "loss": 0.6058, "step": 25488 }, { "epoch": 0.7441826516014131, "grad_norm": 0.6753296654584595, "learning_rate": 5.684995944849961e-06, "loss": 0.6069, "step": 25489 }, { "epoch": 0.7442118478292604, "grad_norm": 0.642797762828914, "learning_rate": 5.684347120843472e-06, "loss": 0.5892, "step": 25490 }, { "epoch": 0.7442410440571078, "grad_norm": 0.6440596169904358, "learning_rate": 5.683698296836984e-06, "loss": 0.6258, "step": 25491 }, { "epoch": 0.7442702402849551, "grad_norm": 0.6885390426346288, "learning_rate": 5.683049472830495e-06, "loss": 0.6589, "step": 25492 }, { "epoch": 0.7442994365128025, "grad_norm": 0.7132526062248146, "learning_rate": 5.682400648824007e-06, "loss": 0.6868, "step": 25493 }, { "epoch": 0.7443286327406499, "grad_norm": 0.6757941249786882, "learning_rate": 5.681751824817518e-06, "loss": 0.6842, "step": 25494 }, { "epoch": 0.7443578289684972, "grad_norm": 0.713542763558633, "learning_rate": 5.68110300081103e-06, "loss": 0.6632, "step": 25495 }, { "epoch": 0.7443870251963446, "grad_norm": 0.6638358140500255, "learning_rate": 5.680454176804541e-06, "loss": 0.5876, "step": 25496 }, { "epoch": 0.744416221424192, "grad_norm": 0.6548722435483101, "learning_rate": 5.6798053527980534e-06, "loss": 0.6097, "step": 25497 }, { "epoch": 0.7444454176520393, "grad_norm": 0.6798480540202879, "learning_rate": 5.679156528791566e-06, "loss": 0.6143, "step": 25498 }, { "epoch": 0.7444746138798867, "grad_norm": 0.6266890213069464, "learning_rate": 5.6785077047850775e-06, "loss": 0.5637, "step": 25499 }, { "epoch": 0.744503810107734, "grad_norm": 0.6744997626585862, "learning_rate": 5.6778588807785895e-06, "loss": 0.6324, "step": 25500 }, { "epoch": 0.7445330063355814, "grad_norm": 0.6044980366859353, "learning_rate": 5.6772100567721015e-06, "loss": 0.541, "step": 25501 }, { "epoch": 0.7445622025634288, "grad_norm": 0.6608783524452793, "learning_rate": 5.676561232765613e-06, "loss": 0.6484, "step": 25502 }, { "epoch": 0.7445913987912761, "grad_norm": 0.6820545036889768, "learning_rate": 5.675912408759125e-06, "loss": 0.5994, "step": 25503 }, { "epoch": 0.7446205950191235, "grad_norm": 0.5957199790333949, "learning_rate": 5.675263584752636e-06, "loss": 0.5066, "step": 25504 }, { "epoch": 0.7446497912469708, "grad_norm": 0.6796988794012859, "learning_rate": 5.674614760746148e-06, "loss": 0.6187, "step": 25505 }, { "epoch": 0.7446789874748182, "grad_norm": 0.6647275382145563, "learning_rate": 5.673965936739659e-06, "loss": 0.6165, "step": 25506 }, { "epoch": 0.7447081837026656, "grad_norm": 0.6204856499386745, "learning_rate": 5.673317112733172e-06, "loss": 0.5355, "step": 25507 }, { "epoch": 0.744737379930513, "grad_norm": 0.6711711984250229, "learning_rate": 5.672668288726684e-06, "loss": 0.6231, "step": 25508 }, { "epoch": 0.7447665761583604, "grad_norm": 0.5798631416420301, "learning_rate": 5.672019464720195e-06, "loss": 0.5085, "step": 25509 }, { "epoch": 0.7447957723862078, "grad_norm": 0.6888441139394684, "learning_rate": 5.671370640713707e-06, "loss": 0.6184, "step": 25510 }, { "epoch": 0.7448249686140551, "grad_norm": 0.6733427854004778, "learning_rate": 5.6707218167072184e-06, "loss": 0.6465, "step": 25511 }, { "epoch": 0.7448541648419025, "grad_norm": 0.600631925370526, "learning_rate": 5.6700729927007305e-06, "loss": 0.512, "step": 25512 }, { "epoch": 0.7448833610697498, "grad_norm": 0.6893661287191195, "learning_rate": 5.669424168694242e-06, "loss": 0.6444, "step": 25513 }, { "epoch": 0.7449125572975972, "grad_norm": 0.6901844664649665, "learning_rate": 5.668775344687754e-06, "loss": 0.6661, "step": 25514 }, { "epoch": 0.7449417535254446, "grad_norm": 0.6175217332672844, "learning_rate": 5.668126520681265e-06, "loss": 0.5339, "step": 25515 }, { "epoch": 0.7449709497532919, "grad_norm": 0.6876535864412748, "learning_rate": 5.667477696674778e-06, "loss": 0.6522, "step": 25516 }, { "epoch": 0.7450001459811393, "grad_norm": 0.6188086355272645, "learning_rate": 5.66682887266829e-06, "loss": 0.56, "step": 25517 }, { "epoch": 0.7450293422089866, "grad_norm": 0.6175364118281289, "learning_rate": 5.666180048661801e-06, "loss": 0.5535, "step": 25518 }, { "epoch": 0.745058538436834, "grad_norm": 0.622722352283719, "learning_rate": 5.665531224655313e-06, "loss": 0.5686, "step": 25519 }, { "epoch": 0.7450877346646814, "grad_norm": 0.6730932013155491, "learning_rate": 5.664882400648824e-06, "loss": 0.6918, "step": 25520 }, { "epoch": 0.7451169308925287, "grad_norm": 0.6480983666418858, "learning_rate": 5.664233576642336e-06, "loss": 0.5886, "step": 25521 }, { "epoch": 0.7451461271203761, "grad_norm": 0.6162432453675366, "learning_rate": 5.663584752635848e-06, "loss": 0.6026, "step": 25522 }, { "epoch": 0.7451753233482235, "grad_norm": 0.5788638690548856, "learning_rate": 5.662935928629359e-06, "loss": 0.5311, "step": 25523 }, { "epoch": 0.7452045195760708, "grad_norm": 0.6213720614982846, "learning_rate": 5.662287104622871e-06, "loss": 0.5359, "step": 25524 }, { "epoch": 0.7452337158039182, "grad_norm": 0.6430827982197951, "learning_rate": 5.6616382806163826e-06, "loss": 0.5485, "step": 25525 }, { "epoch": 0.7452629120317655, "grad_norm": 0.6384725480657006, "learning_rate": 5.6609894566098954e-06, "loss": 0.6036, "step": 25526 }, { "epoch": 0.7452921082596129, "grad_norm": 0.6524352211432465, "learning_rate": 5.6603406326034075e-06, "loss": 0.6429, "step": 25527 }, { "epoch": 0.7453213044874603, "grad_norm": 0.6437175744406237, "learning_rate": 5.659691808596919e-06, "loss": 0.6119, "step": 25528 }, { "epoch": 0.7453505007153076, "grad_norm": 0.5902904922436486, "learning_rate": 5.659042984590431e-06, "loss": 0.5414, "step": 25529 }, { "epoch": 0.745379696943155, "grad_norm": 0.6084559752941363, "learning_rate": 5.658394160583942e-06, "loss": 0.56, "step": 25530 }, { "epoch": 0.7454088931710023, "grad_norm": 0.6066600187510383, "learning_rate": 5.657745336577454e-06, "loss": 0.5393, "step": 25531 }, { "epoch": 0.7454380893988497, "grad_norm": 0.6574885499984874, "learning_rate": 5.657096512570965e-06, "loss": 0.6506, "step": 25532 }, { "epoch": 0.7454672856266971, "grad_norm": 0.6810916167588498, "learning_rate": 5.656447688564477e-06, "loss": 0.6428, "step": 25533 }, { "epoch": 0.7454964818545444, "grad_norm": 0.6404984424713549, "learning_rate": 5.655798864557988e-06, "loss": 0.646, "step": 25534 }, { "epoch": 0.7455256780823918, "grad_norm": 0.6366851641051753, "learning_rate": 5.655150040551501e-06, "loss": 0.6247, "step": 25535 }, { "epoch": 0.7455548743102391, "grad_norm": 0.6209370707863354, "learning_rate": 5.654501216545013e-06, "loss": 0.5495, "step": 25536 }, { "epoch": 0.7455840705380865, "grad_norm": 0.6491872174659213, "learning_rate": 5.653852392538524e-06, "loss": 0.6363, "step": 25537 }, { "epoch": 0.7456132667659339, "grad_norm": 0.6155826726718008, "learning_rate": 5.653203568532036e-06, "loss": 0.5443, "step": 25538 }, { "epoch": 0.7456424629937812, "grad_norm": 0.6571864260341055, "learning_rate": 5.6525547445255476e-06, "loss": 0.6502, "step": 25539 }, { "epoch": 0.7456716592216286, "grad_norm": 0.7009668503944217, "learning_rate": 5.65190592051906e-06, "loss": 0.6202, "step": 25540 }, { "epoch": 0.745700855449476, "grad_norm": 0.6690117705696238, "learning_rate": 5.651257096512572e-06, "loss": 0.6024, "step": 25541 }, { "epoch": 0.7457300516773233, "grad_norm": 0.6518898875051798, "learning_rate": 5.650608272506083e-06, "loss": 0.6069, "step": 25542 }, { "epoch": 0.7457592479051707, "grad_norm": 0.6282768692929325, "learning_rate": 5.649959448499595e-06, "loss": 0.5745, "step": 25543 }, { "epoch": 0.745788444133018, "grad_norm": 0.6633601339080776, "learning_rate": 5.649310624493106e-06, "loss": 0.645, "step": 25544 }, { "epoch": 0.7458176403608654, "grad_norm": 0.6729902645826845, "learning_rate": 5.648661800486619e-06, "loss": 0.6143, "step": 25545 }, { "epoch": 0.7458468365887128, "grad_norm": 0.6828481757948489, "learning_rate": 5.648012976480131e-06, "loss": 0.649, "step": 25546 }, { "epoch": 0.7458760328165601, "grad_norm": 0.7012907130675115, "learning_rate": 5.647364152473642e-06, "loss": 0.6274, "step": 25547 }, { "epoch": 0.7459052290444075, "grad_norm": 0.6908761581518632, "learning_rate": 5.646715328467154e-06, "loss": 0.7342, "step": 25548 }, { "epoch": 0.7459344252722548, "grad_norm": 0.6719305198243832, "learning_rate": 5.646066504460665e-06, "loss": 0.6637, "step": 25549 }, { "epoch": 0.7459636215001022, "grad_norm": 0.6330455284907356, "learning_rate": 5.645417680454177e-06, "loss": 0.5829, "step": 25550 }, { "epoch": 0.7459928177279496, "grad_norm": 0.642718306331064, "learning_rate": 5.6447688564476885e-06, "loss": 0.587, "step": 25551 }, { "epoch": 0.7460220139557969, "grad_norm": 0.6346967618029685, "learning_rate": 5.6441200324412005e-06, "loss": 0.5911, "step": 25552 }, { "epoch": 0.7460512101836443, "grad_norm": 0.6804579370672941, "learning_rate": 5.643471208434712e-06, "loss": 0.6614, "step": 25553 }, { "epoch": 0.7460804064114916, "grad_norm": 0.6534532687478694, "learning_rate": 5.642822384428225e-06, "loss": 0.5337, "step": 25554 }, { "epoch": 0.746109602639339, "grad_norm": 0.6375176278126728, "learning_rate": 5.642173560421737e-06, "loss": 0.5629, "step": 25555 }, { "epoch": 0.7461387988671864, "grad_norm": 0.6276591660717799, "learning_rate": 5.641524736415248e-06, "loss": 0.5836, "step": 25556 }, { "epoch": 0.7461679950950337, "grad_norm": 0.710802740715789, "learning_rate": 5.64087591240876e-06, "loss": 0.6256, "step": 25557 }, { "epoch": 0.7461971913228811, "grad_norm": 0.6503590677082982, "learning_rate": 5.640227088402271e-06, "loss": 0.5972, "step": 25558 }, { "epoch": 0.7462263875507285, "grad_norm": 0.7018541928617413, "learning_rate": 5.639578264395783e-06, "loss": 0.6959, "step": 25559 }, { "epoch": 0.7462555837785758, "grad_norm": 0.6420299690654645, "learning_rate": 5.638929440389295e-06, "loss": 0.5921, "step": 25560 }, { "epoch": 0.7462847800064232, "grad_norm": 0.624146300303066, "learning_rate": 5.638280616382806e-06, "loss": 0.5677, "step": 25561 }, { "epoch": 0.7463139762342705, "grad_norm": 0.631241208037669, "learning_rate": 5.637631792376318e-06, "loss": 0.6174, "step": 25562 }, { "epoch": 0.7463431724621179, "grad_norm": 0.6529250031952354, "learning_rate": 5.6369829683698294e-06, "loss": 0.622, "step": 25563 }, { "epoch": 0.7463723686899653, "grad_norm": 0.6555707914973773, "learning_rate": 5.636334144363342e-06, "loss": 0.6473, "step": 25564 }, { "epoch": 0.7464015649178126, "grad_norm": 0.6405205121417534, "learning_rate": 5.635685320356854e-06, "loss": 0.5807, "step": 25565 }, { "epoch": 0.74643076114566, "grad_norm": 0.701965057275322, "learning_rate": 5.6350364963503655e-06, "loss": 0.6432, "step": 25566 }, { "epoch": 0.7464599573735073, "grad_norm": 0.6308352892872233, "learning_rate": 5.6343876723438776e-06, "loss": 0.6308, "step": 25567 }, { "epoch": 0.7464891536013547, "grad_norm": 0.6120563168065603, "learning_rate": 5.633738848337389e-06, "loss": 0.5421, "step": 25568 }, { "epoch": 0.7465183498292021, "grad_norm": 0.6130869586810976, "learning_rate": 5.633090024330901e-06, "loss": 0.559, "step": 25569 }, { "epoch": 0.7465475460570494, "grad_norm": 0.628625798083448, "learning_rate": 5.632441200324412e-06, "loss": 0.582, "step": 25570 }, { "epoch": 0.7465767422848968, "grad_norm": 0.6480451189648724, "learning_rate": 5.631792376317924e-06, "loss": 0.557, "step": 25571 }, { "epoch": 0.7466059385127441, "grad_norm": 0.6096009934661665, "learning_rate": 5.631143552311435e-06, "loss": 0.5357, "step": 25572 }, { "epoch": 0.7466351347405915, "grad_norm": 0.6724062917679241, "learning_rate": 5.630494728304948e-06, "loss": 0.6214, "step": 25573 }, { "epoch": 0.7466643309684389, "grad_norm": 0.6956250106702508, "learning_rate": 5.62984590429846e-06, "loss": 0.632, "step": 25574 }, { "epoch": 0.7466935271962862, "grad_norm": 0.6405942874481539, "learning_rate": 5.629197080291971e-06, "loss": 0.5791, "step": 25575 }, { "epoch": 0.7467227234241336, "grad_norm": 0.6092219937830811, "learning_rate": 5.628548256285483e-06, "loss": 0.536, "step": 25576 }, { "epoch": 0.746751919651981, "grad_norm": 0.6461278672488908, "learning_rate": 5.6278994322789944e-06, "loss": 0.5833, "step": 25577 }, { "epoch": 0.7467811158798283, "grad_norm": 0.6148078648178802, "learning_rate": 5.6272506082725065e-06, "loss": 0.5743, "step": 25578 }, { "epoch": 0.7468103121076757, "grad_norm": 0.6102546378179462, "learning_rate": 5.6266017842660185e-06, "loss": 0.584, "step": 25579 }, { "epoch": 0.746839508335523, "grad_norm": 0.6483817017564151, "learning_rate": 5.62595296025953e-06, "loss": 0.6203, "step": 25580 }, { "epoch": 0.7468687045633704, "grad_norm": 0.6380241276975153, "learning_rate": 5.625304136253042e-06, "loss": 0.5848, "step": 25581 }, { "epoch": 0.7468979007912178, "grad_norm": 0.6686874499154097, "learning_rate": 5.624655312246553e-06, "loss": 0.6099, "step": 25582 }, { "epoch": 0.7469270970190651, "grad_norm": 0.6300140060534777, "learning_rate": 5.624006488240066e-06, "loss": 0.5835, "step": 25583 }, { "epoch": 0.7469562932469125, "grad_norm": 0.6949010928830491, "learning_rate": 5.623357664233578e-06, "loss": 0.6539, "step": 25584 }, { "epoch": 0.7469854894747598, "grad_norm": 0.5682735705011822, "learning_rate": 5.622708840227089e-06, "loss": 0.5128, "step": 25585 }, { "epoch": 0.7470146857026072, "grad_norm": 0.6266640524494401, "learning_rate": 5.622060016220601e-06, "loss": 0.573, "step": 25586 }, { "epoch": 0.7470438819304546, "grad_norm": 0.6272205044206245, "learning_rate": 5.621411192214112e-06, "loss": 0.5744, "step": 25587 }, { "epoch": 0.7470730781583019, "grad_norm": 0.6673106619529282, "learning_rate": 5.620762368207624e-06, "loss": 0.6157, "step": 25588 }, { "epoch": 0.7471022743861493, "grad_norm": 0.623029755825458, "learning_rate": 5.620113544201135e-06, "loss": 0.5629, "step": 25589 }, { "epoch": 0.7471314706139967, "grad_norm": 0.6288505986677234, "learning_rate": 5.619464720194647e-06, "loss": 0.5443, "step": 25590 }, { "epoch": 0.747160666841844, "grad_norm": 0.6560325310299607, "learning_rate": 5.618815896188159e-06, "loss": 0.6027, "step": 25591 }, { "epoch": 0.7471898630696914, "grad_norm": 0.6152855571850939, "learning_rate": 5.6181670721816715e-06, "loss": 0.5553, "step": 25592 }, { "epoch": 0.7472190592975387, "grad_norm": 0.6163924543972041, "learning_rate": 5.6175182481751835e-06, "loss": 0.5701, "step": 25593 }, { "epoch": 0.7472482555253861, "grad_norm": 0.6535321781806335, "learning_rate": 5.616869424168695e-06, "loss": 0.6417, "step": 25594 }, { "epoch": 0.7472774517532335, "grad_norm": 0.6068354799927336, "learning_rate": 5.616220600162207e-06, "loss": 0.5166, "step": 25595 }, { "epoch": 0.7473066479810808, "grad_norm": 0.637549254799743, "learning_rate": 5.615571776155718e-06, "loss": 0.5749, "step": 25596 }, { "epoch": 0.7473358442089282, "grad_norm": 0.6464846026371776, "learning_rate": 5.61492295214923e-06, "loss": 0.6167, "step": 25597 }, { "epoch": 0.7473650404367755, "grad_norm": 0.6743518936685228, "learning_rate": 5.614274128142742e-06, "loss": 0.6527, "step": 25598 }, { "epoch": 0.7473942366646229, "grad_norm": 0.710814447768739, "learning_rate": 5.613625304136253e-06, "loss": 0.6921, "step": 25599 }, { "epoch": 0.7474234328924703, "grad_norm": 0.6117938341650092, "learning_rate": 5.612976480129765e-06, "loss": 0.5488, "step": 25600 }, { "epoch": 0.7474526291203176, "grad_norm": 0.6502792299210379, "learning_rate": 5.612327656123277e-06, "loss": 0.616, "step": 25601 }, { "epoch": 0.747481825348165, "grad_norm": 0.6891081783459925, "learning_rate": 5.611678832116789e-06, "loss": 0.6331, "step": 25602 }, { "epoch": 0.7475110215760123, "grad_norm": 0.6535197796297603, "learning_rate": 5.611030008110301e-06, "loss": 0.6642, "step": 25603 }, { "epoch": 0.7475402178038597, "grad_norm": 0.6312986013895062, "learning_rate": 5.610381184103812e-06, "loss": 0.5722, "step": 25604 }, { "epoch": 0.7475694140317071, "grad_norm": 0.671921532322733, "learning_rate": 5.609732360097324e-06, "loss": 0.6157, "step": 25605 }, { "epoch": 0.7475986102595544, "grad_norm": 0.6379054602042122, "learning_rate": 5.609083536090836e-06, "loss": 0.6051, "step": 25606 }, { "epoch": 0.7476278064874018, "grad_norm": 0.6327363143736396, "learning_rate": 5.608434712084348e-06, "loss": 0.5949, "step": 25607 }, { "epoch": 0.7476570027152492, "grad_norm": 0.6607722012628445, "learning_rate": 5.607785888077859e-06, "loss": 0.5646, "step": 25608 }, { "epoch": 0.7476861989430965, "grad_norm": 0.645823652374784, "learning_rate": 5.607137064071371e-06, "loss": 0.5975, "step": 25609 }, { "epoch": 0.7477153951709439, "grad_norm": 0.6522977350353181, "learning_rate": 5.606488240064882e-06, "loss": 0.573, "step": 25610 }, { "epoch": 0.7477445913987912, "grad_norm": 0.6095683506643, "learning_rate": 5.605839416058395e-06, "loss": 0.5791, "step": 25611 }, { "epoch": 0.7477737876266386, "grad_norm": 0.6791010094966194, "learning_rate": 5.605190592051907e-06, "loss": 0.664, "step": 25612 }, { "epoch": 0.747802983854486, "grad_norm": 0.671539217276263, "learning_rate": 5.604541768045418e-06, "loss": 0.6575, "step": 25613 }, { "epoch": 0.7478321800823333, "grad_norm": 0.683749382711235, "learning_rate": 5.60389294403893e-06, "loss": 0.6529, "step": 25614 }, { "epoch": 0.7478613763101807, "grad_norm": 0.6090904827685409, "learning_rate": 5.603244120032441e-06, "loss": 0.5683, "step": 25615 }, { "epoch": 0.747890572538028, "grad_norm": 0.6274571776326205, "learning_rate": 5.602595296025953e-06, "loss": 0.5796, "step": 25616 }, { "epoch": 0.7479197687658754, "grad_norm": 0.6413691254863118, "learning_rate": 5.601946472019465e-06, "loss": 0.6154, "step": 25617 }, { "epoch": 0.7479489649937228, "grad_norm": 0.6282480115753714, "learning_rate": 5.6012976480129765e-06, "loss": 0.57, "step": 25618 }, { "epoch": 0.7479781612215701, "grad_norm": 0.657344123808757, "learning_rate": 5.6006488240064886e-06, "loss": 0.6143, "step": 25619 }, { "epoch": 0.7480073574494175, "grad_norm": 0.6356079899869632, "learning_rate": 5.600000000000001e-06, "loss": 0.582, "step": 25620 }, { "epoch": 0.7480365536772648, "grad_norm": 0.6512731520100977, "learning_rate": 5.599351175993513e-06, "loss": 0.5748, "step": 25621 }, { "epoch": 0.7480657499051122, "grad_norm": 0.6330320263325202, "learning_rate": 5.598702351987025e-06, "loss": 0.5679, "step": 25622 }, { "epoch": 0.7480949461329596, "grad_norm": 0.6632786699163513, "learning_rate": 5.598053527980536e-06, "loss": 0.6336, "step": 25623 }, { "epoch": 0.7481241423608069, "grad_norm": 0.6686731805977986, "learning_rate": 5.597404703974048e-06, "loss": 0.6032, "step": 25624 }, { "epoch": 0.7481533385886543, "grad_norm": 0.6862924892314869, "learning_rate": 5.596755879967559e-06, "loss": 0.64, "step": 25625 }, { "epoch": 0.7481825348165017, "grad_norm": 0.6334003702267332, "learning_rate": 5.596107055961071e-06, "loss": 0.6254, "step": 25626 }, { "epoch": 0.748211731044349, "grad_norm": 0.6276990729220681, "learning_rate": 5.595458231954582e-06, "loss": 0.5303, "step": 25627 }, { "epoch": 0.7482409272721965, "grad_norm": 0.6173039040335178, "learning_rate": 5.594809407948094e-06, "loss": 0.5478, "step": 25628 }, { "epoch": 0.7482701235000438, "grad_norm": 0.6200699821066709, "learning_rate": 5.5941605839416055e-06, "loss": 0.586, "step": 25629 }, { "epoch": 0.7482993197278912, "grad_norm": 0.6585086369380333, "learning_rate": 5.593511759935118e-06, "loss": 0.6091, "step": 25630 }, { "epoch": 0.7483285159557386, "grad_norm": 0.7393813536321071, "learning_rate": 5.59286293592863e-06, "loss": 0.6242, "step": 25631 }, { "epoch": 0.7483577121835859, "grad_norm": 0.65099376205725, "learning_rate": 5.5922141119221415e-06, "loss": 0.6024, "step": 25632 }, { "epoch": 0.7483869084114333, "grad_norm": 0.6257247871744481, "learning_rate": 5.5915652879156536e-06, "loss": 0.5807, "step": 25633 }, { "epoch": 0.7484161046392807, "grad_norm": 0.705777492197339, "learning_rate": 5.590916463909165e-06, "loss": 0.7018, "step": 25634 }, { "epoch": 0.748445300867128, "grad_norm": 0.6775082190219985, "learning_rate": 5.590267639902677e-06, "loss": 0.5716, "step": 25635 }, { "epoch": 0.7484744970949754, "grad_norm": 0.6558289565146223, "learning_rate": 5.589618815896189e-06, "loss": 0.6375, "step": 25636 }, { "epoch": 0.7485036933228227, "grad_norm": 0.6118996960610089, "learning_rate": 5.5889699918897e-06, "loss": 0.5496, "step": 25637 }, { "epoch": 0.7485328895506701, "grad_norm": 0.6792418920311525, "learning_rate": 5.588321167883212e-06, "loss": 0.658, "step": 25638 }, { "epoch": 0.7485620857785175, "grad_norm": 0.5975257665307006, "learning_rate": 5.587672343876724e-06, "loss": 0.5434, "step": 25639 }, { "epoch": 0.7485912820063648, "grad_norm": 0.6306661567134646, "learning_rate": 5.587023519870236e-06, "loss": 0.623, "step": 25640 }, { "epoch": 0.7486204782342122, "grad_norm": 0.6396221991653428, "learning_rate": 5.586374695863748e-06, "loss": 0.6288, "step": 25641 }, { "epoch": 0.7486496744620595, "grad_norm": 0.6554337653596934, "learning_rate": 5.585725871857259e-06, "loss": 0.568, "step": 25642 }, { "epoch": 0.7486788706899069, "grad_norm": 0.6597133269575323, "learning_rate": 5.585077047850771e-06, "loss": 0.6544, "step": 25643 }, { "epoch": 0.7487080669177543, "grad_norm": 0.6174036749751084, "learning_rate": 5.5844282238442825e-06, "loss": 0.5742, "step": 25644 }, { "epoch": 0.7487372631456016, "grad_norm": 0.664984934944539, "learning_rate": 5.5837793998377945e-06, "loss": 0.6103, "step": 25645 }, { "epoch": 0.748766459373449, "grad_norm": 0.6646541695537969, "learning_rate": 5.583130575831306e-06, "loss": 0.6035, "step": 25646 }, { "epoch": 0.7487956556012964, "grad_norm": 0.6894520075825286, "learning_rate": 5.582481751824818e-06, "loss": 0.6856, "step": 25647 }, { "epoch": 0.7488248518291437, "grad_norm": 0.5672212287648171, "learning_rate": 5.581832927818329e-06, "loss": 0.4766, "step": 25648 }, { "epoch": 0.7488540480569911, "grad_norm": 0.6877126901053214, "learning_rate": 5.581184103811842e-06, "loss": 0.6284, "step": 25649 }, { "epoch": 0.7488832442848384, "grad_norm": 0.6219869240603357, "learning_rate": 5.580535279805354e-06, "loss": 0.5565, "step": 25650 }, { "epoch": 0.7489124405126858, "grad_norm": 0.6227629569480081, "learning_rate": 5.579886455798865e-06, "loss": 0.5789, "step": 25651 }, { "epoch": 0.7489416367405332, "grad_norm": 0.6621409441402579, "learning_rate": 5.579237631792377e-06, "loss": 0.6377, "step": 25652 }, { "epoch": 0.7489708329683805, "grad_norm": 0.6292671591897592, "learning_rate": 5.578588807785888e-06, "loss": 0.5869, "step": 25653 }, { "epoch": 0.7490000291962279, "grad_norm": 0.6606526919549269, "learning_rate": 5.5779399837794e-06, "loss": 0.5899, "step": 25654 }, { "epoch": 0.7490292254240752, "grad_norm": 0.6080445419280817, "learning_rate": 5.577291159772911e-06, "loss": 0.5879, "step": 25655 }, { "epoch": 0.7490584216519226, "grad_norm": 0.6534178057759572, "learning_rate": 5.576642335766423e-06, "loss": 0.631, "step": 25656 }, { "epoch": 0.74908761787977, "grad_norm": 0.6985820318467971, "learning_rate": 5.5759935117599354e-06, "loss": 0.6612, "step": 25657 }, { "epoch": 0.7491168141076173, "grad_norm": 0.6295615523516878, "learning_rate": 5.5753446877534475e-06, "loss": 0.5524, "step": 25658 }, { "epoch": 0.7491460103354647, "grad_norm": 0.6676727781360648, "learning_rate": 5.5746958637469595e-06, "loss": 0.5744, "step": 25659 }, { "epoch": 0.749175206563312, "grad_norm": 0.6609825693255366, "learning_rate": 5.5740470397404715e-06, "loss": 0.6137, "step": 25660 }, { "epoch": 0.7492044027911594, "grad_norm": 0.6697188198913797, "learning_rate": 5.573398215733983e-06, "loss": 0.6783, "step": 25661 }, { "epoch": 0.7492335990190068, "grad_norm": 0.6563731210597402, "learning_rate": 5.572749391727495e-06, "loss": 0.6002, "step": 25662 }, { "epoch": 0.7492627952468541, "grad_norm": 0.6400407887154905, "learning_rate": 5.572100567721006e-06, "loss": 0.6247, "step": 25663 }, { "epoch": 0.7492919914747015, "grad_norm": 0.6481944716612503, "learning_rate": 5.571451743714518e-06, "loss": 0.5839, "step": 25664 }, { "epoch": 0.7493211877025489, "grad_norm": 0.6343252051322132, "learning_rate": 5.570802919708029e-06, "loss": 0.5772, "step": 25665 }, { "epoch": 0.7493503839303962, "grad_norm": 0.6144879836735979, "learning_rate": 5.570154095701541e-06, "loss": 0.5395, "step": 25666 }, { "epoch": 0.7493795801582436, "grad_norm": 0.6243915657151123, "learning_rate": 5.569505271695052e-06, "loss": 0.5701, "step": 25667 }, { "epoch": 0.7494087763860909, "grad_norm": 0.6384932653955032, "learning_rate": 5.568856447688565e-06, "loss": 0.6242, "step": 25668 }, { "epoch": 0.7494379726139383, "grad_norm": 0.7554298625471567, "learning_rate": 5.568207623682077e-06, "loss": 0.7672, "step": 25669 }, { "epoch": 0.7494671688417857, "grad_norm": 0.6430293108407582, "learning_rate": 5.567558799675588e-06, "loss": 0.5956, "step": 25670 }, { "epoch": 0.749496365069633, "grad_norm": 0.6255131690014041, "learning_rate": 5.5669099756691004e-06, "loss": 0.5881, "step": 25671 }, { "epoch": 0.7495255612974804, "grad_norm": 0.5992268996848514, "learning_rate": 5.566261151662612e-06, "loss": 0.5444, "step": 25672 }, { "epoch": 0.7495547575253277, "grad_norm": 0.6196151643539498, "learning_rate": 5.565612327656124e-06, "loss": 0.5348, "step": 25673 }, { "epoch": 0.7495839537531751, "grad_norm": 0.6119838180403645, "learning_rate": 5.564963503649635e-06, "loss": 0.5593, "step": 25674 }, { "epoch": 0.7496131499810225, "grad_norm": 0.6912660727481537, "learning_rate": 5.564314679643147e-06, "loss": 0.6526, "step": 25675 }, { "epoch": 0.7496423462088698, "grad_norm": 0.6398382997681024, "learning_rate": 5.563665855636659e-06, "loss": 0.5576, "step": 25676 }, { "epoch": 0.7496715424367172, "grad_norm": 0.6561078431663521, "learning_rate": 5.563017031630171e-06, "loss": 0.5844, "step": 25677 }, { "epoch": 0.7497007386645645, "grad_norm": 0.5813430767147013, "learning_rate": 5.562368207623683e-06, "loss": 0.492, "step": 25678 }, { "epoch": 0.7497299348924119, "grad_norm": 0.6412343639366485, "learning_rate": 5.561719383617195e-06, "loss": 0.5966, "step": 25679 }, { "epoch": 0.7497591311202593, "grad_norm": 0.6484921314993891, "learning_rate": 5.561070559610706e-06, "loss": 0.6107, "step": 25680 }, { "epoch": 0.7497883273481066, "grad_norm": 0.6432951715358918, "learning_rate": 5.560421735604218e-06, "loss": 0.5567, "step": 25681 }, { "epoch": 0.749817523575954, "grad_norm": 0.6600118891816731, "learning_rate": 5.559772911597729e-06, "loss": 0.561, "step": 25682 }, { "epoch": 0.7498467198038014, "grad_norm": 0.6603456913197685, "learning_rate": 5.559124087591241e-06, "loss": 0.5839, "step": 25683 }, { "epoch": 0.7498759160316487, "grad_norm": 0.6541818643583059, "learning_rate": 5.5584752635847525e-06, "loss": 0.5891, "step": 25684 }, { "epoch": 0.7499051122594961, "grad_norm": 0.631348098258949, "learning_rate": 5.5578264395782646e-06, "loss": 0.6171, "step": 25685 }, { "epoch": 0.7499343084873434, "grad_norm": 0.6529947293336941, "learning_rate": 5.5571776155717774e-06, "loss": 0.6563, "step": 25686 }, { "epoch": 0.7499635047151908, "grad_norm": 0.6487134804447545, "learning_rate": 5.556528791565289e-06, "loss": 0.5179, "step": 25687 }, { "epoch": 0.7499927009430382, "grad_norm": 0.6420270181377778, "learning_rate": 5.555879967558801e-06, "loss": 0.6082, "step": 25688 }, { "epoch": 0.7500218971708855, "grad_norm": 0.6342789716691806, "learning_rate": 5.555231143552312e-06, "loss": 0.5786, "step": 25689 }, { "epoch": 0.7500510933987329, "grad_norm": 0.6270736707315402, "learning_rate": 5.554582319545824e-06, "loss": 0.5969, "step": 25690 }, { "epoch": 0.7500802896265802, "grad_norm": 0.6626628076683778, "learning_rate": 5.553933495539335e-06, "loss": 0.649, "step": 25691 }, { "epoch": 0.7501094858544276, "grad_norm": 0.7097670138314071, "learning_rate": 5.553284671532847e-06, "loss": 0.6707, "step": 25692 }, { "epoch": 0.750138682082275, "grad_norm": 0.6957580663920471, "learning_rate": 5.552635847526358e-06, "loss": 0.6709, "step": 25693 }, { "epoch": 0.7501678783101223, "grad_norm": 0.6472134347062385, "learning_rate": 5.55198702351987e-06, "loss": 0.608, "step": 25694 }, { "epoch": 0.7501970745379697, "grad_norm": 0.6464262180126403, "learning_rate": 5.551338199513382e-06, "loss": 0.6166, "step": 25695 }, { "epoch": 0.750226270765817, "grad_norm": 0.6716488491348143, "learning_rate": 5.550689375506894e-06, "loss": 0.5992, "step": 25696 }, { "epoch": 0.7502554669936644, "grad_norm": 0.7031863508996689, "learning_rate": 5.550040551500406e-06, "loss": 0.6273, "step": 25697 }, { "epoch": 0.7502846632215118, "grad_norm": 0.661178281037458, "learning_rate": 5.549391727493918e-06, "loss": 0.6614, "step": 25698 }, { "epoch": 0.7503138594493591, "grad_norm": 0.6249347127404961, "learning_rate": 5.5487429034874296e-06, "loss": 0.5825, "step": 25699 }, { "epoch": 0.7503430556772065, "grad_norm": 0.6241817177883431, "learning_rate": 5.548094079480942e-06, "loss": 0.595, "step": 25700 }, { "epoch": 0.7503722519050539, "grad_norm": 0.6607802286839792, "learning_rate": 5.547445255474453e-06, "loss": 0.5987, "step": 25701 }, { "epoch": 0.7504014481329012, "grad_norm": 0.673112514034753, "learning_rate": 5.546796431467965e-06, "loss": 0.6401, "step": 25702 }, { "epoch": 0.7504306443607486, "grad_norm": 0.6262376979530856, "learning_rate": 5.546147607461476e-06, "loss": 0.5406, "step": 25703 }, { "epoch": 0.7504598405885959, "grad_norm": 0.6410179338944878, "learning_rate": 5.545498783454988e-06, "loss": 0.5712, "step": 25704 }, { "epoch": 0.7504890368164433, "grad_norm": 0.6414756358749097, "learning_rate": 5.544849959448501e-06, "loss": 0.6022, "step": 25705 }, { "epoch": 0.7505182330442907, "grad_norm": 0.6966563307482134, "learning_rate": 5.544201135442012e-06, "loss": 0.6782, "step": 25706 }, { "epoch": 0.750547429272138, "grad_norm": 0.6140839930295403, "learning_rate": 5.543552311435524e-06, "loss": 0.5673, "step": 25707 }, { "epoch": 0.7505766254999854, "grad_norm": 0.6394796845384602, "learning_rate": 5.542903487429035e-06, "loss": 0.5843, "step": 25708 }, { "epoch": 0.7506058217278327, "grad_norm": 0.6440188783292323, "learning_rate": 5.542254663422547e-06, "loss": 0.6203, "step": 25709 }, { "epoch": 0.7506350179556801, "grad_norm": 0.6178046374141236, "learning_rate": 5.5416058394160585e-06, "loss": 0.5464, "step": 25710 }, { "epoch": 0.7506642141835275, "grad_norm": 0.5972608161068316, "learning_rate": 5.5409570154095705e-06, "loss": 0.5345, "step": 25711 }, { "epoch": 0.7506934104113748, "grad_norm": 0.6403815729723465, "learning_rate": 5.540308191403082e-06, "loss": 0.6013, "step": 25712 }, { "epoch": 0.7507226066392222, "grad_norm": 0.610250134107033, "learning_rate": 5.539659367396594e-06, "loss": 0.5632, "step": 25713 }, { "epoch": 0.7507518028670696, "grad_norm": 0.7406799124357316, "learning_rate": 5.539010543390106e-06, "loss": 0.7631, "step": 25714 }, { "epoch": 0.7507809990949169, "grad_norm": 0.6575020068345384, "learning_rate": 5.538361719383618e-06, "loss": 0.5924, "step": 25715 }, { "epoch": 0.7508101953227643, "grad_norm": 0.7170253427935213, "learning_rate": 5.53771289537713e-06, "loss": 0.5896, "step": 25716 }, { "epoch": 0.7508393915506116, "grad_norm": 0.582353881520113, "learning_rate": 5.537064071370641e-06, "loss": 0.5367, "step": 25717 }, { "epoch": 0.750868587778459, "grad_norm": 0.6215247305516153, "learning_rate": 5.536415247364153e-06, "loss": 0.5624, "step": 25718 }, { "epoch": 0.7508977840063064, "grad_norm": 0.6619836135097172, "learning_rate": 5.535766423357665e-06, "loss": 0.6054, "step": 25719 }, { "epoch": 0.7509269802341537, "grad_norm": 0.6625237550597722, "learning_rate": 5.535117599351176e-06, "loss": 0.6248, "step": 25720 }, { "epoch": 0.7509561764620011, "grad_norm": 0.6597111694314254, "learning_rate": 5.534468775344688e-06, "loss": 0.5876, "step": 25721 }, { "epoch": 0.7509853726898484, "grad_norm": 0.694176923435364, "learning_rate": 5.533819951338199e-06, "loss": 0.7154, "step": 25722 }, { "epoch": 0.7510145689176958, "grad_norm": 0.5980521026483552, "learning_rate": 5.5331711273317114e-06, "loss": 0.5037, "step": 25723 }, { "epoch": 0.7510437651455432, "grad_norm": 0.6583208552490477, "learning_rate": 5.532522303325224e-06, "loss": 0.5963, "step": 25724 }, { "epoch": 0.7510729613733905, "grad_norm": 0.7264258249654131, "learning_rate": 5.5318734793187355e-06, "loss": 0.6816, "step": 25725 }, { "epoch": 0.7511021576012379, "grad_norm": 0.6772808002635549, "learning_rate": 5.5312246553122475e-06, "loss": 0.6388, "step": 25726 }, { "epoch": 0.7511313538290852, "grad_norm": 0.6555435477211191, "learning_rate": 5.530575831305759e-06, "loss": 0.6071, "step": 25727 }, { "epoch": 0.7511605500569326, "grad_norm": 0.6926256567640925, "learning_rate": 5.529927007299271e-06, "loss": 0.7002, "step": 25728 }, { "epoch": 0.75118974628478, "grad_norm": 0.6608376019388789, "learning_rate": 5.529278183292782e-06, "loss": 0.5891, "step": 25729 }, { "epoch": 0.7512189425126273, "grad_norm": 0.6489097378935366, "learning_rate": 5.528629359286294e-06, "loss": 0.6027, "step": 25730 }, { "epoch": 0.7512481387404747, "grad_norm": 0.6892480505054445, "learning_rate": 5.527980535279805e-06, "loss": 0.6799, "step": 25731 }, { "epoch": 0.751277334968322, "grad_norm": 0.676377681476548, "learning_rate": 5.527331711273317e-06, "loss": 0.6428, "step": 25732 }, { "epoch": 0.7513065311961694, "grad_norm": 0.6202550523484996, "learning_rate": 5.526682887266829e-06, "loss": 0.5609, "step": 25733 }, { "epoch": 0.7513357274240168, "grad_norm": 0.6591266363831069, "learning_rate": 5.526034063260341e-06, "loss": 0.5886, "step": 25734 }, { "epoch": 0.7513649236518641, "grad_norm": 0.6152605919095828, "learning_rate": 5.525385239253853e-06, "loss": 0.5742, "step": 25735 }, { "epoch": 0.7513941198797115, "grad_norm": 0.6356488483151852, "learning_rate": 5.524736415247364e-06, "loss": 0.5985, "step": 25736 }, { "epoch": 0.7514233161075589, "grad_norm": 0.6397347999097338, "learning_rate": 5.5240875912408764e-06, "loss": 0.6125, "step": 25737 }, { "epoch": 0.7514525123354062, "grad_norm": 0.6917360408746738, "learning_rate": 5.5234387672343885e-06, "loss": 0.6848, "step": 25738 }, { "epoch": 0.7514817085632536, "grad_norm": 0.6291166682861467, "learning_rate": 5.5227899432279e-06, "loss": 0.5744, "step": 25739 }, { "epoch": 0.7515109047911009, "grad_norm": 0.6749606415005343, "learning_rate": 5.522141119221412e-06, "loss": 0.6376, "step": 25740 }, { "epoch": 0.7515401010189483, "grad_norm": 0.6192093650205176, "learning_rate": 5.521492295214923e-06, "loss": 0.5821, "step": 25741 }, { "epoch": 0.7515692972467957, "grad_norm": 0.6145166349540645, "learning_rate": 5.520843471208435e-06, "loss": 0.5852, "step": 25742 }, { "epoch": 0.751598493474643, "grad_norm": 0.6226937668274519, "learning_rate": 5.520194647201948e-06, "loss": 0.5624, "step": 25743 }, { "epoch": 0.7516276897024904, "grad_norm": 0.7358409482294291, "learning_rate": 5.519545823195459e-06, "loss": 0.7062, "step": 25744 }, { "epoch": 0.7516568859303377, "grad_norm": 0.6682428246365554, "learning_rate": 5.518896999188971e-06, "loss": 0.59, "step": 25745 }, { "epoch": 0.7516860821581851, "grad_norm": 0.6307147348552421, "learning_rate": 5.518248175182482e-06, "loss": 0.5797, "step": 25746 }, { "epoch": 0.7517152783860325, "grad_norm": 0.5859079987074126, "learning_rate": 5.517599351175994e-06, "loss": 0.4781, "step": 25747 }, { "epoch": 0.7517444746138798, "grad_norm": 0.7006370750987119, "learning_rate": 5.516950527169505e-06, "loss": 0.6784, "step": 25748 }, { "epoch": 0.7517736708417273, "grad_norm": 0.6689035705114154, "learning_rate": 5.516301703163017e-06, "loss": 0.6533, "step": 25749 }, { "epoch": 0.7518028670695747, "grad_norm": 0.6607488131325322, "learning_rate": 5.5156528791565286e-06, "loss": 0.5833, "step": 25750 }, { "epoch": 0.751832063297422, "grad_norm": 0.6163822404817894, "learning_rate": 5.515004055150041e-06, "loss": 0.5607, "step": 25751 }, { "epoch": 0.7518612595252694, "grad_norm": 0.6778537157599992, "learning_rate": 5.5143552311435535e-06, "loss": 0.6462, "step": 25752 }, { "epoch": 0.7518904557531167, "grad_norm": 0.6789565337066045, "learning_rate": 5.513706407137065e-06, "loss": 0.6291, "step": 25753 }, { "epoch": 0.7519196519809641, "grad_norm": 0.6683689611480925, "learning_rate": 5.513057583130577e-06, "loss": 0.6103, "step": 25754 }, { "epoch": 0.7519488482088115, "grad_norm": 0.596377288957586, "learning_rate": 5.512408759124088e-06, "loss": 0.512, "step": 25755 }, { "epoch": 0.7519780444366588, "grad_norm": 0.6804709301261511, "learning_rate": 5.5117599351176e-06, "loss": 0.6354, "step": 25756 }, { "epoch": 0.7520072406645062, "grad_norm": 0.6395796299033165, "learning_rate": 5.511111111111112e-06, "loss": 0.5756, "step": 25757 }, { "epoch": 0.7520364368923536, "grad_norm": 0.6531040947662331, "learning_rate": 5.510462287104623e-06, "loss": 0.6451, "step": 25758 }, { "epoch": 0.7520656331202009, "grad_norm": 0.6469277223611579, "learning_rate": 5.509813463098135e-06, "loss": 0.5894, "step": 25759 }, { "epoch": 0.7520948293480483, "grad_norm": 0.8162851872838407, "learning_rate": 5.509164639091646e-06, "loss": 0.7058, "step": 25760 }, { "epoch": 0.7521240255758956, "grad_norm": 0.6607653029236605, "learning_rate": 5.508515815085158e-06, "loss": 0.6462, "step": 25761 }, { "epoch": 0.752153221803743, "grad_norm": 0.6123111787884327, "learning_rate": 5.507866991078671e-06, "loss": 0.5272, "step": 25762 }, { "epoch": 0.7521824180315904, "grad_norm": 0.6430645086408683, "learning_rate": 5.507218167072182e-06, "loss": 0.5867, "step": 25763 }, { "epoch": 0.7522116142594377, "grad_norm": 0.6705378847833294, "learning_rate": 5.506569343065694e-06, "loss": 0.6493, "step": 25764 }, { "epoch": 0.7522408104872851, "grad_norm": 0.6381549636544003, "learning_rate": 5.5059205190592056e-06, "loss": 0.6047, "step": 25765 }, { "epoch": 0.7522700067151324, "grad_norm": 0.6617271801746027, "learning_rate": 5.505271695052718e-06, "loss": 0.6098, "step": 25766 }, { "epoch": 0.7522992029429798, "grad_norm": 0.645441823561434, "learning_rate": 5.504622871046229e-06, "loss": 0.6116, "step": 25767 }, { "epoch": 0.7523283991708272, "grad_norm": 0.6565385972300213, "learning_rate": 5.503974047039741e-06, "loss": 0.6255, "step": 25768 }, { "epoch": 0.7523575953986745, "grad_norm": 0.649962369109005, "learning_rate": 5.503325223033252e-06, "loss": 0.6021, "step": 25769 }, { "epoch": 0.7523867916265219, "grad_norm": 0.6875580497719876, "learning_rate": 5.502676399026764e-06, "loss": 0.6589, "step": 25770 }, { "epoch": 0.7524159878543693, "grad_norm": 0.6339400809832105, "learning_rate": 5.502027575020277e-06, "loss": 0.5665, "step": 25771 }, { "epoch": 0.7524451840822166, "grad_norm": 0.7218090954300536, "learning_rate": 5.501378751013788e-06, "loss": 0.6675, "step": 25772 }, { "epoch": 0.752474380310064, "grad_norm": 0.634766655709593, "learning_rate": 5.5007299270073e-06, "loss": 0.6293, "step": 25773 }, { "epoch": 0.7525035765379113, "grad_norm": 0.7080768591458558, "learning_rate": 5.500081103000811e-06, "loss": 0.6954, "step": 25774 }, { "epoch": 0.7525327727657587, "grad_norm": 0.7011116626548202, "learning_rate": 5.499432278994323e-06, "loss": 0.6527, "step": 25775 }, { "epoch": 0.7525619689936061, "grad_norm": 0.5995322264738089, "learning_rate": 5.498783454987835e-06, "loss": 0.5107, "step": 25776 }, { "epoch": 0.7525911652214534, "grad_norm": 0.677434591030984, "learning_rate": 5.4981346309813465e-06, "loss": 0.5656, "step": 25777 }, { "epoch": 0.7526203614493008, "grad_norm": 0.6837274082746558, "learning_rate": 5.4974858069748585e-06, "loss": 0.625, "step": 25778 }, { "epoch": 0.7526495576771481, "grad_norm": 0.6651711114111063, "learning_rate": 5.49683698296837e-06, "loss": 0.623, "step": 25779 }, { "epoch": 0.7526787539049955, "grad_norm": 0.6394275347036441, "learning_rate": 5.496188158961882e-06, "loss": 0.5979, "step": 25780 }, { "epoch": 0.7527079501328429, "grad_norm": 0.6453889783561539, "learning_rate": 5.495539334955395e-06, "loss": 0.5776, "step": 25781 }, { "epoch": 0.7527371463606902, "grad_norm": 0.7839258079228305, "learning_rate": 5.494890510948906e-06, "loss": 0.5899, "step": 25782 }, { "epoch": 0.7527663425885376, "grad_norm": 0.719614665591997, "learning_rate": 5.494241686942418e-06, "loss": 0.6281, "step": 25783 }, { "epoch": 0.752795538816385, "grad_norm": 0.6141376571151743, "learning_rate": 5.493592862935929e-06, "loss": 0.5629, "step": 25784 }, { "epoch": 0.7528247350442323, "grad_norm": 0.6388998911464712, "learning_rate": 5.492944038929441e-06, "loss": 0.5916, "step": 25785 }, { "epoch": 0.7528539312720797, "grad_norm": 0.6697779080287559, "learning_rate": 5.492295214922952e-06, "loss": 0.6187, "step": 25786 }, { "epoch": 0.752883127499927, "grad_norm": 0.6334123849049242, "learning_rate": 5.491646390916464e-06, "loss": 0.608, "step": 25787 }, { "epoch": 0.7529123237277744, "grad_norm": 0.6115430957133636, "learning_rate": 5.490997566909975e-06, "loss": 0.5387, "step": 25788 }, { "epoch": 0.7529415199556218, "grad_norm": 0.6933696447589804, "learning_rate": 5.4903487429034874e-06, "loss": 0.6346, "step": 25789 }, { "epoch": 0.7529707161834691, "grad_norm": 0.7090654418381265, "learning_rate": 5.489699918897e-06, "loss": 0.6581, "step": 25790 }, { "epoch": 0.7529999124113165, "grad_norm": 0.6253522198493754, "learning_rate": 5.4890510948905115e-06, "loss": 0.5451, "step": 25791 }, { "epoch": 0.7530291086391638, "grad_norm": 0.6786657985290363, "learning_rate": 5.4884022708840235e-06, "loss": 0.5741, "step": 25792 }, { "epoch": 0.7530583048670112, "grad_norm": 0.6357416370013489, "learning_rate": 5.487753446877535e-06, "loss": 0.5578, "step": 25793 }, { "epoch": 0.7530875010948586, "grad_norm": 0.6195664018448125, "learning_rate": 5.487104622871047e-06, "loss": 0.5556, "step": 25794 }, { "epoch": 0.7531166973227059, "grad_norm": 0.644575444584212, "learning_rate": 5.486455798864559e-06, "loss": 0.5823, "step": 25795 }, { "epoch": 0.7531458935505533, "grad_norm": 0.6292968742596842, "learning_rate": 5.48580697485807e-06, "loss": 0.5782, "step": 25796 }, { "epoch": 0.7531750897784006, "grad_norm": 0.6441852860028012, "learning_rate": 5.485158150851582e-06, "loss": 0.6217, "step": 25797 }, { "epoch": 0.753204286006248, "grad_norm": 0.6544595517172878, "learning_rate": 5.484509326845093e-06, "loss": 0.6048, "step": 25798 }, { "epoch": 0.7532334822340954, "grad_norm": 0.6000766740675486, "learning_rate": 5.483860502838605e-06, "loss": 0.5413, "step": 25799 }, { "epoch": 0.7532626784619427, "grad_norm": 0.6714921065351112, "learning_rate": 5.483211678832118e-06, "loss": 0.6191, "step": 25800 }, { "epoch": 0.7532918746897901, "grad_norm": 0.5950143027405409, "learning_rate": 5.482562854825629e-06, "loss": 0.5541, "step": 25801 }, { "epoch": 0.7533210709176374, "grad_norm": 0.6696157656124375, "learning_rate": 5.481914030819141e-06, "loss": 0.6506, "step": 25802 }, { "epoch": 0.7533502671454848, "grad_norm": 0.63029207598407, "learning_rate": 5.4812652068126524e-06, "loss": 0.5585, "step": 25803 }, { "epoch": 0.7533794633733322, "grad_norm": 0.681446093423926, "learning_rate": 5.4806163828061645e-06, "loss": 0.666, "step": 25804 }, { "epoch": 0.7534086596011795, "grad_norm": 0.6900210032603853, "learning_rate": 5.479967558799676e-06, "loss": 0.6538, "step": 25805 }, { "epoch": 0.7534378558290269, "grad_norm": 0.6815665065943801, "learning_rate": 5.479318734793188e-06, "loss": 0.6667, "step": 25806 }, { "epoch": 0.7534670520568743, "grad_norm": 0.6412422956502964, "learning_rate": 5.478669910786699e-06, "loss": 0.6046, "step": 25807 }, { "epoch": 0.7534962482847216, "grad_norm": 0.63163778108181, "learning_rate": 5.478021086780211e-06, "loss": 0.5822, "step": 25808 }, { "epoch": 0.753525444512569, "grad_norm": 0.6171274236308133, "learning_rate": 5.477372262773724e-06, "loss": 0.5798, "step": 25809 }, { "epoch": 0.7535546407404163, "grad_norm": 0.6718502993122402, "learning_rate": 5.476723438767235e-06, "loss": 0.649, "step": 25810 }, { "epoch": 0.7535838369682637, "grad_norm": 0.7133090094252378, "learning_rate": 5.476074614760747e-06, "loss": 0.6287, "step": 25811 }, { "epoch": 0.7536130331961111, "grad_norm": 0.6179514123931441, "learning_rate": 5.475425790754258e-06, "loss": 0.5785, "step": 25812 }, { "epoch": 0.7536422294239584, "grad_norm": 0.6296109564935329, "learning_rate": 5.47477696674777e-06, "loss": 0.5909, "step": 25813 }, { "epoch": 0.7536714256518058, "grad_norm": 0.6674946007951569, "learning_rate": 5.474128142741282e-06, "loss": 0.6177, "step": 25814 }, { "epoch": 0.7537006218796531, "grad_norm": 0.5924379655881507, "learning_rate": 5.473479318734793e-06, "loss": 0.5222, "step": 25815 }, { "epoch": 0.7537298181075005, "grad_norm": 0.6145891099695616, "learning_rate": 5.472830494728305e-06, "loss": 0.5591, "step": 25816 }, { "epoch": 0.7537590143353479, "grad_norm": 0.6292350491242257, "learning_rate": 5.472181670721817e-06, "loss": 0.5477, "step": 25817 }, { "epoch": 0.7537882105631952, "grad_norm": 0.6000862815349721, "learning_rate": 5.471532846715329e-06, "loss": 0.4873, "step": 25818 }, { "epoch": 0.7538174067910426, "grad_norm": 0.7070119131164782, "learning_rate": 5.4708840227088415e-06, "loss": 0.6671, "step": 25819 }, { "epoch": 0.75384660301889, "grad_norm": 0.6448854793383324, "learning_rate": 5.470235198702353e-06, "loss": 0.5945, "step": 25820 }, { "epoch": 0.7538757992467373, "grad_norm": 0.6002504875191128, "learning_rate": 5.469586374695865e-06, "loss": 0.5184, "step": 25821 }, { "epoch": 0.7539049954745847, "grad_norm": 0.6184796171836822, "learning_rate": 5.468937550689376e-06, "loss": 0.5619, "step": 25822 }, { "epoch": 0.753934191702432, "grad_norm": 0.6544184281444656, "learning_rate": 5.468288726682888e-06, "loss": 0.5681, "step": 25823 }, { "epoch": 0.7539633879302794, "grad_norm": 0.6137743955938262, "learning_rate": 5.467639902676399e-06, "loss": 0.558, "step": 25824 }, { "epoch": 0.7539925841581268, "grad_norm": 0.646822902775442, "learning_rate": 5.466991078669911e-06, "loss": 0.5886, "step": 25825 }, { "epoch": 0.7540217803859741, "grad_norm": 0.6219542014327462, "learning_rate": 5.466342254663422e-06, "loss": 0.5678, "step": 25826 }, { "epoch": 0.7540509766138215, "grad_norm": 0.6743550468478452, "learning_rate": 5.465693430656934e-06, "loss": 0.6059, "step": 25827 }, { "epoch": 0.7540801728416688, "grad_norm": 0.6414733124608996, "learning_rate": 5.465044606650447e-06, "loss": 0.5849, "step": 25828 }, { "epoch": 0.7541093690695162, "grad_norm": 0.6427709324033111, "learning_rate": 5.464395782643958e-06, "loss": 0.5848, "step": 25829 }, { "epoch": 0.7541385652973636, "grad_norm": 0.690806397265673, "learning_rate": 5.46374695863747e-06, "loss": 0.6415, "step": 25830 }, { "epoch": 0.7541677615252109, "grad_norm": 0.6330736948172587, "learning_rate": 5.463098134630982e-06, "loss": 0.5903, "step": 25831 }, { "epoch": 0.7541969577530583, "grad_norm": 0.6376645187023641, "learning_rate": 5.462449310624494e-06, "loss": 0.5723, "step": 25832 }, { "epoch": 0.7542261539809056, "grad_norm": 0.6487300891663916, "learning_rate": 5.461800486618006e-06, "loss": 0.5935, "step": 25833 }, { "epoch": 0.754255350208753, "grad_norm": 0.6590164583648748, "learning_rate": 5.461151662611517e-06, "loss": 0.6521, "step": 25834 }, { "epoch": 0.7542845464366004, "grad_norm": 0.6782596793165804, "learning_rate": 5.460502838605029e-06, "loss": 0.6822, "step": 25835 }, { "epoch": 0.7543137426644477, "grad_norm": 0.6695879293305581, "learning_rate": 5.45985401459854e-06, "loss": 0.6515, "step": 25836 }, { "epoch": 0.7543429388922951, "grad_norm": 0.5709074895671856, "learning_rate": 5.459205190592053e-06, "loss": 0.4811, "step": 25837 }, { "epoch": 0.7543721351201425, "grad_norm": 0.6157045691239695, "learning_rate": 5.458556366585565e-06, "loss": 0.5928, "step": 25838 }, { "epoch": 0.7544013313479898, "grad_norm": 0.6515032377645931, "learning_rate": 5.457907542579076e-06, "loss": 0.6049, "step": 25839 }, { "epoch": 0.7544305275758372, "grad_norm": 0.6801006968572119, "learning_rate": 5.457258718572588e-06, "loss": 0.5927, "step": 25840 }, { "epoch": 0.7544597238036845, "grad_norm": 0.6495744243398477, "learning_rate": 5.456609894566099e-06, "loss": 0.6269, "step": 25841 }, { "epoch": 0.7544889200315319, "grad_norm": 0.6713477214005263, "learning_rate": 5.455961070559611e-06, "loss": 0.6274, "step": 25842 }, { "epoch": 0.7545181162593793, "grad_norm": 0.661596413063828, "learning_rate": 5.4553122465531225e-06, "loss": 0.6419, "step": 25843 }, { "epoch": 0.7545473124872266, "grad_norm": 0.6528759929251439, "learning_rate": 5.4546634225466345e-06, "loss": 0.6228, "step": 25844 }, { "epoch": 0.754576508715074, "grad_norm": 0.6321131587079539, "learning_rate": 5.454014598540146e-06, "loss": 0.4941, "step": 25845 }, { "epoch": 0.7546057049429213, "grad_norm": 0.6592494951412567, "learning_rate": 5.453365774533658e-06, "loss": 0.6133, "step": 25846 }, { "epoch": 0.7546349011707687, "grad_norm": 0.6187157155379871, "learning_rate": 5.452716950527171e-06, "loss": 0.5579, "step": 25847 }, { "epoch": 0.7546640973986161, "grad_norm": 0.6096921768904537, "learning_rate": 5.452068126520682e-06, "loss": 0.5347, "step": 25848 }, { "epoch": 0.7546932936264634, "grad_norm": 0.6247919567291147, "learning_rate": 5.451419302514194e-06, "loss": 0.5644, "step": 25849 }, { "epoch": 0.7547224898543108, "grad_norm": 0.5973854645781633, "learning_rate": 5.450770478507705e-06, "loss": 0.5362, "step": 25850 }, { "epoch": 0.7547516860821581, "grad_norm": 0.6896408310083694, "learning_rate": 5.450121654501217e-06, "loss": 0.6422, "step": 25851 }, { "epoch": 0.7547808823100055, "grad_norm": 0.6221752122782817, "learning_rate": 5.449472830494728e-06, "loss": 0.5421, "step": 25852 }, { "epoch": 0.7548100785378529, "grad_norm": 0.6450673604724335, "learning_rate": 5.44882400648824e-06, "loss": 0.6036, "step": 25853 }, { "epoch": 0.7548392747657002, "grad_norm": 0.6023230018764723, "learning_rate": 5.448175182481752e-06, "loss": 0.5895, "step": 25854 }, { "epoch": 0.7548684709935476, "grad_norm": 0.6525871009208389, "learning_rate": 5.4475263584752635e-06, "loss": 0.6505, "step": 25855 }, { "epoch": 0.754897667221395, "grad_norm": 0.6454979962316074, "learning_rate": 5.446877534468776e-06, "loss": 0.5884, "step": 25856 }, { "epoch": 0.7549268634492423, "grad_norm": 0.7109576414534827, "learning_rate": 5.446228710462288e-06, "loss": 0.7312, "step": 25857 }, { "epoch": 0.7549560596770897, "grad_norm": 0.7874864275493783, "learning_rate": 5.4455798864557995e-06, "loss": 0.5361, "step": 25858 }, { "epoch": 0.754985255904937, "grad_norm": 0.6823451448716988, "learning_rate": 5.4449310624493116e-06, "loss": 0.6544, "step": 25859 }, { "epoch": 0.7550144521327844, "grad_norm": 0.6631522860489234, "learning_rate": 5.444282238442823e-06, "loss": 0.622, "step": 25860 }, { "epoch": 0.7550436483606318, "grad_norm": 0.695886846113228, "learning_rate": 5.443633414436335e-06, "loss": 0.6671, "step": 25861 }, { "epoch": 0.7550728445884791, "grad_norm": 0.6643414198396969, "learning_rate": 5.442984590429846e-06, "loss": 0.6528, "step": 25862 }, { "epoch": 0.7551020408163265, "grad_norm": 0.6031709858147491, "learning_rate": 5.442335766423358e-06, "loss": 0.5212, "step": 25863 }, { "epoch": 0.7551312370441738, "grad_norm": 0.6153963258035473, "learning_rate": 5.441686942416869e-06, "loss": 0.5492, "step": 25864 }, { "epoch": 0.7551604332720212, "grad_norm": 0.6775430817181102, "learning_rate": 5.441038118410381e-06, "loss": 0.6169, "step": 25865 }, { "epoch": 0.7551896294998686, "grad_norm": 0.6474319102970373, "learning_rate": 5.440389294403894e-06, "loss": 0.583, "step": 25866 }, { "epoch": 0.7552188257277159, "grad_norm": 0.6719724521113883, "learning_rate": 5.439740470397405e-06, "loss": 0.6595, "step": 25867 }, { "epoch": 0.7552480219555633, "grad_norm": 0.623561156921768, "learning_rate": 5.439091646390917e-06, "loss": 0.5829, "step": 25868 }, { "epoch": 0.7552772181834106, "grad_norm": 0.6339722128832518, "learning_rate": 5.4384428223844284e-06, "loss": 0.5785, "step": 25869 }, { "epoch": 0.7553064144112581, "grad_norm": 0.6031236040715996, "learning_rate": 5.4377939983779405e-06, "loss": 0.5146, "step": 25870 }, { "epoch": 0.7553356106391055, "grad_norm": 0.6631174685093354, "learning_rate": 5.437145174371452e-06, "loss": 0.625, "step": 25871 }, { "epoch": 0.7553648068669528, "grad_norm": 0.6307413944246271, "learning_rate": 5.436496350364964e-06, "loss": 0.6067, "step": 25872 }, { "epoch": 0.7553940030948002, "grad_norm": 0.6037221920696805, "learning_rate": 5.435847526358476e-06, "loss": 0.5233, "step": 25873 }, { "epoch": 0.7554231993226476, "grad_norm": 0.6632908231699302, "learning_rate": 5.435198702351987e-06, "loss": 0.5171, "step": 25874 }, { "epoch": 0.7554523955504949, "grad_norm": 0.6418500353398309, "learning_rate": 5.4345498783455e-06, "loss": 0.6176, "step": 25875 }, { "epoch": 0.7554815917783423, "grad_norm": 0.6351628810192416, "learning_rate": 5.433901054339012e-06, "loss": 0.5991, "step": 25876 }, { "epoch": 0.7555107880061896, "grad_norm": 0.6162454802865863, "learning_rate": 5.433252230332523e-06, "loss": 0.5714, "step": 25877 }, { "epoch": 0.755539984234037, "grad_norm": 0.6843337670222464, "learning_rate": 5.432603406326035e-06, "loss": 0.5892, "step": 25878 }, { "epoch": 0.7555691804618844, "grad_norm": 0.9227057857226572, "learning_rate": 5.431954582319546e-06, "loss": 0.6323, "step": 25879 }, { "epoch": 0.7555983766897317, "grad_norm": 0.6153710378945744, "learning_rate": 5.431305758313058e-06, "loss": 0.5677, "step": 25880 }, { "epoch": 0.7556275729175791, "grad_norm": 0.6658078699902383, "learning_rate": 5.430656934306569e-06, "loss": 0.5886, "step": 25881 }, { "epoch": 0.7556567691454265, "grad_norm": 0.5764745634219522, "learning_rate": 5.430008110300081e-06, "loss": 0.5117, "step": 25882 }, { "epoch": 0.7556859653732738, "grad_norm": 0.686113782560564, "learning_rate": 5.429359286293593e-06, "loss": 0.5803, "step": 25883 }, { "epoch": 0.7557151616011212, "grad_norm": 0.6342416735870899, "learning_rate": 5.428710462287105e-06, "loss": 0.5867, "step": 25884 }, { "epoch": 0.7557443578289685, "grad_norm": 0.6906478632517674, "learning_rate": 5.4280616382806175e-06, "loss": 0.6249, "step": 25885 }, { "epoch": 0.7557735540568159, "grad_norm": 0.6270144969546413, "learning_rate": 5.427412814274129e-06, "loss": 0.5635, "step": 25886 }, { "epoch": 0.7558027502846633, "grad_norm": 0.6133043423299268, "learning_rate": 5.426763990267641e-06, "loss": 0.5725, "step": 25887 }, { "epoch": 0.7558319465125106, "grad_norm": 0.6712771205009075, "learning_rate": 5.426115166261152e-06, "loss": 0.643, "step": 25888 }, { "epoch": 0.755861142740358, "grad_norm": 0.6586589476988747, "learning_rate": 5.425466342254664e-06, "loss": 0.6353, "step": 25889 }, { "epoch": 0.7558903389682053, "grad_norm": 0.6165859888740889, "learning_rate": 5.424817518248175e-06, "loss": 0.5418, "step": 25890 }, { "epoch": 0.7559195351960527, "grad_norm": 0.6468398841697958, "learning_rate": 5.424168694241687e-06, "loss": 0.5897, "step": 25891 }, { "epoch": 0.7559487314239001, "grad_norm": 0.6124937700281932, "learning_rate": 5.423519870235199e-06, "loss": 0.5466, "step": 25892 }, { "epoch": 0.7559779276517474, "grad_norm": 0.5856469236805018, "learning_rate": 5.42287104622871e-06, "loss": 0.5292, "step": 25893 }, { "epoch": 0.7560071238795948, "grad_norm": 0.652868613109915, "learning_rate": 5.422222222222223e-06, "loss": 0.6077, "step": 25894 }, { "epoch": 0.7560363201074422, "grad_norm": 0.7174325507518955, "learning_rate": 5.421573398215735e-06, "loss": 0.6329, "step": 25895 }, { "epoch": 0.7560655163352895, "grad_norm": 0.6646610698783003, "learning_rate": 5.420924574209246e-06, "loss": 0.6411, "step": 25896 }, { "epoch": 0.7560947125631369, "grad_norm": 0.6235471390364382, "learning_rate": 5.4202757502027584e-06, "loss": 0.5715, "step": 25897 }, { "epoch": 0.7561239087909842, "grad_norm": 0.675075053897016, "learning_rate": 5.41962692619627e-06, "loss": 0.6673, "step": 25898 }, { "epoch": 0.7561531050188316, "grad_norm": 0.6545067076356196, "learning_rate": 5.418978102189782e-06, "loss": 0.5084, "step": 25899 }, { "epoch": 0.756182301246679, "grad_norm": 0.6423607497307624, "learning_rate": 5.418329278183293e-06, "loss": 0.615, "step": 25900 }, { "epoch": 0.7562114974745263, "grad_norm": 0.6390252450826873, "learning_rate": 5.417680454176805e-06, "loss": 0.5778, "step": 25901 }, { "epoch": 0.7562406937023737, "grad_norm": 0.6832407680442456, "learning_rate": 5.417031630170316e-06, "loss": 0.5758, "step": 25902 }, { "epoch": 0.756269889930221, "grad_norm": 0.5916405309258179, "learning_rate": 5.416382806163829e-06, "loss": 0.5184, "step": 25903 }, { "epoch": 0.7562990861580684, "grad_norm": 0.6288000943515982, "learning_rate": 5.415733982157341e-06, "loss": 0.6028, "step": 25904 }, { "epoch": 0.7563282823859158, "grad_norm": 0.6734369798540707, "learning_rate": 5.415085158150852e-06, "loss": 0.6659, "step": 25905 }, { "epoch": 0.7563574786137631, "grad_norm": 0.663340960440628, "learning_rate": 5.414436334144364e-06, "loss": 0.6001, "step": 25906 }, { "epoch": 0.7563866748416105, "grad_norm": 0.6667514921023145, "learning_rate": 5.413787510137875e-06, "loss": 0.6234, "step": 25907 }, { "epoch": 0.7564158710694578, "grad_norm": 0.666182750675177, "learning_rate": 5.413138686131387e-06, "loss": 0.6348, "step": 25908 }, { "epoch": 0.7564450672973052, "grad_norm": 0.6103204423563549, "learning_rate": 5.4124898621248985e-06, "loss": 0.5499, "step": 25909 }, { "epoch": 0.7564742635251526, "grad_norm": 0.6126625344793235, "learning_rate": 5.4118410381184106e-06, "loss": 0.5815, "step": 25910 }, { "epoch": 0.7565034597529999, "grad_norm": 0.6419757522222718, "learning_rate": 5.411192214111923e-06, "loss": 0.6213, "step": 25911 }, { "epoch": 0.7565326559808473, "grad_norm": 0.6370251277184253, "learning_rate": 5.410543390105434e-06, "loss": 0.5726, "step": 25912 }, { "epoch": 0.7565618522086947, "grad_norm": 0.6689218633982885, "learning_rate": 5.409894566098947e-06, "loss": 0.6287, "step": 25913 }, { "epoch": 0.756591048436542, "grad_norm": 0.6788776947512726, "learning_rate": 5.409245742092458e-06, "loss": 0.6261, "step": 25914 }, { "epoch": 0.7566202446643894, "grad_norm": 0.6755356685615796, "learning_rate": 5.40859691808597e-06, "loss": 0.6816, "step": 25915 }, { "epoch": 0.7566494408922367, "grad_norm": 0.642881451016389, "learning_rate": 5.407948094079482e-06, "loss": 0.5487, "step": 25916 }, { "epoch": 0.7566786371200841, "grad_norm": 0.6484091342180064, "learning_rate": 5.407299270072993e-06, "loss": 0.6084, "step": 25917 }, { "epoch": 0.7567078333479315, "grad_norm": 0.6142409117668599, "learning_rate": 5.406650446066505e-06, "loss": 0.5495, "step": 25918 }, { "epoch": 0.7567370295757788, "grad_norm": 0.6675184318241036, "learning_rate": 5.406001622060016e-06, "loss": 0.6814, "step": 25919 }, { "epoch": 0.7567662258036262, "grad_norm": 0.6299936483477379, "learning_rate": 5.405352798053528e-06, "loss": 0.5742, "step": 25920 }, { "epoch": 0.7567954220314735, "grad_norm": 0.622633958206711, "learning_rate": 5.4047039740470395e-06, "loss": 0.5671, "step": 25921 }, { "epoch": 0.7568246182593209, "grad_norm": 0.5787226549800782, "learning_rate": 5.404055150040552e-06, "loss": 0.4774, "step": 25922 }, { "epoch": 0.7568538144871683, "grad_norm": 0.6255083283527255, "learning_rate": 5.403406326034064e-06, "loss": 0.6045, "step": 25923 }, { "epoch": 0.7568830107150156, "grad_norm": 0.7052409940804918, "learning_rate": 5.4027575020275755e-06, "loss": 0.6837, "step": 25924 }, { "epoch": 0.756912206942863, "grad_norm": 0.6299976693981567, "learning_rate": 5.4021086780210876e-06, "loss": 0.5577, "step": 25925 }, { "epoch": 0.7569414031707103, "grad_norm": 0.6046732983224055, "learning_rate": 5.401459854014599e-06, "loss": 0.5463, "step": 25926 }, { "epoch": 0.7569705993985577, "grad_norm": 0.5976874808344684, "learning_rate": 5.400811030008111e-06, "loss": 0.5177, "step": 25927 }, { "epoch": 0.7569997956264051, "grad_norm": 0.627292264967045, "learning_rate": 5.400162206001622e-06, "loss": 0.6018, "step": 25928 }, { "epoch": 0.7570289918542524, "grad_norm": 0.6501329862194978, "learning_rate": 5.399513381995134e-06, "loss": 0.6149, "step": 25929 }, { "epoch": 0.7570581880820998, "grad_norm": 0.66806935888582, "learning_rate": 5.398864557988646e-06, "loss": 0.6385, "step": 25930 }, { "epoch": 0.7570873843099472, "grad_norm": 0.6554840063154368, "learning_rate": 5.398215733982157e-06, "loss": 0.6458, "step": 25931 }, { "epoch": 0.7571165805377945, "grad_norm": 0.5986296855371263, "learning_rate": 5.39756690997567e-06, "loss": 0.5012, "step": 25932 }, { "epoch": 0.7571457767656419, "grad_norm": 0.6001268041061794, "learning_rate": 5.396918085969181e-06, "loss": 0.5109, "step": 25933 }, { "epoch": 0.7571749729934892, "grad_norm": 0.6571920978782985, "learning_rate": 5.396269261962693e-06, "loss": 0.642, "step": 25934 }, { "epoch": 0.7572041692213366, "grad_norm": 0.6170174040671301, "learning_rate": 5.395620437956205e-06, "loss": 0.5671, "step": 25935 }, { "epoch": 0.757233365449184, "grad_norm": 0.6345313702082764, "learning_rate": 5.3949716139497165e-06, "loss": 0.5973, "step": 25936 }, { "epoch": 0.7572625616770313, "grad_norm": 0.7018066069694893, "learning_rate": 5.3943227899432285e-06, "loss": 0.6555, "step": 25937 }, { "epoch": 0.7572917579048787, "grad_norm": 0.6141074111192768, "learning_rate": 5.39367396593674e-06, "loss": 0.5231, "step": 25938 }, { "epoch": 0.757320954132726, "grad_norm": 0.6628835717179987, "learning_rate": 5.393025141930252e-06, "loss": 0.6174, "step": 25939 }, { "epoch": 0.7573501503605734, "grad_norm": 0.6131174496235811, "learning_rate": 5.392376317923763e-06, "loss": 0.5388, "step": 25940 }, { "epoch": 0.7573793465884208, "grad_norm": 0.6314970398504175, "learning_rate": 5.391727493917276e-06, "loss": 0.6214, "step": 25941 }, { "epoch": 0.7574085428162681, "grad_norm": 0.5886392371406468, "learning_rate": 5.391078669910788e-06, "loss": 0.5094, "step": 25942 }, { "epoch": 0.7574377390441155, "grad_norm": 0.6416972519125347, "learning_rate": 5.390429845904299e-06, "loss": 0.5843, "step": 25943 }, { "epoch": 0.7574669352719628, "grad_norm": 0.7146960365269339, "learning_rate": 5.389781021897811e-06, "loss": 0.6426, "step": 25944 }, { "epoch": 0.7574961314998102, "grad_norm": 0.6552188719426577, "learning_rate": 5.389132197891322e-06, "loss": 0.616, "step": 25945 }, { "epoch": 0.7575253277276576, "grad_norm": 0.6290112084646713, "learning_rate": 5.388483373884834e-06, "loss": 0.5498, "step": 25946 }, { "epoch": 0.7575545239555049, "grad_norm": 0.6900454037261197, "learning_rate": 5.387834549878345e-06, "loss": 0.6505, "step": 25947 }, { "epoch": 0.7575837201833523, "grad_norm": 0.6776440621058156, "learning_rate": 5.387185725871857e-06, "loss": 0.6336, "step": 25948 }, { "epoch": 0.7576129164111997, "grad_norm": 0.6249322218196802, "learning_rate": 5.3865369018653694e-06, "loss": 0.5882, "step": 25949 }, { "epoch": 0.757642112639047, "grad_norm": 0.6426662441059116, "learning_rate": 5.385888077858881e-06, "loss": 0.5706, "step": 25950 }, { "epoch": 0.7576713088668944, "grad_norm": 0.6238720564044185, "learning_rate": 5.3852392538523935e-06, "loss": 0.5771, "step": 25951 }, { "epoch": 0.7577005050947417, "grad_norm": 0.5830109318093804, "learning_rate": 5.384590429845905e-06, "loss": 0.5373, "step": 25952 }, { "epoch": 0.7577297013225891, "grad_norm": 0.6330714416014065, "learning_rate": 5.383941605839417e-06, "loss": 0.5526, "step": 25953 }, { "epoch": 0.7577588975504365, "grad_norm": 0.6412139891499308, "learning_rate": 5.383292781832929e-06, "loss": 0.6007, "step": 25954 }, { "epoch": 0.7577880937782838, "grad_norm": 0.6698460762567519, "learning_rate": 5.38264395782644e-06, "loss": 0.6621, "step": 25955 }, { "epoch": 0.7578172900061312, "grad_norm": 0.7822724882007445, "learning_rate": 5.381995133819952e-06, "loss": 0.6291, "step": 25956 }, { "epoch": 0.7578464862339785, "grad_norm": 0.6383679914410567, "learning_rate": 5.381346309813463e-06, "loss": 0.6198, "step": 25957 }, { "epoch": 0.7578756824618259, "grad_norm": 0.6443442962173636, "learning_rate": 5.380697485806975e-06, "loss": 0.5911, "step": 25958 }, { "epoch": 0.7579048786896733, "grad_norm": 0.6364772103431829, "learning_rate": 5.380048661800486e-06, "loss": 0.62, "step": 25959 }, { "epoch": 0.7579340749175206, "grad_norm": 0.7045311018145166, "learning_rate": 5.379399837793999e-06, "loss": 0.7068, "step": 25960 }, { "epoch": 0.757963271145368, "grad_norm": 0.6402726320087422, "learning_rate": 5.378751013787511e-06, "loss": 0.5984, "step": 25961 }, { "epoch": 0.7579924673732154, "grad_norm": 0.653547929485347, "learning_rate": 5.378102189781022e-06, "loss": 0.6067, "step": 25962 }, { "epoch": 0.7580216636010627, "grad_norm": 0.6633284683875129, "learning_rate": 5.3774533657745344e-06, "loss": 0.6508, "step": 25963 }, { "epoch": 0.7580508598289101, "grad_norm": 0.5909966350261517, "learning_rate": 5.376804541768046e-06, "loss": 0.5268, "step": 25964 }, { "epoch": 0.7580800560567574, "grad_norm": 0.6403079231261456, "learning_rate": 5.376155717761558e-06, "loss": 0.5343, "step": 25965 }, { "epoch": 0.7581092522846048, "grad_norm": 0.7042251680626544, "learning_rate": 5.375506893755069e-06, "loss": 0.6098, "step": 25966 }, { "epoch": 0.7581384485124522, "grad_norm": 0.666338165826068, "learning_rate": 5.374858069748581e-06, "loss": 0.5722, "step": 25967 }, { "epoch": 0.7581676447402995, "grad_norm": 0.6482139440897714, "learning_rate": 5.374209245742093e-06, "loss": 0.6206, "step": 25968 }, { "epoch": 0.7581968409681469, "grad_norm": 0.6522846987418685, "learning_rate": 5.373560421735604e-06, "loss": 0.6337, "step": 25969 }, { "epoch": 0.7582260371959942, "grad_norm": 0.6125536939947611, "learning_rate": 5.372911597729117e-06, "loss": 0.534, "step": 25970 }, { "epoch": 0.7582552334238416, "grad_norm": 0.6805800239676413, "learning_rate": 5.372262773722628e-06, "loss": 0.6025, "step": 25971 }, { "epoch": 0.758284429651689, "grad_norm": 0.6635370654358035, "learning_rate": 5.37161394971614e-06, "loss": 0.6283, "step": 25972 }, { "epoch": 0.7583136258795363, "grad_norm": 0.5974828124651858, "learning_rate": 5.370965125709652e-06, "loss": 0.5174, "step": 25973 }, { "epoch": 0.7583428221073837, "grad_norm": 0.6866995925141945, "learning_rate": 5.370316301703163e-06, "loss": 0.6604, "step": 25974 }, { "epoch": 0.758372018335231, "grad_norm": 0.7014256511507627, "learning_rate": 5.369667477696675e-06, "loss": 0.6959, "step": 25975 }, { "epoch": 0.7584012145630784, "grad_norm": 0.6308178097635861, "learning_rate": 5.3690186536901866e-06, "loss": 0.5541, "step": 25976 }, { "epoch": 0.7584304107909258, "grad_norm": 0.6740645799020943, "learning_rate": 5.368369829683699e-06, "loss": 0.6065, "step": 25977 }, { "epoch": 0.7584596070187731, "grad_norm": 0.6762776260915845, "learning_rate": 5.36772100567721e-06, "loss": 0.623, "step": 25978 }, { "epoch": 0.7584888032466205, "grad_norm": 0.6450710201787957, "learning_rate": 5.367072181670723e-06, "loss": 0.6459, "step": 25979 }, { "epoch": 0.7585179994744679, "grad_norm": 0.6380786521567109, "learning_rate": 5.366423357664235e-06, "loss": 0.595, "step": 25980 }, { "epoch": 0.7585471957023152, "grad_norm": 0.7104923463673805, "learning_rate": 5.365774533657746e-06, "loss": 0.5635, "step": 25981 }, { "epoch": 0.7585763919301626, "grad_norm": 0.6249405135759879, "learning_rate": 5.365125709651258e-06, "loss": 0.559, "step": 25982 }, { "epoch": 0.7586055881580099, "grad_norm": 0.6070936051343179, "learning_rate": 5.364476885644769e-06, "loss": 0.5755, "step": 25983 }, { "epoch": 0.7586347843858573, "grad_norm": 0.6692976621196508, "learning_rate": 5.363828061638281e-06, "loss": 0.5729, "step": 25984 }, { "epoch": 0.7586639806137047, "grad_norm": 0.6565581080412574, "learning_rate": 5.363179237631792e-06, "loss": 0.5788, "step": 25985 }, { "epoch": 0.758693176841552, "grad_norm": 0.6733721009115338, "learning_rate": 5.362530413625304e-06, "loss": 0.6409, "step": 25986 }, { "epoch": 0.7587223730693994, "grad_norm": 0.6075940859188784, "learning_rate": 5.3618815896188155e-06, "loss": 0.5225, "step": 25987 }, { "epoch": 0.7587515692972467, "grad_norm": 0.6398515357741651, "learning_rate": 5.361232765612328e-06, "loss": 0.6207, "step": 25988 }, { "epoch": 0.7587807655250941, "grad_norm": 0.6591844961563559, "learning_rate": 5.36058394160584e-06, "loss": 0.5687, "step": 25989 }, { "epoch": 0.7588099617529416, "grad_norm": 0.653713011153, "learning_rate": 5.3599351175993516e-06, "loss": 0.6143, "step": 25990 }, { "epoch": 0.7588391579807889, "grad_norm": 0.6391969028957583, "learning_rate": 5.359286293592864e-06, "loss": 0.5965, "step": 25991 }, { "epoch": 0.7588683542086363, "grad_norm": 0.628150278216706, "learning_rate": 5.358637469586376e-06, "loss": 0.5959, "step": 25992 }, { "epoch": 0.7588975504364837, "grad_norm": 0.6704863775778718, "learning_rate": 5.357988645579887e-06, "loss": 0.6335, "step": 25993 }, { "epoch": 0.758926746664331, "grad_norm": 0.6952620144897611, "learning_rate": 5.357339821573399e-06, "loss": 0.7229, "step": 25994 }, { "epoch": 0.7589559428921784, "grad_norm": 0.6262465732497073, "learning_rate": 5.35669099756691e-06, "loss": 0.5887, "step": 25995 }, { "epoch": 0.7589851391200257, "grad_norm": 0.658934351242574, "learning_rate": 5.356042173560422e-06, "loss": 0.6928, "step": 25996 }, { "epoch": 0.7590143353478731, "grad_norm": 0.6298012075993956, "learning_rate": 5.355393349553933e-06, "loss": 0.5704, "step": 25997 }, { "epoch": 0.7590435315757205, "grad_norm": 0.6383574486309916, "learning_rate": 5.354744525547446e-06, "loss": 0.5793, "step": 25998 }, { "epoch": 0.7590727278035678, "grad_norm": 0.6133546917784105, "learning_rate": 5.354095701540958e-06, "loss": 0.5506, "step": 25999 }, { "epoch": 0.7591019240314152, "grad_norm": 0.6129468609099966, "learning_rate": 5.353446877534469e-06, "loss": 0.5439, "step": 26000 }, { "epoch": 0.7591311202592625, "grad_norm": 0.6391135783443821, "learning_rate": 5.352798053527981e-06, "loss": 0.6393, "step": 26001 }, { "epoch": 0.7591603164871099, "grad_norm": 0.6401324897346128, "learning_rate": 5.3521492295214925e-06, "loss": 0.5869, "step": 26002 }, { "epoch": 0.7591895127149573, "grad_norm": 0.6883448329100593, "learning_rate": 5.3515004055150045e-06, "loss": 0.6734, "step": 26003 }, { "epoch": 0.7592187089428046, "grad_norm": 0.6010748014746528, "learning_rate": 5.350851581508516e-06, "loss": 0.5121, "step": 26004 }, { "epoch": 0.759247905170652, "grad_norm": 0.6287540331373712, "learning_rate": 5.350202757502028e-06, "loss": 0.5764, "step": 26005 }, { "epoch": 0.7592771013984994, "grad_norm": 0.7474381585233997, "learning_rate": 5.349553933495539e-06, "loss": 0.714, "step": 26006 }, { "epoch": 0.7593062976263467, "grad_norm": 0.6410592064603957, "learning_rate": 5.348905109489052e-06, "loss": 0.579, "step": 26007 }, { "epoch": 0.7593354938541941, "grad_norm": 0.6138542084323845, "learning_rate": 5.348256285482564e-06, "loss": 0.5841, "step": 26008 }, { "epoch": 0.7593646900820414, "grad_norm": 0.6086208365883093, "learning_rate": 5.347607461476075e-06, "loss": 0.496, "step": 26009 }, { "epoch": 0.7593938863098888, "grad_norm": 0.6578641208423026, "learning_rate": 5.346958637469587e-06, "loss": 0.6217, "step": 26010 }, { "epoch": 0.7594230825377362, "grad_norm": 0.6591017916743586, "learning_rate": 5.346309813463099e-06, "loss": 0.5985, "step": 26011 }, { "epoch": 0.7594522787655835, "grad_norm": 0.6419007908807032, "learning_rate": 5.34566098945661e-06, "loss": 0.6077, "step": 26012 }, { "epoch": 0.7594814749934309, "grad_norm": 0.7283321370024757, "learning_rate": 5.345012165450122e-06, "loss": 0.6922, "step": 26013 }, { "epoch": 0.7595106712212782, "grad_norm": 0.5710385189259677, "learning_rate": 5.3443633414436334e-06, "loss": 0.4997, "step": 26014 }, { "epoch": 0.7595398674491256, "grad_norm": 0.6471237773270114, "learning_rate": 5.3437145174371455e-06, "loss": 0.5689, "step": 26015 }, { "epoch": 0.759569063676973, "grad_norm": 0.6695549214819768, "learning_rate": 5.343065693430657e-06, "loss": 0.6103, "step": 26016 }, { "epoch": 0.7595982599048203, "grad_norm": 0.5957392537603461, "learning_rate": 5.3424168694241695e-06, "loss": 0.5518, "step": 26017 }, { "epoch": 0.7596274561326677, "grad_norm": 0.6442957571011332, "learning_rate": 5.3417680454176815e-06, "loss": 0.6262, "step": 26018 }, { "epoch": 0.759656652360515, "grad_norm": 0.6292815262140196, "learning_rate": 5.341119221411193e-06, "loss": 0.6098, "step": 26019 }, { "epoch": 0.7596858485883624, "grad_norm": 0.6342532721931647, "learning_rate": 5.340470397404705e-06, "loss": 0.5726, "step": 26020 }, { "epoch": 0.7597150448162098, "grad_norm": 0.6519362788489192, "learning_rate": 5.339821573398216e-06, "loss": 0.626, "step": 26021 }, { "epoch": 0.7597442410440571, "grad_norm": 0.6314388621693727, "learning_rate": 5.339172749391728e-06, "loss": 0.5695, "step": 26022 }, { "epoch": 0.7597734372719045, "grad_norm": 0.7351374755670005, "learning_rate": 5.338523925385239e-06, "loss": 0.6473, "step": 26023 }, { "epoch": 0.7598026334997519, "grad_norm": 0.5947774085208202, "learning_rate": 5.337875101378751e-06, "loss": 0.5549, "step": 26024 }, { "epoch": 0.7598318297275992, "grad_norm": 0.6932488928067488, "learning_rate": 5.337226277372262e-06, "loss": 0.6883, "step": 26025 }, { "epoch": 0.7598610259554466, "grad_norm": 0.6256094163593816, "learning_rate": 5.336577453365775e-06, "loss": 0.5744, "step": 26026 }, { "epoch": 0.7598902221832939, "grad_norm": 0.6752066774346577, "learning_rate": 5.335928629359287e-06, "loss": 0.6257, "step": 26027 }, { "epoch": 0.7599194184111413, "grad_norm": 0.593587015398468, "learning_rate": 5.335279805352798e-06, "loss": 0.5281, "step": 26028 }, { "epoch": 0.7599486146389887, "grad_norm": 0.6301153007696915, "learning_rate": 5.3346309813463104e-06, "loss": 0.568, "step": 26029 }, { "epoch": 0.759977810866836, "grad_norm": 0.647907581689192, "learning_rate": 5.3339821573398225e-06, "loss": 0.6027, "step": 26030 }, { "epoch": 0.7600070070946834, "grad_norm": 0.6547443139212541, "learning_rate": 5.333333333333334e-06, "loss": 0.6126, "step": 26031 }, { "epoch": 0.7600362033225307, "grad_norm": 0.6072359105372683, "learning_rate": 5.332684509326846e-06, "loss": 0.5556, "step": 26032 }, { "epoch": 0.7600653995503781, "grad_norm": 0.6687360442459942, "learning_rate": 5.332035685320357e-06, "loss": 0.6034, "step": 26033 }, { "epoch": 0.7600945957782255, "grad_norm": 0.6795996242242687, "learning_rate": 5.331386861313869e-06, "loss": 0.6608, "step": 26034 }, { "epoch": 0.7601237920060728, "grad_norm": 0.6847135216721854, "learning_rate": 5.33073803730738e-06, "loss": 0.6384, "step": 26035 }, { "epoch": 0.7601529882339202, "grad_norm": 0.659514990950754, "learning_rate": 5.330089213300893e-06, "loss": 0.6495, "step": 26036 }, { "epoch": 0.7601821844617676, "grad_norm": 0.6122179276703832, "learning_rate": 5.329440389294405e-06, "loss": 0.5133, "step": 26037 }, { "epoch": 0.7602113806896149, "grad_norm": 0.6251666150624479, "learning_rate": 5.328791565287916e-06, "loss": 0.5829, "step": 26038 }, { "epoch": 0.7602405769174623, "grad_norm": 0.6584957507928455, "learning_rate": 5.328142741281428e-06, "loss": 0.6031, "step": 26039 }, { "epoch": 0.7602697731453096, "grad_norm": 0.568449756717838, "learning_rate": 5.327493917274939e-06, "loss": 0.4908, "step": 26040 }, { "epoch": 0.760298969373157, "grad_norm": 0.6516671145749593, "learning_rate": 5.326845093268451e-06, "loss": 0.6094, "step": 26041 }, { "epoch": 0.7603281656010044, "grad_norm": 0.628841421620877, "learning_rate": 5.3261962692619626e-06, "loss": 0.5565, "step": 26042 }, { "epoch": 0.7603573618288517, "grad_norm": 0.6566484564392391, "learning_rate": 5.325547445255475e-06, "loss": 0.5831, "step": 26043 }, { "epoch": 0.7603865580566991, "grad_norm": 0.6388853057567014, "learning_rate": 5.324898621248986e-06, "loss": 0.5692, "step": 26044 }, { "epoch": 0.7604157542845464, "grad_norm": 0.7057136750207682, "learning_rate": 5.324249797242499e-06, "loss": 0.6779, "step": 26045 }, { "epoch": 0.7604449505123938, "grad_norm": 0.6223650924049385, "learning_rate": 5.323600973236011e-06, "loss": 0.5592, "step": 26046 }, { "epoch": 0.7604741467402412, "grad_norm": 0.6468326264438718, "learning_rate": 5.322952149229522e-06, "loss": 0.5809, "step": 26047 }, { "epoch": 0.7605033429680885, "grad_norm": 0.6116483478169102, "learning_rate": 5.322303325223034e-06, "loss": 0.5493, "step": 26048 }, { "epoch": 0.7605325391959359, "grad_norm": 0.6577180793896452, "learning_rate": 5.321654501216545e-06, "loss": 0.6296, "step": 26049 }, { "epoch": 0.7605617354237832, "grad_norm": 0.6309589899473795, "learning_rate": 5.321005677210057e-06, "loss": 0.5679, "step": 26050 }, { "epoch": 0.7605909316516306, "grad_norm": 0.6720883785381089, "learning_rate": 5.320356853203569e-06, "loss": 0.5982, "step": 26051 }, { "epoch": 0.760620127879478, "grad_norm": 0.6041859898216401, "learning_rate": 5.31970802919708e-06, "loss": 0.5526, "step": 26052 }, { "epoch": 0.7606493241073253, "grad_norm": 0.6937331172424759, "learning_rate": 5.319059205190592e-06, "loss": 0.639, "step": 26053 }, { "epoch": 0.7606785203351727, "grad_norm": 0.6114823919850184, "learning_rate": 5.318410381184105e-06, "loss": 0.5537, "step": 26054 }, { "epoch": 0.76070771656302, "grad_norm": 0.6348672188921987, "learning_rate": 5.317761557177616e-06, "loss": 0.5807, "step": 26055 }, { "epoch": 0.7607369127908674, "grad_norm": 0.6400935270690516, "learning_rate": 5.317112733171128e-06, "loss": 0.6309, "step": 26056 }, { "epoch": 0.7607661090187148, "grad_norm": 0.656213316531221, "learning_rate": 5.31646390916464e-06, "loss": 0.6219, "step": 26057 }, { "epoch": 0.7607953052465621, "grad_norm": 0.6365407088491597, "learning_rate": 5.315815085158152e-06, "loss": 0.5513, "step": 26058 }, { "epoch": 0.7608245014744095, "grad_norm": 0.605646488753711, "learning_rate": 5.315166261151663e-06, "loss": 0.5439, "step": 26059 }, { "epoch": 0.7608536977022569, "grad_norm": 0.6202650447640237, "learning_rate": 5.314517437145175e-06, "loss": 0.5929, "step": 26060 }, { "epoch": 0.7608828939301042, "grad_norm": 0.6070606106986072, "learning_rate": 5.313868613138686e-06, "loss": 0.5252, "step": 26061 }, { "epoch": 0.7609120901579516, "grad_norm": 0.6276863302486715, "learning_rate": 5.313219789132198e-06, "loss": 0.5855, "step": 26062 }, { "epoch": 0.7609412863857989, "grad_norm": 0.6787757071430145, "learning_rate": 5.312570965125709e-06, "loss": 0.6845, "step": 26063 }, { "epoch": 0.7609704826136463, "grad_norm": 0.6913045592406688, "learning_rate": 5.311922141119222e-06, "loss": 0.6439, "step": 26064 }, { "epoch": 0.7609996788414937, "grad_norm": 0.731083912580633, "learning_rate": 5.311273317112734e-06, "loss": 0.6002, "step": 26065 }, { "epoch": 0.761028875069341, "grad_norm": 0.6812916795591172, "learning_rate": 5.310624493106245e-06, "loss": 0.6173, "step": 26066 }, { "epoch": 0.7610580712971884, "grad_norm": 0.6050251447899527, "learning_rate": 5.309975669099757e-06, "loss": 0.5437, "step": 26067 }, { "epoch": 0.7610872675250357, "grad_norm": 0.625144341012118, "learning_rate": 5.3093268450932685e-06, "loss": 0.5938, "step": 26068 }, { "epoch": 0.7611164637528831, "grad_norm": 0.6317591845583937, "learning_rate": 5.3086780210867805e-06, "loss": 0.6085, "step": 26069 }, { "epoch": 0.7611456599807305, "grad_norm": 0.6650327232753053, "learning_rate": 5.3080291970802926e-06, "loss": 0.6281, "step": 26070 }, { "epoch": 0.7611748562085778, "grad_norm": 0.6284340516499358, "learning_rate": 5.307380373073804e-06, "loss": 0.5891, "step": 26071 }, { "epoch": 0.7612040524364252, "grad_norm": 0.6402892703172013, "learning_rate": 5.306731549067316e-06, "loss": 0.5545, "step": 26072 }, { "epoch": 0.7612332486642726, "grad_norm": 0.6165521425176717, "learning_rate": 5.306082725060829e-06, "loss": 0.543, "step": 26073 }, { "epoch": 0.7612624448921199, "grad_norm": 0.6562612771900862, "learning_rate": 5.30543390105434e-06, "loss": 0.6751, "step": 26074 }, { "epoch": 0.7612916411199673, "grad_norm": 0.6331532156219349, "learning_rate": 5.304785077047852e-06, "loss": 0.5896, "step": 26075 }, { "epoch": 0.7613208373478146, "grad_norm": 0.658026873976169, "learning_rate": 5.304136253041363e-06, "loss": 0.6396, "step": 26076 }, { "epoch": 0.761350033575662, "grad_norm": 0.6805188196101872, "learning_rate": 5.303487429034875e-06, "loss": 0.639, "step": 26077 }, { "epoch": 0.7613792298035094, "grad_norm": 0.5855081215242911, "learning_rate": 5.302838605028386e-06, "loss": 0.5066, "step": 26078 }, { "epoch": 0.7614084260313567, "grad_norm": 0.6642122802309174, "learning_rate": 5.302189781021898e-06, "loss": 0.6165, "step": 26079 }, { "epoch": 0.7614376222592041, "grad_norm": 0.6679086124134757, "learning_rate": 5.3015409570154094e-06, "loss": 0.6032, "step": 26080 }, { "epoch": 0.7614668184870514, "grad_norm": 0.6313098100653876, "learning_rate": 5.3008921330089215e-06, "loss": 0.5608, "step": 26081 }, { "epoch": 0.7614960147148988, "grad_norm": 0.613295730065236, "learning_rate": 5.300243309002433e-06, "loss": 0.5527, "step": 26082 }, { "epoch": 0.7615252109427462, "grad_norm": 0.6711197400979405, "learning_rate": 5.2995944849959455e-06, "loss": 0.6345, "step": 26083 }, { "epoch": 0.7615544071705935, "grad_norm": 0.6409459601233405, "learning_rate": 5.2989456609894575e-06, "loss": 0.5501, "step": 26084 }, { "epoch": 0.7615836033984409, "grad_norm": 0.6781484120428969, "learning_rate": 5.298296836982969e-06, "loss": 0.6586, "step": 26085 }, { "epoch": 0.7616127996262883, "grad_norm": 0.6717978644252824, "learning_rate": 5.297648012976481e-06, "loss": 0.6155, "step": 26086 }, { "epoch": 0.7616419958541356, "grad_norm": 0.6278964888043433, "learning_rate": 5.296999188969992e-06, "loss": 0.586, "step": 26087 }, { "epoch": 0.761671192081983, "grad_norm": 0.6673417012295803, "learning_rate": 5.296350364963504e-06, "loss": 0.6422, "step": 26088 }, { "epoch": 0.7617003883098303, "grad_norm": 0.6229692606814359, "learning_rate": 5.295701540957016e-06, "loss": 0.5741, "step": 26089 }, { "epoch": 0.7617295845376777, "grad_norm": 0.6784851669587912, "learning_rate": 5.295052716950527e-06, "loss": 0.6421, "step": 26090 }, { "epoch": 0.7617587807655251, "grad_norm": 0.6545169450310214, "learning_rate": 5.294403892944039e-06, "loss": 0.6049, "step": 26091 }, { "epoch": 0.7617879769933724, "grad_norm": 0.6255856692342638, "learning_rate": 5.293755068937551e-06, "loss": 0.5458, "step": 26092 }, { "epoch": 0.7618171732212198, "grad_norm": 0.6475793685495107, "learning_rate": 5.293106244931063e-06, "loss": 0.5486, "step": 26093 }, { "epoch": 0.7618463694490671, "grad_norm": 0.6243050184383239, "learning_rate": 5.292457420924575e-06, "loss": 0.5998, "step": 26094 }, { "epoch": 0.7618755656769145, "grad_norm": 0.5979010543142681, "learning_rate": 5.2918085969180865e-06, "loss": 0.5624, "step": 26095 }, { "epoch": 0.7619047619047619, "grad_norm": 0.5894612665774722, "learning_rate": 5.2911597729115985e-06, "loss": 0.4881, "step": 26096 }, { "epoch": 0.7619339581326092, "grad_norm": 0.6504711719865219, "learning_rate": 5.29051094890511e-06, "loss": 0.6237, "step": 26097 }, { "epoch": 0.7619631543604566, "grad_norm": 0.7018104764369434, "learning_rate": 5.289862124898622e-06, "loss": 0.6501, "step": 26098 }, { "epoch": 0.761992350588304, "grad_norm": 0.632222384325325, "learning_rate": 5.289213300892133e-06, "loss": 0.5545, "step": 26099 }, { "epoch": 0.7620215468161513, "grad_norm": 0.7023599449913243, "learning_rate": 5.288564476885645e-06, "loss": 0.6978, "step": 26100 }, { "epoch": 0.7620507430439987, "grad_norm": 0.5753817377373888, "learning_rate": 5.287915652879156e-06, "loss": 0.495, "step": 26101 }, { "epoch": 0.762079939271846, "grad_norm": 0.6242413145769082, "learning_rate": 5.287266828872669e-06, "loss": 0.5741, "step": 26102 }, { "epoch": 0.7621091354996934, "grad_norm": 0.6001153178261093, "learning_rate": 5.286618004866181e-06, "loss": 0.5349, "step": 26103 }, { "epoch": 0.7621383317275408, "grad_norm": 0.5999376486035121, "learning_rate": 5.285969180859692e-06, "loss": 0.5414, "step": 26104 }, { "epoch": 0.7621675279553881, "grad_norm": 0.6022871253563854, "learning_rate": 5.285320356853204e-06, "loss": 0.5106, "step": 26105 }, { "epoch": 0.7621967241832355, "grad_norm": 0.6442408816879618, "learning_rate": 5.284671532846715e-06, "loss": 0.6083, "step": 26106 }, { "epoch": 0.7622259204110828, "grad_norm": 0.6546686003159521, "learning_rate": 5.284022708840227e-06, "loss": 0.6155, "step": 26107 }, { "epoch": 0.7622551166389302, "grad_norm": 0.6700464787799002, "learning_rate": 5.283373884833739e-06, "loss": 0.6303, "step": 26108 }, { "epoch": 0.7622843128667776, "grad_norm": 0.6294095047625927, "learning_rate": 5.282725060827251e-06, "loss": 0.5611, "step": 26109 }, { "epoch": 0.7623135090946249, "grad_norm": 0.6251674919510074, "learning_rate": 5.282076236820763e-06, "loss": 0.6036, "step": 26110 }, { "epoch": 0.7623427053224724, "grad_norm": 0.6491915548671232, "learning_rate": 5.281427412814275e-06, "loss": 0.596, "step": 26111 }, { "epoch": 0.7623719015503198, "grad_norm": 0.6697514969204826, "learning_rate": 5.280778588807787e-06, "loss": 0.6339, "step": 26112 }, { "epoch": 0.7624010977781671, "grad_norm": 0.5777121063993966, "learning_rate": 5.280129764801299e-06, "loss": 0.5248, "step": 26113 }, { "epoch": 0.7624302940060145, "grad_norm": 0.6773902254132107, "learning_rate": 5.27948094079481e-06, "loss": 0.632, "step": 26114 }, { "epoch": 0.7624594902338618, "grad_norm": 0.5704175012718364, "learning_rate": 5.278832116788322e-06, "loss": 0.5083, "step": 26115 }, { "epoch": 0.7624886864617092, "grad_norm": 0.6626423121776468, "learning_rate": 5.278183292781833e-06, "loss": 0.5826, "step": 26116 }, { "epoch": 0.7625178826895566, "grad_norm": 0.6342878428502979, "learning_rate": 5.277534468775345e-06, "loss": 0.5658, "step": 26117 }, { "epoch": 0.7625470789174039, "grad_norm": 0.6467413064867749, "learning_rate": 5.276885644768856e-06, "loss": 0.6114, "step": 26118 }, { "epoch": 0.7625762751452513, "grad_norm": 0.6173994530538667, "learning_rate": 5.276236820762368e-06, "loss": 0.5497, "step": 26119 }, { "epoch": 0.7626054713730986, "grad_norm": 0.5990381251439258, "learning_rate": 5.2755879967558795e-06, "loss": 0.5016, "step": 26120 }, { "epoch": 0.762634667600946, "grad_norm": 0.6638567614689556, "learning_rate": 5.274939172749392e-06, "loss": 0.6405, "step": 26121 }, { "epoch": 0.7626638638287934, "grad_norm": 0.6524716010419951, "learning_rate": 5.274290348742904e-06, "loss": 0.5895, "step": 26122 }, { "epoch": 0.7626930600566407, "grad_norm": 0.6238171883424127, "learning_rate": 5.273641524736416e-06, "loss": 0.5785, "step": 26123 }, { "epoch": 0.7627222562844881, "grad_norm": 0.661298869627747, "learning_rate": 5.272992700729928e-06, "loss": 0.6354, "step": 26124 }, { "epoch": 0.7627514525123354, "grad_norm": 0.6589410757192983, "learning_rate": 5.272343876723439e-06, "loss": 0.6016, "step": 26125 }, { "epoch": 0.7627806487401828, "grad_norm": 0.6347760553490609, "learning_rate": 5.271695052716951e-06, "loss": 0.5793, "step": 26126 }, { "epoch": 0.7628098449680302, "grad_norm": 0.6384226575811371, "learning_rate": 5.271046228710463e-06, "loss": 0.5782, "step": 26127 }, { "epoch": 0.7628390411958775, "grad_norm": 0.6506968007331316, "learning_rate": 5.270397404703974e-06, "loss": 0.611, "step": 26128 }, { "epoch": 0.7628682374237249, "grad_norm": 0.6115150443359966, "learning_rate": 5.269748580697486e-06, "loss": 0.546, "step": 26129 }, { "epoch": 0.7628974336515723, "grad_norm": 0.6336349812834945, "learning_rate": 5.269099756690998e-06, "loss": 0.5947, "step": 26130 }, { "epoch": 0.7629266298794196, "grad_norm": 0.615096371215733, "learning_rate": 5.26845093268451e-06, "loss": 0.5514, "step": 26131 }, { "epoch": 0.762955826107267, "grad_norm": 0.6666530134944436, "learning_rate": 5.267802108678022e-06, "loss": 0.6396, "step": 26132 }, { "epoch": 0.7629850223351143, "grad_norm": 0.6327555553852956, "learning_rate": 5.267153284671533e-06, "loss": 0.615, "step": 26133 }, { "epoch": 0.7630142185629617, "grad_norm": 0.6663651899449216, "learning_rate": 5.266504460665045e-06, "loss": 0.5777, "step": 26134 }, { "epoch": 0.7630434147908091, "grad_norm": 0.6288178551971214, "learning_rate": 5.2658556366585565e-06, "loss": 0.6041, "step": 26135 }, { "epoch": 0.7630726110186564, "grad_norm": 0.655617867707808, "learning_rate": 5.2652068126520686e-06, "loss": 0.5707, "step": 26136 }, { "epoch": 0.7631018072465038, "grad_norm": 0.6341428552616495, "learning_rate": 5.26455798864558e-06, "loss": 0.5923, "step": 26137 }, { "epoch": 0.7631310034743511, "grad_norm": 0.6172559203255651, "learning_rate": 5.263909164639092e-06, "loss": 0.5684, "step": 26138 }, { "epoch": 0.7631601997021985, "grad_norm": 0.6261779665625177, "learning_rate": 5.263260340632605e-06, "loss": 0.5344, "step": 26139 }, { "epoch": 0.7631893959300459, "grad_norm": 0.6979067636198331, "learning_rate": 5.262611516626116e-06, "loss": 0.7132, "step": 26140 }, { "epoch": 0.7632185921578932, "grad_norm": 0.6481047979896096, "learning_rate": 5.261962692619628e-06, "loss": 0.5852, "step": 26141 }, { "epoch": 0.7632477883857406, "grad_norm": 0.5957389251529668, "learning_rate": 5.261313868613139e-06, "loss": 0.5192, "step": 26142 }, { "epoch": 0.763276984613588, "grad_norm": 0.5710436859608716, "learning_rate": 5.260665044606651e-06, "loss": 0.5195, "step": 26143 }, { "epoch": 0.7633061808414353, "grad_norm": 0.6399919192673167, "learning_rate": 5.260016220600162e-06, "loss": 0.5915, "step": 26144 }, { "epoch": 0.7633353770692827, "grad_norm": 0.6643338388140845, "learning_rate": 5.259367396593674e-06, "loss": 0.6351, "step": 26145 }, { "epoch": 0.76336457329713, "grad_norm": 0.6162818413600424, "learning_rate": 5.258718572587186e-06, "loss": 0.5764, "step": 26146 }, { "epoch": 0.7633937695249774, "grad_norm": 0.6158491112492838, "learning_rate": 5.2580697485806975e-06, "loss": 0.5535, "step": 26147 }, { "epoch": 0.7634229657528248, "grad_norm": 0.651785863336239, "learning_rate": 5.2574209245742095e-06, "loss": 0.6542, "step": 26148 }, { "epoch": 0.7634521619806721, "grad_norm": 0.6602245448835645, "learning_rate": 5.2567721005677215e-06, "loss": 0.5702, "step": 26149 }, { "epoch": 0.7634813582085195, "grad_norm": 0.6624922004053604, "learning_rate": 5.2561232765612336e-06, "loss": 0.6533, "step": 26150 }, { "epoch": 0.7635105544363668, "grad_norm": 0.6437166544862571, "learning_rate": 5.255474452554746e-06, "loss": 0.5683, "step": 26151 }, { "epoch": 0.7635397506642142, "grad_norm": 0.6223078896202631, "learning_rate": 5.254825628548257e-06, "loss": 0.5117, "step": 26152 }, { "epoch": 0.7635689468920616, "grad_norm": 0.6404957780904872, "learning_rate": 5.254176804541769e-06, "loss": 0.6377, "step": 26153 }, { "epoch": 0.7635981431199089, "grad_norm": 0.6631964967205672, "learning_rate": 5.25352798053528e-06, "loss": 0.613, "step": 26154 }, { "epoch": 0.7636273393477563, "grad_norm": 0.6355414777714249, "learning_rate": 5.252879156528792e-06, "loss": 0.555, "step": 26155 }, { "epoch": 0.7636565355756036, "grad_norm": 0.6586547130866646, "learning_rate": 5.252230332522303e-06, "loss": 0.6002, "step": 26156 }, { "epoch": 0.763685731803451, "grad_norm": 0.6154753936888416, "learning_rate": 5.251581508515815e-06, "loss": 0.5427, "step": 26157 }, { "epoch": 0.7637149280312984, "grad_norm": 0.658831123669801, "learning_rate": 5.250932684509328e-06, "loss": 0.5758, "step": 26158 }, { "epoch": 0.7637441242591457, "grad_norm": 0.6399108001019234, "learning_rate": 5.250283860502839e-06, "loss": 0.5435, "step": 26159 }, { "epoch": 0.7637733204869931, "grad_norm": 0.6531537196886414, "learning_rate": 5.249635036496351e-06, "loss": 0.5875, "step": 26160 }, { "epoch": 0.7638025167148405, "grad_norm": 0.6105667695522431, "learning_rate": 5.2489862124898625e-06, "loss": 0.5511, "step": 26161 }, { "epoch": 0.7638317129426878, "grad_norm": 0.6960323730422024, "learning_rate": 5.2483373884833745e-06, "loss": 0.5626, "step": 26162 }, { "epoch": 0.7638609091705352, "grad_norm": 0.6504103193703047, "learning_rate": 5.247688564476886e-06, "loss": 0.591, "step": 26163 }, { "epoch": 0.7638901053983825, "grad_norm": 0.6441251934951068, "learning_rate": 5.247039740470398e-06, "loss": 0.6022, "step": 26164 }, { "epoch": 0.7639193016262299, "grad_norm": 0.6381345299807264, "learning_rate": 5.246390916463909e-06, "loss": 0.5349, "step": 26165 }, { "epoch": 0.7639484978540773, "grad_norm": 0.6614145578557572, "learning_rate": 5.245742092457421e-06, "loss": 0.6369, "step": 26166 }, { "epoch": 0.7639776940819246, "grad_norm": 0.668898234011033, "learning_rate": 5.245093268450933e-06, "loss": 0.6401, "step": 26167 }, { "epoch": 0.764006890309772, "grad_norm": 0.6249647835727097, "learning_rate": 5.244444444444445e-06, "loss": 0.5623, "step": 26168 }, { "epoch": 0.7640360865376193, "grad_norm": 0.6442918758734946, "learning_rate": 5.243795620437957e-06, "loss": 0.5906, "step": 26169 }, { "epoch": 0.7640652827654667, "grad_norm": 0.7124721635113848, "learning_rate": 5.243146796431469e-06, "loss": 0.6703, "step": 26170 }, { "epoch": 0.7640944789933141, "grad_norm": 0.6519914305820812, "learning_rate": 5.24249797242498e-06, "loss": 0.6144, "step": 26171 }, { "epoch": 0.7641236752211614, "grad_norm": 0.6370947642908567, "learning_rate": 5.241849148418492e-06, "loss": 0.5644, "step": 26172 }, { "epoch": 0.7641528714490088, "grad_norm": 0.6833653963603472, "learning_rate": 5.241200324412003e-06, "loss": 0.684, "step": 26173 }, { "epoch": 0.7641820676768561, "grad_norm": 0.6335302570279436, "learning_rate": 5.2405515004055154e-06, "loss": 0.5763, "step": 26174 }, { "epoch": 0.7642112639047035, "grad_norm": 0.6670416774941379, "learning_rate": 5.239902676399027e-06, "loss": 0.6593, "step": 26175 }, { "epoch": 0.7642404601325509, "grad_norm": 0.6114922040651589, "learning_rate": 5.239253852392539e-06, "loss": 0.53, "step": 26176 }, { "epoch": 0.7642696563603982, "grad_norm": 0.5821761016719926, "learning_rate": 5.2386050283860515e-06, "loss": 0.5136, "step": 26177 }, { "epoch": 0.7642988525882456, "grad_norm": 0.6582945346749909, "learning_rate": 5.237956204379563e-06, "loss": 0.6265, "step": 26178 }, { "epoch": 0.764328048816093, "grad_norm": 0.636206894079016, "learning_rate": 5.237307380373075e-06, "loss": 0.5438, "step": 26179 }, { "epoch": 0.7643572450439403, "grad_norm": 0.6248568461748657, "learning_rate": 5.236658556366586e-06, "loss": 0.5705, "step": 26180 }, { "epoch": 0.7643864412717877, "grad_norm": 0.6334355879304104, "learning_rate": 5.236009732360098e-06, "loss": 0.5849, "step": 26181 }, { "epoch": 0.764415637499635, "grad_norm": 0.6933332240524546, "learning_rate": 5.235360908353609e-06, "loss": 0.6769, "step": 26182 }, { "epoch": 0.7644448337274824, "grad_norm": 0.6477735474395612, "learning_rate": 5.234712084347121e-06, "loss": 0.5961, "step": 26183 }, { "epoch": 0.7644740299553298, "grad_norm": 0.6243723176788992, "learning_rate": 5.234063260340632e-06, "loss": 0.5308, "step": 26184 }, { "epoch": 0.7645032261831771, "grad_norm": 0.6504301081634042, "learning_rate": 5.233414436334144e-06, "loss": 0.6259, "step": 26185 }, { "epoch": 0.7645324224110245, "grad_norm": 0.7508042425655317, "learning_rate": 5.232765612327656e-06, "loss": 0.7125, "step": 26186 }, { "epoch": 0.7645616186388718, "grad_norm": 0.6294114544599644, "learning_rate": 5.232116788321168e-06, "loss": 0.6222, "step": 26187 }, { "epoch": 0.7645908148667192, "grad_norm": 0.7124360556346528, "learning_rate": 5.23146796431468e-06, "loss": 0.7172, "step": 26188 }, { "epoch": 0.7646200110945666, "grad_norm": 0.6201389221970991, "learning_rate": 5.2308191403081924e-06, "loss": 0.5621, "step": 26189 }, { "epoch": 0.7646492073224139, "grad_norm": 0.6946057445458367, "learning_rate": 5.230170316301704e-06, "loss": 0.6312, "step": 26190 }, { "epoch": 0.7646784035502613, "grad_norm": 0.668583660350995, "learning_rate": 5.229521492295216e-06, "loss": 0.6567, "step": 26191 }, { "epoch": 0.7647075997781086, "grad_norm": 0.6355460676832287, "learning_rate": 5.228872668288727e-06, "loss": 0.599, "step": 26192 }, { "epoch": 0.764736796005956, "grad_norm": 0.6250125427692381, "learning_rate": 5.228223844282239e-06, "loss": 0.5842, "step": 26193 }, { "epoch": 0.7647659922338034, "grad_norm": 0.6800217106753005, "learning_rate": 5.22757502027575e-06, "loss": 0.6563, "step": 26194 }, { "epoch": 0.7647951884616507, "grad_norm": 0.6564156443661737, "learning_rate": 5.226926196269262e-06, "loss": 0.6409, "step": 26195 }, { "epoch": 0.7648243846894981, "grad_norm": 0.633391985210958, "learning_rate": 5.226277372262775e-06, "loss": 0.5544, "step": 26196 }, { "epoch": 0.7648535809173455, "grad_norm": 0.7056291497384464, "learning_rate": 5.225628548256286e-06, "loss": 0.6701, "step": 26197 }, { "epoch": 0.7648827771451928, "grad_norm": 0.6855768280775185, "learning_rate": 5.224979724249798e-06, "loss": 0.5884, "step": 26198 }, { "epoch": 0.7649119733730402, "grad_norm": 0.6527777736954417, "learning_rate": 5.224330900243309e-06, "loss": 0.6231, "step": 26199 }, { "epoch": 0.7649411696008875, "grad_norm": 0.6480741844792722, "learning_rate": 5.223682076236821e-06, "loss": 0.5805, "step": 26200 }, { "epoch": 0.7649703658287349, "grad_norm": 0.6210986838334848, "learning_rate": 5.2230332522303325e-06, "loss": 0.5363, "step": 26201 }, { "epoch": 0.7649995620565823, "grad_norm": 0.6724201421022662, "learning_rate": 5.2223844282238446e-06, "loss": 0.5974, "step": 26202 }, { "epoch": 0.7650287582844296, "grad_norm": 0.6701029920115863, "learning_rate": 5.221735604217356e-06, "loss": 0.6498, "step": 26203 }, { "epoch": 0.765057954512277, "grad_norm": 0.6656249064988722, "learning_rate": 5.221086780210868e-06, "loss": 0.6202, "step": 26204 }, { "epoch": 0.7650871507401243, "grad_norm": 0.6986289064707084, "learning_rate": 5.220437956204381e-06, "loss": 0.6863, "step": 26205 }, { "epoch": 0.7651163469679717, "grad_norm": 0.6599904208996044, "learning_rate": 5.219789132197892e-06, "loss": 0.5887, "step": 26206 }, { "epoch": 0.7651455431958191, "grad_norm": 0.6045162176523, "learning_rate": 5.219140308191404e-06, "loss": 0.5387, "step": 26207 }, { "epoch": 0.7651747394236664, "grad_norm": 0.6965780489075896, "learning_rate": 5.218491484184916e-06, "loss": 0.7027, "step": 26208 }, { "epoch": 0.7652039356515138, "grad_norm": 0.7147360859037721, "learning_rate": 5.217842660178427e-06, "loss": 0.6246, "step": 26209 }, { "epoch": 0.7652331318793612, "grad_norm": 0.694323718507783, "learning_rate": 5.217193836171939e-06, "loss": 0.6646, "step": 26210 }, { "epoch": 0.7652623281072085, "grad_norm": 0.6140078627089299, "learning_rate": 5.21654501216545e-06, "loss": 0.5535, "step": 26211 }, { "epoch": 0.7652915243350559, "grad_norm": 0.6613036365251587, "learning_rate": 5.215896188158962e-06, "loss": 0.6356, "step": 26212 }, { "epoch": 0.7653207205629032, "grad_norm": 0.7356106900838801, "learning_rate": 5.2152473641524735e-06, "loss": 0.7183, "step": 26213 }, { "epoch": 0.7653499167907506, "grad_norm": 0.5914141886702455, "learning_rate": 5.2145985401459855e-06, "loss": 0.4934, "step": 26214 }, { "epoch": 0.765379113018598, "grad_norm": 0.6549203778858605, "learning_rate": 5.213949716139498e-06, "loss": 0.5977, "step": 26215 }, { "epoch": 0.7654083092464453, "grad_norm": 0.6199356442824052, "learning_rate": 5.2133008921330096e-06, "loss": 0.5123, "step": 26216 }, { "epoch": 0.7654375054742927, "grad_norm": 0.6688928482902319, "learning_rate": 5.212652068126522e-06, "loss": 0.664, "step": 26217 }, { "epoch": 0.76546670170214, "grad_norm": 0.6470760137419407, "learning_rate": 5.212003244120033e-06, "loss": 0.6104, "step": 26218 }, { "epoch": 0.7654958979299874, "grad_norm": 0.6205651210584207, "learning_rate": 5.211354420113545e-06, "loss": 0.5858, "step": 26219 }, { "epoch": 0.7655250941578348, "grad_norm": 0.6280138721592281, "learning_rate": 5.210705596107056e-06, "loss": 0.5937, "step": 26220 }, { "epoch": 0.7655542903856821, "grad_norm": 0.5758587147088513, "learning_rate": 5.210056772100568e-06, "loss": 0.4932, "step": 26221 }, { "epoch": 0.7655834866135295, "grad_norm": 0.6654727588556629, "learning_rate": 5.209407948094079e-06, "loss": 0.6521, "step": 26222 }, { "epoch": 0.7656126828413768, "grad_norm": 0.6426749115182488, "learning_rate": 5.208759124087591e-06, "loss": 0.6381, "step": 26223 }, { "epoch": 0.7656418790692242, "grad_norm": 0.675333167581397, "learning_rate": 5.208110300081104e-06, "loss": 0.6374, "step": 26224 }, { "epoch": 0.7656710752970716, "grad_norm": 0.5895181682037413, "learning_rate": 5.207461476074615e-06, "loss": 0.5042, "step": 26225 }, { "epoch": 0.7657002715249189, "grad_norm": 0.6464978078234062, "learning_rate": 5.206812652068127e-06, "loss": 0.6048, "step": 26226 }, { "epoch": 0.7657294677527663, "grad_norm": 0.6692247261180725, "learning_rate": 5.2061638280616385e-06, "loss": 0.6588, "step": 26227 }, { "epoch": 0.7657586639806137, "grad_norm": 0.689653565962633, "learning_rate": 5.2055150040551505e-06, "loss": 0.7142, "step": 26228 }, { "epoch": 0.765787860208461, "grad_norm": 0.6137736530863814, "learning_rate": 5.2048661800486625e-06, "loss": 0.5493, "step": 26229 }, { "epoch": 0.7658170564363084, "grad_norm": 0.6599394450722653, "learning_rate": 5.204217356042174e-06, "loss": 0.6569, "step": 26230 }, { "epoch": 0.7658462526641558, "grad_norm": 0.6685956263661764, "learning_rate": 5.203568532035686e-06, "loss": 0.6468, "step": 26231 }, { "epoch": 0.7658754488920032, "grad_norm": 0.6382491707490016, "learning_rate": 5.202919708029197e-06, "loss": 0.5731, "step": 26232 }, { "epoch": 0.7659046451198506, "grad_norm": 0.6679244814564912, "learning_rate": 5.202270884022709e-06, "loss": 0.661, "step": 26233 }, { "epoch": 0.7659338413476979, "grad_norm": 0.6309040348956783, "learning_rate": 5.201622060016222e-06, "loss": 0.5999, "step": 26234 }, { "epoch": 0.7659630375755453, "grad_norm": 0.6638875834913927, "learning_rate": 5.200973236009733e-06, "loss": 0.6459, "step": 26235 }, { "epoch": 0.7659922338033927, "grad_norm": 0.619136683320765, "learning_rate": 5.200324412003245e-06, "loss": 0.5712, "step": 26236 }, { "epoch": 0.76602143003124, "grad_norm": 0.6938127306157753, "learning_rate": 5.199675587996756e-06, "loss": 0.6328, "step": 26237 }, { "epoch": 0.7660506262590874, "grad_norm": 0.6191447822689033, "learning_rate": 5.199026763990268e-06, "loss": 0.5837, "step": 26238 }, { "epoch": 0.7660798224869347, "grad_norm": 0.6546783439207307, "learning_rate": 5.198377939983779e-06, "loss": 0.633, "step": 26239 }, { "epoch": 0.7661090187147821, "grad_norm": 0.5971269085015134, "learning_rate": 5.1977291159772914e-06, "loss": 0.5228, "step": 26240 }, { "epoch": 0.7661382149426295, "grad_norm": 0.6492787745239175, "learning_rate": 5.197080291970803e-06, "loss": 0.6005, "step": 26241 }, { "epoch": 0.7661674111704768, "grad_norm": 0.6440677532882807, "learning_rate": 5.196431467964315e-06, "loss": 0.5854, "step": 26242 }, { "epoch": 0.7661966073983242, "grad_norm": 0.6298851882296628, "learning_rate": 5.1957826439578275e-06, "loss": 0.6233, "step": 26243 }, { "epoch": 0.7662258036261715, "grad_norm": 0.6106834495655349, "learning_rate": 5.195133819951339e-06, "loss": 0.5326, "step": 26244 }, { "epoch": 0.7662549998540189, "grad_norm": 0.6064226008128888, "learning_rate": 5.194484995944851e-06, "loss": 0.5339, "step": 26245 }, { "epoch": 0.7662841960818663, "grad_norm": 0.6119372710339412, "learning_rate": 5.193836171938362e-06, "loss": 0.5454, "step": 26246 }, { "epoch": 0.7663133923097136, "grad_norm": 0.6726231306002952, "learning_rate": 5.193187347931874e-06, "loss": 0.6035, "step": 26247 }, { "epoch": 0.766342588537561, "grad_norm": 0.6537974343219716, "learning_rate": 5.192538523925386e-06, "loss": 0.6217, "step": 26248 }, { "epoch": 0.7663717847654083, "grad_norm": 0.6702327147512526, "learning_rate": 5.191889699918897e-06, "loss": 0.6434, "step": 26249 }, { "epoch": 0.7664009809932557, "grad_norm": 0.6008613821224876, "learning_rate": 5.191240875912409e-06, "loss": 0.4943, "step": 26250 }, { "epoch": 0.7664301772211031, "grad_norm": 0.6522346179267228, "learning_rate": 5.19059205190592e-06, "loss": 0.6243, "step": 26251 }, { "epoch": 0.7664593734489504, "grad_norm": 0.6720883316262668, "learning_rate": 5.189943227899432e-06, "loss": 0.6188, "step": 26252 }, { "epoch": 0.7664885696767978, "grad_norm": 0.6464239379782755, "learning_rate": 5.189294403892945e-06, "loss": 0.5844, "step": 26253 }, { "epoch": 0.7665177659046452, "grad_norm": 0.6853916356823586, "learning_rate": 5.1886455798864564e-06, "loss": 0.6416, "step": 26254 }, { "epoch": 0.7665469621324925, "grad_norm": 0.618216820943731, "learning_rate": 5.1879967558799685e-06, "loss": 0.5718, "step": 26255 }, { "epoch": 0.7665761583603399, "grad_norm": 0.6316141068163106, "learning_rate": 5.18734793187348e-06, "loss": 0.5681, "step": 26256 }, { "epoch": 0.7666053545881872, "grad_norm": 0.644590708096486, "learning_rate": 5.186699107866992e-06, "loss": 0.6133, "step": 26257 }, { "epoch": 0.7666345508160346, "grad_norm": 0.659775427709826, "learning_rate": 5.186050283860503e-06, "loss": 0.6216, "step": 26258 }, { "epoch": 0.766663747043882, "grad_norm": 0.6562011589126787, "learning_rate": 5.185401459854015e-06, "loss": 0.6254, "step": 26259 }, { "epoch": 0.7666929432717293, "grad_norm": 0.655448063897602, "learning_rate": 5.184752635847526e-06, "loss": 0.6228, "step": 26260 }, { "epoch": 0.7667221394995767, "grad_norm": 0.6573063722110051, "learning_rate": 5.184103811841038e-06, "loss": 0.6318, "step": 26261 }, { "epoch": 0.766751335727424, "grad_norm": 0.6535071397383877, "learning_rate": 5.183454987834551e-06, "loss": 0.5921, "step": 26262 }, { "epoch": 0.7667805319552714, "grad_norm": 0.6863931205197986, "learning_rate": 5.182806163828062e-06, "loss": 0.6379, "step": 26263 }, { "epoch": 0.7668097281831188, "grad_norm": 0.8549023210003722, "learning_rate": 5.182157339821574e-06, "loss": 0.6593, "step": 26264 }, { "epoch": 0.7668389244109661, "grad_norm": 0.6462616997450625, "learning_rate": 5.181508515815085e-06, "loss": 0.6155, "step": 26265 }, { "epoch": 0.7668681206388135, "grad_norm": 0.66520840660205, "learning_rate": 5.180859691808597e-06, "loss": 0.6135, "step": 26266 }, { "epoch": 0.7668973168666608, "grad_norm": 0.634005796212918, "learning_rate": 5.180210867802109e-06, "loss": 0.6251, "step": 26267 }, { "epoch": 0.7669265130945082, "grad_norm": 0.6080936562294038, "learning_rate": 5.1795620437956206e-06, "loss": 0.5501, "step": 26268 }, { "epoch": 0.7669557093223556, "grad_norm": 0.6128712383634778, "learning_rate": 5.178913219789133e-06, "loss": 0.5792, "step": 26269 }, { "epoch": 0.7669849055502029, "grad_norm": 0.6473154260051183, "learning_rate": 5.178264395782644e-06, "loss": 0.6048, "step": 26270 }, { "epoch": 0.7670141017780503, "grad_norm": 0.6043854465964029, "learning_rate": 5.177615571776156e-06, "loss": 0.5376, "step": 26271 }, { "epoch": 0.7670432980058977, "grad_norm": 0.6091917475191025, "learning_rate": 5.176966747769669e-06, "loss": 0.5335, "step": 26272 }, { "epoch": 0.767072494233745, "grad_norm": 0.6532708279187365, "learning_rate": 5.17631792376318e-06, "loss": 0.6059, "step": 26273 }, { "epoch": 0.7671016904615924, "grad_norm": 0.5933011973539601, "learning_rate": 5.175669099756692e-06, "loss": 0.5076, "step": 26274 }, { "epoch": 0.7671308866894397, "grad_norm": 0.659437081411666, "learning_rate": 5.175020275750203e-06, "loss": 0.6207, "step": 26275 }, { "epoch": 0.7671600829172871, "grad_norm": 0.6551818469420114, "learning_rate": 5.174371451743715e-06, "loss": 0.5693, "step": 26276 }, { "epoch": 0.7671892791451345, "grad_norm": 0.7087684362948414, "learning_rate": 5.173722627737226e-06, "loss": 0.7557, "step": 26277 }, { "epoch": 0.7672184753729818, "grad_norm": 0.6285455237730471, "learning_rate": 5.173073803730738e-06, "loss": 0.5923, "step": 26278 }, { "epoch": 0.7672476716008292, "grad_norm": 0.6007806013955715, "learning_rate": 5.1724249797242495e-06, "loss": 0.5622, "step": 26279 }, { "epoch": 0.7672768678286765, "grad_norm": 0.6063531177603981, "learning_rate": 5.1717761557177615e-06, "loss": 0.5428, "step": 26280 }, { "epoch": 0.7673060640565239, "grad_norm": 0.6355626215013095, "learning_rate": 5.171127331711274e-06, "loss": 0.5906, "step": 26281 }, { "epoch": 0.7673352602843713, "grad_norm": 0.6428024655123546, "learning_rate": 5.1704785077047856e-06, "loss": 0.6108, "step": 26282 }, { "epoch": 0.7673644565122186, "grad_norm": 0.6179619756704495, "learning_rate": 5.169829683698298e-06, "loss": 0.5555, "step": 26283 }, { "epoch": 0.767393652740066, "grad_norm": 0.6215663071815075, "learning_rate": 5.169180859691809e-06, "loss": 0.543, "step": 26284 }, { "epoch": 0.7674228489679134, "grad_norm": 0.6384462868593205, "learning_rate": 5.168532035685321e-06, "loss": 0.5858, "step": 26285 }, { "epoch": 0.7674520451957607, "grad_norm": 0.6460575205773825, "learning_rate": 5.167883211678833e-06, "loss": 0.5769, "step": 26286 }, { "epoch": 0.7674812414236081, "grad_norm": 0.6799358287624134, "learning_rate": 5.167234387672344e-06, "loss": 0.6256, "step": 26287 }, { "epoch": 0.7675104376514554, "grad_norm": 0.6881610984076247, "learning_rate": 5.166585563665856e-06, "loss": 0.6559, "step": 26288 }, { "epoch": 0.7675396338793028, "grad_norm": 0.637406086999001, "learning_rate": 5.165936739659367e-06, "loss": 0.5918, "step": 26289 }, { "epoch": 0.7675688301071502, "grad_norm": 0.6178631365294349, "learning_rate": 5.16528791565288e-06, "loss": 0.5507, "step": 26290 }, { "epoch": 0.7675980263349975, "grad_norm": 0.6149576943146907, "learning_rate": 5.164639091646392e-06, "loss": 0.5484, "step": 26291 }, { "epoch": 0.7676272225628449, "grad_norm": 0.7241322954424382, "learning_rate": 5.163990267639903e-06, "loss": 0.6946, "step": 26292 }, { "epoch": 0.7676564187906922, "grad_norm": 0.6483493505537882, "learning_rate": 5.163341443633415e-06, "loss": 0.5961, "step": 26293 }, { "epoch": 0.7676856150185396, "grad_norm": 0.6739761648094085, "learning_rate": 5.1626926196269265e-06, "loss": 0.6746, "step": 26294 }, { "epoch": 0.767714811246387, "grad_norm": 0.6460360284993927, "learning_rate": 5.1620437956204385e-06, "loss": 0.6504, "step": 26295 }, { "epoch": 0.7677440074742343, "grad_norm": 0.6615791661749453, "learning_rate": 5.16139497161395e-06, "loss": 0.6313, "step": 26296 }, { "epoch": 0.7677732037020817, "grad_norm": 0.6105738357014046, "learning_rate": 5.160746147607462e-06, "loss": 0.5491, "step": 26297 }, { "epoch": 0.767802399929929, "grad_norm": 0.6427679413785998, "learning_rate": 5.160097323600973e-06, "loss": 0.6051, "step": 26298 }, { "epoch": 0.7678315961577764, "grad_norm": 0.6922781267346021, "learning_rate": 5.159448499594485e-06, "loss": 0.6675, "step": 26299 }, { "epoch": 0.7678607923856238, "grad_norm": 0.6316479871595418, "learning_rate": 5.158799675587998e-06, "loss": 0.5847, "step": 26300 }, { "epoch": 0.7678899886134711, "grad_norm": 0.7319121334341118, "learning_rate": 5.158150851581509e-06, "loss": 0.6969, "step": 26301 }, { "epoch": 0.7679191848413185, "grad_norm": 0.652503044850456, "learning_rate": 5.157502027575021e-06, "loss": 0.5828, "step": 26302 }, { "epoch": 0.7679483810691659, "grad_norm": 0.674103219019044, "learning_rate": 5.156853203568532e-06, "loss": 0.6737, "step": 26303 }, { "epoch": 0.7679775772970132, "grad_norm": 0.6834141640537518, "learning_rate": 5.156204379562044e-06, "loss": 0.6971, "step": 26304 }, { "epoch": 0.7680067735248606, "grad_norm": 0.6205365800116656, "learning_rate": 5.155555555555556e-06, "loss": 0.5763, "step": 26305 }, { "epoch": 0.7680359697527079, "grad_norm": 0.6665945668498451, "learning_rate": 5.1549067315490674e-06, "loss": 0.6729, "step": 26306 }, { "epoch": 0.7680651659805553, "grad_norm": 0.6026835673316764, "learning_rate": 5.1542579075425795e-06, "loss": 0.5831, "step": 26307 }, { "epoch": 0.7680943622084027, "grad_norm": 0.7108285962041401, "learning_rate": 5.153609083536091e-06, "loss": 0.7366, "step": 26308 }, { "epoch": 0.76812355843625, "grad_norm": 0.640533628599391, "learning_rate": 5.1529602595296035e-06, "loss": 0.5665, "step": 26309 }, { "epoch": 0.7681527546640974, "grad_norm": 0.6476307550279555, "learning_rate": 5.1523114355231156e-06, "loss": 0.6048, "step": 26310 }, { "epoch": 0.7681819508919447, "grad_norm": 0.6224738564209961, "learning_rate": 5.151662611516627e-06, "loss": 0.5412, "step": 26311 }, { "epoch": 0.7682111471197921, "grad_norm": 0.6277759729233582, "learning_rate": 5.151013787510139e-06, "loss": 0.5742, "step": 26312 }, { "epoch": 0.7682403433476395, "grad_norm": 0.6831464482108491, "learning_rate": 5.15036496350365e-06, "loss": 0.6807, "step": 26313 }, { "epoch": 0.7682695395754868, "grad_norm": 0.6367321823839465, "learning_rate": 5.149716139497162e-06, "loss": 0.5585, "step": 26314 }, { "epoch": 0.7682987358033342, "grad_norm": 0.6434779270428731, "learning_rate": 5.149067315490673e-06, "loss": 0.5674, "step": 26315 }, { "epoch": 0.7683279320311815, "grad_norm": 0.6001064237065812, "learning_rate": 5.148418491484185e-06, "loss": 0.53, "step": 26316 }, { "epoch": 0.7683571282590289, "grad_norm": 0.6327050363581399, "learning_rate": 5.147769667477696e-06, "loss": 0.602, "step": 26317 }, { "epoch": 0.7683863244868763, "grad_norm": 0.6570524460995373, "learning_rate": 5.147120843471208e-06, "loss": 0.6354, "step": 26318 }, { "epoch": 0.7684155207147236, "grad_norm": 0.6035764841423964, "learning_rate": 5.146472019464721e-06, "loss": 0.5108, "step": 26319 }, { "epoch": 0.768444716942571, "grad_norm": 0.6332900525117197, "learning_rate": 5.1458231954582324e-06, "loss": 0.6004, "step": 26320 }, { "epoch": 0.7684739131704184, "grad_norm": 0.5946212354122947, "learning_rate": 5.1451743714517445e-06, "loss": 0.517, "step": 26321 }, { "epoch": 0.7685031093982657, "grad_norm": 0.6168531110993233, "learning_rate": 5.144525547445256e-06, "loss": 0.5479, "step": 26322 }, { "epoch": 0.7685323056261131, "grad_norm": 0.6838030307084988, "learning_rate": 5.143876723438768e-06, "loss": 0.6138, "step": 26323 }, { "epoch": 0.7685615018539604, "grad_norm": 0.6603887218594324, "learning_rate": 5.14322789943228e-06, "loss": 0.5714, "step": 26324 }, { "epoch": 0.7685906980818078, "grad_norm": 0.6346412225560532, "learning_rate": 5.142579075425791e-06, "loss": 0.6014, "step": 26325 }, { "epoch": 0.7686198943096552, "grad_norm": 0.6478872922235231, "learning_rate": 5.141930251419303e-06, "loss": 0.6271, "step": 26326 }, { "epoch": 0.7686490905375025, "grad_norm": 0.6276534430331485, "learning_rate": 5.141281427412814e-06, "loss": 0.5646, "step": 26327 }, { "epoch": 0.7686782867653499, "grad_norm": 0.5982471163720021, "learning_rate": 5.140632603406327e-06, "loss": 0.5131, "step": 26328 }, { "epoch": 0.7687074829931972, "grad_norm": 0.6408189970847363, "learning_rate": 5.139983779399839e-06, "loss": 0.6116, "step": 26329 }, { "epoch": 0.7687366792210446, "grad_norm": 0.7116539505016518, "learning_rate": 5.13933495539335e-06, "loss": 0.6195, "step": 26330 }, { "epoch": 0.768765875448892, "grad_norm": 0.6289577087646865, "learning_rate": 5.138686131386862e-06, "loss": 0.5709, "step": 26331 }, { "epoch": 0.7687950716767393, "grad_norm": 0.6737691390014517, "learning_rate": 5.138037307380373e-06, "loss": 0.6353, "step": 26332 }, { "epoch": 0.7688242679045867, "grad_norm": 0.6509027185647557, "learning_rate": 5.137388483373885e-06, "loss": 0.5963, "step": 26333 }, { "epoch": 0.768853464132434, "grad_norm": 0.6277298631425007, "learning_rate": 5.136739659367397e-06, "loss": 0.5533, "step": 26334 }, { "epoch": 0.7688826603602814, "grad_norm": 0.6347903673689025, "learning_rate": 5.136090835360909e-06, "loss": 0.5741, "step": 26335 }, { "epoch": 0.7689118565881288, "grad_norm": 0.6334818641790406, "learning_rate": 5.13544201135442e-06, "loss": 0.5761, "step": 26336 }, { "epoch": 0.7689410528159761, "grad_norm": 0.6310141767579218, "learning_rate": 5.134793187347932e-06, "loss": 0.5685, "step": 26337 }, { "epoch": 0.7689702490438235, "grad_norm": 0.647973858812584, "learning_rate": 5.134144363341445e-06, "loss": 0.5972, "step": 26338 }, { "epoch": 0.7689994452716709, "grad_norm": 0.6048495544080409, "learning_rate": 5.133495539334956e-06, "loss": 0.5416, "step": 26339 }, { "epoch": 0.7690286414995182, "grad_norm": 0.688398002205055, "learning_rate": 5.132846715328468e-06, "loss": 0.7042, "step": 26340 }, { "epoch": 0.7690578377273656, "grad_norm": 0.6291936846760517, "learning_rate": 5.132197891321979e-06, "loss": 0.578, "step": 26341 }, { "epoch": 0.7690870339552129, "grad_norm": 0.6549516552264992, "learning_rate": 5.131549067315491e-06, "loss": 0.6411, "step": 26342 }, { "epoch": 0.7691162301830603, "grad_norm": 0.6267610227261943, "learning_rate": 5.130900243309003e-06, "loss": 0.5719, "step": 26343 }, { "epoch": 0.7691454264109077, "grad_norm": 0.6268397188907692, "learning_rate": 5.130251419302514e-06, "loss": 0.5717, "step": 26344 }, { "epoch": 0.769174622638755, "grad_norm": 0.6740997234562934, "learning_rate": 5.129602595296026e-06, "loss": 0.6078, "step": 26345 }, { "epoch": 0.7692038188666024, "grad_norm": 0.6589084609132834, "learning_rate": 5.1289537712895375e-06, "loss": 0.6158, "step": 26346 }, { "epoch": 0.7692330150944497, "grad_norm": 0.6481035809294895, "learning_rate": 5.12830494728305e-06, "loss": 0.6173, "step": 26347 }, { "epoch": 0.7692622113222971, "grad_norm": 0.6213766983356076, "learning_rate": 5.127656123276562e-06, "loss": 0.5474, "step": 26348 }, { "epoch": 0.7692914075501445, "grad_norm": 0.6608763313301799, "learning_rate": 5.127007299270074e-06, "loss": 0.6516, "step": 26349 }, { "epoch": 0.7693206037779918, "grad_norm": 0.6657012803371058, "learning_rate": 5.126358475263586e-06, "loss": 0.6432, "step": 26350 }, { "epoch": 0.7693498000058392, "grad_norm": 0.634941099058523, "learning_rate": 5.125709651257097e-06, "loss": 0.6349, "step": 26351 }, { "epoch": 0.7693789962336867, "grad_norm": 0.6536798958478615, "learning_rate": 5.125060827250609e-06, "loss": 0.6, "step": 26352 }, { "epoch": 0.769408192461534, "grad_norm": 0.6502223539674146, "learning_rate": 5.12441200324412e-06, "loss": 0.6423, "step": 26353 }, { "epoch": 0.7694373886893814, "grad_norm": 0.6141744621556603, "learning_rate": 5.123763179237632e-06, "loss": 0.5896, "step": 26354 }, { "epoch": 0.7694665849172287, "grad_norm": 0.7206432899375108, "learning_rate": 5.123114355231143e-06, "loss": 0.7019, "step": 26355 }, { "epoch": 0.7694957811450761, "grad_norm": 0.6523492466735683, "learning_rate": 5.122465531224656e-06, "loss": 0.6157, "step": 26356 }, { "epoch": 0.7695249773729235, "grad_norm": 0.6459420415903877, "learning_rate": 5.121816707218168e-06, "loss": 0.6078, "step": 26357 }, { "epoch": 0.7695541736007708, "grad_norm": 0.7013141108359511, "learning_rate": 5.121167883211679e-06, "loss": 0.7033, "step": 26358 }, { "epoch": 0.7695833698286182, "grad_norm": 0.6921909419629709, "learning_rate": 5.120519059205191e-06, "loss": 0.6814, "step": 26359 }, { "epoch": 0.7696125660564656, "grad_norm": 0.6863345181520485, "learning_rate": 5.1198702351987025e-06, "loss": 0.691, "step": 26360 }, { "epoch": 0.7696417622843129, "grad_norm": 0.6432884572712864, "learning_rate": 5.1192214111922145e-06, "loss": 0.6088, "step": 26361 }, { "epoch": 0.7696709585121603, "grad_norm": 0.6669262320643532, "learning_rate": 5.118572587185726e-06, "loss": 0.6191, "step": 26362 }, { "epoch": 0.7697001547400076, "grad_norm": 0.6524515180495681, "learning_rate": 5.117923763179238e-06, "loss": 0.6379, "step": 26363 }, { "epoch": 0.769729350967855, "grad_norm": 0.6186692107294696, "learning_rate": 5.11727493917275e-06, "loss": 0.528, "step": 26364 }, { "epoch": 0.7697585471957024, "grad_norm": 0.6686950337995996, "learning_rate": 5.116626115166261e-06, "loss": 0.641, "step": 26365 }, { "epoch": 0.7697877434235497, "grad_norm": 0.6898460916605914, "learning_rate": 5.115977291159774e-06, "loss": 0.677, "step": 26366 }, { "epoch": 0.7698169396513971, "grad_norm": 0.6873203147339849, "learning_rate": 5.115328467153286e-06, "loss": 0.6396, "step": 26367 }, { "epoch": 0.7698461358792444, "grad_norm": 0.6270234163930695, "learning_rate": 5.114679643146797e-06, "loss": 0.5942, "step": 26368 }, { "epoch": 0.7698753321070918, "grad_norm": 0.6051053606220809, "learning_rate": 5.114030819140309e-06, "loss": 0.5608, "step": 26369 }, { "epoch": 0.7699045283349392, "grad_norm": 0.6587604414726139, "learning_rate": 5.11338199513382e-06, "loss": 0.6005, "step": 26370 }, { "epoch": 0.7699337245627865, "grad_norm": 0.6686683588215684, "learning_rate": 5.112733171127332e-06, "loss": 0.5795, "step": 26371 }, { "epoch": 0.7699629207906339, "grad_norm": 0.6404299959369166, "learning_rate": 5.1120843471208435e-06, "loss": 0.5878, "step": 26372 }, { "epoch": 0.7699921170184812, "grad_norm": 0.6760282225929618, "learning_rate": 5.1114355231143555e-06, "loss": 0.6677, "step": 26373 }, { "epoch": 0.7700213132463286, "grad_norm": 0.5995304685983878, "learning_rate": 5.110786699107867e-06, "loss": 0.5286, "step": 26374 }, { "epoch": 0.770050509474176, "grad_norm": 0.5939027975169678, "learning_rate": 5.1101378751013795e-06, "loss": 0.533, "step": 26375 }, { "epoch": 0.7700797057020233, "grad_norm": 0.687226117187322, "learning_rate": 5.1094890510948916e-06, "loss": 0.7031, "step": 26376 }, { "epoch": 0.7701089019298707, "grad_norm": 0.6585594962296042, "learning_rate": 5.108840227088403e-06, "loss": 0.6689, "step": 26377 }, { "epoch": 0.770138098157718, "grad_norm": 0.6491333786627749, "learning_rate": 5.108191403081915e-06, "loss": 0.6307, "step": 26378 }, { "epoch": 0.7701672943855654, "grad_norm": 0.7447185869573495, "learning_rate": 5.107542579075426e-06, "loss": 0.6074, "step": 26379 }, { "epoch": 0.7701964906134128, "grad_norm": 0.6199781695220459, "learning_rate": 5.106893755068938e-06, "loss": 0.5214, "step": 26380 }, { "epoch": 0.7702256868412601, "grad_norm": 0.6707610742691688, "learning_rate": 5.106244931062449e-06, "loss": 0.6465, "step": 26381 }, { "epoch": 0.7702548830691075, "grad_norm": 0.6629193271776774, "learning_rate": 5.105596107055961e-06, "loss": 0.6486, "step": 26382 }, { "epoch": 0.7702840792969549, "grad_norm": 0.652132452700185, "learning_rate": 5.104947283049473e-06, "loss": 0.6165, "step": 26383 }, { "epoch": 0.7703132755248022, "grad_norm": 0.6325817259700662, "learning_rate": 5.104298459042984e-06, "loss": 0.5915, "step": 26384 }, { "epoch": 0.7703424717526496, "grad_norm": 0.6568012612683358, "learning_rate": 5.103649635036497e-06, "loss": 0.6241, "step": 26385 }, { "epoch": 0.7703716679804969, "grad_norm": 0.8418959784022761, "learning_rate": 5.103000811030009e-06, "loss": 0.6966, "step": 26386 }, { "epoch": 0.7704008642083443, "grad_norm": 0.6558509604977332, "learning_rate": 5.1023519870235205e-06, "loss": 0.6163, "step": 26387 }, { "epoch": 0.7704300604361917, "grad_norm": 0.5607184590228759, "learning_rate": 5.1017031630170325e-06, "loss": 0.4704, "step": 26388 }, { "epoch": 0.770459256664039, "grad_norm": 0.633786719011318, "learning_rate": 5.101054339010544e-06, "loss": 0.5515, "step": 26389 }, { "epoch": 0.7704884528918864, "grad_norm": 0.6233195808117883, "learning_rate": 5.100405515004056e-06, "loss": 0.5687, "step": 26390 }, { "epoch": 0.7705176491197337, "grad_norm": 0.6659352916235156, "learning_rate": 5.099756690997567e-06, "loss": 0.6082, "step": 26391 }, { "epoch": 0.7705468453475811, "grad_norm": 0.6552093978200947, "learning_rate": 5.099107866991079e-06, "loss": 0.6165, "step": 26392 }, { "epoch": 0.7705760415754285, "grad_norm": 0.6053333983587725, "learning_rate": 5.09845904298459e-06, "loss": 0.5362, "step": 26393 }, { "epoch": 0.7706052378032758, "grad_norm": 0.65456230887647, "learning_rate": 5.097810218978103e-06, "loss": 0.5973, "step": 26394 }, { "epoch": 0.7706344340311232, "grad_norm": 0.6829302099728319, "learning_rate": 5.097161394971615e-06, "loss": 0.6547, "step": 26395 }, { "epoch": 0.7706636302589706, "grad_norm": 0.6619391423970247, "learning_rate": 5.096512570965126e-06, "loss": 0.6429, "step": 26396 }, { "epoch": 0.7706928264868179, "grad_norm": 0.6007161216204099, "learning_rate": 5.095863746958638e-06, "loss": 0.512, "step": 26397 }, { "epoch": 0.7707220227146653, "grad_norm": 0.6295881034142258, "learning_rate": 5.095214922952149e-06, "loss": 0.5456, "step": 26398 }, { "epoch": 0.7707512189425126, "grad_norm": 0.6442307947576358, "learning_rate": 5.094566098945661e-06, "loss": 0.6308, "step": 26399 }, { "epoch": 0.77078041517036, "grad_norm": 0.7152858625086336, "learning_rate": 5.093917274939173e-06, "loss": 0.7314, "step": 26400 }, { "epoch": 0.7708096113982074, "grad_norm": 0.6323998195162432, "learning_rate": 5.093268450932685e-06, "loss": 0.601, "step": 26401 }, { "epoch": 0.7708388076260547, "grad_norm": 0.629619774059201, "learning_rate": 5.092619626926197e-06, "loss": 0.5991, "step": 26402 }, { "epoch": 0.7708680038539021, "grad_norm": 0.632510423473758, "learning_rate": 5.091970802919708e-06, "loss": 0.55, "step": 26403 }, { "epoch": 0.7708972000817494, "grad_norm": 0.6283944954036806, "learning_rate": 5.091321978913221e-06, "loss": 0.6223, "step": 26404 }, { "epoch": 0.7709263963095968, "grad_norm": 0.6611438617128692, "learning_rate": 5.090673154906733e-06, "loss": 0.6169, "step": 26405 }, { "epoch": 0.7709555925374442, "grad_norm": 0.6799112147180608, "learning_rate": 5.090024330900244e-06, "loss": 0.6775, "step": 26406 }, { "epoch": 0.7709847887652915, "grad_norm": 0.6706964542108105, "learning_rate": 5.089375506893756e-06, "loss": 0.6133, "step": 26407 }, { "epoch": 0.7710139849931389, "grad_norm": 0.6432316856591186, "learning_rate": 5.088726682887267e-06, "loss": 0.6161, "step": 26408 }, { "epoch": 0.7710431812209863, "grad_norm": 0.6798041489684338, "learning_rate": 5.088077858880779e-06, "loss": 0.6486, "step": 26409 }, { "epoch": 0.7710723774488336, "grad_norm": 0.6255177423352569, "learning_rate": 5.08742903487429e-06, "loss": 0.6021, "step": 26410 }, { "epoch": 0.771101573676681, "grad_norm": 0.6795470565031329, "learning_rate": 5.086780210867802e-06, "loss": 0.7007, "step": 26411 }, { "epoch": 0.7711307699045283, "grad_norm": 0.717853447321529, "learning_rate": 5.0861313868613135e-06, "loss": 0.7224, "step": 26412 }, { "epoch": 0.7711599661323757, "grad_norm": 0.6803856511057492, "learning_rate": 5.085482562854826e-06, "loss": 0.6063, "step": 26413 }, { "epoch": 0.7711891623602231, "grad_norm": 0.6542158177905926, "learning_rate": 5.0848337388483384e-06, "loss": 0.6244, "step": 26414 }, { "epoch": 0.7712183585880704, "grad_norm": 0.6800733201642172, "learning_rate": 5.08418491484185e-06, "loss": 0.6317, "step": 26415 }, { "epoch": 0.7712475548159178, "grad_norm": 0.6587693986714485, "learning_rate": 5.083536090835362e-06, "loss": 0.5981, "step": 26416 }, { "epoch": 0.7712767510437651, "grad_norm": 0.6170846294210599, "learning_rate": 5.082887266828873e-06, "loss": 0.5776, "step": 26417 }, { "epoch": 0.7713059472716125, "grad_norm": 0.6269267458127156, "learning_rate": 5.082238442822385e-06, "loss": 0.5795, "step": 26418 }, { "epoch": 0.7713351434994599, "grad_norm": 0.7418649633718614, "learning_rate": 5.081589618815896e-06, "loss": 0.627, "step": 26419 }, { "epoch": 0.7713643397273072, "grad_norm": 0.6428865826215946, "learning_rate": 5.080940794809408e-06, "loss": 0.6081, "step": 26420 }, { "epoch": 0.7713935359551546, "grad_norm": 0.6231730570990212, "learning_rate": 5.08029197080292e-06, "loss": 0.542, "step": 26421 }, { "epoch": 0.771422732183002, "grad_norm": 0.6808369167124513, "learning_rate": 5.079643146796431e-06, "loss": 0.7206, "step": 26422 }, { "epoch": 0.7714519284108493, "grad_norm": 0.6467141557504178, "learning_rate": 5.078994322789944e-06, "loss": 0.5886, "step": 26423 }, { "epoch": 0.7714811246386967, "grad_norm": 0.664469084229613, "learning_rate": 5.078345498783455e-06, "loss": 0.5995, "step": 26424 }, { "epoch": 0.771510320866544, "grad_norm": 0.6512305084519673, "learning_rate": 5.077696674776967e-06, "loss": 0.5805, "step": 26425 }, { "epoch": 0.7715395170943914, "grad_norm": 0.6389814105131801, "learning_rate": 5.077047850770479e-06, "loss": 0.5882, "step": 26426 }, { "epoch": 0.7715687133222388, "grad_norm": 0.6623022237782862, "learning_rate": 5.0763990267639905e-06, "loss": 0.6051, "step": 26427 }, { "epoch": 0.7715979095500861, "grad_norm": 0.6873297701566571, "learning_rate": 5.0757502027575026e-06, "loss": 0.6635, "step": 26428 }, { "epoch": 0.7716271057779335, "grad_norm": 0.6746233974374359, "learning_rate": 5.075101378751014e-06, "loss": 0.643, "step": 26429 }, { "epoch": 0.7716563020057808, "grad_norm": 0.6226132110673452, "learning_rate": 5.074452554744526e-06, "loss": 0.5827, "step": 26430 }, { "epoch": 0.7716854982336282, "grad_norm": 0.654318529704717, "learning_rate": 5.073803730738037e-06, "loss": 0.6134, "step": 26431 }, { "epoch": 0.7717146944614756, "grad_norm": 0.6449711718025971, "learning_rate": 5.07315490673155e-06, "loss": 0.6151, "step": 26432 }, { "epoch": 0.7717438906893229, "grad_norm": 0.6537462630736337, "learning_rate": 5.072506082725062e-06, "loss": 0.598, "step": 26433 }, { "epoch": 0.7717730869171703, "grad_norm": 0.6478307689482976, "learning_rate": 5.071857258718573e-06, "loss": 0.5861, "step": 26434 }, { "epoch": 0.7718022831450176, "grad_norm": 0.6210078362416038, "learning_rate": 5.071208434712085e-06, "loss": 0.5652, "step": 26435 }, { "epoch": 0.771831479372865, "grad_norm": 0.6298296133121443, "learning_rate": 5.070559610705596e-06, "loss": 0.5793, "step": 26436 }, { "epoch": 0.7718606756007124, "grad_norm": 0.6985902806470247, "learning_rate": 5.069910786699108e-06, "loss": 0.653, "step": 26437 }, { "epoch": 0.7718898718285597, "grad_norm": 0.6629727813815915, "learning_rate": 5.0692619626926195e-06, "loss": 0.6375, "step": 26438 }, { "epoch": 0.7719190680564071, "grad_norm": 0.6839716635922797, "learning_rate": 5.0686131386861315e-06, "loss": 0.6512, "step": 26439 }, { "epoch": 0.7719482642842544, "grad_norm": 0.6157358744598735, "learning_rate": 5.0679643146796435e-06, "loss": 0.5554, "step": 26440 }, { "epoch": 0.7719774605121018, "grad_norm": 0.6529152803070335, "learning_rate": 5.0673154906731555e-06, "loss": 0.5975, "step": 26441 }, { "epoch": 0.7720066567399492, "grad_norm": 0.6045101264238768, "learning_rate": 5.0666666666666676e-06, "loss": 0.5428, "step": 26442 }, { "epoch": 0.7720358529677965, "grad_norm": 0.6477704161937933, "learning_rate": 5.066017842660179e-06, "loss": 0.6002, "step": 26443 }, { "epoch": 0.7720650491956439, "grad_norm": 0.6066365227759252, "learning_rate": 5.065369018653691e-06, "loss": 0.5712, "step": 26444 }, { "epoch": 0.7720942454234913, "grad_norm": 0.6415170870433742, "learning_rate": 5.064720194647203e-06, "loss": 0.5996, "step": 26445 }, { "epoch": 0.7721234416513386, "grad_norm": 0.6917739499503882, "learning_rate": 5.064071370640714e-06, "loss": 0.6615, "step": 26446 }, { "epoch": 0.772152637879186, "grad_norm": 0.6363136660315918, "learning_rate": 5.063422546634226e-06, "loss": 0.5719, "step": 26447 }, { "epoch": 0.7721818341070333, "grad_norm": 0.6395022350931241, "learning_rate": 5.062773722627737e-06, "loss": 0.5783, "step": 26448 }, { "epoch": 0.7722110303348807, "grad_norm": 0.6303063893996484, "learning_rate": 5.062124898621249e-06, "loss": 0.6054, "step": 26449 }, { "epoch": 0.7722402265627281, "grad_norm": 0.6178613849529341, "learning_rate": 5.06147607461476e-06, "loss": 0.5807, "step": 26450 }, { "epoch": 0.7722694227905754, "grad_norm": 0.6499808935884037, "learning_rate": 5.060827250608273e-06, "loss": 0.6209, "step": 26451 }, { "epoch": 0.7722986190184228, "grad_norm": 0.6595368979336134, "learning_rate": 5.060178426601785e-06, "loss": 0.6334, "step": 26452 }, { "epoch": 0.7723278152462701, "grad_norm": 0.6287315172075966, "learning_rate": 5.0595296025952965e-06, "loss": 0.57, "step": 26453 }, { "epoch": 0.7723570114741175, "grad_norm": 0.6591166462995471, "learning_rate": 5.0588807785888085e-06, "loss": 0.5988, "step": 26454 }, { "epoch": 0.7723862077019649, "grad_norm": 0.6486064666377719, "learning_rate": 5.05823195458232e-06, "loss": 0.6154, "step": 26455 }, { "epoch": 0.7724154039298122, "grad_norm": 0.6522203234399546, "learning_rate": 5.057583130575832e-06, "loss": 0.6268, "step": 26456 }, { "epoch": 0.7724446001576596, "grad_norm": 0.6458629695146942, "learning_rate": 5.056934306569343e-06, "loss": 0.596, "step": 26457 }, { "epoch": 0.772473796385507, "grad_norm": 0.6988409464619214, "learning_rate": 5.056285482562855e-06, "loss": 0.6561, "step": 26458 }, { "epoch": 0.7725029926133543, "grad_norm": 0.6596764189236985, "learning_rate": 5.055636658556367e-06, "loss": 0.5982, "step": 26459 }, { "epoch": 0.7725321888412017, "grad_norm": 0.6658666562023662, "learning_rate": 5.054987834549879e-06, "loss": 0.6521, "step": 26460 }, { "epoch": 0.772561385069049, "grad_norm": 0.6880501012351092, "learning_rate": 5.054339010543391e-06, "loss": 0.6608, "step": 26461 }, { "epoch": 0.7725905812968964, "grad_norm": 0.60836372124269, "learning_rate": 5.053690186536902e-06, "loss": 0.5279, "step": 26462 }, { "epoch": 0.7726197775247438, "grad_norm": 0.6597625895429168, "learning_rate": 5.053041362530414e-06, "loss": 0.6006, "step": 26463 }, { "epoch": 0.7726489737525911, "grad_norm": 0.6628481577833021, "learning_rate": 5.052392538523926e-06, "loss": 0.5605, "step": 26464 }, { "epoch": 0.7726781699804385, "grad_norm": 0.639346616746745, "learning_rate": 5.051743714517437e-06, "loss": 0.5863, "step": 26465 }, { "epoch": 0.7727073662082858, "grad_norm": 0.6476399924681029, "learning_rate": 5.0510948905109494e-06, "loss": 0.6081, "step": 26466 }, { "epoch": 0.7727365624361332, "grad_norm": 0.5883364618713891, "learning_rate": 5.050446066504461e-06, "loss": 0.4978, "step": 26467 }, { "epoch": 0.7727657586639806, "grad_norm": 0.6386776297805119, "learning_rate": 5.049797242497973e-06, "loss": 0.5886, "step": 26468 }, { "epoch": 0.7727949548918279, "grad_norm": 0.6261351369509208, "learning_rate": 5.049148418491484e-06, "loss": 0.5658, "step": 26469 }, { "epoch": 0.7728241511196753, "grad_norm": 0.5897061029825308, "learning_rate": 5.048499594484997e-06, "loss": 0.5229, "step": 26470 }, { "epoch": 0.7728533473475226, "grad_norm": 0.6294227226810877, "learning_rate": 5.047850770478509e-06, "loss": 0.5763, "step": 26471 }, { "epoch": 0.77288254357537, "grad_norm": 0.6629273410493731, "learning_rate": 5.04720194647202e-06, "loss": 0.643, "step": 26472 }, { "epoch": 0.7729117398032175, "grad_norm": 0.6523530010417261, "learning_rate": 5.046553122465532e-06, "loss": 0.6618, "step": 26473 }, { "epoch": 0.7729409360310648, "grad_norm": 0.6506390836366064, "learning_rate": 5.045904298459043e-06, "loss": 0.608, "step": 26474 }, { "epoch": 0.7729701322589122, "grad_norm": 0.6436317619059116, "learning_rate": 5.045255474452555e-06, "loss": 0.6037, "step": 26475 }, { "epoch": 0.7729993284867596, "grad_norm": 0.6510433808657112, "learning_rate": 5.044606650446066e-06, "loss": 0.6407, "step": 26476 }, { "epoch": 0.7730285247146069, "grad_norm": 0.7157146196292032, "learning_rate": 5.043957826439578e-06, "loss": 0.6823, "step": 26477 }, { "epoch": 0.7730577209424543, "grad_norm": 0.6144905741411875, "learning_rate": 5.04330900243309e-06, "loss": 0.5551, "step": 26478 }, { "epoch": 0.7730869171703016, "grad_norm": 0.6519498349012125, "learning_rate": 5.042660178426602e-06, "loss": 0.6511, "step": 26479 }, { "epoch": 0.773116113398149, "grad_norm": 0.6363618924869582, "learning_rate": 5.0420113544201144e-06, "loss": 0.5663, "step": 26480 }, { "epoch": 0.7731453096259964, "grad_norm": 0.6497884386072368, "learning_rate": 5.041362530413626e-06, "loss": 0.6463, "step": 26481 }, { "epoch": 0.7731745058538437, "grad_norm": 0.6092560484918113, "learning_rate": 5.040713706407138e-06, "loss": 0.5256, "step": 26482 }, { "epoch": 0.7732037020816911, "grad_norm": 0.6478177685493821, "learning_rate": 5.04006488240065e-06, "loss": 0.6336, "step": 26483 }, { "epoch": 0.7732328983095385, "grad_norm": 0.6306298979282954, "learning_rate": 5.039416058394161e-06, "loss": 0.6073, "step": 26484 }, { "epoch": 0.7732620945373858, "grad_norm": 0.7157646855090051, "learning_rate": 5.038767234387673e-06, "loss": 0.6586, "step": 26485 }, { "epoch": 0.7732912907652332, "grad_norm": 0.6029592670626058, "learning_rate": 5.038118410381184e-06, "loss": 0.5673, "step": 26486 }, { "epoch": 0.7733204869930805, "grad_norm": 0.6355713846640887, "learning_rate": 5.037469586374696e-06, "loss": 0.604, "step": 26487 }, { "epoch": 0.7733496832209279, "grad_norm": 0.7448180214674981, "learning_rate": 5.036820762368207e-06, "loss": 0.6168, "step": 26488 }, { "epoch": 0.7733788794487753, "grad_norm": 0.7031455432075577, "learning_rate": 5.03617193836172e-06, "loss": 0.669, "step": 26489 }, { "epoch": 0.7734080756766226, "grad_norm": 0.6616083146843198, "learning_rate": 5.035523114355232e-06, "loss": 0.5974, "step": 26490 }, { "epoch": 0.77343727190447, "grad_norm": 0.6415396782431253, "learning_rate": 5.034874290348743e-06, "loss": 0.6194, "step": 26491 }, { "epoch": 0.7734664681323173, "grad_norm": 0.632771721827733, "learning_rate": 5.034225466342255e-06, "loss": 0.5884, "step": 26492 }, { "epoch": 0.7734956643601647, "grad_norm": 0.6280871245456384, "learning_rate": 5.0335766423357666e-06, "loss": 0.6203, "step": 26493 }, { "epoch": 0.7735248605880121, "grad_norm": 0.6127559038849774, "learning_rate": 5.032927818329279e-06, "loss": 0.5677, "step": 26494 }, { "epoch": 0.7735540568158594, "grad_norm": 0.6786595859367908, "learning_rate": 5.03227899432279e-06, "loss": 0.658, "step": 26495 }, { "epoch": 0.7735832530437068, "grad_norm": 0.6535109639573756, "learning_rate": 5.031630170316302e-06, "loss": 0.6113, "step": 26496 }, { "epoch": 0.7736124492715541, "grad_norm": 0.6649984967632805, "learning_rate": 5.030981346309813e-06, "loss": 0.6426, "step": 26497 }, { "epoch": 0.7736416454994015, "grad_norm": 0.6334535710176444, "learning_rate": 5.030332522303326e-06, "loss": 0.591, "step": 26498 }, { "epoch": 0.7736708417272489, "grad_norm": 0.6398806165892895, "learning_rate": 5.029683698296838e-06, "loss": 0.5853, "step": 26499 }, { "epoch": 0.7737000379550962, "grad_norm": 0.6436954069436498, "learning_rate": 5.029034874290349e-06, "loss": 0.6434, "step": 26500 }, { "epoch": 0.7737292341829436, "grad_norm": 0.60608603008881, "learning_rate": 5.028386050283861e-06, "loss": 0.5488, "step": 26501 }, { "epoch": 0.773758430410791, "grad_norm": 0.6294782198577765, "learning_rate": 5.027737226277373e-06, "loss": 0.565, "step": 26502 }, { "epoch": 0.7737876266386383, "grad_norm": 0.6181256448318797, "learning_rate": 5.027088402270884e-06, "loss": 0.5503, "step": 26503 }, { "epoch": 0.7738168228664857, "grad_norm": 0.6254614438543576, "learning_rate": 5.026439578264396e-06, "loss": 0.5891, "step": 26504 }, { "epoch": 0.773846019094333, "grad_norm": 0.6484307253340951, "learning_rate": 5.0257907542579075e-06, "loss": 0.5914, "step": 26505 }, { "epoch": 0.7738752153221804, "grad_norm": 0.6915693237986412, "learning_rate": 5.0251419302514195e-06, "loss": 0.6082, "step": 26506 }, { "epoch": 0.7739044115500278, "grad_norm": 0.67152048590614, "learning_rate": 5.024493106244932e-06, "loss": 0.6579, "step": 26507 }, { "epoch": 0.7739336077778751, "grad_norm": 0.6223070414421056, "learning_rate": 5.0238442822384436e-06, "loss": 0.6008, "step": 26508 }, { "epoch": 0.7739628040057225, "grad_norm": 0.6346744662127434, "learning_rate": 5.023195458231956e-06, "loss": 0.5388, "step": 26509 }, { "epoch": 0.7739920002335698, "grad_norm": 0.6126327503139699, "learning_rate": 5.022546634225467e-06, "loss": 0.5723, "step": 26510 }, { "epoch": 0.7740211964614172, "grad_norm": 0.6474237852390567, "learning_rate": 5.021897810218979e-06, "loss": 0.5862, "step": 26511 }, { "epoch": 0.7740503926892646, "grad_norm": 0.663476783566162, "learning_rate": 5.02124898621249e-06, "loss": 0.6726, "step": 26512 }, { "epoch": 0.7740795889171119, "grad_norm": 0.6395141495729443, "learning_rate": 5.020600162206002e-06, "loss": 0.5868, "step": 26513 }, { "epoch": 0.7741087851449593, "grad_norm": 0.6905466589113786, "learning_rate": 5.019951338199513e-06, "loss": 0.6351, "step": 26514 }, { "epoch": 0.7741379813728066, "grad_norm": 0.6277783859857496, "learning_rate": 5.019302514193025e-06, "loss": 0.5931, "step": 26515 }, { "epoch": 0.774167177600654, "grad_norm": 0.6696236346192455, "learning_rate": 5.018653690186536e-06, "loss": 0.6354, "step": 26516 }, { "epoch": 0.7741963738285014, "grad_norm": 0.6694762886558182, "learning_rate": 5.018004866180049e-06, "loss": 0.6115, "step": 26517 }, { "epoch": 0.7742255700563487, "grad_norm": 0.6457594104920855, "learning_rate": 5.017356042173561e-06, "loss": 0.5729, "step": 26518 }, { "epoch": 0.7742547662841961, "grad_norm": 0.6281209200555032, "learning_rate": 5.0167072181670725e-06, "loss": 0.5475, "step": 26519 }, { "epoch": 0.7742839625120435, "grad_norm": 0.6333688759257495, "learning_rate": 5.0160583941605845e-06, "loss": 0.5701, "step": 26520 }, { "epoch": 0.7743131587398908, "grad_norm": 0.5845389662568232, "learning_rate": 5.0154095701540965e-06, "loss": 0.5031, "step": 26521 }, { "epoch": 0.7743423549677382, "grad_norm": 0.6202906000268354, "learning_rate": 5.014760746147608e-06, "loss": 0.5364, "step": 26522 }, { "epoch": 0.7743715511955855, "grad_norm": 0.6467971349921243, "learning_rate": 5.01411192214112e-06, "loss": 0.5585, "step": 26523 }, { "epoch": 0.7744007474234329, "grad_norm": 0.6475402012426903, "learning_rate": 5.013463098134631e-06, "loss": 0.5904, "step": 26524 }, { "epoch": 0.7744299436512803, "grad_norm": 0.6321963213723786, "learning_rate": 5.012814274128143e-06, "loss": 0.5396, "step": 26525 }, { "epoch": 0.7744591398791276, "grad_norm": 0.6364946263070544, "learning_rate": 5.012165450121656e-06, "loss": 0.5878, "step": 26526 }, { "epoch": 0.774488336106975, "grad_norm": 0.6172974986326977, "learning_rate": 5.011516626115167e-06, "loss": 0.5502, "step": 26527 }, { "epoch": 0.7745175323348223, "grad_norm": 0.6190354117091279, "learning_rate": 5.010867802108679e-06, "loss": 0.509, "step": 26528 }, { "epoch": 0.7745467285626697, "grad_norm": 0.6893385662943007, "learning_rate": 5.01021897810219e-06, "loss": 0.6288, "step": 26529 }, { "epoch": 0.7745759247905171, "grad_norm": 0.6514481445471678, "learning_rate": 5.009570154095702e-06, "loss": 0.6272, "step": 26530 }, { "epoch": 0.7746051210183644, "grad_norm": 0.6817138495611834, "learning_rate": 5.008921330089213e-06, "loss": 0.5305, "step": 26531 }, { "epoch": 0.7746343172462118, "grad_norm": 0.6831176484626471, "learning_rate": 5.0082725060827255e-06, "loss": 0.6618, "step": 26532 }, { "epoch": 0.7746635134740592, "grad_norm": 0.6810498881582914, "learning_rate": 5.007623682076237e-06, "loss": 0.5303, "step": 26533 }, { "epoch": 0.7746927097019065, "grad_norm": 0.6300836327761253, "learning_rate": 5.006974858069749e-06, "loss": 0.5642, "step": 26534 }, { "epoch": 0.7747219059297539, "grad_norm": 0.677628817042318, "learning_rate": 5.00632603406326e-06, "loss": 0.5822, "step": 26535 }, { "epoch": 0.7747511021576012, "grad_norm": 0.6359472763070738, "learning_rate": 5.005677210056773e-06, "loss": 0.5456, "step": 26536 }, { "epoch": 0.7747802983854486, "grad_norm": 0.6099834059744322, "learning_rate": 5.005028386050285e-06, "loss": 0.5489, "step": 26537 }, { "epoch": 0.774809494613296, "grad_norm": 0.7096447092567149, "learning_rate": 5.004379562043796e-06, "loss": 0.7018, "step": 26538 }, { "epoch": 0.7748386908411433, "grad_norm": 0.6150981467957516, "learning_rate": 5.003730738037308e-06, "loss": 0.5496, "step": 26539 }, { "epoch": 0.7748678870689907, "grad_norm": 0.6660990315193499, "learning_rate": 5.00308191403082e-06, "loss": 0.6304, "step": 26540 }, { "epoch": 0.774897083296838, "grad_norm": 0.6965238237728396, "learning_rate": 5.002433090024331e-06, "loss": 0.6465, "step": 26541 }, { "epoch": 0.7749262795246854, "grad_norm": 0.6244269125118576, "learning_rate": 5.001784266017843e-06, "loss": 0.5478, "step": 26542 }, { "epoch": 0.7749554757525328, "grad_norm": 0.6238037963886317, "learning_rate": 5.001135442011354e-06, "loss": 0.5776, "step": 26543 }, { "epoch": 0.7749846719803801, "grad_norm": 0.743355383721205, "learning_rate": 5.000486618004866e-06, "loss": 0.6783, "step": 26544 }, { "epoch": 0.7750138682082275, "grad_norm": 0.6325752824903548, "learning_rate": 4.999837793998378e-06, "loss": 0.5975, "step": 26545 }, { "epoch": 0.7750430644360748, "grad_norm": 0.6083031269859309, "learning_rate": 4.99918896999189e-06, "loss": 0.5392, "step": 26546 }, { "epoch": 0.7750722606639222, "grad_norm": 0.620476428208443, "learning_rate": 4.998540145985402e-06, "loss": 0.5141, "step": 26547 }, { "epoch": 0.7751014568917696, "grad_norm": 0.6702022871763869, "learning_rate": 4.997891321978914e-06, "loss": 0.6495, "step": 26548 }, { "epoch": 0.7751306531196169, "grad_norm": 0.7009838369276116, "learning_rate": 4.997242497972426e-06, "loss": 0.7141, "step": 26549 }, { "epoch": 0.7751598493474643, "grad_norm": 0.6149342016671147, "learning_rate": 4.996593673965937e-06, "loss": 0.5648, "step": 26550 }, { "epoch": 0.7751890455753117, "grad_norm": 0.6653633367067229, "learning_rate": 4.995944849959449e-06, "loss": 0.6406, "step": 26551 }, { "epoch": 0.775218241803159, "grad_norm": 0.6360933353472804, "learning_rate": 4.995296025952961e-06, "loss": 0.5745, "step": 26552 }, { "epoch": 0.7752474380310064, "grad_norm": 0.6056663710943323, "learning_rate": 4.994647201946473e-06, "loss": 0.4993, "step": 26553 }, { "epoch": 0.7752766342588537, "grad_norm": 0.6443407574595517, "learning_rate": 4.993998377939984e-06, "loss": 0.6287, "step": 26554 }, { "epoch": 0.7753058304867011, "grad_norm": 0.6761269386270048, "learning_rate": 4.993349553933496e-06, "loss": 0.6201, "step": 26555 }, { "epoch": 0.7753350267145485, "grad_norm": 0.6514647336486947, "learning_rate": 4.992700729927007e-06, "loss": 0.6319, "step": 26556 }, { "epoch": 0.7753642229423958, "grad_norm": 0.6170213291878276, "learning_rate": 4.992051905920519e-06, "loss": 0.5584, "step": 26557 }, { "epoch": 0.7753934191702432, "grad_norm": 0.6487159717890978, "learning_rate": 4.991403081914031e-06, "loss": 0.5799, "step": 26558 }, { "epoch": 0.7754226153980905, "grad_norm": 0.6621802390911303, "learning_rate": 4.9907542579075426e-06, "loss": 0.6057, "step": 26559 }, { "epoch": 0.7754518116259379, "grad_norm": 0.7053410715228662, "learning_rate": 4.990105433901055e-06, "loss": 0.6918, "step": 26560 }, { "epoch": 0.7754810078537853, "grad_norm": 0.6638828966612215, "learning_rate": 4.989456609894567e-06, "loss": 0.6349, "step": 26561 }, { "epoch": 0.7755102040816326, "grad_norm": 0.6143138789482455, "learning_rate": 4.988807785888079e-06, "loss": 0.5599, "step": 26562 }, { "epoch": 0.77553940030948, "grad_norm": 0.6331933703188107, "learning_rate": 4.98815896188159e-06, "loss": 0.5972, "step": 26563 }, { "epoch": 0.7755685965373273, "grad_norm": 0.62864243670011, "learning_rate": 4.987510137875102e-06, "loss": 0.5544, "step": 26564 }, { "epoch": 0.7755977927651747, "grad_norm": 0.6318398000438759, "learning_rate": 4.986861313868613e-06, "loss": 0.5775, "step": 26565 }, { "epoch": 0.7756269889930221, "grad_norm": 0.6273229954962439, "learning_rate": 4.986212489862126e-06, "loss": 0.568, "step": 26566 }, { "epoch": 0.7756561852208694, "grad_norm": 0.6271074178692257, "learning_rate": 4.985563665855637e-06, "loss": 0.5585, "step": 26567 }, { "epoch": 0.7756853814487168, "grad_norm": 0.6690470325403288, "learning_rate": 4.984914841849149e-06, "loss": 0.6644, "step": 26568 }, { "epoch": 0.7757145776765642, "grad_norm": 0.6139648540203212, "learning_rate": 4.98426601784266e-06, "loss": 0.5451, "step": 26569 }, { "epoch": 0.7757437739044115, "grad_norm": 0.648480903962576, "learning_rate": 4.983617193836172e-06, "loss": 0.5852, "step": 26570 }, { "epoch": 0.7757729701322589, "grad_norm": 0.6919112184062131, "learning_rate": 4.982968369829684e-06, "loss": 0.6334, "step": 26571 }, { "epoch": 0.7758021663601062, "grad_norm": 0.6368754779233153, "learning_rate": 4.982319545823196e-06, "loss": 0.574, "step": 26572 }, { "epoch": 0.7758313625879536, "grad_norm": 0.5719530429928487, "learning_rate": 4.9816707218167076e-06, "loss": 0.4881, "step": 26573 }, { "epoch": 0.775860558815801, "grad_norm": 0.6122876858238868, "learning_rate": 4.98102189781022e-06, "loss": 0.519, "step": 26574 }, { "epoch": 0.7758897550436483, "grad_norm": 0.6636479586838712, "learning_rate": 4.980373073803731e-06, "loss": 0.5783, "step": 26575 }, { "epoch": 0.7759189512714957, "grad_norm": 0.6669638668943241, "learning_rate": 4.979724249797243e-06, "loss": 0.5862, "step": 26576 }, { "epoch": 0.775948147499343, "grad_norm": 0.6409992237727469, "learning_rate": 4.979075425790755e-06, "loss": 0.5623, "step": 26577 }, { "epoch": 0.7759773437271904, "grad_norm": 0.7032508542053172, "learning_rate": 4.978426601784266e-06, "loss": 0.6356, "step": 26578 }, { "epoch": 0.7760065399550378, "grad_norm": 0.6649481114292161, "learning_rate": 4.977777777777778e-06, "loss": 0.6236, "step": 26579 }, { "epoch": 0.7760357361828851, "grad_norm": 0.6705266019277509, "learning_rate": 4.97712895377129e-06, "loss": 0.6664, "step": 26580 }, { "epoch": 0.7760649324107325, "grad_norm": 0.645205790035095, "learning_rate": 4.976480129764802e-06, "loss": 0.5761, "step": 26581 }, { "epoch": 0.7760941286385798, "grad_norm": 0.6597651436080234, "learning_rate": 4.975831305758313e-06, "loss": 0.5999, "step": 26582 }, { "epoch": 0.7761233248664272, "grad_norm": 0.6392386411232907, "learning_rate": 4.975182481751825e-06, "loss": 0.5851, "step": 26583 }, { "epoch": 0.7761525210942746, "grad_norm": 0.6748308093327602, "learning_rate": 4.9745336577453365e-06, "loss": 0.6637, "step": 26584 }, { "epoch": 0.7761817173221219, "grad_norm": 0.8523048988360326, "learning_rate": 4.973884833738849e-06, "loss": 0.6584, "step": 26585 }, { "epoch": 0.7762109135499693, "grad_norm": 0.6090955624367747, "learning_rate": 4.9732360097323605e-06, "loss": 0.5005, "step": 26586 }, { "epoch": 0.7762401097778167, "grad_norm": 0.6429473235957416, "learning_rate": 4.9725871857258725e-06, "loss": 0.5813, "step": 26587 }, { "epoch": 0.776269306005664, "grad_norm": 0.6647216421332393, "learning_rate": 4.971938361719384e-06, "loss": 0.6445, "step": 26588 }, { "epoch": 0.7762985022335114, "grad_norm": 0.6547873705369588, "learning_rate": 4.971289537712896e-06, "loss": 0.6215, "step": 26589 }, { "epoch": 0.7763276984613587, "grad_norm": 0.6190906755244809, "learning_rate": 4.970640713706408e-06, "loss": 0.5643, "step": 26590 }, { "epoch": 0.7763568946892061, "grad_norm": 0.6387499307038711, "learning_rate": 4.96999188969992e-06, "loss": 0.6141, "step": 26591 }, { "epoch": 0.7763860909170535, "grad_norm": 0.6304398853940423, "learning_rate": 4.969343065693431e-06, "loss": 0.5363, "step": 26592 }, { "epoch": 0.7764152871449009, "grad_norm": 0.6161602558221517, "learning_rate": 4.968694241686943e-06, "loss": 0.5941, "step": 26593 }, { "epoch": 0.7764444833727483, "grad_norm": 0.6749046616072368, "learning_rate": 4.968045417680454e-06, "loss": 0.6631, "step": 26594 }, { "epoch": 0.7764736796005957, "grad_norm": 0.6699704434971946, "learning_rate": 4.967396593673966e-06, "loss": 0.641, "step": 26595 }, { "epoch": 0.776502875828443, "grad_norm": 0.647868421071306, "learning_rate": 4.966747769667478e-06, "loss": 0.5829, "step": 26596 }, { "epoch": 0.7765320720562904, "grad_norm": 0.6599138210991734, "learning_rate": 4.9660989456609894e-06, "loss": 0.6429, "step": 26597 }, { "epoch": 0.7765612682841377, "grad_norm": 0.6994978462142497, "learning_rate": 4.9654501216545015e-06, "loss": 0.6479, "step": 26598 }, { "epoch": 0.7765904645119851, "grad_norm": 0.7007941134203091, "learning_rate": 4.9648012976480135e-06, "loss": 0.6463, "step": 26599 }, { "epoch": 0.7766196607398325, "grad_norm": 0.6511038813714937, "learning_rate": 4.9641524736415255e-06, "loss": 0.5865, "step": 26600 }, { "epoch": 0.7766488569676798, "grad_norm": 0.6612117518014612, "learning_rate": 4.963503649635037e-06, "loss": 0.5984, "step": 26601 }, { "epoch": 0.7766780531955272, "grad_norm": 0.650903385238505, "learning_rate": 4.962854825628549e-06, "loss": 0.6185, "step": 26602 }, { "epoch": 0.7767072494233745, "grad_norm": 0.6825365068329937, "learning_rate": 4.96220600162206e-06, "loss": 0.6438, "step": 26603 }, { "epoch": 0.7767364456512219, "grad_norm": 0.6748888742534352, "learning_rate": 4.961557177615573e-06, "loss": 0.639, "step": 26604 }, { "epoch": 0.7767656418790693, "grad_norm": 0.6436085584358582, "learning_rate": 4.960908353609084e-06, "loss": 0.6096, "step": 26605 }, { "epoch": 0.7767948381069166, "grad_norm": 0.6632254015896958, "learning_rate": 4.960259529602596e-06, "loss": 0.6187, "step": 26606 }, { "epoch": 0.776824034334764, "grad_norm": 0.6576044374070323, "learning_rate": 4.959610705596107e-06, "loss": 0.5648, "step": 26607 }, { "epoch": 0.7768532305626114, "grad_norm": 0.637071445975757, "learning_rate": 4.958961881589619e-06, "loss": 0.5821, "step": 26608 }, { "epoch": 0.7768824267904587, "grad_norm": 0.6207027061760326, "learning_rate": 4.958313057583131e-06, "loss": 0.5586, "step": 26609 }, { "epoch": 0.7769116230183061, "grad_norm": 0.6963602538187357, "learning_rate": 4.957664233576643e-06, "loss": 0.6563, "step": 26610 }, { "epoch": 0.7769408192461534, "grad_norm": 0.5862864147294223, "learning_rate": 4.957015409570154e-06, "loss": 0.5326, "step": 26611 }, { "epoch": 0.7769700154740008, "grad_norm": 0.6654884186473919, "learning_rate": 4.9563665855636665e-06, "loss": 0.6517, "step": 26612 }, { "epoch": 0.7769992117018482, "grad_norm": 0.6386723147027373, "learning_rate": 4.955717761557178e-06, "loss": 0.588, "step": 26613 }, { "epoch": 0.7770284079296955, "grad_norm": 0.6940271868095963, "learning_rate": 4.95506893755069e-06, "loss": 0.639, "step": 26614 }, { "epoch": 0.7770576041575429, "grad_norm": 0.6505898226334583, "learning_rate": 4.954420113544202e-06, "loss": 0.5948, "step": 26615 }, { "epoch": 0.7770868003853902, "grad_norm": 0.6614732835208784, "learning_rate": 4.953771289537713e-06, "loss": 0.616, "step": 26616 }, { "epoch": 0.7771159966132376, "grad_norm": 0.6129663538641122, "learning_rate": 4.953122465531225e-06, "loss": 0.563, "step": 26617 }, { "epoch": 0.777145192841085, "grad_norm": 0.6534529787688372, "learning_rate": 4.952473641524737e-06, "loss": 0.598, "step": 26618 }, { "epoch": 0.7771743890689323, "grad_norm": 0.7120788178509736, "learning_rate": 4.951824817518249e-06, "loss": 0.6608, "step": 26619 }, { "epoch": 0.7772035852967797, "grad_norm": 0.6055387761827766, "learning_rate": 4.95117599351176e-06, "loss": 0.5005, "step": 26620 }, { "epoch": 0.777232781524627, "grad_norm": 0.6117737993010323, "learning_rate": 4.950527169505272e-06, "loss": 0.5761, "step": 26621 }, { "epoch": 0.7772619777524744, "grad_norm": 0.7025413968676272, "learning_rate": 4.949878345498783e-06, "loss": 0.63, "step": 26622 }, { "epoch": 0.7772911739803218, "grad_norm": 0.6822598323563368, "learning_rate": 4.949229521492296e-06, "loss": 0.6829, "step": 26623 }, { "epoch": 0.7773203702081691, "grad_norm": 0.6775843309949341, "learning_rate": 4.948580697485807e-06, "loss": 0.6556, "step": 26624 }, { "epoch": 0.7773495664360165, "grad_norm": 0.6693123461936844, "learning_rate": 4.947931873479319e-06, "loss": 0.6933, "step": 26625 }, { "epoch": 0.7773787626638639, "grad_norm": 0.6223286013956585, "learning_rate": 4.947283049472831e-06, "loss": 0.5671, "step": 26626 }, { "epoch": 0.7774079588917112, "grad_norm": 0.6421018261995086, "learning_rate": 4.946634225466343e-06, "loss": 0.5561, "step": 26627 }, { "epoch": 0.7774371551195586, "grad_norm": 0.6294068627393278, "learning_rate": 4.945985401459855e-06, "loss": 0.5593, "step": 26628 }, { "epoch": 0.7774663513474059, "grad_norm": 0.6224243210651309, "learning_rate": 4.945336577453367e-06, "loss": 0.5489, "step": 26629 }, { "epoch": 0.7774955475752533, "grad_norm": 0.6166013975205984, "learning_rate": 4.944687753446878e-06, "loss": 0.554, "step": 26630 }, { "epoch": 0.7775247438031007, "grad_norm": 0.6626164181436363, "learning_rate": 4.94403892944039e-06, "loss": 0.6456, "step": 26631 }, { "epoch": 0.777553940030948, "grad_norm": 0.6696804677043713, "learning_rate": 4.943390105433901e-06, "loss": 0.6807, "step": 26632 }, { "epoch": 0.7775831362587954, "grad_norm": 0.6371563671229634, "learning_rate": 4.942741281427413e-06, "loss": 0.5961, "step": 26633 }, { "epoch": 0.7776123324866427, "grad_norm": 0.6999761956400097, "learning_rate": 4.942092457420925e-06, "loss": 0.6458, "step": 26634 }, { "epoch": 0.7776415287144901, "grad_norm": 0.6455185492375742, "learning_rate": 4.941443633414436e-06, "loss": 0.6047, "step": 26635 }, { "epoch": 0.7776707249423375, "grad_norm": 0.6315200055522026, "learning_rate": 4.940794809407948e-06, "loss": 0.5982, "step": 26636 }, { "epoch": 0.7776999211701848, "grad_norm": 0.641570542629272, "learning_rate": 4.94014598540146e-06, "loss": 0.5981, "step": 26637 }, { "epoch": 0.7777291173980322, "grad_norm": 0.6114881344911436, "learning_rate": 4.939497161394972e-06, "loss": 0.5519, "step": 26638 }, { "epoch": 0.7777583136258795, "grad_norm": 0.6748239801205871, "learning_rate": 4.9388483373884836e-06, "loss": 0.6006, "step": 26639 }, { "epoch": 0.7777875098537269, "grad_norm": 0.6289024622842577, "learning_rate": 4.938199513381996e-06, "loss": 0.5847, "step": 26640 }, { "epoch": 0.7778167060815743, "grad_norm": 0.6451842099547471, "learning_rate": 4.937550689375507e-06, "loss": 0.5981, "step": 26641 }, { "epoch": 0.7778459023094216, "grad_norm": 0.6535008290789833, "learning_rate": 4.93690186536902e-06, "loss": 0.6429, "step": 26642 }, { "epoch": 0.777875098537269, "grad_norm": 0.6365253920993909, "learning_rate": 4.936253041362531e-06, "loss": 0.5932, "step": 26643 }, { "epoch": 0.7779042947651164, "grad_norm": 0.6390801046857193, "learning_rate": 4.935604217356043e-06, "loss": 0.5935, "step": 26644 }, { "epoch": 0.7779334909929637, "grad_norm": 0.6440480841734955, "learning_rate": 4.934955393349554e-06, "loss": 0.5846, "step": 26645 }, { "epoch": 0.7779626872208111, "grad_norm": 0.6752267292536813, "learning_rate": 4.934306569343066e-06, "loss": 0.6643, "step": 26646 }, { "epoch": 0.7779918834486584, "grad_norm": 0.6634956761006022, "learning_rate": 4.933657745336578e-06, "loss": 0.638, "step": 26647 }, { "epoch": 0.7780210796765058, "grad_norm": 0.6357279896873836, "learning_rate": 4.933008921330089e-06, "loss": 0.5729, "step": 26648 }, { "epoch": 0.7780502759043532, "grad_norm": 0.677311543348354, "learning_rate": 4.932360097323601e-06, "loss": 0.6634, "step": 26649 }, { "epoch": 0.7780794721322005, "grad_norm": 0.6275420597526493, "learning_rate": 4.931711273317113e-06, "loss": 0.5519, "step": 26650 }, { "epoch": 0.7781086683600479, "grad_norm": 0.6807920028344031, "learning_rate": 4.931062449310625e-06, "loss": 0.6749, "step": 26651 }, { "epoch": 0.7781378645878952, "grad_norm": 0.6490183909175393, "learning_rate": 4.9304136253041365e-06, "loss": 0.5949, "step": 26652 }, { "epoch": 0.7781670608157426, "grad_norm": 0.685593272865515, "learning_rate": 4.9297648012976486e-06, "loss": 0.6411, "step": 26653 }, { "epoch": 0.77819625704359, "grad_norm": 0.6232063084551244, "learning_rate": 4.92911597729116e-06, "loss": 0.5635, "step": 26654 }, { "epoch": 0.7782254532714373, "grad_norm": 0.6669984457386744, "learning_rate": 4.928467153284672e-06, "loss": 0.6377, "step": 26655 }, { "epoch": 0.7782546494992847, "grad_norm": 0.649523015774539, "learning_rate": 4.927818329278184e-06, "loss": 0.6001, "step": 26656 }, { "epoch": 0.778283845727132, "grad_norm": 0.686448551467965, "learning_rate": 4.927169505271696e-06, "loss": 0.6202, "step": 26657 }, { "epoch": 0.7783130419549794, "grad_norm": 0.6439498722012021, "learning_rate": 4.926520681265207e-06, "loss": 0.5801, "step": 26658 }, { "epoch": 0.7783422381828268, "grad_norm": 0.6700567743947501, "learning_rate": 4.925871857258719e-06, "loss": 0.6404, "step": 26659 }, { "epoch": 0.7783714344106741, "grad_norm": 0.6043706917425434, "learning_rate": 4.92522303325223e-06, "loss": 0.5547, "step": 26660 }, { "epoch": 0.7784006306385215, "grad_norm": 0.6216467780304737, "learning_rate": 4.924574209245743e-06, "loss": 0.5627, "step": 26661 }, { "epoch": 0.7784298268663689, "grad_norm": 0.6221986370864738, "learning_rate": 4.923925385239254e-06, "loss": 0.5507, "step": 26662 }, { "epoch": 0.7784590230942162, "grad_norm": 0.6932445756722451, "learning_rate": 4.923276561232766e-06, "loss": 0.6674, "step": 26663 }, { "epoch": 0.7784882193220636, "grad_norm": 0.6175437330600405, "learning_rate": 4.9226277372262775e-06, "loss": 0.568, "step": 26664 }, { "epoch": 0.7785174155499109, "grad_norm": 0.6414456718479387, "learning_rate": 4.9219789132197895e-06, "loss": 0.5974, "step": 26665 }, { "epoch": 0.7785466117777583, "grad_norm": 0.6382709060142181, "learning_rate": 4.9213300892133015e-06, "loss": 0.5737, "step": 26666 }, { "epoch": 0.7785758080056057, "grad_norm": 0.648793828609917, "learning_rate": 4.920681265206813e-06, "loss": 0.6344, "step": 26667 }, { "epoch": 0.778605004233453, "grad_norm": 0.6183862001320524, "learning_rate": 4.920032441200325e-06, "loss": 0.5351, "step": 26668 }, { "epoch": 0.7786342004613004, "grad_norm": 0.6433346278750927, "learning_rate": 4.919383617193837e-06, "loss": 0.6135, "step": 26669 }, { "epoch": 0.7786633966891477, "grad_norm": 0.6724026319984951, "learning_rate": 4.918734793187349e-06, "loss": 0.6694, "step": 26670 }, { "epoch": 0.7786925929169951, "grad_norm": 0.6747144013400759, "learning_rate": 4.91808596918086e-06, "loss": 0.6662, "step": 26671 }, { "epoch": 0.7787217891448425, "grad_norm": 0.6135893987341018, "learning_rate": 4.917437145174372e-06, "loss": 0.5457, "step": 26672 }, { "epoch": 0.7787509853726898, "grad_norm": 0.6063771789018285, "learning_rate": 4.916788321167883e-06, "loss": 0.5505, "step": 26673 }, { "epoch": 0.7787801816005372, "grad_norm": 0.6438595707605078, "learning_rate": 4.916139497161395e-06, "loss": 0.6205, "step": 26674 }, { "epoch": 0.7788093778283846, "grad_norm": 0.6448749644081729, "learning_rate": 4.915490673154907e-06, "loss": 0.5902, "step": 26675 }, { "epoch": 0.7788385740562319, "grad_norm": 0.7127903964118947, "learning_rate": 4.914841849148419e-06, "loss": 0.656, "step": 26676 }, { "epoch": 0.7788677702840793, "grad_norm": 0.6697531842577742, "learning_rate": 4.9141930251419304e-06, "loss": 0.6501, "step": 26677 }, { "epoch": 0.7788969665119266, "grad_norm": 0.6722536398318162, "learning_rate": 4.9135442011354425e-06, "loss": 0.6332, "step": 26678 }, { "epoch": 0.778926162739774, "grad_norm": 0.626340916524304, "learning_rate": 4.912895377128954e-06, "loss": 0.5414, "step": 26679 }, { "epoch": 0.7789553589676214, "grad_norm": 0.6084223374050954, "learning_rate": 4.9122465531224665e-06, "loss": 0.5721, "step": 26680 }, { "epoch": 0.7789845551954687, "grad_norm": 0.6324923691339621, "learning_rate": 4.911597729115978e-06, "loss": 0.5707, "step": 26681 }, { "epoch": 0.7790137514233161, "grad_norm": 0.6291958852059092, "learning_rate": 4.91094890510949e-06, "loss": 0.5578, "step": 26682 }, { "epoch": 0.7790429476511634, "grad_norm": 0.638895841552199, "learning_rate": 4.910300081103001e-06, "loss": 0.5756, "step": 26683 }, { "epoch": 0.7790721438790108, "grad_norm": 0.6510826310890071, "learning_rate": 4.909651257096513e-06, "loss": 0.6226, "step": 26684 }, { "epoch": 0.7791013401068582, "grad_norm": 0.7166025749310049, "learning_rate": 4.909002433090025e-06, "loss": 0.6856, "step": 26685 }, { "epoch": 0.7791305363347055, "grad_norm": 0.6760031557081682, "learning_rate": 4.908353609083536e-06, "loss": 0.6709, "step": 26686 }, { "epoch": 0.7791597325625529, "grad_norm": 0.6514407321323561, "learning_rate": 4.907704785077048e-06, "loss": 0.6082, "step": 26687 }, { "epoch": 0.7791889287904002, "grad_norm": 0.6589050087514579, "learning_rate": 4.90705596107056e-06, "loss": 0.6351, "step": 26688 }, { "epoch": 0.7792181250182476, "grad_norm": 0.5950982138429984, "learning_rate": 4.906407137064072e-06, "loss": 0.509, "step": 26689 }, { "epoch": 0.779247321246095, "grad_norm": 0.6814471232646302, "learning_rate": 4.905758313057583e-06, "loss": 0.7034, "step": 26690 }, { "epoch": 0.7792765174739423, "grad_norm": 0.5616793594560645, "learning_rate": 4.905109489051095e-06, "loss": 0.4748, "step": 26691 }, { "epoch": 0.7793057137017897, "grad_norm": 0.6332111370096104, "learning_rate": 4.904460665044607e-06, "loss": 0.598, "step": 26692 }, { "epoch": 0.779334909929637, "grad_norm": 0.6473108708551402, "learning_rate": 4.903811841038119e-06, "loss": 0.611, "step": 26693 }, { "epoch": 0.7793641061574844, "grad_norm": 0.6141034401140644, "learning_rate": 4.903163017031631e-06, "loss": 0.556, "step": 26694 }, { "epoch": 0.7793933023853318, "grad_norm": 0.6778401492861369, "learning_rate": 4.902514193025143e-06, "loss": 0.6752, "step": 26695 }, { "epoch": 0.7794224986131791, "grad_norm": 0.6333959229366547, "learning_rate": 4.901865369018654e-06, "loss": 0.5723, "step": 26696 }, { "epoch": 0.7794516948410265, "grad_norm": 0.7231703308601942, "learning_rate": 4.901216545012166e-06, "loss": 0.6995, "step": 26697 }, { "epoch": 0.7794808910688739, "grad_norm": 0.6337464656696058, "learning_rate": 4.900567721005677e-06, "loss": 0.6195, "step": 26698 }, { "epoch": 0.7795100872967212, "grad_norm": 0.6505558343809382, "learning_rate": 4.89991889699919e-06, "loss": 0.6372, "step": 26699 }, { "epoch": 0.7795392835245686, "grad_norm": 0.669761266312669, "learning_rate": 4.899270072992701e-06, "loss": 0.613, "step": 26700 }, { "epoch": 0.7795684797524159, "grad_norm": 0.6564569716495101, "learning_rate": 4.898621248986213e-06, "loss": 0.6352, "step": 26701 }, { "epoch": 0.7795976759802633, "grad_norm": 0.7641102077696723, "learning_rate": 4.897972424979724e-06, "loss": 0.6732, "step": 26702 }, { "epoch": 0.7796268722081107, "grad_norm": 0.6457918734798846, "learning_rate": 4.897323600973236e-06, "loss": 0.5956, "step": 26703 }, { "epoch": 0.779656068435958, "grad_norm": 0.6289697580801148, "learning_rate": 4.896674776966748e-06, "loss": 0.5843, "step": 26704 }, { "epoch": 0.7796852646638054, "grad_norm": 0.610602027336554, "learning_rate": 4.8960259529602596e-06, "loss": 0.554, "step": 26705 }, { "epoch": 0.7797144608916527, "grad_norm": 0.6522568968492303, "learning_rate": 4.895377128953772e-06, "loss": 0.5664, "step": 26706 }, { "epoch": 0.7797436571195001, "grad_norm": 0.5978865259644203, "learning_rate": 4.894728304947284e-06, "loss": 0.5417, "step": 26707 }, { "epoch": 0.7797728533473475, "grad_norm": 0.6257295752994838, "learning_rate": 4.894079480940796e-06, "loss": 0.5872, "step": 26708 }, { "epoch": 0.7798020495751948, "grad_norm": 0.5909905141377161, "learning_rate": 4.893430656934307e-06, "loss": 0.548, "step": 26709 }, { "epoch": 0.7798312458030422, "grad_norm": 0.6961359438755191, "learning_rate": 4.892781832927819e-06, "loss": 0.6796, "step": 26710 }, { "epoch": 0.7798604420308896, "grad_norm": 0.633053736678486, "learning_rate": 4.89213300892133e-06, "loss": 0.5636, "step": 26711 }, { "epoch": 0.7798896382587369, "grad_norm": 0.6044386268071912, "learning_rate": 4.891484184914842e-06, "loss": 0.5543, "step": 26712 }, { "epoch": 0.7799188344865843, "grad_norm": 0.6540029262769609, "learning_rate": 4.890835360908354e-06, "loss": 0.5969, "step": 26713 }, { "epoch": 0.7799480307144318, "grad_norm": 0.6533356214876157, "learning_rate": 4.890186536901866e-06, "loss": 0.6488, "step": 26714 }, { "epoch": 0.7799772269422791, "grad_norm": 0.6192895035251728, "learning_rate": 4.889537712895377e-06, "loss": 0.5656, "step": 26715 }, { "epoch": 0.7800064231701265, "grad_norm": 0.689882054935697, "learning_rate": 4.888888888888889e-06, "loss": 0.7203, "step": 26716 }, { "epoch": 0.7800356193979738, "grad_norm": 0.6720062060150693, "learning_rate": 4.888240064882401e-06, "loss": 0.6324, "step": 26717 }, { "epoch": 0.7800648156258212, "grad_norm": 0.7061470497301862, "learning_rate": 4.887591240875913e-06, "loss": 0.7102, "step": 26718 }, { "epoch": 0.7800940118536686, "grad_norm": 0.646464029098292, "learning_rate": 4.8869424168694246e-06, "loss": 0.6364, "step": 26719 }, { "epoch": 0.7801232080815159, "grad_norm": 0.6339803203275601, "learning_rate": 4.886293592862937e-06, "loss": 0.5935, "step": 26720 }, { "epoch": 0.7801524043093633, "grad_norm": 0.6630635203697799, "learning_rate": 4.885644768856448e-06, "loss": 0.5925, "step": 26721 }, { "epoch": 0.7801816005372106, "grad_norm": 0.6558064549137606, "learning_rate": 4.88499594484996e-06, "loss": 0.5159, "step": 26722 }, { "epoch": 0.780210796765058, "grad_norm": 0.564332492321439, "learning_rate": 4.884347120843472e-06, "loss": 0.4785, "step": 26723 }, { "epoch": 0.7802399929929054, "grad_norm": 0.6378261578972866, "learning_rate": 4.883698296836983e-06, "loss": 0.5725, "step": 26724 }, { "epoch": 0.7802691892207527, "grad_norm": 0.6252992726647095, "learning_rate": 4.883049472830495e-06, "loss": 0.5912, "step": 26725 }, { "epoch": 0.7802983854486001, "grad_norm": 0.590978827988281, "learning_rate": 4.882400648824007e-06, "loss": 0.5508, "step": 26726 }, { "epoch": 0.7803275816764474, "grad_norm": 0.6618984551999278, "learning_rate": 4.881751824817519e-06, "loss": 0.641, "step": 26727 }, { "epoch": 0.7803567779042948, "grad_norm": 0.6408993681785518, "learning_rate": 4.88110300081103e-06, "loss": 0.5982, "step": 26728 }, { "epoch": 0.7803859741321422, "grad_norm": 0.6160662021681759, "learning_rate": 4.880454176804542e-06, "loss": 0.55, "step": 26729 }, { "epoch": 0.7804151703599895, "grad_norm": 0.6078140368876636, "learning_rate": 4.8798053527980535e-06, "loss": 0.5253, "step": 26730 }, { "epoch": 0.7804443665878369, "grad_norm": 0.6148626089184678, "learning_rate": 4.8791565287915655e-06, "loss": 0.5614, "step": 26731 }, { "epoch": 0.7804735628156843, "grad_norm": 0.6632573647260436, "learning_rate": 4.8785077047850775e-06, "loss": 0.6298, "step": 26732 }, { "epoch": 0.7805027590435316, "grad_norm": 0.6479240963905156, "learning_rate": 4.8778588807785896e-06, "loss": 0.6069, "step": 26733 }, { "epoch": 0.780531955271379, "grad_norm": 0.6259168586377266, "learning_rate": 4.877210056772101e-06, "loss": 0.5914, "step": 26734 }, { "epoch": 0.7805611514992263, "grad_norm": 0.6461222603626015, "learning_rate": 4.876561232765613e-06, "loss": 0.5768, "step": 26735 }, { "epoch": 0.7805903477270737, "grad_norm": 0.6612392311316162, "learning_rate": 4.875912408759125e-06, "loss": 0.6495, "step": 26736 }, { "epoch": 0.7806195439549211, "grad_norm": 0.6588000781171105, "learning_rate": 4.875263584752636e-06, "loss": 0.6109, "step": 26737 }, { "epoch": 0.7806487401827684, "grad_norm": 0.6609462553730986, "learning_rate": 4.874614760746148e-06, "loss": 0.6433, "step": 26738 }, { "epoch": 0.7806779364106158, "grad_norm": 0.6717399040859298, "learning_rate": 4.87396593673966e-06, "loss": 0.5637, "step": 26739 }, { "epoch": 0.7807071326384631, "grad_norm": 0.6565250361867014, "learning_rate": 4.873317112733171e-06, "loss": 0.6282, "step": 26740 }, { "epoch": 0.7807363288663105, "grad_norm": 0.6697441840889611, "learning_rate": 4.872668288726683e-06, "loss": 0.5871, "step": 26741 }, { "epoch": 0.7807655250941579, "grad_norm": 0.6393254334573722, "learning_rate": 4.872019464720195e-06, "loss": 0.5927, "step": 26742 }, { "epoch": 0.7807947213220052, "grad_norm": 0.6044418688411555, "learning_rate": 4.8713706407137064e-06, "loss": 0.545, "step": 26743 }, { "epoch": 0.7808239175498526, "grad_norm": 0.7144803124846679, "learning_rate": 4.8707218167072185e-06, "loss": 0.6695, "step": 26744 }, { "epoch": 0.7808531137777, "grad_norm": 0.6511128463095409, "learning_rate": 4.8700729927007305e-06, "loss": 0.6165, "step": 26745 }, { "epoch": 0.7808823100055473, "grad_norm": 0.6473389441980799, "learning_rate": 4.8694241686942425e-06, "loss": 0.5955, "step": 26746 }, { "epoch": 0.7809115062333947, "grad_norm": 0.6069928478301899, "learning_rate": 4.868775344687754e-06, "loss": 0.5345, "step": 26747 }, { "epoch": 0.780940702461242, "grad_norm": 0.6755409540077412, "learning_rate": 4.868126520681266e-06, "loss": 0.6378, "step": 26748 }, { "epoch": 0.7809698986890894, "grad_norm": 0.6537962217362656, "learning_rate": 4.867477696674777e-06, "loss": 0.596, "step": 26749 }, { "epoch": 0.7809990949169368, "grad_norm": 0.5893959350954502, "learning_rate": 4.866828872668289e-06, "loss": 0.5123, "step": 26750 }, { "epoch": 0.7810282911447841, "grad_norm": 0.7350634926592693, "learning_rate": 4.866180048661801e-06, "loss": 0.7632, "step": 26751 }, { "epoch": 0.7810574873726315, "grad_norm": 0.6395880071891346, "learning_rate": 4.865531224655313e-06, "loss": 0.5648, "step": 26752 }, { "epoch": 0.7810866836004788, "grad_norm": 0.6098352857363936, "learning_rate": 4.864882400648824e-06, "loss": 0.5453, "step": 26753 }, { "epoch": 0.7811158798283262, "grad_norm": 0.6130263446174258, "learning_rate": 4.864233576642336e-06, "loss": 0.5329, "step": 26754 }, { "epoch": 0.7811450760561736, "grad_norm": 0.6554852737685005, "learning_rate": 4.863584752635848e-06, "loss": 0.6345, "step": 26755 }, { "epoch": 0.7811742722840209, "grad_norm": 0.6546832796721985, "learning_rate": 4.862935928629359e-06, "loss": 0.6192, "step": 26756 }, { "epoch": 0.7812034685118683, "grad_norm": 0.6695358297213487, "learning_rate": 4.8622871046228714e-06, "loss": 0.6249, "step": 26757 }, { "epoch": 0.7812326647397156, "grad_norm": 0.6355619180786451, "learning_rate": 4.8616382806163835e-06, "loss": 0.5994, "step": 26758 }, { "epoch": 0.781261860967563, "grad_norm": 0.5672720422429767, "learning_rate": 4.860989456609895e-06, "loss": 0.4187, "step": 26759 }, { "epoch": 0.7812910571954104, "grad_norm": 0.621411958546379, "learning_rate": 4.860340632603407e-06, "loss": 0.5742, "step": 26760 }, { "epoch": 0.7813202534232577, "grad_norm": 0.590054079378674, "learning_rate": 4.859691808596919e-06, "loss": 0.5151, "step": 26761 }, { "epoch": 0.7813494496511051, "grad_norm": 0.6701793638252596, "learning_rate": 4.85904298459043e-06, "loss": 0.6509, "step": 26762 }, { "epoch": 0.7813786458789524, "grad_norm": 0.6262526283732693, "learning_rate": 4.858394160583942e-06, "loss": 0.5266, "step": 26763 }, { "epoch": 0.7814078421067998, "grad_norm": 0.671303435987263, "learning_rate": 4.857745336577454e-06, "loss": 0.622, "step": 26764 }, { "epoch": 0.7814370383346472, "grad_norm": 0.6151226827748535, "learning_rate": 4.857096512570966e-06, "loss": 0.5581, "step": 26765 }, { "epoch": 0.7814662345624945, "grad_norm": 0.5824071671186422, "learning_rate": 4.856447688564477e-06, "loss": 0.5062, "step": 26766 }, { "epoch": 0.7814954307903419, "grad_norm": 0.6524950788144371, "learning_rate": 4.855798864557989e-06, "loss": 0.6143, "step": 26767 }, { "epoch": 0.7815246270181893, "grad_norm": 0.6015910950905506, "learning_rate": 4.8551500405515e-06, "loss": 0.5272, "step": 26768 }, { "epoch": 0.7815538232460366, "grad_norm": 0.6000660666066588, "learning_rate": 4.854501216545013e-06, "loss": 0.5203, "step": 26769 }, { "epoch": 0.781583019473884, "grad_norm": 0.6397874777553667, "learning_rate": 4.853852392538524e-06, "loss": 0.564, "step": 26770 }, { "epoch": 0.7816122157017313, "grad_norm": 0.6003428912485086, "learning_rate": 4.853203568532036e-06, "loss": 0.5635, "step": 26771 }, { "epoch": 0.7816414119295787, "grad_norm": 0.5975877584737381, "learning_rate": 4.852554744525548e-06, "loss": 0.5348, "step": 26772 }, { "epoch": 0.7816706081574261, "grad_norm": 0.6576878211865647, "learning_rate": 4.85190592051906e-06, "loss": 0.641, "step": 26773 }, { "epoch": 0.7816998043852734, "grad_norm": 0.651144458696738, "learning_rate": 4.851257096512572e-06, "loss": 0.6036, "step": 26774 }, { "epoch": 0.7817290006131208, "grad_norm": 0.6400534953999611, "learning_rate": 4.850608272506083e-06, "loss": 0.6111, "step": 26775 }, { "epoch": 0.7817581968409681, "grad_norm": 0.6078633614087144, "learning_rate": 4.849959448499595e-06, "loss": 0.5465, "step": 26776 }, { "epoch": 0.7817873930688155, "grad_norm": 0.6534958867487755, "learning_rate": 4.849310624493107e-06, "loss": 0.5674, "step": 26777 }, { "epoch": 0.7818165892966629, "grad_norm": 0.6404730453754254, "learning_rate": 4.848661800486618e-06, "loss": 0.5926, "step": 26778 }, { "epoch": 0.7818457855245102, "grad_norm": 0.6657972679193773, "learning_rate": 4.84801297648013e-06, "loss": 0.6408, "step": 26779 }, { "epoch": 0.7818749817523576, "grad_norm": 0.6670354300551162, "learning_rate": 4.847364152473642e-06, "loss": 0.6329, "step": 26780 }, { "epoch": 0.781904177980205, "grad_norm": 0.6149306116780793, "learning_rate": 4.846715328467153e-06, "loss": 0.5608, "step": 26781 }, { "epoch": 0.7819333742080523, "grad_norm": 0.6465618664195233, "learning_rate": 4.846066504460665e-06, "loss": 0.6295, "step": 26782 }, { "epoch": 0.7819625704358997, "grad_norm": 0.6235887876986788, "learning_rate": 4.8454176804541765e-06, "loss": 0.5226, "step": 26783 }, { "epoch": 0.781991766663747, "grad_norm": 0.6391686506075515, "learning_rate": 4.844768856447689e-06, "loss": 0.6156, "step": 26784 }, { "epoch": 0.7820209628915944, "grad_norm": 0.5902865197613872, "learning_rate": 4.8441200324412006e-06, "loss": 0.5393, "step": 26785 }, { "epoch": 0.7820501591194418, "grad_norm": 0.6208336894143215, "learning_rate": 4.843471208434713e-06, "loss": 0.5276, "step": 26786 }, { "epoch": 0.7820793553472891, "grad_norm": 0.6667469253418072, "learning_rate": 4.842822384428224e-06, "loss": 0.6568, "step": 26787 }, { "epoch": 0.7821085515751365, "grad_norm": 0.6623211451367282, "learning_rate": 4.842173560421737e-06, "loss": 0.6469, "step": 26788 }, { "epoch": 0.7821377478029838, "grad_norm": 0.654152668897026, "learning_rate": 4.841524736415248e-06, "loss": 0.6556, "step": 26789 }, { "epoch": 0.7821669440308312, "grad_norm": 0.6310713162220876, "learning_rate": 4.84087591240876e-06, "loss": 0.5729, "step": 26790 }, { "epoch": 0.7821961402586786, "grad_norm": 0.5968222710794089, "learning_rate": 4.840227088402271e-06, "loss": 0.5103, "step": 26791 }, { "epoch": 0.7822253364865259, "grad_norm": 0.6066271664104963, "learning_rate": 4.839578264395783e-06, "loss": 0.6016, "step": 26792 }, { "epoch": 0.7822545327143733, "grad_norm": 0.6405703568161837, "learning_rate": 4.838929440389295e-06, "loss": 0.6208, "step": 26793 }, { "epoch": 0.7822837289422206, "grad_norm": 0.6154130605705977, "learning_rate": 4.838280616382806e-06, "loss": 0.5611, "step": 26794 }, { "epoch": 0.782312925170068, "grad_norm": 0.7259487048996034, "learning_rate": 4.837631792376318e-06, "loss": 0.699, "step": 26795 }, { "epoch": 0.7823421213979154, "grad_norm": 0.6286243174444524, "learning_rate": 4.83698296836983e-06, "loss": 0.5952, "step": 26796 }, { "epoch": 0.7823713176257627, "grad_norm": 0.6499668912232817, "learning_rate": 4.8363341443633415e-06, "loss": 0.5726, "step": 26797 }, { "epoch": 0.7824005138536101, "grad_norm": 0.6404551182827254, "learning_rate": 4.8356853203568535e-06, "loss": 0.5853, "step": 26798 }, { "epoch": 0.7824297100814575, "grad_norm": 0.5880459833515798, "learning_rate": 4.8350364963503656e-06, "loss": 0.5149, "step": 26799 }, { "epoch": 0.7824589063093048, "grad_norm": 0.6468817287481369, "learning_rate": 4.834387672343877e-06, "loss": 0.5724, "step": 26800 }, { "epoch": 0.7824881025371522, "grad_norm": 0.682909492450998, "learning_rate": 4.833738848337389e-06, "loss": 0.6671, "step": 26801 }, { "epoch": 0.7825172987649995, "grad_norm": 0.619154606793917, "learning_rate": 4.833090024330901e-06, "loss": 0.5845, "step": 26802 }, { "epoch": 0.7825464949928469, "grad_norm": 0.6669575629701332, "learning_rate": 4.832441200324413e-06, "loss": 0.5707, "step": 26803 }, { "epoch": 0.7825756912206943, "grad_norm": 0.6522194591806715, "learning_rate": 4.831792376317924e-06, "loss": 0.5692, "step": 26804 }, { "epoch": 0.7826048874485416, "grad_norm": 0.6380400881061455, "learning_rate": 4.831143552311436e-06, "loss": 0.6115, "step": 26805 }, { "epoch": 0.782634083676389, "grad_norm": 0.6862517447980488, "learning_rate": 4.830494728304947e-06, "loss": 0.6407, "step": 26806 }, { "epoch": 0.7826632799042363, "grad_norm": 0.5923192998221426, "learning_rate": 4.82984590429846e-06, "loss": 0.5101, "step": 26807 }, { "epoch": 0.7826924761320837, "grad_norm": 0.6140571168319128, "learning_rate": 4.829197080291971e-06, "loss": 0.5215, "step": 26808 }, { "epoch": 0.7827216723599311, "grad_norm": 0.622000243888498, "learning_rate": 4.828548256285483e-06, "loss": 0.5739, "step": 26809 }, { "epoch": 0.7827508685877784, "grad_norm": 0.6662336107798024, "learning_rate": 4.8278994322789945e-06, "loss": 0.6308, "step": 26810 }, { "epoch": 0.7827800648156258, "grad_norm": 0.6621433375398924, "learning_rate": 4.8272506082725065e-06, "loss": 0.6229, "step": 26811 }, { "epoch": 0.7828092610434731, "grad_norm": 0.6703340851979772, "learning_rate": 4.8266017842660185e-06, "loss": 0.6462, "step": 26812 }, { "epoch": 0.7828384572713205, "grad_norm": 0.6780309709214086, "learning_rate": 4.82595296025953e-06, "loss": 0.6267, "step": 26813 }, { "epoch": 0.7828676534991679, "grad_norm": 0.6554025826621218, "learning_rate": 4.825304136253042e-06, "loss": 0.6309, "step": 26814 }, { "epoch": 0.7828968497270152, "grad_norm": 0.6515319069128263, "learning_rate": 4.824655312246554e-06, "loss": 0.6137, "step": 26815 }, { "epoch": 0.7829260459548626, "grad_norm": 0.7297642487828642, "learning_rate": 4.824006488240065e-06, "loss": 0.696, "step": 26816 }, { "epoch": 0.78295524218271, "grad_norm": 0.7079960244377442, "learning_rate": 4.823357664233577e-06, "loss": 0.6699, "step": 26817 }, { "epoch": 0.7829844384105573, "grad_norm": 0.6813417280257147, "learning_rate": 4.822708840227089e-06, "loss": 0.5989, "step": 26818 }, { "epoch": 0.7830136346384047, "grad_norm": 0.6437650688602063, "learning_rate": 4.8220600162206e-06, "loss": 0.5734, "step": 26819 }, { "epoch": 0.783042830866252, "grad_norm": 0.6698130233065342, "learning_rate": 4.821411192214112e-06, "loss": 0.6658, "step": 26820 }, { "epoch": 0.7830720270940994, "grad_norm": 0.6053220710546418, "learning_rate": 4.820762368207624e-06, "loss": 0.5537, "step": 26821 }, { "epoch": 0.7831012233219468, "grad_norm": 0.61204972773933, "learning_rate": 4.820113544201136e-06, "loss": 0.5562, "step": 26822 }, { "epoch": 0.7831304195497941, "grad_norm": 0.6380131977228588, "learning_rate": 4.8194647201946474e-06, "loss": 0.5717, "step": 26823 }, { "epoch": 0.7831596157776415, "grad_norm": 0.5886515160166095, "learning_rate": 4.8188158961881595e-06, "loss": 0.552, "step": 26824 }, { "epoch": 0.7831888120054888, "grad_norm": 0.6528622718585084, "learning_rate": 4.818167072181671e-06, "loss": 0.6394, "step": 26825 }, { "epoch": 0.7832180082333362, "grad_norm": 0.6406763690994619, "learning_rate": 4.8175182481751835e-06, "loss": 0.5849, "step": 26826 }, { "epoch": 0.7832472044611836, "grad_norm": 0.608120891169884, "learning_rate": 4.816869424168695e-06, "loss": 0.5452, "step": 26827 }, { "epoch": 0.7832764006890309, "grad_norm": 0.6334325041311885, "learning_rate": 4.816220600162207e-06, "loss": 0.5471, "step": 26828 }, { "epoch": 0.7833055969168783, "grad_norm": 0.7147290111819425, "learning_rate": 4.815571776155718e-06, "loss": 0.7175, "step": 26829 }, { "epoch": 0.7833347931447256, "grad_norm": 0.639347535706356, "learning_rate": 4.81492295214923e-06, "loss": 0.5742, "step": 26830 }, { "epoch": 0.783363989372573, "grad_norm": 0.7129339590725221, "learning_rate": 4.814274128142742e-06, "loss": 0.6247, "step": 26831 }, { "epoch": 0.7833931856004204, "grad_norm": 0.6341861588294061, "learning_rate": 4.813625304136253e-06, "loss": 0.5746, "step": 26832 }, { "epoch": 0.7834223818282677, "grad_norm": 0.6674808656086831, "learning_rate": 4.812976480129765e-06, "loss": 0.6861, "step": 26833 }, { "epoch": 0.7834515780561152, "grad_norm": 0.6265626983965555, "learning_rate": 4.812327656123277e-06, "loss": 0.5688, "step": 26834 }, { "epoch": 0.7834807742839626, "grad_norm": 0.6907230622579839, "learning_rate": 4.811678832116789e-06, "loss": 0.6445, "step": 26835 }, { "epoch": 0.7835099705118099, "grad_norm": 0.613648027300316, "learning_rate": 4.8110300081103e-06, "loss": 0.5541, "step": 26836 }, { "epoch": 0.7835391667396573, "grad_norm": 0.6660545674490076, "learning_rate": 4.8103811841038124e-06, "loss": 0.6422, "step": 26837 }, { "epoch": 0.7835683629675047, "grad_norm": 0.6549079521299442, "learning_rate": 4.809732360097324e-06, "loss": 0.5782, "step": 26838 }, { "epoch": 0.783597559195352, "grad_norm": 0.6481117169856773, "learning_rate": 4.809083536090836e-06, "loss": 0.5812, "step": 26839 }, { "epoch": 0.7836267554231994, "grad_norm": 0.6733795600100754, "learning_rate": 4.808434712084348e-06, "loss": 0.6528, "step": 26840 }, { "epoch": 0.7836559516510467, "grad_norm": 0.6886870892955753, "learning_rate": 4.80778588807786e-06, "loss": 0.6661, "step": 26841 }, { "epoch": 0.7836851478788941, "grad_norm": 0.6440778332805902, "learning_rate": 4.807137064071371e-06, "loss": 0.638, "step": 26842 }, { "epoch": 0.7837143441067415, "grad_norm": 0.6624986666365631, "learning_rate": 4.806488240064883e-06, "loss": 0.6319, "step": 26843 }, { "epoch": 0.7837435403345888, "grad_norm": 0.596501931212768, "learning_rate": 4.805839416058394e-06, "loss": 0.5393, "step": 26844 }, { "epoch": 0.7837727365624362, "grad_norm": 0.6615267611033836, "learning_rate": 4.805190592051906e-06, "loss": 0.6112, "step": 26845 }, { "epoch": 0.7838019327902835, "grad_norm": 0.6654520099868548, "learning_rate": 4.804541768045418e-06, "loss": 0.6374, "step": 26846 }, { "epoch": 0.7838311290181309, "grad_norm": 0.6835699786479829, "learning_rate": 4.80389294403893e-06, "loss": 0.613, "step": 26847 }, { "epoch": 0.7838603252459783, "grad_norm": 0.6388276751255108, "learning_rate": 4.803244120032441e-06, "loss": 0.6015, "step": 26848 }, { "epoch": 0.7838895214738256, "grad_norm": 0.6949690668295332, "learning_rate": 4.802595296025953e-06, "loss": 0.6742, "step": 26849 }, { "epoch": 0.783918717701673, "grad_norm": 0.6472191993826668, "learning_rate": 4.801946472019465e-06, "loss": 0.6284, "step": 26850 }, { "epoch": 0.7839479139295203, "grad_norm": 0.6087944270579028, "learning_rate": 4.8012976480129766e-06, "loss": 0.5494, "step": 26851 }, { "epoch": 0.7839771101573677, "grad_norm": 0.645502855628337, "learning_rate": 4.800648824006489e-06, "loss": 0.5637, "step": 26852 }, { "epoch": 0.7840063063852151, "grad_norm": 0.6558495348234936, "learning_rate": 4.800000000000001e-06, "loss": 0.5659, "step": 26853 }, { "epoch": 0.7840355026130624, "grad_norm": 0.6763813047462498, "learning_rate": 4.799351175993513e-06, "loss": 0.6242, "step": 26854 }, { "epoch": 0.7840646988409098, "grad_norm": 0.6440681811829965, "learning_rate": 4.798702351987024e-06, "loss": 0.5613, "step": 26855 }, { "epoch": 0.7840938950687572, "grad_norm": 0.6087950360543096, "learning_rate": 4.798053527980536e-06, "loss": 0.5625, "step": 26856 }, { "epoch": 0.7841230912966045, "grad_norm": 0.6286664971033872, "learning_rate": 4.797404703974047e-06, "loss": 0.5654, "step": 26857 }, { "epoch": 0.7841522875244519, "grad_norm": 0.6647106168595011, "learning_rate": 4.796755879967559e-06, "loss": 0.612, "step": 26858 }, { "epoch": 0.7841814837522992, "grad_norm": 0.6603485878627032, "learning_rate": 4.796107055961071e-06, "loss": 0.6477, "step": 26859 }, { "epoch": 0.7842106799801466, "grad_norm": 0.6559445483998467, "learning_rate": 4.795458231954583e-06, "loss": 0.6242, "step": 26860 }, { "epoch": 0.784239876207994, "grad_norm": 0.6609327959086735, "learning_rate": 4.794809407948094e-06, "loss": 0.6733, "step": 26861 }, { "epoch": 0.7842690724358413, "grad_norm": 0.6734580001635997, "learning_rate": 4.794160583941606e-06, "loss": 0.641, "step": 26862 }, { "epoch": 0.7842982686636887, "grad_norm": 0.5981684324984239, "learning_rate": 4.7935117599351175e-06, "loss": 0.5474, "step": 26863 }, { "epoch": 0.784327464891536, "grad_norm": 0.6740932189945361, "learning_rate": 4.7928629359286295e-06, "loss": 0.6698, "step": 26864 }, { "epoch": 0.7843566611193834, "grad_norm": 0.7342364496383265, "learning_rate": 4.7922141119221416e-06, "loss": 0.6978, "step": 26865 }, { "epoch": 0.7843858573472308, "grad_norm": 0.6289745352846472, "learning_rate": 4.791565287915654e-06, "loss": 0.5878, "step": 26866 }, { "epoch": 0.7844150535750781, "grad_norm": 0.6011613540168452, "learning_rate": 4.790916463909165e-06, "loss": 0.5161, "step": 26867 }, { "epoch": 0.7844442498029255, "grad_norm": 0.6610260295191568, "learning_rate": 4.790267639902677e-06, "loss": 0.5547, "step": 26868 }, { "epoch": 0.7844734460307728, "grad_norm": 0.6311332783544978, "learning_rate": 4.789618815896189e-06, "loss": 0.5568, "step": 26869 }, { "epoch": 0.7845026422586202, "grad_norm": 0.6624435880493902, "learning_rate": 4.7889699918897e-06, "loss": 0.5986, "step": 26870 }, { "epoch": 0.7845318384864676, "grad_norm": 0.6593687553715899, "learning_rate": 4.788321167883212e-06, "loss": 0.6119, "step": 26871 }, { "epoch": 0.7845610347143149, "grad_norm": 0.6057824004974589, "learning_rate": 4.787672343876723e-06, "loss": 0.5178, "step": 26872 }, { "epoch": 0.7845902309421623, "grad_norm": 0.627680061752366, "learning_rate": 4.787023519870236e-06, "loss": 0.52, "step": 26873 }, { "epoch": 0.7846194271700097, "grad_norm": 0.6217536484926997, "learning_rate": 4.786374695863747e-06, "loss": 0.5957, "step": 26874 }, { "epoch": 0.784648623397857, "grad_norm": 0.6044497519430398, "learning_rate": 4.785725871857259e-06, "loss": 0.5533, "step": 26875 }, { "epoch": 0.7846778196257044, "grad_norm": 0.6398522812590192, "learning_rate": 4.7850770478507705e-06, "loss": 0.6179, "step": 26876 }, { "epoch": 0.7847070158535517, "grad_norm": 0.6303910192285781, "learning_rate": 4.7844282238442825e-06, "loss": 0.5804, "step": 26877 }, { "epoch": 0.7847362120813991, "grad_norm": 0.6219581762876745, "learning_rate": 4.7837793998377945e-06, "loss": 0.5884, "step": 26878 }, { "epoch": 0.7847654083092465, "grad_norm": 0.6555855971199628, "learning_rate": 4.7831305758313066e-06, "loss": 0.632, "step": 26879 }, { "epoch": 0.7847946045370938, "grad_norm": 0.6321448081644582, "learning_rate": 4.782481751824818e-06, "loss": 0.5637, "step": 26880 }, { "epoch": 0.7848238007649412, "grad_norm": 0.6794120378505626, "learning_rate": 4.78183292781833e-06, "loss": 0.6558, "step": 26881 }, { "epoch": 0.7848529969927885, "grad_norm": 0.6046575499737151, "learning_rate": 4.781184103811841e-06, "loss": 0.5457, "step": 26882 }, { "epoch": 0.7848821932206359, "grad_norm": 0.6452596558439946, "learning_rate": 4.780535279805353e-06, "loss": 0.5489, "step": 26883 }, { "epoch": 0.7849113894484833, "grad_norm": 0.6401705361589708, "learning_rate": 4.779886455798865e-06, "loss": 0.601, "step": 26884 }, { "epoch": 0.7849405856763306, "grad_norm": 0.6885855524351747, "learning_rate": 4.779237631792377e-06, "loss": 0.6339, "step": 26885 }, { "epoch": 0.784969781904178, "grad_norm": 0.6340221372137994, "learning_rate": 4.778588807785888e-06, "loss": 0.6093, "step": 26886 }, { "epoch": 0.7849989781320253, "grad_norm": 0.640520433189288, "learning_rate": 4.7779399837794e-06, "loss": 0.5792, "step": 26887 }, { "epoch": 0.7850281743598727, "grad_norm": 0.6072389163776717, "learning_rate": 4.777291159772912e-06, "loss": 0.5745, "step": 26888 }, { "epoch": 0.7850573705877201, "grad_norm": 0.6308394168335839, "learning_rate": 4.7766423357664234e-06, "loss": 0.5939, "step": 26889 }, { "epoch": 0.7850865668155674, "grad_norm": 0.645047136443702, "learning_rate": 4.7759935117599355e-06, "loss": 0.6005, "step": 26890 }, { "epoch": 0.7851157630434148, "grad_norm": 0.6716000760629196, "learning_rate": 4.775344687753447e-06, "loss": 0.6432, "step": 26891 }, { "epoch": 0.7851449592712622, "grad_norm": 0.6274490999368313, "learning_rate": 4.7746958637469595e-06, "loss": 0.5678, "step": 26892 }, { "epoch": 0.7851741554991095, "grad_norm": 0.6264597086042358, "learning_rate": 4.774047039740471e-06, "loss": 0.5639, "step": 26893 }, { "epoch": 0.7852033517269569, "grad_norm": 0.6906382949983602, "learning_rate": 4.773398215733983e-06, "loss": 0.688, "step": 26894 }, { "epoch": 0.7852325479548042, "grad_norm": 0.6473344630414213, "learning_rate": 4.772749391727494e-06, "loss": 0.6056, "step": 26895 }, { "epoch": 0.7852617441826516, "grad_norm": 0.6421468428260794, "learning_rate": 4.772100567721006e-06, "loss": 0.5362, "step": 26896 }, { "epoch": 0.785290940410499, "grad_norm": 0.6289678758532502, "learning_rate": 4.771451743714518e-06, "loss": 0.5428, "step": 26897 }, { "epoch": 0.7853201366383463, "grad_norm": 0.6246391869027316, "learning_rate": 4.77080291970803e-06, "loss": 0.5568, "step": 26898 }, { "epoch": 0.7853493328661937, "grad_norm": 0.6889751576024525, "learning_rate": 4.770154095701541e-06, "loss": 0.6595, "step": 26899 }, { "epoch": 0.785378529094041, "grad_norm": 0.6098425831055729, "learning_rate": 4.769505271695053e-06, "loss": 0.5826, "step": 26900 }, { "epoch": 0.7854077253218884, "grad_norm": 0.7708581729963384, "learning_rate": 4.768856447688564e-06, "loss": 0.6826, "step": 26901 }, { "epoch": 0.7854369215497358, "grad_norm": 0.6301466096704648, "learning_rate": 4.768207623682076e-06, "loss": 0.55, "step": 26902 }, { "epoch": 0.7854661177775831, "grad_norm": 0.6543125239203498, "learning_rate": 4.7675587996755884e-06, "loss": 0.5802, "step": 26903 }, { "epoch": 0.7854953140054305, "grad_norm": 0.6911645451316508, "learning_rate": 4.7669099756691005e-06, "loss": 0.6716, "step": 26904 }, { "epoch": 0.7855245102332779, "grad_norm": 0.6755995709946749, "learning_rate": 4.766261151662612e-06, "loss": 0.6319, "step": 26905 }, { "epoch": 0.7855537064611252, "grad_norm": 0.6035405483545478, "learning_rate": 4.765612327656124e-06, "loss": 0.5853, "step": 26906 }, { "epoch": 0.7855829026889726, "grad_norm": 0.6246878787318809, "learning_rate": 4.764963503649636e-06, "loss": 0.5628, "step": 26907 }, { "epoch": 0.7856120989168199, "grad_norm": 0.6655546028352686, "learning_rate": 4.764314679643147e-06, "loss": 0.6602, "step": 26908 }, { "epoch": 0.7856412951446673, "grad_norm": 0.6832023777030354, "learning_rate": 4.763665855636659e-06, "loss": 0.6253, "step": 26909 }, { "epoch": 0.7856704913725147, "grad_norm": 0.6547882749412453, "learning_rate": 4.76301703163017e-06, "loss": 0.6122, "step": 26910 }, { "epoch": 0.785699687600362, "grad_norm": 0.6448964210220217, "learning_rate": 4.762368207623683e-06, "loss": 0.5996, "step": 26911 }, { "epoch": 0.7857288838282094, "grad_norm": 0.6323857846690387, "learning_rate": 4.761719383617194e-06, "loss": 0.604, "step": 26912 }, { "epoch": 0.7857580800560567, "grad_norm": 0.6407722994300354, "learning_rate": 4.761070559610706e-06, "loss": 0.5649, "step": 26913 }, { "epoch": 0.7857872762839041, "grad_norm": 0.6458605690289984, "learning_rate": 4.760421735604217e-06, "loss": 0.6272, "step": 26914 }, { "epoch": 0.7858164725117515, "grad_norm": 0.672194040894396, "learning_rate": 4.759772911597729e-06, "loss": 0.6338, "step": 26915 }, { "epoch": 0.7858456687395988, "grad_norm": 0.6981228380830794, "learning_rate": 4.759124087591241e-06, "loss": 0.6276, "step": 26916 }, { "epoch": 0.7858748649674462, "grad_norm": 0.6313794696666132, "learning_rate": 4.7584752635847534e-06, "loss": 0.5823, "step": 26917 }, { "epoch": 0.7859040611952935, "grad_norm": 0.6666097109993716, "learning_rate": 4.757826439578265e-06, "loss": 0.608, "step": 26918 }, { "epoch": 0.7859332574231409, "grad_norm": 0.7383132207874809, "learning_rate": 4.757177615571777e-06, "loss": 0.7468, "step": 26919 }, { "epoch": 0.7859624536509883, "grad_norm": 0.6489799900274201, "learning_rate": 4.756528791565289e-06, "loss": 0.6019, "step": 26920 }, { "epoch": 0.7859916498788356, "grad_norm": 0.6264193486603415, "learning_rate": 4.7558799675588e-06, "loss": 0.5495, "step": 26921 }, { "epoch": 0.786020846106683, "grad_norm": 0.6087106118654723, "learning_rate": 4.755231143552312e-06, "loss": 0.5173, "step": 26922 }, { "epoch": 0.7860500423345304, "grad_norm": 0.7239179995874447, "learning_rate": 4.754582319545824e-06, "loss": 0.7023, "step": 26923 }, { "epoch": 0.7860792385623777, "grad_norm": 0.6364202685900996, "learning_rate": 4.753933495539335e-06, "loss": 0.6281, "step": 26924 }, { "epoch": 0.7861084347902251, "grad_norm": 0.6647502679600094, "learning_rate": 4.753284671532847e-06, "loss": 0.6227, "step": 26925 }, { "epoch": 0.7861376310180724, "grad_norm": 0.6237934038190169, "learning_rate": 4.752635847526359e-06, "loss": 0.5922, "step": 26926 }, { "epoch": 0.7861668272459198, "grad_norm": 0.629136397033003, "learning_rate": 4.75198702351987e-06, "loss": 0.5574, "step": 26927 }, { "epoch": 0.7861960234737672, "grad_norm": 0.6220253304680188, "learning_rate": 4.751338199513382e-06, "loss": 0.5705, "step": 26928 }, { "epoch": 0.7862252197016145, "grad_norm": 0.6295637861426089, "learning_rate": 4.7506893755068935e-06, "loss": 0.5512, "step": 26929 }, { "epoch": 0.7862544159294619, "grad_norm": 0.6443616797323817, "learning_rate": 4.750040551500406e-06, "loss": 0.5947, "step": 26930 }, { "epoch": 0.7862836121573092, "grad_norm": 0.6147233983125405, "learning_rate": 4.7493917274939176e-06, "loss": 0.5224, "step": 26931 }, { "epoch": 0.7863128083851566, "grad_norm": 0.6133394798493138, "learning_rate": 4.74874290348743e-06, "loss": 0.5655, "step": 26932 }, { "epoch": 0.786342004613004, "grad_norm": 0.6170824243118076, "learning_rate": 4.748094079480941e-06, "loss": 0.5588, "step": 26933 }, { "epoch": 0.7863712008408513, "grad_norm": 0.6607408514426246, "learning_rate": 4.747445255474453e-06, "loss": 0.6226, "step": 26934 }, { "epoch": 0.7864003970686987, "grad_norm": 0.648046067011178, "learning_rate": 4.746796431467965e-06, "loss": 0.6349, "step": 26935 }, { "epoch": 0.786429593296546, "grad_norm": 0.5679550946587412, "learning_rate": 4.746147607461477e-06, "loss": 0.484, "step": 26936 }, { "epoch": 0.7864587895243934, "grad_norm": 0.6494728156821431, "learning_rate": 4.745498783454988e-06, "loss": 0.6171, "step": 26937 }, { "epoch": 0.7864879857522408, "grad_norm": 0.6349717196184838, "learning_rate": 4.7448499594485e-06, "loss": 0.5723, "step": 26938 }, { "epoch": 0.7865171819800881, "grad_norm": 0.6547637065068986, "learning_rate": 4.744201135442012e-06, "loss": 0.6053, "step": 26939 }, { "epoch": 0.7865463782079355, "grad_norm": 0.6386711829750542, "learning_rate": 4.743552311435523e-06, "loss": 0.5469, "step": 26940 }, { "epoch": 0.7865755744357829, "grad_norm": 0.6031035473249143, "learning_rate": 4.742903487429035e-06, "loss": 0.5382, "step": 26941 }, { "epoch": 0.7866047706636302, "grad_norm": 0.6187246127973532, "learning_rate": 4.742254663422547e-06, "loss": 0.5771, "step": 26942 }, { "epoch": 0.7866339668914776, "grad_norm": 0.6761075135953819, "learning_rate": 4.7416058394160585e-06, "loss": 0.6557, "step": 26943 }, { "epoch": 0.7866631631193249, "grad_norm": 0.6439895180836533, "learning_rate": 4.7409570154095705e-06, "loss": 0.6026, "step": 26944 }, { "epoch": 0.7866923593471723, "grad_norm": 0.6199673060018859, "learning_rate": 4.7403081914030826e-06, "loss": 0.5517, "step": 26945 }, { "epoch": 0.7867215555750197, "grad_norm": 0.7034445276643276, "learning_rate": 4.739659367396594e-06, "loss": 0.6853, "step": 26946 }, { "epoch": 0.786750751802867, "grad_norm": 0.6337677298965446, "learning_rate": 4.739010543390106e-06, "loss": 0.5409, "step": 26947 }, { "epoch": 0.7867799480307144, "grad_norm": 0.6227032934569244, "learning_rate": 4.738361719383617e-06, "loss": 0.5296, "step": 26948 }, { "epoch": 0.7868091442585617, "grad_norm": 0.7627929866813794, "learning_rate": 4.73771289537713e-06, "loss": 0.604, "step": 26949 }, { "epoch": 0.7868383404864091, "grad_norm": 0.6288845570540952, "learning_rate": 4.737064071370641e-06, "loss": 0.5887, "step": 26950 }, { "epoch": 0.7868675367142565, "grad_norm": 0.6513758290724692, "learning_rate": 4.736415247364153e-06, "loss": 0.6204, "step": 26951 }, { "epoch": 0.7868967329421038, "grad_norm": 0.6678099020428315, "learning_rate": 4.735766423357664e-06, "loss": 0.6457, "step": 26952 }, { "epoch": 0.7869259291699512, "grad_norm": 0.6395940011835417, "learning_rate": 4.735117599351176e-06, "loss": 0.6217, "step": 26953 }, { "epoch": 0.7869551253977985, "grad_norm": 0.6392385811999339, "learning_rate": 4.734468775344688e-06, "loss": 0.6073, "step": 26954 }, { "epoch": 0.786984321625646, "grad_norm": 0.6075970197895114, "learning_rate": 4.7338199513382e-06, "loss": 0.5313, "step": 26955 }, { "epoch": 0.7870135178534934, "grad_norm": 0.6562476165854383, "learning_rate": 4.7331711273317115e-06, "loss": 0.5547, "step": 26956 }, { "epoch": 0.7870427140813407, "grad_norm": 0.6928425416676771, "learning_rate": 4.7325223033252235e-06, "loss": 0.6542, "step": 26957 }, { "epoch": 0.7870719103091881, "grad_norm": 0.667139280030968, "learning_rate": 4.7318734793187355e-06, "loss": 0.6359, "step": 26958 }, { "epoch": 0.7871011065370355, "grad_norm": 0.6053794897521941, "learning_rate": 4.731224655312247e-06, "loss": 0.5183, "step": 26959 }, { "epoch": 0.7871303027648828, "grad_norm": 0.6274608080226421, "learning_rate": 4.730575831305759e-06, "loss": 0.5686, "step": 26960 }, { "epoch": 0.7871594989927302, "grad_norm": 0.677240177610317, "learning_rate": 4.729927007299271e-06, "loss": 0.6365, "step": 26961 }, { "epoch": 0.7871886952205776, "grad_norm": 0.6016932946407989, "learning_rate": 4.729278183292782e-06, "loss": 0.5176, "step": 26962 }, { "epoch": 0.7872178914484249, "grad_norm": 0.6369051159583774, "learning_rate": 4.728629359286294e-06, "loss": 0.5619, "step": 26963 }, { "epoch": 0.7872470876762723, "grad_norm": 0.5739917755341534, "learning_rate": 4.727980535279806e-06, "loss": 0.5108, "step": 26964 }, { "epoch": 0.7872762839041196, "grad_norm": 0.6128830830402815, "learning_rate": 4.727331711273317e-06, "loss": 0.5565, "step": 26965 }, { "epoch": 0.787305480131967, "grad_norm": 0.7185297268178168, "learning_rate": 4.726682887266829e-06, "loss": 0.7087, "step": 26966 }, { "epoch": 0.7873346763598144, "grad_norm": 0.6728820493107158, "learning_rate": 4.72603406326034e-06, "loss": 0.6267, "step": 26967 }, { "epoch": 0.7873638725876617, "grad_norm": 0.661259340821748, "learning_rate": 4.725385239253853e-06, "loss": 0.6531, "step": 26968 }, { "epoch": 0.7873930688155091, "grad_norm": 0.6241585953259403, "learning_rate": 4.7247364152473644e-06, "loss": 0.5458, "step": 26969 }, { "epoch": 0.7874222650433564, "grad_norm": 0.7022576254552402, "learning_rate": 4.7240875912408765e-06, "loss": 0.6697, "step": 26970 }, { "epoch": 0.7874514612712038, "grad_norm": 0.6536481595135843, "learning_rate": 4.723438767234388e-06, "loss": 0.6104, "step": 26971 }, { "epoch": 0.7874806574990512, "grad_norm": 0.641523684116773, "learning_rate": 4.7227899432279e-06, "loss": 0.5767, "step": 26972 }, { "epoch": 0.7875098537268985, "grad_norm": 0.6162770584245659, "learning_rate": 4.722141119221412e-06, "loss": 0.5846, "step": 26973 }, { "epoch": 0.7875390499547459, "grad_norm": 0.6261333203859559, "learning_rate": 4.721492295214924e-06, "loss": 0.549, "step": 26974 }, { "epoch": 0.7875682461825932, "grad_norm": 0.6367782641824101, "learning_rate": 4.720843471208435e-06, "loss": 0.6077, "step": 26975 }, { "epoch": 0.7875974424104406, "grad_norm": 0.6826941769285736, "learning_rate": 4.720194647201947e-06, "loss": 0.651, "step": 26976 }, { "epoch": 0.787626638638288, "grad_norm": 0.7090985207333427, "learning_rate": 4.719545823195459e-06, "loss": 0.6614, "step": 26977 }, { "epoch": 0.7876558348661353, "grad_norm": 0.6493094426256499, "learning_rate": 4.71889699918897e-06, "loss": 0.6376, "step": 26978 }, { "epoch": 0.7876850310939827, "grad_norm": 0.5954635271834975, "learning_rate": 4.718248175182482e-06, "loss": 0.5303, "step": 26979 }, { "epoch": 0.78771422732183, "grad_norm": 0.6154098338584557, "learning_rate": 4.717599351175993e-06, "loss": 0.58, "step": 26980 }, { "epoch": 0.7877434235496774, "grad_norm": 1.356579759399895, "learning_rate": 4.716950527169505e-06, "loss": 0.6658, "step": 26981 }, { "epoch": 0.7877726197775248, "grad_norm": 0.6352448182686535, "learning_rate": 4.716301703163017e-06, "loss": 0.5763, "step": 26982 }, { "epoch": 0.7878018160053721, "grad_norm": 0.7177894621567034, "learning_rate": 4.7156528791565294e-06, "loss": 0.7743, "step": 26983 }, { "epoch": 0.7878310122332195, "grad_norm": 0.6530840921534911, "learning_rate": 4.715004055150041e-06, "loss": 0.6069, "step": 26984 }, { "epoch": 0.7878602084610669, "grad_norm": 0.6444909701458157, "learning_rate": 4.714355231143553e-06, "loss": 0.5963, "step": 26985 }, { "epoch": 0.7878894046889142, "grad_norm": 0.6831475208350488, "learning_rate": 4.713706407137065e-06, "loss": 0.6745, "step": 26986 }, { "epoch": 0.7879186009167616, "grad_norm": 0.6150275885600206, "learning_rate": 4.713057583130577e-06, "loss": 0.5531, "step": 26987 }, { "epoch": 0.7879477971446089, "grad_norm": 0.7077480586715287, "learning_rate": 4.712408759124088e-06, "loss": 0.7198, "step": 26988 }, { "epoch": 0.7879769933724563, "grad_norm": 0.6130233137627676, "learning_rate": 4.7117599351176e-06, "loss": 0.5845, "step": 26989 }, { "epoch": 0.7880061896003037, "grad_norm": 0.6670478244532102, "learning_rate": 4.711111111111111e-06, "loss": 0.6312, "step": 26990 }, { "epoch": 0.788035385828151, "grad_norm": 0.6322459268119125, "learning_rate": 4.710462287104623e-06, "loss": 0.5842, "step": 26991 }, { "epoch": 0.7880645820559984, "grad_norm": 0.6672040876922063, "learning_rate": 4.709813463098135e-06, "loss": 0.64, "step": 26992 }, { "epoch": 0.7880937782838457, "grad_norm": 0.6247652356346978, "learning_rate": 4.709164639091647e-06, "loss": 0.5485, "step": 26993 }, { "epoch": 0.7881229745116931, "grad_norm": 0.6123056394194941, "learning_rate": 4.708515815085158e-06, "loss": 0.5477, "step": 26994 }, { "epoch": 0.7881521707395405, "grad_norm": 0.6120231572276265, "learning_rate": 4.70786699107867e-06, "loss": 0.5561, "step": 26995 }, { "epoch": 0.7881813669673878, "grad_norm": 0.6676213941688708, "learning_rate": 4.707218167072182e-06, "loss": 0.6191, "step": 26996 }, { "epoch": 0.7882105631952352, "grad_norm": 0.599116205041991, "learning_rate": 4.706569343065694e-06, "loss": 0.504, "step": 26997 }, { "epoch": 0.7882397594230826, "grad_norm": 0.6372598727754814, "learning_rate": 4.705920519059206e-06, "loss": 0.539, "step": 26998 }, { "epoch": 0.7882689556509299, "grad_norm": 0.6587478634496974, "learning_rate": 4.705271695052717e-06, "loss": 0.6089, "step": 26999 }, { "epoch": 0.7882981518787773, "grad_norm": 0.6056204063698625, "learning_rate": 4.704622871046229e-06, "loss": 0.548, "step": 27000 }, { "epoch": 0.7883273481066246, "grad_norm": 0.6350175597745846, "learning_rate": 4.703974047039741e-06, "loss": 0.6161, "step": 27001 }, { "epoch": 0.788356544334472, "grad_norm": 0.640466427506814, "learning_rate": 4.703325223033253e-06, "loss": 0.5813, "step": 27002 }, { "epoch": 0.7883857405623194, "grad_norm": 0.7153342054822924, "learning_rate": 4.702676399026764e-06, "loss": 0.6877, "step": 27003 }, { "epoch": 0.7884149367901667, "grad_norm": 0.6231570000494302, "learning_rate": 4.702027575020276e-06, "loss": 0.5754, "step": 27004 }, { "epoch": 0.7884441330180141, "grad_norm": 0.6170846804167248, "learning_rate": 4.701378751013788e-06, "loss": 0.5419, "step": 27005 }, { "epoch": 0.7884733292458614, "grad_norm": 0.6929396281357977, "learning_rate": 4.7007299270073e-06, "loss": 0.706, "step": 27006 }, { "epoch": 0.7885025254737088, "grad_norm": 0.6342979516414924, "learning_rate": 4.700081103000811e-06, "loss": 0.6019, "step": 27007 }, { "epoch": 0.7885317217015562, "grad_norm": 0.6275065546613939, "learning_rate": 4.699432278994323e-06, "loss": 0.5555, "step": 27008 }, { "epoch": 0.7885609179294035, "grad_norm": 0.6603516470533687, "learning_rate": 4.6987834549878345e-06, "loss": 0.6049, "step": 27009 }, { "epoch": 0.7885901141572509, "grad_norm": 0.6271904335661161, "learning_rate": 4.6981346309813465e-06, "loss": 0.5515, "step": 27010 }, { "epoch": 0.7886193103850982, "grad_norm": 0.6216699654078446, "learning_rate": 4.6974858069748586e-06, "loss": 0.556, "step": 27011 }, { "epoch": 0.7886485066129456, "grad_norm": 0.6818810475260294, "learning_rate": 4.696836982968371e-06, "loss": 0.6737, "step": 27012 }, { "epoch": 0.788677702840793, "grad_norm": 0.640458657721672, "learning_rate": 4.696188158961882e-06, "loss": 0.6005, "step": 27013 }, { "epoch": 0.7887068990686403, "grad_norm": 0.7656176097837352, "learning_rate": 4.695539334955394e-06, "loss": 0.663, "step": 27014 }, { "epoch": 0.7887360952964877, "grad_norm": 0.6932918207197203, "learning_rate": 4.694890510948906e-06, "loss": 0.6761, "step": 27015 }, { "epoch": 0.788765291524335, "grad_norm": 0.6188643480474696, "learning_rate": 4.694241686942417e-06, "loss": 0.5663, "step": 27016 }, { "epoch": 0.7887944877521824, "grad_norm": 0.6089314014288529, "learning_rate": 4.693592862935929e-06, "loss": 0.5503, "step": 27017 }, { "epoch": 0.7888236839800298, "grad_norm": 0.6179758714015151, "learning_rate": 4.69294403892944e-06, "loss": 0.5475, "step": 27018 }, { "epoch": 0.7888528802078771, "grad_norm": 0.6357974065497399, "learning_rate": 4.692295214922952e-06, "loss": 0.5865, "step": 27019 }, { "epoch": 0.7888820764357245, "grad_norm": 0.6182546336016211, "learning_rate": 4.691646390916464e-06, "loss": 0.5403, "step": 27020 }, { "epoch": 0.7889112726635719, "grad_norm": 0.625716888403759, "learning_rate": 4.690997566909976e-06, "loss": 0.6046, "step": 27021 }, { "epoch": 0.7889404688914192, "grad_norm": 0.6666526103604355, "learning_rate": 4.6903487429034875e-06, "loss": 0.6717, "step": 27022 }, { "epoch": 0.7889696651192666, "grad_norm": 0.648250309928208, "learning_rate": 4.6896999188969995e-06, "loss": 0.669, "step": 27023 }, { "epoch": 0.7889988613471139, "grad_norm": 0.5831054669919316, "learning_rate": 4.6890510948905115e-06, "loss": 0.4851, "step": 27024 }, { "epoch": 0.7890280575749613, "grad_norm": 0.6441298414065662, "learning_rate": 4.6884022708840236e-06, "loss": 0.6082, "step": 27025 }, { "epoch": 0.7890572538028087, "grad_norm": 0.6746856662313174, "learning_rate": 4.687753446877535e-06, "loss": 0.6006, "step": 27026 }, { "epoch": 0.789086450030656, "grad_norm": 0.6569146184968765, "learning_rate": 4.687104622871047e-06, "loss": 0.6216, "step": 27027 }, { "epoch": 0.7891156462585034, "grad_norm": 0.6526161533280241, "learning_rate": 4.686455798864558e-06, "loss": 0.6061, "step": 27028 }, { "epoch": 0.7891448424863508, "grad_norm": 0.6023640553319142, "learning_rate": 4.68580697485807e-06, "loss": 0.5378, "step": 27029 }, { "epoch": 0.7891740387141981, "grad_norm": 0.6884089998610146, "learning_rate": 4.685158150851582e-06, "loss": 0.663, "step": 27030 }, { "epoch": 0.7892032349420455, "grad_norm": 0.7133081351909085, "learning_rate": 4.684509326845094e-06, "loss": 0.6364, "step": 27031 }, { "epoch": 0.7892324311698928, "grad_norm": 0.7104634579934315, "learning_rate": 4.683860502838605e-06, "loss": 0.653, "step": 27032 }, { "epoch": 0.7892616273977402, "grad_norm": 0.7589992336595649, "learning_rate": 4.683211678832117e-06, "loss": 0.7199, "step": 27033 }, { "epoch": 0.7892908236255876, "grad_norm": 0.6482870512258009, "learning_rate": 4.682562854825629e-06, "loss": 0.6177, "step": 27034 }, { "epoch": 0.7893200198534349, "grad_norm": 0.6864466972166176, "learning_rate": 4.6819140308191405e-06, "loss": 0.662, "step": 27035 }, { "epoch": 0.7893492160812823, "grad_norm": 0.645124334775183, "learning_rate": 4.6812652068126525e-06, "loss": 0.5849, "step": 27036 }, { "epoch": 0.7893784123091296, "grad_norm": 0.6867302061551673, "learning_rate": 4.680616382806164e-06, "loss": 0.6241, "step": 27037 }, { "epoch": 0.789407608536977, "grad_norm": 0.6575503159485943, "learning_rate": 4.6799675587996765e-06, "loss": 0.5985, "step": 27038 }, { "epoch": 0.7894368047648244, "grad_norm": 0.6800790866075644, "learning_rate": 4.679318734793188e-06, "loss": 0.6493, "step": 27039 }, { "epoch": 0.7894660009926717, "grad_norm": 0.6468638661655036, "learning_rate": 4.6786699107867e-06, "loss": 0.5743, "step": 27040 }, { "epoch": 0.7894951972205191, "grad_norm": 0.6724149010645533, "learning_rate": 4.678021086780211e-06, "loss": 0.6225, "step": 27041 }, { "epoch": 0.7895243934483664, "grad_norm": 0.7046838127259332, "learning_rate": 4.677372262773723e-06, "loss": 0.6441, "step": 27042 }, { "epoch": 0.7895535896762138, "grad_norm": 0.6689732183436666, "learning_rate": 4.676723438767235e-06, "loss": 0.6436, "step": 27043 }, { "epoch": 0.7895827859040612, "grad_norm": 0.6329622745374618, "learning_rate": 4.676074614760747e-06, "loss": 0.516, "step": 27044 }, { "epoch": 0.7896119821319085, "grad_norm": 0.6622124513710327, "learning_rate": 4.675425790754258e-06, "loss": 0.5817, "step": 27045 }, { "epoch": 0.7896411783597559, "grad_norm": 0.6239963339953607, "learning_rate": 4.67477696674777e-06, "loss": 0.5515, "step": 27046 }, { "epoch": 0.7896703745876033, "grad_norm": 0.6668723022295923, "learning_rate": 4.674128142741281e-06, "loss": 0.6715, "step": 27047 }, { "epoch": 0.7896995708154506, "grad_norm": 0.6350539692718998, "learning_rate": 4.673479318734793e-06, "loss": 0.5588, "step": 27048 }, { "epoch": 0.789728767043298, "grad_norm": 0.6193048697309713, "learning_rate": 4.6728304947283054e-06, "loss": 0.5356, "step": 27049 }, { "epoch": 0.7897579632711453, "grad_norm": 0.6647439390352315, "learning_rate": 4.6721816707218175e-06, "loss": 0.6598, "step": 27050 }, { "epoch": 0.7897871594989927, "grad_norm": 0.6020656144461252, "learning_rate": 4.671532846715329e-06, "loss": 0.5477, "step": 27051 }, { "epoch": 0.7898163557268401, "grad_norm": 0.6322058620392113, "learning_rate": 4.670884022708841e-06, "loss": 0.5841, "step": 27052 }, { "epoch": 0.7898455519546874, "grad_norm": 0.6045092491959487, "learning_rate": 4.670235198702353e-06, "loss": 0.5102, "step": 27053 }, { "epoch": 0.7898747481825348, "grad_norm": 0.6211428020952877, "learning_rate": 4.669586374695864e-06, "loss": 0.5681, "step": 27054 }, { "epoch": 0.7899039444103821, "grad_norm": 0.6222964807502904, "learning_rate": 4.668937550689376e-06, "loss": 0.5366, "step": 27055 }, { "epoch": 0.7899331406382295, "grad_norm": 0.6292964728553723, "learning_rate": 4.668288726682887e-06, "loss": 0.5604, "step": 27056 }, { "epoch": 0.7899623368660769, "grad_norm": 0.6797675752559255, "learning_rate": 4.6676399026764e-06, "loss": 0.5918, "step": 27057 }, { "epoch": 0.7899915330939242, "grad_norm": 0.6021132514056031, "learning_rate": 4.666991078669911e-06, "loss": 0.5474, "step": 27058 }, { "epoch": 0.7900207293217716, "grad_norm": 0.6318559879065878, "learning_rate": 4.666342254663423e-06, "loss": 0.584, "step": 27059 }, { "epoch": 0.790049925549619, "grad_norm": 0.5963176045443865, "learning_rate": 4.665693430656934e-06, "loss": 0.5542, "step": 27060 }, { "epoch": 0.7900791217774663, "grad_norm": 0.6578895463850747, "learning_rate": 4.665044606650446e-06, "loss": 0.6155, "step": 27061 }, { "epoch": 0.7901083180053137, "grad_norm": 0.5616287480920277, "learning_rate": 4.664395782643958e-06, "loss": 0.4779, "step": 27062 }, { "epoch": 0.790137514233161, "grad_norm": 0.6292358784063347, "learning_rate": 4.6637469586374704e-06, "loss": 0.5308, "step": 27063 }, { "epoch": 0.7901667104610084, "grad_norm": 0.6492940422955449, "learning_rate": 4.663098134630982e-06, "loss": 0.5666, "step": 27064 }, { "epoch": 0.7901959066888558, "grad_norm": 0.6879066950296712, "learning_rate": 4.662449310624494e-06, "loss": 0.6447, "step": 27065 }, { "epoch": 0.7902251029167031, "grad_norm": 0.6863188253944029, "learning_rate": 4.661800486618005e-06, "loss": 0.6369, "step": 27066 }, { "epoch": 0.7902542991445505, "grad_norm": 0.693295637389981, "learning_rate": 4.661151662611517e-06, "loss": 0.6899, "step": 27067 }, { "epoch": 0.7902834953723978, "grad_norm": 0.6501024878993085, "learning_rate": 4.660502838605029e-06, "loss": 0.5907, "step": 27068 }, { "epoch": 0.7903126916002452, "grad_norm": 0.6032028463274924, "learning_rate": 4.65985401459854e-06, "loss": 0.5558, "step": 27069 }, { "epoch": 0.7903418878280926, "grad_norm": 0.650104819736047, "learning_rate": 4.659205190592052e-06, "loss": 0.6342, "step": 27070 }, { "epoch": 0.7903710840559399, "grad_norm": 0.5975104904905238, "learning_rate": 4.658556366585564e-06, "loss": 0.4806, "step": 27071 }, { "epoch": 0.7904002802837873, "grad_norm": 0.6297468395824088, "learning_rate": 4.657907542579076e-06, "loss": 0.5368, "step": 27072 }, { "epoch": 0.7904294765116346, "grad_norm": 0.654345332839288, "learning_rate": 4.657258718572587e-06, "loss": 0.5866, "step": 27073 }, { "epoch": 0.790458672739482, "grad_norm": 0.6477679333106646, "learning_rate": 4.656609894566099e-06, "loss": 0.6417, "step": 27074 }, { "epoch": 0.7904878689673294, "grad_norm": 0.6356137443430514, "learning_rate": 4.6559610705596105e-06, "loss": 0.5728, "step": 27075 }, { "epoch": 0.7905170651951768, "grad_norm": 0.6296494316860736, "learning_rate": 4.655312246553123e-06, "loss": 0.5638, "step": 27076 }, { "epoch": 0.7905462614230242, "grad_norm": 0.6513789940191854, "learning_rate": 4.654663422546635e-06, "loss": 0.6128, "step": 27077 }, { "epoch": 0.7905754576508716, "grad_norm": 0.7106712477414703, "learning_rate": 4.654014598540147e-06, "loss": 0.6809, "step": 27078 }, { "epoch": 0.7906046538787189, "grad_norm": 0.6228055303235075, "learning_rate": 4.653365774533658e-06, "loss": 0.573, "step": 27079 }, { "epoch": 0.7906338501065663, "grad_norm": 0.6559091008379622, "learning_rate": 4.65271695052717e-06, "loss": 0.6378, "step": 27080 }, { "epoch": 0.7906630463344136, "grad_norm": 0.6919817711628387, "learning_rate": 4.652068126520682e-06, "loss": 0.6021, "step": 27081 }, { "epoch": 0.790692242562261, "grad_norm": 0.6166594531992423, "learning_rate": 4.651419302514194e-06, "loss": 0.5448, "step": 27082 }, { "epoch": 0.7907214387901084, "grad_norm": 0.6671517129535673, "learning_rate": 4.650770478507705e-06, "loss": 0.5942, "step": 27083 }, { "epoch": 0.7907506350179557, "grad_norm": 0.7008582768019178, "learning_rate": 4.650121654501217e-06, "loss": 0.6665, "step": 27084 }, { "epoch": 0.7907798312458031, "grad_norm": 0.744434586963776, "learning_rate": 4.649472830494728e-06, "loss": 0.7179, "step": 27085 }, { "epoch": 0.7908090274736504, "grad_norm": 0.5890087087120845, "learning_rate": 4.64882400648824e-06, "loss": 0.5118, "step": 27086 }, { "epoch": 0.7908382237014978, "grad_norm": 0.649096675914289, "learning_rate": 4.648175182481752e-06, "loss": 0.5606, "step": 27087 }, { "epoch": 0.7908674199293452, "grad_norm": 0.6315511970720779, "learning_rate": 4.6475263584752635e-06, "loss": 0.6095, "step": 27088 }, { "epoch": 0.7908966161571925, "grad_norm": 0.6518217708364816, "learning_rate": 4.6468775344687755e-06, "loss": 0.5892, "step": 27089 }, { "epoch": 0.7909258123850399, "grad_norm": 0.6053153701047654, "learning_rate": 4.6462287104622875e-06, "loss": 0.5184, "step": 27090 }, { "epoch": 0.7909550086128873, "grad_norm": 0.6732944726081477, "learning_rate": 4.6455798864557996e-06, "loss": 0.6157, "step": 27091 }, { "epoch": 0.7909842048407346, "grad_norm": 0.6728694565523468, "learning_rate": 4.644931062449311e-06, "loss": 0.646, "step": 27092 }, { "epoch": 0.791013401068582, "grad_norm": 0.6279393561896663, "learning_rate": 4.644282238442823e-06, "loss": 0.5804, "step": 27093 }, { "epoch": 0.7910425972964293, "grad_norm": 0.6572091015584896, "learning_rate": 4.643633414436334e-06, "loss": 0.5943, "step": 27094 }, { "epoch": 0.7910717935242767, "grad_norm": 0.6524594220855585, "learning_rate": 4.642984590429847e-06, "loss": 0.6607, "step": 27095 }, { "epoch": 0.7911009897521241, "grad_norm": 0.6701901174836139, "learning_rate": 4.642335766423358e-06, "loss": 0.5865, "step": 27096 }, { "epoch": 0.7911301859799714, "grad_norm": 0.6525249751684197, "learning_rate": 4.64168694241687e-06, "loss": 0.6136, "step": 27097 }, { "epoch": 0.7911593822078188, "grad_norm": 0.6468942946289363, "learning_rate": 4.641038118410381e-06, "loss": 0.4992, "step": 27098 }, { "epoch": 0.7911885784356661, "grad_norm": 0.6447179594093501, "learning_rate": 4.640389294403893e-06, "loss": 0.5762, "step": 27099 }, { "epoch": 0.7912177746635135, "grad_norm": 0.5863687453942257, "learning_rate": 4.639740470397405e-06, "loss": 0.4777, "step": 27100 }, { "epoch": 0.7912469708913609, "grad_norm": 0.6556939553978792, "learning_rate": 4.639091646390917e-06, "loss": 0.5846, "step": 27101 }, { "epoch": 0.7912761671192082, "grad_norm": 0.642570037026153, "learning_rate": 4.6384428223844285e-06, "loss": 0.5529, "step": 27102 }, { "epoch": 0.7913053633470556, "grad_norm": 0.6514426941325976, "learning_rate": 4.6377939983779405e-06, "loss": 0.6175, "step": 27103 }, { "epoch": 0.791334559574903, "grad_norm": 0.5969314358260311, "learning_rate": 4.6371451743714525e-06, "loss": 0.5356, "step": 27104 }, { "epoch": 0.7913637558027503, "grad_norm": 0.6509408967923204, "learning_rate": 4.636496350364964e-06, "loss": 0.5917, "step": 27105 }, { "epoch": 0.7913929520305977, "grad_norm": 0.6370267935814582, "learning_rate": 4.635847526358476e-06, "loss": 0.6018, "step": 27106 }, { "epoch": 0.791422148258445, "grad_norm": 0.6207118209501467, "learning_rate": 4.635198702351987e-06, "loss": 0.5411, "step": 27107 }, { "epoch": 0.7914513444862924, "grad_norm": 0.6323960023863398, "learning_rate": 4.634549878345499e-06, "loss": 0.5835, "step": 27108 }, { "epoch": 0.7914805407141398, "grad_norm": 1.0587662304348535, "learning_rate": 4.633901054339011e-06, "loss": 0.6202, "step": 27109 }, { "epoch": 0.7915097369419871, "grad_norm": 0.6193314934499841, "learning_rate": 4.633252230332523e-06, "loss": 0.5882, "step": 27110 }, { "epoch": 0.7915389331698345, "grad_norm": 0.5828223515090536, "learning_rate": 4.632603406326034e-06, "loss": 0.486, "step": 27111 }, { "epoch": 0.7915681293976818, "grad_norm": 0.6741751106144455, "learning_rate": 4.631954582319546e-06, "loss": 0.6287, "step": 27112 }, { "epoch": 0.7915973256255292, "grad_norm": 0.6449811767182269, "learning_rate": 4.631305758313057e-06, "loss": 0.5697, "step": 27113 }, { "epoch": 0.7916265218533766, "grad_norm": 0.6287979578220614, "learning_rate": 4.63065693430657e-06, "loss": 0.576, "step": 27114 }, { "epoch": 0.7916557180812239, "grad_norm": 0.6827789136273026, "learning_rate": 4.6300081103000815e-06, "loss": 0.6565, "step": 27115 }, { "epoch": 0.7916849143090713, "grad_norm": 0.6574860247435842, "learning_rate": 4.6293592862935935e-06, "loss": 0.5844, "step": 27116 }, { "epoch": 0.7917141105369186, "grad_norm": 0.691745427973477, "learning_rate": 4.628710462287105e-06, "loss": 0.5795, "step": 27117 }, { "epoch": 0.791743306764766, "grad_norm": 0.6493409884200386, "learning_rate": 4.628061638280617e-06, "loss": 0.6385, "step": 27118 }, { "epoch": 0.7917725029926134, "grad_norm": 0.670883684135314, "learning_rate": 4.627412814274129e-06, "loss": 0.6534, "step": 27119 }, { "epoch": 0.7918016992204607, "grad_norm": 0.6256894880313519, "learning_rate": 4.626763990267641e-06, "loss": 0.5833, "step": 27120 }, { "epoch": 0.7918308954483081, "grad_norm": 0.6921177598736246, "learning_rate": 4.626115166261152e-06, "loss": 0.6608, "step": 27121 }, { "epoch": 0.7918600916761555, "grad_norm": 0.6172798584131785, "learning_rate": 4.625466342254664e-06, "loss": 0.5612, "step": 27122 }, { "epoch": 0.7918892879040028, "grad_norm": 0.6037729967000591, "learning_rate": 4.624817518248176e-06, "loss": 0.5557, "step": 27123 }, { "epoch": 0.7919184841318502, "grad_norm": 0.6153829321578514, "learning_rate": 4.624168694241687e-06, "loss": 0.5496, "step": 27124 }, { "epoch": 0.7919476803596975, "grad_norm": 0.6440581071365667, "learning_rate": 4.623519870235199e-06, "loss": 0.5696, "step": 27125 }, { "epoch": 0.7919768765875449, "grad_norm": 0.6540872241540057, "learning_rate": 4.62287104622871e-06, "loss": 0.6322, "step": 27126 }, { "epoch": 0.7920060728153923, "grad_norm": 0.6599037189743165, "learning_rate": 4.622222222222222e-06, "loss": 0.6166, "step": 27127 }, { "epoch": 0.7920352690432396, "grad_norm": 0.6144533351076439, "learning_rate": 4.621573398215734e-06, "loss": 0.5557, "step": 27128 }, { "epoch": 0.792064465271087, "grad_norm": 0.5891040482403728, "learning_rate": 4.6209245742092464e-06, "loss": 0.4732, "step": 27129 }, { "epoch": 0.7920936614989343, "grad_norm": 0.6090978494946818, "learning_rate": 4.620275750202758e-06, "loss": 0.5455, "step": 27130 }, { "epoch": 0.7921228577267817, "grad_norm": 0.6623468956093894, "learning_rate": 4.61962692619627e-06, "loss": 0.6306, "step": 27131 }, { "epoch": 0.7921520539546291, "grad_norm": 0.5921191247784791, "learning_rate": 4.618978102189781e-06, "loss": 0.5122, "step": 27132 }, { "epoch": 0.7921812501824764, "grad_norm": 0.6521469099878736, "learning_rate": 4.618329278183294e-06, "loss": 0.5866, "step": 27133 }, { "epoch": 0.7922104464103238, "grad_norm": 0.6817662341922921, "learning_rate": 4.617680454176805e-06, "loss": 0.6498, "step": 27134 }, { "epoch": 0.7922396426381711, "grad_norm": 0.5946322916451484, "learning_rate": 4.617031630170317e-06, "loss": 0.4851, "step": 27135 }, { "epoch": 0.7922688388660185, "grad_norm": 0.644487384276356, "learning_rate": 4.616382806163828e-06, "loss": 0.5965, "step": 27136 }, { "epoch": 0.7922980350938659, "grad_norm": 0.6511133206523364, "learning_rate": 4.61573398215734e-06, "loss": 0.5811, "step": 27137 }, { "epoch": 0.7923272313217132, "grad_norm": 0.6293877653263069, "learning_rate": 4.615085158150852e-06, "loss": 0.5803, "step": 27138 }, { "epoch": 0.7923564275495606, "grad_norm": 0.6001355727321321, "learning_rate": 4.614436334144364e-06, "loss": 0.4722, "step": 27139 }, { "epoch": 0.792385623777408, "grad_norm": 0.7159930490225485, "learning_rate": 4.613787510137875e-06, "loss": 0.6662, "step": 27140 }, { "epoch": 0.7924148200052553, "grad_norm": 0.6567748731342996, "learning_rate": 4.613138686131387e-06, "loss": 0.5641, "step": 27141 }, { "epoch": 0.7924440162331027, "grad_norm": 0.6221516269885304, "learning_rate": 4.612489862124899e-06, "loss": 0.5514, "step": 27142 }, { "epoch": 0.79247321246095, "grad_norm": 0.6992282752434835, "learning_rate": 4.611841038118411e-06, "loss": 0.6319, "step": 27143 }, { "epoch": 0.7925024086887974, "grad_norm": 0.6237182317467824, "learning_rate": 4.611192214111923e-06, "loss": 0.5857, "step": 27144 }, { "epoch": 0.7925316049166448, "grad_norm": 0.6657558002977844, "learning_rate": 4.610543390105434e-06, "loss": 0.61, "step": 27145 }, { "epoch": 0.7925608011444921, "grad_norm": 0.6008994510459538, "learning_rate": 4.609894566098946e-06, "loss": 0.4913, "step": 27146 }, { "epoch": 0.7925899973723395, "grad_norm": 0.71153263143211, "learning_rate": 4.609245742092458e-06, "loss": 0.6625, "step": 27147 }, { "epoch": 0.7926191936001868, "grad_norm": 0.6810651428017499, "learning_rate": 4.60859691808597e-06, "loss": 0.6554, "step": 27148 }, { "epoch": 0.7926483898280342, "grad_norm": 0.616810606765815, "learning_rate": 4.607948094079481e-06, "loss": 0.5245, "step": 27149 }, { "epoch": 0.7926775860558816, "grad_norm": 0.6604056204236131, "learning_rate": 4.607299270072993e-06, "loss": 0.6466, "step": 27150 }, { "epoch": 0.7927067822837289, "grad_norm": 0.6221751084769559, "learning_rate": 4.606650446066504e-06, "loss": 0.5343, "step": 27151 }, { "epoch": 0.7927359785115763, "grad_norm": 0.6072859140071929, "learning_rate": 4.606001622060017e-06, "loss": 0.5365, "step": 27152 }, { "epoch": 0.7927651747394237, "grad_norm": 0.6173855609509984, "learning_rate": 4.605352798053528e-06, "loss": 0.5544, "step": 27153 }, { "epoch": 0.792794370967271, "grad_norm": 0.6656891431082606, "learning_rate": 4.60470397404704e-06, "loss": 0.6114, "step": 27154 }, { "epoch": 0.7928235671951184, "grad_norm": 0.6696323704504367, "learning_rate": 4.6040551500405515e-06, "loss": 0.5922, "step": 27155 }, { "epoch": 0.7928527634229657, "grad_norm": 0.6682741494372498, "learning_rate": 4.6034063260340636e-06, "loss": 0.5993, "step": 27156 }, { "epoch": 0.7928819596508131, "grad_norm": 0.6337818792275812, "learning_rate": 4.602757502027576e-06, "loss": 0.5953, "step": 27157 }, { "epoch": 0.7929111558786605, "grad_norm": 0.6515881112486965, "learning_rate": 4.602108678021087e-06, "loss": 0.6467, "step": 27158 }, { "epoch": 0.7929403521065078, "grad_norm": 0.6669202674859223, "learning_rate": 4.601459854014599e-06, "loss": 0.617, "step": 27159 }, { "epoch": 0.7929695483343552, "grad_norm": 0.6460308481884769, "learning_rate": 4.600811030008111e-06, "loss": 0.5778, "step": 27160 }, { "epoch": 0.7929987445622025, "grad_norm": 0.6252751001966775, "learning_rate": 4.600162206001623e-06, "loss": 0.6009, "step": 27161 }, { "epoch": 0.7930279407900499, "grad_norm": 0.602048281823723, "learning_rate": 4.599513381995134e-06, "loss": 0.5374, "step": 27162 }, { "epoch": 0.7930571370178973, "grad_norm": 0.6895970391104279, "learning_rate": 4.598864557988646e-06, "loss": 0.6339, "step": 27163 }, { "epoch": 0.7930863332457446, "grad_norm": 0.654992963732791, "learning_rate": 4.598215733982157e-06, "loss": 0.5411, "step": 27164 }, { "epoch": 0.793115529473592, "grad_norm": 0.6413192162178172, "learning_rate": 4.597566909975669e-06, "loss": 0.5828, "step": 27165 }, { "epoch": 0.7931447257014393, "grad_norm": 0.6709085890666066, "learning_rate": 4.596918085969181e-06, "loss": 0.6365, "step": 27166 }, { "epoch": 0.7931739219292867, "grad_norm": 0.6600256271856967, "learning_rate": 4.596269261962693e-06, "loss": 0.6164, "step": 27167 }, { "epoch": 0.7932031181571341, "grad_norm": 0.5906947845727055, "learning_rate": 4.5956204379562045e-06, "loss": 0.5157, "step": 27168 }, { "epoch": 0.7932323143849814, "grad_norm": 0.6393992212302327, "learning_rate": 4.5949716139497165e-06, "loss": 0.6365, "step": 27169 }, { "epoch": 0.7932615106128288, "grad_norm": 0.6436440536478502, "learning_rate": 4.594322789943228e-06, "loss": 0.5777, "step": 27170 }, { "epoch": 0.7932907068406762, "grad_norm": 0.6864223844703754, "learning_rate": 4.5936739659367406e-06, "loss": 0.6925, "step": 27171 }, { "epoch": 0.7933199030685235, "grad_norm": 0.6448825367236942, "learning_rate": 4.593025141930252e-06, "loss": 0.5486, "step": 27172 }, { "epoch": 0.7933490992963709, "grad_norm": 0.6290691635455664, "learning_rate": 4.592376317923764e-06, "loss": 0.5913, "step": 27173 }, { "epoch": 0.7933782955242182, "grad_norm": 0.6130033168563578, "learning_rate": 4.591727493917275e-06, "loss": 0.5445, "step": 27174 }, { "epoch": 0.7934074917520656, "grad_norm": 0.6377775417750563, "learning_rate": 4.591078669910787e-06, "loss": 0.5753, "step": 27175 }, { "epoch": 0.793436687979913, "grad_norm": 0.6502808786976926, "learning_rate": 4.590429845904299e-06, "loss": 0.5998, "step": 27176 }, { "epoch": 0.7934658842077603, "grad_norm": 0.6264650866834006, "learning_rate": 4.58978102189781e-06, "loss": 0.6175, "step": 27177 }, { "epoch": 0.7934950804356077, "grad_norm": 0.637681896522092, "learning_rate": 4.589132197891322e-06, "loss": 0.6153, "step": 27178 }, { "epoch": 0.793524276663455, "grad_norm": 0.5841843440148234, "learning_rate": 4.588483373884834e-06, "loss": 0.5188, "step": 27179 }, { "epoch": 0.7935534728913024, "grad_norm": 0.6435316389235672, "learning_rate": 4.587834549878346e-06, "loss": 0.6016, "step": 27180 }, { "epoch": 0.7935826691191498, "grad_norm": 0.6534722842238812, "learning_rate": 4.5871857258718575e-06, "loss": 0.5984, "step": 27181 }, { "epoch": 0.7936118653469971, "grad_norm": 0.6615862373982412, "learning_rate": 4.5865369018653695e-06, "loss": 0.6068, "step": 27182 }, { "epoch": 0.7936410615748445, "grad_norm": 0.6177629598196551, "learning_rate": 4.585888077858881e-06, "loss": 0.5715, "step": 27183 }, { "epoch": 0.7936702578026918, "grad_norm": 0.6600334812055374, "learning_rate": 4.585239253852393e-06, "loss": 0.5974, "step": 27184 }, { "epoch": 0.7936994540305392, "grad_norm": 0.6252919325109921, "learning_rate": 4.584590429845905e-06, "loss": 0.6058, "step": 27185 }, { "epoch": 0.7937286502583866, "grad_norm": 0.7113079278863309, "learning_rate": 4.583941605839417e-06, "loss": 0.7206, "step": 27186 }, { "epoch": 0.7937578464862339, "grad_norm": 0.6256334041387387, "learning_rate": 4.583292781832928e-06, "loss": 0.5579, "step": 27187 }, { "epoch": 0.7937870427140813, "grad_norm": 0.624842158692529, "learning_rate": 4.58264395782644e-06, "loss": 0.529, "step": 27188 }, { "epoch": 0.7938162389419287, "grad_norm": 0.6877927102064826, "learning_rate": 4.581995133819952e-06, "loss": 0.6534, "step": 27189 }, { "epoch": 0.793845435169776, "grad_norm": 0.62827390433097, "learning_rate": 4.581346309813464e-06, "loss": 0.5414, "step": 27190 }, { "epoch": 0.7938746313976234, "grad_norm": 0.6517482545796768, "learning_rate": 4.580697485806975e-06, "loss": 0.5832, "step": 27191 }, { "epoch": 0.7939038276254707, "grad_norm": 0.62374097691233, "learning_rate": 4.580048661800487e-06, "loss": 0.5758, "step": 27192 }, { "epoch": 0.7939330238533181, "grad_norm": 0.6798661233415495, "learning_rate": 4.579399837793998e-06, "loss": 0.6455, "step": 27193 }, { "epoch": 0.7939622200811655, "grad_norm": 0.6516249837460397, "learning_rate": 4.5787510137875104e-06, "loss": 0.5871, "step": 27194 }, { "epoch": 0.7939914163090128, "grad_norm": 0.6736035727973317, "learning_rate": 4.5781021897810225e-06, "loss": 0.6727, "step": 27195 }, { "epoch": 0.7940206125368603, "grad_norm": 0.6317578048607277, "learning_rate": 4.577453365774534e-06, "loss": 0.5619, "step": 27196 }, { "epoch": 0.7940498087647077, "grad_norm": 0.6265769242700178, "learning_rate": 4.576804541768046e-06, "loss": 0.5872, "step": 27197 }, { "epoch": 0.794079004992555, "grad_norm": 0.6748844924883605, "learning_rate": 4.576155717761558e-06, "loss": 0.6076, "step": 27198 }, { "epoch": 0.7941082012204024, "grad_norm": 0.6395185156103842, "learning_rate": 4.57550689375507e-06, "loss": 0.6318, "step": 27199 }, { "epoch": 0.7941373974482497, "grad_norm": 0.6407286912217487, "learning_rate": 4.574858069748581e-06, "loss": 0.5691, "step": 27200 }, { "epoch": 0.7941665936760971, "grad_norm": 0.6082860122183084, "learning_rate": 4.574209245742093e-06, "loss": 0.5348, "step": 27201 }, { "epoch": 0.7941957899039445, "grad_norm": 0.6266422622054868, "learning_rate": 4.573560421735604e-06, "loss": 0.5879, "step": 27202 }, { "epoch": 0.7942249861317918, "grad_norm": 0.6657472028388209, "learning_rate": 4.572911597729116e-06, "loss": 0.6493, "step": 27203 }, { "epoch": 0.7942541823596392, "grad_norm": 0.6810025997325682, "learning_rate": 4.572262773722628e-06, "loss": 0.6854, "step": 27204 }, { "epoch": 0.7942833785874865, "grad_norm": 0.605075093733292, "learning_rate": 4.57161394971614e-06, "loss": 0.5728, "step": 27205 }, { "epoch": 0.7943125748153339, "grad_norm": 0.630035031073029, "learning_rate": 4.570965125709651e-06, "loss": 0.5253, "step": 27206 }, { "epoch": 0.7943417710431813, "grad_norm": 0.6521550309989245, "learning_rate": 4.570316301703163e-06, "loss": 0.5868, "step": 27207 }, { "epoch": 0.7943709672710286, "grad_norm": 0.666275294511414, "learning_rate": 4.569667477696675e-06, "loss": 0.6077, "step": 27208 }, { "epoch": 0.794400163498876, "grad_norm": 0.6336289010386454, "learning_rate": 4.5690186536901874e-06, "loss": 0.5945, "step": 27209 }, { "epoch": 0.7944293597267233, "grad_norm": 0.6419091161531709, "learning_rate": 4.568369829683699e-06, "loss": 0.5539, "step": 27210 }, { "epoch": 0.7944585559545707, "grad_norm": 0.6205312008231807, "learning_rate": 4.567721005677211e-06, "loss": 0.499, "step": 27211 }, { "epoch": 0.7944877521824181, "grad_norm": 0.6507558220798703, "learning_rate": 4.567072181670722e-06, "loss": 0.6204, "step": 27212 }, { "epoch": 0.7945169484102654, "grad_norm": 0.6717117309104763, "learning_rate": 4.566423357664234e-06, "loss": 0.6197, "step": 27213 }, { "epoch": 0.7945461446381128, "grad_norm": 0.6748772077650576, "learning_rate": 4.565774533657746e-06, "loss": 0.6288, "step": 27214 }, { "epoch": 0.7945753408659602, "grad_norm": 0.6459236795395499, "learning_rate": 4.565125709651257e-06, "loss": 0.5624, "step": 27215 }, { "epoch": 0.7946045370938075, "grad_norm": 0.6382733342326103, "learning_rate": 4.564476885644769e-06, "loss": 0.6198, "step": 27216 }, { "epoch": 0.7946337333216549, "grad_norm": 0.6097780026904104, "learning_rate": 4.563828061638281e-06, "loss": 0.5507, "step": 27217 }, { "epoch": 0.7946629295495022, "grad_norm": 0.5888868590952514, "learning_rate": 4.563179237631793e-06, "loss": 0.5192, "step": 27218 }, { "epoch": 0.7946921257773496, "grad_norm": 0.6288294847821119, "learning_rate": 4.562530413625304e-06, "loss": 0.6187, "step": 27219 }, { "epoch": 0.794721322005197, "grad_norm": 0.6632001867457655, "learning_rate": 4.561881589618816e-06, "loss": 0.6236, "step": 27220 }, { "epoch": 0.7947505182330443, "grad_norm": 0.6679599121264796, "learning_rate": 4.5612327656123275e-06, "loss": 0.5687, "step": 27221 }, { "epoch": 0.7947797144608917, "grad_norm": 0.6163003951254612, "learning_rate": 4.56058394160584e-06, "loss": 0.5576, "step": 27222 }, { "epoch": 0.794808910688739, "grad_norm": 0.6244125238930333, "learning_rate": 4.559935117599352e-06, "loss": 0.5949, "step": 27223 }, { "epoch": 0.7948381069165864, "grad_norm": 0.6494048791742794, "learning_rate": 4.559286293592864e-06, "loss": 0.5709, "step": 27224 }, { "epoch": 0.7948673031444338, "grad_norm": 0.6117046276991515, "learning_rate": 4.558637469586375e-06, "loss": 0.556, "step": 27225 }, { "epoch": 0.7948964993722811, "grad_norm": 0.6084379775357787, "learning_rate": 4.557988645579887e-06, "loss": 0.4976, "step": 27226 }, { "epoch": 0.7949256956001285, "grad_norm": 0.6039752505801533, "learning_rate": 4.557339821573399e-06, "loss": 0.5498, "step": 27227 }, { "epoch": 0.7949548918279759, "grad_norm": 0.6801053840442656, "learning_rate": 4.556690997566911e-06, "loss": 0.6493, "step": 27228 }, { "epoch": 0.7949840880558232, "grad_norm": 0.6652627319955887, "learning_rate": 4.556042173560422e-06, "loss": 0.6214, "step": 27229 }, { "epoch": 0.7950132842836706, "grad_norm": 0.69838286497533, "learning_rate": 4.555393349553934e-06, "loss": 0.6307, "step": 27230 }, { "epoch": 0.7950424805115179, "grad_norm": 0.6287457465503997, "learning_rate": 4.554744525547445e-06, "loss": 0.583, "step": 27231 }, { "epoch": 0.7950716767393653, "grad_norm": 0.6324854009042674, "learning_rate": 4.554095701540957e-06, "loss": 0.5449, "step": 27232 }, { "epoch": 0.7951008729672127, "grad_norm": 0.6642192224056341, "learning_rate": 4.553446877534469e-06, "loss": 0.6331, "step": 27233 }, { "epoch": 0.79513006919506, "grad_norm": 0.6393009192905247, "learning_rate": 4.5527980535279805e-06, "loss": 0.5807, "step": 27234 }, { "epoch": 0.7951592654229074, "grad_norm": 0.6751301872479694, "learning_rate": 4.5521492295214925e-06, "loss": 0.6486, "step": 27235 }, { "epoch": 0.7951884616507547, "grad_norm": 0.674026453195581, "learning_rate": 4.5515004055150046e-06, "loss": 0.6, "step": 27236 }, { "epoch": 0.7952176578786021, "grad_norm": 0.6012955450413708, "learning_rate": 4.550851581508517e-06, "loss": 0.5561, "step": 27237 }, { "epoch": 0.7952468541064495, "grad_norm": 0.692174534975336, "learning_rate": 4.550202757502028e-06, "loss": 0.6417, "step": 27238 }, { "epoch": 0.7952760503342968, "grad_norm": 0.7155242422051369, "learning_rate": 4.54955393349554e-06, "loss": 0.6334, "step": 27239 }, { "epoch": 0.7953052465621442, "grad_norm": 0.656714856533245, "learning_rate": 4.548905109489051e-06, "loss": 0.5713, "step": 27240 }, { "epoch": 0.7953344427899915, "grad_norm": 0.5802339652502473, "learning_rate": 4.548256285482564e-06, "loss": 0.4945, "step": 27241 }, { "epoch": 0.7953636390178389, "grad_norm": 0.623062507866502, "learning_rate": 4.547607461476075e-06, "loss": 0.5401, "step": 27242 }, { "epoch": 0.7953928352456863, "grad_norm": 0.6391871036058137, "learning_rate": 4.546958637469587e-06, "loss": 0.564, "step": 27243 }, { "epoch": 0.7954220314735336, "grad_norm": 0.6465397403926657, "learning_rate": 4.546309813463098e-06, "loss": 0.6061, "step": 27244 }, { "epoch": 0.795451227701381, "grad_norm": 0.713963930966573, "learning_rate": 4.54566098945661e-06, "loss": 0.7206, "step": 27245 }, { "epoch": 0.7954804239292284, "grad_norm": 0.6684024962742559, "learning_rate": 4.545012165450122e-06, "loss": 0.6279, "step": 27246 }, { "epoch": 0.7955096201570757, "grad_norm": 0.6354317078718551, "learning_rate": 4.544363341443634e-06, "loss": 0.5972, "step": 27247 }, { "epoch": 0.7955388163849231, "grad_norm": 0.6740794882375516, "learning_rate": 4.5437145174371455e-06, "loss": 0.6396, "step": 27248 }, { "epoch": 0.7955680126127704, "grad_norm": 0.6433605439482049, "learning_rate": 4.5430656934306575e-06, "loss": 0.5968, "step": 27249 }, { "epoch": 0.7955972088406178, "grad_norm": 0.6149827417673576, "learning_rate": 4.542416869424169e-06, "loss": 0.5207, "step": 27250 }, { "epoch": 0.7956264050684652, "grad_norm": 0.6691018053740393, "learning_rate": 4.541768045417681e-06, "loss": 0.618, "step": 27251 }, { "epoch": 0.7956556012963125, "grad_norm": 0.6461435610990891, "learning_rate": 4.541119221411193e-06, "loss": 0.5864, "step": 27252 }, { "epoch": 0.7956847975241599, "grad_norm": 0.6691214761698112, "learning_rate": 4.540470397404704e-06, "loss": 0.6335, "step": 27253 }, { "epoch": 0.7957139937520072, "grad_norm": 0.6420906170299924, "learning_rate": 4.539821573398216e-06, "loss": 0.5658, "step": 27254 }, { "epoch": 0.7957431899798546, "grad_norm": 0.6070704995680278, "learning_rate": 4.539172749391728e-06, "loss": 0.5805, "step": 27255 }, { "epoch": 0.795772386207702, "grad_norm": 0.6151998983699621, "learning_rate": 4.53852392538524e-06, "loss": 0.5456, "step": 27256 }, { "epoch": 0.7958015824355493, "grad_norm": 0.6468250960353643, "learning_rate": 4.537875101378751e-06, "loss": 0.6073, "step": 27257 }, { "epoch": 0.7958307786633967, "grad_norm": 0.6355411912833376, "learning_rate": 4.537226277372263e-06, "loss": 0.5717, "step": 27258 }, { "epoch": 0.795859974891244, "grad_norm": 0.577455841423482, "learning_rate": 4.536577453365774e-06, "loss": 0.4876, "step": 27259 }, { "epoch": 0.7958891711190914, "grad_norm": 0.6216951306754724, "learning_rate": 4.535928629359287e-06, "loss": 0.5464, "step": 27260 }, { "epoch": 0.7959183673469388, "grad_norm": 0.651403358503586, "learning_rate": 4.5352798053527985e-06, "loss": 0.6449, "step": 27261 }, { "epoch": 0.7959475635747861, "grad_norm": 0.6326528058352879, "learning_rate": 4.5346309813463105e-06, "loss": 0.5978, "step": 27262 }, { "epoch": 0.7959767598026335, "grad_norm": 0.6445614136017741, "learning_rate": 4.533982157339822e-06, "loss": 0.6118, "step": 27263 }, { "epoch": 0.7960059560304809, "grad_norm": 0.691689732827408, "learning_rate": 4.533333333333334e-06, "loss": 0.6373, "step": 27264 }, { "epoch": 0.7960351522583282, "grad_norm": 0.5733608664015503, "learning_rate": 4.532684509326846e-06, "loss": 0.492, "step": 27265 }, { "epoch": 0.7960643484861756, "grad_norm": 0.6611873730626694, "learning_rate": 4.532035685320357e-06, "loss": 0.6534, "step": 27266 }, { "epoch": 0.7960935447140229, "grad_norm": 0.6638885144520031, "learning_rate": 4.531386861313869e-06, "loss": 0.5642, "step": 27267 }, { "epoch": 0.7961227409418703, "grad_norm": 0.7058368821405833, "learning_rate": 4.530738037307381e-06, "loss": 0.6476, "step": 27268 }, { "epoch": 0.7961519371697177, "grad_norm": 0.6662805175193919, "learning_rate": 4.530089213300892e-06, "loss": 0.6149, "step": 27269 }, { "epoch": 0.796181133397565, "grad_norm": 0.6787243724379628, "learning_rate": 4.529440389294404e-06, "loss": 0.6028, "step": 27270 }, { "epoch": 0.7962103296254124, "grad_norm": 0.64553074806631, "learning_rate": 4.528791565287916e-06, "loss": 0.6079, "step": 27271 }, { "epoch": 0.7962395258532597, "grad_norm": 0.7254038995174473, "learning_rate": 4.528142741281427e-06, "loss": 0.6349, "step": 27272 }, { "epoch": 0.7962687220811071, "grad_norm": 0.617501388785947, "learning_rate": 4.527493917274939e-06, "loss": 0.5459, "step": 27273 }, { "epoch": 0.7962979183089545, "grad_norm": 0.6462080501413449, "learning_rate": 4.5268450932684514e-06, "loss": 0.5841, "step": 27274 }, { "epoch": 0.7963271145368018, "grad_norm": 0.5942953520074586, "learning_rate": 4.5261962692619635e-06, "loss": 0.5303, "step": 27275 }, { "epoch": 0.7963563107646492, "grad_norm": 0.6412071005801566, "learning_rate": 4.525547445255475e-06, "loss": 0.5991, "step": 27276 }, { "epoch": 0.7963855069924966, "grad_norm": 0.6824093818026961, "learning_rate": 4.524898621248987e-06, "loss": 0.6785, "step": 27277 }, { "epoch": 0.7964147032203439, "grad_norm": 0.6361861096604344, "learning_rate": 4.524249797242498e-06, "loss": 0.5984, "step": 27278 }, { "epoch": 0.7964438994481913, "grad_norm": 0.6141546174757507, "learning_rate": 4.523600973236011e-06, "loss": 0.572, "step": 27279 }, { "epoch": 0.7964730956760386, "grad_norm": 0.6911149844890805, "learning_rate": 4.522952149229522e-06, "loss": 0.6612, "step": 27280 }, { "epoch": 0.796502291903886, "grad_norm": 0.5886397800279587, "learning_rate": 4.522303325223034e-06, "loss": 0.5139, "step": 27281 }, { "epoch": 0.7965314881317334, "grad_norm": 0.6492216284830117, "learning_rate": 4.521654501216545e-06, "loss": 0.567, "step": 27282 }, { "epoch": 0.7965606843595807, "grad_norm": 0.6209137767232974, "learning_rate": 4.521005677210057e-06, "loss": 0.5741, "step": 27283 }, { "epoch": 0.7965898805874281, "grad_norm": 0.661521612127613, "learning_rate": 4.520356853203569e-06, "loss": 0.5684, "step": 27284 }, { "epoch": 0.7966190768152754, "grad_norm": 0.647779341473168, "learning_rate": 4.51970802919708e-06, "loss": 0.6098, "step": 27285 }, { "epoch": 0.7966482730431228, "grad_norm": 0.6472867149278067, "learning_rate": 4.519059205190592e-06, "loss": 0.6088, "step": 27286 }, { "epoch": 0.7966774692709702, "grad_norm": 0.6490507122936098, "learning_rate": 4.518410381184104e-06, "loss": 0.6138, "step": 27287 }, { "epoch": 0.7967066654988175, "grad_norm": 0.733095177631362, "learning_rate": 4.5177615571776156e-06, "loss": 0.6883, "step": 27288 }, { "epoch": 0.7967358617266649, "grad_norm": 0.6269532794003285, "learning_rate": 4.517112733171128e-06, "loss": 0.5656, "step": 27289 }, { "epoch": 0.7967650579545122, "grad_norm": 0.6918558118331345, "learning_rate": 4.51646390916464e-06, "loss": 0.6438, "step": 27290 }, { "epoch": 0.7967942541823596, "grad_norm": 0.6734284205093737, "learning_rate": 4.515815085158151e-06, "loss": 0.6239, "step": 27291 }, { "epoch": 0.796823450410207, "grad_norm": 0.6023976339942755, "learning_rate": 4.515166261151663e-06, "loss": 0.4904, "step": 27292 }, { "epoch": 0.7968526466380543, "grad_norm": 0.5961933877984881, "learning_rate": 4.514517437145175e-06, "loss": 0.4943, "step": 27293 }, { "epoch": 0.7968818428659017, "grad_norm": 0.630058194595471, "learning_rate": 4.513868613138687e-06, "loss": 0.5861, "step": 27294 }, { "epoch": 0.796911039093749, "grad_norm": 0.6223828699713878, "learning_rate": 4.513219789132198e-06, "loss": 0.5606, "step": 27295 }, { "epoch": 0.7969402353215964, "grad_norm": 0.6306655952101026, "learning_rate": 4.51257096512571e-06, "loss": 0.5674, "step": 27296 }, { "epoch": 0.7969694315494438, "grad_norm": 0.5572365715927156, "learning_rate": 4.511922141119221e-06, "loss": 0.4787, "step": 27297 }, { "epoch": 0.7969986277772911, "grad_norm": 0.6590153363519228, "learning_rate": 4.511273317112734e-06, "loss": 0.5616, "step": 27298 }, { "epoch": 0.7970278240051385, "grad_norm": 0.6502478299745807, "learning_rate": 4.510624493106245e-06, "loss": 0.5954, "step": 27299 }, { "epoch": 0.7970570202329859, "grad_norm": 0.6593872834226788, "learning_rate": 4.509975669099757e-06, "loss": 0.5907, "step": 27300 }, { "epoch": 0.7970862164608332, "grad_norm": 0.6147170826231911, "learning_rate": 4.5093268450932685e-06, "loss": 0.5721, "step": 27301 }, { "epoch": 0.7971154126886806, "grad_norm": 0.6561062274715616, "learning_rate": 4.5086780210867806e-06, "loss": 0.6136, "step": 27302 }, { "epoch": 0.7971446089165279, "grad_norm": 0.6394323623491794, "learning_rate": 4.508029197080293e-06, "loss": 0.5961, "step": 27303 }, { "epoch": 0.7971738051443753, "grad_norm": 0.6447734941501807, "learning_rate": 4.507380373073804e-06, "loss": 0.6034, "step": 27304 }, { "epoch": 0.7972030013722227, "grad_norm": 0.6766199854675112, "learning_rate": 4.506731549067316e-06, "loss": 0.6734, "step": 27305 }, { "epoch": 0.79723219760007, "grad_norm": 0.6279681161365998, "learning_rate": 4.506082725060828e-06, "loss": 0.5839, "step": 27306 }, { "epoch": 0.7972613938279174, "grad_norm": 0.6660142707744356, "learning_rate": 4.50543390105434e-06, "loss": 0.6957, "step": 27307 }, { "epoch": 0.7972905900557647, "grad_norm": 0.7217050439927687, "learning_rate": 4.504785077047851e-06, "loss": 0.5781, "step": 27308 }, { "epoch": 0.7973197862836121, "grad_norm": 0.5957608095710614, "learning_rate": 4.504136253041363e-06, "loss": 0.5087, "step": 27309 }, { "epoch": 0.7973489825114595, "grad_norm": 0.7372166312057933, "learning_rate": 4.503487429034874e-06, "loss": 0.6684, "step": 27310 }, { "epoch": 0.7973781787393068, "grad_norm": 0.6393139999560631, "learning_rate": 4.502838605028386e-06, "loss": 0.626, "step": 27311 }, { "epoch": 0.7974073749671542, "grad_norm": 0.678894507408663, "learning_rate": 4.502189781021898e-06, "loss": 0.639, "step": 27312 }, { "epoch": 0.7974365711950016, "grad_norm": 0.641063113767904, "learning_rate": 4.50154095701541e-06, "loss": 0.6094, "step": 27313 }, { "epoch": 0.7974657674228489, "grad_norm": 0.603346938779401, "learning_rate": 4.5008921330089215e-06, "loss": 0.5417, "step": 27314 }, { "epoch": 0.7974949636506963, "grad_norm": 0.655506730574816, "learning_rate": 4.5002433090024335e-06, "loss": 0.5814, "step": 27315 }, { "epoch": 0.7975241598785436, "grad_norm": 0.6477181656473644, "learning_rate": 4.499594484995945e-06, "loss": 0.6068, "step": 27316 }, { "epoch": 0.7975533561063911, "grad_norm": 0.6582910941646853, "learning_rate": 4.498945660989458e-06, "loss": 0.6033, "step": 27317 }, { "epoch": 0.7975825523342385, "grad_norm": 0.6800367673178429, "learning_rate": 4.498296836982969e-06, "loss": 0.5822, "step": 27318 }, { "epoch": 0.7976117485620858, "grad_norm": 0.6611998126771429, "learning_rate": 4.497648012976481e-06, "loss": 0.5984, "step": 27319 }, { "epoch": 0.7976409447899332, "grad_norm": 0.6169286101019327, "learning_rate": 4.496999188969992e-06, "loss": 0.5832, "step": 27320 }, { "epoch": 0.7976701410177806, "grad_norm": 0.6369063648941106, "learning_rate": 4.496350364963504e-06, "loss": 0.6006, "step": 27321 }, { "epoch": 0.7976993372456279, "grad_norm": 0.6363871341754245, "learning_rate": 4.495701540957016e-06, "loss": 0.59, "step": 27322 }, { "epoch": 0.7977285334734753, "grad_norm": 0.6457032923863091, "learning_rate": 4.495052716950527e-06, "loss": 0.6528, "step": 27323 }, { "epoch": 0.7977577297013226, "grad_norm": 0.7011717040514289, "learning_rate": 4.494403892944039e-06, "loss": 0.6507, "step": 27324 }, { "epoch": 0.79778692592917, "grad_norm": 0.6173187095389697, "learning_rate": 4.493755068937551e-06, "loss": 0.5494, "step": 27325 }, { "epoch": 0.7978161221570174, "grad_norm": 0.6619293058621678, "learning_rate": 4.493106244931063e-06, "loss": 0.6302, "step": 27326 }, { "epoch": 0.7978453183848647, "grad_norm": 0.6439972201179279, "learning_rate": 4.4924574209245745e-06, "loss": 0.6088, "step": 27327 }, { "epoch": 0.7978745146127121, "grad_norm": 0.5990854121169554, "learning_rate": 4.4918085969180865e-06, "loss": 0.5132, "step": 27328 }, { "epoch": 0.7979037108405594, "grad_norm": 0.5927196771756429, "learning_rate": 4.491159772911598e-06, "loss": 0.5293, "step": 27329 }, { "epoch": 0.7979329070684068, "grad_norm": 0.7333652407800862, "learning_rate": 4.49051094890511e-06, "loss": 0.7075, "step": 27330 }, { "epoch": 0.7979621032962542, "grad_norm": 0.6041026827999018, "learning_rate": 4.489862124898622e-06, "loss": 0.5224, "step": 27331 }, { "epoch": 0.7979912995241015, "grad_norm": 0.6413576349730699, "learning_rate": 4.489213300892134e-06, "loss": 0.5943, "step": 27332 }, { "epoch": 0.7980204957519489, "grad_norm": 0.6083696556889574, "learning_rate": 4.488564476885645e-06, "loss": 0.5239, "step": 27333 }, { "epoch": 0.7980496919797962, "grad_norm": 0.5933743216359401, "learning_rate": 4.487915652879157e-06, "loss": 0.5316, "step": 27334 }, { "epoch": 0.7980788882076436, "grad_norm": 0.6378650967955773, "learning_rate": 4.487266828872668e-06, "loss": 0.6018, "step": 27335 }, { "epoch": 0.798108084435491, "grad_norm": 0.6970632394240347, "learning_rate": 4.486618004866181e-06, "loss": 0.7155, "step": 27336 }, { "epoch": 0.7981372806633383, "grad_norm": 0.6038483407030975, "learning_rate": 4.485969180859692e-06, "loss": 0.5595, "step": 27337 }, { "epoch": 0.7981664768911857, "grad_norm": 0.64842201631244, "learning_rate": 4.485320356853204e-06, "loss": 0.5749, "step": 27338 }, { "epoch": 0.7981956731190331, "grad_norm": 0.6614507314604378, "learning_rate": 4.484671532846715e-06, "loss": 0.6137, "step": 27339 }, { "epoch": 0.7982248693468804, "grad_norm": 0.6227467920073377, "learning_rate": 4.4840227088402274e-06, "loss": 0.5336, "step": 27340 }, { "epoch": 0.7982540655747278, "grad_norm": 0.6619696121240577, "learning_rate": 4.4833738848337395e-06, "loss": 0.6013, "step": 27341 }, { "epoch": 0.7982832618025751, "grad_norm": 0.6625012556526892, "learning_rate": 4.482725060827251e-06, "loss": 0.6291, "step": 27342 }, { "epoch": 0.7983124580304225, "grad_norm": 0.6541360450003973, "learning_rate": 4.482076236820763e-06, "loss": 0.5862, "step": 27343 }, { "epoch": 0.7983416542582699, "grad_norm": 0.6406352084673941, "learning_rate": 4.481427412814275e-06, "loss": 0.6103, "step": 27344 }, { "epoch": 0.7983708504861172, "grad_norm": 0.6367419329053259, "learning_rate": 4.480778588807787e-06, "loss": 0.626, "step": 27345 }, { "epoch": 0.7984000467139646, "grad_norm": 0.6870752586986915, "learning_rate": 4.480129764801298e-06, "loss": 0.6171, "step": 27346 }, { "epoch": 0.798429242941812, "grad_norm": 0.6653016770316671, "learning_rate": 4.47948094079481e-06, "loss": 0.6646, "step": 27347 }, { "epoch": 0.7984584391696593, "grad_norm": 0.606139043109988, "learning_rate": 4.478832116788321e-06, "loss": 0.5578, "step": 27348 }, { "epoch": 0.7984876353975067, "grad_norm": 0.6309588054193281, "learning_rate": 4.478183292781833e-06, "loss": 0.5753, "step": 27349 }, { "epoch": 0.798516831625354, "grad_norm": 0.6719599463900211, "learning_rate": 4.477534468775345e-06, "loss": 0.6701, "step": 27350 }, { "epoch": 0.7985460278532014, "grad_norm": 0.6693549788676176, "learning_rate": 4.476885644768857e-06, "loss": 0.5824, "step": 27351 }, { "epoch": 0.7985752240810488, "grad_norm": 0.5607685133524863, "learning_rate": 4.476236820762368e-06, "loss": 0.4865, "step": 27352 }, { "epoch": 0.7986044203088961, "grad_norm": 0.6217805397565662, "learning_rate": 4.47558799675588e-06, "loss": 0.5733, "step": 27353 }, { "epoch": 0.7986336165367435, "grad_norm": 0.6328883703354136, "learning_rate": 4.474939172749392e-06, "loss": 0.5759, "step": 27354 }, { "epoch": 0.7986628127645908, "grad_norm": 0.604465260882774, "learning_rate": 4.474290348742904e-06, "loss": 0.5138, "step": 27355 }, { "epoch": 0.7986920089924382, "grad_norm": 0.5988381253267113, "learning_rate": 4.473641524736416e-06, "loss": 0.5506, "step": 27356 }, { "epoch": 0.7987212052202856, "grad_norm": 0.6344980380046332, "learning_rate": 4.472992700729928e-06, "loss": 0.563, "step": 27357 }, { "epoch": 0.7987504014481329, "grad_norm": 0.5983528346581406, "learning_rate": 4.472343876723439e-06, "loss": 0.5212, "step": 27358 }, { "epoch": 0.7987795976759803, "grad_norm": 0.651525230580431, "learning_rate": 4.471695052716951e-06, "loss": 0.5776, "step": 27359 }, { "epoch": 0.7988087939038276, "grad_norm": 0.6754603516519041, "learning_rate": 4.471046228710463e-06, "loss": 0.6689, "step": 27360 }, { "epoch": 0.798837990131675, "grad_norm": 0.5778306711899066, "learning_rate": 4.470397404703974e-06, "loss": 0.4807, "step": 27361 }, { "epoch": 0.7988671863595224, "grad_norm": 0.6573962083144089, "learning_rate": 4.469748580697486e-06, "loss": 0.5937, "step": 27362 }, { "epoch": 0.7988963825873697, "grad_norm": 0.6470511020290776, "learning_rate": 4.469099756690998e-06, "loss": 0.5952, "step": 27363 }, { "epoch": 0.7989255788152171, "grad_norm": 0.6779300814947593, "learning_rate": 4.46845093268451e-06, "loss": 0.6657, "step": 27364 }, { "epoch": 0.7989547750430644, "grad_norm": 0.6325523993159956, "learning_rate": 4.467802108678021e-06, "loss": 0.5855, "step": 27365 }, { "epoch": 0.7989839712709118, "grad_norm": 0.6338595466604853, "learning_rate": 4.467153284671533e-06, "loss": 0.6679, "step": 27366 }, { "epoch": 0.7990131674987592, "grad_norm": 0.6208203090542932, "learning_rate": 4.4665044606650445e-06, "loss": 0.6039, "step": 27367 }, { "epoch": 0.7990423637266065, "grad_norm": 0.656339468030581, "learning_rate": 4.4658556366585566e-06, "loss": 0.5973, "step": 27368 }, { "epoch": 0.7990715599544539, "grad_norm": 0.6690291081354728, "learning_rate": 4.465206812652069e-06, "loss": 0.6567, "step": 27369 }, { "epoch": 0.7991007561823013, "grad_norm": 0.642677852162617, "learning_rate": 4.464557988645581e-06, "loss": 0.62, "step": 27370 }, { "epoch": 0.7991299524101486, "grad_norm": 0.6180744755879588, "learning_rate": 4.463909164639092e-06, "loss": 0.5798, "step": 27371 }, { "epoch": 0.799159148637996, "grad_norm": 0.6434313003276703, "learning_rate": 4.463260340632604e-06, "loss": 0.662, "step": 27372 }, { "epoch": 0.7991883448658433, "grad_norm": 0.6477946610940417, "learning_rate": 4.462611516626116e-06, "loss": 0.5977, "step": 27373 }, { "epoch": 0.7992175410936907, "grad_norm": 0.6546226005534466, "learning_rate": 4.461962692619627e-06, "loss": 0.6322, "step": 27374 }, { "epoch": 0.7992467373215381, "grad_norm": 0.5992974034663546, "learning_rate": 4.461313868613139e-06, "loss": 0.5302, "step": 27375 }, { "epoch": 0.7992759335493854, "grad_norm": 0.6795506093468346, "learning_rate": 4.460665044606651e-06, "loss": 0.6777, "step": 27376 }, { "epoch": 0.7993051297772328, "grad_norm": 0.6510370092152382, "learning_rate": 4.460016220600162e-06, "loss": 0.6116, "step": 27377 }, { "epoch": 0.7993343260050801, "grad_norm": 0.6236142854370261, "learning_rate": 4.459367396593674e-06, "loss": 0.5577, "step": 27378 }, { "epoch": 0.7993635222329275, "grad_norm": 0.6733247345021367, "learning_rate": 4.458718572587186e-06, "loss": 0.6499, "step": 27379 }, { "epoch": 0.7993927184607749, "grad_norm": 0.6425059471008006, "learning_rate": 4.4580697485806975e-06, "loss": 0.5923, "step": 27380 }, { "epoch": 0.7994219146886222, "grad_norm": 0.642266314515052, "learning_rate": 4.4574209245742095e-06, "loss": 0.5835, "step": 27381 }, { "epoch": 0.7994511109164696, "grad_norm": 0.5678959223089367, "learning_rate": 4.456772100567721e-06, "loss": 0.4519, "step": 27382 }, { "epoch": 0.799480307144317, "grad_norm": 0.5913357778223106, "learning_rate": 4.456123276561234e-06, "loss": 0.4804, "step": 27383 }, { "epoch": 0.7995095033721643, "grad_norm": 0.6609816546837266, "learning_rate": 4.455474452554745e-06, "loss": 0.6159, "step": 27384 }, { "epoch": 0.7995386996000117, "grad_norm": 0.6034657210226135, "learning_rate": 4.454825628548257e-06, "loss": 0.5663, "step": 27385 }, { "epoch": 0.799567895827859, "grad_norm": 0.7133069371869777, "learning_rate": 4.454176804541768e-06, "loss": 0.7037, "step": 27386 }, { "epoch": 0.7995970920557064, "grad_norm": 0.6261645951030076, "learning_rate": 4.45352798053528e-06, "loss": 0.5248, "step": 27387 }, { "epoch": 0.7996262882835538, "grad_norm": 0.6521093705521266, "learning_rate": 4.452879156528792e-06, "loss": 0.5825, "step": 27388 }, { "epoch": 0.7996554845114011, "grad_norm": 0.6252780279672273, "learning_rate": 4.452230332522304e-06, "loss": 0.5777, "step": 27389 }, { "epoch": 0.7996846807392485, "grad_norm": 0.6528063498005762, "learning_rate": 4.451581508515815e-06, "loss": 0.5975, "step": 27390 }, { "epoch": 0.7997138769670958, "grad_norm": 0.6309767214010652, "learning_rate": 4.450932684509327e-06, "loss": 0.5785, "step": 27391 }, { "epoch": 0.7997430731949432, "grad_norm": 0.5856295133625088, "learning_rate": 4.450283860502839e-06, "loss": 0.5158, "step": 27392 }, { "epoch": 0.7997722694227906, "grad_norm": 0.6379245693019273, "learning_rate": 4.4496350364963505e-06, "loss": 0.5841, "step": 27393 }, { "epoch": 0.7998014656506379, "grad_norm": 0.6496406452191593, "learning_rate": 4.4489862124898625e-06, "loss": 0.581, "step": 27394 }, { "epoch": 0.7998306618784853, "grad_norm": 0.6559137510111177, "learning_rate": 4.4483373884833745e-06, "loss": 0.5815, "step": 27395 }, { "epoch": 0.7998598581063326, "grad_norm": 0.6251033242730474, "learning_rate": 4.447688564476886e-06, "loss": 0.5698, "step": 27396 }, { "epoch": 0.79988905433418, "grad_norm": 0.6481600311529897, "learning_rate": 4.447039740470398e-06, "loss": 0.6356, "step": 27397 }, { "epoch": 0.7999182505620274, "grad_norm": 0.6579606972976715, "learning_rate": 4.44639091646391e-06, "loss": 0.6355, "step": 27398 }, { "epoch": 0.7999474467898747, "grad_norm": 0.6822524792518888, "learning_rate": 4.445742092457421e-06, "loss": 0.6243, "step": 27399 }, { "epoch": 0.7999766430177221, "grad_norm": 0.6268718084073615, "learning_rate": 4.445093268450933e-06, "loss": 0.5286, "step": 27400 }, { "epoch": 0.8000058392455695, "grad_norm": 0.6421728059394084, "learning_rate": 4.444444444444444e-06, "loss": 0.623, "step": 27401 }, { "epoch": 0.8000350354734168, "grad_norm": 0.6434320864295308, "learning_rate": 4.443795620437957e-06, "loss": 0.5912, "step": 27402 }, { "epoch": 0.8000642317012642, "grad_norm": 0.6717911280869461, "learning_rate": 4.443146796431468e-06, "loss": 0.584, "step": 27403 }, { "epoch": 0.8000934279291115, "grad_norm": 0.664028025359816, "learning_rate": 4.44249797242498e-06, "loss": 0.5938, "step": 27404 }, { "epoch": 0.8001226241569589, "grad_norm": 0.6042014137827151, "learning_rate": 4.441849148418491e-06, "loss": 0.4804, "step": 27405 }, { "epoch": 0.8001518203848063, "grad_norm": 0.6376241236584772, "learning_rate": 4.441200324412004e-06, "loss": 0.5907, "step": 27406 }, { "epoch": 0.8001810166126536, "grad_norm": 0.6699863253672004, "learning_rate": 4.4405515004055155e-06, "loss": 0.6205, "step": 27407 }, { "epoch": 0.800210212840501, "grad_norm": 0.6298064325029074, "learning_rate": 4.4399026763990275e-06, "loss": 0.5718, "step": 27408 }, { "epoch": 0.8002394090683483, "grad_norm": 0.672358340926261, "learning_rate": 4.439253852392539e-06, "loss": 0.5784, "step": 27409 }, { "epoch": 0.8002686052961957, "grad_norm": 0.6413583201394107, "learning_rate": 4.438605028386051e-06, "loss": 0.5871, "step": 27410 }, { "epoch": 0.8002978015240431, "grad_norm": 0.6094483601537954, "learning_rate": 4.437956204379563e-06, "loss": 0.531, "step": 27411 }, { "epoch": 0.8003269977518904, "grad_norm": 0.6614940025994764, "learning_rate": 4.437307380373074e-06, "loss": 0.5819, "step": 27412 }, { "epoch": 0.8003561939797378, "grad_norm": 0.649331827609462, "learning_rate": 4.436658556366586e-06, "loss": 0.5871, "step": 27413 }, { "epoch": 0.8003853902075851, "grad_norm": 0.6607946156065284, "learning_rate": 4.436009732360098e-06, "loss": 0.6273, "step": 27414 }, { "epoch": 0.8004145864354325, "grad_norm": 0.668436661719877, "learning_rate": 4.435360908353609e-06, "loss": 0.6549, "step": 27415 }, { "epoch": 0.8004437826632799, "grad_norm": 0.6686688031545127, "learning_rate": 4.434712084347121e-06, "loss": 0.6482, "step": 27416 }, { "epoch": 0.8004729788911272, "grad_norm": 0.737476133666616, "learning_rate": 4.434063260340633e-06, "loss": 0.738, "step": 27417 }, { "epoch": 0.8005021751189746, "grad_norm": 0.6685795172106823, "learning_rate": 4.433414436334144e-06, "loss": 0.5724, "step": 27418 }, { "epoch": 0.800531371346822, "grad_norm": 0.6230262862275301, "learning_rate": 4.432765612327656e-06, "loss": 0.5213, "step": 27419 }, { "epoch": 0.8005605675746693, "grad_norm": 0.6146497520885287, "learning_rate": 4.432116788321168e-06, "loss": 0.5672, "step": 27420 }, { "epoch": 0.8005897638025167, "grad_norm": 0.6868687686429685, "learning_rate": 4.4314679643146805e-06, "loss": 0.62, "step": 27421 }, { "epoch": 0.800618960030364, "grad_norm": 0.5999532173678823, "learning_rate": 4.430819140308192e-06, "loss": 0.5306, "step": 27422 }, { "epoch": 0.8006481562582114, "grad_norm": 0.5816309671082621, "learning_rate": 4.430170316301704e-06, "loss": 0.5129, "step": 27423 }, { "epoch": 0.8006773524860588, "grad_norm": 0.6630713316712757, "learning_rate": 4.429521492295215e-06, "loss": 0.5275, "step": 27424 }, { "epoch": 0.8007065487139061, "grad_norm": 0.6237008243209471, "learning_rate": 4.428872668288728e-06, "loss": 0.5275, "step": 27425 }, { "epoch": 0.8007357449417535, "grad_norm": 0.5738634132787117, "learning_rate": 4.428223844282239e-06, "loss": 0.4943, "step": 27426 }, { "epoch": 0.8007649411696008, "grad_norm": 0.593078306511267, "learning_rate": 4.427575020275751e-06, "loss": 0.5192, "step": 27427 }, { "epoch": 0.8007941373974482, "grad_norm": 0.6692743702794974, "learning_rate": 4.426926196269262e-06, "loss": 0.5881, "step": 27428 }, { "epoch": 0.8008233336252956, "grad_norm": 0.6955512920959638, "learning_rate": 4.426277372262774e-06, "loss": 0.5838, "step": 27429 }, { "epoch": 0.8008525298531429, "grad_norm": 0.6684953381178164, "learning_rate": 4.425628548256286e-06, "loss": 0.688, "step": 27430 }, { "epoch": 0.8008817260809903, "grad_norm": 0.6689249430072836, "learning_rate": 4.424979724249797e-06, "loss": 0.6461, "step": 27431 }, { "epoch": 0.8009109223088376, "grad_norm": 0.6596911789258673, "learning_rate": 4.424330900243309e-06, "loss": 0.5623, "step": 27432 }, { "epoch": 0.800940118536685, "grad_norm": 0.694018075138082, "learning_rate": 4.423682076236821e-06, "loss": 0.6944, "step": 27433 }, { "epoch": 0.8009693147645324, "grad_norm": 0.6323257752550285, "learning_rate": 4.4230332522303326e-06, "loss": 0.5795, "step": 27434 }, { "epoch": 0.8009985109923797, "grad_norm": 0.6353760237094189, "learning_rate": 4.422384428223845e-06, "loss": 0.5802, "step": 27435 }, { "epoch": 0.8010277072202271, "grad_norm": 0.6209471536807524, "learning_rate": 4.421735604217357e-06, "loss": 0.5715, "step": 27436 }, { "epoch": 0.8010569034480746, "grad_norm": 0.6106191018376632, "learning_rate": 4.421086780210868e-06, "loss": 0.5352, "step": 27437 }, { "epoch": 0.8010860996759219, "grad_norm": 0.6697352121108858, "learning_rate": 4.42043795620438e-06, "loss": 0.6016, "step": 27438 }, { "epoch": 0.8011152959037693, "grad_norm": 0.6812627388682108, "learning_rate": 4.419789132197891e-06, "loss": 0.6012, "step": 27439 }, { "epoch": 0.8011444921316166, "grad_norm": 0.6605446296418228, "learning_rate": 4.419140308191404e-06, "loss": 0.6476, "step": 27440 }, { "epoch": 0.801173688359464, "grad_norm": 0.6666138445802439, "learning_rate": 4.418491484184915e-06, "loss": 0.6512, "step": 27441 }, { "epoch": 0.8012028845873114, "grad_norm": 0.6370053371743231, "learning_rate": 4.417842660178427e-06, "loss": 0.6141, "step": 27442 }, { "epoch": 0.8012320808151587, "grad_norm": 0.5795662783195088, "learning_rate": 4.417193836171938e-06, "loss": 0.5167, "step": 27443 }, { "epoch": 0.8012612770430061, "grad_norm": 0.667031696811168, "learning_rate": 4.41654501216545e-06, "loss": 0.6364, "step": 27444 }, { "epoch": 0.8012904732708535, "grad_norm": 0.6143266589078465, "learning_rate": 4.415896188158962e-06, "loss": 0.5704, "step": 27445 }, { "epoch": 0.8013196694987008, "grad_norm": 0.6679042135697715, "learning_rate": 4.415247364152474e-06, "loss": 0.663, "step": 27446 }, { "epoch": 0.8013488657265482, "grad_norm": 0.6214170691520108, "learning_rate": 4.4145985401459855e-06, "loss": 0.545, "step": 27447 }, { "epoch": 0.8013780619543955, "grad_norm": 0.584304273533081, "learning_rate": 4.4139497161394976e-06, "loss": 0.4804, "step": 27448 }, { "epoch": 0.8014072581822429, "grad_norm": 0.659710376605613, "learning_rate": 4.41330089213301e-06, "loss": 0.6177, "step": 27449 }, { "epoch": 0.8014364544100903, "grad_norm": 0.6354123752112345, "learning_rate": 4.412652068126521e-06, "loss": 0.5313, "step": 27450 }, { "epoch": 0.8014656506379376, "grad_norm": 0.7277270100198262, "learning_rate": 4.412003244120033e-06, "loss": 0.6783, "step": 27451 }, { "epoch": 0.801494846865785, "grad_norm": 0.6876122523623674, "learning_rate": 4.411354420113545e-06, "loss": 0.6137, "step": 27452 }, { "epoch": 0.8015240430936323, "grad_norm": 0.5987692573808129, "learning_rate": 4.410705596107056e-06, "loss": 0.5329, "step": 27453 }, { "epoch": 0.8015532393214797, "grad_norm": 0.5894450883389438, "learning_rate": 4.410056772100568e-06, "loss": 0.4979, "step": 27454 }, { "epoch": 0.8015824355493271, "grad_norm": 0.5911832403825168, "learning_rate": 4.40940794809408e-06, "loss": 0.5224, "step": 27455 }, { "epoch": 0.8016116317771744, "grad_norm": 0.6093783240172375, "learning_rate": 4.408759124087591e-06, "loss": 0.5543, "step": 27456 }, { "epoch": 0.8016408280050218, "grad_norm": 0.6468094934157382, "learning_rate": 4.408110300081103e-06, "loss": 0.619, "step": 27457 }, { "epoch": 0.8016700242328691, "grad_norm": 0.5984573793817588, "learning_rate": 4.407461476074615e-06, "loss": 0.5357, "step": 27458 }, { "epoch": 0.8016992204607165, "grad_norm": 0.6546395706516815, "learning_rate": 4.406812652068127e-06, "loss": 0.6105, "step": 27459 }, { "epoch": 0.8017284166885639, "grad_norm": 0.6788900319976604, "learning_rate": 4.4061638280616385e-06, "loss": 0.6744, "step": 27460 }, { "epoch": 0.8017576129164112, "grad_norm": 0.6631120317313305, "learning_rate": 4.4055150040551505e-06, "loss": 0.6443, "step": 27461 }, { "epoch": 0.8017868091442586, "grad_norm": 0.6043535682291654, "learning_rate": 4.404866180048662e-06, "loss": 0.5317, "step": 27462 }, { "epoch": 0.801816005372106, "grad_norm": 0.612296951101576, "learning_rate": 4.404217356042174e-06, "loss": 0.5708, "step": 27463 }, { "epoch": 0.8018452015999533, "grad_norm": 0.6701961610234342, "learning_rate": 4.403568532035686e-06, "loss": 0.559, "step": 27464 }, { "epoch": 0.8018743978278007, "grad_norm": 0.6227063941364317, "learning_rate": 4.402919708029198e-06, "loss": 0.5928, "step": 27465 }, { "epoch": 0.801903594055648, "grad_norm": 0.6617433532746548, "learning_rate": 4.402270884022709e-06, "loss": 0.6186, "step": 27466 }, { "epoch": 0.8019327902834954, "grad_norm": 0.6379112171752533, "learning_rate": 4.401622060016221e-06, "loss": 0.5421, "step": 27467 }, { "epoch": 0.8019619865113428, "grad_norm": 0.7224779005433384, "learning_rate": 4.400973236009733e-06, "loss": 0.6229, "step": 27468 }, { "epoch": 0.8019911827391901, "grad_norm": 0.707566714353056, "learning_rate": 4.400324412003244e-06, "loss": 0.66, "step": 27469 }, { "epoch": 0.8020203789670375, "grad_norm": 0.6810225122178162, "learning_rate": 4.399675587996756e-06, "loss": 0.6491, "step": 27470 }, { "epoch": 0.8020495751948848, "grad_norm": 0.6856819206566585, "learning_rate": 4.399026763990268e-06, "loss": 0.6984, "step": 27471 }, { "epoch": 0.8020787714227322, "grad_norm": 0.6919686340023575, "learning_rate": 4.3983779399837794e-06, "loss": 0.6616, "step": 27472 }, { "epoch": 0.8021079676505796, "grad_norm": 0.6214572124838855, "learning_rate": 4.3977291159772915e-06, "loss": 0.5521, "step": 27473 }, { "epoch": 0.8021371638784269, "grad_norm": 0.6549323935435746, "learning_rate": 4.3970802919708035e-06, "loss": 0.5876, "step": 27474 }, { "epoch": 0.8021663601062743, "grad_norm": 0.6399198539747798, "learning_rate": 4.396431467964315e-06, "loss": 0.5387, "step": 27475 }, { "epoch": 0.8021955563341217, "grad_norm": 0.6418627972776543, "learning_rate": 4.395782643957827e-06, "loss": 0.5696, "step": 27476 }, { "epoch": 0.802224752561969, "grad_norm": 0.6368150572683913, "learning_rate": 4.395133819951339e-06, "loss": 0.5697, "step": 27477 }, { "epoch": 0.8022539487898164, "grad_norm": 0.6138456135238085, "learning_rate": 4.394484995944851e-06, "loss": 0.532, "step": 27478 }, { "epoch": 0.8022831450176637, "grad_norm": 0.6390532693734593, "learning_rate": 4.393836171938362e-06, "loss": 0.5858, "step": 27479 }, { "epoch": 0.8023123412455111, "grad_norm": 0.6234376944770391, "learning_rate": 4.393187347931874e-06, "loss": 0.5813, "step": 27480 }, { "epoch": 0.8023415374733585, "grad_norm": 0.6424847147921645, "learning_rate": 4.392538523925385e-06, "loss": 0.5771, "step": 27481 }, { "epoch": 0.8023707337012058, "grad_norm": 0.7232397226732119, "learning_rate": 4.391889699918897e-06, "loss": 0.685, "step": 27482 }, { "epoch": 0.8023999299290532, "grad_norm": 0.5999625752225485, "learning_rate": 4.391240875912409e-06, "loss": 0.5588, "step": 27483 }, { "epoch": 0.8024291261569005, "grad_norm": 0.6120514916467297, "learning_rate": 4.390592051905921e-06, "loss": 0.5643, "step": 27484 }, { "epoch": 0.8024583223847479, "grad_norm": 0.6459929640656852, "learning_rate": 4.389943227899432e-06, "loss": 0.6179, "step": 27485 }, { "epoch": 0.8024875186125953, "grad_norm": 0.6189855376990305, "learning_rate": 4.3892944038929444e-06, "loss": 0.5197, "step": 27486 }, { "epoch": 0.8025167148404426, "grad_norm": 0.6568091275138859, "learning_rate": 4.3886455798864565e-06, "loss": 0.6252, "step": 27487 }, { "epoch": 0.80254591106829, "grad_norm": 0.6614927892574384, "learning_rate": 4.387996755879968e-06, "loss": 0.6383, "step": 27488 }, { "epoch": 0.8025751072961373, "grad_norm": 0.9228051906295137, "learning_rate": 4.38734793187348e-06, "loss": 0.7548, "step": 27489 }, { "epoch": 0.8026043035239847, "grad_norm": 0.6754476499120153, "learning_rate": 4.386699107866991e-06, "loss": 0.6245, "step": 27490 }, { "epoch": 0.8026334997518321, "grad_norm": 0.6469657261541252, "learning_rate": 4.386050283860504e-06, "loss": 0.608, "step": 27491 }, { "epoch": 0.8026626959796794, "grad_norm": 0.6111561121220621, "learning_rate": 4.385401459854015e-06, "loss": 0.5538, "step": 27492 }, { "epoch": 0.8026918922075268, "grad_norm": 0.5971063570980979, "learning_rate": 4.384752635847527e-06, "loss": 0.5202, "step": 27493 }, { "epoch": 0.8027210884353742, "grad_norm": 0.6124586003181893, "learning_rate": 4.384103811841038e-06, "loss": 0.5541, "step": 27494 }, { "epoch": 0.8027502846632215, "grad_norm": 0.6810127284953221, "learning_rate": 4.38345498783455e-06, "loss": 0.6218, "step": 27495 }, { "epoch": 0.8027794808910689, "grad_norm": 0.685655961131154, "learning_rate": 4.382806163828062e-06, "loss": 0.6547, "step": 27496 }, { "epoch": 0.8028086771189162, "grad_norm": 0.6497853661955949, "learning_rate": 4.382157339821574e-06, "loss": 0.6266, "step": 27497 }, { "epoch": 0.8028378733467636, "grad_norm": 0.6762056873738903, "learning_rate": 4.381508515815085e-06, "loss": 0.6718, "step": 27498 }, { "epoch": 0.802867069574611, "grad_norm": 0.604990109005014, "learning_rate": 4.380859691808597e-06, "loss": 0.5338, "step": 27499 }, { "epoch": 0.8028962658024583, "grad_norm": 0.6412850794316883, "learning_rate": 4.380210867802109e-06, "loss": 0.5748, "step": 27500 }, { "epoch": 0.8029254620303057, "grad_norm": 0.639473303417957, "learning_rate": 4.379562043795621e-06, "loss": 0.5632, "step": 27501 }, { "epoch": 0.802954658258153, "grad_norm": 0.6401554507894481, "learning_rate": 4.378913219789133e-06, "loss": 0.5966, "step": 27502 }, { "epoch": 0.8029838544860004, "grad_norm": 0.6574366282936174, "learning_rate": 4.378264395782645e-06, "loss": 0.5855, "step": 27503 }, { "epoch": 0.8030130507138478, "grad_norm": 0.6687658215681762, "learning_rate": 4.377615571776156e-06, "loss": 0.6246, "step": 27504 }, { "epoch": 0.8030422469416951, "grad_norm": 0.62479944172064, "learning_rate": 4.376966747769668e-06, "loss": 0.5449, "step": 27505 }, { "epoch": 0.8030714431695425, "grad_norm": 0.635876304426678, "learning_rate": 4.37631792376318e-06, "loss": 0.5668, "step": 27506 }, { "epoch": 0.8031006393973898, "grad_norm": 0.6405572400707439, "learning_rate": 4.375669099756691e-06, "loss": 0.6298, "step": 27507 }, { "epoch": 0.8031298356252372, "grad_norm": 0.6722124301446121, "learning_rate": 4.375020275750203e-06, "loss": 0.5751, "step": 27508 }, { "epoch": 0.8031590318530846, "grad_norm": 0.6457678236684984, "learning_rate": 4.374371451743714e-06, "loss": 0.6162, "step": 27509 }, { "epoch": 0.8031882280809319, "grad_norm": 0.6411472152753991, "learning_rate": 4.373722627737227e-06, "loss": 0.6121, "step": 27510 }, { "epoch": 0.8032174243087793, "grad_norm": 0.64629638969481, "learning_rate": 4.373073803730738e-06, "loss": 0.5776, "step": 27511 }, { "epoch": 0.8032466205366267, "grad_norm": 0.6954281422296358, "learning_rate": 4.37242497972425e-06, "loss": 0.6691, "step": 27512 }, { "epoch": 0.803275816764474, "grad_norm": 0.6497938754272586, "learning_rate": 4.3717761557177615e-06, "loss": 0.6297, "step": 27513 }, { "epoch": 0.8033050129923214, "grad_norm": 0.6466895635965437, "learning_rate": 4.3711273317112736e-06, "loss": 0.6465, "step": 27514 }, { "epoch": 0.8033342092201687, "grad_norm": 0.6511065344827647, "learning_rate": 4.370478507704786e-06, "loss": 0.6315, "step": 27515 }, { "epoch": 0.8033634054480161, "grad_norm": 0.6595056988655056, "learning_rate": 4.369829683698298e-06, "loss": 0.629, "step": 27516 }, { "epoch": 0.8033926016758635, "grad_norm": 0.6666093575425714, "learning_rate": 4.369180859691809e-06, "loss": 0.686, "step": 27517 }, { "epoch": 0.8034217979037108, "grad_norm": 0.651460139475347, "learning_rate": 4.368532035685321e-06, "loss": 0.617, "step": 27518 }, { "epoch": 0.8034509941315582, "grad_norm": 0.6847550650104097, "learning_rate": 4.367883211678832e-06, "loss": 0.6752, "step": 27519 }, { "epoch": 0.8034801903594055, "grad_norm": 0.6235967504617369, "learning_rate": 4.367234387672344e-06, "loss": 0.5944, "step": 27520 }, { "epoch": 0.8035093865872529, "grad_norm": 0.6523300039229714, "learning_rate": 4.366585563665856e-06, "loss": 0.6283, "step": 27521 }, { "epoch": 0.8035385828151003, "grad_norm": 0.6274718697742303, "learning_rate": 4.365936739659368e-06, "loss": 0.566, "step": 27522 }, { "epoch": 0.8035677790429476, "grad_norm": 0.6275379988409034, "learning_rate": 4.365287915652879e-06, "loss": 0.611, "step": 27523 }, { "epoch": 0.803596975270795, "grad_norm": 0.6616631733539917, "learning_rate": 4.364639091646391e-06, "loss": 0.606, "step": 27524 }, { "epoch": 0.8036261714986423, "grad_norm": 0.6647228689547043, "learning_rate": 4.363990267639903e-06, "loss": 0.6267, "step": 27525 }, { "epoch": 0.8036553677264897, "grad_norm": 0.6138660155891495, "learning_rate": 4.3633414436334145e-06, "loss": 0.5798, "step": 27526 }, { "epoch": 0.8036845639543371, "grad_norm": 0.6531503380619612, "learning_rate": 4.3626926196269265e-06, "loss": 0.6447, "step": 27527 }, { "epoch": 0.8037137601821844, "grad_norm": 0.6798286395794404, "learning_rate": 4.362043795620438e-06, "loss": 0.6258, "step": 27528 }, { "epoch": 0.8037429564100318, "grad_norm": 0.650846632052367, "learning_rate": 4.361394971613951e-06, "loss": 0.5705, "step": 27529 }, { "epoch": 0.8037721526378792, "grad_norm": 0.5982322770195113, "learning_rate": 4.360746147607462e-06, "loss": 0.5418, "step": 27530 }, { "epoch": 0.8038013488657265, "grad_norm": 0.6941302663930533, "learning_rate": 4.360097323600974e-06, "loss": 0.6604, "step": 27531 }, { "epoch": 0.8038305450935739, "grad_norm": 0.6671614263533852, "learning_rate": 4.359448499594485e-06, "loss": 0.6238, "step": 27532 }, { "epoch": 0.8038597413214212, "grad_norm": 0.6281053120982949, "learning_rate": 4.358799675587997e-06, "loss": 0.5418, "step": 27533 }, { "epoch": 0.8038889375492686, "grad_norm": 0.6512548379613797, "learning_rate": 4.358150851581509e-06, "loss": 0.6117, "step": 27534 }, { "epoch": 0.803918133777116, "grad_norm": 0.6988073017440765, "learning_rate": 4.357502027575021e-06, "loss": 0.7286, "step": 27535 }, { "epoch": 0.8039473300049633, "grad_norm": 0.6640367488434917, "learning_rate": 4.356853203568532e-06, "loss": 0.6403, "step": 27536 }, { "epoch": 0.8039765262328107, "grad_norm": 0.6433591754098095, "learning_rate": 4.356204379562044e-06, "loss": 0.5281, "step": 27537 }, { "epoch": 0.804005722460658, "grad_norm": 0.6089468534086709, "learning_rate": 4.3555555555555555e-06, "loss": 0.5581, "step": 27538 }, { "epoch": 0.8040349186885054, "grad_norm": 0.6302490658521795, "learning_rate": 4.3549067315490675e-06, "loss": 0.555, "step": 27539 }, { "epoch": 0.8040641149163528, "grad_norm": 0.6796620180411642, "learning_rate": 4.3542579075425795e-06, "loss": 0.6939, "step": 27540 }, { "epoch": 0.8040933111442001, "grad_norm": 0.6763923433756677, "learning_rate": 4.3536090835360915e-06, "loss": 0.6172, "step": 27541 }, { "epoch": 0.8041225073720475, "grad_norm": 0.6576051332614011, "learning_rate": 4.352960259529603e-06, "loss": 0.6312, "step": 27542 }, { "epoch": 0.8041517035998949, "grad_norm": 0.6571784837431496, "learning_rate": 4.352311435523115e-06, "loss": 0.5859, "step": 27543 }, { "epoch": 0.8041808998277422, "grad_norm": 0.6291809724243018, "learning_rate": 4.351662611516627e-06, "loss": 0.5823, "step": 27544 }, { "epoch": 0.8042100960555896, "grad_norm": 0.6417213695032221, "learning_rate": 4.351013787510138e-06, "loss": 0.6085, "step": 27545 }, { "epoch": 0.8042392922834369, "grad_norm": 0.6365338243680654, "learning_rate": 4.35036496350365e-06, "loss": 0.6205, "step": 27546 }, { "epoch": 0.8042684885112843, "grad_norm": 0.6523481918387131, "learning_rate": 4.349716139497161e-06, "loss": 0.5529, "step": 27547 }, { "epoch": 0.8042976847391317, "grad_norm": 0.6494945636171398, "learning_rate": 4.349067315490674e-06, "loss": 0.5904, "step": 27548 }, { "epoch": 0.804326880966979, "grad_norm": 0.6787519716560834, "learning_rate": 4.348418491484185e-06, "loss": 0.6568, "step": 27549 }, { "epoch": 0.8043560771948264, "grad_norm": 0.6613706242885892, "learning_rate": 4.347769667477697e-06, "loss": 0.6291, "step": 27550 }, { "epoch": 0.8043852734226737, "grad_norm": 0.652747267168668, "learning_rate": 4.347120843471208e-06, "loss": 0.6022, "step": 27551 }, { "epoch": 0.8044144696505211, "grad_norm": 0.6546666106650434, "learning_rate": 4.3464720194647204e-06, "loss": 0.6362, "step": 27552 }, { "epoch": 0.8044436658783685, "grad_norm": 0.6337292012391433, "learning_rate": 4.3458231954582325e-06, "loss": 0.5733, "step": 27553 }, { "epoch": 0.8044728621062158, "grad_norm": 0.5886594911488797, "learning_rate": 4.3451743714517445e-06, "loss": 0.5513, "step": 27554 }, { "epoch": 0.8045020583340632, "grad_norm": 0.6323504386844777, "learning_rate": 4.344525547445256e-06, "loss": 0.5851, "step": 27555 }, { "epoch": 0.8045312545619105, "grad_norm": 0.6836852434173142, "learning_rate": 4.343876723438768e-06, "loss": 0.6411, "step": 27556 }, { "epoch": 0.8045604507897579, "grad_norm": 0.7136417441318041, "learning_rate": 4.34322789943228e-06, "loss": 0.6741, "step": 27557 }, { "epoch": 0.8045896470176054, "grad_norm": 0.6427278367552693, "learning_rate": 4.342579075425791e-06, "loss": 0.5959, "step": 27558 }, { "epoch": 0.8046188432454527, "grad_norm": 0.6740925638300705, "learning_rate": 4.341930251419303e-06, "loss": 0.627, "step": 27559 }, { "epoch": 0.8046480394733001, "grad_norm": 0.6583940793901288, "learning_rate": 4.341281427412815e-06, "loss": 0.597, "step": 27560 }, { "epoch": 0.8046772357011475, "grad_norm": 0.6834803281116619, "learning_rate": 4.340632603406326e-06, "loss": 0.6294, "step": 27561 }, { "epoch": 0.8047064319289948, "grad_norm": 0.6570003906893451, "learning_rate": 4.339983779399838e-06, "loss": 0.6291, "step": 27562 }, { "epoch": 0.8047356281568422, "grad_norm": 0.6443547289197052, "learning_rate": 4.33933495539335e-06, "loss": 0.5759, "step": 27563 }, { "epoch": 0.8047648243846895, "grad_norm": 0.6579847011791742, "learning_rate": 4.338686131386861e-06, "loss": 0.5938, "step": 27564 }, { "epoch": 0.8047940206125369, "grad_norm": 0.6199835800401426, "learning_rate": 4.338037307380373e-06, "loss": 0.5605, "step": 27565 }, { "epoch": 0.8048232168403843, "grad_norm": 0.6555326611444823, "learning_rate": 4.337388483373885e-06, "loss": 0.593, "step": 27566 }, { "epoch": 0.8048524130682316, "grad_norm": 0.6404263042171388, "learning_rate": 4.3367396593673975e-06, "loss": 0.6001, "step": 27567 }, { "epoch": 0.804881609296079, "grad_norm": 0.614252033136751, "learning_rate": 4.336090835360909e-06, "loss": 0.5798, "step": 27568 }, { "epoch": 0.8049108055239264, "grad_norm": 0.6634844177492719, "learning_rate": 4.335442011354421e-06, "loss": 0.6228, "step": 27569 }, { "epoch": 0.8049400017517737, "grad_norm": 0.6282188498206256, "learning_rate": 4.334793187347932e-06, "loss": 0.5744, "step": 27570 }, { "epoch": 0.8049691979796211, "grad_norm": 0.6533938078109467, "learning_rate": 4.334144363341444e-06, "loss": 0.6136, "step": 27571 }, { "epoch": 0.8049983942074684, "grad_norm": 0.643502251013652, "learning_rate": 4.333495539334956e-06, "loss": 0.5954, "step": 27572 }, { "epoch": 0.8050275904353158, "grad_norm": 0.6411907093828756, "learning_rate": 4.332846715328468e-06, "loss": 0.559, "step": 27573 }, { "epoch": 0.8050567866631632, "grad_norm": 0.6251305176176717, "learning_rate": 4.332197891321979e-06, "loss": 0.537, "step": 27574 }, { "epoch": 0.8050859828910105, "grad_norm": 0.6656911608222216, "learning_rate": 4.331549067315491e-06, "loss": 0.6028, "step": 27575 }, { "epoch": 0.8051151791188579, "grad_norm": 0.6788685150629417, "learning_rate": 4.330900243309003e-06, "loss": 0.6638, "step": 27576 }, { "epoch": 0.8051443753467052, "grad_norm": 0.6351077866684597, "learning_rate": 4.330251419302514e-06, "loss": 0.5968, "step": 27577 }, { "epoch": 0.8051735715745526, "grad_norm": 0.6669430559547207, "learning_rate": 4.329602595296026e-06, "loss": 0.6339, "step": 27578 }, { "epoch": 0.8052027678024, "grad_norm": 0.6140684858385642, "learning_rate": 4.3289537712895376e-06, "loss": 0.5552, "step": 27579 }, { "epoch": 0.8052319640302473, "grad_norm": 0.669689052368616, "learning_rate": 4.32830494728305e-06, "loss": 0.6599, "step": 27580 }, { "epoch": 0.8052611602580947, "grad_norm": 0.7281867156333421, "learning_rate": 4.327656123276562e-06, "loss": 0.7507, "step": 27581 }, { "epoch": 0.805290356485942, "grad_norm": 0.6436319027329208, "learning_rate": 4.327007299270074e-06, "loss": 0.599, "step": 27582 }, { "epoch": 0.8053195527137894, "grad_norm": 0.6460687086836101, "learning_rate": 4.326358475263585e-06, "loss": 0.6186, "step": 27583 }, { "epoch": 0.8053487489416368, "grad_norm": 0.6173704525411787, "learning_rate": 4.325709651257097e-06, "loss": 0.6004, "step": 27584 }, { "epoch": 0.8053779451694841, "grad_norm": 0.6630694856247578, "learning_rate": 4.325060827250608e-06, "loss": 0.5838, "step": 27585 }, { "epoch": 0.8054071413973315, "grad_norm": 0.585400048355371, "learning_rate": 4.324412003244121e-06, "loss": 0.5214, "step": 27586 }, { "epoch": 0.8054363376251789, "grad_norm": 0.6532102882295235, "learning_rate": 4.323763179237632e-06, "loss": 0.569, "step": 27587 }, { "epoch": 0.8054655338530262, "grad_norm": 0.6853684148452541, "learning_rate": 4.323114355231144e-06, "loss": 0.6148, "step": 27588 }, { "epoch": 0.8054947300808736, "grad_norm": 0.6323740361080308, "learning_rate": 4.322465531224655e-06, "loss": 0.5927, "step": 27589 }, { "epoch": 0.8055239263087209, "grad_norm": 0.6226914488608828, "learning_rate": 4.321816707218167e-06, "loss": 0.5402, "step": 27590 }, { "epoch": 0.8055531225365683, "grad_norm": 0.6277003862453786, "learning_rate": 4.321167883211679e-06, "loss": 0.5856, "step": 27591 }, { "epoch": 0.8055823187644157, "grad_norm": 0.6481402538186355, "learning_rate": 4.320519059205191e-06, "loss": 0.6192, "step": 27592 }, { "epoch": 0.805611514992263, "grad_norm": 0.6500903877242631, "learning_rate": 4.3198702351987025e-06, "loss": 0.6155, "step": 27593 }, { "epoch": 0.8056407112201104, "grad_norm": 0.6290119232724661, "learning_rate": 4.3192214111922146e-06, "loss": 0.5266, "step": 27594 }, { "epoch": 0.8056699074479577, "grad_norm": 0.6139797323099855, "learning_rate": 4.318572587185727e-06, "loss": 0.5576, "step": 27595 }, { "epoch": 0.8056991036758051, "grad_norm": 0.6831057035632365, "learning_rate": 4.317923763179238e-06, "loss": 0.6657, "step": 27596 }, { "epoch": 0.8057282999036525, "grad_norm": 0.6264174647220416, "learning_rate": 4.31727493917275e-06, "loss": 0.5814, "step": 27597 }, { "epoch": 0.8057574961314998, "grad_norm": 0.6282710245189805, "learning_rate": 4.316626115166261e-06, "loss": 0.5779, "step": 27598 }, { "epoch": 0.8057866923593472, "grad_norm": 0.6487322828793194, "learning_rate": 4.315977291159773e-06, "loss": 0.5948, "step": 27599 }, { "epoch": 0.8058158885871946, "grad_norm": 0.6060750996013572, "learning_rate": 4.315328467153285e-06, "loss": 0.5567, "step": 27600 }, { "epoch": 0.8058450848150419, "grad_norm": 0.6305109084895169, "learning_rate": 4.314679643146797e-06, "loss": 0.5533, "step": 27601 }, { "epoch": 0.8058742810428893, "grad_norm": 0.6300351125629562, "learning_rate": 4.314030819140308e-06, "loss": 0.5495, "step": 27602 }, { "epoch": 0.8059034772707366, "grad_norm": 0.6668439749381686, "learning_rate": 4.31338199513382e-06, "loss": 0.6664, "step": 27603 }, { "epoch": 0.805932673498584, "grad_norm": 0.6336467163556124, "learning_rate": 4.3127331711273315e-06, "loss": 0.5741, "step": 27604 }, { "epoch": 0.8059618697264314, "grad_norm": 0.5748163720044296, "learning_rate": 4.312084347120844e-06, "loss": 0.4813, "step": 27605 }, { "epoch": 0.8059910659542787, "grad_norm": 0.6961140083916709, "learning_rate": 4.3114355231143555e-06, "loss": 0.7037, "step": 27606 }, { "epoch": 0.8060202621821261, "grad_norm": 0.6331690428370217, "learning_rate": 4.3107866991078675e-06, "loss": 0.5941, "step": 27607 }, { "epoch": 0.8060494584099734, "grad_norm": 0.610302252622444, "learning_rate": 4.310137875101379e-06, "loss": 0.5512, "step": 27608 }, { "epoch": 0.8060786546378208, "grad_norm": 0.6615661597572273, "learning_rate": 4.309489051094891e-06, "loss": 0.62, "step": 27609 }, { "epoch": 0.8061078508656682, "grad_norm": 0.7111123837347583, "learning_rate": 4.308840227088403e-06, "loss": 0.6737, "step": 27610 }, { "epoch": 0.8061370470935155, "grad_norm": 0.6381559887340057, "learning_rate": 4.308191403081915e-06, "loss": 0.6178, "step": 27611 }, { "epoch": 0.8061662433213629, "grad_norm": 0.6230966862620625, "learning_rate": 4.307542579075426e-06, "loss": 0.5643, "step": 27612 }, { "epoch": 0.8061954395492102, "grad_norm": 0.6597625483765486, "learning_rate": 4.306893755068938e-06, "loss": 0.6218, "step": 27613 }, { "epoch": 0.8062246357770576, "grad_norm": 0.5986991851599116, "learning_rate": 4.30624493106245e-06, "loss": 0.5535, "step": 27614 }, { "epoch": 0.806253832004905, "grad_norm": 0.6471885597657513, "learning_rate": 4.305596107055961e-06, "loss": 0.5853, "step": 27615 }, { "epoch": 0.8062830282327523, "grad_norm": 0.6236008782857898, "learning_rate": 4.304947283049473e-06, "loss": 0.5924, "step": 27616 }, { "epoch": 0.8063122244605997, "grad_norm": 0.6774286645367708, "learning_rate": 4.3042984590429844e-06, "loss": 0.6217, "step": 27617 }, { "epoch": 0.806341420688447, "grad_norm": 0.6899998158802917, "learning_rate": 4.3036496350364965e-06, "loss": 0.6968, "step": 27618 }, { "epoch": 0.8063706169162944, "grad_norm": 0.6557053420786874, "learning_rate": 4.3030008110300085e-06, "loss": 0.6076, "step": 27619 }, { "epoch": 0.8063998131441418, "grad_norm": 0.6280788485823112, "learning_rate": 4.3023519870235205e-06, "loss": 0.5994, "step": 27620 }, { "epoch": 0.8064290093719891, "grad_norm": 0.6392816671029813, "learning_rate": 4.301703163017032e-06, "loss": 0.5783, "step": 27621 }, { "epoch": 0.8064582055998365, "grad_norm": 0.5931324189349608, "learning_rate": 4.301054339010544e-06, "loss": 0.5307, "step": 27622 }, { "epoch": 0.8064874018276839, "grad_norm": 0.6999211668073023, "learning_rate": 4.300405515004055e-06, "loss": 0.6987, "step": 27623 }, { "epoch": 0.8065165980555312, "grad_norm": 0.6174633986429591, "learning_rate": 4.299756690997568e-06, "loss": 0.549, "step": 27624 }, { "epoch": 0.8065457942833786, "grad_norm": 0.6199954438701509, "learning_rate": 4.299107866991079e-06, "loss": 0.549, "step": 27625 }, { "epoch": 0.8065749905112259, "grad_norm": 0.63001358072855, "learning_rate": 4.298459042984591e-06, "loss": 0.595, "step": 27626 }, { "epoch": 0.8066041867390733, "grad_norm": 0.6292785934277855, "learning_rate": 4.297810218978102e-06, "loss": 0.5798, "step": 27627 }, { "epoch": 0.8066333829669207, "grad_norm": 0.6639066045479254, "learning_rate": 4.297161394971614e-06, "loss": 0.6379, "step": 27628 }, { "epoch": 0.806662579194768, "grad_norm": 0.6676933533372652, "learning_rate": 4.296512570965126e-06, "loss": 0.6242, "step": 27629 }, { "epoch": 0.8066917754226154, "grad_norm": 0.7054347247430434, "learning_rate": 4.295863746958638e-06, "loss": 0.6568, "step": 27630 }, { "epoch": 0.8067209716504627, "grad_norm": 0.5895366068004874, "learning_rate": 4.295214922952149e-06, "loss": 0.4703, "step": 27631 }, { "epoch": 0.8067501678783101, "grad_norm": 0.6364106043413909, "learning_rate": 4.2945660989456614e-06, "loss": 0.5851, "step": 27632 }, { "epoch": 0.8067793641061575, "grad_norm": 0.6538275570966275, "learning_rate": 4.2939172749391735e-06, "loss": 0.5885, "step": 27633 }, { "epoch": 0.8068085603340048, "grad_norm": 0.6277093612791185, "learning_rate": 4.293268450932685e-06, "loss": 0.6141, "step": 27634 }, { "epoch": 0.8068377565618522, "grad_norm": 0.6242614361519997, "learning_rate": 4.292619626926197e-06, "loss": 0.5728, "step": 27635 }, { "epoch": 0.8068669527896996, "grad_norm": 0.6673719516611082, "learning_rate": 4.291970802919708e-06, "loss": 0.6345, "step": 27636 }, { "epoch": 0.8068961490175469, "grad_norm": 0.6470122012760935, "learning_rate": 4.29132197891322e-06, "loss": 0.5897, "step": 27637 }, { "epoch": 0.8069253452453943, "grad_norm": 0.6303107259535654, "learning_rate": 4.290673154906732e-06, "loss": 0.5393, "step": 27638 }, { "epoch": 0.8069545414732416, "grad_norm": 0.6606952934932916, "learning_rate": 4.290024330900244e-06, "loss": 0.6399, "step": 27639 }, { "epoch": 0.806983737701089, "grad_norm": 0.7187763951663316, "learning_rate": 4.289375506893755e-06, "loss": 0.6734, "step": 27640 }, { "epoch": 0.8070129339289364, "grad_norm": 0.6232088322384457, "learning_rate": 4.288726682887267e-06, "loss": 0.5511, "step": 27641 }, { "epoch": 0.8070421301567837, "grad_norm": 0.6173144720691401, "learning_rate": 4.288077858880779e-06, "loss": 0.569, "step": 27642 }, { "epoch": 0.8070713263846311, "grad_norm": 0.6374456873057941, "learning_rate": 4.287429034874291e-06, "loss": 0.5649, "step": 27643 }, { "epoch": 0.8071005226124784, "grad_norm": 0.6037520871110313, "learning_rate": 4.286780210867802e-06, "loss": 0.5658, "step": 27644 }, { "epoch": 0.8071297188403258, "grad_norm": 0.6186999982757864, "learning_rate": 4.286131386861314e-06, "loss": 0.5342, "step": 27645 }, { "epoch": 0.8071589150681732, "grad_norm": 0.6524638117669457, "learning_rate": 4.285482562854826e-06, "loss": 0.618, "step": 27646 }, { "epoch": 0.8071881112960205, "grad_norm": 0.6474674367386936, "learning_rate": 4.284833738848338e-06, "loss": 0.6007, "step": 27647 }, { "epoch": 0.8072173075238679, "grad_norm": 0.6269809044475674, "learning_rate": 4.28418491484185e-06, "loss": 0.5515, "step": 27648 }, { "epoch": 0.8072465037517152, "grad_norm": 0.6576232826962898, "learning_rate": 4.283536090835362e-06, "loss": 0.6196, "step": 27649 }, { "epoch": 0.8072756999795626, "grad_norm": 0.6490943961566159, "learning_rate": 4.282887266828873e-06, "loss": 0.606, "step": 27650 }, { "epoch": 0.80730489620741, "grad_norm": 0.6097240038309389, "learning_rate": 4.282238442822385e-06, "loss": 0.5205, "step": 27651 }, { "epoch": 0.8073340924352573, "grad_norm": 0.6502920326346175, "learning_rate": 4.281589618815897e-06, "loss": 0.5998, "step": 27652 }, { "epoch": 0.8073632886631047, "grad_norm": 0.6264500786350872, "learning_rate": 4.280940794809408e-06, "loss": 0.5604, "step": 27653 }, { "epoch": 0.8073924848909521, "grad_norm": 0.6452177361799546, "learning_rate": 4.28029197080292e-06, "loss": 0.6186, "step": 27654 }, { "epoch": 0.8074216811187994, "grad_norm": 0.654835602737299, "learning_rate": 4.279643146796431e-06, "loss": 0.5842, "step": 27655 }, { "epoch": 0.8074508773466468, "grad_norm": 0.6739026566730605, "learning_rate": 4.278994322789943e-06, "loss": 0.624, "step": 27656 }, { "epoch": 0.8074800735744941, "grad_norm": 0.6651481383474313, "learning_rate": 4.278345498783455e-06, "loss": 0.6351, "step": 27657 }, { "epoch": 0.8075092698023415, "grad_norm": 0.6101724773494538, "learning_rate": 4.277696674776967e-06, "loss": 0.5282, "step": 27658 }, { "epoch": 0.8075384660301889, "grad_norm": 0.6394650840171259, "learning_rate": 4.2770478507704786e-06, "loss": 0.5936, "step": 27659 }, { "epoch": 0.8075676622580362, "grad_norm": 0.6686210089856927, "learning_rate": 4.276399026763991e-06, "loss": 0.539, "step": 27660 }, { "epoch": 0.8075968584858836, "grad_norm": 0.6858398992983041, "learning_rate": 4.275750202757503e-06, "loss": 0.6789, "step": 27661 }, { "epoch": 0.807626054713731, "grad_norm": 0.6477710761218536, "learning_rate": 4.275101378751015e-06, "loss": 0.5969, "step": 27662 }, { "epoch": 0.8076552509415783, "grad_norm": 0.5782568732642793, "learning_rate": 4.274452554744526e-06, "loss": 0.4806, "step": 27663 }, { "epoch": 0.8076844471694257, "grad_norm": 0.6261382923625879, "learning_rate": 4.273803730738038e-06, "loss": 0.5452, "step": 27664 }, { "epoch": 0.807713643397273, "grad_norm": 0.6736814479383756, "learning_rate": 4.273154906731549e-06, "loss": 0.6183, "step": 27665 }, { "epoch": 0.8077428396251204, "grad_norm": 0.6734718452732849, "learning_rate": 4.272506082725061e-06, "loss": 0.647, "step": 27666 }, { "epoch": 0.8077720358529678, "grad_norm": 0.6500499642211411, "learning_rate": 4.271857258718573e-06, "loss": 0.5975, "step": 27667 }, { "epoch": 0.8078012320808151, "grad_norm": 0.6810303230354418, "learning_rate": 4.271208434712085e-06, "loss": 0.6559, "step": 27668 }, { "epoch": 0.8078304283086625, "grad_norm": 0.6462895803458377, "learning_rate": 4.270559610705596e-06, "loss": 0.5683, "step": 27669 }, { "epoch": 0.8078596245365098, "grad_norm": 0.6161965749234662, "learning_rate": 4.269910786699108e-06, "loss": 0.5527, "step": 27670 }, { "epoch": 0.8078888207643572, "grad_norm": 0.5976673587891208, "learning_rate": 4.26926196269262e-06, "loss": 0.5392, "step": 27671 }, { "epoch": 0.8079180169922046, "grad_norm": 0.6527698797953893, "learning_rate": 4.2686131386861315e-06, "loss": 0.6246, "step": 27672 }, { "epoch": 0.8079472132200519, "grad_norm": 0.6882612800062361, "learning_rate": 4.2679643146796435e-06, "loss": 0.6196, "step": 27673 }, { "epoch": 0.8079764094478993, "grad_norm": 0.6018767738636097, "learning_rate": 4.267315490673155e-06, "loss": 0.5397, "step": 27674 }, { "epoch": 0.8080056056757466, "grad_norm": 0.6302330932345933, "learning_rate": 4.266666666666668e-06, "loss": 0.5991, "step": 27675 }, { "epoch": 0.808034801903594, "grad_norm": 0.585562223513411, "learning_rate": 4.266017842660179e-06, "loss": 0.4692, "step": 27676 }, { "epoch": 0.8080639981314414, "grad_norm": 0.6577542465738813, "learning_rate": 4.265369018653691e-06, "loss": 0.6172, "step": 27677 }, { "epoch": 0.8080931943592887, "grad_norm": 0.6662198306944479, "learning_rate": 4.264720194647202e-06, "loss": 0.6052, "step": 27678 }, { "epoch": 0.8081223905871362, "grad_norm": 0.6604254490214923, "learning_rate": 4.264071370640714e-06, "loss": 0.6161, "step": 27679 }, { "epoch": 0.8081515868149836, "grad_norm": 0.6556512830731663, "learning_rate": 4.263422546634226e-06, "loss": 0.5761, "step": 27680 }, { "epoch": 0.8081807830428309, "grad_norm": 0.6529169246343797, "learning_rate": 4.262773722627738e-06, "loss": 0.6252, "step": 27681 }, { "epoch": 0.8082099792706783, "grad_norm": 0.6983871218875716, "learning_rate": 4.262124898621249e-06, "loss": 0.5949, "step": 27682 }, { "epoch": 0.8082391754985256, "grad_norm": 0.6384703052921769, "learning_rate": 4.261476074614761e-06, "loss": 0.6018, "step": 27683 }, { "epoch": 0.808268371726373, "grad_norm": 0.6276747431488359, "learning_rate": 4.2608272506082725e-06, "loss": 0.5564, "step": 27684 }, { "epoch": 0.8082975679542204, "grad_norm": 0.6982999729174586, "learning_rate": 4.2601784266017845e-06, "loss": 0.6916, "step": 27685 }, { "epoch": 0.8083267641820677, "grad_norm": 0.6355572135746184, "learning_rate": 4.2595296025952965e-06, "loss": 0.618, "step": 27686 }, { "epoch": 0.8083559604099151, "grad_norm": 0.6939456451886927, "learning_rate": 4.258880778588808e-06, "loss": 0.6356, "step": 27687 }, { "epoch": 0.8083851566377624, "grad_norm": 0.6219046367888709, "learning_rate": 4.25823195458232e-06, "loss": 0.5472, "step": 27688 }, { "epoch": 0.8084143528656098, "grad_norm": 0.5985036330459864, "learning_rate": 4.257583130575832e-06, "loss": 0.4959, "step": 27689 }, { "epoch": 0.8084435490934572, "grad_norm": 0.647980888961731, "learning_rate": 4.256934306569344e-06, "loss": 0.6326, "step": 27690 }, { "epoch": 0.8084727453213045, "grad_norm": 0.6744668543368988, "learning_rate": 4.256285482562855e-06, "loss": 0.663, "step": 27691 }, { "epoch": 0.8085019415491519, "grad_norm": 0.6609597588185036, "learning_rate": 4.255636658556367e-06, "loss": 0.6181, "step": 27692 }, { "epoch": 0.8085311377769993, "grad_norm": 0.6532092571861943, "learning_rate": 4.254987834549878e-06, "loss": 0.5855, "step": 27693 }, { "epoch": 0.8085603340048466, "grad_norm": 0.5988626629120857, "learning_rate": 4.254339010543391e-06, "loss": 0.498, "step": 27694 }, { "epoch": 0.808589530232694, "grad_norm": 0.6887658441391985, "learning_rate": 4.253690186536902e-06, "loss": 0.6347, "step": 27695 }, { "epoch": 0.8086187264605413, "grad_norm": 0.7039833440710704, "learning_rate": 4.253041362530414e-06, "loss": 0.6466, "step": 27696 }, { "epoch": 0.8086479226883887, "grad_norm": 0.5886676457108811, "learning_rate": 4.2523925385239254e-06, "loss": 0.5343, "step": 27697 }, { "epoch": 0.8086771189162361, "grad_norm": 0.7247247532071092, "learning_rate": 4.2517437145174375e-06, "loss": 0.6612, "step": 27698 }, { "epoch": 0.8087063151440834, "grad_norm": 0.6642260310478689, "learning_rate": 4.2510948905109495e-06, "loss": 0.6413, "step": 27699 }, { "epoch": 0.8087355113719308, "grad_norm": 0.6475111557395737, "learning_rate": 4.2504460665044615e-06, "loss": 0.5777, "step": 27700 }, { "epoch": 0.8087647075997781, "grad_norm": 0.6761514763328338, "learning_rate": 4.249797242497973e-06, "loss": 0.6473, "step": 27701 }, { "epoch": 0.8087939038276255, "grad_norm": 0.6270094070131679, "learning_rate": 4.249148418491485e-06, "loss": 0.5463, "step": 27702 }, { "epoch": 0.8088231000554729, "grad_norm": 0.6450289297525987, "learning_rate": 4.248499594484996e-06, "loss": 0.5467, "step": 27703 }, { "epoch": 0.8088522962833202, "grad_norm": 0.6287249034640414, "learning_rate": 4.247850770478508e-06, "loss": 0.5966, "step": 27704 }, { "epoch": 0.8088814925111676, "grad_norm": 0.6504189293073235, "learning_rate": 4.24720194647202e-06, "loss": 0.5963, "step": 27705 }, { "epoch": 0.808910688739015, "grad_norm": 0.609669391882375, "learning_rate": 4.246553122465531e-06, "loss": 0.5232, "step": 27706 }, { "epoch": 0.8089398849668623, "grad_norm": 0.6095957655277869, "learning_rate": 4.245904298459043e-06, "loss": 0.5375, "step": 27707 }, { "epoch": 0.8089690811947097, "grad_norm": 0.6196391056274028, "learning_rate": 4.245255474452555e-06, "loss": 0.5333, "step": 27708 }, { "epoch": 0.808998277422557, "grad_norm": 0.6496981139655792, "learning_rate": 4.244606650446067e-06, "loss": 0.5703, "step": 27709 }, { "epoch": 0.8090274736504044, "grad_norm": 0.6654563201882834, "learning_rate": 4.243957826439578e-06, "loss": 0.5989, "step": 27710 }, { "epoch": 0.8090566698782518, "grad_norm": 0.599133613933639, "learning_rate": 4.24330900243309e-06, "loss": 0.5567, "step": 27711 }, { "epoch": 0.8090858661060991, "grad_norm": 0.6265528121993594, "learning_rate": 4.242660178426602e-06, "loss": 0.5782, "step": 27712 }, { "epoch": 0.8091150623339465, "grad_norm": 0.6198288200510407, "learning_rate": 4.2420113544201145e-06, "loss": 0.5901, "step": 27713 }, { "epoch": 0.8091442585617938, "grad_norm": 0.6491949002353394, "learning_rate": 4.241362530413626e-06, "loss": 0.6418, "step": 27714 }, { "epoch": 0.8091734547896412, "grad_norm": 0.6533065682509877, "learning_rate": 4.240713706407138e-06, "loss": 0.6639, "step": 27715 }, { "epoch": 0.8092026510174886, "grad_norm": 0.6362181663416252, "learning_rate": 4.240064882400649e-06, "loss": 0.5755, "step": 27716 }, { "epoch": 0.8092318472453359, "grad_norm": 0.6573409365393932, "learning_rate": 4.239416058394161e-06, "loss": 0.5922, "step": 27717 }, { "epoch": 0.8092610434731833, "grad_norm": 0.625332779027033, "learning_rate": 4.238767234387673e-06, "loss": 0.6183, "step": 27718 }, { "epoch": 0.8092902397010306, "grad_norm": 0.5952243518954543, "learning_rate": 4.238118410381185e-06, "loss": 0.5035, "step": 27719 }, { "epoch": 0.809319435928878, "grad_norm": 0.6181771700220711, "learning_rate": 4.237469586374696e-06, "loss": 0.5585, "step": 27720 }, { "epoch": 0.8093486321567254, "grad_norm": 0.5932768714212597, "learning_rate": 4.236820762368208e-06, "loss": 0.5101, "step": 27721 }, { "epoch": 0.8093778283845727, "grad_norm": 0.6461340526930048, "learning_rate": 4.236171938361719e-06, "loss": 0.5363, "step": 27722 }, { "epoch": 0.8094070246124201, "grad_norm": 0.6842423378518124, "learning_rate": 4.235523114355231e-06, "loss": 0.6415, "step": 27723 }, { "epoch": 0.8094362208402675, "grad_norm": 0.6383542122584964, "learning_rate": 4.234874290348743e-06, "loss": 0.5609, "step": 27724 }, { "epoch": 0.8094654170681148, "grad_norm": 0.6973127974148365, "learning_rate": 4.2342254663422546e-06, "loss": 0.67, "step": 27725 }, { "epoch": 0.8094946132959622, "grad_norm": 0.7239208670913726, "learning_rate": 4.233576642335767e-06, "loss": 0.6566, "step": 27726 }, { "epoch": 0.8095238095238095, "grad_norm": 0.6773826698174884, "learning_rate": 4.232927818329279e-06, "loss": 0.6055, "step": 27727 }, { "epoch": 0.8095530057516569, "grad_norm": 0.6179314294548839, "learning_rate": 4.232278994322791e-06, "loss": 0.5661, "step": 27728 }, { "epoch": 0.8095822019795043, "grad_norm": 0.6039636648222183, "learning_rate": 4.231630170316302e-06, "loss": 0.5385, "step": 27729 }, { "epoch": 0.8096113982073516, "grad_norm": 0.677838482460222, "learning_rate": 4.230981346309814e-06, "loss": 0.6028, "step": 27730 }, { "epoch": 0.809640594435199, "grad_norm": 0.6748636903649722, "learning_rate": 4.230332522303325e-06, "loss": 0.6366, "step": 27731 }, { "epoch": 0.8096697906630463, "grad_norm": 0.7121373017799713, "learning_rate": 4.229683698296838e-06, "loss": 0.7083, "step": 27732 }, { "epoch": 0.8096989868908937, "grad_norm": 0.6931387832393764, "learning_rate": 4.229034874290349e-06, "loss": 0.5797, "step": 27733 }, { "epoch": 0.8097281831187411, "grad_norm": 0.6963703887425039, "learning_rate": 4.228386050283861e-06, "loss": 0.7004, "step": 27734 }, { "epoch": 0.8097573793465884, "grad_norm": 0.647321962364818, "learning_rate": 4.227737226277372e-06, "loss": 0.6111, "step": 27735 }, { "epoch": 0.8097865755744358, "grad_norm": 0.6560623739671693, "learning_rate": 4.227088402270884e-06, "loss": 0.5855, "step": 27736 }, { "epoch": 0.8098157718022831, "grad_norm": 0.6329014473629028, "learning_rate": 4.226439578264396e-06, "loss": 0.5779, "step": 27737 }, { "epoch": 0.8098449680301305, "grad_norm": 0.6191896559372799, "learning_rate": 4.225790754257908e-06, "loss": 0.5758, "step": 27738 }, { "epoch": 0.8098741642579779, "grad_norm": 0.6418060408971085, "learning_rate": 4.2251419302514196e-06, "loss": 0.6163, "step": 27739 }, { "epoch": 0.8099033604858252, "grad_norm": 0.6494884123723822, "learning_rate": 4.224493106244932e-06, "loss": 0.5903, "step": 27740 }, { "epoch": 0.8099325567136726, "grad_norm": 0.6786597259655832, "learning_rate": 4.223844282238443e-06, "loss": 0.6433, "step": 27741 }, { "epoch": 0.80996175294152, "grad_norm": 0.6657888277994012, "learning_rate": 4.223195458231955e-06, "loss": 0.6794, "step": 27742 }, { "epoch": 0.8099909491693673, "grad_norm": 0.6461706720827395, "learning_rate": 4.222546634225467e-06, "loss": 0.6326, "step": 27743 }, { "epoch": 0.8100201453972147, "grad_norm": 0.663758312264053, "learning_rate": 4.221897810218978e-06, "loss": 0.6286, "step": 27744 }, { "epoch": 0.810049341625062, "grad_norm": 0.6739136249239635, "learning_rate": 4.22124898621249e-06, "loss": 0.6386, "step": 27745 }, { "epoch": 0.8100785378529094, "grad_norm": 0.6219427393342964, "learning_rate": 4.220600162206002e-06, "loss": 0.5901, "step": 27746 }, { "epoch": 0.8101077340807568, "grad_norm": 0.62741653450685, "learning_rate": 4.219951338199514e-06, "loss": 0.5396, "step": 27747 }, { "epoch": 0.8101369303086041, "grad_norm": 0.6253696088000021, "learning_rate": 4.219302514193025e-06, "loss": 0.5463, "step": 27748 }, { "epoch": 0.8101661265364515, "grad_norm": 0.6610688349201107, "learning_rate": 4.218653690186537e-06, "loss": 0.628, "step": 27749 }, { "epoch": 0.8101953227642988, "grad_norm": 0.6219329494724154, "learning_rate": 4.2180048661800485e-06, "loss": 0.5386, "step": 27750 }, { "epoch": 0.8102245189921462, "grad_norm": 0.6019945882232182, "learning_rate": 4.217356042173561e-06, "loss": 0.5002, "step": 27751 }, { "epoch": 0.8102537152199936, "grad_norm": 0.6715339790609002, "learning_rate": 4.2167072181670725e-06, "loss": 0.6522, "step": 27752 }, { "epoch": 0.8102829114478409, "grad_norm": 0.6715043893900373, "learning_rate": 4.2160583941605845e-06, "loss": 0.5877, "step": 27753 }, { "epoch": 0.8103121076756883, "grad_norm": 0.6188854223562661, "learning_rate": 4.215409570154096e-06, "loss": 0.5161, "step": 27754 }, { "epoch": 0.8103413039035356, "grad_norm": 0.6502305019838382, "learning_rate": 4.214760746147608e-06, "loss": 0.6274, "step": 27755 }, { "epoch": 0.810370500131383, "grad_norm": 0.623027530407401, "learning_rate": 4.21411192214112e-06, "loss": 0.5571, "step": 27756 }, { "epoch": 0.8103996963592304, "grad_norm": 0.643697762737087, "learning_rate": 4.213463098134632e-06, "loss": 0.6621, "step": 27757 }, { "epoch": 0.8104288925870777, "grad_norm": 0.6582385693013191, "learning_rate": 4.212814274128143e-06, "loss": 0.5786, "step": 27758 }, { "epoch": 0.8104580888149251, "grad_norm": 0.6710851283852864, "learning_rate": 4.212165450121655e-06, "loss": 0.6319, "step": 27759 }, { "epoch": 0.8104872850427725, "grad_norm": 0.6067712206304186, "learning_rate": 4.211516626115167e-06, "loss": 0.5193, "step": 27760 }, { "epoch": 0.8105164812706198, "grad_norm": 0.7166233420929488, "learning_rate": 4.210867802108678e-06, "loss": 0.7557, "step": 27761 }, { "epoch": 0.8105456774984672, "grad_norm": 0.6628939909200865, "learning_rate": 4.21021897810219e-06, "loss": 0.6332, "step": 27762 }, { "epoch": 0.8105748737263145, "grad_norm": 0.6715492650440278, "learning_rate": 4.2095701540957014e-06, "loss": 0.6371, "step": 27763 }, { "epoch": 0.8106040699541619, "grad_norm": 0.6499389671106182, "learning_rate": 4.2089213300892135e-06, "loss": 0.6522, "step": 27764 }, { "epoch": 0.8106332661820093, "grad_norm": 0.6606855495483778, "learning_rate": 4.2082725060827255e-06, "loss": 0.6041, "step": 27765 }, { "epoch": 0.8106624624098566, "grad_norm": 0.6351339059066131, "learning_rate": 4.2076236820762375e-06, "loss": 0.633, "step": 27766 }, { "epoch": 0.810691658637704, "grad_norm": 0.654481291998691, "learning_rate": 4.206974858069749e-06, "loss": 0.6209, "step": 27767 }, { "epoch": 0.8107208548655513, "grad_norm": 0.6943216347859829, "learning_rate": 4.206326034063261e-06, "loss": 0.6612, "step": 27768 }, { "epoch": 0.8107500510933987, "grad_norm": 0.6658368786556103, "learning_rate": 4.205677210056772e-06, "loss": 0.6329, "step": 27769 }, { "epoch": 0.8107792473212461, "grad_norm": 0.6524773259547444, "learning_rate": 4.205028386050285e-06, "loss": 0.6167, "step": 27770 }, { "epoch": 0.8108084435490934, "grad_norm": 0.6619550790951549, "learning_rate": 4.204379562043796e-06, "loss": 0.5831, "step": 27771 }, { "epoch": 0.8108376397769408, "grad_norm": 0.6458081978378377, "learning_rate": 4.203730738037308e-06, "loss": 0.6348, "step": 27772 }, { "epoch": 0.8108668360047881, "grad_norm": 0.6292784118205436, "learning_rate": 4.203081914030819e-06, "loss": 0.5825, "step": 27773 }, { "epoch": 0.8108960322326355, "grad_norm": 0.6538817942615781, "learning_rate": 4.202433090024331e-06, "loss": 0.6574, "step": 27774 }, { "epoch": 0.8109252284604829, "grad_norm": 0.640444621046263, "learning_rate": 4.201784266017843e-06, "loss": 0.6081, "step": 27775 }, { "epoch": 0.8109544246883302, "grad_norm": 0.6294509372386609, "learning_rate": 4.201135442011354e-06, "loss": 0.599, "step": 27776 }, { "epoch": 0.8109836209161776, "grad_norm": 0.6199156767555833, "learning_rate": 4.2004866180048664e-06, "loss": 0.5417, "step": 27777 }, { "epoch": 0.811012817144025, "grad_norm": 0.7314324375077272, "learning_rate": 4.1998377939983785e-06, "loss": 0.6955, "step": 27778 }, { "epoch": 0.8110420133718723, "grad_norm": 0.5814163226386441, "learning_rate": 4.1991889699918905e-06, "loss": 0.5241, "step": 27779 }, { "epoch": 0.8110712095997197, "grad_norm": 0.7327530435170708, "learning_rate": 4.198540145985402e-06, "loss": 0.6714, "step": 27780 }, { "epoch": 0.811100405827567, "grad_norm": 0.7018517637884111, "learning_rate": 4.197891321978914e-06, "loss": 0.6819, "step": 27781 }, { "epoch": 0.8111296020554144, "grad_norm": 0.6131940043356853, "learning_rate": 4.197242497972425e-06, "loss": 0.5292, "step": 27782 }, { "epoch": 0.8111587982832618, "grad_norm": 0.6124978912078719, "learning_rate": 4.196593673965937e-06, "loss": 0.5233, "step": 27783 }, { "epoch": 0.8111879945111091, "grad_norm": 0.6113434812185893, "learning_rate": 4.195944849959449e-06, "loss": 0.5529, "step": 27784 }, { "epoch": 0.8112171907389565, "grad_norm": 0.6802807140856921, "learning_rate": 4.195296025952961e-06, "loss": 0.612, "step": 27785 }, { "epoch": 0.8112463869668038, "grad_norm": 0.7851626117212831, "learning_rate": 4.194647201946472e-06, "loss": 0.6368, "step": 27786 }, { "epoch": 0.8112755831946512, "grad_norm": 0.7027523480983439, "learning_rate": 4.193998377939984e-06, "loss": 0.661, "step": 27787 }, { "epoch": 0.8113047794224986, "grad_norm": 0.6132961823713496, "learning_rate": 4.193349553933495e-06, "loss": 0.5806, "step": 27788 }, { "epoch": 0.8113339756503459, "grad_norm": 0.6655654023078951, "learning_rate": 4.192700729927008e-06, "loss": 0.6702, "step": 27789 }, { "epoch": 0.8113631718781933, "grad_norm": 0.688782788242493, "learning_rate": 4.192051905920519e-06, "loss": 0.6294, "step": 27790 }, { "epoch": 0.8113923681060407, "grad_norm": 0.6269058488301689, "learning_rate": 4.191403081914031e-06, "loss": 0.551, "step": 27791 }, { "epoch": 0.811421564333888, "grad_norm": 0.713399867471789, "learning_rate": 4.190754257907543e-06, "loss": 0.6283, "step": 27792 }, { "epoch": 0.8114507605617354, "grad_norm": 0.6338378700615537, "learning_rate": 4.190105433901055e-06, "loss": 0.5838, "step": 27793 }, { "epoch": 0.8114799567895827, "grad_norm": 0.6509838428103207, "learning_rate": 4.189456609894567e-06, "loss": 0.558, "step": 27794 }, { "epoch": 0.8115091530174301, "grad_norm": 0.6412313582566282, "learning_rate": 4.188807785888078e-06, "loss": 0.6047, "step": 27795 }, { "epoch": 0.8115383492452775, "grad_norm": 0.662641624764611, "learning_rate": 4.18815896188159e-06, "loss": 0.5866, "step": 27796 }, { "epoch": 0.8115675454731248, "grad_norm": 0.6424095914337613, "learning_rate": 4.187510137875102e-06, "loss": 0.652, "step": 27797 }, { "epoch": 0.8115967417009722, "grad_norm": 0.6623381566274337, "learning_rate": 4.186861313868614e-06, "loss": 0.701, "step": 27798 }, { "epoch": 0.8116259379288197, "grad_norm": 0.6780729618026976, "learning_rate": 4.186212489862125e-06, "loss": 0.6528, "step": 27799 }, { "epoch": 0.811655134156667, "grad_norm": 0.6408208094534863, "learning_rate": 4.185563665855637e-06, "loss": 0.5636, "step": 27800 }, { "epoch": 0.8116843303845144, "grad_norm": 0.6810573786502311, "learning_rate": 4.184914841849148e-06, "loss": 0.658, "step": 27801 }, { "epoch": 0.8117135266123617, "grad_norm": 0.7049802204852628, "learning_rate": 4.18426601784266e-06, "loss": 0.6329, "step": 27802 }, { "epoch": 0.8117427228402091, "grad_norm": 0.702339075034461, "learning_rate": 4.183617193836172e-06, "loss": 0.6898, "step": 27803 }, { "epoch": 0.8117719190680565, "grad_norm": 0.6395708654263434, "learning_rate": 4.182968369829684e-06, "loss": 0.5676, "step": 27804 }, { "epoch": 0.8118011152959038, "grad_norm": 0.6647298501388478, "learning_rate": 4.1823195458231956e-06, "loss": 0.6185, "step": 27805 }, { "epoch": 0.8118303115237512, "grad_norm": 0.6059527828824673, "learning_rate": 4.181670721816708e-06, "loss": 0.5279, "step": 27806 }, { "epoch": 0.8118595077515985, "grad_norm": 0.6318739128513793, "learning_rate": 4.181021897810219e-06, "loss": 0.5364, "step": 27807 }, { "epoch": 0.8118887039794459, "grad_norm": 0.628970929671667, "learning_rate": 4.180373073803732e-06, "loss": 0.5293, "step": 27808 }, { "epoch": 0.8119179002072933, "grad_norm": 0.668043230093593, "learning_rate": 4.179724249797243e-06, "loss": 0.627, "step": 27809 }, { "epoch": 0.8119470964351406, "grad_norm": 0.6578684610508209, "learning_rate": 4.179075425790755e-06, "loss": 0.5864, "step": 27810 }, { "epoch": 0.811976292662988, "grad_norm": 0.6274999834670455, "learning_rate": 4.178426601784266e-06, "loss": 0.5539, "step": 27811 }, { "epoch": 0.8120054888908353, "grad_norm": 0.7018994020920527, "learning_rate": 4.177777777777778e-06, "loss": 0.66, "step": 27812 }, { "epoch": 0.8120346851186827, "grad_norm": 0.6378699337129823, "learning_rate": 4.17712895377129e-06, "loss": 0.5504, "step": 27813 }, { "epoch": 0.8120638813465301, "grad_norm": 0.6768962172252837, "learning_rate": 4.176480129764801e-06, "loss": 0.6715, "step": 27814 }, { "epoch": 0.8120930775743774, "grad_norm": 0.6132613727135171, "learning_rate": 4.175831305758313e-06, "loss": 0.5393, "step": 27815 }, { "epoch": 0.8121222738022248, "grad_norm": 0.6481325502870813, "learning_rate": 4.175182481751825e-06, "loss": 0.5756, "step": 27816 }, { "epoch": 0.8121514700300722, "grad_norm": 0.6290471981385669, "learning_rate": 4.174533657745337e-06, "loss": 0.5784, "step": 27817 }, { "epoch": 0.8121806662579195, "grad_norm": 0.6403605875439647, "learning_rate": 4.1738848337388485e-06, "loss": 0.6095, "step": 27818 }, { "epoch": 0.8122098624857669, "grad_norm": 0.6408329956747932, "learning_rate": 4.1732360097323606e-06, "loss": 0.606, "step": 27819 }, { "epoch": 0.8122390587136142, "grad_norm": 0.6287912228732557, "learning_rate": 4.172587185725872e-06, "loss": 0.5785, "step": 27820 }, { "epoch": 0.8122682549414616, "grad_norm": 0.6312342717580912, "learning_rate": 4.171938361719384e-06, "loss": 0.5606, "step": 27821 }, { "epoch": 0.812297451169309, "grad_norm": 0.5995340089488718, "learning_rate": 4.171289537712896e-06, "loss": 0.5368, "step": 27822 }, { "epoch": 0.8123266473971563, "grad_norm": 0.6711537047366068, "learning_rate": 4.170640713706408e-06, "loss": 0.6614, "step": 27823 }, { "epoch": 0.8123558436250037, "grad_norm": 0.6921793584737156, "learning_rate": 4.169991889699919e-06, "loss": 0.6747, "step": 27824 }, { "epoch": 0.812385039852851, "grad_norm": 0.7352388806426856, "learning_rate": 4.169343065693431e-06, "loss": 0.537, "step": 27825 }, { "epoch": 0.8124142360806984, "grad_norm": 0.6331035234469575, "learning_rate": 4.168694241686943e-06, "loss": 0.5751, "step": 27826 }, { "epoch": 0.8124434323085458, "grad_norm": 0.7189383890379966, "learning_rate": 4.168045417680455e-06, "loss": 0.6635, "step": 27827 }, { "epoch": 0.8124726285363931, "grad_norm": 0.6134473536203732, "learning_rate": 4.167396593673966e-06, "loss": 0.5513, "step": 27828 }, { "epoch": 0.8125018247642405, "grad_norm": 0.7135970282965604, "learning_rate": 4.166747769667478e-06, "loss": 0.5993, "step": 27829 }, { "epoch": 0.8125310209920878, "grad_norm": 0.64712822800942, "learning_rate": 4.1660989456609895e-06, "loss": 0.5561, "step": 27830 }, { "epoch": 0.8125602172199352, "grad_norm": 0.6261853405546582, "learning_rate": 4.1654501216545015e-06, "loss": 0.5855, "step": 27831 }, { "epoch": 0.8125894134477826, "grad_norm": 0.598065506013796, "learning_rate": 4.1648012976480135e-06, "loss": 0.5454, "step": 27832 }, { "epoch": 0.8126186096756299, "grad_norm": 0.5916643163668097, "learning_rate": 4.164152473641525e-06, "loss": 0.5166, "step": 27833 }, { "epoch": 0.8126478059034773, "grad_norm": 0.6602144387207848, "learning_rate": 4.163503649635037e-06, "loss": 0.5896, "step": 27834 }, { "epoch": 0.8126770021313247, "grad_norm": 0.7156347249681316, "learning_rate": 4.162854825628549e-06, "loss": 0.6232, "step": 27835 }, { "epoch": 0.812706198359172, "grad_norm": 0.6573474024786576, "learning_rate": 4.162206001622061e-06, "loss": 0.6162, "step": 27836 }, { "epoch": 0.8127353945870194, "grad_norm": 0.637610890673534, "learning_rate": 4.161557177615572e-06, "loss": 0.6001, "step": 27837 }, { "epoch": 0.8127645908148667, "grad_norm": 0.6353133950098017, "learning_rate": 4.160908353609084e-06, "loss": 0.603, "step": 27838 }, { "epoch": 0.8127937870427141, "grad_norm": 0.6340198699287194, "learning_rate": 4.160259529602595e-06, "loss": 0.559, "step": 27839 }, { "epoch": 0.8128229832705615, "grad_norm": 0.6572288234768273, "learning_rate": 4.159610705596107e-06, "loss": 0.6499, "step": 27840 }, { "epoch": 0.8128521794984088, "grad_norm": 0.6863116699850242, "learning_rate": 4.158961881589619e-06, "loss": 0.6733, "step": 27841 }, { "epoch": 0.8128813757262562, "grad_norm": 0.6242480344286031, "learning_rate": 4.158313057583131e-06, "loss": 0.5509, "step": 27842 }, { "epoch": 0.8129105719541035, "grad_norm": 0.6841201068181819, "learning_rate": 4.1576642335766424e-06, "loss": 0.6477, "step": 27843 }, { "epoch": 0.8129397681819509, "grad_norm": 0.6509177869905532, "learning_rate": 4.1570154095701545e-06, "loss": 0.6051, "step": 27844 }, { "epoch": 0.8129689644097983, "grad_norm": 0.626502047988312, "learning_rate": 4.1563665855636665e-06, "loss": 0.552, "step": 27845 }, { "epoch": 0.8129981606376456, "grad_norm": 0.6517748973903462, "learning_rate": 4.1557177615571785e-06, "loss": 0.6216, "step": 27846 }, { "epoch": 0.813027356865493, "grad_norm": 0.651168688184044, "learning_rate": 4.15506893755069e-06, "loss": 0.5934, "step": 27847 }, { "epoch": 0.8130565530933404, "grad_norm": 0.5721802793170008, "learning_rate": 4.154420113544202e-06, "loss": 0.5003, "step": 27848 }, { "epoch": 0.8130857493211877, "grad_norm": 0.648147340377591, "learning_rate": 4.153771289537713e-06, "loss": 0.6366, "step": 27849 }, { "epoch": 0.8131149455490351, "grad_norm": 0.6589688671335237, "learning_rate": 4.153122465531225e-06, "loss": 0.5998, "step": 27850 }, { "epoch": 0.8131441417768824, "grad_norm": 0.7377285907210499, "learning_rate": 4.152473641524737e-06, "loss": 0.5988, "step": 27851 }, { "epoch": 0.8131733380047298, "grad_norm": 0.6661687323912956, "learning_rate": 4.151824817518248e-06, "loss": 0.5776, "step": 27852 }, { "epoch": 0.8132025342325772, "grad_norm": 0.637580779718571, "learning_rate": 4.15117599351176e-06, "loss": 0.6104, "step": 27853 }, { "epoch": 0.8132317304604245, "grad_norm": 0.6952477617041022, "learning_rate": 4.150527169505272e-06, "loss": 0.6605, "step": 27854 }, { "epoch": 0.8132609266882719, "grad_norm": 0.6926199041806045, "learning_rate": 4.149878345498784e-06, "loss": 0.6171, "step": 27855 }, { "epoch": 0.8132901229161192, "grad_norm": 0.6628814768912327, "learning_rate": 4.149229521492295e-06, "loss": 0.644, "step": 27856 }, { "epoch": 0.8133193191439666, "grad_norm": 0.8032468650538774, "learning_rate": 4.1485806974858074e-06, "loss": 0.6112, "step": 27857 }, { "epoch": 0.813348515371814, "grad_norm": 0.6559423346753929, "learning_rate": 4.147931873479319e-06, "loss": 0.5764, "step": 27858 }, { "epoch": 0.8133777115996613, "grad_norm": 0.6587342770256798, "learning_rate": 4.147283049472831e-06, "loss": 0.6706, "step": 27859 }, { "epoch": 0.8134069078275087, "grad_norm": 0.6176135221164354, "learning_rate": 4.146634225466343e-06, "loss": 0.571, "step": 27860 }, { "epoch": 0.813436104055356, "grad_norm": 0.7200584137138719, "learning_rate": 4.145985401459855e-06, "loss": 0.6769, "step": 27861 }, { "epoch": 0.8134653002832034, "grad_norm": 0.5926831869702583, "learning_rate": 4.145336577453366e-06, "loss": 0.5494, "step": 27862 }, { "epoch": 0.8134944965110508, "grad_norm": 0.6550368919252095, "learning_rate": 4.144687753446878e-06, "loss": 0.5522, "step": 27863 }, { "epoch": 0.8135236927388981, "grad_norm": 0.659195972865539, "learning_rate": 4.14403892944039e-06, "loss": 0.6021, "step": 27864 }, { "epoch": 0.8135528889667455, "grad_norm": 0.6709278351562828, "learning_rate": 4.143390105433901e-06, "loss": 0.6407, "step": 27865 }, { "epoch": 0.8135820851945929, "grad_norm": 0.6610618258009091, "learning_rate": 4.142741281427413e-06, "loss": 0.561, "step": 27866 }, { "epoch": 0.8136112814224402, "grad_norm": 0.6690126003186604, "learning_rate": 4.142092457420925e-06, "loss": 0.6225, "step": 27867 }, { "epoch": 0.8136404776502876, "grad_norm": 0.6000415219803948, "learning_rate": 4.141443633414436e-06, "loss": 0.5165, "step": 27868 }, { "epoch": 0.8136696738781349, "grad_norm": 0.7463272489527598, "learning_rate": 4.140794809407948e-06, "loss": 0.7518, "step": 27869 }, { "epoch": 0.8136988701059823, "grad_norm": 0.606666081344144, "learning_rate": 4.14014598540146e-06, "loss": 0.5605, "step": 27870 }, { "epoch": 0.8137280663338297, "grad_norm": 0.6761881590201722, "learning_rate": 4.1394971613949716e-06, "loss": 0.6435, "step": 27871 }, { "epoch": 0.813757262561677, "grad_norm": 0.6317402446951056, "learning_rate": 4.138848337388484e-06, "loss": 0.5782, "step": 27872 }, { "epoch": 0.8137864587895244, "grad_norm": 0.5838875478048726, "learning_rate": 4.138199513381996e-06, "loss": 0.5137, "step": 27873 }, { "epoch": 0.8138156550173717, "grad_norm": 0.6774736828025175, "learning_rate": 4.137550689375508e-06, "loss": 0.6371, "step": 27874 }, { "epoch": 0.8138448512452191, "grad_norm": 0.6805542960889208, "learning_rate": 4.136901865369019e-06, "loss": 0.6785, "step": 27875 }, { "epoch": 0.8138740474730665, "grad_norm": 0.6425909275631483, "learning_rate": 4.136253041362531e-06, "loss": 0.5506, "step": 27876 }, { "epoch": 0.8139032437009138, "grad_norm": 0.6167586286517134, "learning_rate": 4.135604217356042e-06, "loss": 0.5431, "step": 27877 }, { "epoch": 0.8139324399287612, "grad_norm": 0.636790755268781, "learning_rate": 4.134955393349555e-06, "loss": 0.6134, "step": 27878 }, { "epoch": 0.8139616361566085, "grad_norm": 0.6227817476017342, "learning_rate": 4.134306569343066e-06, "loss": 0.5763, "step": 27879 }, { "epoch": 0.8139908323844559, "grad_norm": 0.7133157126440279, "learning_rate": 4.133657745336578e-06, "loss": 0.6827, "step": 27880 }, { "epoch": 0.8140200286123033, "grad_norm": 0.6110791915431302, "learning_rate": 4.133008921330089e-06, "loss": 0.5628, "step": 27881 }, { "epoch": 0.8140492248401506, "grad_norm": 0.6284428414950073, "learning_rate": 4.132360097323601e-06, "loss": 0.5397, "step": 27882 }, { "epoch": 0.814078421067998, "grad_norm": 0.6272765051973399, "learning_rate": 4.131711273317113e-06, "loss": 0.5737, "step": 27883 }, { "epoch": 0.8141076172958454, "grad_norm": 0.6187539190023909, "learning_rate": 4.1310624493106245e-06, "loss": 0.5463, "step": 27884 }, { "epoch": 0.8141368135236927, "grad_norm": 0.6089207316289768, "learning_rate": 4.1304136253041366e-06, "loss": 0.567, "step": 27885 }, { "epoch": 0.8141660097515401, "grad_norm": 0.6023451725671594, "learning_rate": 4.129764801297649e-06, "loss": 0.5618, "step": 27886 }, { "epoch": 0.8141952059793874, "grad_norm": 0.7485217509909917, "learning_rate": 4.12911597729116e-06, "loss": 0.7467, "step": 27887 }, { "epoch": 0.8142244022072348, "grad_norm": 0.7053147096528638, "learning_rate": 4.128467153284672e-06, "loss": 0.7118, "step": 27888 }, { "epoch": 0.8142535984350822, "grad_norm": 0.6704758631285412, "learning_rate": 4.127818329278184e-06, "loss": 0.6142, "step": 27889 }, { "epoch": 0.8142827946629295, "grad_norm": 0.6229658473069597, "learning_rate": 4.127169505271695e-06, "loss": 0.5605, "step": 27890 }, { "epoch": 0.8143119908907769, "grad_norm": 0.673929958761246, "learning_rate": 4.126520681265207e-06, "loss": 0.5784, "step": 27891 }, { "epoch": 0.8143411871186242, "grad_norm": 0.6294694455541487, "learning_rate": 4.125871857258719e-06, "loss": 0.5293, "step": 27892 }, { "epoch": 0.8143703833464716, "grad_norm": 0.6491581604593031, "learning_rate": 4.125223033252231e-06, "loss": 0.5542, "step": 27893 }, { "epoch": 0.814399579574319, "grad_norm": 0.6068289792176519, "learning_rate": 4.124574209245742e-06, "loss": 0.5706, "step": 27894 }, { "epoch": 0.8144287758021663, "grad_norm": 0.650567957831778, "learning_rate": 4.123925385239254e-06, "loss": 0.6263, "step": 27895 }, { "epoch": 0.8144579720300137, "grad_norm": 0.6515935403118153, "learning_rate": 4.1232765612327655e-06, "loss": 0.6334, "step": 27896 }, { "epoch": 0.814487168257861, "grad_norm": 0.6038782578527998, "learning_rate": 4.122627737226278e-06, "loss": 0.5032, "step": 27897 }, { "epoch": 0.8145163644857084, "grad_norm": 0.7626789767088669, "learning_rate": 4.1219789132197895e-06, "loss": 0.6499, "step": 27898 }, { "epoch": 0.8145455607135558, "grad_norm": 0.6758238498251602, "learning_rate": 4.1213300892133016e-06, "loss": 0.6771, "step": 27899 }, { "epoch": 0.8145747569414031, "grad_norm": 0.6540917021243038, "learning_rate": 4.120681265206813e-06, "loss": 0.5854, "step": 27900 }, { "epoch": 0.8146039531692505, "grad_norm": 0.6720852132434099, "learning_rate": 4.120032441200325e-06, "loss": 0.6659, "step": 27901 }, { "epoch": 0.8146331493970979, "grad_norm": 0.6907086723363941, "learning_rate": 4.119383617193837e-06, "loss": 0.7252, "step": 27902 }, { "epoch": 0.8146623456249452, "grad_norm": 0.5825713436396903, "learning_rate": 4.118734793187348e-06, "loss": 0.5285, "step": 27903 }, { "epoch": 0.8146915418527926, "grad_norm": 0.7222263576679036, "learning_rate": 4.11808596918086e-06, "loss": 0.6964, "step": 27904 }, { "epoch": 0.8147207380806399, "grad_norm": 0.7106587984171426, "learning_rate": 4.117437145174372e-06, "loss": 0.6484, "step": 27905 }, { "epoch": 0.8147499343084873, "grad_norm": 0.632481354690552, "learning_rate": 4.116788321167883e-06, "loss": 0.6115, "step": 27906 }, { "epoch": 0.8147791305363347, "grad_norm": 0.6640872552370475, "learning_rate": 4.116139497161395e-06, "loss": 0.6413, "step": 27907 }, { "epoch": 0.814808326764182, "grad_norm": 0.5995689943686269, "learning_rate": 4.115490673154907e-06, "loss": 0.6016, "step": 27908 }, { "epoch": 0.8148375229920294, "grad_norm": 0.6538779740924847, "learning_rate": 4.1148418491484184e-06, "loss": 0.62, "step": 27909 }, { "epoch": 0.8148667192198767, "grad_norm": 0.6345307343285949, "learning_rate": 4.1141930251419305e-06, "loss": 0.6034, "step": 27910 }, { "epoch": 0.8148959154477241, "grad_norm": 0.6428790125392848, "learning_rate": 4.1135442011354425e-06, "loss": 0.614, "step": 27911 }, { "epoch": 0.8149251116755715, "grad_norm": 0.6946396709360658, "learning_rate": 4.1128953771289545e-06, "loss": 0.6225, "step": 27912 }, { "epoch": 0.8149543079034188, "grad_norm": 0.6649719211210636, "learning_rate": 4.112246553122466e-06, "loss": 0.6535, "step": 27913 }, { "epoch": 0.8149835041312662, "grad_norm": 0.676152712140579, "learning_rate": 4.111597729115978e-06, "loss": 0.6539, "step": 27914 }, { "epoch": 0.8150127003591136, "grad_norm": 0.6416738560233995, "learning_rate": 4.110948905109489e-06, "loss": 0.5823, "step": 27915 }, { "epoch": 0.8150418965869609, "grad_norm": 0.6124622523585045, "learning_rate": 4.110300081103002e-06, "loss": 0.5617, "step": 27916 }, { "epoch": 0.8150710928148083, "grad_norm": 0.5952702053341008, "learning_rate": 4.109651257096513e-06, "loss": 0.4898, "step": 27917 }, { "epoch": 0.8151002890426556, "grad_norm": 0.6231240164460723, "learning_rate": 4.109002433090025e-06, "loss": 0.5541, "step": 27918 }, { "epoch": 0.815129485270503, "grad_norm": 0.6514074547331843, "learning_rate": 4.108353609083536e-06, "loss": 0.5782, "step": 27919 }, { "epoch": 0.8151586814983505, "grad_norm": 0.631084147917112, "learning_rate": 4.107704785077048e-06, "loss": 0.585, "step": 27920 }, { "epoch": 0.8151878777261978, "grad_norm": 0.656074951300587, "learning_rate": 4.10705596107056e-06, "loss": 0.6422, "step": 27921 }, { "epoch": 0.8152170739540452, "grad_norm": 0.6645093854507846, "learning_rate": 4.106407137064071e-06, "loss": 0.6257, "step": 27922 }, { "epoch": 0.8152462701818926, "grad_norm": 0.5709872889627245, "learning_rate": 4.1057583130575834e-06, "loss": 0.4957, "step": 27923 }, { "epoch": 0.8152754664097399, "grad_norm": 0.6601244751869274, "learning_rate": 4.1051094890510955e-06, "loss": 0.6369, "step": 27924 }, { "epoch": 0.8153046626375873, "grad_norm": 0.6161788428786522, "learning_rate": 4.104460665044607e-06, "loss": 0.5182, "step": 27925 }, { "epoch": 0.8153338588654346, "grad_norm": 0.6673372104120082, "learning_rate": 4.103811841038119e-06, "loss": 0.579, "step": 27926 }, { "epoch": 0.815363055093282, "grad_norm": 0.6377750383362428, "learning_rate": 4.103163017031631e-06, "loss": 0.5999, "step": 27927 }, { "epoch": 0.8153922513211294, "grad_norm": 0.640946716005875, "learning_rate": 4.102514193025142e-06, "loss": 0.5904, "step": 27928 }, { "epoch": 0.8154214475489767, "grad_norm": 0.6514214752865676, "learning_rate": 4.101865369018654e-06, "loss": 0.5933, "step": 27929 }, { "epoch": 0.8154506437768241, "grad_norm": 0.633951153760479, "learning_rate": 4.101216545012166e-06, "loss": 0.6045, "step": 27930 }, { "epoch": 0.8154798400046714, "grad_norm": 0.647434261859472, "learning_rate": 4.100567721005678e-06, "loss": 0.5829, "step": 27931 }, { "epoch": 0.8155090362325188, "grad_norm": 0.6421159829934715, "learning_rate": 4.099918896999189e-06, "loss": 0.5731, "step": 27932 }, { "epoch": 0.8155382324603662, "grad_norm": 0.6625514955730707, "learning_rate": 4.099270072992701e-06, "loss": 0.6405, "step": 27933 }, { "epoch": 0.8155674286882135, "grad_norm": 0.6173617735988076, "learning_rate": 4.098621248986212e-06, "loss": 0.5702, "step": 27934 }, { "epoch": 0.8155966249160609, "grad_norm": 0.6150991790794943, "learning_rate": 4.097972424979725e-06, "loss": 0.5687, "step": 27935 }, { "epoch": 0.8156258211439082, "grad_norm": 0.632371923295562, "learning_rate": 4.097323600973236e-06, "loss": 0.6052, "step": 27936 }, { "epoch": 0.8156550173717556, "grad_norm": 0.6303667446591448, "learning_rate": 4.0966747769667484e-06, "loss": 0.592, "step": 27937 }, { "epoch": 0.815684213599603, "grad_norm": 0.6895013874994091, "learning_rate": 4.09602595296026e-06, "loss": 0.6634, "step": 27938 }, { "epoch": 0.8157134098274503, "grad_norm": 0.6287270896931606, "learning_rate": 4.095377128953772e-06, "loss": 0.55, "step": 27939 }, { "epoch": 0.8157426060552977, "grad_norm": 0.6089675575930052, "learning_rate": 4.094728304947284e-06, "loss": 0.5091, "step": 27940 }, { "epoch": 0.815771802283145, "grad_norm": 0.6458433587243587, "learning_rate": 4.094079480940795e-06, "loss": 0.5752, "step": 27941 }, { "epoch": 0.8158009985109924, "grad_norm": 0.61926260891888, "learning_rate": 4.093430656934307e-06, "loss": 0.5475, "step": 27942 }, { "epoch": 0.8158301947388398, "grad_norm": 0.6604867958746999, "learning_rate": 4.092781832927819e-06, "loss": 0.604, "step": 27943 }, { "epoch": 0.8158593909666871, "grad_norm": 0.6548366741774956, "learning_rate": 4.092133008921331e-06, "loss": 0.6674, "step": 27944 }, { "epoch": 0.8158885871945345, "grad_norm": 0.5946648521945589, "learning_rate": 4.091484184914842e-06, "loss": 0.5513, "step": 27945 }, { "epoch": 0.8159177834223819, "grad_norm": 0.6634111047536078, "learning_rate": 4.090835360908354e-06, "loss": 0.6079, "step": 27946 }, { "epoch": 0.8159469796502292, "grad_norm": 0.659936255207692, "learning_rate": 4.090186536901865e-06, "loss": 0.6704, "step": 27947 }, { "epoch": 0.8159761758780766, "grad_norm": 0.6648605813173261, "learning_rate": 4.089537712895377e-06, "loss": 0.6438, "step": 27948 }, { "epoch": 0.8160053721059239, "grad_norm": 0.7067260044851534, "learning_rate": 4.088888888888889e-06, "loss": 0.7246, "step": 27949 }, { "epoch": 0.8160345683337713, "grad_norm": 0.6195269841501212, "learning_rate": 4.088240064882401e-06, "loss": 0.5549, "step": 27950 }, { "epoch": 0.8160637645616187, "grad_norm": 0.634177959868839, "learning_rate": 4.0875912408759126e-06, "loss": 0.5914, "step": 27951 }, { "epoch": 0.816092960789466, "grad_norm": 0.6528397185557829, "learning_rate": 4.086942416869425e-06, "loss": 0.6043, "step": 27952 }, { "epoch": 0.8161221570173134, "grad_norm": 0.6503993047359387, "learning_rate": 4.086293592862936e-06, "loss": 0.6307, "step": 27953 }, { "epoch": 0.8161513532451607, "grad_norm": 0.6433847064096647, "learning_rate": 4.085644768856449e-06, "loss": 0.582, "step": 27954 }, { "epoch": 0.8161805494730081, "grad_norm": 0.681430325603148, "learning_rate": 4.08499594484996e-06, "loss": 0.6683, "step": 27955 }, { "epoch": 0.8162097457008555, "grad_norm": 0.6642331577470394, "learning_rate": 4.084347120843472e-06, "loss": 0.6515, "step": 27956 }, { "epoch": 0.8162389419287028, "grad_norm": 0.6563121236338469, "learning_rate": 4.083698296836983e-06, "loss": 0.6039, "step": 27957 }, { "epoch": 0.8162681381565502, "grad_norm": 0.6533531211491165, "learning_rate": 4.083049472830495e-06, "loss": 0.5958, "step": 27958 }, { "epoch": 0.8162973343843976, "grad_norm": 0.6332571325025347, "learning_rate": 4.082400648824007e-06, "loss": 0.5963, "step": 27959 }, { "epoch": 0.8163265306122449, "grad_norm": 0.6717349220200722, "learning_rate": 4.081751824817518e-06, "loss": 0.574, "step": 27960 }, { "epoch": 0.8163557268400923, "grad_norm": 0.6129954384846052, "learning_rate": 4.08110300081103e-06, "loss": 0.5511, "step": 27961 }, { "epoch": 0.8163849230679396, "grad_norm": 0.6330162853485966, "learning_rate": 4.080454176804542e-06, "loss": 0.5867, "step": 27962 }, { "epoch": 0.816414119295787, "grad_norm": 0.6695700581120525, "learning_rate": 4.079805352798054e-06, "loss": 0.5899, "step": 27963 }, { "epoch": 0.8164433155236344, "grad_norm": 0.6576328692869676, "learning_rate": 4.0791565287915655e-06, "loss": 0.6252, "step": 27964 }, { "epoch": 0.8164725117514817, "grad_norm": 0.6304411338572287, "learning_rate": 4.0785077047850776e-06, "loss": 0.5787, "step": 27965 }, { "epoch": 0.8165017079793291, "grad_norm": 0.6467679847199055, "learning_rate": 4.077858880778589e-06, "loss": 0.6059, "step": 27966 }, { "epoch": 0.8165309042071764, "grad_norm": 0.6577967635500959, "learning_rate": 4.077210056772101e-06, "loss": 0.6132, "step": 27967 }, { "epoch": 0.8165601004350238, "grad_norm": 0.6361616367477922, "learning_rate": 4.076561232765613e-06, "loss": 0.5311, "step": 27968 }, { "epoch": 0.8165892966628712, "grad_norm": 0.6607019818525778, "learning_rate": 4.075912408759125e-06, "loss": 0.6158, "step": 27969 }, { "epoch": 0.8166184928907185, "grad_norm": 0.6639294762036598, "learning_rate": 4.075263584752636e-06, "loss": 0.5988, "step": 27970 }, { "epoch": 0.8166476891185659, "grad_norm": 0.6941034839380728, "learning_rate": 4.074614760746148e-06, "loss": 0.6517, "step": 27971 }, { "epoch": 0.8166768853464133, "grad_norm": 0.6634380007948056, "learning_rate": 4.073965936739659e-06, "loss": 0.627, "step": 27972 }, { "epoch": 0.8167060815742606, "grad_norm": 0.6344059553352864, "learning_rate": 4.073317112733171e-06, "loss": 0.5661, "step": 27973 }, { "epoch": 0.816735277802108, "grad_norm": 0.6336712376490382, "learning_rate": 4.072668288726683e-06, "loss": 0.615, "step": 27974 }, { "epoch": 0.8167644740299553, "grad_norm": 0.666017006576233, "learning_rate": 4.072019464720195e-06, "loss": 0.6053, "step": 27975 }, { "epoch": 0.8167936702578027, "grad_norm": 0.632109906897808, "learning_rate": 4.0713706407137065e-06, "loss": 0.5755, "step": 27976 }, { "epoch": 0.8168228664856501, "grad_norm": 0.6430891863416346, "learning_rate": 4.0707218167072185e-06, "loss": 0.5953, "step": 27977 }, { "epoch": 0.8168520627134974, "grad_norm": 0.7103572331083569, "learning_rate": 4.0700729927007305e-06, "loss": 0.6321, "step": 27978 }, { "epoch": 0.8168812589413448, "grad_norm": 0.6024705183749561, "learning_rate": 4.069424168694242e-06, "loss": 0.525, "step": 27979 }, { "epoch": 0.8169104551691921, "grad_norm": 0.6409219047612978, "learning_rate": 4.068775344687754e-06, "loss": 0.598, "step": 27980 }, { "epoch": 0.8169396513970395, "grad_norm": 0.6561573582240752, "learning_rate": 4.068126520681266e-06, "loss": 0.5796, "step": 27981 }, { "epoch": 0.8169688476248869, "grad_norm": 0.7036394392037417, "learning_rate": 4.067477696674778e-06, "loss": 0.6492, "step": 27982 }, { "epoch": 0.8169980438527342, "grad_norm": 0.6504347697628992, "learning_rate": 4.066828872668289e-06, "loss": 0.5679, "step": 27983 }, { "epoch": 0.8170272400805816, "grad_norm": 0.6555645408306333, "learning_rate": 4.066180048661801e-06, "loss": 0.5478, "step": 27984 }, { "epoch": 0.817056436308429, "grad_norm": 0.6314280810183198, "learning_rate": 4.065531224655312e-06, "loss": 0.6, "step": 27985 }, { "epoch": 0.8170856325362763, "grad_norm": 0.7058482268774559, "learning_rate": 4.064882400648824e-06, "loss": 0.7011, "step": 27986 }, { "epoch": 0.8171148287641237, "grad_norm": 0.6356042324498465, "learning_rate": 4.064233576642336e-06, "loss": 0.6138, "step": 27987 }, { "epoch": 0.817144024991971, "grad_norm": 0.6600516674469272, "learning_rate": 4.063584752635848e-06, "loss": 0.5868, "step": 27988 }, { "epoch": 0.8171732212198184, "grad_norm": 0.6832162506067813, "learning_rate": 4.0629359286293594e-06, "loss": 0.5917, "step": 27989 }, { "epoch": 0.8172024174476658, "grad_norm": 0.6280627654628467, "learning_rate": 4.0622871046228715e-06, "loss": 0.5821, "step": 27990 }, { "epoch": 0.8172316136755131, "grad_norm": 0.5917348034558323, "learning_rate": 4.061638280616383e-06, "loss": 0.525, "step": 27991 }, { "epoch": 0.8172608099033605, "grad_norm": 0.6506738033510394, "learning_rate": 4.060989456609895e-06, "loss": 0.5612, "step": 27992 }, { "epoch": 0.8172900061312078, "grad_norm": 0.6668405698843308, "learning_rate": 4.060340632603407e-06, "loss": 0.6215, "step": 27993 }, { "epoch": 0.8173192023590552, "grad_norm": 0.5656011468646691, "learning_rate": 4.059691808596919e-06, "loss": 0.4789, "step": 27994 }, { "epoch": 0.8173483985869026, "grad_norm": 0.6210251366792684, "learning_rate": 4.05904298459043e-06, "loss": 0.5765, "step": 27995 }, { "epoch": 0.8173775948147499, "grad_norm": 0.659536591876567, "learning_rate": 4.058394160583942e-06, "loss": 0.6162, "step": 27996 }, { "epoch": 0.8174067910425973, "grad_norm": 0.6491257045549681, "learning_rate": 4.057745336577454e-06, "loss": 0.5994, "step": 27997 }, { "epoch": 0.8174359872704446, "grad_norm": 0.6586653233243116, "learning_rate": 4.057096512570965e-06, "loss": 0.6329, "step": 27998 }, { "epoch": 0.817465183498292, "grad_norm": 0.6423870947636028, "learning_rate": 4.056447688564477e-06, "loss": 0.582, "step": 27999 }, { "epoch": 0.8174943797261394, "grad_norm": 0.6196216172209695, "learning_rate": 4.055798864557988e-06, "loss": 0.5706, "step": 28000 }, { "epoch": 0.8175235759539867, "grad_norm": 0.6155045170099771, "learning_rate": 4.055150040551501e-06, "loss": 0.5088, "step": 28001 }, { "epoch": 0.8175527721818341, "grad_norm": 0.6871719781758165, "learning_rate": 4.054501216545012e-06, "loss": 0.6695, "step": 28002 }, { "epoch": 0.8175819684096814, "grad_norm": 0.6398482919386086, "learning_rate": 4.0538523925385244e-06, "loss": 0.6162, "step": 28003 }, { "epoch": 0.8176111646375288, "grad_norm": 0.6750419144341709, "learning_rate": 4.053203568532036e-06, "loss": 0.6768, "step": 28004 }, { "epoch": 0.8176403608653762, "grad_norm": 0.6452683350425787, "learning_rate": 4.052554744525548e-06, "loss": 0.5984, "step": 28005 }, { "epoch": 0.8176695570932235, "grad_norm": 0.6587880819378489, "learning_rate": 4.05190592051906e-06, "loss": 0.6378, "step": 28006 }, { "epoch": 0.8176987533210709, "grad_norm": 0.6078669098101036, "learning_rate": 4.051257096512572e-06, "loss": 0.5629, "step": 28007 }, { "epoch": 0.8177279495489183, "grad_norm": 0.6528771537820758, "learning_rate": 4.050608272506083e-06, "loss": 0.5932, "step": 28008 }, { "epoch": 0.8177571457767656, "grad_norm": 0.6607737891756921, "learning_rate": 4.049959448499595e-06, "loss": 0.6327, "step": 28009 }, { "epoch": 0.817786342004613, "grad_norm": 0.6662777531436388, "learning_rate": 4.049310624493106e-06, "loss": 0.6062, "step": 28010 }, { "epoch": 0.8178155382324603, "grad_norm": 0.7120811785092717, "learning_rate": 4.048661800486618e-06, "loss": 0.6409, "step": 28011 }, { "epoch": 0.8178447344603077, "grad_norm": 0.6364810965108209, "learning_rate": 4.04801297648013e-06, "loss": 0.5811, "step": 28012 }, { "epoch": 0.8178739306881551, "grad_norm": 0.6843985471336942, "learning_rate": 4.047364152473642e-06, "loss": 0.6643, "step": 28013 }, { "epoch": 0.8179031269160024, "grad_norm": 0.6320601750933456, "learning_rate": 4.046715328467153e-06, "loss": 0.5946, "step": 28014 }, { "epoch": 0.8179323231438498, "grad_norm": 0.6402116052411212, "learning_rate": 4.046066504460665e-06, "loss": 0.5914, "step": 28015 }, { "epoch": 0.8179615193716971, "grad_norm": 0.645612508817088, "learning_rate": 4.045417680454177e-06, "loss": 0.6092, "step": 28016 }, { "epoch": 0.8179907155995445, "grad_norm": 0.6058636269219423, "learning_rate": 4.044768856447689e-06, "loss": 0.5529, "step": 28017 }, { "epoch": 0.8180199118273919, "grad_norm": 0.6405570355524454, "learning_rate": 4.044120032441201e-06, "loss": 0.5541, "step": 28018 }, { "epoch": 0.8180491080552392, "grad_norm": 0.5844261887854555, "learning_rate": 4.043471208434712e-06, "loss": 0.5012, "step": 28019 }, { "epoch": 0.8180783042830866, "grad_norm": 0.6095668287953439, "learning_rate": 4.042822384428225e-06, "loss": 0.5722, "step": 28020 }, { "epoch": 0.818107500510934, "grad_norm": 0.5908333646398558, "learning_rate": 4.042173560421736e-06, "loss": 0.5085, "step": 28021 }, { "epoch": 0.8181366967387813, "grad_norm": 0.613112642146903, "learning_rate": 4.041524736415248e-06, "loss": 0.5575, "step": 28022 }, { "epoch": 0.8181658929666287, "grad_norm": 0.5986598235483358, "learning_rate": 4.040875912408759e-06, "loss": 0.4919, "step": 28023 }, { "epoch": 0.818195089194476, "grad_norm": 0.6573082991488578, "learning_rate": 4.040227088402271e-06, "loss": 0.5814, "step": 28024 }, { "epoch": 0.8182242854223234, "grad_norm": 0.6550494540823287, "learning_rate": 4.039578264395783e-06, "loss": 0.5859, "step": 28025 }, { "epoch": 0.8182534816501708, "grad_norm": 0.6060841073729187, "learning_rate": 4.038929440389295e-06, "loss": 0.5576, "step": 28026 }, { "epoch": 0.8182826778780181, "grad_norm": 0.6713197103885883, "learning_rate": 4.038280616382806e-06, "loss": 0.6436, "step": 28027 }, { "epoch": 0.8183118741058655, "grad_norm": 0.6331708363784172, "learning_rate": 4.037631792376318e-06, "loss": 0.5833, "step": 28028 }, { "epoch": 0.8183410703337128, "grad_norm": 0.6900009862759809, "learning_rate": 4.03698296836983e-06, "loss": 0.6803, "step": 28029 }, { "epoch": 0.8183702665615602, "grad_norm": 0.6797948782168872, "learning_rate": 4.0363341443633415e-06, "loss": 0.6585, "step": 28030 }, { "epoch": 0.8183994627894076, "grad_norm": 0.7047491998783008, "learning_rate": 4.0356853203568536e-06, "loss": 0.6767, "step": 28031 }, { "epoch": 0.8184286590172549, "grad_norm": 0.6564675039832784, "learning_rate": 4.035036496350366e-06, "loss": 0.6031, "step": 28032 }, { "epoch": 0.8184578552451023, "grad_norm": 0.6946328353415757, "learning_rate": 4.034387672343877e-06, "loss": 0.6575, "step": 28033 }, { "epoch": 0.8184870514729496, "grad_norm": 0.6668530158843087, "learning_rate": 4.033738848337389e-06, "loss": 0.6194, "step": 28034 }, { "epoch": 0.818516247700797, "grad_norm": 0.6653740699203913, "learning_rate": 4.033090024330901e-06, "loss": 0.6149, "step": 28035 }, { "epoch": 0.8185454439286444, "grad_norm": 0.6710314642954752, "learning_rate": 4.032441200324412e-06, "loss": 0.6246, "step": 28036 }, { "epoch": 0.8185746401564917, "grad_norm": 0.6491593783914809, "learning_rate": 4.031792376317924e-06, "loss": 0.6072, "step": 28037 }, { "epoch": 0.8186038363843391, "grad_norm": 0.6357745827858503, "learning_rate": 4.031143552311435e-06, "loss": 0.6101, "step": 28038 }, { "epoch": 0.8186330326121865, "grad_norm": 0.6391084058663822, "learning_rate": 4.030494728304948e-06, "loss": 0.577, "step": 28039 }, { "epoch": 0.8186622288400339, "grad_norm": 0.6239368042915113, "learning_rate": 4.029845904298459e-06, "loss": 0.5702, "step": 28040 }, { "epoch": 0.8186914250678813, "grad_norm": 0.6860508341596744, "learning_rate": 4.029197080291971e-06, "loss": 0.6493, "step": 28041 }, { "epoch": 0.8187206212957286, "grad_norm": 0.6624411493418515, "learning_rate": 4.0285482562854825e-06, "loss": 0.6368, "step": 28042 }, { "epoch": 0.818749817523576, "grad_norm": 0.6254951274916596, "learning_rate": 4.0278994322789945e-06, "loss": 0.6097, "step": 28043 }, { "epoch": 0.8187790137514234, "grad_norm": 0.6560343609863529, "learning_rate": 4.0272506082725065e-06, "loss": 0.6065, "step": 28044 }, { "epoch": 0.8188082099792707, "grad_norm": 0.6505343396840764, "learning_rate": 4.0266017842660186e-06, "loss": 0.6154, "step": 28045 }, { "epoch": 0.8188374062071181, "grad_norm": 0.6743705566968572, "learning_rate": 4.02595296025953e-06, "loss": 0.6478, "step": 28046 }, { "epoch": 0.8188666024349655, "grad_norm": 0.6209955550789678, "learning_rate": 4.025304136253042e-06, "loss": 0.5821, "step": 28047 }, { "epoch": 0.8188957986628128, "grad_norm": 0.6645602512382867, "learning_rate": 4.024655312246554e-06, "loss": 0.6286, "step": 28048 }, { "epoch": 0.8189249948906602, "grad_norm": 0.679085261392881, "learning_rate": 4.024006488240065e-06, "loss": 0.6652, "step": 28049 }, { "epoch": 0.8189541911185075, "grad_norm": 0.6836157611062592, "learning_rate": 4.023357664233577e-06, "loss": 0.6195, "step": 28050 }, { "epoch": 0.8189833873463549, "grad_norm": 0.6350841187446561, "learning_rate": 4.022708840227089e-06, "loss": 0.6003, "step": 28051 }, { "epoch": 0.8190125835742023, "grad_norm": 0.6612068602181788, "learning_rate": 4.0220600162206e-06, "loss": 0.6299, "step": 28052 }, { "epoch": 0.8190417798020496, "grad_norm": 0.6313671259481941, "learning_rate": 4.021411192214112e-06, "loss": 0.5959, "step": 28053 }, { "epoch": 0.819070976029897, "grad_norm": 0.6888634874221932, "learning_rate": 4.020762368207624e-06, "loss": 0.7062, "step": 28054 }, { "epoch": 0.8191001722577443, "grad_norm": 0.6341824452354765, "learning_rate": 4.0201135442011354e-06, "loss": 0.5871, "step": 28055 }, { "epoch": 0.8191293684855917, "grad_norm": 0.7099910395064523, "learning_rate": 4.0194647201946475e-06, "loss": 0.6557, "step": 28056 }, { "epoch": 0.8191585647134391, "grad_norm": 0.7044694569096437, "learning_rate": 4.018815896188159e-06, "loss": 0.6911, "step": 28057 }, { "epoch": 0.8191877609412864, "grad_norm": 0.603372094859145, "learning_rate": 4.0181670721816715e-06, "loss": 0.5465, "step": 28058 }, { "epoch": 0.8192169571691338, "grad_norm": 0.6760129394940255, "learning_rate": 4.017518248175183e-06, "loss": 0.6436, "step": 28059 }, { "epoch": 0.8192461533969811, "grad_norm": 0.6401540201766586, "learning_rate": 4.016869424168695e-06, "loss": 0.6168, "step": 28060 }, { "epoch": 0.8192753496248285, "grad_norm": 0.6696050866883501, "learning_rate": 4.016220600162206e-06, "loss": 0.618, "step": 28061 }, { "epoch": 0.8193045458526759, "grad_norm": 0.6339294292507103, "learning_rate": 4.015571776155718e-06, "loss": 0.5555, "step": 28062 }, { "epoch": 0.8193337420805232, "grad_norm": 0.6386501443927455, "learning_rate": 4.01492295214923e-06, "loss": 0.5731, "step": 28063 }, { "epoch": 0.8193629383083706, "grad_norm": 0.5954940627820041, "learning_rate": 4.014274128142742e-06, "loss": 0.4955, "step": 28064 }, { "epoch": 0.819392134536218, "grad_norm": 0.6363207389437765, "learning_rate": 4.013625304136253e-06, "loss": 0.5483, "step": 28065 }, { "epoch": 0.8194213307640653, "grad_norm": 0.6529021901643737, "learning_rate": 4.012976480129765e-06, "loss": 0.5835, "step": 28066 }, { "epoch": 0.8194505269919127, "grad_norm": 0.6821511067222208, "learning_rate": 4.012327656123277e-06, "loss": 0.6435, "step": 28067 }, { "epoch": 0.81947972321976, "grad_norm": 0.6419431996194099, "learning_rate": 4.011678832116788e-06, "loss": 0.5996, "step": 28068 }, { "epoch": 0.8195089194476074, "grad_norm": 0.5726834871350905, "learning_rate": 4.0110300081103004e-06, "loss": 0.4948, "step": 28069 }, { "epoch": 0.8195381156754548, "grad_norm": 0.6310531142290042, "learning_rate": 4.0103811841038125e-06, "loss": 0.5473, "step": 28070 }, { "epoch": 0.8195673119033021, "grad_norm": 0.6111130344941158, "learning_rate": 4.009732360097324e-06, "loss": 0.5227, "step": 28071 }, { "epoch": 0.8195965081311495, "grad_norm": 0.6665030245125245, "learning_rate": 4.009083536090836e-06, "loss": 0.6235, "step": 28072 }, { "epoch": 0.8196257043589968, "grad_norm": 0.6486735872416111, "learning_rate": 4.008434712084348e-06, "loss": 0.5648, "step": 28073 }, { "epoch": 0.8196549005868442, "grad_norm": 0.6731322793662506, "learning_rate": 4.007785888077859e-06, "loss": 0.6503, "step": 28074 }, { "epoch": 0.8196840968146916, "grad_norm": 0.6620070592955424, "learning_rate": 4.007137064071371e-06, "loss": 0.6543, "step": 28075 }, { "epoch": 0.8197132930425389, "grad_norm": 0.609896003624823, "learning_rate": 4.006488240064882e-06, "loss": 0.5572, "step": 28076 }, { "epoch": 0.8197424892703863, "grad_norm": 0.6819933463453156, "learning_rate": 4.005839416058395e-06, "loss": 0.7142, "step": 28077 }, { "epoch": 0.8197716854982336, "grad_norm": 0.7112647346738649, "learning_rate": 4.005190592051906e-06, "loss": 0.595, "step": 28078 }, { "epoch": 0.819800881726081, "grad_norm": 0.6470241944261677, "learning_rate": 4.004541768045418e-06, "loss": 0.6307, "step": 28079 }, { "epoch": 0.8198300779539284, "grad_norm": 0.640183921318116, "learning_rate": 4.003892944038929e-06, "loss": 0.6097, "step": 28080 }, { "epoch": 0.8198592741817757, "grad_norm": 0.7045314800121105, "learning_rate": 4.003244120032441e-06, "loss": 0.6328, "step": 28081 }, { "epoch": 0.8198884704096231, "grad_norm": 0.6225852689723788, "learning_rate": 4.002595296025953e-06, "loss": 0.5711, "step": 28082 }, { "epoch": 0.8199176666374705, "grad_norm": 0.600729605392711, "learning_rate": 4.0019464720194654e-06, "loss": 0.5393, "step": 28083 }, { "epoch": 0.8199468628653178, "grad_norm": 0.6490697126610624, "learning_rate": 4.001297648012977e-06, "loss": 0.5885, "step": 28084 }, { "epoch": 0.8199760590931652, "grad_norm": 0.6276974670489223, "learning_rate": 4.000648824006489e-06, "loss": 0.5604, "step": 28085 }, { "epoch": 0.8200052553210125, "grad_norm": 0.6819930687010536, "learning_rate": 4.000000000000001e-06, "loss": 0.6478, "step": 28086 }, { "epoch": 0.8200344515488599, "grad_norm": 0.6576323896329351, "learning_rate": 3.999351175993512e-06, "loss": 0.6115, "step": 28087 }, { "epoch": 0.8200636477767073, "grad_norm": 0.7034824472330741, "learning_rate": 3.998702351987024e-06, "loss": 0.6386, "step": 28088 }, { "epoch": 0.8200928440045546, "grad_norm": 0.673070424738551, "learning_rate": 3.998053527980535e-06, "loss": 0.6669, "step": 28089 }, { "epoch": 0.820122040232402, "grad_norm": 0.6397119971564383, "learning_rate": 3.997404703974047e-06, "loss": 0.6165, "step": 28090 }, { "epoch": 0.8201512364602493, "grad_norm": 0.6772612412006843, "learning_rate": 3.996755879967559e-06, "loss": 0.6581, "step": 28091 }, { "epoch": 0.8201804326880967, "grad_norm": 0.6436871756515999, "learning_rate": 3.996107055961071e-06, "loss": 0.5869, "step": 28092 }, { "epoch": 0.8202096289159441, "grad_norm": 0.643068606498514, "learning_rate": 3.995458231954582e-06, "loss": 0.6089, "step": 28093 }, { "epoch": 0.8202388251437914, "grad_norm": 0.6484054692997018, "learning_rate": 3.994809407948094e-06, "loss": 0.6279, "step": 28094 }, { "epoch": 0.8202680213716388, "grad_norm": 0.6023819183751212, "learning_rate": 3.994160583941606e-06, "loss": 0.5468, "step": 28095 }, { "epoch": 0.8202972175994862, "grad_norm": 0.5744370625163986, "learning_rate": 3.993511759935118e-06, "loss": 0.4532, "step": 28096 }, { "epoch": 0.8203264138273335, "grad_norm": 0.6273374666463792, "learning_rate": 3.99286293592863e-06, "loss": 0.563, "step": 28097 }, { "epoch": 0.8203556100551809, "grad_norm": 0.6008395189398938, "learning_rate": 3.992214111922142e-06, "loss": 0.5185, "step": 28098 }, { "epoch": 0.8203848062830282, "grad_norm": 0.6585655102570424, "learning_rate": 3.991565287915653e-06, "loss": 0.6118, "step": 28099 }, { "epoch": 0.8204140025108756, "grad_norm": 0.6833525009630551, "learning_rate": 3.990916463909165e-06, "loss": 0.6077, "step": 28100 }, { "epoch": 0.820443198738723, "grad_norm": 0.6430120674352361, "learning_rate": 3.990267639902677e-06, "loss": 0.5755, "step": 28101 }, { "epoch": 0.8204723949665703, "grad_norm": 0.6176471872133112, "learning_rate": 3.989618815896189e-06, "loss": 0.5296, "step": 28102 }, { "epoch": 0.8205015911944177, "grad_norm": 0.6101567164403433, "learning_rate": 3.9889699918897e-06, "loss": 0.5281, "step": 28103 }, { "epoch": 0.820530787422265, "grad_norm": 0.6261287193347588, "learning_rate": 3.988321167883212e-06, "loss": 0.5724, "step": 28104 }, { "epoch": 0.8205599836501124, "grad_norm": 0.6408923147145498, "learning_rate": 3.987672343876724e-06, "loss": 0.5917, "step": 28105 }, { "epoch": 0.8205891798779598, "grad_norm": 0.675106479004935, "learning_rate": 3.987023519870235e-06, "loss": 0.6604, "step": 28106 }, { "epoch": 0.8206183761058071, "grad_norm": 0.6640635849721698, "learning_rate": 3.986374695863747e-06, "loss": 0.6359, "step": 28107 }, { "epoch": 0.8206475723336545, "grad_norm": 0.6141618887312148, "learning_rate": 3.9857258718572585e-06, "loss": 0.5416, "step": 28108 }, { "epoch": 0.8206767685615018, "grad_norm": 0.6295389284841997, "learning_rate": 3.9850770478507705e-06, "loss": 0.5678, "step": 28109 }, { "epoch": 0.8207059647893492, "grad_norm": 0.6645704743045329, "learning_rate": 3.9844282238442825e-06, "loss": 0.6327, "step": 28110 }, { "epoch": 0.8207351610171966, "grad_norm": 0.6230448816696648, "learning_rate": 3.9837793998377946e-06, "loss": 0.6084, "step": 28111 }, { "epoch": 0.8207643572450439, "grad_norm": 0.6973325495631942, "learning_rate": 3.983130575831306e-06, "loss": 0.65, "step": 28112 }, { "epoch": 0.8207935534728913, "grad_norm": 0.5979695256738277, "learning_rate": 3.982481751824818e-06, "loss": 0.5364, "step": 28113 }, { "epoch": 0.8208227497007387, "grad_norm": 0.624224450056463, "learning_rate": 3.98183292781833e-06, "loss": 0.5886, "step": 28114 }, { "epoch": 0.820851945928586, "grad_norm": 0.6789538906885728, "learning_rate": 3.981184103811842e-06, "loss": 0.6555, "step": 28115 }, { "epoch": 0.8208811421564334, "grad_norm": 0.6448513197294401, "learning_rate": 3.980535279805353e-06, "loss": 0.5799, "step": 28116 }, { "epoch": 0.8209103383842807, "grad_norm": 0.6636522887715506, "learning_rate": 3.979886455798865e-06, "loss": 0.5901, "step": 28117 }, { "epoch": 0.8209395346121281, "grad_norm": 0.7691500292905027, "learning_rate": 3.979237631792376e-06, "loss": 0.5996, "step": 28118 }, { "epoch": 0.8209687308399755, "grad_norm": 0.6708760812589325, "learning_rate": 3.978588807785888e-06, "loss": 0.6248, "step": 28119 }, { "epoch": 0.8209979270678228, "grad_norm": 0.6042992453256819, "learning_rate": 3.9779399837794e-06, "loss": 0.5371, "step": 28120 }, { "epoch": 0.8210271232956702, "grad_norm": 0.6522641269484493, "learning_rate": 3.977291159772912e-06, "loss": 0.619, "step": 28121 }, { "epoch": 0.8210563195235175, "grad_norm": 0.6691262081816209, "learning_rate": 3.9766423357664235e-06, "loss": 0.6403, "step": 28122 }, { "epoch": 0.8210855157513649, "grad_norm": 0.678666221951418, "learning_rate": 3.9759935117599355e-06, "loss": 0.585, "step": 28123 }, { "epoch": 0.8211147119792123, "grad_norm": 0.6032010132581087, "learning_rate": 3.9753446877534475e-06, "loss": 0.5376, "step": 28124 }, { "epoch": 0.8211439082070596, "grad_norm": 0.667509146375348, "learning_rate": 3.974695863746959e-06, "loss": 0.6319, "step": 28125 }, { "epoch": 0.821173104434907, "grad_norm": 0.6688916042304626, "learning_rate": 3.974047039740471e-06, "loss": 0.6307, "step": 28126 }, { "epoch": 0.8212023006627543, "grad_norm": 0.6127299481011874, "learning_rate": 3.973398215733982e-06, "loss": 0.5287, "step": 28127 }, { "epoch": 0.8212314968906017, "grad_norm": 0.6597816543465346, "learning_rate": 3.972749391727495e-06, "loss": 0.5606, "step": 28128 }, { "epoch": 0.8212606931184491, "grad_norm": 0.6770857487195059, "learning_rate": 3.972100567721006e-06, "loss": 0.6513, "step": 28129 }, { "epoch": 0.8212898893462964, "grad_norm": 0.695742424169408, "learning_rate": 3.971451743714518e-06, "loss": 0.7026, "step": 28130 }, { "epoch": 0.8213190855741438, "grad_norm": 0.6044605267834757, "learning_rate": 3.970802919708029e-06, "loss": 0.5576, "step": 28131 }, { "epoch": 0.8213482818019912, "grad_norm": 0.6571927334240044, "learning_rate": 3.970154095701541e-06, "loss": 0.6101, "step": 28132 }, { "epoch": 0.8213774780298385, "grad_norm": 0.6286230847649185, "learning_rate": 3.969505271695053e-06, "loss": 0.5665, "step": 28133 }, { "epoch": 0.8214066742576859, "grad_norm": 0.6593584055486111, "learning_rate": 3.968856447688565e-06, "loss": 0.6003, "step": 28134 }, { "epoch": 0.8214358704855332, "grad_norm": 0.6588130781202594, "learning_rate": 3.9682076236820764e-06, "loss": 0.6251, "step": 28135 }, { "epoch": 0.8214650667133806, "grad_norm": 0.6507569102916678, "learning_rate": 3.9675587996755885e-06, "loss": 0.6024, "step": 28136 }, { "epoch": 0.821494262941228, "grad_norm": 0.6506837292139311, "learning_rate": 3.9669099756691e-06, "loss": 0.6048, "step": 28137 }, { "epoch": 0.8215234591690753, "grad_norm": 0.5908155604196248, "learning_rate": 3.966261151662612e-06, "loss": 0.5028, "step": 28138 }, { "epoch": 0.8215526553969227, "grad_norm": 0.6636715103571518, "learning_rate": 3.965612327656124e-06, "loss": 0.6418, "step": 28139 }, { "epoch": 0.82158185162477, "grad_norm": 0.6423451752716609, "learning_rate": 3.964963503649636e-06, "loss": 0.5992, "step": 28140 }, { "epoch": 0.8216110478526174, "grad_norm": 0.6688700037240858, "learning_rate": 3.964314679643147e-06, "loss": 0.4763, "step": 28141 }, { "epoch": 0.8216402440804648, "grad_norm": 0.6635129642949968, "learning_rate": 3.963665855636659e-06, "loss": 0.6174, "step": 28142 }, { "epoch": 0.8216694403083121, "grad_norm": 0.62039760232236, "learning_rate": 3.963017031630171e-06, "loss": 0.5821, "step": 28143 }, { "epoch": 0.8216986365361595, "grad_norm": 0.6160719718617151, "learning_rate": 3.962368207623682e-06, "loss": 0.537, "step": 28144 }, { "epoch": 0.8217278327640068, "grad_norm": 0.6309895365919668, "learning_rate": 3.961719383617194e-06, "loss": 0.6128, "step": 28145 }, { "epoch": 0.8217570289918542, "grad_norm": 0.6425768187220718, "learning_rate": 3.961070559610705e-06, "loss": 0.5789, "step": 28146 }, { "epoch": 0.8217862252197016, "grad_norm": 0.595352626527962, "learning_rate": 3.960421735604218e-06, "loss": 0.5081, "step": 28147 }, { "epoch": 0.8218154214475489, "grad_norm": 0.6171507639894359, "learning_rate": 3.959772911597729e-06, "loss": 0.5444, "step": 28148 }, { "epoch": 0.8218446176753963, "grad_norm": 0.615554046471414, "learning_rate": 3.9591240875912414e-06, "loss": 0.556, "step": 28149 }, { "epoch": 0.8218738139032437, "grad_norm": 0.6492348875863337, "learning_rate": 3.958475263584753e-06, "loss": 0.6271, "step": 28150 }, { "epoch": 0.821903010131091, "grad_norm": 0.6339651773458698, "learning_rate": 3.957826439578265e-06, "loss": 0.5834, "step": 28151 }, { "epoch": 0.8219322063589384, "grad_norm": 0.6404454821684762, "learning_rate": 3.957177615571777e-06, "loss": 0.6173, "step": 28152 }, { "epoch": 0.8219614025867857, "grad_norm": 0.6222279156350065, "learning_rate": 3.956528791565289e-06, "loss": 0.5938, "step": 28153 }, { "epoch": 0.8219905988146331, "grad_norm": 0.6251893287176844, "learning_rate": 3.9558799675588e-06, "loss": 0.5817, "step": 28154 }, { "epoch": 0.8220197950424805, "grad_norm": 0.7017036479046953, "learning_rate": 3.955231143552312e-06, "loss": 0.6051, "step": 28155 }, { "epoch": 0.8220489912703278, "grad_norm": 0.6511294424867304, "learning_rate": 3.954582319545823e-06, "loss": 0.5843, "step": 28156 }, { "epoch": 0.8220781874981752, "grad_norm": 0.6367867728477208, "learning_rate": 3.953933495539335e-06, "loss": 0.5281, "step": 28157 }, { "epoch": 0.8221073837260225, "grad_norm": 0.6666920239466722, "learning_rate": 3.953284671532847e-06, "loss": 0.6316, "step": 28158 }, { "epoch": 0.8221365799538699, "grad_norm": 0.6356814128307867, "learning_rate": 3.952635847526359e-06, "loss": 0.6061, "step": 28159 }, { "epoch": 0.8221657761817173, "grad_norm": 0.6218757349407702, "learning_rate": 3.95198702351987e-06, "loss": 0.5569, "step": 28160 }, { "epoch": 0.8221949724095647, "grad_norm": 0.6182430025347129, "learning_rate": 3.951338199513382e-06, "loss": 0.5269, "step": 28161 }, { "epoch": 0.8222241686374121, "grad_norm": 0.6755533652441269, "learning_rate": 3.950689375506894e-06, "loss": 0.6184, "step": 28162 }, { "epoch": 0.8222533648652595, "grad_norm": 0.6027064218239375, "learning_rate": 3.950040551500406e-06, "loss": 0.5283, "step": 28163 }, { "epoch": 0.8222825610931068, "grad_norm": 0.6068260132310829, "learning_rate": 3.949391727493918e-06, "loss": 0.5387, "step": 28164 }, { "epoch": 0.8223117573209542, "grad_norm": 0.621417848129225, "learning_rate": 3.948742903487429e-06, "loss": 0.5505, "step": 28165 }, { "epoch": 0.8223409535488015, "grad_norm": 0.6685502760327102, "learning_rate": 3.948094079480942e-06, "loss": 0.6266, "step": 28166 }, { "epoch": 0.8223701497766489, "grad_norm": 0.6132036148746483, "learning_rate": 3.947445255474453e-06, "loss": 0.5488, "step": 28167 }, { "epoch": 0.8223993460044963, "grad_norm": 0.6016772731891604, "learning_rate": 3.946796431467965e-06, "loss": 0.5188, "step": 28168 }, { "epoch": 0.8224285422323436, "grad_norm": 0.6664475570919901, "learning_rate": 3.946147607461476e-06, "loss": 0.6293, "step": 28169 }, { "epoch": 0.822457738460191, "grad_norm": 0.6366971840353564, "learning_rate": 3.945498783454988e-06, "loss": 0.5442, "step": 28170 }, { "epoch": 0.8224869346880384, "grad_norm": 0.6361933139821225, "learning_rate": 3.9448499594485e-06, "loss": 0.6109, "step": 28171 }, { "epoch": 0.8225161309158857, "grad_norm": 0.687598888869388, "learning_rate": 3.944201135442012e-06, "loss": 0.7013, "step": 28172 }, { "epoch": 0.8225453271437331, "grad_norm": 0.6066218755150746, "learning_rate": 3.943552311435523e-06, "loss": 0.5506, "step": 28173 }, { "epoch": 0.8225745233715804, "grad_norm": 0.6707557496358724, "learning_rate": 3.942903487429035e-06, "loss": 0.6356, "step": 28174 }, { "epoch": 0.8226037195994278, "grad_norm": 0.6768897670667313, "learning_rate": 3.9422546634225465e-06, "loss": 0.6211, "step": 28175 }, { "epoch": 0.8226329158272752, "grad_norm": 0.6389043676137879, "learning_rate": 3.9416058394160585e-06, "loss": 0.5881, "step": 28176 }, { "epoch": 0.8226621120551225, "grad_norm": 0.6729315014759236, "learning_rate": 3.940957015409571e-06, "loss": 0.6206, "step": 28177 }, { "epoch": 0.8226913082829699, "grad_norm": 0.6364501521636368, "learning_rate": 3.940308191403083e-06, "loss": 0.5468, "step": 28178 }, { "epoch": 0.8227205045108172, "grad_norm": 0.6817311385656114, "learning_rate": 3.939659367396594e-06, "loss": 0.6696, "step": 28179 }, { "epoch": 0.8227497007386646, "grad_norm": 0.7060122176809523, "learning_rate": 3.939010543390106e-06, "loss": 0.6777, "step": 28180 }, { "epoch": 0.822778896966512, "grad_norm": 0.6580140673000298, "learning_rate": 3.938361719383618e-06, "loss": 0.6376, "step": 28181 }, { "epoch": 0.8228080931943593, "grad_norm": 0.6565248798270267, "learning_rate": 3.937712895377129e-06, "loss": 0.5794, "step": 28182 }, { "epoch": 0.8228372894222067, "grad_norm": 0.624809384603228, "learning_rate": 3.937064071370641e-06, "loss": 0.5743, "step": 28183 }, { "epoch": 0.822866485650054, "grad_norm": 0.64894495959006, "learning_rate": 3.936415247364152e-06, "loss": 0.6076, "step": 28184 }, { "epoch": 0.8228956818779014, "grad_norm": 0.6743956635430679, "learning_rate": 3.935766423357665e-06, "loss": 0.6556, "step": 28185 }, { "epoch": 0.8229248781057488, "grad_norm": 0.6430252239929467, "learning_rate": 3.935117599351176e-06, "loss": 0.6105, "step": 28186 }, { "epoch": 0.8229540743335961, "grad_norm": 0.6154693651523666, "learning_rate": 3.934468775344688e-06, "loss": 0.5839, "step": 28187 }, { "epoch": 0.8229832705614435, "grad_norm": 0.6388316875731086, "learning_rate": 3.9338199513381995e-06, "loss": 0.588, "step": 28188 }, { "epoch": 0.8230124667892909, "grad_norm": 0.659191096208582, "learning_rate": 3.9331711273317115e-06, "loss": 0.6026, "step": 28189 }, { "epoch": 0.8230416630171382, "grad_norm": 0.6660063448044019, "learning_rate": 3.9325223033252235e-06, "loss": 0.6532, "step": 28190 }, { "epoch": 0.8230708592449856, "grad_norm": 0.6393607132697977, "learning_rate": 3.9318734793187356e-06, "loss": 0.568, "step": 28191 }, { "epoch": 0.8231000554728329, "grad_norm": 0.6370447761196046, "learning_rate": 3.931224655312247e-06, "loss": 0.5706, "step": 28192 }, { "epoch": 0.8231292517006803, "grad_norm": 0.6441132779621174, "learning_rate": 3.930575831305759e-06, "loss": 0.5779, "step": 28193 }, { "epoch": 0.8231584479285277, "grad_norm": 0.6572839912573275, "learning_rate": 3.92992700729927e-06, "loss": 0.566, "step": 28194 }, { "epoch": 0.823187644156375, "grad_norm": 0.6327629169668029, "learning_rate": 3.929278183292782e-06, "loss": 0.5657, "step": 28195 }, { "epoch": 0.8232168403842224, "grad_norm": 0.6550521280355784, "learning_rate": 3.928629359286294e-06, "loss": 0.6118, "step": 28196 }, { "epoch": 0.8232460366120697, "grad_norm": 0.6513265614934431, "learning_rate": 3.927980535279805e-06, "loss": 0.5377, "step": 28197 }, { "epoch": 0.8232752328399171, "grad_norm": 0.6169385672987887, "learning_rate": 3.927331711273317e-06, "loss": 0.5606, "step": 28198 }, { "epoch": 0.8233044290677645, "grad_norm": 0.6601197921031604, "learning_rate": 3.926682887266829e-06, "loss": 0.5066, "step": 28199 }, { "epoch": 0.8233336252956118, "grad_norm": 0.694135063768691, "learning_rate": 3.926034063260341e-06, "loss": 0.6733, "step": 28200 }, { "epoch": 0.8233628215234592, "grad_norm": 0.6035852397695434, "learning_rate": 3.9253852392538525e-06, "loss": 0.5263, "step": 28201 }, { "epoch": 0.8233920177513065, "grad_norm": 0.6567584713577058, "learning_rate": 3.9247364152473645e-06, "loss": 0.6145, "step": 28202 }, { "epoch": 0.8234212139791539, "grad_norm": 0.6513519176750894, "learning_rate": 3.924087591240876e-06, "loss": 0.5854, "step": 28203 }, { "epoch": 0.8234504102070013, "grad_norm": 0.6527956353861756, "learning_rate": 3.9234387672343885e-06, "loss": 0.5982, "step": 28204 }, { "epoch": 0.8234796064348486, "grad_norm": 0.6669514036077275, "learning_rate": 3.9227899432279e-06, "loss": 0.6245, "step": 28205 }, { "epoch": 0.823508802662696, "grad_norm": 0.6151236571975496, "learning_rate": 3.922141119221412e-06, "loss": 0.5532, "step": 28206 }, { "epoch": 0.8235379988905434, "grad_norm": 0.6626795166300786, "learning_rate": 3.921492295214923e-06, "loss": 0.5929, "step": 28207 }, { "epoch": 0.8235671951183907, "grad_norm": 0.6471208355935242, "learning_rate": 3.920843471208435e-06, "loss": 0.6139, "step": 28208 }, { "epoch": 0.8235963913462381, "grad_norm": 0.6040807979976811, "learning_rate": 3.920194647201947e-06, "loss": 0.5397, "step": 28209 }, { "epoch": 0.8236255875740854, "grad_norm": 0.6114522568945661, "learning_rate": 3.919545823195459e-06, "loss": 0.5271, "step": 28210 }, { "epoch": 0.8236547838019328, "grad_norm": 0.7724729258302448, "learning_rate": 3.91889699918897e-06, "loss": 0.6584, "step": 28211 }, { "epoch": 0.8236839800297802, "grad_norm": 0.5844448943033105, "learning_rate": 3.918248175182482e-06, "loss": 0.5026, "step": 28212 }, { "epoch": 0.8237131762576275, "grad_norm": 0.6713808230803828, "learning_rate": 3.917599351175994e-06, "loss": 0.6131, "step": 28213 }, { "epoch": 0.8237423724854749, "grad_norm": 0.6656913731211048, "learning_rate": 3.916950527169505e-06, "loss": 0.5996, "step": 28214 }, { "epoch": 0.8237715687133222, "grad_norm": 0.6487475702848922, "learning_rate": 3.9163017031630174e-06, "loss": 0.5962, "step": 28215 }, { "epoch": 0.8238007649411696, "grad_norm": 0.6706149820996518, "learning_rate": 3.915652879156529e-06, "loss": 0.6335, "step": 28216 }, { "epoch": 0.823829961169017, "grad_norm": 0.6401361751821126, "learning_rate": 3.915004055150041e-06, "loss": 0.5367, "step": 28217 }, { "epoch": 0.8238591573968643, "grad_norm": 0.6307156145521294, "learning_rate": 3.914355231143553e-06, "loss": 0.6077, "step": 28218 }, { "epoch": 0.8238883536247117, "grad_norm": 0.6612253400142641, "learning_rate": 3.913706407137065e-06, "loss": 0.5999, "step": 28219 }, { "epoch": 0.823917549852559, "grad_norm": 0.6293840245411406, "learning_rate": 3.913057583130576e-06, "loss": 0.5667, "step": 28220 }, { "epoch": 0.8239467460804064, "grad_norm": 0.6224043507440356, "learning_rate": 3.912408759124088e-06, "loss": 0.5941, "step": 28221 }, { "epoch": 0.8239759423082538, "grad_norm": 0.578702299708484, "learning_rate": 3.911759935117599e-06, "loss": 0.5191, "step": 28222 }, { "epoch": 0.8240051385361011, "grad_norm": 0.6371195036821816, "learning_rate": 3.911111111111112e-06, "loss": 0.5599, "step": 28223 }, { "epoch": 0.8240343347639485, "grad_norm": 0.6057261440059974, "learning_rate": 3.910462287104623e-06, "loss": 0.5406, "step": 28224 }, { "epoch": 0.8240635309917959, "grad_norm": 0.6186029222704392, "learning_rate": 3.909813463098135e-06, "loss": 0.5917, "step": 28225 }, { "epoch": 0.8240927272196432, "grad_norm": 0.6378779794443832, "learning_rate": 3.909164639091646e-06, "loss": 0.5583, "step": 28226 }, { "epoch": 0.8241219234474906, "grad_norm": 0.6718949389632328, "learning_rate": 3.908515815085158e-06, "loss": 0.6649, "step": 28227 }, { "epoch": 0.8241511196753379, "grad_norm": 0.6173024621490669, "learning_rate": 3.90786699107867e-06, "loss": 0.5915, "step": 28228 }, { "epoch": 0.8241803159031853, "grad_norm": 0.6117445820276499, "learning_rate": 3.9072181670721824e-06, "loss": 0.5872, "step": 28229 }, { "epoch": 0.8242095121310327, "grad_norm": 0.6843574657663016, "learning_rate": 3.906569343065694e-06, "loss": 0.6657, "step": 28230 }, { "epoch": 0.82423870835888, "grad_norm": 0.6482262984266138, "learning_rate": 3.905920519059206e-06, "loss": 0.6264, "step": 28231 }, { "epoch": 0.8242679045867274, "grad_norm": 0.6114400251947556, "learning_rate": 3.905271695052718e-06, "loss": 0.5749, "step": 28232 }, { "epoch": 0.8242971008145747, "grad_norm": 0.6288668172832849, "learning_rate": 3.904622871046229e-06, "loss": 0.5928, "step": 28233 }, { "epoch": 0.8243262970424221, "grad_norm": 0.6552243262314175, "learning_rate": 3.903974047039741e-06, "loss": 0.5945, "step": 28234 }, { "epoch": 0.8243554932702695, "grad_norm": 0.646114951799124, "learning_rate": 3.903325223033252e-06, "loss": 0.6269, "step": 28235 }, { "epoch": 0.8243846894981168, "grad_norm": 0.6679517122453684, "learning_rate": 3.902676399026764e-06, "loss": 0.6201, "step": 28236 }, { "epoch": 0.8244138857259642, "grad_norm": 0.5969629812156516, "learning_rate": 3.902027575020276e-06, "loss": 0.5326, "step": 28237 }, { "epoch": 0.8244430819538116, "grad_norm": 0.6673250769801342, "learning_rate": 3.901378751013788e-06, "loss": 0.628, "step": 28238 }, { "epoch": 0.8244722781816589, "grad_norm": 0.6390625382274643, "learning_rate": 3.900729927007299e-06, "loss": 0.6021, "step": 28239 }, { "epoch": 0.8245014744095063, "grad_norm": 0.6512702325543025, "learning_rate": 3.900081103000811e-06, "loss": 0.6526, "step": 28240 }, { "epoch": 0.8245306706373536, "grad_norm": 0.60997684962216, "learning_rate": 3.8994322789943225e-06, "loss": 0.5468, "step": 28241 }, { "epoch": 0.824559866865201, "grad_norm": 0.6743759251880143, "learning_rate": 3.898783454987835e-06, "loss": 0.6032, "step": 28242 }, { "epoch": 0.8245890630930484, "grad_norm": 0.6156183154642839, "learning_rate": 3.898134630981347e-06, "loss": 0.5092, "step": 28243 }, { "epoch": 0.8246182593208957, "grad_norm": 0.6098280230646785, "learning_rate": 3.897485806974859e-06, "loss": 0.5491, "step": 28244 }, { "epoch": 0.8246474555487431, "grad_norm": 0.6698331823724174, "learning_rate": 3.89683698296837e-06, "loss": 0.609, "step": 28245 }, { "epoch": 0.8246766517765904, "grad_norm": 0.6692657092977712, "learning_rate": 3.896188158961882e-06, "loss": 0.6616, "step": 28246 }, { "epoch": 0.8247058480044378, "grad_norm": 0.6276542133534946, "learning_rate": 3.895539334955394e-06, "loss": 0.514, "step": 28247 }, { "epoch": 0.8247350442322852, "grad_norm": 0.6234513907149671, "learning_rate": 3.894890510948906e-06, "loss": 0.5637, "step": 28248 }, { "epoch": 0.8247642404601325, "grad_norm": 0.6244333468774443, "learning_rate": 3.894241686942417e-06, "loss": 0.5472, "step": 28249 }, { "epoch": 0.8247934366879799, "grad_norm": 0.6373407510154698, "learning_rate": 3.893592862935929e-06, "loss": 0.5514, "step": 28250 }, { "epoch": 0.8248226329158272, "grad_norm": 0.6209655864150067, "learning_rate": 3.892944038929441e-06, "loss": 0.5271, "step": 28251 }, { "epoch": 0.8248518291436746, "grad_norm": 0.650458122953065, "learning_rate": 3.892295214922952e-06, "loss": 0.6055, "step": 28252 }, { "epoch": 0.824881025371522, "grad_norm": 0.6214184288783955, "learning_rate": 3.891646390916464e-06, "loss": 0.5531, "step": 28253 }, { "epoch": 0.8249102215993693, "grad_norm": 0.5923587708853781, "learning_rate": 3.8909975669099755e-06, "loss": 0.5216, "step": 28254 }, { "epoch": 0.8249394178272167, "grad_norm": 0.622253669400847, "learning_rate": 3.8903487429034875e-06, "loss": 0.5933, "step": 28255 }, { "epoch": 0.824968614055064, "grad_norm": 0.6396054450084734, "learning_rate": 3.8896999188969995e-06, "loss": 0.5785, "step": 28256 }, { "epoch": 0.8249978102829114, "grad_norm": 0.6385652799267213, "learning_rate": 3.889051094890512e-06, "loss": 0.5494, "step": 28257 }, { "epoch": 0.8250270065107588, "grad_norm": 0.6722441389306355, "learning_rate": 3.888402270884023e-06, "loss": 0.6577, "step": 28258 }, { "epoch": 0.8250562027386061, "grad_norm": 0.6565242412047279, "learning_rate": 3.887753446877535e-06, "loss": 0.5627, "step": 28259 }, { "epoch": 0.8250853989664535, "grad_norm": 0.7139286959395525, "learning_rate": 3.887104622871046e-06, "loss": 0.7371, "step": 28260 }, { "epoch": 0.8251145951943009, "grad_norm": 0.683579717992611, "learning_rate": 3.886455798864559e-06, "loss": 0.6456, "step": 28261 }, { "epoch": 0.8251437914221482, "grad_norm": 0.7183345315370797, "learning_rate": 3.88580697485807e-06, "loss": 0.6848, "step": 28262 }, { "epoch": 0.8251729876499956, "grad_norm": 0.7075555729848406, "learning_rate": 3.885158150851582e-06, "loss": 0.7066, "step": 28263 }, { "epoch": 0.8252021838778429, "grad_norm": 0.6273141776844451, "learning_rate": 3.884509326845093e-06, "loss": 0.6064, "step": 28264 }, { "epoch": 0.8252313801056903, "grad_norm": 0.6552109355887682, "learning_rate": 3.883860502838605e-06, "loss": 0.5897, "step": 28265 }, { "epoch": 0.8252605763335377, "grad_norm": 0.6688005896023341, "learning_rate": 3.883211678832117e-06, "loss": 0.6631, "step": 28266 }, { "epoch": 0.825289772561385, "grad_norm": 0.6412138150904783, "learning_rate": 3.882562854825629e-06, "loss": 0.6234, "step": 28267 }, { "epoch": 0.8253189687892324, "grad_norm": 0.6523830208875214, "learning_rate": 3.8819140308191405e-06, "loss": 0.602, "step": 28268 }, { "epoch": 0.8253481650170797, "grad_norm": 0.6879862050635429, "learning_rate": 3.8812652068126525e-06, "loss": 0.6184, "step": 28269 }, { "epoch": 0.8253773612449271, "grad_norm": 0.6679290441222383, "learning_rate": 3.8806163828061645e-06, "loss": 0.6396, "step": 28270 }, { "epoch": 0.8254065574727745, "grad_norm": 0.6311029425903238, "learning_rate": 3.879967558799676e-06, "loss": 0.6062, "step": 28271 }, { "epoch": 0.8254357537006218, "grad_norm": 0.6566106734356871, "learning_rate": 3.879318734793188e-06, "loss": 0.6016, "step": 28272 }, { "epoch": 0.8254649499284692, "grad_norm": 0.6252459432462136, "learning_rate": 3.878669910786699e-06, "loss": 0.5378, "step": 28273 }, { "epoch": 0.8254941461563166, "grad_norm": 0.6459184067336415, "learning_rate": 3.878021086780211e-06, "loss": 0.6071, "step": 28274 }, { "epoch": 0.8255233423841639, "grad_norm": 0.6544247869164618, "learning_rate": 3.877372262773723e-06, "loss": 0.5724, "step": 28275 }, { "epoch": 0.8255525386120113, "grad_norm": 0.6474473267850391, "learning_rate": 3.876723438767235e-06, "loss": 0.6384, "step": 28276 }, { "epoch": 0.8255817348398586, "grad_norm": 0.705868091265635, "learning_rate": 3.876074614760746e-06, "loss": 0.6577, "step": 28277 }, { "epoch": 0.825610931067706, "grad_norm": 0.6650099801348548, "learning_rate": 3.875425790754258e-06, "loss": 0.632, "step": 28278 }, { "epoch": 0.8256401272955534, "grad_norm": 0.6467275788731564, "learning_rate": 3.874776966747769e-06, "loss": 0.6024, "step": 28279 }, { "epoch": 0.8256693235234007, "grad_norm": 0.6254903099785039, "learning_rate": 3.874128142741282e-06, "loss": 0.5692, "step": 28280 }, { "epoch": 0.8256985197512481, "grad_norm": 0.625313571879366, "learning_rate": 3.8734793187347935e-06, "loss": 0.5961, "step": 28281 }, { "epoch": 0.8257277159790956, "grad_norm": 0.6190836300224358, "learning_rate": 3.8728304947283055e-06, "loss": 0.5634, "step": 28282 }, { "epoch": 0.8257569122069429, "grad_norm": 0.630178128990555, "learning_rate": 3.872181670721817e-06, "loss": 0.582, "step": 28283 }, { "epoch": 0.8257861084347903, "grad_norm": 0.6106718687741819, "learning_rate": 3.871532846715329e-06, "loss": 0.5332, "step": 28284 }, { "epoch": 0.8258153046626376, "grad_norm": 0.6620495198695315, "learning_rate": 3.870884022708841e-06, "loss": 0.6536, "step": 28285 }, { "epoch": 0.825844500890485, "grad_norm": 0.6658482660776994, "learning_rate": 3.870235198702352e-06, "loss": 0.595, "step": 28286 }, { "epoch": 0.8258736971183324, "grad_norm": 0.6318004030897292, "learning_rate": 3.869586374695864e-06, "loss": 0.5996, "step": 28287 }, { "epoch": 0.8259028933461797, "grad_norm": 0.7082381060720151, "learning_rate": 3.868937550689376e-06, "loss": 0.707, "step": 28288 }, { "epoch": 0.8259320895740271, "grad_norm": 0.6712468913021697, "learning_rate": 3.868288726682888e-06, "loss": 0.5674, "step": 28289 }, { "epoch": 0.8259612858018744, "grad_norm": 0.690194939710148, "learning_rate": 3.867639902676399e-06, "loss": 0.6388, "step": 28290 }, { "epoch": 0.8259904820297218, "grad_norm": 0.6747606703284575, "learning_rate": 3.866991078669911e-06, "loss": 0.6593, "step": 28291 }, { "epoch": 0.8260196782575692, "grad_norm": 0.6576480457325983, "learning_rate": 3.866342254663422e-06, "loss": 0.5974, "step": 28292 }, { "epoch": 0.8260488744854165, "grad_norm": 0.6532766972698821, "learning_rate": 3.865693430656934e-06, "loss": 0.6499, "step": 28293 }, { "epoch": 0.8260780707132639, "grad_norm": 0.6165002334318186, "learning_rate": 3.865044606650446e-06, "loss": 0.5567, "step": 28294 }, { "epoch": 0.8261072669411113, "grad_norm": 0.6366715494924744, "learning_rate": 3.8643957826439584e-06, "loss": 0.5702, "step": 28295 }, { "epoch": 0.8261364631689586, "grad_norm": 0.6622250006175008, "learning_rate": 3.86374695863747e-06, "loss": 0.658, "step": 28296 }, { "epoch": 0.826165659396806, "grad_norm": 0.6590855174886805, "learning_rate": 3.863098134630982e-06, "loss": 0.5995, "step": 28297 }, { "epoch": 0.8261948556246533, "grad_norm": 0.644228629151925, "learning_rate": 3.862449310624494e-06, "loss": 0.6163, "step": 28298 }, { "epoch": 0.8262240518525007, "grad_norm": 0.6119103714615763, "learning_rate": 3.861800486618006e-06, "loss": 0.5584, "step": 28299 }, { "epoch": 0.8262532480803481, "grad_norm": 0.7057106889040929, "learning_rate": 3.861151662611517e-06, "loss": 0.6072, "step": 28300 }, { "epoch": 0.8262824443081954, "grad_norm": 0.6739462537803371, "learning_rate": 3.860502838605029e-06, "loss": 0.6302, "step": 28301 }, { "epoch": 0.8263116405360428, "grad_norm": 0.6661491786472215, "learning_rate": 3.85985401459854e-06, "loss": 0.613, "step": 28302 }, { "epoch": 0.8263408367638901, "grad_norm": 0.6448439804158127, "learning_rate": 3.859205190592052e-06, "loss": 0.5542, "step": 28303 }, { "epoch": 0.8263700329917375, "grad_norm": 0.6354217155888284, "learning_rate": 3.858556366585564e-06, "loss": 0.5739, "step": 28304 }, { "epoch": 0.8263992292195849, "grad_norm": 0.5777755009038968, "learning_rate": 3.857907542579075e-06, "loss": 0.4793, "step": 28305 }, { "epoch": 0.8264284254474322, "grad_norm": 0.6288200081046472, "learning_rate": 3.857258718572587e-06, "loss": 0.5919, "step": 28306 }, { "epoch": 0.8264576216752796, "grad_norm": 0.6342561401741873, "learning_rate": 3.856609894566099e-06, "loss": 0.5694, "step": 28307 }, { "epoch": 0.826486817903127, "grad_norm": 0.6396732213689416, "learning_rate": 3.855961070559611e-06, "loss": 0.598, "step": 28308 }, { "epoch": 0.8265160141309743, "grad_norm": 0.5858659978790999, "learning_rate": 3.855312246553123e-06, "loss": 0.5288, "step": 28309 }, { "epoch": 0.8265452103588217, "grad_norm": 0.6685197291968353, "learning_rate": 3.854663422546635e-06, "loss": 0.5907, "step": 28310 }, { "epoch": 0.826574406586669, "grad_norm": 0.6427073092962269, "learning_rate": 3.854014598540146e-06, "loss": 0.5566, "step": 28311 }, { "epoch": 0.8266036028145164, "grad_norm": 0.6207257457793564, "learning_rate": 3.853365774533658e-06, "loss": 0.5475, "step": 28312 }, { "epoch": 0.8266327990423638, "grad_norm": 0.6249996108641866, "learning_rate": 3.85271695052717e-06, "loss": 0.5419, "step": 28313 }, { "epoch": 0.8266619952702111, "grad_norm": 0.6569463474560001, "learning_rate": 3.852068126520682e-06, "loss": 0.6558, "step": 28314 }, { "epoch": 0.8266911914980585, "grad_norm": 0.6033057265744519, "learning_rate": 3.851419302514193e-06, "loss": 0.5184, "step": 28315 }, { "epoch": 0.8267203877259058, "grad_norm": 0.6574573272919134, "learning_rate": 3.850770478507705e-06, "loss": 0.6206, "step": 28316 }, { "epoch": 0.8267495839537532, "grad_norm": 0.6473336089521484, "learning_rate": 3.850121654501217e-06, "loss": 0.5847, "step": 28317 }, { "epoch": 0.8267787801816006, "grad_norm": 0.6474059897323166, "learning_rate": 3.849472830494729e-06, "loss": 0.5399, "step": 28318 }, { "epoch": 0.8268079764094479, "grad_norm": 0.6308353737507534, "learning_rate": 3.84882400648824e-06, "loss": 0.5208, "step": 28319 }, { "epoch": 0.8268371726372953, "grad_norm": 0.6757498516462431, "learning_rate": 3.848175182481752e-06, "loss": 0.6062, "step": 28320 }, { "epoch": 0.8268663688651426, "grad_norm": 0.6394348163431698, "learning_rate": 3.8475263584752635e-06, "loss": 0.5496, "step": 28321 }, { "epoch": 0.82689556509299, "grad_norm": 0.6900418124376215, "learning_rate": 3.8468775344687756e-06, "loss": 0.6166, "step": 28322 }, { "epoch": 0.8269247613208374, "grad_norm": 0.688543211473762, "learning_rate": 3.846228710462288e-06, "loss": 0.6471, "step": 28323 }, { "epoch": 0.8269539575486847, "grad_norm": 0.8560793732622312, "learning_rate": 3.845579886455799e-06, "loss": 0.6989, "step": 28324 }, { "epoch": 0.8269831537765321, "grad_norm": 0.6287379998896322, "learning_rate": 3.844931062449311e-06, "loss": 0.5459, "step": 28325 }, { "epoch": 0.8270123500043794, "grad_norm": 0.6856061560257082, "learning_rate": 3.844282238442823e-06, "loss": 0.6764, "step": 28326 }, { "epoch": 0.8270415462322268, "grad_norm": 0.6108824342922564, "learning_rate": 3.843633414436335e-06, "loss": 0.5342, "step": 28327 }, { "epoch": 0.8270707424600742, "grad_norm": 0.6334118461295313, "learning_rate": 3.842984590429846e-06, "loss": 0.561, "step": 28328 }, { "epoch": 0.8270999386879215, "grad_norm": 0.641781661709096, "learning_rate": 3.842335766423358e-06, "loss": 0.5834, "step": 28329 }, { "epoch": 0.8271291349157689, "grad_norm": 0.6201178720706941, "learning_rate": 3.841686942416869e-06, "loss": 0.5318, "step": 28330 }, { "epoch": 0.8271583311436163, "grad_norm": 0.7576047317229976, "learning_rate": 3.841038118410382e-06, "loss": 0.6757, "step": 28331 }, { "epoch": 0.8271875273714636, "grad_norm": 0.6859464293083392, "learning_rate": 3.840389294403893e-06, "loss": 0.6213, "step": 28332 }, { "epoch": 0.827216723599311, "grad_norm": 0.6882402814579089, "learning_rate": 3.839740470397405e-06, "loss": 0.6378, "step": 28333 }, { "epoch": 0.8272459198271583, "grad_norm": 0.6467441469905241, "learning_rate": 3.8390916463909165e-06, "loss": 0.5818, "step": 28334 }, { "epoch": 0.8272751160550057, "grad_norm": 0.6517966106492508, "learning_rate": 3.8384428223844285e-06, "loss": 0.64, "step": 28335 }, { "epoch": 0.8273043122828531, "grad_norm": 0.699647021106487, "learning_rate": 3.8377939983779405e-06, "loss": 0.6905, "step": 28336 }, { "epoch": 0.8273335085107004, "grad_norm": 0.6598530617006193, "learning_rate": 3.837145174371453e-06, "loss": 0.6686, "step": 28337 }, { "epoch": 0.8273627047385478, "grad_norm": 0.6133779873567742, "learning_rate": 3.836496350364964e-06, "loss": 0.539, "step": 28338 }, { "epoch": 0.8273919009663951, "grad_norm": 0.6519577165598177, "learning_rate": 3.835847526358476e-06, "loss": 0.6302, "step": 28339 }, { "epoch": 0.8274210971942425, "grad_norm": 0.6635305949554102, "learning_rate": 3.835198702351987e-06, "loss": 0.6068, "step": 28340 }, { "epoch": 0.8274502934220899, "grad_norm": 0.5952792178256202, "learning_rate": 3.834549878345499e-06, "loss": 0.5432, "step": 28341 }, { "epoch": 0.8274794896499372, "grad_norm": 0.6452166901038507, "learning_rate": 3.833901054339011e-06, "loss": 0.6032, "step": 28342 }, { "epoch": 0.8275086858777846, "grad_norm": 0.6519080956037901, "learning_rate": 3.833252230332522e-06, "loss": 0.6141, "step": 28343 }, { "epoch": 0.827537882105632, "grad_norm": 0.6265802436541849, "learning_rate": 3.832603406326034e-06, "loss": 0.6052, "step": 28344 }, { "epoch": 0.8275670783334793, "grad_norm": 0.6196686749674672, "learning_rate": 3.831954582319546e-06, "loss": 0.5472, "step": 28345 }, { "epoch": 0.8275962745613267, "grad_norm": 0.636668406960344, "learning_rate": 3.831305758313058e-06, "loss": 0.5717, "step": 28346 }, { "epoch": 0.827625470789174, "grad_norm": 0.627509801366884, "learning_rate": 3.8306569343065695e-06, "loss": 0.5927, "step": 28347 }, { "epoch": 0.8276546670170214, "grad_norm": 0.603470005967785, "learning_rate": 3.8300081103000815e-06, "loss": 0.5614, "step": 28348 }, { "epoch": 0.8276838632448688, "grad_norm": 0.6330930276091581, "learning_rate": 3.829359286293593e-06, "loss": 0.6064, "step": 28349 }, { "epoch": 0.8277130594727161, "grad_norm": 0.6402664624420604, "learning_rate": 3.8287104622871055e-06, "loss": 0.571, "step": 28350 }, { "epoch": 0.8277422557005635, "grad_norm": 0.6434132636287742, "learning_rate": 3.828061638280617e-06, "loss": 0.5766, "step": 28351 }, { "epoch": 0.8277714519284108, "grad_norm": 0.6599802083580286, "learning_rate": 3.827412814274129e-06, "loss": 0.5749, "step": 28352 }, { "epoch": 0.8278006481562582, "grad_norm": 0.7197988921424522, "learning_rate": 3.82676399026764e-06, "loss": 0.6707, "step": 28353 }, { "epoch": 0.8278298443841056, "grad_norm": 0.6692353723391404, "learning_rate": 3.826115166261152e-06, "loss": 0.6454, "step": 28354 }, { "epoch": 0.8278590406119529, "grad_norm": 0.6695600851275545, "learning_rate": 3.825466342254664e-06, "loss": 0.5773, "step": 28355 }, { "epoch": 0.8278882368398003, "grad_norm": 0.6806223019148891, "learning_rate": 3.824817518248176e-06, "loss": 0.5671, "step": 28356 }, { "epoch": 0.8279174330676476, "grad_norm": 0.7265159908614448, "learning_rate": 3.824168694241687e-06, "loss": 0.7152, "step": 28357 }, { "epoch": 0.827946629295495, "grad_norm": 0.6820858903530344, "learning_rate": 3.823519870235199e-06, "loss": 0.6269, "step": 28358 }, { "epoch": 0.8279758255233424, "grad_norm": 0.5975368336482813, "learning_rate": 3.82287104622871e-06, "loss": 0.5225, "step": 28359 }, { "epoch": 0.8280050217511897, "grad_norm": 0.6321447104594039, "learning_rate": 3.8222222222222224e-06, "loss": 0.5937, "step": 28360 }, { "epoch": 0.8280342179790371, "grad_norm": 0.6416258880712187, "learning_rate": 3.8215733982157345e-06, "loss": 0.562, "step": 28361 }, { "epoch": 0.8280634142068845, "grad_norm": 0.6890224289432341, "learning_rate": 3.820924574209246e-06, "loss": 0.6789, "step": 28362 }, { "epoch": 0.8280926104347318, "grad_norm": 0.667797732179214, "learning_rate": 3.820275750202758e-06, "loss": 0.5867, "step": 28363 }, { "epoch": 0.8281218066625792, "grad_norm": 0.6326207811767908, "learning_rate": 3.81962692619627e-06, "loss": 0.5582, "step": 28364 }, { "epoch": 0.8281510028904265, "grad_norm": 0.6411916353669583, "learning_rate": 3.818978102189782e-06, "loss": 0.578, "step": 28365 }, { "epoch": 0.8281801991182739, "grad_norm": 0.6165000446081518, "learning_rate": 3.818329278183293e-06, "loss": 0.5558, "step": 28366 }, { "epoch": 0.8282093953461213, "grad_norm": 0.6635806932345505, "learning_rate": 3.817680454176805e-06, "loss": 0.6216, "step": 28367 }, { "epoch": 0.8282385915739686, "grad_norm": 0.6052902328683634, "learning_rate": 3.817031630170316e-06, "loss": 0.5381, "step": 28368 }, { "epoch": 0.828267787801816, "grad_norm": 0.6469885940809484, "learning_rate": 3.816382806163829e-06, "loss": 0.5819, "step": 28369 }, { "epoch": 0.8282969840296633, "grad_norm": 0.6298118364084557, "learning_rate": 3.81573398215734e-06, "loss": 0.5885, "step": 28370 }, { "epoch": 0.8283261802575107, "grad_norm": 0.633443321243118, "learning_rate": 3.815085158150852e-06, "loss": 0.5769, "step": 28371 }, { "epoch": 0.8283553764853581, "grad_norm": 0.6417730753483438, "learning_rate": 3.8144363341443634e-06, "loss": 0.6057, "step": 28372 }, { "epoch": 0.8283845727132054, "grad_norm": 0.6812210686639876, "learning_rate": 3.8137875101378754e-06, "loss": 0.6438, "step": 28373 }, { "epoch": 0.8284137689410528, "grad_norm": 0.6531989457656269, "learning_rate": 3.8131386861313874e-06, "loss": 0.6503, "step": 28374 }, { "epoch": 0.8284429651689001, "grad_norm": 0.6724792604171986, "learning_rate": 3.812489862124899e-06, "loss": 0.696, "step": 28375 }, { "epoch": 0.8284721613967475, "grad_norm": 0.6722489799214728, "learning_rate": 3.8118410381184106e-06, "loss": 0.6398, "step": 28376 }, { "epoch": 0.8285013576245949, "grad_norm": 0.6827027840163313, "learning_rate": 3.8111922141119222e-06, "loss": 0.6811, "step": 28377 }, { "epoch": 0.8285305538524422, "grad_norm": 0.6151191786342798, "learning_rate": 3.810543390105434e-06, "loss": 0.5563, "step": 28378 }, { "epoch": 0.8285597500802896, "grad_norm": 0.6356491578251834, "learning_rate": 3.8098945660989463e-06, "loss": 0.6164, "step": 28379 }, { "epoch": 0.828588946308137, "grad_norm": 0.6676206755484488, "learning_rate": 3.809245742092458e-06, "loss": 0.6775, "step": 28380 }, { "epoch": 0.8286181425359843, "grad_norm": 0.742547980259831, "learning_rate": 3.8085969180859695e-06, "loss": 0.7048, "step": 28381 }, { "epoch": 0.8286473387638317, "grad_norm": 0.6350937641989286, "learning_rate": 3.807948094079481e-06, "loss": 0.538, "step": 28382 }, { "epoch": 0.828676534991679, "grad_norm": 0.6670621906902309, "learning_rate": 3.807299270072993e-06, "loss": 0.6326, "step": 28383 }, { "epoch": 0.8287057312195264, "grad_norm": 0.676367124620672, "learning_rate": 3.8066504460665047e-06, "loss": 0.6491, "step": 28384 }, { "epoch": 0.8287349274473738, "grad_norm": 0.7061550741548533, "learning_rate": 3.8060016220600168e-06, "loss": 0.6184, "step": 28385 }, { "epoch": 0.8287641236752211, "grad_norm": 1.0968445265492812, "learning_rate": 3.8053527980535284e-06, "loss": 0.6255, "step": 28386 }, { "epoch": 0.8287933199030685, "grad_norm": 0.6599702229103523, "learning_rate": 3.80470397404704e-06, "loss": 0.605, "step": 28387 }, { "epoch": 0.8288225161309158, "grad_norm": 0.6636849902937586, "learning_rate": 3.804055150040552e-06, "loss": 0.6493, "step": 28388 }, { "epoch": 0.8288517123587632, "grad_norm": 0.7069925966133681, "learning_rate": 3.8034063260340636e-06, "loss": 0.6685, "step": 28389 }, { "epoch": 0.8288809085866106, "grad_norm": 0.6254751878361707, "learning_rate": 3.802757502027575e-06, "loss": 0.5581, "step": 28390 }, { "epoch": 0.8289101048144579, "grad_norm": 0.6499174605502639, "learning_rate": 3.802108678021087e-06, "loss": 0.6343, "step": 28391 }, { "epoch": 0.8289393010423053, "grad_norm": 0.6012061466081225, "learning_rate": 3.8014598540145984e-06, "loss": 0.5333, "step": 28392 }, { "epoch": 0.8289684972701526, "grad_norm": 0.6482394156257171, "learning_rate": 3.800811030008111e-06, "loss": 0.5969, "step": 28393 }, { "epoch": 0.828997693498, "grad_norm": 0.6684589746793459, "learning_rate": 3.8001622060016225e-06, "loss": 0.6287, "step": 28394 }, { "epoch": 0.8290268897258474, "grad_norm": 0.6171135261740398, "learning_rate": 3.799513381995134e-06, "loss": 0.561, "step": 28395 }, { "epoch": 0.8290560859536947, "grad_norm": 0.6315687617259828, "learning_rate": 3.7988645579886457e-06, "loss": 0.6097, "step": 28396 }, { "epoch": 0.8290852821815421, "grad_norm": 0.6858560217489919, "learning_rate": 3.798215733982158e-06, "loss": 0.6315, "step": 28397 }, { "epoch": 0.8291144784093895, "grad_norm": 0.6229879624342545, "learning_rate": 3.7975669099756697e-06, "loss": 0.5739, "step": 28398 }, { "epoch": 0.8291436746372368, "grad_norm": 0.5872265329679869, "learning_rate": 3.7969180859691813e-06, "loss": 0.4979, "step": 28399 }, { "epoch": 0.8291728708650842, "grad_norm": 0.628331472193754, "learning_rate": 3.796269261962693e-06, "loss": 0.5729, "step": 28400 }, { "epoch": 0.8292020670929315, "grad_norm": 0.65142875222314, "learning_rate": 3.7956204379562045e-06, "loss": 0.6301, "step": 28401 }, { "epoch": 0.829231263320779, "grad_norm": 0.6507593315500491, "learning_rate": 3.7949716139497166e-06, "loss": 0.612, "step": 28402 }, { "epoch": 0.8292604595486264, "grad_norm": 0.6085217954516744, "learning_rate": 3.794322789943228e-06, "loss": 0.5575, "step": 28403 }, { "epoch": 0.8292896557764737, "grad_norm": 0.6815779070909249, "learning_rate": 3.79367396593674e-06, "loss": 0.6697, "step": 28404 }, { "epoch": 0.8293188520043211, "grad_norm": 0.6348533677364869, "learning_rate": 3.793025141930252e-06, "loss": 0.555, "step": 28405 }, { "epoch": 0.8293480482321685, "grad_norm": 0.6700989945549809, "learning_rate": 3.7923763179237634e-06, "loss": 0.607, "step": 28406 }, { "epoch": 0.8293772444600158, "grad_norm": 0.634702466983042, "learning_rate": 3.7917274939172754e-06, "loss": 0.5325, "step": 28407 }, { "epoch": 0.8294064406878632, "grad_norm": 0.6768159824625299, "learning_rate": 3.791078669910787e-06, "loss": 0.6419, "step": 28408 }, { "epoch": 0.8294356369157105, "grad_norm": 0.6521217437740148, "learning_rate": 3.7904298459042986e-06, "loss": 0.6017, "step": 28409 }, { "epoch": 0.8294648331435579, "grad_norm": 0.6538313682749678, "learning_rate": 3.7897810218978102e-06, "loss": 0.6132, "step": 28410 }, { "epoch": 0.8294940293714053, "grad_norm": 0.6217991720854917, "learning_rate": 3.789132197891322e-06, "loss": 0.5912, "step": 28411 }, { "epoch": 0.8295232255992526, "grad_norm": 0.6252038088389832, "learning_rate": 3.7884833738848343e-06, "loss": 0.5519, "step": 28412 }, { "epoch": 0.8295524218271, "grad_norm": 0.6024915387203538, "learning_rate": 3.787834549878346e-06, "loss": 0.5172, "step": 28413 }, { "epoch": 0.8295816180549473, "grad_norm": 0.638152170312344, "learning_rate": 3.7871857258718575e-06, "loss": 0.5946, "step": 28414 }, { "epoch": 0.8296108142827947, "grad_norm": 0.647447818220904, "learning_rate": 3.786536901865369e-06, "loss": 0.5904, "step": 28415 }, { "epoch": 0.8296400105106421, "grad_norm": 0.6797750703415858, "learning_rate": 3.7858880778588815e-06, "loss": 0.6856, "step": 28416 }, { "epoch": 0.8296692067384894, "grad_norm": 0.6738130849006105, "learning_rate": 3.785239253852393e-06, "loss": 0.6329, "step": 28417 }, { "epoch": 0.8296984029663368, "grad_norm": 0.6674773514835405, "learning_rate": 3.7845904298459048e-06, "loss": 0.6201, "step": 28418 }, { "epoch": 0.8297275991941842, "grad_norm": 0.6244159278588791, "learning_rate": 3.7839416058394164e-06, "loss": 0.5818, "step": 28419 }, { "epoch": 0.8297567954220315, "grad_norm": 0.6512287122263501, "learning_rate": 3.783292781832928e-06, "loss": 0.6111, "step": 28420 }, { "epoch": 0.8297859916498789, "grad_norm": 0.6370182372653033, "learning_rate": 3.78264395782644e-06, "loss": 0.5628, "step": 28421 }, { "epoch": 0.8298151878777262, "grad_norm": 0.677338915444356, "learning_rate": 3.7819951338199516e-06, "loss": 0.6785, "step": 28422 }, { "epoch": 0.8298443841055736, "grad_norm": 0.5890970566943448, "learning_rate": 3.781346309813463e-06, "loss": 0.509, "step": 28423 }, { "epoch": 0.829873580333421, "grad_norm": 0.6076972539696601, "learning_rate": 3.7806974858069752e-06, "loss": 0.5333, "step": 28424 }, { "epoch": 0.8299027765612683, "grad_norm": 0.5885802032909165, "learning_rate": 3.780048661800487e-06, "loss": 0.5094, "step": 28425 }, { "epoch": 0.8299319727891157, "grad_norm": 0.6439732772827353, "learning_rate": 3.779399837793999e-06, "loss": 0.6277, "step": 28426 }, { "epoch": 0.829961169016963, "grad_norm": 0.6271890217003842, "learning_rate": 3.7787510137875105e-06, "loss": 0.5624, "step": 28427 }, { "epoch": 0.8299903652448104, "grad_norm": 0.7231222657434294, "learning_rate": 3.778102189781022e-06, "loss": 0.6505, "step": 28428 }, { "epoch": 0.8300195614726578, "grad_norm": 0.7310936524000569, "learning_rate": 3.7774533657745337e-06, "loss": 0.6812, "step": 28429 }, { "epoch": 0.8300487577005051, "grad_norm": 0.6470390347623304, "learning_rate": 3.7768045417680453e-06, "loss": 0.5744, "step": 28430 }, { "epoch": 0.8300779539283525, "grad_norm": 0.6436311126923951, "learning_rate": 3.7761557177615577e-06, "loss": 0.6219, "step": 28431 }, { "epoch": 0.8301071501561998, "grad_norm": 0.6100606224862054, "learning_rate": 3.7755068937550693e-06, "loss": 0.4923, "step": 28432 }, { "epoch": 0.8301363463840472, "grad_norm": 0.6281932002051542, "learning_rate": 3.774858069748581e-06, "loss": 0.563, "step": 28433 }, { "epoch": 0.8301655426118946, "grad_norm": 0.6810296713296817, "learning_rate": 3.7742092457420925e-06, "loss": 0.6957, "step": 28434 }, { "epoch": 0.8301947388397419, "grad_norm": 0.689744558516407, "learning_rate": 3.773560421735605e-06, "loss": 0.6988, "step": 28435 }, { "epoch": 0.8302239350675893, "grad_norm": 0.6619189195961462, "learning_rate": 3.7729115977291166e-06, "loss": 0.5719, "step": 28436 }, { "epoch": 0.8302531312954367, "grad_norm": 0.6308484713204761, "learning_rate": 3.772262773722628e-06, "loss": 0.6253, "step": 28437 }, { "epoch": 0.830282327523284, "grad_norm": 0.6157805310402837, "learning_rate": 3.77161394971614e-06, "loss": 0.5304, "step": 28438 }, { "epoch": 0.8303115237511314, "grad_norm": 0.6261819769368036, "learning_rate": 3.7709651257096514e-06, "loss": 0.5501, "step": 28439 }, { "epoch": 0.8303407199789787, "grad_norm": 0.6133101660669213, "learning_rate": 3.7703163017031634e-06, "loss": 0.5383, "step": 28440 }, { "epoch": 0.8303699162068261, "grad_norm": 0.5660671316820488, "learning_rate": 3.769667477696675e-06, "loss": 0.4872, "step": 28441 }, { "epoch": 0.8303991124346735, "grad_norm": 0.6670169778645824, "learning_rate": 3.7690186536901866e-06, "loss": 0.6188, "step": 28442 }, { "epoch": 0.8304283086625208, "grad_norm": 0.6539221205903198, "learning_rate": 3.7683698296836987e-06, "loss": 0.6126, "step": 28443 }, { "epoch": 0.8304575048903682, "grad_norm": 0.6482383723767988, "learning_rate": 3.7677210056772103e-06, "loss": 0.6142, "step": 28444 }, { "epoch": 0.8304867011182155, "grad_norm": 0.6913898160810159, "learning_rate": 3.7670721816707223e-06, "loss": 0.6822, "step": 28445 }, { "epoch": 0.8305158973460629, "grad_norm": 0.6686374664528203, "learning_rate": 3.766423357664234e-06, "loss": 0.5778, "step": 28446 }, { "epoch": 0.8305450935739103, "grad_norm": 0.6892691009164437, "learning_rate": 3.7657745336577455e-06, "loss": 0.6255, "step": 28447 }, { "epoch": 0.8305742898017576, "grad_norm": 0.6143532839948653, "learning_rate": 3.765125709651257e-06, "loss": 0.5466, "step": 28448 }, { "epoch": 0.830603486029605, "grad_norm": 0.6726311593570325, "learning_rate": 3.7644768856447696e-06, "loss": 0.6186, "step": 28449 }, { "epoch": 0.8306326822574523, "grad_norm": 0.6454529309044998, "learning_rate": 3.763828061638281e-06, "loss": 0.6484, "step": 28450 }, { "epoch": 0.8306618784852997, "grad_norm": 0.6196476779435008, "learning_rate": 3.7631792376317928e-06, "loss": 0.547, "step": 28451 }, { "epoch": 0.8306910747131471, "grad_norm": 0.6368624579392205, "learning_rate": 3.7625304136253044e-06, "loss": 0.5757, "step": 28452 }, { "epoch": 0.8307202709409944, "grad_norm": 0.6741562686476938, "learning_rate": 3.761881589618816e-06, "loss": 0.6681, "step": 28453 }, { "epoch": 0.8307494671688418, "grad_norm": 0.6334901157557536, "learning_rate": 3.761232765612328e-06, "loss": 0.5926, "step": 28454 }, { "epoch": 0.8307786633966892, "grad_norm": 0.649464271152115, "learning_rate": 3.76058394160584e-06, "loss": 0.5667, "step": 28455 }, { "epoch": 0.8308078596245365, "grad_norm": 0.6086458672693763, "learning_rate": 3.7599351175993516e-06, "loss": 0.5139, "step": 28456 }, { "epoch": 0.8308370558523839, "grad_norm": 0.7353135895148987, "learning_rate": 3.7592862935928632e-06, "loss": 0.7019, "step": 28457 }, { "epoch": 0.8308662520802312, "grad_norm": 0.6180523584459278, "learning_rate": 3.758637469586375e-06, "loss": 0.5715, "step": 28458 }, { "epoch": 0.8308954483080786, "grad_norm": 0.6591136241871981, "learning_rate": 3.757988645579887e-06, "loss": 0.6537, "step": 28459 }, { "epoch": 0.830924644535926, "grad_norm": 0.6201142941918485, "learning_rate": 3.7573398215733985e-06, "loss": 0.5794, "step": 28460 }, { "epoch": 0.8309538407637733, "grad_norm": 0.6258902814171665, "learning_rate": 3.75669099756691e-06, "loss": 0.5709, "step": 28461 }, { "epoch": 0.8309830369916207, "grad_norm": 0.6166533297273036, "learning_rate": 3.756042173560422e-06, "loss": 0.5298, "step": 28462 }, { "epoch": 0.831012233219468, "grad_norm": 0.6519784853742823, "learning_rate": 3.7553933495539337e-06, "loss": 0.5827, "step": 28463 }, { "epoch": 0.8310414294473154, "grad_norm": 0.6075683392398266, "learning_rate": 3.7547445255474457e-06, "loss": 0.5425, "step": 28464 }, { "epoch": 0.8310706256751628, "grad_norm": 0.6098082287639424, "learning_rate": 3.7540957015409573e-06, "loss": 0.525, "step": 28465 }, { "epoch": 0.8310998219030101, "grad_norm": 0.6568646151016704, "learning_rate": 3.753446877534469e-06, "loss": 0.5883, "step": 28466 }, { "epoch": 0.8311290181308575, "grad_norm": 0.6119515121245706, "learning_rate": 3.7527980535279805e-06, "loss": 0.556, "step": 28467 }, { "epoch": 0.8311582143587048, "grad_norm": 0.6389483665372266, "learning_rate": 3.752149229521493e-06, "loss": 0.574, "step": 28468 }, { "epoch": 0.8311874105865522, "grad_norm": 0.633546489437632, "learning_rate": 3.7515004055150046e-06, "loss": 0.557, "step": 28469 }, { "epoch": 0.8312166068143996, "grad_norm": 0.6280358152680255, "learning_rate": 3.750851581508516e-06, "loss": 0.586, "step": 28470 }, { "epoch": 0.8312458030422469, "grad_norm": 0.6529437159633784, "learning_rate": 3.750202757502028e-06, "loss": 0.586, "step": 28471 }, { "epoch": 0.8312749992700943, "grad_norm": 0.5682941667862793, "learning_rate": 3.7495539334955394e-06, "loss": 0.4495, "step": 28472 }, { "epoch": 0.8313041954979417, "grad_norm": 0.6717481032619371, "learning_rate": 3.7489051094890514e-06, "loss": 0.6613, "step": 28473 }, { "epoch": 0.831333391725789, "grad_norm": 0.6787671447462976, "learning_rate": 3.7482562854825635e-06, "loss": 0.6198, "step": 28474 }, { "epoch": 0.8313625879536364, "grad_norm": 0.6739702447062959, "learning_rate": 3.747607461476075e-06, "loss": 0.6515, "step": 28475 }, { "epoch": 0.8313917841814837, "grad_norm": 0.6436086970090409, "learning_rate": 3.7469586374695867e-06, "loss": 0.6069, "step": 28476 }, { "epoch": 0.8314209804093311, "grad_norm": 0.6162704480807392, "learning_rate": 3.7463098134630983e-06, "loss": 0.5235, "step": 28477 }, { "epoch": 0.8314501766371785, "grad_norm": 0.6932786854398262, "learning_rate": 3.7456609894566103e-06, "loss": 0.6172, "step": 28478 }, { "epoch": 0.8314793728650258, "grad_norm": 0.667167620546046, "learning_rate": 3.745012165450122e-06, "loss": 0.6199, "step": 28479 }, { "epoch": 0.8315085690928732, "grad_norm": 0.6842312193461075, "learning_rate": 3.7443633414436335e-06, "loss": 0.6278, "step": 28480 }, { "epoch": 0.8315377653207205, "grad_norm": 0.6307141580844301, "learning_rate": 3.743714517437145e-06, "loss": 0.5748, "step": 28481 }, { "epoch": 0.8315669615485679, "grad_norm": 0.6413397110882308, "learning_rate": 3.7430656934306576e-06, "loss": 0.5549, "step": 28482 }, { "epoch": 0.8315961577764153, "grad_norm": 0.6028903579739167, "learning_rate": 3.742416869424169e-06, "loss": 0.5276, "step": 28483 }, { "epoch": 0.8316253540042626, "grad_norm": 0.676342044516306, "learning_rate": 3.7417680454176808e-06, "loss": 0.6392, "step": 28484 }, { "epoch": 0.83165455023211, "grad_norm": 0.6287378647286769, "learning_rate": 3.7411192214111924e-06, "loss": 0.5283, "step": 28485 }, { "epoch": 0.8316837464599574, "grad_norm": 0.6766089791400592, "learning_rate": 3.740470397404704e-06, "loss": 0.6373, "step": 28486 }, { "epoch": 0.8317129426878047, "grad_norm": 0.6612221092009392, "learning_rate": 3.7398215733982164e-06, "loss": 0.6358, "step": 28487 }, { "epoch": 0.8317421389156521, "grad_norm": 0.6583295366192153, "learning_rate": 3.739172749391728e-06, "loss": 0.6728, "step": 28488 }, { "epoch": 0.8317713351434994, "grad_norm": 0.6078952704376304, "learning_rate": 3.7385239253852396e-06, "loss": 0.5695, "step": 28489 }, { "epoch": 0.8318005313713468, "grad_norm": 0.6383015613083874, "learning_rate": 3.7378751013787512e-06, "loss": 0.5771, "step": 28490 }, { "epoch": 0.8318297275991942, "grad_norm": 0.6361278683358792, "learning_rate": 3.737226277372263e-06, "loss": 0.5945, "step": 28491 }, { "epoch": 0.8318589238270415, "grad_norm": 0.6233118515775122, "learning_rate": 3.736577453365775e-06, "loss": 0.5714, "step": 28492 }, { "epoch": 0.8318881200548889, "grad_norm": 0.7111043159757089, "learning_rate": 3.735928629359287e-06, "loss": 0.6888, "step": 28493 }, { "epoch": 0.8319173162827362, "grad_norm": 0.6524267969851693, "learning_rate": 3.7352798053527985e-06, "loss": 0.5847, "step": 28494 }, { "epoch": 0.8319465125105836, "grad_norm": 0.641934334431323, "learning_rate": 3.73463098134631e-06, "loss": 0.6252, "step": 28495 }, { "epoch": 0.831975708738431, "grad_norm": 0.573965719241983, "learning_rate": 3.7339821573398217e-06, "loss": 0.4731, "step": 28496 }, { "epoch": 0.8320049049662783, "grad_norm": 0.6663808559689973, "learning_rate": 3.7333333333333337e-06, "loss": 0.6448, "step": 28497 }, { "epoch": 0.8320341011941257, "grad_norm": 0.6513550887947661, "learning_rate": 3.7326845093268453e-06, "loss": 0.6255, "step": 28498 }, { "epoch": 0.832063297421973, "grad_norm": 0.6861794038315586, "learning_rate": 3.732035685320357e-06, "loss": 0.6965, "step": 28499 }, { "epoch": 0.8320924936498204, "grad_norm": 0.7035908041921395, "learning_rate": 3.7313868613138685e-06, "loss": 0.6104, "step": 28500 }, { "epoch": 0.8321216898776678, "grad_norm": 0.637021234904561, "learning_rate": 3.730738037307381e-06, "loss": 0.6096, "step": 28501 }, { "epoch": 0.8321508861055151, "grad_norm": 0.6785380027618944, "learning_rate": 3.7300892133008926e-06, "loss": 0.6663, "step": 28502 }, { "epoch": 0.8321800823333625, "grad_norm": 0.6691455603974056, "learning_rate": 3.729440389294404e-06, "loss": 0.6286, "step": 28503 }, { "epoch": 0.8322092785612099, "grad_norm": 0.636868356795791, "learning_rate": 3.728791565287916e-06, "loss": 0.5778, "step": 28504 }, { "epoch": 0.8322384747890572, "grad_norm": 0.6994075662451185, "learning_rate": 3.7281427412814274e-06, "loss": 0.6567, "step": 28505 }, { "epoch": 0.8322676710169046, "grad_norm": 0.6770690106074946, "learning_rate": 3.72749391727494e-06, "loss": 0.6745, "step": 28506 }, { "epoch": 0.8322968672447519, "grad_norm": 0.6320118583426697, "learning_rate": 3.7268450932684515e-06, "loss": 0.5522, "step": 28507 }, { "epoch": 0.8323260634725993, "grad_norm": 0.6558251656005689, "learning_rate": 3.726196269261963e-06, "loss": 0.6061, "step": 28508 }, { "epoch": 0.8323552597004467, "grad_norm": 0.7087756189657696, "learning_rate": 3.7255474452554747e-06, "loss": 0.6054, "step": 28509 }, { "epoch": 0.832384455928294, "grad_norm": 0.6489536639932756, "learning_rate": 3.7248986212489863e-06, "loss": 0.5999, "step": 28510 }, { "epoch": 0.8324136521561414, "grad_norm": 0.6362558074502052, "learning_rate": 3.7242497972424983e-06, "loss": 0.6225, "step": 28511 }, { "epoch": 0.8324428483839887, "grad_norm": 0.6662779799873926, "learning_rate": 3.72360097323601e-06, "loss": 0.5993, "step": 28512 }, { "epoch": 0.8324720446118361, "grad_norm": 0.6282343784514298, "learning_rate": 3.722952149229522e-06, "loss": 0.5447, "step": 28513 }, { "epoch": 0.8325012408396835, "grad_norm": 0.6161114128295303, "learning_rate": 3.7223033252230335e-06, "loss": 0.5544, "step": 28514 }, { "epoch": 0.8325304370675308, "grad_norm": 0.6679820024703972, "learning_rate": 3.7216545012165456e-06, "loss": 0.6112, "step": 28515 }, { "epoch": 0.8325596332953782, "grad_norm": 0.6507309111599957, "learning_rate": 3.721005677210057e-06, "loss": 0.6395, "step": 28516 }, { "epoch": 0.8325888295232255, "grad_norm": 0.6085398217818406, "learning_rate": 3.7203568532035688e-06, "loss": 0.5041, "step": 28517 }, { "epoch": 0.8326180257510729, "grad_norm": 0.6522727194509703, "learning_rate": 3.7197080291970804e-06, "loss": 0.6209, "step": 28518 }, { "epoch": 0.8326472219789203, "grad_norm": 0.6012206807549393, "learning_rate": 3.719059205190592e-06, "loss": 0.5479, "step": 28519 }, { "epoch": 0.8326764182067676, "grad_norm": 0.620965897703643, "learning_rate": 3.7184103811841044e-06, "loss": 0.5751, "step": 28520 }, { "epoch": 0.832705614434615, "grad_norm": 0.6274643529012014, "learning_rate": 3.717761557177616e-06, "loss": 0.5991, "step": 28521 }, { "epoch": 0.8327348106624624, "grad_norm": 0.6858702312517815, "learning_rate": 3.7171127331711276e-06, "loss": 0.6722, "step": 28522 }, { "epoch": 0.8327640068903098, "grad_norm": 0.6280194758068157, "learning_rate": 3.7164639091646392e-06, "loss": 0.556, "step": 28523 }, { "epoch": 0.8327932031181572, "grad_norm": 0.6818825077231274, "learning_rate": 3.715815085158151e-06, "loss": 0.6656, "step": 28524 }, { "epoch": 0.8328223993460045, "grad_norm": 0.6640278132575371, "learning_rate": 3.7151662611516633e-06, "loss": 0.65, "step": 28525 }, { "epoch": 0.8328515955738519, "grad_norm": 0.6581045655505754, "learning_rate": 3.714517437145175e-06, "loss": 0.6184, "step": 28526 }, { "epoch": 0.8328807918016993, "grad_norm": 0.6311869143961178, "learning_rate": 3.7138686131386865e-06, "loss": 0.5911, "step": 28527 }, { "epoch": 0.8329099880295466, "grad_norm": 0.6310218799244363, "learning_rate": 3.713219789132198e-06, "loss": 0.5917, "step": 28528 }, { "epoch": 0.832939184257394, "grad_norm": 0.6770204332202113, "learning_rate": 3.7125709651257097e-06, "loss": 0.6728, "step": 28529 }, { "epoch": 0.8329683804852414, "grad_norm": 0.6275906556971085, "learning_rate": 3.7119221411192217e-06, "loss": 0.5935, "step": 28530 }, { "epoch": 0.8329975767130887, "grad_norm": 0.6599052286334839, "learning_rate": 3.7112733171127333e-06, "loss": 0.6306, "step": 28531 }, { "epoch": 0.8330267729409361, "grad_norm": 0.752891797457386, "learning_rate": 3.7106244931062454e-06, "loss": 0.6273, "step": 28532 }, { "epoch": 0.8330559691687834, "grad_norm": 0.6714611430127114, "learning_rate": 3.709975669099757e-06, "loss": 0.5534, "step": 28533 }, { "epoch": 0.8330851653966308, "grad_norm": 0.7175022844856052, "learning_rate": 3.709326845093269e-06, "loss": 0.6773, "step": 28534 }, { "epoch": 0.8331143616244782, "grad_norm": 0.672558255163925, "learning_rate": 3.7086780210867806e-06, "loss": 0.5855, "step": 28535 }, { "epoch": 0.8331435578523255, "grad_norm": 0.7148119370337485, "learning_rate": 3.708029197080292e-06, "loss": 0.7222, "step": 28536 }, { "epoch": 0.8331727540801729, "grad_norm": 0.6239436653544151, "learning_rate": 3.707380373073804e-06, "loss": 0.6013, "step": 28537 }, { "epoch": 0.8332019503080202, "grad_norm": 0.6178342149563184, "learning_rate": 3.7067315490673154e-06, "loss": 0.587, "step": 28538 }, { "epoch": 0.8332311465358676, "grad_norm": 0.6691887091287559, "learning_rate": 3.706082725060828e-06, "loss": 0.6349, "step": 28539 }, { "epoch": 0.833260342763715, "grad_norm": 0.5915183045572203, "learning_rate": 3.7054339010543395e-06, "loss": 0.5079, "step": 28540 }, { "epoch": 0.8332895389915623, "grad_norm": 0.6333473652477601, "learning_rate": 3.704785077047851e-06, "loss": 0.5832, "step": 28541 }, { "epoch": 0.8333187352194097, "grad_norm": 0.6822989034912968, "learning_rate": 3.7041362530413627e-06, "loss": 0.6213, "step": 28542 }, { "epoch": 0.833347931447257, "grad_norm": 0.5882998582449155, "learning_rate": 3.7034874290348743e-06, "loss": 0.4981, "step": 28543 }, { "epoch": 0.8333771276751044, "grad_norm": 0.6231943742351305, "learning_rate": 3.7028386050283867e-06, "loss": 0.5823, "step": 28544 }, { "epoch": 0.8334063239029518, "grad_norm": 0.6205130733367059, "learning_rate": 3.7021897810218983e-06, "loss": 0.56, "step": 28545 }, { "epoch": 0.8334355201307991, "grad_norm": 0.6228707201320385, "learning_rate": 3.70154095701541e-06, "loss": 0.5485, "step": 28546 }, { "epoch": 0.8334647163586465, "grad_norm": 0.6571681311089085, "learning_rate": 3.7008921330089215e-06, "loss": 0.6219, "step": 28547 }, { "epoch": 0.8334939125864939, "grad_norm": 0.6758132869570949, "learning_rate": 3.7002433090024336e-06, "loss": 0.6511, "step": 28548 }, { "epoch": 0.8335231088143412, "grad_norm": 0.6179411147821893, "learning_rate": 3.699594484995945e-06, "loss": 0.567, "step": 28549 }, { "epoch": 0.8335523050421886, "grad_norm": 0.6247040919168221, "learning_rate": 3.6989456609894568e-06, "loss": 0.5626, "step": 28550 }, { "epoch": 0.8335815012700359, "grad_norm": 0.6544873793447851, "learning_rate": 3.698296836982969e-06, "loss": 0.5642, "step": 28551 }, { "epoch": 0.8336106974978833, "grad_norm": 0.6903050325436222, "learning_rate": 3.6976480129764804e-06, "loss": 0.6373, "step": 28552 }, { "epoch": 0.8336398937257307, "grad_norm": 0.652854258252478, "learning_rate": 3.6969991889699924e-06, "loss": 0.5718, "step": 28553 }, { "epoch": 0.833669089953578, "grad_norm": 0.6866817773069732, "learning_rate": 3.696350364963504e-06, "loss": 0.6833, "step": 28554 }, { "epoch": 0.8336982861814254, "grad_norm": 0.6587108972427285, "learning_rate": 3.6957015409570156e-06, "loss": 0.5934, "step": 28555 }, { "epoch": 0.8337274824092727, "grad_norm": 0.6665582891780473, "learning_rate": 3.6950527169505272e-06, "loss": 0.5906, "step": 28556 }, { "epoch": 0.8337566786371201, "grad_norm": 0.6722477018098806, "learning_rate": 3.694403892944039e-06, "loss": 0.6576, "step": 28557 }, { "epoch": 0.8337858748649675, "grad_norm": 0.6637755826500168, "learning_rate": 3.6937550689375513e-06, "loss": 0.5756, "step": 28558 }, { "epoch": 0.8338150710928148, "grad_norm": 0.6523504367045689, "learning_rate": 3.693106244931063e-06, "loss": 0.5681, "step": 28559 }, { "epoch": 0.8338442673206622, "grad_norm": 0.6487905915338248, "learning_rate": 3.6924574209245745e-06, "loss": 0.6223, "step": 28560 }, { "epoch": 0.8338734635485096, "grad_norm": 0.6272240909672806, "learning_rate": 3.691808596918086e-06, "loss": 0.5851, "step": 28561 }, { "epoch": 0.8339026597763569, "grad_norm": 0.5974213607506565, "learning_rate": 3.6911597729115977e-06, "loss": 0.5394, "step": 28562 }, { "epoch": 0.8339318560042043, "grad_norm": 0.6298672936435875, "learning_rate": 3.69051094890511e-06, "loss": 0.5848, "step": 28563 }, { "epoch": 0.8339610522320516, "grad_norm": 0.6299799734596779, "learning_rate": 3.6898621248986218e-06, "loss": 0.5739, "step": 28564 }, { "epoch": 0.833990248459899, "grad_norm": 0.631907718704797, "learning_rate": 3.6892133008921334e-06, "loss": 0.6049, "step": 28565 }, { "epoch": 0.8340194446877464, "grad_norm": 0.6124505721840688, "learning_rate": 3.688564476885645e-06, "loss": 0.5185, "step": 28566 }, { "epoch": 0.8340486409155937, "grad_norm": 0.743959243738868, "learning_rate": 3.687915652879157e-06, "loss": 0.6446, "step": 28567 }, { "epoch": 0.8340778371434411, "grad_norm": 0.6669534603604591, "learning_rate": 3.6872668288726686e-06, "loss": 0.6267, "step": 28568 }, { "epoch": 0.8341070333712884, "grad_norm": 0.631236834280606, "learning_rate": 3.68661800486618e-06, "loss": 0.5732, "step": 28569 }, { "epoch": 0.8341362295991358, "grad_norm": 0.6408798474317724, "learning_rate": 3.685969180859692e-06, "loss": 0.5641, "step": 28570 }, { "epoch": 0.8341654258269832, "grad_norm": 0.679309293200003, "learning_rate": 3.685320356853204e-06, "loss": 0.6329, "step": 28571 }, { "epoch": 0.8341946220548305, "grad_norm": 0.6378126682030192, "learning_rate": 3.684671532846716e-06, "loss": 0.6106, "step": 28572 }, { "epoch": 0.8342238182826779, "grad_norm": 0.6554335505043615, "learning_rate": 3.6840227088402275e-06, "loss": 0.6362, "step": 28573 }, { "epoch": 0.8342530145105252, "grad_norm": 0.61797573750111, "learning_rate": 3.683373884833739e-06, "loss": 0.5697, "step": 28574 }, { "epoch": 0.8342822107383726, "grad_norm": 0.7143655209517484, "learning_rate": 3.6827250608272507e-06, "loss": 0.6343, "step": 28575 }, { "epoch": 0.83431140696622, "grad_norm": 0.6453471168811973, "learning_rate": 3.6820762368207623e-06, "loss": 0.5604, "step": 28576 }, { "epoch": 0.8343406031940673, "grad_norm": 0.6285066683263376, "learning_rate": 3.6814274128142747e-06, "loss": 0.5926, "step": 28577 }, { "epoch": 0.8343697994219147, "grad_norm": 0.6445489438424655, "learning_rate": 3.6807785888077863e-06, "loss": 0.6037, "step": 28578 }, { "epoch": 0.834398995649762, "grad_norm": 0.6642038271376903, "learning_rate": 3.680129764801298e-06, "loss": 0.6458, "step": 28579 }, { "epoch": 0.8344281918776094, "grad_norm": 0.6878475704383215, "learning_rate": 3.6794809407948095e-06, "loss": 0.5743, "step": 28580 }, { "epoch": 0.8344573881054568, "grad_norm": 0.6564610875043317, "learning_rate": 3.678832116788321e-06, "loss": 0.6466, "step": 28581 }, { "epoch": 0.8344865843333041, "grad_norm": 0.6609678658931409, "learning_rate": 3.6781832927818336e-06, "loss": 0.5727, "step": 28582 }, { "epoch": 0.8345157805611515, "grad_norm": 0.6509642287068745, "learning_rate": 3.677534468775345e-06, "loss": 0.6374, "step": 28583 }, { "epoch": 0.8345449767889989, "grad_norm": 0.6386100007960424, "learning_rate": 3.676885644768857e-06, "loss": 0.5816, "step": 28584 }, { "epoch": 0.8345741730168462, "grad_norm": 0.5631190748899543, "learning_rate": 3.6762368207623684e-06, "loss": 0.4247, "step": 28585 }, { "epoch": 0.8346033692446936, "grad_norm": 0.671777988825386, "learning_rate": 3.6755879967558804e-06, "loss": 0.6265, "step": 28586 }, { "epoch": 0.8346325654725409, "grad_norm": 0.6185113725421666, "learning_rate": 3.674939172749392e-06, "loss": 0.5349, "step": 28587 }, { "epoch": 0.8346617617003883, "grad_norm": 0.6169073122071579, "learning_rate": 3.6742903487429036e-06, "loss": 0.5672, "step": 28588 }, { "epoch": 0.8346909579282357, "grad_norm": 0.6874053258072019, "learning_rate": 3.6736415247364152e-06, "loss": 0.6728, "step": 28589 }, { "epoch": 0.834720154156083, "grad_norm": 0.6762989285409038, "learning_rate": 3.6729927007299273e-06, "loss": 0.6677, "step": 28590 }, { "epoch": 0.8347493503839304, "grad_norm": 0.7001095867224886, "learning_rate": 3.6723438767234393e-06, "loss": 0.568, "step": 28591 }, { "epoch": 0.8347785466117777, "grad_norm": 0.630252437562383, "learning_rate": 3.671695052716951e-06, "loss": 0.6095, "step": 28592 }, { "epoch": 0.8348077428396251, "grad_norm": 0.6172247076334888, "learning_rate": 3.6710462287104625e-06, "loss": 0.5778, "step": 28593 }, { "epoch": 0.8348369390674725, "grad_norm": 0.6700428643918127, "learning_rate": 3.670397404703974e-06, "loss": 0.6428, "step": 28594 }, { "epoch": 0.8348661352953198, "grad_norm": 0.6456531752046053, "learning_rate": 3.6697485806974857e-06, "loss": 0.6031, "step": 28595 }, { "epoch": 0.8348953315231672, "grad_norm": 0.7091499912688444, "learning_rate": 3.669099756690998e-06, "loss": 0.6576, "step": 28596 }, { "epoch": 0.8349245277510146, "grad_norm": 0.6231780830236715, "learning_rate": 3.6684509326845098e-06, "loss": 0.6159, "step": 28597 }, { "epoch": 0.8349537239788619, "grad_norm": 0.6671454867530953, "learning_rate": 3.6678021086780214e-06, "loss": 0.656, "step": 28598 }, { "epoch": 0.8349829202067093, "grad_norm": 0.634360032310784, "learning_rate": 3.667153284671533e-06, "loss": 0.5627, "step": 28599 }, { "epoch": 0.8350121164345566, "grad_norm": 0.649749777333898, "learning_rate": 3.666504460665045e-06, "loss": 0.5727, "step": 28600 }, { "epoch": 0.835041312662404, "grad_norm": 0.6259079373551114, "learning_rate": 3.6658556366585566e-06, "loss": 0.5364, "step": 28601 }, { "epoch": 0.8350705088902514, "grad_norm": 0.6308974380906659, "learning_rate": 3.6652068126520686e-06, "loss": 0.5882, "step": 28602 }, { "epoch": 0.8350997051180987, "grad_norm": 0.6675286177172748, "learning_rate": 3.6645579886455802e-06, "loss": 0.5697, "step": 28603 }, { "epoch": 0.8351289013459461, "grad_norm": 0.5959841220416896, "learning_rate": 3.663909164639092e-06, "loss": 0.51, "step": 28604 }, { "epoch": 0.8351580975737934, "grad_norm": 0.6315907918097489, "learning_rate": 3.663260340632604e-06, "loss": 0.5607, "step": 28605 }, { "epoch": 0.8351872938016408, "grad_norm": 0.6764821075788195, "learning_rate": 3.6626115166261155e-06, "loss": 0.5982, "step": 28606 }, { "epoch": 0.8352164900294882, "grad_norm": 0.7064870348259954, "learning_rate": 3.661962692619627e-06, "loss": 0.6517, "step": 28607 }, { "epoch": 0.8352456862573355, "grad_norm": 0.5855365939087306, "learning_rate": 3.6613138686131387e-06, "loss": 0.503, "step": 28608 }, { "epoch": 0.8352748824851829, "grad_norm": 0.64211845538095, "learning_rate": 3.6606650446066507e-06, "loss": 0.5734, "step": 28609 }, { "epoch": 0.8353040787130303, "grad_norm": 0.6904835898502547, "learning_rate": 3.6600162206001627e-06, "loss": 0.6751, "step": 28610 }, { "epoch": 0.8353332749408776, "grad_norm": 0.65015690591365, "learning_rate": 3.6593673965936743e-06, "loss": 0.6057, "step": 28611 }, { "epoch": 0.835362471168725, "grad_norm": 0.6461088294080094, "learning_rate": 3.658718572587186e-06, "loss": 0.5997, "step": 28612 }, { "epoch": 0.8353916673965723, "grad_norm": 0.6758597001865588, "learning_rate": 3.6580697485806975e-06, "loss": 0.6295, "step": 28613 }, { "epoch": 0.8354208636244197, "grad_norm": 0.628805261209383, "learning_rate": 3.657420924574209e-06, "loss": 0.576, "step": 28614 }, { "epoch": 0.8354500598522671, "grad_norm": 0.6486456268234981, "learning_rate": 3.6567721005677216e-06, "loss": 0.584, "step": 28615 }, { "epoch": 0.8354792560801144, "grad_norm": 0.646145959982423, "learning_rate": 3.656123276561233e-06, "loss": 0.6186, "step": 28616 }, { "epoch": 0.8355084523079618, "grad_norm": 0.6558477242871908, "learning_rate": 3.655474452554745e-06, "loss": 0.5834, "step": 28617 }, { "epoch": 0.8355376485358091, "grad_norm": 0.6833294058577277, "learning_rate": 3.6548256285482564e-06, "loss": 0.6723, "step": 28618 }, { "epoch": 0.8355668447636565, "grad_norm": 0.6432293393165549, "learning_rate": 3.6541768045417684e-06, "loss": 0.5677, "step": 28619 }, { "epoch": 0.8355960409915039, "grad_norm": 0.6402533729536478, "learning_rate": 3.65352798053528e-06, "loss": 0.5483, "step": 28620 }, { "epoch": 0.8356252372193512, "grad_norm": 0.622708994676216, "learning_rate": 3.652879156528792e-06, "loss": 0.5987, "step": 28621 }, { "epoch": 0.8356544334471986, "grad_norm": 0.6896562207709529, "learning_rate": 3.6522303325223037e-06, "loss": 0.6132, "step": 28622 }, { "epoch": 0.835683629675046, "grad_norm": 0.6407039792100434, "learning_rate": 3.6515815085158153e-06, "loss": 0.5387, "step": 28623 }, { "epoch": 0.8357128259028933, "grad_norm": 0.6847684977352798, "learning_rate": 3.6509326845093273e-06, "loss": 0.5962, "step": 28624 }, { "epoch": 0.8357420221307407, "grad_norm": 0.6466331056452012, "learning_rate": 3.650283860502839e-06, "loss": 0.6085, "step": 28625 }, { "epoch": 0.835771218358588, "grad_norm": 0.6252114374407607, "learning_rate": 3.6496350364963505e-06, "loss": 0.5581, "step": 28626 }, { "epoch": 0.8358004145864354, "grad_norm": 0.6771118402399176, "learning_rate": 3.648986212489862e-06, "loss": 0.6858, "step": 28627 }, { "epoch": 0.8358296108142828, "grad_norm": 0.6497642979749602, "learning_rate": 3.648337388483374e-06, "loss": 0.6286, "step": 28628 }, { "epoch": 0.8358588070421301, "grad_norm": 0.6238570389928195, "learning_rate": 3.647688564476886e-06, "loss": 0.5912, "step": 28629 }, { "epoch": 0.8358880032699775, "grad_norm": 0.6097343335512205, "learning_rate": 3.6470397404703978e-06, "loss": 0.5274, "step": 28630 }, { "epoch": 0.8359171994978248, "grad_norm": 0.6501630578583575, "learning_rate": 3.6463909164639094e-06, "loss": 0.5729, "step": 28631 }, { "epoch": 0.8359463957256722, "grad_norm": 0.6620160477328036, "learning_rate": 3.645742092457421e-06, "loss": 0.5951, "step": 28632 }, { "epoch": 0.8359755919535196, "grad_norm": 0.6720052436156269, "learning_rate": 3.6450932684509334e-06, "loss": 0.6308, "step": 28633 }, { "epoch": 0.8360047881813669, "grad_norm": 0.6500281842135739, "learning_rate": 3.644444444444445e-06, "loss": 0.6205, "step": 28634 }, { "epoch": 0.8360339844092143, "grad_norm": 0.6426678483411112, "learning_rate": 3.6437956204379566e-06, "loss": 0.6244, "step": 28635 }, { "epoch": 0.8360631806370616, "grad_norm": 0.635370937810952, "learning_rate": 3.6431467964314682e-06, "loss": 0.5869, "step": 28636 }, { "epoch": 0.836092376864909, "grad_norm": 0.6109335690620604, "learning_rate": 3.64249797242498e-06, "loss": 0.551, "step": 28637 }, { "epoch": 0.8361215730927564, "grad_norm": 0.6400498432473427, "learning_rate": 3.641849148418492e-06, "loss": 0.6117, "step": 28638 }, { "epoch": 0.8361507693206037, "grad_norm": 0.615879258973664, "learning_rate": 3.6412003244120035e-06, "loss": 0.5089, "step": 28639 }, { "epoch": 0.8361799655484511, "grad_norm": 0.6372134869311697, "learning_rate": 3.6405515004055155e-06, "loss": 0.5811, "step": 28640 }, { "epoch": 0.8362091617762984, "grad_norm": 0.6449772430945842, "learning_rate": 3.639902676399027e-06, "loss": 0.5649, "step": 28641 }, { "epoch": 0.8362383580041458, "grad_norm": 0.6394089531283693, "learning_rate": 3.6392538523925387e-06, "loss": 0.6064, "step": 28642 }, { "epoch": 0.8362675542319932, "grad_norm": 0.6650934525112933, "learning_rate": 3.6386050283860507e-06, "loss": 0.5957, "step": 28643 }, { "epoch": 0.8362967504598406, "grad_norm": 0.6555347750360548, "learning_rate": 3.6379562043795623e-06, "loss": 0.6037, "step": 28644 }, { "epoch": 0.836325946687688, "grad_norm": 0.6360394871219285, "learning_rate": 3.637307380373074e-06, "loss": 0.6103, "step": 28645 }, { "epoch": 0.8363551429155354, "grad_norm": 0.6473046917394135, "learning_rate": 3.6366585563665855e-06, "loss": 0.5916, "step": 28646 }, { "epoch": 0.8363843391433827, "grad_norm": 0.6474504289506463, "learning_rate": 3.636009732360097e-06, "loss": 0.5678, "step": 28647 }, { "epoch": 0.8364135353712301, "grad_norm": 0.60967518952592, "learning_rate": 3.6353609083536096e-06, "loss": 0.5575, "step": 28648 }, { "epoch": 0.8364427315990774, "grad_norm": 0.6433435221400551, "learning_rate": 3.634712084347121e-06, "loss": 0.5723, "step": 28649 }, { "epoch": 0.8364719278269248, "grad_norm": 0.6429840596030949, "learning_rate": 3.634063260340633e-06, "loss": 0.5478, "step": 28650 }, { "epoch": 0.8365011240547722, "grad_norm": 0.6561121144078346, "learning_rate": 3.6334144363341444e-06, "loss": 0.6261, "step": 28651 }, { "epoch": 0.8365303202826195, "grad_norm": 0.6573623571276597, "learning_rate": 3.632765612327657e-06, "loss": 0.5957, "step": 28652 }, { "epoch": 0.8365595165104669, "grad_norm": 0.6552871613397712, "learning_rate": 3.6321167883211685e-06, "loss": 0.6495, "step": 28653 }, { "epoch": 0.8365887127383143, "grad_norm": 0.6685050971478407, "learning_rate": 3.63146796431468e-06, "loss": 0.62, "step": 28654 }, { "epoch": 0.8366179089661616, "grad_norm": 0.6180346207561772, "learning_rate": 3.6308191403081917e-06, "loss": 0.5396, "step": 28655 }, { "epoch": 0.836647105194009, "grad_norm": 0.6072296207752321, "learning_rate": 3.6301703163017033e-06, "loss": 0.542, "step": 28656 }, { "epoch": 0.8366763014218563, "grad_norm": 0.6668125955164138, "learning_rate": 3.6295214922952153e-06, "loss": 0.5915, "step": 28657 }, { "epoch": 0.8367054976497037, "grad_norm": 0.6468294121458912, "learning_rate": 3.628872668288727e-06, "loss": 0.5961, "step": 28658 }, { "epoch": 0.8367346938775511, "grad_norm": 0.6047864929409271, "learning_rate": 3.628223844282239e-06, "loss": 0.5446, "step": 28659 }, { "epoch": 0.8367638901053984, "grad_norm": 0.6613188580355575, "learning_rate": 3.6275750202757505e-06, "loss": 0.6126, "step": 28660 }, { "epoch": 0.8367930863332458, "grad_norm": 0.6225824060933709, "learning_rate": 3.626926196269262e-06, "loss": 0.5821, "step": 28661 }, { "epoch": 0.8368222825610931, "grad_norm": 0.6574278739378138, "learning_rate": 3.626277372262774e-06, "loss": 0.5876, "step": 28662 }, { "epoch": 0.8368514787889405, "grad_norm": 0.6001804664928336, "learning_rate": 3.6256285482562858e-06, "loss": 0.4987, "step": 28663 }, { "epoch": 0.8368806750167879, "grad_norm": 0.6180128703081786, "learning_rate": 3.6249797242497974e-06, "loss": 0.5299, "step": 28664 }, { "epoch": 0.8369098712446352, "grad_norm": 0.7268623409941812, "learning_rate": 3.624330900243309e-06, "loss": 0.7888, "step": 28665 }, { "epoch": 0.8369390674724826, "grad_norm": 0.6396820798791466, "learning_rate": 3.6236820762368214e-06, "loss": 0.6024, "step": 28666 }, { "epoch": 0.83696826370033, "grad_norm": 0.6024821515983275, "learning_rate": 3.623033252230333e-06, "loss": 0.5128, "step": 28667 }, { "epoch": 0.8369974599281773, "grad_norm": 0.6444994244509078, "learning_rate": 3.6223844282238446e-06, "loss": 0.5796, "step": 28668 }, { "epoch": 0.8370266561560247, "grad_norm": 0.6766723813543963, "learning_rate": 3.6217356042173562e-06, "loss": 0.6078, "step": 28669 }, { "epoch": 0.837055852383872, "grad_norm": 0.6862044952281445, "learning_rate": 3.621086780210868e-06, "loss": 0.6467, "step": 28670 }, { "epoch": 0.8370850486117194, "grad_norm": 0.6394709227238878, "learning_rate": 3.6204379562043803e-06, "loss": 0.5671, "step": 28671 }, { "epoch": 0.8371142448395668, "grad_norm": 0.6689085268748018, "learning_rate": 3.619789132197892e-06, "loss": 0.6572, "step": 28672 }, { "epoch": 0.8371434410674141, "grad_norm": 0.5997961069854387, "learning_rate": 3.6191403081914035e-06, "loss": 0.5087, "step": 28673 }, { "epoch": 0.8371726372952615, "grad_norm": 0.6316623723599772, "learning_rate": 3.618491484184915e-06, "loss": 0.5465, "step": 28674 }, { "epoch": 0.8372018335231088, "grad_norm": 0.629860556298362, "learning_rate": 3.6178426601784267e-06, "loss": 0.5931, "step": 28675 }, { "epoch": 0.8372310297509562, "grad_norm": 0.6108703686398791, "learning_rate": 3.6171938361719387e-06, "loss": 0.5512, "step": 28676 }, { "epoch": 0.8372602259788036, "grad_norm": 0.7246009042460987, "learning_rate": 3.6165450121654503e-06, "loss": 0.6754, "step": 28677 }, { "epoch": 0.8372894222066509, "grad_norm": 0.673840575662449, "learning_rate": 3.615896188158962e-06, "loss": 0.6442, "step": 28678 }, { "epoch": 0.8373186184344983, "grad_norm": 0.632607750945896, "learning_rate": 3.615247364152474e-06, "loss": 0.5444, "step": 28679 }, { "epoch": 0.8373478146623456, "grad_norm": 0.6057078117844117, "learning_rate": 3.6145985401459856e-06, "loss": 0.5384, "step": 28680 }, { "epoch": 0.837377010890193, "grad_norm": 0.6344137017684933, "learning_rate": 3.6139497161394976e-06, "loss": 0.6098, "step": 28681 }, { "epoch": 0.8374062071180404, "grad_norm": 0.6569033429270099, "learning_rate": 3.613300892133009e-06, "loss": 0.6026, "step": 28682 }, { "epoch": 0.8374354033458877, "grad_norm": 0.683297606188525, "learning_rate": 3.612652068126521e-06, "loss": 0.6323, "step": 28683 }, { "epoch": 0.8374645995737351, "grad_norm": 0.6436869950677391, "learning_rate": 3.6120032441200324e-06, "loss": 0.5893, "step": 28684 }, { "epoch": 0.8374937958015825, "grad_norm": 0.6176819241031613, "learning_rate": 3.611354420113545e-06, "loss": 0.5448, "step": 28685 }, { "epoch": 0.8375229920294298, "grad_norm": 0.6426915508897859, "learning_rate": 3.6107055961070565e-06, "loss": 0.5914, "step": 28686 }, { "epoch": 0.8375521882572772, "grad_norm": 0.6249946181623288, "learning_rate": 3.610056772100568e-06, "loss": 0.5592, "step": 28687 }, { "epoch": 0.8375813844851245, "grad_norm": 0.6508551801999336, "learning_rate": 3.6094079480940797e-06, "loss": 0.6093, "step": 28688 }, { "epoch": 0.8376105807129719, "grad_norm": 0.6962925209678057, "learning_rate": 3.6087591240875913e-06, "loss": 0.6608, "step": 28689 }, { "epoch": 0.8376397769408193, "grad_norm": 0.7087794102726737, "learning_rate": 3.6081103000811037e-06, "loss": 0.7202, "step": 28690 }, { "epoch": 0.8376689731686666, "grad_norm": 0.6455923958439108, "learning_rate": 3.6074614760746153e-06, "loss": 0.5729, "step": 28691 }, { "epoch": 0.837698169396514, "grad_norm": 0.6013932051909513, "learning_rate": 3.606812652068127e-06, "loss": 0.5482, "step": 28692 }, { "epoch": 0.8377273656243613, "grad_norm": 0.6583866121689218, "learning_rate": 3.6061638280616385e-06, "loss": 0.6096, "step": 28693 }, { "epoch": 0.8377565618522087, "grad_norm": 0.6330192371005299, "learning_rate": 3.60551500405515e-06, "loss": 0.5504, "step": 28694 }, { "epoch": 0.8377857580800561, "grad_norm": 0.6149030645616738, "learning_rate": 3.604866180048662e-06, "loss": 0.5784, "step": 28695 }, { "epoch": 0.8378149543079034, "grad_norm": 0.6186415252291366, "learning_rate": 3.6042173560421738e-06, "loss": 0.5812, "step": 28696 }, { "epoch": 0.8378441505357508, "grad_norm": 0.6622064306189763, "learning_rate": 3.6035685320356854e-06, "loss": 0.6177, "step": 28697 }, { "epoch": 0.8378733467635981, "grad_norm": 0.6307288124704394, "learning_rate": 3.6029197080291974e-06, "loss": 0.5663, "step": 28698 }, { "epoch": 0.8379025429914455, "grad_norm": 0.6996200063927084, "learning_rate": 3.602270884022709e-06, "loss": 0.6572, "step": 28699 }, { "epoch": 0.8379317392192929, "grad_norm": 0.5887875538962425, "learning_rate": 3.601622060016221e-06, "loss": 0.4616, "step": 28700 }, { "epoch": 0.8379609354471402, "grad_norm": 0.6461357595550361, "learning_rate": 3.6009732360097326e-06, "loss": 0.5542, "step": 28701 }, { "epoch": 0.8379901316749876, "grad_norm": 0.6070889749215794, "learning_rate": 3.6003244120032442e-06, "loss": 0.5439, "step": 28702 }, { "epoch": 0.838019327902835, "grad_norm": 0.6290104357211577, "learning_rate": 3.599675587996756e-06, "loss": 0.5951, "step": 28703 }, { "epoch": 0.8380485241306823, "grad_norm": 0.6643981268965164, "learning_rate": 3.5990267639902683e-06, "loss": 0.5954, "step": 28704 }, { "epoch": 0.8380777203585297, "grad_norm": 0.6607363578357599, "learning_rate": 3.59837793998378e-06, "loss": 0.6232, "step": 28705 }, { "epoch": 0.838106916586377, "grad_norm": 0.6503855522185519, "learning_rate": 3.5977291159772915e-06, "loss": 0.6072, "step": 28706 }, { "epoch": 0.8381361128142244, "grad_norm": 0.6285483443347119, "learning_rate": 3.597080291970803e-06, "loss": 0.5721, "step": 28707 }, { "epoch": 0.8381653090420718, "grad_norm": 0.6446132734938079, "learning_rate": 3.5964314679643147e-06, "loss": 0.5391, "step": 28708 }, { "epoch": 0.8381945052699191, "grad_norm": 0.6863999636639735, "learning_rate": 3.5957826439578267e-06, "loss": 0.6497, "step": 28709 }, { "epoch": 0.8382237014977665, "grad_norm": 0.6816978729214908, "learning_rate": 3.5951338199513388e-06, "loss": 0.6301, "step": 28710 }, { "epoch": 0.8382528977256138, "grad_norm": 0.6583643537174795, "learning_rate": 3.5944849959448504e-06, "loss": 0.5915, "step": 28711 }, { "epoch": 0.8382820939534612, "grad_norm": 0.612183733733143, "learning_rate": 3.593836171938362e-06, "loss": 0.5619, "step": 28712 }, { "epoch": 0.8383112901813086, "grad_norm": 0.6657135317494219, "learning_rate": 3.5931873479318736e-06, "loss": 0.6072, "step": 28713 }, { "epoch": 0.8383404864091559, "grad_norm": 0.6665159932593743, "learning_rate": 3.5925385239253856e-06, "loss": 0.5803, "step": 28714 }, { "epoch": 0.8383696826370033, "grad_norm": 0.6854101769360487, "learning_rate": 3.5918896999188972e-06, "loss": 0.5643, "step": 28715 }, { "epoch": 0.8383988788648506, "grad_norm": 0.6248436073448892, "learning_rate": 3.591240875912409e-06, "loss": 0.601, "step": 28716 }, { "epoch": 0.838428075092698, "grad_norm": 0.6708126638982642, "learning_rate": 3.590592051905921e-06, "loss": 0.649, "step": 28717 }, { "epoch": 0.8384572713205454, "grad_norm": 0.6943591770431833, "learning_rate": 3.589943227899433e-06, "loss": 0.6358, "step": 28718 }, { "epoch": 0.8384864675483927, "grad_norm": 0.571402597983456, "learning_rate": 3.5892944038929445e-06, "loss": 0.496, "step": 28719 }, { "epoch": 0.8385156637762401, "grad_norm": 0.6349954705269606, "learning_rate": 3.588645579886456e-06, "loss": 0.5632, "step": 28720 }, { "epoch": 0.8385448600040875, "grad_norm": 0.6556303160387355, "learning_rate": 3.5879967558799677e-06, "loss": 0.5913, "step": 28721 }, { "epoch": 0.8385740562319348, "grad_norm": 0.6833556997771254, "learning_rate": 3.5873479318734793e-06, "loss": 0.6269, "step": 28722 }, { "epoch": 0.8386032524597822, "grad_norm": 0.647357264040384, "learning_rate": 3.5866991078669917e-06, "loss": 0.6256, "step": 28723 }, { "epoch": 0.8386324486876295, "grad_norm": 0.6492061883423522, "learning_rate": 3.5860502838605033e-06, "loss": 0.5685, "step": 28724 }, { "epoch": 0.8386616449154769, "grad_norm": 0.6497283670866123, "learning_rate": 3.585401459854015e-06, "loss": 0.6334, "step": 28725 }, { "epoch": 0.8386908411433243, "grad_norm": 0.685875439273393, "learning_rate": 3.5847526358475265e-06, "loss": 0.6245, "step": 28726 }, { "epoch": 0.8387200373711716, "grad_norm": 0.626489281781037, "learning_rate": 3.584103811841038e-06, "loss": 0.5665, "step": 28727 }, { "epoch": 0.838749233599019, "grad_norm": 0.6740400673799811, "learning_rate": 3.58345498783455e-06, "loss": 0.6523, "step": 28728 }, { "epoch": 0.8387784298268663, "grad_norm": 0.673281146396395, "learning_rate": 3.582806163828062e-06, "loss": 0.668, "step": 28729 }, { "epoch": 0.8388076260547137, "grad_norm": 0.6301406470402708, "learning_rate": 3.582157339821574e-06, "loss": 0.5467, "step": 28730 }, { "epoch": 0.8388368222825611, "grad_norm": 0.5991372246719484, "learning_rate": 3.5815085158150854e-06, "loss": 0.5257, "step": 28731 }, { "epoch": 0.8388660185104084, "grad_norm": 0.6309816783674754, "learning_rate": 3.580859691808597e-06, "loss": 0.5845, "step": 28732 }, { "epoch": 0.8388952147382558, "grad_norm": 0.63887993372065, "learning_rate": 3.580210867802109e-06, "loss": 0.5764, "step": 28733 }, { "epoch": 0.8389244109661032, "grad_norm": 0.6536079204441482, "learning_rate": 3.5795620437956206e-06, "loss": 0.6255, "step": 28734 }, { "epoch": 0.8389536071939505, "grad_norm": 0.653648230633157, "learning_rate": 3.5789132197891323e-06, "loss": 0.6225, "step": 28735 }, { "epoch": 0.8389828034217979, "grad_norm": 0.6512352683686304, "learning_rate": 3.578264395782644e-06, "loss": 0.6086, "step": 28736 }, { "epoch": 0.8390119996496452, "grad_norm": 0.6255834061905369, "learning_rate": 3.5776155717761563e-06, "loss": 0.567, "step": 28737 }, { "epoch": 0.8390411958774926, "grad_norm": 0.6382115392825208, "learning_rate": 3.576966747769668e-06, "loss": 0.617, "step": 28738 }, { "epoch": 0.83907039210534, "grad_norm": 0.6382735548330949, "learning_rate": 3.5763179237631795e-06, "loss": 0.5792, "step": 28739 }, { "epoch": 0.8390995883331873, "grad_norm": 0.6542377906289164, "learning_rate": 3.575669099756691e-06, "loss": 0.6397, "step": 28740 }, { "epoch": 0.8391287845610347, "grad_norm": 0.6291310885989572, "learning_rate": 3.5750202757502027e-06, "loss": 0.5404, "step": 28741 }, { "epoch": 0.839157980788882, "grad_norm": 0.6367146726676587, "learning_rate": 3.574371451743715e-06, "loss": 0.5585, "step": 28742 }, { "epoch": 0.8391871770167294, "grad_norm": 0.6576729041657472, "learning_rate": 3.5737226277372268e-06, "loss": 0.6287, "step": 28743 }, { "epoch": 0.8392163732445768, "grad_norm": 0.6465422327487087, "learning_rate": 3.5730738037307384e-06, "loss": 0.5902, "step": 28744 }, { "epoch": 0.8392455694724241, "grad_norm": 0.6446258829528295, "learning_rate": 3.57242497972425e-06, "loss": 0.6251, "step": 28745 }, { "epoch": 0.8392747657002715, "grad_norm": 0.6515162730978915, "learning_rate": 3.5717761557177616e-06, "loss": 0.5642, "step": 28746 }, { "epoch": 0.8393039619281188, "grad_norm": 0.669594198402884, "learning_rate": 3.5711273317112736e-06, "loss": 0.6464, "step": 28747 }, { "epoch": 0.8393331581559662, "grad_norm": 0.625979819796863, "learning_rate": 3.5704785077047856e-06, "loss": 0.5555, "step": 28748 }, { "epoch": 0.8393623543838136, "grad_norm": 0.6037503108246057, "learning_rate": 3.5698296836982972e-06, "loss": 0.5618, "step": 28749 }, { "epoch": 0.8393915506116609, "grad_norm": 0.6772289560130071, "learning_rate": 3.569180859691809e-06, "loss": 0.627, "step": 28750 }, { "epoch": 0.8394207468395083, "grad_norm": 0.6255391217963482, "learning_rate": 3.568532035685321e-06, "loss": 0.5716, "step": 28751 }, { "epoch": 0.8394499430673557, "grad_norm": 0.6398815768262545, "learning_rate": 3.5678832116788325e-06, "loss": 0.5921, "step": 28752 }, { "epoch": 0.839479139295203, "grad_norm": 0.7729108642733722, "learning_rate": 3.567234387672344e-06, "loss": 0.625, "step": 28753 }, { "epoch": 0.8395083355230504, "grad_norm": 0.6251976541254272, "learning_rate": 3.5665855636658557e-06, "loss": 0.5891, "step": 28754 }, { "epoch": 0.8395375317508977, "grad_norm": 0.6847075949183117, "learning_rate": 3.5659367396593673e-06, "loss": 0.6183, "step": 28755 }, { "epoch": 0.8395667279787451, "grad_norm": 0.6535598557903742, "learning_rate": 3.5652879156528797e-06, "loss": 0.61, "step": 28756 }, { "epoch": 0.8395959242065925, "grad_norm": 0.6594556480280447, "learning_rate": 3.5646390916463913e-06, "loss": 0.6064, "step": 28757 }, { "epoch": 0.8396251204344398, "grad_norm": 0.681356184951034, "learning_rate": 3.563990267639903e-06, "loss": 0.6587, "step": 28758 }, { "epoch": 0.8396543166622872, "grad_norm": 0.6811832515049928, "learning_rate": 3.5633414436334146e-06, "loss": 0.6083, "step": 28759 }, { "epoch": 0.8396835128901345, "grad_norm": 0.619991334445994, "learning_rate": 3.562692619626926e-06, "loss": 0.6086, "step": 28760 }, { "epoch": 0.8397127091179819, "grad_norm": 0.6597201102851996, "learning_rate": 3.5620437956204386e-06, "loss": 0.6066, "step": 28761 }, { "epoch": 0.8397419053458293, "grad_norm": 0.6496573255478224, "learning_rate": 3.56139497161395e-06, "loss": 0.6333, "step": 28762 }, { "epoch": 0.8397711015736766, "grad_norm": 0.6395655768759003, "learning_rate": 3.560746147607462e-06, "loss": 0.6017, "step": 28763 }, { "epoch": 0.8398002978015241, "grad_norm": 0.6512531858821053, "learning_rate": 3.5600973236009734e-06, "loss": 0.6049, "step": 28764 }, { "epoch": 0.8398294940293715, "grad_norm": 0.6825728878905195, "learning_rate": 3.559448499594485e-06, "loss": 0.6631, "step": 28765 }, { "epoch": 0.8398586902572188, "grad_norm": 0.611474455087325, "learning_rate": 3.558799675587997e-06, "loss": 0.5294, "step": 28766 }, { "epoch": 0.8398878864850662, "grad_norm": 0.6542355707059979, "learning_rate": 3.5581508515815087e-06, "loss": 0.5942, "step": 28767 }, { "epoch": 0.8399170827129135, "grad_norm": 0.650128177815931, "learning_rate": 3.5575020275750207e-06, "loss": 0.6302, "step": 28768 }, { "epoch": 0.8399462789407609, "grad_norm": 0.7144094030949697, "learning_rate": 3.5568532035685323e-06, "loss": 0.6904, "step": 28769 }, { "epoch": 0.8399754751686083, "grad_norm": 0.6462399408285644, "learning_rate": 3.5562043795620443e-06, "loss": 0.5506, "step": 28770 }, { "epoch": 0.8400046713964556, "grad_norm": 0.6786382922355602, "learning_rate": 3.555555555555556e-06, "loss": 0.6297, "step": 28771 }, { "epoch": 0.840033867624303, "grad_norm": 0.6735371251098423, "learning_rate": 3.5549067315490675e-06, "loss": 0.6541, "step": 28772 }, { "epoch": 0.8400630638521503, "grad_norm": 0.6127325102591638, "learning_rate": 3.554257907542579e-06, "loss": 0.502, "step": 28773 }, { "epoch": 0.8400922600799977, "grad_norm": 0.6444001285903088, "learning_rate": 3.5536090835360907e-06, "loss": 0.6062, "step": 28774 }, { "epoch": 0.8401214563078451, "grad_norm": 0.6841719778312522, "learning_rate": 3.552960259529603e-06, "loss": 0.5667, "step": 28775 }, { "epoch": 0.8401506525356924, "grad_norm": 0.6893226425459356, "learning_rate": 3.5523114355231148e-06, "loss": 0.7027, "step": 28776 }, { "epoch": 0.8401798487635398, "grad_norm": 0.6717688182810612, "learning_rate": 3.5516626115166264e-06, "loss": 0.5803, "step": 28777 }, { "epoch": 0.8402090449913872, "grad_norm": 0.6386391938898683, "learning_rate": 3.551013787510138e-06, "loss": 0.61, "step": 28778 }, { "epoch": 0.8402382412192345, "grad_norm": 0.66748897986048, "learning_rate": 3.5503649635036496e-06, "loss": 0.6214, "step": 28779 }, { "epoch": 0.8402674374470819, "grad_norm": 0.7275596009621128, "learning_rate": 3.549716139497162e-06, "loss": 0.7729, "step": 28780 }, { "epoch": 0.8402966336749292, "grad_norm": 0.6462373121522311, "learning_rate": 3.5490673154906736e-06, "loss": 0.5629, "step": 28781 }, { "epoch": 0.8403258299027766, "grad_norm": 0.6156788407804523, "learning_rate": 3.5484184914841852e-06, "loss": 0.5489, "step": 28782 }, { "epoch": 0.840355026130624, "grad_norm": 0.6491547264852104, "learning_rate": 3.547769667477697e-06, "loss": 0.5941, "step": 28783 }, { "epoch": 0.8403842223584713, "grad_norm": 0.6236599165190112, "learning_rate": 3.547120843471209e-06, "loss": 0.5798, "step": 28784 }, { "epoch": 0.8404134185863187, "grad_norm": 0.6062179020729541, "learning_rate": 3.5464720194647205e-06, "loss": 0.5187, "step": 28785 }, { "epoch": 0.840442614814166, "grad_norm": 0.7498901242493381, "learning_rate": 3.545823195458232e-06, "loss": 0.5478, "step": 28786 }, { "epoch": 0.8404718110420134, "grad_norm": 0.614127858776324, "learning_rate": 3.545174371451744e-06, "loss": 0.571, "step": 28787 }, { "epoch": 0.8405010072698608, "grad_norm": 0.6212220944799961, "learning_rate": 3.5445255474452557e-06, "loss": 0.5638, "step": 28788 }, { "epoch": 0.8405302034977081, "grad_norm": 0.6548560298577131, "learning_rate": 3.5438767234387677e-06, "loss": 0.6298, "step": 28789 }, { "epoch": 0.8405593997255555, "grad_norm": 0.6342917171482045, "learning_rate": 3.5432278994322793e-06, "loss": 0.6036, "step": 28790 }, { "epoch": 0.8405885959534029, "grad_norm": 0.6326764291378971, "learning_rate": 3.542579075425791e-06, "loss": 0.605, "step": 28791 }, { "epoch": 0.8406177921812502, "grad_norm": 0.7188156247996486, "learning_rate": 3.5419302514193026e-06, "loss": 0.6143, "step": 28792 }, { "epoch": 0.8406469884090976, "grad_norm": 0.6427370728760932, "learning_rate": 3.541281427412814e-06, "loss": 0.5801, "step": 28793 }, { "epoch": 0.8406761846369449, "grad_norm": 0.7376538010756979, "learning_rate": 3.5406326034063266e-06, "loss": 0.7108, "step": 28794 }, { "epoch": 0.8407053808647923, "grad_norm": 0.6394971295718521, "learning_rate": 3.5399837793998382e-06, "loss": 0.6253, "step": 28795 }, { "epoch": 0.8407345770926397, "grad_norm": 0.6453100399315771, "learning_rate": 3.53933495539335e-06, "loss": 0.6022, "step": 28796 }, { "epoch": 0.840763773320487, "grad_norm": 0.6094004693256903, "learning_rate": 3.5386861313868614e-06, "loss": 0.543, "step": 28797 }, { "epoch": 0.8407929695483344, "grad_norm": 0.6619361168456072, "learning_rate": 3.538037307380373e-06, "loss": 0.6127, "step": 28798 }, { "epoch": 0.8408221657761817, "grad_norm": 0.6322080762621957, "learning_rate": 3.5373884833738855e-06, "loss": 0.5519, "step": 28799 }, { "epoch": 0.8408513620040291, "grad_norm": 0.7164252152798087, "learning_rate": 3.536739659367397e-06, "loss": 0.7005, "step": 28800 }, { "epoch": 0.8408805582318765, "grad_norm": 0.6109880590133465, "learning_rate": 3.5360908353609087e-06, "loss": 0.573, "step": 28801 }, { "epoch": 0.8409097544597238, "grad_norm": 0.6316813558887372, "learning_rate": 3.5354420113544203e-06, "loss": 0.5866, "step": 28802 }, { "epoch": 0.8409389506875712, "grad_norm": 0.6433226074213446, "learning_rate": 3.5347931873479323e-06, "loss": 0.5929, "step": 28803 }, { "epoch": 0.8409681469154185, "grad_norm": 0.5919062004880128, "learning_rate": 3.534144363341444e-06, "loss": 0.5045, "step": 28804 }, { "epoch": 0.8409973431432659, "grad_norm": 0.6496779185353909, "learning_rate": 3.5334955393349555e-06, "loss": 0.6263, "step": 28805 }, { "epoch": 0.8410265393711133, "grad_norm": 0.6478466044773121, "learning_rate": 3.5328467153284675e-06, "loss": 0.5813, "step": 28806 }, { "epoch": 0.8410557355989606, "grad_norm": 0.6426604041554607, "learning_rate": 3.532197891321979e-06, "loss": 0.5971, "step": 28807 }, { "epoch": 0.841084931826808, "grad_norm": 0.6024337439964335, "learning_rate": 3.531549067315491e-06, "loss": 0.5286, "step": 28808 }, { "epoch": 0.8411141280546554, "grad_norm": 0.6825461734426358, "learning_rate": 3.5309002433090028e-06, "loss": 0.6261, "step": 28809 }, { "epoch": 0.8411433242825027, "grad_norm": 0.5965512465621281, "learning_rate": 3.5302514193025144e-06, "loss": 0.5229, "step": 28810 }, { "epoch": 0.8411725205103501, "grad_norm": 0.6227035630432773, "learning_rate": 3.529602595296026e-06, "loss": 0.5749, "step": 28811 }, { "epoch": 0.8412017167381974, "grad_norm": 0.640824196727923, "learning_rate": 3.5289537712895376e-06, "loss": 0.6248, "step": 28812 }, { "epoch": 0.8412309129660448, "grad_norm": 0.6384517880008066, "learning_rate": 3.52830494728305e-06, "loss": 0.5936, "step": 28813 }, { "epoch": 0.8412601091938922, "grad_norm": 0.6716629546304128, "learning_rate": 3.5276561232765616e-06, "loss": 0.6533, "step": 28814 }, { "epoch": 0.8412893054217395, "grad_norm": 0.6381199694512597, "learning_rate": 3.5270072992700733e-06, "loss": 0.5724, "step": 28815 }, { "epoch": 0.8413185016495869, "grad_norm": 0.6361547451976352, "learning_rate": 3.526358475263585e-06, "loss": 0.6093, "step": 28816 }, { "epoch": 0.8413476978774342, "grad_norm": 0.6596528817277092, "learning_rate": 3.525709651257097e-06, "loss": 0.5813, "step": 28817 }, { "epoch": 0.8413768941052816, "grad_norm": 0.6436175868463919, "learning_rate": 3.525060827250609e-06, "loss": 0.5963, "step": 28818 }, { "epoch": 0.841406090333129, "grad_norm": 0.6148159396814136, "learning_rate": 3.5244120032441205e-06, "loss": 0.5282, "step": 28819 }, { "epoch": 0.8414352865609763, "grad_norm": 0.6779760487803475, "learning_rate": 3.523763179237632e-06, "loss": 0.6546, "step": 28820 }, { "epoch": 0.8414644827888237, "grad_norm": 0.6616608397932783, "learning_rate": 3.5231143552311437e-06, "loss": 0.5905, "step": 28821 }, { "epoch": 0.841493679016671, "grad_norm": 0.6102527328364268, "learning_rate": 3.5224655312246557e-06, "loss": 0.5293, "step": 28822 }, { "epoch": 0.8415228752445184, "grad_norm": 0.6129280908714773, "learning_rate": 3.5218167072181674e-06, "loss": 0.5244, "step": 28823 }, { "epoch": 0.8415520714723658, "grad_norm": 0.6327953247699506, "learning_rate": 3.521167883211679e-06, "loss": 0.5828, "step": 28824 }, { "epoch": 0.8415812677002131, "grad_norm": 0.6726027599949298, "learning_rate": 3.520519059205191e-06, "loss": 0.6258, "step": 28825 }, { "epoch": 0.8416104639280605, "grad_norm": 0.6095687558864153, "learning_rate": 3.5198702351987026e-06, "loss": 0.5758, "step": 28826 }, { "epoch": 0.8416396601559079, "grad_norm": 0.6357172678007876, "learning_rate": 3.5192214111922146e-06, "loss": 0.5698, "step": 28827 }, { "epoch": 0.8416688563837552, "grad_norm": 0.6438716993385487, "learning_rate": 3.5185725871857262e-06, "loss": 0.575, "step": 28828 }, { "epoch": 0.8416980526116026, "grad_norm": 0.6547029905426923, "learning_rate": 3.517923763179238e-06, "loss": 0.6651, "step": 28829 }, { "epoch": 0.8417272488394499, "grad_norm": 0.6219509647659347, "learning_rate": 3.5172749391727494e-06, "loss": 0.6015, "step": 28830 }, { "epoch": 0.8417564450672973, "grad_norm": 0.6760659223832018, "learning_rate": 3.516626115166261e-06, "loss": 0.5913, "step": 28831 }, { "epoch": 0.8417856412951447, "grad_norm": 0.6414933700136819, "learning_rate": 3.5159772911597735e-06, "loss": 0.5557, "step": 28832 }, { "epoch": 0.841814837522992, "grad_norm": 0.6971704439915971, "learning_rate": 3.515328467153285e-06, "loss": 0.6834, "step": 28833 }, { "epoch": 0.8418440337508394, "grad_norm": 0.7168877898212883, "learning_rate": 3.5146796431467967e-06, "loss": 0.6814, "step": 28834 }, { "epoch": 0.8418732299786867, "grad_norm": 0.6226753656528025, "learning_rate": 3.5140308191403083e-06, "loss": 0.583, "step": 28835 }, { "epoch": 0.8419024262065341, "grad_norm": 0.6243498526585993, "learning_rate": 3.5133819951338203e-06, "loss": 0.584, "step": 28836 }, { "epoch": 0.8419316224343815, "grad_norm": 0.6754807741595427, "learning_rate": 3.5127331711273323e-06, "loss": 0.7105, "step": 28837 }, { "epoch": 0.8419608186622288, "grad_norm": 0.6288801344946119, "learning_rate": 3.512084347120844e-06, "loss": 0.5932, "step": 28838 }, { "epoch": 0.8419900148900762, "grad_norm": 0.6222161413028923, "learning_rate": 3.5114355231143556e-06, "loss": 0.5797, "step": 28839 }, { "epoch": 0.8420192111179235, "grad_norm": 0.6754468421867853, "learning_rate": 3.510786699107867e-06, "loss": 0.6294, "step": 28840 }, { "epoch": 0.8420484073457709, "grad_norm": 0.6458368973832479, "learning_rate": 3.510137875101379e-06, "loss": 0.6132, "step": 28841 }, { "epoch": 0.8420776035736183, "grad_norm": 0.6336978898124477, "learning_rate": 3.5094890510948908e-06, "loss": 0.5511, "step": 28842 }, { "epoch": 0.8421067998014656, "grad_norm": 0.6484592936391192, "learning_rate": 3.5088402270884024e-06, "loss": 0.5373, "step": 28843 }, { "epoch": 0.842135996029313, "grad_norm": 0.6288921364951175, "learning_rate": 3.508191403081914e-06, "loss": 0.5681, "step": 28844 }, { "epoch": 0.8421651922571604, "grad_norm": 0.671955505942093, "learning_rate": 3.507542579075426e-06, "loss": 0.6625, "step": 28845 }, { "epoch": 0.8421943884850077, "grad_norm": 0.7114240532397016, "learning_rate": 3.506893755068938e-06, "loss": 0.642, "step": 28846 }, { "epoch": 0.8422235847128551, "grad_norm": 0.6622845659139952, "learning_rate": 3.5062449310624497e-06, "loss": 0.6182, "step": 28847 }, { "epoch": 0.8422527809407024, "grad_norm": 0.5977601630740581, "learning_rate": 3.5055961070559613e-06, "loss": 0.5639, "step": 28848 }, { "epoch": 0.8422819771685498, "grad_norm": 0.6455740499504697, "learning_rate": 3.504947283049473e-06, "loss": 0.5793, "step": 28849 }, { "epoch": 0.8423111733963972, "grad_norm": 0.6209073730867479, "learning_rate": 3.5042984590429845e-06, "loss": 0.5688, "step": 28850 }, { "epoch": 0.8423403696242445, "grad_norm": 0.6847883099859706, "learning_rate": 3.503649635036497e-06, "loss": 0.7109, "step": 28851 }, { "epoch": 0.8423695658520919, "grad_norm": 0.6650666937593953, "learning_rate": 3.5030008110300085e-06, "loss": 0.6148, "step": 28852 }, { "epoch": 0.8423987620799392, "grad_norm": 0.6458769376223895, "learning_rate": 3.50235198702352e-06, "loss": 0.6148, "step": 28853 }, { "epoch": 0.8424279583077866, "grad_norm": 0.5963441239184686, "learning_rate": 3.5017031630170317e-06, "loss": 0.514, "step": 28854 }, { "epoch": 0.842457154535634, "grad_norm": 0.6649367411063025, "learning_rate": 3.5010543390105438e-06, "loss": 0.6372, "step": 28855 }, { "epoch": 0.8424863507634813, "grad_norm": 0.642522768395739, "learning_rate": 3.5004055150040554e-06, "loss": 0.6067, "step": 28856 }, { "epoch": 0.8425155469913287, "grad_norm": 0.6769876975941306, "learning_rate": 3.4997566909975674e-06, "loss": 0.6677, "step": 28857 }, { "epoch": 0.842544743219176, "grad_norm": 0.6069132010313801, "learning_rate": 3.499107866991079e-06, "loss": 0.5477, "step": 28858 }, { "epoch": 0.8425739394470234, "grad_norm": 0.6969286341169526, "learning_rate": 3.4984590429845906e-06, "loss": 0.6437, "step": 28859 }, { "epoch": 0.8426031356748708, "grad_norm": 0.5896402331157806, "learning_rate": 3.4978102189781026e-06, "loss": 0.5033, "step": 28860 }, { "epoch": 0.8426323319027181, "grad_norm": 0.6457523820124851, "learning_rate": 3.4971613949716142e-06, "loss": 0.5623, "step": 28861 }, { "epoch": 0.8426615281305655, "grad_norm": 0.6250470842462933, "learning_rate": 3.496512570965126e-06, "loss": 0.6091, "step": 28862 }, { "epoch": 0.8426907243584129, "grad_norm": 0.6633489043130456, "learning_rate": 3.4958637469586374e-06, "loss": 0.5887, "step": 28863 }, { "epoch": 0.8427199205862602, "grad_norm": 0.7029618195854233, "learning_rate": 3.4952149229521495e-06, "loss": 0.6616, "step": 28864 }, { "epoch": 0.8427491168141076, "grad_norm": 0.761381508529714, "learning_rate": 3.4945660989456615e-06, "loss": 0.5855, "step": 28865 }, { "epoch": 0.8427783130419549, "grad_norm": 0.634186310780242, "learning_rate": 3.493917274939173e-06, "loss": 0.5613, "step": 28866 }, { "epoch": 0.8428075092698023, "grad_norm": 0.6642328570693077, "learning_rate": 3.4932684509326847e-06, "loss": 0.6274, "step": 28867 }, { "epoch": 0.8428367054976497, "grad_norm": 0.7043865551383838, "learning_rate": 3.4926196269261963e-06, "loss": 0.683, "step": 28868 }, { "epoch": 0.842865901725497, "grad_norm": 0.596219161447685, "learning_rate": 3.4919708029197087e-06, "loss": 0.5231, "step": 28869 }, { "epoch": 0.8428950979533444, "grad_norm": 0.6286582157422853, "learning_rate": 3.4913219789132203e-06, "loss": 0.5788, "step": 28870 }, { "epoch": 0.8429242941811917, "grad_norm": 0.6442560353457701, "learning_rate": 3.490673154906732e-06, "loss": 0.5932, "step": 28871 }, { "epoch": 0.8429534904090391, "grad_norm": 0.6187226141243325, "learning_rate": 3.4900243309002436e-06, "loss": 0.546, "step": 28872 }, { "epoch": 0.8429826866368865, "grad_norm": 0.6288782996288206, "learning_rate": 3.489375506893755e-06, "loss": 0.573, "step": 28873 }, { "epoch": 0.8430118828647338, "grad_norm": 0.6464743721430245, "learning_rate": 3.488726682887267e-06, "loss": 0.5624, "step": 28874 }, { "epoch": 0.8430410790925812, "grad_norm": 0.6382442975317956, "learning_rate": 3.488077858880779e-06, "loss": 0.5557, "step": 28875 }, { "epoch": 0.8430702753204286, "grad_norm": 0.6461828849282815, "learning_rate": 3.487429034874291e-06, "loss": 0.5782, "step": 28876 }, { "epoch": 0.8430994715482759, "grad_norm": 0.624464447923111, "learning_rate": 3.4867802108678024e-06, "loss": 0.5433, "step": 28877 }, { "epoch": 0.8431286677761233, "grad_norm": 0.5885322631178187, "learning_rate": 3.486131386861314e-06, "loss": 0.5102, "step": 28878 }, { "epoch": 0.8431578640039706, "grad_norm": 0.6480682101216987, "learning_rate": 3.485482562854826e-06, "loss": 0.6029, "step": 28879 }, { "epoch": 0.843187060231818, "grad_norm": 0.6568594889854736, "learning_rate": 3.4848337388483377e-06, "loss": 0.634, "step": 28880 }, { "epoch": 0.8432162564596654, "grad_norm": 0.6234994518831605, "learning_rate": 3.4841849148418493e-06, "loss": 0.5586, "step": 28881 }, { "epoch": 0.8432454526875127, "grad_norm": 0.6688321923475212, "learning_rate": 3.483536090835361e-06, "loss": 0.5868, "step": 28882 }, { "epoch": 0.8432746489153601, "grad_norm": 0.6618642936658076, "learning_rate": 3.482887266828873e-06, "loss": 0.6147, "step": 28883 }, { "epoch": 0.8433038451432074, "grad_norm": 0.6523999172794273, "learning_rate": 3.482238442822385e-06, "loss": 0.6376, "step": 28884 }, { "epoch": 0.8433330413710549, "grad_norm": 0.6485714832238205, "learning_rate": 3.4815896188158965e-06, "loss": 0.5877, "step": 28885 }, { "epoch": 0.8433622375989023, "grad_norm": 0.6490646512388095, "learning_rate": 3.480940794809408e-06, "loss": 0.587, "step": 28886 }, { "epoch": 0.8433914338267496, "grad_norm": 0.6328781376038605, "learning_rate": 3.4802919708029197e-06, "loss": 0.6089, "step": 28887 }, { "epoch": 0.843420630054597, "grad_norm": 0.6846861061518281, "learning_rate": 3.479643146796432e-06, "loss": 0.6415, "step": 28888 }, { "epoch": 0.8434498262824444, "grad_norm": 0.6583237818501267, "learning_rate": 3.4789943227899438e-06, "loss": 0.6425, "step": 28889 }, { "epoch": 0.8434790225102917, "grad_norm": 0.6481673712304322, "learning_rate": 3.4783454987834554e-06, "loss": 0.6142, "step": 28890 }, { "epoch": 0.8435082187381391, "grad_norm": 0.6123829085106831, "learning_rate": 3.477696674776967e-06, "loss": 0.5557, "step": 28891 }, { "epoch": 0.8435374149659864, "grad_norm": 0.6298199781682498, "learning_rate": 3.4770478507704786e-06, "loss": 0.6199, "step": 28892 }, { "epoch": 0.8435666111938338, "grad_norm": 0.6631456355467357, "learning_rate": 3.4763990267639906e-06, "loss": 0.6037, "step": 28893 }, { "epoch": 0.8435958074216812, "grad_norm": 0.6386567677880336, "learning_rate": 3.4757502027575022e-06, "loss": 0.5851, "step": 28894 }, { "epoch": 0.8436250036495285, "grad_norm": 0.6718106959256034, "learning_rate": 3.4751013787510143e-06, "loss": 0.6286, "step": 28895 }, { "epoch": 0.8436541998773759, "grad_norm": 0.6375422525815401, "learning_rate": 3.474452554744526e-06, "loss": 0.6255, "step": 28896 }, { "epoch": 0.8436833961052232, "grad_norm": 0.6288209197935442, "learning_rate": 3.4738037307380375e-06, "loss": 0.5683, "step": 28897 }, { "epoch": 0.8437125923330706, "grad_norm": 0.6198586184822419, "learning_rate": 3.4731549067315495e-06, "loss": 0.558, "step": 28898 }, { "epoch": 0.843741788560918, "grad_norm": 0.6176341642129419, "learning_rate": 3.472506082725061e-06, "loss": 0.5377, "step": 28899 }, { "epoch": 0.8437709847887653, "grad_norm": 0.6688088202788117, "learning_rate": 3.4718572587185727e-06, "loss": 0.6709, "step": 28900 }, { "epoch": 0.8438001810166127, "grad_norm": 0.6300326420612332, "learning_rate": 3.4712084347120843e-06, "loss": 0.6086, "step": 28901 }, { "epoch": 0.84382937724446, "grad_norm": 0.5995810254857598, "learning_rate": 3.4705596107055967e-06, "loss": 0.5344, "step": 28902 }, { "epoch": 0.8438585734723074, "grad_norm": 0.6492037218435956, "learning_rate": 3.4699107866991084e-06, "loss": 0.5689, "step": 28903 }, { "epoch": 0.8438877697001548, "grad_norm": 0.6692037450188059, "learning_rate": 3.46926196269262e-06, "loss": 0.6382, "step": 28904 }, { "epoch": 0.8439169659280021, "grad_norm": 0.6063597239194962, "learning_rate": 3.4686131386861316e-06, "loss": 0.5402, "step": 28905 }, { "epoch": 0.8439461621558495, "grad_norm": 0.6273678846604068, "learning_rate": 3.467964314679643e-06, "loss": 0.5601, "step": 28906 }, { "epoch": 0.8439753583836969, "grad_norm": 0.6391377397328905, "learning_rate": 3.4673154906731556e-06, "loss": 0.5534, "step": 28907 }, { "epoch": 0.8440045546115442, "grad_norm": 0.6177557183703027, "learning_rate": 3.4666666666666672e-06, "loss": 0.5692, "step": 28908 }, { "epoch": 0.8440337508393916, "grad_norm": 0.6944685327551927, "learning_rate": 3.466017842660179e-06, "loss": 0.67, "step": 28909 }, { "epoch": 0.844062947067239, "grad_norm": 0.6142742758112129, "learning_rate": 3.4653690186536904e-06, "loss": 0.5736, "step": 28910 }, { "epoch": 0.8440921432950863, "grad_norm": 0.6210665694215438, "learning_rate": 3.464720194647202e-06, "loss": 0.5452, "step": 28911 }, { "epoch": 0.8441213395229337, "grad_norm": 0.6839453803223954, "learning_rate": 3.464071370640714e-06, "loss": 0.6445, "step": 28912 }, { "epoch": 0.844150535750781, "grad_norm": 0.6793618266668525, "learning_rate": 3.4634225466342257e-06, "loss": 0.6364, "step": 28913 }, { "epoch": 0.8441797319786284, "grad_norm": 0.6279171288688664, "learning_rate": 3.4627737226277377e-06, "loss": 0.5851, "step": 28914 }, { "epoch": 0.8442089282064758, "grad_norm": 0.6632780563056737, "learning_rate": 3.4621248986212493e-06, "loss": 0.6144, "step": 28915 }, { "epoch": 0.8442381244343231, "grad_norm": 0.6610458438578865, "learning_rate": 3.461476074614761e-06, "loss": 0.6012, "step": 28916 }, { "epoch": 0.8442673206621705, "grad_norm": 0.6544503366170908, "learning_rate": 3.460827250608273e-06, "loss": 0.5927, "step": 28917 }, { "epoch": 0.8442965168900178, "grad_norm": 0.6481694356147328, "learning_rate": 3.4601784266017845e-06, "loss": 0.5978, "step": 28918 }, { "epoch": 0.8443257131178652, "grad_norm": 0.6691506378409731, "learning_rate": 3.459529602595296e-06, "loss": 0.6126, "step": 28919 }, { "epoch": 0.8443549093457126, "grad_norm": 0.638624027048858, "learning_rate": 3.4588807785888077e-06, "loss": 0.6251, "step": 28920 }, { "epoch": 0.8443841055735599, "grad_norm": 0.6724811298468232, "learning_rate": 3.45823195458232e-06, "loss": 0.6507, "step": 28921 }, { "epoch": 0.8444133018014073, "grad_norm": 0.6842316000062947, "learning_rate": 3.4575831305758318e-06, "loss": 0.681, "step": 28922 }, { "epoch": 0.8444424980292546, "grad_norm": 0.6318409676433738, "learning_rate": 3.4569343065693434e-06, "loss": 0.5639, "step": 28923 }, { "epoch": 0.844471694257102, "grad_norm": 0.6625442480459061, "learning_rate": 3.456285482562855e-06, "loss": 0.6772, "step": 28924 }, { "epoch": 0.8445008904849494, "grad_norm": 0.6217478886410885, "learning_rate": 3.4556366585563666e-06, "loss": 0.57, "step": 28925 }, { "epoch": 0.8445300867127967, "grad_norm": 0.643741011502757, "learning_rate": 3.454987834549879e-06, "loss": 0.5767, "step": 28926 }, { "epoch": 0.8445592829406441, "grad_norm": 0.6311436656477997, "learning_rate": 3.4543390105433907e-06, "loss": 0.5879, "step": 28927 }, { "epoch": 0.8445884791684914, "grad_norm": 0.6544335365562227, "learning_rate": 3.4536901865369023e-06, "loss": 0.6063, "step": 28928 }, { "epoch": 0.8446176753963388, "grad_norm": 0.6351833663828269, "learning_rate": 3.453041362530414e-06, "loss": 0.5853, "step": 28929 }, { "epoch": 0.8446468716241862, "grad_norm": 0.6366456675104997, "learning_rate": 3.4523925385239255e-06, "loss": 0.5767, "step": 28930 }, { "epoch": 0.8446760678520335, "grad_norm": 0.6379664364294108, "learning_rate": 3.4517437145174375e-06, "loss": 0.5672, "step": 28931 }, { "epoch": 0.8447052640798809, "grad_norm": 0.655701837748721, "learning_rate": 3.451094890510949e-06, "loss": 0.6435, "step": 28932 }, { "epoch": 0.8447344603077283, "grad_norm": 0.6430671163238963, "learning_rate": 3.4504460665044607e-06, "loss": 0.567, "step": 28933 }, { "epoch": 0.8447636565355756, "grad_norm": 0.6557903226956049, "learning_rate": 3.4497972424979727e-06, "loss": 0.6548, "step": 28934 }, { "epoch": 0.844792852763423, "grad_norm": 0.6568323835227101, "learning_rate": 3.4491484184914848e-06, "loss": 0.57, "step": 28935 }, { "epoch": 0.8448220489912703, "grad_norm": 0.6583312597264057, "learning_rate": 3.4484995944849964e-06, "loss": 0.6331, "step": 28936 }, { "epoch": 0.8448512452191177, "grad_norm": 0.6487226673618813, "learning_rate": 3.447850770478508e-06, "loss": 0.6454, "step": 28937 }, { "epoch": 0.8448804414469651, "grad_norm": 0.6567209616190867, "learning_rate": 3.4472019464720196e-06, "loss": 0.5879, "step": 28938 }, { "epoch": 0.8449096376748124, "grad_norm": 0.650251387996147, "learning_rate": 3.446553122465531e-06, "loss": 0.5741, "step": 28939 }, { "epoch": 0.8449388339026598, "grad_norm": 0.6458942649761059, "learning_rate": 3.4459042984590436e-06, "loss": 0.594, "step": 28940 }, { "epoch": 0.8449680301305071, "grad_norm": 0.6407025773736891, "learning_rate": 3.4452554744525552e-06, "loss": 0.5602, "step": 28941 }, { "epoch": 0.8449972263583545, "grad_norm": 0.6477472689382779, "learning_rate": 3.444606650446067e-06, "loss": 0.6156, "step": 28942 }, { "epoch": 0.8450264225862019, "grad_norm": 0.666071867925386, "learning_rate": 3.4439578264395784e-06, "loss": 0.6157, "step": 28943 }, { "epoch": 0.8450556188140492, "grad_norm": 0.6372953613385806, "learning_rate": 3.44330900243309e-06, "loss": 0.5857, "step": 28944 }, { "epoch": 0.8450848150418966, "grad_norm": 0.5880222339148027, "learning_rate": 3.4426601784266025e-06, "loss": 0.5162, "step": 28945 }, { "epoch": 0.845114011269744, "grad_norm": 0.6752826099254657, "learning_rate": 3.442011354420114e-06, "loss": 0.6416, "step": 28946 }, { "epoch": 0.8451432074975913, "grad_norm": 0.6230308298186803, "learning_rate": 3.4413625304136257e-06, "loss": 0.5763, "step": 28947 }, { "epoch": 0.8451724037254387, "grad_norm": 0.7002679910723593, "learning_rate": 3.4407137064071373e-06, "loss": 0.6338, "step": 28948 }, { "epoch": 0.845201599953286, "grad_norm": 0.6750088491455533, "learning_rate": 3.440064882400649e-06, "loss": 0.59, "step": 28949 }, { "epoch": 0.8452307961811334, "grad_norm": 0.6356076883778478, "learning_rate": 3.439416058394161e-06, "loss": 0.5524, "step": 28950 }, { "epoch": 0.8452599924089808, "grad_norm": 0.6568017767320246, "learning_rate": 3.4387672343876725e-06, "loss": 0.6009, "step": 28951 }, { "epoch": 0.8452891886368281, "grad_norm": 0.6797507528950331, "learning_rate": 3.438118410381184e-06, "loss": 0.6439, "step": 28952 }, { "epoch": 0.8453183848646755, "grad_norm": 0.6890523640375841, "learning_rate": 3.437469586374696e-06, "loss": 0.6702, "step": 28953 }, { "epoch": 0.8453475810925228, "grad_norm": 0.6818069938710191, "learning_rate": 3.436820762368208e-06, "loss": 0.6187, "step": 28954 }, { "epoch": 0.8453767773203702, "grad_norm": 0.6198135798702136, "learning_rate": 3.43617193836172e-06, "loss": 0.543, "step": 28955 }, { "epoch": 0.8454059735482176, "grad_norm": 0.6236642160944595, "learning_rate": 3.4355231143552314e-06, "loss": 0.5921, "step": 28956 }, { "epoch": 0.8454351697760649, "grad_norm": 0.6501765383402563, "learning_rate": 3.434874290348743e-06, "loss": 0.6285, "step": 28957 }, { "epoch": 0.8454643660039123, "grad_norm": 0.6626733286249867, "learning_rate": 3.4342254663422546e-06, "loss": 0.5862, "step": 28958 }, { "epoch": 0.8454935622317596, "grad_norm": 0.6708226868914628, "learning_rate": 3.433576642335767e-06, "loss": 0.6601, "step": 28959 }, { "epoch": 0.845522758459607, "grad_norm": 0.6388354203918669, "learning_rate": 3.4329278183292787e-06, "loss": 0.5573, "step": 28960 }, { "epoch": 0.8455519546874544, "grad_norm": 0.8272164185029122, "learning_rate": 3.4322789943227903e-06, "loss": 0.6391, "step": 28961 }, { "epoch": 0.8455811509153017, "grad_norm": 0.5949420898837416, "learning_rate": 3.431630170316302e-06, "loss": 0.5417, "step": 28962 }, { "epoch": 0.8456103471431491, "grad_norm": 0.6233472430252945, "learning_rate": 3.4309813463098135e-06, "loss": 0.5293, "step": 28963 }, { "epoch": 0.8456395433709964, "grad_norm": 0.633404250390486, "learning_rate": 3.4303325223033255e-06, "loss": 0.5878, "step": 28964 }, { "epoch": 0.8456687395988438, "grad_norm": 0.7060262463529499, "learning_rate": 3.4296836982968375e-06, "loss": 0.6888, "step": 28965 }, { "epoch": 0.8456979358266912, "grad_norm": 0.6233605042350513, "learning_rate": 3.429034874290349e-06, "loss": 0.5547, "step": 28966 }, { "epoch": 0.8457271320545385, "grad_norm": 0.5973978909913576, "learning_rate": 3.4283860502838607e-06, "loss": 0.5282, "step": 28967 }, { "epoch": 0.8457563282823859, "grad_norm": 0.6831174523208654, "learning_rate": 3.4277372262773728e-06, "loss": 0.6462, "step": 28968 }, { "epoch": 0.8457855245102333, "grad_norm": 0.6175438019864163, "learning_rate": 3.4270884022708844e-06, "loss": 0.5384, "step": 28969 }, { "epoch": 0.8458147207380806, "grad_norm": 0.6651713151664062, "learning_rate": 3.426439578264396e-06, "loss": 0.6618, "step": 28970 }, { "epoch": 0.845843916965928, "grad_norm": 0.6538664351808716, "learning_rate": 3.4257907542579076e-06, "loss": 0.6367, "step": 28971 }, { "epoch": 0.8458731131937753, "grad_norm": 0.6438035395981547, "learning_rate": 3.4251419302514196e-06, "loss": 0.637, "step": 28972 }, { "epoch": 0.8459023094216227, "grad_norm": 0.5996630632714183, "learning_rate": 3.4244931062449316e-06, "loss": 0.4992, "step": 28973 }, { "epoch": 0.8459315056494701, "grad_norm": 0.6069031534369418, "learning_rate": 3.4238442822384432e-06, "loss": 0.5377, "step": 28974 }, { "epoch": 0.8459607018773174, "grad_norm": 0.661137747371122, "learning_rate": 3.423195458231955e-06, "loss": 0.6126, "step": 28975 }, { "epoch": 0.8459898981051648, "grad_norm": 0.6152624678792636, "learning_rate": 3.4225466342254664e-06, "loss": 0.5572, "step": 28976 }, { "epoch": 0.8460190943330121, "grad_norm": 0.6532745243963858, "learning_rate": 3.421897810218978e-06, "loss": 0.5951, "step": 28977 }, { "epoch": 0.8460482905608595, "grad_norm": 0.6728547934957104, "learning_rate": 3.4212489862124905e-06, "loss": 0.5865, "step": 28978 }, { "epoch": 0.8460774867887069, "grad_norm": 0.6668144886239153, "learning_rate": 3.420600162206002e-06, "loss": 0.669, "step": 28979 }, { "epoch": 0.8461066830165542, "grad_norm": 0.6255319762936863, "learning_rate": 3.4199513381995137e-06, "loss": 0.5465, "step": 28980 }, { "epoch": 0.8461358792444016, "grad_norm": 0.6667094994836431, "learning_rate": 3.4193025141930253e-06, "loss": 0.626, "step": 28981 }, { "epoch": 0.846165075472249, "grad_norm": 0.6444903161066915, "learning_rate": 3.418653690186537e-06, "loss": 0.5965, "step": 28982 }, { "epoch": 0.8461942717000963, "grad_norm": 0.6551260248347601, "learning_rate": 3.418004866180049e-06, "loss": 0.6157, "step": 28983 }, { "epoch": 0.8462234679279437, "grad_norm": 0.7078199128019838, "learning_rate": 3.417356042173561e-06, "loss": 0.7162, "step": 28984 }, { "epoch": 0.846252664155791, "grad_norm": 0.5899435870235056, "learning_rate": 3.4167072181670726e-06, "loss": 0.4867, "step": 28985 }, { "epoch": 0.8462818603836384, "grad_norm": 0.6768907148346863, "learning_rate": 3.416058394160584e-06, "loss": 0.6806, "step": 28986 }, { "epoch": 0.8463110566114858, "grad_norm": 0.6862481475776282, "learning_rate": 3.415409570154096e-06, "loss": 0.6549, "step": 28987 }, { "epoch": 0.8463402528393331, "grad_norm": 0.6615668658817666, "learning_rate": 3.414760746147608e-06, "loss": 0.6335, "step": 28988 }, { "epoch": 0.8463694490671805, "grad_norm": 0.6325270861661502, "learning_rate": 3.4141119221411194e-06, "loss": 0.6007, "step": 28989 }, { "epoch": 0.8463986452950278, "grad_norm": 0.6735485641565612, "learning_rate": 3.413463098134631e-06, "loss": 0.6371, "step": 28990 }, { "epoch": 0.8464278415228752, "grad_norm": 0.6612192912604019, "learning_rate": 3.4128142741281426e-06, "loss": 0.6219, "step": 28991 }, { "epoch": 0.8464570377507226, "grad_norm": 0.5902319315789814, "learning_rate": 3.412165450121655e-06, "loss": 0.5079, "step": 28992 }, { "epoch": 0.8464862339785699, "grad_norm": 0.6936664036752088, "learning_rate": 3.4115166261151667e-06, "loss": 0.6964, "step": 28993 }, { "epoch": 0.8465154302064173, "grad_norm": 0.6606959748139688, "learning_rate": 3.4108678021086783e-06, "loss": 0.6491, "step": 28994 }, { "epoch": 0.8465446264342646, "grad_norm": 0.6391418162213266, "learning_rate": 3.41021897810219e-06, "loss": 0.57, "step": 28995 }, { "epoch": 0.846573822662112, "grad_norm": 0.5928742969966164, "learning_rate": 3.4095701540957015e-06, "loss": 0.5247, "step": 28996 }, { "epoch": 0.8466030188899594, "grad_norm": 0.6177953348406219, "learning_rate": 3.408921330089214e-06, "loss": 0.5582, "step": 28997 }, { "epoch": 0.8466322151178067, "grad_norm": 0.6629507832589346, "learning_rate": 3.4082725060827255e-06, "loss": 0.6334, "step": 28998 }, { "epoch": 0.8466614113456541, "grad_norm": 0.6438981985834488, "learning_rate": 3.407623682076237e-06, "loss": 0.5654, "step": 28999 }, { "epoch": 0.8466906075735015, "grad_norm": 0.6305893006384099, "learning_rate": 3.4069748580697487e-06, "loss": 0.5719, "step": 29000 }, { "epoch": 0.8467198038013488, "grad_norm": 0.5887172156830879, "learning_rate": 3.4063260340632603e-06, "loss": 0.5095, "step": 29001 }, { "epoch": 0.8467490000291962, "grad_norm": 0.6206325229165336, "learning_rate": 3.4056772100567724e-06, "loss": 0.5841, "step": 29002 }, { "epoch": 0.8467781962570435, "grad_norm": 0.6148161217550161, "learning_rate": 3.4050283860502844e-06, "loss": 0.5829, "step": 29003 }, { "epoch": 0.8468073924848909, "grad_norm": 0.6891358539403426, "learning_rate": 3.404379562043796e-06, "loss": 0.6426, "step": 29004 }, { "epoch": 0.8468365887127384, "grad_norm": 0.6545288402331796, "learning_rate": 3.4037307380373076e-06, "loss": 0.6412, "step": 29005 }, { "epoch": 0.8468657849405857, "grad_norm": 0.69271939425752, "learning_rate": 3.4030819140308196e-06, "loss": 0.6368, "step": 29006 }, { "epoch": 0.8468949811684331, "grad_norm": 0.6524979249092214, "learning_rate": 3.4024330900243312e-06, "loss": 0.5675, "step": 29007 }, { "epoch": 0.8469241773962805, "grad_norm": 0.586747493147863, "learning_rate": 3.401784266017843e-06, "loss": 0.5226, "step": 29008 }, { "epoch": 0.8469533736241278, "grad_norm": 0.6473947471461716, "learning_rate": 3.4011354420113544e-06, "loss": 0.6115, "step": 29009 }, { "epoch": 0.8469825698519752, "grad_norm": 0.6659892247588814, "learning_rate": 3.400486618004866e-06, "loss": 0.6081, "step": 29010 }, { "epoch": 0.8470117660798225, "grad_norm": 0.6516242574381109, "learning_rate": 3.3998377939983785e-06, "loss": 0.6155, "step": 29011 }, { "epoch": 0.8470409623076699, "grad_norm": 0.6426669458556994, "learning_rate": 3.39918896999189e-06, "loss": 0.5884, "step": 29012 }, { "epoch": 0.8470701585355173, "grad_norm": 0.6467615546595653, "learning_rate": 3.3985401459854017e-06, "loss": 0.6341, "step": 29013 }, { "epoch": 0.8470993547633646, "grad_norm": 0.6450221490045394, "learning_rate": 3.3978913219789133e-06, "loss": 0.595, "step": 29014 }, { "epoch": 0.847128550991212, "grad_norm": 0.6667942582315952, "learning_rate": 3.397242497972425e-06, "loss": 0.5999, "step": 29015 }, { "epoch": 0.8471577472190593, "grad_norm": 0.5891740805093116, "learning_rate": 3.3965936739659374e-06, "loss": 0.5139, "step": 29016 }, { "epoch": 0.8471869434469067, "grad_norm": 0.6242232026665984, "learning_rate": 3.395944849959449e-06, "loss": 0.5328, "step": 29017 }, { "epoch": 0.8472161396747541, "grad_norm": 0.6659278128956819, "learning_rate": 3.3952960259529606e-06, "loss": 0.6426, "step": 29018 }, { "epoch": 0.8472453359026014, "grad_norm": 0.6362679149025483, "learning_rate": 3.394647201946472e-06, "loss": 0.6016, "step": 29019 }, { "epoch": 0.8472745321304488, "grad_norm": 0.6085742555194995, "learning_rate": 3.393998377939984e-06, "loss": 0.5298, "step": 29020 }, { "epoch": 0.8473037283582961, "grad_norm": 0.618683701304284, "learning_rate": 3.393349553933496e-06, "loss": 0.6251, "step": 29021 }, { "epoch": 0.8473329245861435, "grad_norm": 0.6714318890306349, "learning_rate": 3.3927007299270074e-06, "loss": 0.6607, "step": 29022 }, { "epoch": 0.8473621208139909, "grad_norm": 0.6052708928648, "learning_rate": 3.3920519059205194e-06, "loss": 0.5282, "step": 29023 }, { "epoch": 0.8473913170418382, "grad_norm": 0.6205387969827696, "learning_rate": 3.391403081914031e-06, "loss": 0.5321, "step": 29024 }, { "epoch": 0.8474205132696856, "grad_norm": 0.6403902495620569, "learning_rate": 3.390754257907543e-06, "loss": 0.569, "step": 29025 }, { "epoch": 0.847449709497533, "grad_norm": 0.6243145182186345, "learning_rate": 3.3901054339010547e-06, "loss": 0.5356, "step": 29026 }, { "epoch": 0.8474789057253803, "grad_norm": 0.6476071363006588, "learning_rate": 3.3894566098945663e-06, "loss": 0.6182, "step": 29027 }, { "epoch": 0.8475081019532277, "grad_norm": 0.6330461291199755, "learning_rate": 3.388807785888078e-06, "loss": 0.5799, "step": 29028 }, { "epoch": 0.847537298181075, "grad_norm": 0.6218898902733597, "learning_rate": 3.3881589618815895e-06, "loss": 0.5618, "step": 29029 }, { "epoch": 0.8475664944089224, "grad_norm": 0.6352241265274818, "learning_rate": 3.387510137875102e-06, "loss": 0.6322, "step": 29030 }, { "epoch": 0.8475956906367698, "grad_norm": 0.5830623962369377, "learning_rate": 3.3868613138686135e-06, "loss": 0.4951, "step": 29031 }, { "epoch": 0.8476248868646171, "grad_norm": 0.6730608897831454, "learning_rate": 3.386212489862125e-06, "loss": 0.6397, "step": 29032 }, { "epoch": 0.8476540830924645, "grad_norm": 0.6410221660906221, "learning_rate": 3.3855636658556367e-06, "loss": 0.5781, "step": 29033 }, { "epoch": 0.8476832793203118, "grad_norm": 0.62971445104081, "learning_rate": 3.3849148418491483e-06, "loss": 0.5739, "step": 29034 }, { "epoch": 0.8477124755481592, "grad_norm": 0.6679694949828913, "learning_rate": 3.3842660178426608e-06, "loss": 0.5441, "step": 29035 }, { "epoch": 0.8477416717760066, "grad_norm": 0.6406226094207363, "learning_rate": 3.3836171938361724e-06, "loss": 0.6011, "step": 29036 }, { "epoch": 0.8477708680038539, "grad_norm": 0.6975689072958315, "learning_rate": 3.382968369829684e-06, "loss": 0.6737, "step": 29037 }, { "epoch": 0.8478000642317013, "grad_norm": 0.6355289597656789, "learning_rate": 3.3823195458231956e-06, "loss": 0.6029, "step": 29038 }, { "epoch": 0.8478292604595487, "grad_norm": 0.6143934162017476, "learning_rate": 3.3816707218167076e-06, "loss": 0.5468, "step": 29039 }, { "epoch": 0.847858456687396, "grad_norm": 0.6531984325694922, "learning_rate": 3.3810218978102192e-06, "loss": 0.6234, "step": 29040 }, { "epoch": 0.8478876529152434, "grad_norm": 0.6265288376248203, "learning_rate": 3.380373073803731e-06, "loss": 0.5224, "step": 29041 }, { "epoch": 0.8479168491430907, "grad_norm": 0.672444313669244, "learning_rate": 3.379724249797243e-06, "loss": 0.6173, "step": 29042 }, { "epoch": 0.8479460453709381, "grad_norm": 0.6907584857611482, "learning_rate": 3.3790754257907545e-06, "loss": 0.678, "step": 29043 }, { "epoch": 0.8479752415987855, "grad_norm": 0.621581934293311, "learning_rate": 3.3784266017842665e-06, "loss": 0.5583, "step": 29044 }, { "epoch": 0.8480044378266328, "grad_norm": 0.7324786174705431, "learning_rate": 3.377777777777778e-06, "loss": 0.7817, "step": 29045 }, { "epoch": 0.8480336340544802, "grad_norm": 0.639426699045295, "learning_rate": 3.3771289537712897e-06, "loss": 0.596, "step": 29046 }, { "epoch": 0.8480628302823275, "grad_norm": 0.6655060479469467, "learning_rate": 3.3764801297648013e-06, "loss": 0.6325, "step": 29047 }, { "epoch": 0.8480920265101749, "grad_norm": 0.5875670038185178, "learning_rate": 3.375831305758313e-06, "loss": 0.4852, "step": 29048 }, { "epoch": 0.8481212227380223, "grad_norm": 0.6374741482069624, "learning_rate": 3.3751824817518254e-06, "loss": 0.6003, "step": 29049 }, { "epoch": 0.8481504189658696, "grad_norm": 0.651310704663978, "learning_rate": 3.374533657745337e-06, "loss": 0.5911, "step": 29050 }, { "epoch": 0.848179615193717, "grad_norm": 0.6477670103523705, "learning_rate": 3.3738848337388486e-06, "loss": 0.5291, "step": 29051 }, { "epoch": 0.8482088114215643, "grad_norm": 0.6401642632789704, "learning_rate": 3.37323600973236e-06, "loss": 0.5773, "step": 29052 }, { "epoch": 0.8482380076494117, "grad_norm": 0.6394190218036147, "learning_rate": 3.372587185725872e-06, "loss": 0.5984, "step": 29053 }, { "epoch": 0.8482672038772591, "grad_norm": 0.6310558287989467, "learning_rate": 3.3719383617193842e-06, "loss": 0.5671, "step": 29054 }, { "epoch": 0.8482964001051064, "grad_norm": 0.6621801057937173, "learning_rate": 3.371289537712896e-06, "loss": 0.6, "step": 29055 }, { "epoch": 0.8483255963329538, "grad_norm": 0.6407817445845025, "learning_rate": 3.3706407137064074e-06, "loss": 0.5791, "step": 29056 }, { "epoch": 0.8483547925608012, "grad_norm": 0.6076963815801651, "learning_rate": 3.369991889699919e-06, "loss": 0.5337, "step": 29057 }, { "epoch": 0.8483839887886485, "grad_norm": 0.6162730695353844, "learning_rate": 3.369343065693431e-06, "loss": 0.5859, "step": 29058 }, { "epoch": 0.8484131850164959, "grad_norm": 0.670309673365076, "learning_rate": 3.3686942416869427e-06, "loss": 0.6348, "step": 29059 }, { "epoch": 0.8484423812443432, "grad_norm": 0.6383759872766946, "learning_rate": 3.3680454176804543e-06, "loss": 0.567, "step": 29060 }, { "epoch": 0.8484715774721906, "grad_norm": 0.6601533680779582, "learning_rate": 3.3673965936739663e-06, "loss": 0.6303, "step": 29061 }, { "epoch": 0.848500773700038, "grad_norm": 0.6537553245953831, "learning_rate": 3.366747769667478e-06, "loss": 0.5966, "step": 29062 }, { "epoch": 0.8485299699278853, "grad_norm": 0.6709813813126584, "learning_rate": 3.36609894566099e-06, "loss": 0.6184, "step": 29063 }, { "epoch": 0.8485591661557327, "grad_norm": 0.6530955402550873, "learning_rate": 3.3654501216545015e-06, "loss": 0.6036, "step": 29064 }, { "epoch": 0.84858836238358, "grad_norm": 0.6170278031421631, "learning_rate": 3.364801297648013e-06, "loss": 0.5986, "step": 29065 }, { "epoch": 0.8486175586114274, "grad_norm": 0.6403659037257134, "learning_rate": 3.3641524736415247e-06, "loss": 0.6027, "step": 29066 }, { "epoch": 0.8486467548392748, "grad_norm": 0.6732514749622648, "learning_rate": 3.3635036496350363e-06, "loss": 0.5928, "step": 29067 }, { "epoch": 0.8486759510671221, "grad_norm": 0.6173632467027895, "learning_rate": 3.362854825628549e-06, "loss": 0.5619, "step": 29068 }, { "epoch": 0.8487051472949695, "grad_norm": 0.6054676903409483, "learning_rate": 3.3622060016220604e-06, "loss": 0.541, "step": 29069 }, { "epoch": 0.8487343435228168, "grad_norm": 0.6697165403129195, "learning_rate": 3.361557177615572e-06, "loss": 0.6491, "step": 29070 }, { "epoch": 0.8487635397506642, "grad_norm": 0.6205281858991134, "learning_rate": 3.3609083536090836e-06, "loss": 0.5334, "step": 29071 }, { "epoch": 0.8487927359785116, "grad_norm": 0.7144205166276183, "learning_rate": 3.3602595296025956e-06, "loss": 0.6667, "step": 29072 }, { "epoch": 0.8488219322063589, "grad_norm": 0.5916435646819383, "learning_rate": 3.3596107055961077e-06, "loss": 0.4854, "step": 29073 }, { "epoch": 0.8488511284342063, "grad_norm": 0.6586218333349332, "learning_rate": 3.3589618815896193e-06, "loss": 0.5963, "step": 29074 }, { "epoch": 0.8488803246620537, "grad_norm": 0.6475243008481673, "learning_rate": 3.358313057583131e-06, "loss": 0.6166, "step": 29075 }, { "epoch": 0.848909520889901, "grad_norm": 0.6764336303934312, "learning_rate": 3.3576642335766425e-06, "loss": 0.6492, "step": 29076 }, { "epoch": 0.8489387171177484, "grad_norm": 0.7153128205678627, "learning_rate": 3.3570154095701545e-06, "loss": 0.6576, "step": 29077 }, { "epoch": 0.8489679133455957, "grad_norm": 0.6507391642215663, "learning_rate": 3.356366585563666e-06, "loss": 0.6152, "step": 29078 }, { "epoch": 0.8489971095734431, "grad_norm": 0.6379690370476223, "learning_rate": 3.3557177615571777e-06, "loss": 0.592, "step": 29079 }, { "epoch": 0.8490263058012905, "grad_norm": 0.6714506563603386, "learning_rate": 3.3550689375506897e-06, "loss": 0.6407, "step": 29080 }, { "epoch": 0.8490555020291378, "grad_norm": 0.6352302375317439, "learning_rate": 3.3544201135442013e-06, "loss": 0.5634, "step": 29081 }, { "epoch": 0.8490846982569852, "grad_norm": 0.6918095966814514, "learning_rate": 3.3537712895377134e-06, "loss": 0.562, "step": 29082 }, { "epoch": 0.8491138944848325, "grad_norm": 0.6531658531170832, "learning_rate": 3.353122465531225e-06, "loss": 0.6317, "step": 29083 }, { "epoch": 0.8491430907126799, "grad_norm": 0.6356701893396801, "learning_rate": 3.3524736415247366e-06, "loss": 0.5963, "step": 29084 }, { "epoch": 0.8491722869405273, "grad_norm": 0.6678265414492529, "learning_rate": 3.351824817518248e-06, "loss": 0.6152, "step": 29085 }, { "epoch": 0.8492014831683746, "grad_norm": 0.6067326301561636, "learning_rate": 3.3511759935117606e-06, "loss": 0.5216, "step": 29086 }, { "epoch": 0.849230679396222, "grad_norm": 0.5821401458508295, "learning_rate": 3.3505271695052722e-06, "loss": 0.4525, "step": 29087 }, { "epoch": 0.8492598756240693, "grad_norm": 0.6369829338489914, "learning_rate": 3.349878345498784e-06, "loss": 0.5652, "step": 29088 }, { "epoch": 0.8492890718519167, "grad_norm": 0.6553194776379924, "learning_rate": 3.3492295214922954e-06, "loss": 0.6545, "step": 29089 }, { "epoch": 0.8493182680797641, "grad_norm": 0.646034213818437, "learning_rate": 3.348580697485807e-06, "loss": 0.5809, "step": 29090 }, { "epoch": 0.8493474643076114, "grad_norm": 0.6343982691635898, "learning_rate": 3.347931873479319e-06, "loss": 0.5977, "step": 29091 }, { "epoch": 0.8493766605354588, "grad_norm": 0.6370128876124962, "learning_rate": 3.347283049472831e-06, "loss": 0.5278, "step": 29092 }, { "epoch": 0.8494058567633062, "grad_norm": 0.682732107759791, "learning_rate": 3.3466342254663427e-06, "loss": 0.6073, "step": 29093 }, { "epoch": 0.8494350529911535, "grad_norm": 0.6926271763361322, "learning_rate": 3.3459854014598543e-06, "loss": 0.662, "step": 29094 }, { "epoch": 0.8494642492190009, "grad_norm": 0.6985075760978238, "learning_rate": 3.345336577453366e-06, "loss": 0.6701, "step": 29095 }, { "epoch": 0.8494934454468482, "grad_norm": 0.6770353542596403, "learning_rate": 3.344687753446878e-06, "loss": 0.605, "step": 29096 }, { "epoch": 0.8495226416746956, "grad_norm": 0.734661965886558, "learning_rate": 3.3440389294403895e-06, "loss": 0.7572, "step": 29097 }, { "epoch": 0.849551837902543, "grad_norm": 0.6764248494719752, "learning_rate": 3.343390105433901e-06, "loss": 0.6276, "step": 29098 }, { "epoch": 0.8495810341303903, "grad_norm": 0.6423008819653225, "learning_rate": 3.3427412814274127e-06, "loss": 0.5713, "step": 29099 }, { "epoch": 0.8496102303582377, "grad_norm": 0.6367204023944825, "learning_rate": 3.3420924574209248e-06, "loss": 0.5689, "step": 29100 }, { "epoch": 0.849639426586085, "grad_norm": 0.6070857230311839, "learning_rate": 3.341443633414437e-06, "loss": 0.5614, "step": 29101 }, { "epoch": 0.8496686228139324, "grad_norm": 0.6350250878904794, "learning_rate": 3.3407948094079484e-06, "loss": 0.5828, "step": 29102 }, { "epoch": 0.8496978190417798, "grad_norm": 0.6769793265425, "learning_rate": 3.34014598540146e-06, "loss": 0.6653, "step": 29103 }, { "epoch": 0.8497270152696271, "grad_norm": 0.6004912167054717, "learning_rate": 3.3394971613949716e-06, "loss": 0.5098, "step": 29104 }, { "epoch": 0.8497562114974745, "grad_norm": 0.6904477867190878, "learning_rate": 3.338848337388484e-06, "loss": 0.6469, "step": 29105 }, { "epoch": 0.8497854077253219, "grad_norm": 0.6967261010114717, "learning_rate": 3.3381995133819957e-06, "loss": 0.6969, "step": 29106 }, { "epoch": 0.8498146039531692, "grad_norm": 0.6652128071953458, "learning_rate": 3.3375506893755073e-06, "loss": 0.6238, "step": 29107 }, { "epoch": 0.8498438001810166, "grad_norm": 0.7033232877903784, "learning_rate": 3.336901865369019e-06, "loss": 0.6621, "step": 29108 }, { "epoch": 0.8498729964088639, "grad_norm": 0.6882460156536218, "learning_rate": 3.3362530413625305e-06, "loss": 0.7051, "step": 29109 }, { "epoch": 0.8499021926367113, "grad_norm": 0.6292560176334621, "learning_rate": 3.3356042173560425e-06, "loss": 0.5489, "step": 29110 }, { "epoch": 0.8499313888645587, "grad_norm": 0.6448904366621856, "learning_rate": 3.3349553933495545e-06, "loss": 0.5792, "step": 29111 }, { "epoch": 0.849960585092406, "grad_norm": 0.6335940673493101, "learning_rate": 3.334306569343066e-06, "loss": 0.6225, "step": 29112 }, { "epoch": 0.8499897813202534, "grad_norm": 0.6929527440688809, "learning_rate": 3.3336577453365777e-06, "loss": 0.6548, "step": 29113 }, { "epoch": 0.8500189775481007, "grad_norm": 0.613929676208307, "learning_rate": 3.3330089213300893e-06, "loss": 0.5476, "step": 29114 }, { "epoch": 0.8500481737759481, "grad_norm": 0.6637864501798523, "learning_rate": 3.3323600973236014e-06, "loss": 0.6246, "step": 29115 }, { "epoch": 0.8500773700037955, "grad_norm": 0.6206041872524298, "learning_rate": 3.331711273317113e-06, "loss": 0.5797, "step": 29116 }, { "epoch": 0.8501065662316428, "grad_norm": 0.6404928968716007, "learning_rate": 3.3310624493106246e-06, "loss": 0.6186, "step": 29117 }, { "epoch": 0.8501357624594902, "grad_norm": 0.6334833185111816, "learning_rate": 3.330413625304136e-06, "loss": 0.5785, "step": 29118 }, { "epoch": 0.8501649586873375, "grad_norm": 0.6338975435412787, "learning_rate": 3.329764801297648e-06, "loss": 0.5717, "step": 29119 }, { "epoch": 0.8501941549151849, "grad_norm": 0.6758162414339582, "learning_rate": 3.3291159772911602e-06, "loss": 0.6402, "step": 29120 }, { "epoch": 0.8502233511430323, "grad_norm": 0.6050752810677259, "learning_rate": 3.328467153284672e-06, "loss": 0.5123, "step": 29121 }, { "epoch": 0.8502525473708796, "grad_norm": 0.6068021975557286, "learning_rate": 3.3278183292781834e-06, "loss": 0.529, "step": 29122 }, { "epoch": 0.850281743598727, "grad_norm": 0.6276367467367624, "learning_rate": 3.327169505271695e-06, "loss": 0.579, "step": 29123 }, { "epoch": 0.8503109398265744, "grad_norm": 0.661714723029989, "learning_rate": 3.3265206812652075e-06, "loss": 0.6018, "step": 29124 }, { "epoch": 0.8503401360544217, "grad_norm": 0.6764505589722305, "learning_rate": 3.325871857258719e-06, "loss": 0.6309, "step": 29125 }, { "epoch": 0.8503693322822692, "grad_norm": 0.6870554770102416, "learning_rate": 3.3252230332522307e-06, "loss": 0.6608, "step": 29126 }, { "epoch": 0.8503985285101165, "grad_norm": 0.6499537738205, "learning_rate": 3.3245742092457423e-06, "loss": 0.6122, "step": 29127 }, { "epoch": 0.8504277247379639, "grad_norm": 0.628804909327059, "learning_rate": 3.323925385239254e-06, "loss": 0.5448, "step": 29128 }, { "epoch": 0.8504569209658113, "grad_norm": 0.6495523376163448, "learning_rate": 3.323276561232766e-06, "loss": 0.6102, "step": 29129 }, { "epoch": 0.8504861171936586, "grad_norm": 0.6969878936315785, "learning_rate": 3.3226277372262775e-06, "loss": 0.6278, "step": 29130 }, { "epoch": 0.850515313421506, "grad_norm": 0.6704900883181087, "learning_rate": 3.3219789132197896e-06, "loss": 0.6053, "step": 29131 }, { "epoch": 0.8505445096493534, "grad_norm": 0.6224068341898974, "learning_rate": 3.321330089213301e-06, "loss": 0.525, "step": 29132 }, { "epoch": 0.8505737058772007, "grad_norm": 0.674461268930817, "learning_rate": 3.3206812652068128e-06, "loss": 0.6451, "step": 29133 }, { "epoch": 0.8506029021050481, "grad_norm": 0.6503164463068218, "learning_rate": 3.320032441200325e-06, "loss": 0.6345, "step": 29134 }, { "epoch": 0.8506320983328954, "grad_norm": 0.6811069276838833, "learning_rate": 3.3193836171938364e-06, "loss": 0.6142, "step": 29135 }, { "epoch": 0.8506612945607428, "grad_norm": 0.7083812645221989, "learning_rate": 3.318734793187348e-06, "loss": 0.5297, "step": 29136 }, { "epoch": 0.8506904907885902, "grad_norm": 0.6453237302201198, "learning_rate": 3.3180859691808596e-06, "loss": 0.5946, "step": 29137 }, { "epoch": 0.8507196870164375, "grad_norm": 0.6588004851826454, "learning_rate": 3.317437145174372e-06, "loss": 0.621, "step": 29138 }, { "epoch": 0.8507488832442849, "grad_norm": 0.6210235978140765, "learning_rate": 3.3167883211678837e-06, "loss": 0.5347, "step": 29139 }, { "epoch": 0.8507780794721322, "grad_norm": 0.5894175494160699, "learning_rate": 3.3161394971613953e-06, "loss": 0.5245, "step": 29140 }, { "epoch": 0.8508072756999796, "grad_norm": 0.6569387351747425, "learning_rate": 3.315490673154907e-06, "loss": 0.6038, "step": 29141 }, { "epoch": 0.850836471927827, "grad_norm": 0.6725155620300933, "learning_rate": 3.3148418491484185e-06, "loss": 0.6471, "step": 29142 }, { "epoch": 0.8508656681556743, "grad_norm": 0.600697965686873, "learning_rate": 3.314193025141931e-06, "loss": 0.5205, "step": 29143 }, { "epoch": 0.8508948643835217, "grad_norm": 0.648975520055424, "learning_rate": 3.3135442011354425e-06, "loss": 0.6277, "step": 29144 }, { "epoch": 0.850924060611369, "grad_norm": 0.5968055671280017, "learning_rate": 3.312895377128954e-06, "loss": 0.5206, "step": 29145 }, { "epoch": 0.8509532568392164, "grad_norm": 0.6613008161171006, "learning_rate": 3.3122465531224657e-06, "loss": 0.6301, "step": 29146 }, { "epoch": 0.8509824530670638, "grad_norm": 0.6457070518166191, "learning_rate": 3.3115977291159773e-06, "loss": 0.6118, "step": 29147 }, { "epoch": 0.8510116492949111, "grad_norm": 0.6974577859744208, "learning_rate": 3.3109489051094894e-06, "loss": 0.6981, "step": 29148 }, { "epoch": 0.8510408455227585, "grad_norm": 0.7081159190917704, "learning_rate": 3.310300081103001e-06, "loss": 0.583, "step": 29149 }, { "epoch": 0.8510700417506059, "grad_norm": 0.6734075327847254, "learning_rate": 3.309651257096513e-06, "loss": 0.6876, "step": 29150 }, { "epoch": 0.8510992379784532, "grad_norm": 0.6999718314113964, "learning_rate": 3.3090024330900246e-06, "loss": 0.6165, "step": 29151 }, { "epoch": 0.8511284342063006, "grad_norm": 0.6428066246539895, "learning_rate": 3.308353609083536e-06, "loss": 0.6053, "step": 29152 }, { "epoch": 0.8511576304341479, "grad_norm": 0.6563308402538302, "learning_rate": 3.3077047850770482e-06, "loss": 0.6352, "step": 29153 }, { "epoch": 0.8511868266619953, "grad_norm": 0.6629051522661441, "learning_rate": 3.30705596107056e-06, "loss": 0.6498, "step": 29154 }, { "epoch": 0.8512160228898427, "grad_norm": 0.6742812787681745, "learning_rate": 3.3064071370640714e-06, "loss": 0.6258, "step": 29155 }, { "epoch": 0.85124521911769, "grad_norm": 0.6182283611764754, "learning_rate": 3.305758313057583e-06, "loss": 0.5565, "step": 29156 }, { "epoch": 0.8512744153455374, "grad_norm": 0.653531385603953, "learning_rate": 3.3051094890510955e-06, "loss": 0.6065, "step": 29157 }, { "epoch": 0.8513036115733847, "grad_norm": 0.6262549422025635, "learning_rate": 3.304460665044607e-06, "loss": 0.5635, "step": 29158 }, { "epoch": 0.8513328078012321, "grad_norm": 0.6412358501972171, "learning_rate": 3.3038118410381187e-06, "loss": 0.5954, "step": 29159 }, { "epoch": 0.8513620040290795, "grad_norm": 0.6078676616681451, "learning_rate": 3.3031630170316303e-06, "loss": 0.5329, "step": 29160 }, { "epoch": 0.8513912002569268, "grad_norm": 0.6380233544302497, "learning_rate": 3.302514193025142e-06, "loss": 0.5477, "step": 29161 }, { "epoch": 0.8514203964847742, "grad_norm": 0.6467100741026186, "learning_rate": 3.3018653690186544e-06, "loss": 0.6103, "step": 29162 }, { "epoch": 0.8514495927126216, "grad_norm": 0.6576387427122463, "learning_rate": 3.301216545012166e-06, "loss": 0.6633, "step": 29163 }, { "epoch": 0.8514787889404689, "grad_norm": 0.6259820189407606, "learning_rate": 3.3005677210056776e-06, "loss": 0.5603, "step": 29164 }, { "epoch": 0.8515079851683163, "grad_norm": 0.6446782516769719, "learning_rate": 3.299918896999189e-06, "loss": 0.5763, "step": 29165 }, { "epoch": 0.8515371813961636, "grad_norm": 0.617844636708486, "learning_rate": 3.2992700729927008e-06, "loss": 0.5786, "step": 29166 }, { "epoch": 0.851566377624011, "grad_norm": 0.631715522957112, "learning_rate": 3.298621248986213e-06, "loss": 0.5553, "step": 29167 }, { "epoch": 0.8515955738518584, "grad_norm": 0.696665879313274, "learning_rate": 3.2979724249797244e-06, "loss": 0.6733, "step": 29168 }, { "epoch": 0.8516247700797057, "grad_norm": 0.610338715657354, "learning_rate": 3.2973236009732364e-06, "loss": 0.5316, "step": 29169 }, { "epoch": 0.8516539663075531, "grad_norm": 0.6070502162524979, "learning_rate": 3.296674776966748e-06, "loss": 0.5443, "step": 29170 }, { "epoch": 0.8516831625354004, "grad_norm": 0.6196716729070715, "learning_rate": 3.29602595296026e-06, "loss": 0.5501, "step": 29171 }, { "epoch": 0.8517123587632478, "grad_norm": 0.642402939991179, "learning_rate": 3.2953771289537717e-06, "loss": 0.5348, "step": 29172 }, { "epoch": 0.8517415549910952, "grad_norm": 0.6486911855039063, "learning_rate": 3.2947283049472833e-06, "loss": 0.618, "step": 29173 }, { "epoch": 0.8517707512189425, "grad_norm": 0.6697077469298933, "learning_rate": 3.294079480940795e-06, "loss": 0.6531, "step": 29174 }, { "epoch": 0.8517999474467899, "grad_norm": 0.6075186768727928, "learning_rate": 3.2934306569343065e-06, "loss": 0.5514, "step": 29175 }, { "epoch": 0.8518291436746372, "grad_norm": 0.6539175953844929, "learning_rate": 3.292781832927819e-06, "loss": 0.617, "step": 29176 }, { "epoch": 0.8518583399024846, "grad_norm": 0.6593810624666502, "learning_rate": 3.2921330089213305e-06, "loss": 0.6018, "step": 29177 }, { "epoch": 0.851887536130332, "grad_norm": 0.611563709857951, "learning_rate": 3.291484184914842e-06, "loss": 0.5745, "step": 29178 }, { "epoch": 0.8519167323581793, "grad_norm": 0.6244742103526796, "learning_rate": 3.2908353609083537e-06, "loss": 0.5524, "step": 29179 }, { "epoch": 0.8519459285860267, "grad_norm": 0.6608654271156329, "learning_rate": 3.2901865369018653e-06, "loss": 0.6034, "step": 29180 }, { "epoch": 0.851975124813874, "grad_norm": 0.631721231630833, "learning_rate": 3.289537712895378e-06, "loss": 0.6172, "step": 29181 }, { "epoch": 0.8520043210417214, "grad_norm": 0.6527549093837062, "learning_rate": 3.2888888888888894e-06, "loss": 0.6328, "step": 29182 }, { "epoch": 0.8520335172695688, "grad_norm": 0.6073973448661144, "learning_rate": 3.288240064882401e-06, "loss": 0.4778, "step": 29183 }, { "epoch": 0.8520627134974161, "grad_norm": 0.6448858163246799, "learning_rate": 3.2875912408759126e-06, "loss": 0.6092, "step": 29184 }, { "epoch": 0.8520919097252635, "grad_norm": 0.6212145412390223, "learning_rate": 3.286942416869424e-06, "loss": 0.5781, "step": 29185 }, { "epoch": 0.8521211059531109, "grad_norm": 0.7425897469432825, "learning_rate": 3.2862935928629362e-06, "loss": 0.5737, "step": 29186 }, { "epoch": 0.8521503021809582, "grad_norm": 0.657520101670315, "learning_rate": 3.285644768856448e-06, "loss": 0.5718, "step": 29187 }, { "epoch": 0.8521794984088056, "grad_norm": 0.6113417678922934, "learning_rate": 3.2849959448499594e-06, "loss": 0.5538, "step": 29188 }, { "epoch": 0.8522086946366529, "grad_norm": 0.6616428605601868, "learning_rate": 3.2843471208434715e-06, "loss": 0.6333, "step": 29189 }, { "epoch": 0.8522378908645003, "grad_norm": 0.6491185947051733, "learning_rate": 3.2836982968369835e-06, "loss": 0.5922, "step": 29190 }, { "epoch": 0.8522670870923477, "grad_norm": 0.6233800408437212, "learning_rate": 3.283049472830495e-06, "loss": 0.5593, "step": 29191 }, { "epoch": 0.852296283320195, "grad_norm": 0.6434559372628537, "learning_rate": 3.2824006488240067e-06, "loss": 0.5411, "step": 29192 }, { "epoch": 0.8523254795480424, "grad_norm": 0.6580445790546079, "learning_rate": 3.2817518248175183e-06, "loss": 0.6394, "step": 29193 }, { "epoch": 0.8523546757758897, "grad_norm": 0.6875351032651595, "learning_rate": 3.28110300081103e-06, "loss": 0.6555, "step": 29194 }, { "epoch": 0.8523838720037371, "grad_norm": 0.6235599330755983, "learning_rate": 3.2804541768045424e-06, "loss": 0.5289, "step": 29195 }, { "epoch": 0.8524130682315845, "grad_norm": 0.6773568580776661, "learning_rate": 3.279805352798054e-06, "loss": 0.6257, "step": 29196 }, { "epoch": 0.8524422644594318, "grad_norm": 0.6112425915211229, "learning_rate": 3.2791565287915656e-06, "loss": 0.583, "step": 29197 }, { "epoch": 0.8524714606872792, "grad_norm": 0.6603640197263342, "learning_rate": 3.278507704785077e-06, "loss": 0.6216, "step": 29198 }, { "epoch": 0.8525006569151266, "grad_norm": 0.7402383612285746, "learning_rate": 3.2778588807785888e-06, "loss": 0.5337, "step": 29199 }, { "epoch": 0.8525298531429739, "grad_norm": 0.6142591852566472, "learning_rate": 3.2772100567721012e-06, "loss": 0.5506, "step": 29200 }, { "epoch": 0.8525590493708213, "grad_norm": 0.6488745484987154, "learning_rate": 3.276561232765613e-06, "loss": 0.6199, "step": 29201 }, { "epoch": 0.8525882455986686, "grad_norm": 0.6994723175553964, "learning_rate": 3.2759124087591244e-06, "loss": 0.6666, "step": 29202 }, { "epoch": 0.852617441826516, "grad_norm": 0.6476435498459407, "learning_rate": 3.275263584752636e-06, "loss": 0.6281, "step": 29203 }, { "epoch": 0.8526466380543634, "grad_norm": 0.6501016155053897, "learning_rate": 3.274614760746148e-06, "loss": 0.6001, "step": 29204 }, { "epoch": 0.8526758342822107, "grad_norm": 0.6219455835090301, "learning_rate": 3.2739659367396597e-06, "loss": 0.5519, "step": 29205 }, { "epoch": 0.8527050305100581, "grad_norm": 0.6670973889041871, "learning_rate": 3.2733171127331713e-06, "loss": 0.6528, "step": 29206 }, { "epoch": 0.8527342267379054, "grad_norm": 0.6162296576961265, "learning_rate": 3.272668288726683e-06, "loss": 0.5459, "step": 29207 }, { "epoch": 0.8527634229657528, "grad_norm": 0.5762064277343404, "learning_rate": 3.272019464720195e-06, "loss": 0.5193, "step": 29208 }, { "epoch": 0.8527926191936002, "grad_norm": 0.6596690957973346, "learning_rate": 3.271370640713707e-06, "loss": 0.6323, "step": 29209 }, { "epoch": 0.8528218154214475, "grad_norm": 0.6441837704335055, "learning_rate": 3.2707218167072185e-06, "loss": 0.5764, "step": 29210 }, { "epoch": 0.8528510116492949, "grad_norm": 0.6073348859321045, "learning_rate": 3.27007299270073e-06, "loss": 0.5182, "step": 29211 }, { "epoch": 0.8528802078771422, "grad_norm": 0.5892721626301393, "learning_rate": 3.2694241686942417e-06, "loss": 0.5216, "step": 29212 }, { "epoch": 0.8529094041049896, "grad_norm": 0.6043811172492992, "learning_rate": 3.2687753446877533e-06, "loss": 0.5386, "step": 29213 }, { "epoch": 0.852938600332837, "grad_norm": 0.6023930113046625, "learning_rate": 3.268126520681266e-06, "loss": 0.5174, "step": 29214 }, { "epoch": 0.8529677965606843, "grad_norm": 0.6492955800645783, "learning_rate": 3.2674776966747774e-06, "loss": 0.6001, "step": 29215 }, { "epoch": 0.8529969927885317, "grad_norm": 0.6177616410764657, "learning_rate": 3.266828872668289e-06, "loss": 0.5778, "step": 29216 }, { "epoch": 0.853026189016379, "grad_norm": 0.7671245329854541, "learning_rate": 3.2661800486618006e-06, "loss": 0.6417, "step": 29217 }, { "epoch": 0.8530553852442264, "grad_norm": 0.6440818264022331, "learning_rate": 3.2655312246553122e-06, "loss": 0.5348, "step": 29218 }, { "epoch": 0.8530845814720738, "grad_norm": 0.6603504259924737, "learning_rate": 3.2648824006488242e-06, "loss": 0.6316, "step": 29219 }, { "epoch": 0.8531137776999211, "grad_norm": 0.6438022162223781, "learning_rate": 3.2642335766423363e-06, "loss": 0.6139, "step": 29220 }, { "epoch": 0.8531429739277685, "grad_norm": 0.6573654166215227, "learning_rate": 3.263584752635848e-06, "loss": 0.5751, "step": 29221 }, { "epoch": 0.8531721701556159, "grad_norm": 0.6154217146299983, "learning_rate": 3.2629359286293595e-06, "loss": 0.552, "step": 29222 }, { "epoch": 0.8532013663834632, "grad_norm": 0.6602236662806503, "learning_rate": 3.2622871046228715e-06, "loss": 0.6351, "step": 29223 }, { "epoch": 0.8532305626113106, "grad_norm": 0.6413574421337046, "learning_rate": 3.261638280616383e-06, "loss": 0.5667, "step": 29224 }, { "epoch": 0.853259758839158, "grad_norm": 0.6468310033789275, "learning_rate": 3.2609894566098947e-06, "loss": 0.5653, "step": 29225 }, { "epoch": 0.8532889550670053, "grad_norm": 0.6357369413612752, "learning_rate": 3.2603406326034063e-06, "loss": 0.5972, "step": 29226 }, { "epoch": 0.8533181512948527, "grad_norm": 0.7146814099049835, "learning_rate": 3.2596918085969183e-06, "loss": 0.7243, "step": 29227 }, { "epoch": 0.8533473475227, "grad_norm": 0.6552554408381921, "learning_rate": 3.2590429845904304e-06, "loss": 0.6108, "step": 29228 }, { "epoch": 0.8533765437505474, "grad_norm": 0.6323675526437126, "learning_rate": 3.258394160583942e-06, "loss": 0.614, "step": 29229 }, { "epoch": 0.8534057399783948, "grad_norm": 0.7328104188565001, "learning_rate": 3.2577453365774536e-06, "loss": 0.5415, "step": 29230 }, { "epoch": 0.8534349362062421, "grad_norm": 0.7138314813478362, "learning_rate": 3.257096512570965e-06, "loss": 0.752, "step": 29231 }, { "epoch": 0.8534641324340895, "grad_norm": 0.6553819017955826, "learning_rate": 3.2564476885644768e-06, "loss": 0.5912, "step": 29232 }, { "epoch": 0.8534933286619368, "grad_norm": 0.7119878164899243, "learning_rate": 3.2557988645579892e-06, "loss": 0.6837, "step": 29233 }, { "epoch": 0.8535225248897842, "grad_norm": 0.6154604944971932, "learning_rate": 3.255150040551501e-06, "loss": 0.5558, "step": 29234 }, { "epoch": 0.8535517211176316, "grad_norm": 0.6434841875013174, "learning_rate": 3.2545012165450124e-06, "loss": 0.6029, "step": 29235 }, { "epoch": 0.8535809173454789, "grad_norm": 0.614811459031637, "learning_rate": 3.253852392538524e-06, "loss": 0.5298, "step": 29236 }, { "epoch": 0.8536101135733263, "grad_norm": 0.7079645502691689, "learning_rate": 3.253203568532036e-06, "loss": 0.66, "step": 29237 }, { "epoch": 0.8536393098011736, "grad_norm": 0.6729600909521706, "learning_rate": 3.2525547445255477e-06, "loss": 0.6157, "step": 29238 }, { "epoch": 0.853668506029021, "grad_norm": 0.712436797402336, "learning_rate": 3.2519059205190597e-06, "loss": 0.6545, "step": 29239 }, { "epoch": 0.8536977022568684, "grad_norm": 0.6459996264314193, "learning_rate": 3.2512570965125713e-06, "loss": 0.6145, "step": 29240 }, { "epoch": 0.8537268984847157, "grad_norm": 0.6103314881862185, "learning_rate": 3.250608272506083e-06, "loss": 0.5064, "step": 29241 }, { "epoch": 0.8537560947125631, "grad_norm": 0.6548509811379639, "learning_rate": 3.249959448499595e-06, "loss": 0.5763, "step": 29242 }, { "epoch": 0.8537852909404104, "grad_norm": 0.6745653477797279, "learning_rate": 3.2493106244931065e-06, "loss": 0.6544, "step": 29243 }, { "epoch": 0.8538144871682578, "grad_norm": 0.6247691925072749, "learning_rate": 3.248661800486618e-06, "loss": 0.5967, "step": 29244 }, { "epoch": 0.8538436833961052, "grad_norm": 0.6296989756592568, "learning_rate": 3.2480129764801297e-06, "loss": 0.5744, "step": 29245 }, { "epoch": 0.8538728796239525, "grad_norm": 0.6468651885678484, "learning_rate": 3.2473641524736414e-06, "loss": 0.5932, "step": 29246 }, { "epoch": 0.8539020758518, "grad_norm": 0.6392217360568256, "learning_rate": 3.246715328467154e-06, "loss": 0.5926, "step": 29247 }, { "epoch": 0.8539312720796474, "grad_norm": 0.6889080368268371, "learning_rate": 3.2460665044606654e-06, "loss": 0.639, "step": 29248 }, { "epoch": 0.8539604683074947, "grad_norm": 0.6441775525575772, "learning_rate": 3.245417680454177e-06, "loss": 0.6372, "step": 29249 }, { "epoch": 0.8539896645353421, "grad_norm": 0.6153205135082389, "learning_rate": 3.2447688564476886e-06, "loss": 0.5338, "step": 29250 }, { "epoch": 0.8540188607631894, "grad_norm": 0.6401173430021331, "learning_rate": 3.2441200324412002e-06, "loss": 0.5683, "step": 29251 }, { "epoch": 0.8540480569910368, "grad_norm": 0.6817412303414977, "learning_rate": 3.2434712084347127e-06, "loss": 0.6499, "step": 29252 }, { "epoch": 0.8540772532188842, "grad_norm": 0.653678948267359, "learning_rate": 3.2428223844282243e-06, "loss": 0.6474, "step": 29253 }, { "epoch": 0.8541064494467315, "grad_norm": 0.6585330832708629, "learning_rate": 3.242173560421736e-06, "loss": 0.6154, "step": 29254 }, { "epoch": 0.8541356456745789, "grad_norm": 0.6858832211881123, "learning_rate": 3.2415247364152475e-06, "loss": 0.6144, "step": 29255 }, { "epoch": 0.8541648419024263, "grad_norm": 0.6558997361860079, "learning_rate": 3.2408759124087595e-06, "loss": 0.6044, "step": 29256 }, { "epoch": 0.8541940381302736, "grad_norm": 0.6006722870016272, "learning_rate": 3.240227088402271e-06, "loss": 0.5075, "step": 29257 }, { "epoch": 0.854223234358121, "grad_norm": 0.6561878687489562, "learning_rate": 3.239578264395783e-06, "loss": 0.6556, "step": 29258 }, { "epoch": 0.8542524305859683, "grad_norm": 0.6312394079812109, "learning_rate": 3.2389294403892947e-06, "loss": 0.5491, "step": 29259 }, { "epoch": 0.8542816268138157, "grad_norm": 0.6238393843696213, "learning_rate": 3.2382806163828063e-06, "loss": 0.5948, "step": 29260 }, { "epoch": 0.8543108230416631, "grad_norm": 0.6441884556277909, "learning_rate": 3.2376317923763184e-06, "loss": 0.5833, "step": 29261 }, { "epoch": 0.8543400192695104, "grad_norm": 0.6146782308337054, "learning_rate": 3.23698296836983e-06, "loss": 0.5384, "step": 29262 }, { "epoch": 0.8543692154973578, "grad_norm": 0.6323166946077902, "learning_rate": 3.2363341443633416e-06, "loss": 0.5863, "step": 29263 }, { "epoch": 0.8543984117252051, "grad_norm": 0.6155003232721535, "learning_rate": 3.235685320356853e-06, "loss": 0.5757, "step": 29264 }, { "epoch": 0.8544276079530525, "grad_norm": 0.6371769559979662, "learning_rate": 3.2350364963503648e-06, "loss": 0.592, "step": 29265 }, { "epoch": 0.8544568041808999, "grad_norm": 0.6231132997724604, "learning_rate": 3.2343876723438772e-06, "loss": 0.5379, "step": 29266 }, { "epoch": 0.8544860004087472, "grad_norm": 0.635601696570288, "learning_rate": 3.233738848337389e-06, "loss": 0.5685, "step": 29267 }, { "epoch": 0.8545151966365946, "grad_norm": 0.6407526128318526, "learning_rate": 3.2330900243309004e-06, "loss": 0.5816, "step": 29268 }, { "epoch": 0.854544392864442, "grad_norm": 0.6609984759859343, "learning_rate": 3.232441200324412e-06, "loss": 0.639, "step": 29269 }, { "epoch": 0.8545735890922893, "grad_norm": 0.6512860567964018, "learning_rate": 3.2317923763179237e-06, "loss": 0.6003, "step": 29270 }, { "epoch": 0.8546027853201367, "grad_norm": 0.6277531699318067, "learning_rate": 3.231143552311436e-06, "loss": 0.5422, "step": 29271 }, { "epoch": 0.854631981547984, "grad_norm": 0.6126658562188155, "learning_rate": 3.2304947283049477e-06, "loss": 0.5681, "step": 29272 }, { "epoch": 0.8546611777758314, "grad_norm": 0.6238016065283687, "learning_rate": 3.2298459042984593e-06, "loss": 0.5403, "step": 29273 }, { "epoch": 0.8546903740036788, "grad_norm": 0.6646830632902094, "learning_rate": 3.229197080291971e-06, "loss": 0.6216, "step": 29274 }, { "epoch": 0.8547195702315261, "grad_norm": 0.6394608988876126, "learning_rate": 3.228548256285483e-06, "loss": 0.6222, "step": 29275 }, { "epoch": 0.8547487664593735, "grad_norm": 0.6316822355643262, "learning_rate": 3.2278994322789945e-06, "loss": 0.628, "step": 29276 }, { "epoch": 0.8547779626872208, "grad_norm": 0.6577590755070055, "learning_rate": 3.227250608272506e-06, "loss": 0.5913, "step": 29277 }, { "epoch": 0.8548071589150682, "grad_norm": 0.6598469532958368, "learning_rate": 3.226601784266018e-06, "loss": 0.5961, "step": 29278 }, { "epoch": 0.8548363551429156, "grad_norm": 0.6667259711052484, "learning_rate": 3.2259529602595298e-06, "loss": 0.6081, "step": 29279 }, { "epoch": 0.8548655513707629, "grad_norm": 0.7085234386555095, "learning_rate": 3.225304136253042e-06, "loss": 0.6145, "step": 29280 }, { "epoch": 0.8548947475986103, "grad_norm": 0.596882576346803, "learning_rate": 3.2246553122465534e-06, "loss": 0.5262, "step": 29281 }, { "epoch": 0.8549239438264576, "grad_norm": 0.6299905159572147, "learning_rate": 3.224006488240065e-06, "loss": 0.5504, "step": 29282 }, { "epoch": 0.854953140054305, "grad_norm": 0.6144740993342817, "learning_rate": 3.2233576642335766e-06, "loss": 0.5425, "step": 29283 }, { "epoch": 0.8549823362821524, "grad_norm": 0.6479289232158227, "learning_rate": 3.2227088402270882e-06, "loss": 0.5731, "step": 29284 }, { "epoch": 0.8550115325099997, "grad_norm": 0.6272899167222861, "learning_rate": 3.2220600162206007e-06, "loss": 0.5643, "step": 29285 }, { "epoch": 0.8550407287378471, "grad_norm": 0.7551673451309129, "learning_rate": 3.2214111922141123e-06, "loss": 0.6852, "step": 29286 }, { "epoch": 0.8550699249656945, "grad_norm": 0.6731571531597312, "learning_rate": 3.220762368207624e-06, "loss": 0.628, "step": 29287 }, { "epoch": 0.8550991211935418, "grad_norm": 0.6880969633373868, "learning_rate": 3.2201135442011355e-06, "loss": 0.6487, "step": 29288 }, { "epoch": 0.8551283174213892, "grad_norm": 0.6591329866420944, "learning_rate": 3.219464720194648e-06, "loss": 0.5917, "step": 29289 }, { "epoch": 0.8551575136492365, "grad_norm": 0.6631154584252886, "learning_rate": 3.2188158961881595e-06, "loss": 0.6036, "step": 29290 }, { "epoch": 0.8551867098770839, "grad_norm": 0.6414173348412798, "learning_rate": 3.218167072181671e-06, "loss": 0.6256, "step": 29291 }, { "epoch": 0.8552159061049313, "grad_norm": 0.65256999631166, "learning_rate": 3.2175182481751827e-06, "loss": 0.644, "step": 29292 }, { "epoch": 0.8552451023327786, "grad_norm": 0.6523320317149268, "learning_rate": 3.2168694241686943e-06, "loss": 0.5695, "step": 29293 }, { "epoch": 0.855274298560626, "grad_norm": 0.6616966266863942, "learning_rate": 3.2162206001622064e-06, "loss": 0.5843, "step": 29294 }, { "epoch": 0.8553034947884733, "grad_norm": 0.650314757292533, "learning_rate": 3.215571776155718e-06, "loss": 0.5883, "step": 29295 }, { "epoch": 0.8553326910163207, "grad_norm": 0.668399226051589, "learning_rate": 3.2149229521492296e-06, "loss": 0.5955, "step": 29296 }, { "epoch": 0.8553618872441681, "grad_norm": 0.6189119660424331, "learning_rate": 3.2142741281427416e-06, "loss": 0.5467, "step": 29297 }, { "epoch": 0.8553910834720154, "grad_norm": 0.6588225316235856, "learning_rate": 3.2136253041362532e-06, "loss": 0.6552, "step": 29298 }, { "epoch": 0.8554202796998628, "grad_norm": 0.6574184982366625, "learning_rate": 3.2129764801297652e-06, "loss": 0.6273, "step": 29299 }, { "epoch": 0.8554494759277101, "grad_norm": 0.6400889908144113, "learning_rate": 3.212327656123277e-06, "loss": 0.5851, "step": 29300 }, { "epoch": 0.8554786721555575, "grad_norm": 0.6336172705719756, "learning_rate": 3.2116788321167884e-06, "loss": 0.5388, "step": 29301 }, { "epoch": 0.8555078683834049, "grad_norm": 0.6862269227412149, "learning_rate": 3.2110300081103e-06, "loss": 0.6708, "step": 29302 }, { "epoch": 0.8555370646112522, "grad_norm": 0.7157300547516087, "learning_rate": 3.2103811841038117e-06, "loss": 0.6716, "step": 29303 }, { "epoch": 0.8555662608390996, "grad_norm": 0.6565281198260867, "learning_rate": 3.209732360097324e-06, "loss": 0.5802, "step": 29304 }, { "epoch": 0.855595457066947, "grad_norm": 0.6442284517420949, "learning_rate": 3.2090835360908357e-06, "loss": 0.6236, "step": 29305 }, { "epoch": 0.8556246532947943, "grad_norm": 0.648928441919268, "learning_rate": 3.2084347120843473e-06, "loss": 0.6266, "step": 29306 }, { "epoch": 0.8556538495226417, "grad_norm": 0.6852265080711694, "learning_rate": 3.207785888077859e-06, "loss": 0.6499, "step": 29307 }, { "epoch": 0.855683045750489, "grad_norm": 0.6437035536803957, "learning_rate": 3.207137064071371e-06, "loss": 0.6194, "step": 29308 }, { "epoch": 0.8557122419783364, "grad_norm": 0.6760097322494868, "learning_rate": 3.206488240064883e-06, "loss": 0.6265, "step": 29309 }, { "epoch": 0.8557414382061838, "grad_norm": 0.6441939996466297, "learning_rate": 3.2058394160583946e-06, "loss": 0.6099, "step": 29310 }, { "epoch": 0.8557706344340311, "grad_norm": 0.6778152010051046, "learning_rate": 3.205190592051906e-06, "loss": 0.6207, "step": 29311 }, { "epoch": 0.8557998306618785, "grad_norm": 0.6033406878908992, "learning_rate": 3.2045417680454178e-06, "loss": 0.5382, "step": 29312 }, { "epoch": 0.8558290268897258, "grad_norm": 0.6516337562761438, "learning_rate": 3.20389294403893e-06, "loss": 0.5802, "step": 29313 }, { "epoch": 0.8558582231175732, "grad_norm": 0.7249265778947015, "learning_rate": 3.2032441200324414e-06, "loss": 0.7185, "step": 29314 }, { "epoch": 0.8558874193454206, "grad_norm": 0.7014314409894966, "learning_rate": 3.202595296025953e-06, "loss": 0.6747, "step": 29315 }, { "epoch": 0.8559166155732679, "grad_norm": 0.6173629942382003, "learning_rate": 3.201946472019465e-06, "loss": 0.5519, "step": 29316 }, { "epoch": 0.8559458118011153, "grad_norm": 0.6049647648343733, "learning_rate": 3.2012976480129766e-06, "loss": 0.5588, "step": 29317 }, { "epoch": 0.8559750080289626, "grad_norm": 0.6653188953693103, "learning_rate": 3.2006488240064887e-06, "loss": 0.6416, "step": 29318 }, { "epoch": 0.85600420425681, "grad_norm": 0.711535793836346, "learning_rate": 3.2000000000000003e-06, "loss": 0.7382, "step": 29319 }, { "epoch": 0.8560334004846574, "grad_norm": 0.6201005141049617, "learning_rate": 3.199351175993512e-06, "loss": 0.535, "step": 29320 }, { "epoch": 0.8560625967125047, "grad_norm": 0.6743367841667627, "learning_rate": 3.1987023519870235e-06, "loss": 0.5895, "step": 29321 }, { "epoch": 0.8560917929403521, "grad_norm": 0.6611971222325189, "learning_rate": 3.198053527980536e-06, "loss": 0.6213, "step": 29322 }, { "epoch": 0.8561209891681995, "grad_norm": 0.6345213424522985, "learning_rate": 3.1974047039740475e-06, "loss": 0.5889, "step": 29323 }, { "epoch": 0.8561501853960468, "grad_norm": 0.6284878574128234, "learning_rate": 3.196755879967559e-06, "loss": 0.5564, "step": 29324 }, { "epoch": 0.8561793816238942, "grad_norm": 0.5922596348639112, "learning_rate": 3.1961070559610707e-06, "loss": 0.4902, "step": 29325 }, { "epoch": 0.8562085778517415, "grad_norm": 0.6427596204975639, "learning_rate": 3.1954582319545824e-06, "loss": 0.5801, "step": 29326 }, { "epoch": 0.8562377740795889, "grad_norm": 0.6689957047282383, "learning_rate": 3.1948094079480944e-06, "loss": 0.6108, "step": 29327 }, { "epoch": 0.8562669703074363, "grad_norm": 0.5773755264882516, "learning_rate": 3.1941605839416064e-06, "loss": 0.5078, "step": 29328 }, { "epoch": 0.8562961665352836, "grad_norm": 0.6810022641447128, "learning_rate": 3.193511759935118e-06, "loss": 0.6077, "step": 29329 }, { "epoch": 0.856325362763131, "grad_norm": 0.664819411308195, "learning_rate": 3.1928629359286296e-06, "loss": 0.5813, "step": 29330 }, { "epoch": 0.8563545589909783, "grad_norm": 0.6683084945781549, "learning_rate": 3.1922141119221412e-06, "loss": 0.6549, "step": 29331 }, { "epoch": 0.8563837552188257, "grad_norm": 0.6614851334674557, "learning_rate": 3.1915652879156532e-06, "loss": 0.5928, "step": 29332 }, { "epoch": 0.8564129514466731, "grad_norm": 0.6570079539215901, "learning_rate": 3.190916463909165e-06, "loss": 0.6298, "step": 29333 }, { "epoch": 0.8564421476745204, "grad_norm": 0.6282358407651797, "learning_rate": 3.1902676399026765e-06, "loss": 0.5856, "step": 29334 }, { "epoch": 0.8564713439023678, "grad_norm": 0.697046314261405, "learning_rate": 3.1896188158961885e-06, "loss": 0.6004, "step": 29335 }, { "epoch": 0.8565005401302151, "grad_norm": 0.6726551818794305, "learning_rate": 3.1889699918897e-06, "loss": 0.6482, "step": 29336 }, { "epoch": 0.8565297363580625, "grad_norm": 0.6520791899837179, "learning_rate": 3.188321167883212e-06, "loss": 0.6157, "step": 29337 }, { "epoch": 0.8565589325859099, "grad_norm": 0.6704089293224569, "learning_rate": 3.1876723438767237e-06, "loss": 0.6499, "step": 29338 }, { "epoch": 0.8565881288137572, "grad_norm": 0.6589341438912868, "learning_rate": 3.1870235198702353e-06, "loss": 0.6428, "step": 29339 }, { "epoch": 0.8566173250416046, "grad_norm": 0.6258083184090614, "learning_rate": 3.186374695863747e-06, "loss": 0.5591, "step": 29340 }, { "epoch": 0.856646521269452, "grad_norm": 0.6114142034621088, "learning_rate": 3.1857258718572594e-06, "loss": 0.5684, "step": 29341 }, { "epoch": 0.8566757174972993, "grad_norm": 0.6893899584240281, "learning_rate": 3.185077047850771e-06, "loss": 0.671, "step": 29342 }, { "epoch": 0.8567049137251467, "grad_norm": 0.624139846198193, "learning_rate": 3.1844282238442826e-06, "loss": 0.5688, "step": 29343 }, { "epoch": 0.856734109952994, "grad_norm": 0.65277001363424, "learning_rate": 3.183779399837794e-06, "loss": 0.614, "step": 29344 }, { "epoch": 0.8567633061808414, "grad_norm": 0.6167141561899151, "learning_rate": 3.1831305758313058e-06, "loss": 0.5507, "step": 29345 }, { "epoch": 0.8567925024086888, "grad_norm": 0.6464306805981112, "learning_rate": 3.182481751824818e-06, "loss": 0.6183, "step": 29346 }, { "epoch": 0.8568216986365361, "grad_norm": 0.6379540164778974, "learning_rate": 3.18183292781833e-06, "loss": 0.5996, "step": 29347 }, { "epoch": 0.8568508948643835, "grad_norm": 0.6353916118751531, "learning_rate": 3.1811841038118414e-06, "loss": 0.555, "step": 29348 }, { "epoch": 0.8568800910922308, "grad_norm": 0.6048000244406304, "learning_rate": 3.180535279805353e-06, "loss": 0.5205, "step": 29349 }, { "epoch": 0.8569092873200782, "grad_norm": 0.6948046739428568, "learning_rate": 3.1798864557988647e-06, "loss": 0.7239, "step": 29350 }, { "epoch": 0.8569384835479256, "grad_norm": 0.6315444717347463, "learning_rate": 3.1792376317923767e-06, "loss": 0.6121, "step": 29351 }, { "epoch": 0.8569676797757729, "grad_norm": 0.6287312986277259, "learning_rate": 3.1785888077858883e-06, "loss": 0.5716, "step": 29352 }, { "epoch": 0.8569968760036203, "grad_norm": 0.6385944651906721, "learning_rate": 3.1779399837794e-06, "loss": 0.5793, "step": 29353 }, { "epoch": 0.8570260722314677, "grad_norm": 0.6437097470941914, "learning_rate": 3.1772911597729115e-06, "loss": 0.6105, "step": 29354 }, { "epoch": 0.857055268459315, "grad_norm": 0.6011161453695671, "learning_rate": 3.176642335766424e-06, "loss": 0.5196, "step": 29355 }, { "epoch": 0.8570844646871624, "grad_norm": 0.6293286288169333, "learning_rate": 3.1759935117599355e-06, "loss": 0.5781, "step": 29356 }, { "epoch": 0.8571136609150097, "grad_norm": 0.5963443582004084, "learning_rate": 3.175344687753447e-06, "loss": 0.4832, "step": 29357 }, { "epoch": 0.8571428571428571, "grad_norm": 0.6566059396527654, "learning_rate": 3.1746958637469588e-06, "loss": 0.6374, "step": 29358 }, { "epoch": 0.8571720533707045, "grad_norm": 0.6591835577688913, "learning_rate": 3.1740470397404704e-06, "loss": 0.6058, "step": 29359 }, { "epoch": 0.8572012495985518, "grad_norm": 0.6644948073569342, "learning_rate": 3.173398215733983e-06, "loss": 0.6082, "step": 29360 }, { "epoch": 0.8572304458263992, "grad_norm": 0.7220809568425933, "learning_rate": 3.1727493917274944e-06, "loss": 0.6711, "step": 29361 }, { "epoch": 0.8572596420542465, "grad_norm": 0.6041971541394126, "learning_rate": 3.172100567721006e-06, "loss": 0.5005, "step": 29362 }, { "epoch": 0.8572888382820939, "grad_norm": 0.6530519962936957, "learning_rate": 3.1714517437145176e-06, "loss": 0.6017, "step": 29363 }, { "epoch": 0.8573180345099413, "grad_norm": 0.6465073354909648, "learning_rate": 3.1708029197080292e-06, "loss": 0.5847, "step": 29364 }, { "epoch": 0.8573472307377886, "grad_norm": 0.5937408986876085, "learning_rate": 3.1701540957015412e-06, "loss": 0.5458, "step": 29365 }, { "epoch": 0.857376426965636, "grad_norm": 0.6868972178833168, "learning_rate": 3.1695052716950533e-06, "loss": 0.666, "step": 29366 }, { "epoch": 0.8574056231934835, "grad_norm": 0.6459830295001922, "learning_rate": 3.168856447688565e-06, "loss": 0.5928, "step": 29367 }, { "epoch": 0.8574348194213308, "grad_norm": 0.6301099176807037, "learning_rate": 3.1682076236820765e-06, "loss": 0.5509, "step": 29368 }, { "epoch": 0.8574640156491782, "grad_norm": 0.6367747395191905, "learning_rate": 3.167558799675588e-06, "loss": 0.5766, "step": 29369 }, { "epoch": 0.8574932118770255, "grad_norm": 0.6555068497297096, "learning_rate": 3.1669099756691e-06, "loss": 0.5732, "step": 29370 }, { "epoch": 0.8575224081048729, "grad_norm": 0.5989817189633312, "learning_rate": 3.1662611516626117e-06, "loss": 0.5144, "step": 29371 }, { "epoch": 0.8575516043327203, "grad_norm": 0.645925620109291, "learning_rate": 3.1656123276561233e-06, "loss": 0.6029, "step": 29372 }, { "epoch": 0.8575808005605676, "grad_norm": 0.6830678163794311, "learning_rate": 3.164963503649635e-06, "loss": 0.6432, "step": 29373 }, { "epoch": 0.857609996788415, "grad_norm": 0.6168641259221528, "learning_rate": 3.1643146796431474e-06, "loss": 0.5588, "step": 29374 }, { "epoch": 0.8576391930162623, "grad_norm": 0.696946704810898, "learning_rate": 3.163665855636659e-06, "loss": 0.7312, "step": 29375 }, { "epoch": 0.8576683892441097, "grad_norm": 0.611322275447222, "learning_rate": 3.1630170316301706e-06, "loss": 0.5285, "step": 29376 }, { "epoch": 0.8576975854719571, "grad_norm": 0.6422443249827728, "learning_rate": 3.162368207623682e-06, "loss": 0.5826, "step": 29377 }, { "epoch": 0.8577267816998044, "grad_norm": 0.6772223599198478, "learning_rate": 3.161719383617194e-06, "loss": 0.6718, "step": 29378 }, { "epoch": 0.8577559779276518, "grad_norm": 0.6694418382606857, "learning_rate": 3.1610705596107062e-06, "loss": 0.6138, "step": 29379 }, { "epoch": 0.8577851741554992, "grad_norm": 0.6282519746505671, "learning_rate": 3.160421735604218e-06, "loss": 0.5845, "step": 29380 }, { "epoch": 0.8578143703833465, "grad_norm": 0.6733507902596483, "learning_rate": 3.1597729115977294e-06, "loss": 0.6382, "step": 29381 }, { "epoch": 0.8578435666111939, "grad_norm": 0.6995540692637904, "learning_rate": 3.159124087591241e-06, "loss": 0.6587, "step": 29382 }, { "epoch": 0.8578727628390412, "grad_norm": 0.6319509386247483, "learning_rate": 3.1584752635847527e-06, "loss": 0.5761, "step": 29383 }, { "epoch": 0.8579019590668886, "grad_norm": 0.6446044157012252, "learning_rate": 3.1578264395782647e-06, "loss": 0.5869, "step": 29384 }, { "epoch": 0.857931155294736, "grad_norm": 0.6936439139453011, "learning_rate": 3.1571776155717763e-06, "loss": 0.6461, "step": 29385 }, { "epoch": 0.8579603515225833, "grad_norm": 0.6181175648553275, "learning_rate": 3.1565287915652883e-06, "loss": 0.5745, "step": 29386 }, { "epoch": 0.8579895477504307, "grad_norm": 0.632807703436268, "learning_rate": 3.1558799675588e-06, "loss": 0.5788, "step": 29387 }, { "epoch": 0.858018743978278, "grad_norm": 0.5804661869156408, "learning_rate": 3.155231143552312e-06, "loss": 0.5251, "step": 29388 }, { "epoch": 0.8580479402061254, "grad_norm": 0.6260785059449278, "learning_rate": 3.1545823195458235e-06, "loss": 0.5848, "step": 29389 }, { "epoch": 0.8580771364339728, "grad_norm": 0.6954882012000787, "learning_rate": 3.153933495539335e-06, "loss": 0.6287, "step": 29390 }, { "epoch": 0.8581063326618201, "grad_norm": 0.6390516030480349, "learning_rate": 3.1532846715328468e-06, "loss": 0.5983, "step": 29391 }, { "epoch": 0.8581355288896675, "grad_norm": 0.7206664589794626, "learning_rate": 3.1526358475263584e-06, "loss": 0.6863, "step": 29392 }, { "epoch": 0.8581647251175148, "grad_norm": 0.61104271559488, "learning_rate": 3.151987023519871e-06, "loss": 0.5511, "step": 29393 }, { "epoch": 0.8581939213453622, "grad_norm": 0.6700851316521292, "learning_rate": 3.1513381995133824e-06, "loss": 0.6496, "step": 29394 }, { "epoch": 0.8582231175732096, "grad_norm": 0.6428362944320515, "learning_rate": 3.150689375506894e-06, "loss": 0.6189, "step": 29395 }, { "epoch": 0.8582523138010569, "grad_norm": 0.6452113734586326, "learning_rate": 3.1500405515004056e-06, "loss": 0.6153, "step": 29396 }, { "epoch": 0.8582815100289043, "grad_norm": 0.6330977621925418, "learning_rate": 3.1493917274939172e-06, "loss": 0.581, "step": 29397 }, { "epoch": 0.8583107062567517, "grad_norm": 0.6644547591705903, "learning_rate": 3.1487429034874297e-06, "loss": 0.5818, "step": 29398 }, { "epoch": 0.858339902484599, "grad_norm": 0.6287529951790346, "learning_rate": 3.1480940794809413e-06, "loss": 0.5737, "step": 29399 }, { "epoch": 0.8583690987124464, "grad_norm": 0.6575768954800127, "learning_rate": 3.147445255474453e-06, "loss": 0.6569, "step": 29400 }, { "epoch": 0.8583982949402937, "grad_norm": 0.6570117297838897, "learning_rate": 3.1467964314679645e-06, "loss": 0.633, "step": 29401 }, { "epoch": 0.8584274911681411, "grad_norm": 0.6481978277020365, "learning_rate": 3.146147607461476e-06, "loss": 0.6255, "step": 29402 }, { "epoch": 0.8584566873959885, "grad_norm": 0.6197555703247015, "learning_rate": 3.145498783454988e-06, "loss": 0.5615, "step": 29403 }, { "epoch": 0.8584858836238358, "grad_norm": 0.66838628114857, "learning_rate": 3.1448499594484997e-06, "loss": 0.6136, "step": 29404 }, { "epoch": 0.8585150798516832, "grad_norm": 0.6331312886415899, "learning_rate": 3.1442011354420117e-06, "loss": 0.5761, "step": 29405 }, { "epoch": 0.8585442760795305, "grad_norm": 0.6515950401181037, "learning_rate": 3.1435523114355234e-06, "loss": 0.6035, "step": 29406 }, { "epoch": 0.8585734723073779, "grad_norm": 0.6021494513616018, "learning_rate": 3.1429034874290354e-06, "loss": 0.5148, "step": 29407 }, { "epoch": 0.8586026685352253, "grad_norm": 0.6475799709552621, "learning_rate": 3.142254663422547e-06, "loss": 0.5388, "step": 29408 }, { "epoch": 0.8586318647630726, "grad_norm": 0.6834040913593534, "learning_rate": 3.1416058394160586e-06, "loss": 0.6376, "step": 29409 }, { "epoch": 0.85866106099092, "grad_norm": 0.6662586930251643, "learning_rate": 3.14095701540957e-06, "loss": 0.6436, "step": 29410 }, { "epoch": 0.8586902572187673, "grad_norm": 0.6096461030949772, "learning_rate": 3.140308191403082e-06, "loss": 0.5199, "step": 29411 }, { "epoch": 0.8587194534466147, "grad_norm": 0.659905889473139, "learning_rate": 3.1396593673965942e-06, "loss": 0.6006, "step": 29412 }, { "epoch": 0.8587486496744621, "grad_norm": 0.5652745775638971, "learning_rate": 3.139010543390106e-06, "loss": 0.4582, "step": 29413 }, { "epoch": 0.8587778459023094, "grad_norm": 0.6544899495554698, "learning_rate": 3.1383617193836175e-06, "loss": 0.657, "step": 29414 }, { "epoch": 0.8588070421301568, "grad_norm": 0.6615939820901686, "learning_rate": 3.137712895377129e-06, "loss": 0.6026, "step": 29415 }, { "epoch": 0.8588362383580042, "grad_norm": 0.6222670488407409, "learning_rate": 3.1370640713706407e-06, "loss": 0.582, "step": 29416 }, { "epoch": 0.8588654345858515, "grad_norm": 0.6097346475112769, "learning_rate": 3.136415247364153e-06, "loss": 0.5527, "step": 29417 }, { "epoch": 0.8588946308136989, "grad_norm": 0.6882836278217548, "learning_rate": 3.1357664233576647e-06, "loss": 0.6354, "step": 29418 }, { "epoch": 0.8589238270415462, "grad_norm": 0.6134695050717623, "learning_rate": 3.1351175993511763e-06, "loss": 0.5299, "step": 29419 }, { "epoch": 0.8589530232693936, "grad_norm": 0.6438391232241356, "learning_rate": 3.134468775344688e-06, "loss": 0.5818, "step": 29420 }, { "epoch": 0.858982219497241, "grad_norm": 0.6288623620104173, "learning_rate": 3.1338199513381995e-06, "loss": 0.5753, "step": 29421 }, { "epoch": 0.8590114157250883, "grad_norm": 0.5710707033881469, "learning_rate": 3.1331711273317116e-06, "loss": 0.4827, "step": 29422 }, { "epoch": 0.8590406119529357, "grad_norm": 0.6766665430370444, "learning_rate": 3.132522303325223e-06, "loss": 0.6078, "step": 29423 }, { "epoch": 0.859069808180783, "grad_norm": 0.6888515622123831, "learning_rate": 3.131873479318735e-06, "loss": 0.673, "step": 29424 }, { "epoch": 0.8590990044086304, "grad_norm": 0.6257023337342142, "learning_rate": 3.1312246553122468e-06, "loss": 0.5667, "step": 29425 }, { "epoch": 0.8591282006364778, "grad_norm": 0.6060487494572243, "learning_rate": 3.130575831305759e-06, "loss": 0.5377, "step": 29426 }, { "epoch": 0.8591573968643251, "grad_norm": 0.6244321974175461, "learning_rate": 3.1299270072992704e-06, "loss": 0.5422, "step": 29427 }, { "epoch": 0.8591865930921725, "grad_norm": 0.6071181256614214, "learning_rate": 3.129278183292782e-06, "loss": 0.5339, "step": 29428 }, { "epoch": 0.8592157893200199, "grad_norm": 0.6653879972087067, "learning_rate": 3.1286293592862936e-06, "loss": 0.6087, "step": 29429 }, { "epoch": 0.8592449855478672, "grad_norm": 0.6246302022106106, "learning_rate": 3.1279805352798052e-06, "loss": 0.5493, "step": 29430 }, { "epoch": 0.8592741817757146, "grad_norm": 0.6410126401316099, "learning_rate": 3.1273317112733177e-06, "loss": 0.6065, "step": 29431 }, { "epoch": 0.8593033780035619, "grad_norm": 0.5832007613936312, "learning_rate": 3.1266828872668293e-06, "loss": 0.517, "step": 29432 }, { "epoch": 0.8593325742314093, "grad_norm": 0.6302047657571842, "learning_rate": 3.126034063260341e-06, "loss": 0.5911, "step": 29433 }, { "epoch": 0.8593617704592567, "grad_norm": 0.5949758805233935, "learning_rate": 3.1253852392538525e-06, "loss": 0.5147, "step": 29434 }, { "epoch": 0.859390966687104, "grad_norm": 0.675565290932698, "learning_rate": 3.124736415247364e-06, "loss": 0.6404, "step": 29435 }, { "epoch": 0.8594201629149514, "grad_norm": 0.6300188514830115, "learning_rate": 3.1240875912408765e-06, "loss": 0.6167, "step": 29436 }, { "epoch": 0.8594493591427987, "grad_norm": 0.6430329791798076, "learning_rate": 3.123438767234388e-06, "loss": 0.538, "step": 29437 }, { "epoch": 0.8594785553706461, "grad_norm": 0.6042688576907564, "learning_rate": 3.1227899432278998e-06, "loss": 0.5358, "step": 29438 }, { "epoch": 0.8595077515984935, "grad_norm": 0.6713682483533183, "learning_rate": 3.1221411192214114e-06, "loss": 0.6753, "step": 29439 }, { "epoch": 0.8595369478263408, "grad_norm": 0.6663195759549099, "learning_rate": 3.1214922952149234e-06, "loss": 0.6182, "step": 29440 }, { "epoch": 0.8595661440541882, "grad_norm": 0.6062044112620949, "learning_rate": 3.120843471208435e-06, "loss": 0.5399, "step": 29441 }, { "epoch": 0.8595953402820355, "grad_norm": 0.628917374961504, "learning_rate": 3.1201946472019466e-06, "loss": 0.548, "step": 29442 }, { "epoch": 0.8596245365098829, "grad_norm": 0.6485213496795192, "learning_rate": 3.119545823195458e-06, "loss": 0.6115, "step": 29443 }, { "epoch": 0.8596537327377303, "grad_norm": 0.6185754237246985, "learning_rate": 3.1188969991889702e-06, "loss": 0.553, "step": 29444 }, { "epoch": 0.8596829289655776, "grad_norm": 0.6070162483542079, "learning_rate": 3.1182481751824822e-06, "loss": 0.4946, "step": 29445 }, { "epoch": 0.859712125193425, "grad_norm": 0.6748964972772877, "learning_rate": 3.117599351175994e-06, "loss": 0.6104, "step": 29446 }, { "epoch": 0.8597413214212724, "grad_norm": 0.6559156143080365, "learning_rate": 3.1169505271695055e-06, "loss": 0.5971, "step": 29447 }, { "epoch": 0.8597705176491197, "grad_norm": 0.5975671926639298, "learning_rate": 3.116301703163017e-06, "loss": 0.5247, "step": 29448 }, { "epoch": 0.8597997138769671, "grad_norm": 0.70386901821335, "learning_rate": 3.1156528791565287e-06, "loss": 0.6856, "step": 29449 }, { "epoch": 0.8598289101048144, "grad_norm": 0.6897710808978803, "learning_rate": 3.115004055150041e-06, "loss": 0.6614, "step": 29450 }, { "epoch": 0.8598581063326618, "grad_norm": 0.6476070665383952, "learning_rate": 3.1143552311435527e-06, "loss": 0.6615, "step": 29451 }, { "epoch": 0.8598873025605092, "grad_norm": 0.7160140611897071, "learning_rate": 3.1137064071370643e-06, "loss": 0.6499, "step": 29452 }, { "epoch": 0.8599164987883565, "grad_norm": 0.6460137303874389, "learning_rate": 3.113057583130576e-06, "loss": 0.6023, "step": 29453 }, { "epoch": 0.8599456950162039, "grad_norm": 0.6707830271166247, "learning_rate": 3.1124087591240875e-06, "loss": 0.6484, "step": 29454 }, { "epoch": 0.8599748912440512, "grad_norm": 0.6515735903201293, "learning_rate": 3.1117599351176e-06, "loss": 0.561, "step": 29455 }, { "epoch": 0.8600040874718986, "grad_norm": 0.6241672690327271, "learning_rate": 3.1111111111111116e-06, "loss": 0.5528, "step": 29456 }, { "epoch": 0.860033283699746, "grad_norm": 0.6280833995792374, "learning_rate": 3.110462287104623e-06, "loss": 0.5958, "step": 29457 }, { "epoch": 0.8600624799275933, "grad_norm": 0.7355654177201059, "learning_rate": 3.109813463098135e-06, "loss": 0.7002, "step": 29458 }, { "epoch": 0.8600916761554407, "grad_norm": 0.6719548560418432, "learning_rate": 3.109164639091647e-06, "loss": 0.616, "step": 29459 }, { "epoch": 0.860120872383288, "grad_norm": 0.6611667505165216, "learning_rate": 3.1085158150851584e-06, "loss": 0.5875, "step": 29460 }, { "epoch": 0.8601500686111354, "grad_norm": 0.6456132305887493, "learning_rate": 3.10786699107867e-06, "loss": 0.5876, "step": 29461 }, { "epoch": 0.8601792648389828, "grad_norm": 0.5999192095138765, "learning_rate": 3.1072181670721816e-06, "loss": 0.5038, "step": 29462 }, { "epoch": 0.8602084610668301, "grad_norm": 0.6512433914338294, "learning_rate": 3.1065693430656937e-06, "loss": 0.611, "step": 29463 }, { "epoch": 0.8602376572946775, "grad_norm": 0.6281171117925749, "learning_rate": 3.1059205190592057e-06, "loss": 0.5688, "step": 29464 }, { "epoch": 0.8602668535225249, "grad_norm": 0.6817905454728244, "learning_rate": 3.1052716950527173e-06, "loss": 0.6181, "step": 29465 }, { "epoch": 0.8602960497503722, "grad_norm": 0.6051002629035165, "learning_rate": 3.104622871046229e-06, "loss": 0.5277, "step": 29466 }, { "epoch": 0.8603252459782196, "grad_norm": 0.6670423861421756, "learning_rate": 3.1039740470397405e-06, "loss": 0.6533, "step": 29467 }, { "epoch": 0.8603544422060669, "grad_norm": 0.6394567948181216, "learning_rate": 3.103325223033252e-06, "loss": 0.6157, "step": 29468 }, { "epoch": 0.8603836384339143, "grad_norm": 0.6214876037050591, "learning_rate": 3.1026763990267645e-06, "loss": 0.5119, "step": 29469 }, { "epoch": 0.8604128346617617, "grad_norm": 0.6987992111624982, "learning_rate": 3.102027575020276e-06, "loss": 0.6543, "step": 29470 }, { "epoch": 0.860442030889609, "grad_norm": 0.6605157873164884, "learning_rate": 3.1013787510137878e-06, "loss": 0.6433, "step": 29471 }, { "epoch": 0.8604712271174564, "grad_norm": 0.7768643168081415, "learning_rate": 3.1007299270072994e-06, "loss": 0.8269, "step": 29472 }, { "epoch": 0.8605004233453037, "grad_norm": 0.5936822903911029, "learning_rate": 3.1000811030008114e-06, "loss": 0.5373, "step": 29473 }, { "epoch": 0.8605296195731511, "grad_norm": 0.5899801282189253, "learning_rate": 3.099432278994323e-06, "loss": 0.5444, "step": 29474 }, { "epoch": 0.8605588158009985, "grad_norm": 0.700522946543132, "learning_rate": 3.098783454987835e-06, "loss": 0.6992, "step": 29475 }, { "epoch": 0.8605880120288458, "grad_norm": 0.708953600035239, "learning_rate": 3.0981346309813466e-06, "loss": 0.619, "step": 29476 }, { "epoch": 0.8606172082566932, "grad_norm": 0.6644731649586737, "learning_rate": 3.0974858069748582e-06, "loss": 0.5944, "step": 29477 }, { "epoch": 0.8606464044845406, "grad_norm": 0.6175151885640403, "learning_rate": 3.0968369829683703e-06, "loss": 0.5177, "step": 29478 }, { "epoch": 0.8606756007123879, "grad_norm": 0.6372546466930893, "learning_rate": 3.096188158961882e-06, "loss": 0.5734, "step": 29479 }, { "epoch": 0.8607047969402353, "grad_norm": 0.6750117150390297, "learning_rate": 3.0955393349553935e-06, "loss": 0.6534, "step": 29480 }, { "epoch": 0.8607339931680826, "grad_norm": 0.6827947066885932, "learning_rate": 3.094890510948905e-06, "loss": 0.6492, "step": 29481 }, { "epoch": 0.86076318939593, "grad_norm": 0.6112641239455221, "learning_rate": 3.094241686942417e-06, "loss": 0.5122, "step": 29482 }, { "epoch": 0.8607923856237774, "grad_norm": 0.6704716154612823, "learning_rate": 3.093592862935929e-06, "loss": 0.5767, "step": 29483 }, { "epoch": 0.8608215818516247, "grad_norm": 0.7055819430076917, "learning_rate": 3.0929440389294407e-06, "loss": 0.72, "step": 29484 }, { "epoch": 0.8608507780794721, "grad_norm": 0.6181962824895224, "learning_rate": 3.0922952149229523e-06, "loss": 0.5503, "step": 29485 }, { "epoch": 0.8608799743073194, "grad_norm": 0.6795117560590465, "learning_rate": 3.091646390916464e-06, "loss": 0.6222, "step": 29486 }, { "epoch": 0.8609091705351668, "grad_norm": 0.6363362209245432, "learning_rate": 3.0909975669099755e-06, "loss": 0.6298, "step": 29487 }, { "epoch": 0.8609383667630143, "grad_norm": 0.6264229014570141, "learning_rate": 3.090348742903488e-06, "loss": 0.5656, "step": 29488 }, { "epoch": 0.8609675629908616, "grad_norm": 0.6345687751254013, "learning_rate": 3.0896999188969996e-06, "loss": 0.5663, "step": 29489 }, { "epoch": 0.860996759218709, "grad_norm": 0.659646782877077, "learning_rate": 3.089051094890511e-06, "loss": 0.5925, "step": 29490 }, { "epoch": 0.8610259554465564, "grad_norm": 0.6454997528578985, "learning_rate": 3.088402270884023e-06, "loss": 0.5989, "step": 29491 }, { "epoch": 0.8610551516744037, "grad_norm": 0.6053524321221809, "learning_rate": 3.087753446877535e-06, "loss": 0.5548, "step": 29492 }, { "epoch": 0.8610843479022511, "grad_norm": 0.6848878818007479, "learning_rate": 3.0871046228710464e-06, "loss": 0.6864, "step": 29493 }, { "epoch": 0.8611135441300984, "grad_norm": 0.6696764269860958, "learning_rate": 3.0864557988645585e-06, "loss": 0.6548, "step": 29494 }, { "epoch": 0.8611427403579458, "grad_norm": 0.740101320223517, "learning_rate": 3.08580697485807e-06, "loss": 0.6721, "step": 29495 }, { "epoch": 0.8611719365857932, "grad_norm": 0.6290012546669158, "learning_rate": 3.0851581508515817e-06, "loss": 0.5473, "step": 29496 }, { "epoch": 0.8612011328136405, "grad_norm": 0.634108970095234, "learning_rate": 3.0845093268450937e-06, "loss": 0.5493, "step": 29497 }, { "epoch": 0.8612303290414879, "grad_norm": 0.6482547801168533, "learning_rate": 3.0838605028386053e-06, "loss": 0.5825, "step": 29498 }, { "epoch": 0.8612595252693352, "grad_norm": 0.6441125986271061, "learning_rate": 3.083211678832117e-06, "loss": 0.6383, "step": 29499 }, { "epoch": 0.8612887214971826, "grad_norm": 0.6600086095473797, "learning_rate": 3.0825628548256285e-06, "loss": 0.6024, "step": 29500 }, { "epoch": 0.86131791772503, "grad_norm": 0.6897264104013862, "learning_rate": 3.0819140308191405e-06, "loss": 0.6366, "step": 29501 }, { "epoch": 0.8613471139528773, "grad_norm": 0.6960555305726844, "learning_rate": 3.0812652068126526e-06, "loss": 0.6661, "step": 29502 }, { "epoch": 0.8613763101807247, "grad_norm": 0.680201672345185, "learning_rate": 3.080616382806164e-06, "loss": 0.6162, "step": 29503 }, { "epoch": 0.861405506408572, "grad_norm": 0.626954081467909, "learning_rate": 3.0799675587996758e-06, "loss": 0.5965, "step": 29504 }, { "epoch": 0.8614347026364194, "grad_norm": 0.644210084456004, "learning_rate": 3.0793187347931874e-06, "loss": 0.5833, "step": 29505 }, { "epoch": 0.8614638988642668, "grad_norm": 0.5780827413623744, "learning_rate": 3.0786699107867e-06, "loss": 0.49, "step": 29506 }, { "epoch": 0.8614930950921141, "grad_norm": 0.6722582345746674, "learning_rate": 3.0780210867802114e-06, "loss": 0.6085, "step": 29507 }, { "epoch": 0.8615222913199615, "grad_norm": 0.6595081312966462, "learning_rate": 3.077372262773723e-06, "loss": 0.599, "step": 29508 }, { "epoch": 0.8615514875478089, "grad_norm": 0.6908299609333168, "learning_rate": 3.0767234387672346e-06, "loss": 0.6944, "step": 29509 }, { "epoch": 0.8615806837756562, "grad_norm": 0.7042456919875756, "learning_rate": 3.0760746147607462e-06, "loss": 0.5951, "step": 29510 }, { "epoch": 0.8616098800035036, "grad_norm": 0.6449430317247885, "learning_rate": 3.0754257907542583e-06, "loss": 0.6142, "step": 29511 }, { "epoch": 0.8616390762313509, "grad_norm": 0.6780084083965797, "learning_rate": 3.07477696674777e-06, "loss": 0.6741, "step": 29512 }, { "epoch": 0.8616682724591983, "grad_norm": 0.681575079971936, "learning_rate": 3.074128142741282e-06, "loss": 0.6821, "step": 29513 }, { "epoch": 0.8616974686870457, "grad_norm": 0.6629939966759649, "learning_rate": 3.0734793187347935e-06, "loss": 0.6044, "step": 29514 }, { "epoch": 0.861726664914893, "grad_norm": 0.6400048042388451, "learning_rate": 3.072830494728305e-06, "loss": 0.5783, "step": 29515 }, { "epoch": 0.8617558611427404, "grad_norm": 0.6926212540428944, "learning_rate": 3.072181670721817e-06, "loss": 0.6474, "step": 29516 }, { "epoch": 0.8617850573705877, "grad_norm": 0.6506673172875848, "learning_rate": 3.0715328467153287e-06, "loss": 0.6066, "step": 29517 }, { "epoch": 0.8618142535984351, "grad_norm": 0.7040066605358007, "learning_rate": 3.0708840227088403e-06, "loss": 0.609, "step": 29518 }, { "epoch": 0.8618434498262825, "grad_norm": 0.6733125895275873, "learning_rate": 3.070235198702352e-06, "loss": 0.6362, "step": 29519 }, { "epoch": 0.8618726460541298, "grad_norm": 0.6634416074947621, "learning_rate": 3.0695863746958635e-06, "loss": 0.6136, "step": 29520 }, { "epoch": 0.8619018422819772, "grad_norm": 0.6529247882620247, "learning_rate": 3.068937550689376e-06, "loss": 0.6141, "step": 29521 }, { "epoch": 0.8619310385098246, "grad_norm": 0.6574469795311112, "learning_rate": 3.0682887266828876e-06, "loss": 0.6021, "step": 29522 }, { "epoch": 0.8619602347376719, "grad_norm": 0.6182796535166569, "learning_rate": 3.067639902676399e-06, "loss": 0.5461, "step": 29523 }, { "epoch": 0.8619894309655193, "grad_norm": 0.6291694600331561, "learning_rate": 3.066991078669911e-06, "loss": 0.5413, "step": 29524 }, { "epoch": 0.8620186271933666, "grad_norm": 0.6580069640846724, "learning_rate": 3.0663422546634232e-06, "loss": 0.6002, "step": 29525 }, { "epoch": 0.862047823421214, "grad_norm": 0.6889894033850182, "learning_rate": 3.065693430656935e-06, "loss": 0.5986, "step": 29526 }, { "epoch": 0.8620770196490614, "grad_norm": 0.6743727980857741, "learning_rate": 3.0650446066504465e-06, "loss": 0.6559, "step": 29527 }, { "epoch": 0.8621062158769087, "grad_norm": 0.6730159497801992, "learning_rate": 3.064395782643958e-06, "loss": 0.6651, "step": 29528 }, { "epoch": 0.8621354121047561, "grad_norm": 0.625060882125016, "learning_rate": 3.0637469586374697e-06, "loss": 0.5846, "step": 29529 }, { "epoch": 0.8621646083326034, "grad_norm": 0.6136911778617699, "learning_rate": 3.0630981346309817e-06, "loss": 0.5491, "step": 29530 }, { "epoch": 0.8621938045604508, "grad_norm": 0.7419655544603747, "learning_rate": 3.0624493106244933e-06, "loss": 0.6416, "step": 29531 }, { "epoch": 0.8622230007882982, "grad_norm": 0.6162752487180828, "learning_rate": 3.061800486618005e-06, "loss": 0.549, "step": 29532 }, { "epoch": 0.8622521970161455, "grad_norm": 0.6599827333591869, "learning_rate": 3.061151662611517e-06, "loss": 0.6658, "step": 29533 }, { "epoch": 0.8622813932439929, "grad_norm": 0.6217834510735675, "learning_rate": 3.0605028386050285e-06, "loss": 0.5277, "step": 29534 }, { "epoch": 0.8623105894718402, "grad_norm": 0.6186842213540714, "learning_rate": 3.0598540145985406e-06, "loss": 0.5348, "step": 29535 }, { "epoch": 0.8623397856996876, "grad_norm": 0.7276066779974569, "learning_rate": 3.059205190592052e-06, "loss": 0.6605, "step": 29536 }, { "epoch": 0.862368981927535, "grad_norm": 0.6974198827877857, "learning_rate": 3.0585563665855638e-06, "loss": 0.6636, "step": 29537 }, { "epoch": 0.8623981781553823, "grad_norm": 0.6877133815086017, "learning_rate": 3.0579075425790754e-06, "loss": 0.6255, "step": 29538 }, { "epoch": 0.8624273743832297, "grad_norm": 0.6205024905764454, "learning_rate": 3.057258718572587e-06, "loss": 0.5971, "step": 29539 }, { "epoch": 0.8624565706110771, "grad_norm": 0.6469778139751502, "learning_rate": 3.0566098945660994e-06, "loss": 0.6181, "step": 29540 }, { "epoch": 0.8624857668389244, "grad_norm": 0.6658568255622938, "learning_rate": 3.055961070559611e-06, "loss": 0.6239, "step": 29541 }, { "epoch": 0.8625149630667718, "grad_norm": 0.6692177401367896, "learning_rate": 3.0553122465531226e-06, "loss": 0.602, "step": 29542 }, { "epoch": 0.8625441592946191, "grad_norm": 0.6044450482649256, "learning_rate": 3.0546634225466342e-06, "loss": 0.5111, "step": 29543 }, { "epoch": 0.8625733555224665, "grad_norm": 0.6533981113987114, "learning_rate": 3.0540145985401467e-06, "loss": 0.5786, "step": 29544 }, { "epoch": 0.8626025517503139, "grad_norm": 0.642979409865105, "learning_rate": 3.0533657745336583e-06, "loss": 0.6236, "step": 29545 }, { "epoch": 0.8626317479781612, "grad_norm": 0.6636777914256289, "learning_rate": 3.05271695052717e-06, "loss": 0.6468, "step": 29546 }, { "epoch": 0.8626609442060086, "grad_norm": 0.5776800221376572, "learning_rate": 3.0520681265206815e-06, "loss": 0.4946, "step": 29547 }, { "epoch": 0.862690140433856, "grad_norm": 0.6412466332942629, "learning_rate": 3.051419302514193e-06, "loss": 0.5854, "step": 29548 }, { "epoch": 0.8627193366617033, "grad_norm": 0.6212585351676959, "learning_rate": 3.050770478507705e-06, "loss": 0.5677, "step": 29549 }, { "epoch": 0.8627485328895507, "grad_norm": 0.6074902735483276, "learning_rate": 3.0501216545012167e-06, "loss": 0.5346, "step": 29550 }, { "epoch": 0.862777729117398, "grad_norm": 0.6339874499957059, "learning_rate": 3.0494728304947283e-06, "loss": 0.6225, "step": 29551 }, { "epoch": 0.8628069253452454, "grad_norm": 0.6584173027887642, "learning_rate": 3.0488240064882404e-06, "loss": 0.5841, "step": 29552 }, { "epoch": 0.8628361215730928, "grad_norm": 0.6450654783254195, "learning_rate": 3.048175182481752e-06, "loss": 0.6236, "step": 29553 }, { "epoch": 0.8628653178009401, "grad_norm": 0.6683433058714902, "learning_rate": 3.047526358475264e-06, "loss": 0.6015, "step": 29554 }, { "epoch": 0.8628945140287875, "grad_norm": 0.6409077155027699, "learning_rate": 3.0468775344687756e-06, "loss": 0.6105, "step": 29555 }, { "epoch": 0.8629237102566348, "grad_norm": 0.6402388678121911, "learning_rate": 3.046228710462287e-06, "loss": 0.6032, "step": 29556 }, { "epoch": 0.8629529064844822, "grad_norm": 0.6773570213503527, "learning_rate": 3.045579886455799e-06, "loss": 0.6233, "step": 29557 }, { "epoch": 0.8629821027123296, "grad_norm": 0.6356910411617793, "learning_rate": 3.0449310624493113e-06, "loss": 0.5993, "step": 29558 }, { "epoch": 0.8630112989401769, "grad_norm": 0.594231586656171, "learning_rate": 3.044282238442823e-06, "loss": 0.5513, "step": 29559 }, { "epoch": 0.8630404951680243, "grad_norm": 0.6950969797298622, "learning_rate": 3.0436334144363345e-06, "loss": 0.6597, "step": 29560 }, { "epoch": 0.8630696913958716, "grad_norm": 0.6816532968454232, "learning_rate": 3.042984590429846e-06, "loss": 0.6241, "step": 29561 }, { "epoch": 0.863098887623719, "grad_norm": 0.6058723292771385, "learning_rate": 3.0423357664233577e-06, "loss": 0.5057, "step": 29562 }, { "epoch": 0.8631280838515664, "grad_norm": 0.6883694477842235, "learning_rate": 3.0416869424168697e-06, "loss": 0.631, "step": 29563 }, { "epoch": 0.8631572800794137, "grad_norm": 0.5595385921368102, "learning_rate": 3.0410381184103817e-06, "loss": 0.4962, "step": 29564 }, { "epoch": 0.8631864763072611, "grad_norm": 0.6120220976630054, "learning_rate": 3.0403892944038933e-06, "loss": 0.5452, "step": 29565 }, { "epoch": 0.8632156725351084, "grad_norm": 0.6508667775759672, "learning_rate": 3.039740470397405e-06, "loss": 0.5961, "step": 29566 }, { "epoch": 0.8632448687629558, "grad_norm": 0.6013228840675127, "learning_rate": 3.0390916463909165e-06, "loss": 0.4942, "step": 29567 }, { "epoch": 0.8632740649908032, "grad_norm": 0.6850813777882861, "learning_rate": 3.0384428223844286e-06, "loss": 0.6632, "step": 29568 }, { "epoch": 0.8633032612186505, "grad_norm": 0.6113280529663584, "learning_rate": 3.03779399837794e-06, "loss": 0.4866, "step": 29569 }, { "epoch": 0.8633324574464979, "grad_norm": 0.5731187239092943, "learning_rate": 3.0371451743714518e-06, "loss": 0.4988, "step": 29570 }, { "epoch": 0.8633616536743453, "grad_norm": 0.6328280591233023, "learning_rate": 3.036496350364964e-06, "loss": 0.5109, "step": 29571 }, { "epoch": 0.8633908499021926, "grad_norm": 0.6176053106978189, "learning_rate": 3.0358475263584754e-06, "loss": 0.5689, "step": 29572 }, { "epoch": 0.86342004613004, "grad_norm": 0.5929900537628933, "learning_rate": 3.0351987023519874e-06, "loss": 0.4944, "step": 29573 }, { "epoch": 0.8634492423578873, "grad_norm": 0.6465359747285375, "learning_rate": 3.034549878345499e-06, "loss": 0.5359, "step": 29574 }, { "epoch": 0.8634784385857347, "grad_norm": 0.7115678336382357, "learning_rate": 3.0339010543390106e-06, "loss": 0.6975, "step": 29575 }, { "epoch": 0.8635076348135821, "grad_norm": 0.6172573069520191, "learning_rate": 3.0332522303325222e-06, "loss": 0.5406, "step": 29576 }, { "epoch": 0.8635368310414294, "grad_norm": 0.6310274070943492, "learning_rate": 3.0326034063260347e-06, "loss": 0.5875, "step": 29577 }, { "epoch": 0.8635660272692768, "grad_norm": 0.6067096749352032, "learning_rate": 3.0319545823195463e-06, "loss": 0.5469, "step": 29578 }, { "epoch": 0.8635952234971241, "grad_norm": 0.6906541550149697, "learning_rate": 3.031305758313058e-06, "loss": 0.6439, "step": 29579 }, { "epoch": 0.8636244197249715, "grad_norm": 0.6691491828577213, "learning_rate": 3.0306569343065695e-06, "loss": 0.6363, "step": 29580 }, { "epoch": 0.8636536159528189, "grad_norm": 0.7108374180262761, "learning_rate": 3.030008110300081e-06, "loss": 0.6056, "step": 29581 }, { "epoch": 0.8636828121806662, "grad_norm": 0.6248363046580483, "learning_rate": 3.029359286293593e-06, "loss": 0.5672, "step": 29582 }, { "epoch": 0.8637120084085136, "grad_norm": 0.61607895744797, "learning_rate": 3.028710462287105e-06, "loss": 0.5444, "step": 29583 }, { "epoch": 0.863741204636361, "grad_norm": 0.6103451886561956, "learning_rate": 3.0280616382806168e-06, "loss": 0.531, "step": 29584 }, { "epoch": 0.8637704008642083, "grad_norm": 0.6397359212670968, "learning_rate": 3.0274128142741284e-06, "loss": 0.5647, "step": 29585 }, { "epoch": 0.8637995970920557, "grad_norm": 0.6763367237841601, "learning_rate": 3.02676399026764e-06, "loss": 0.6287, "step": 29586 }, { "epoch": 0.863828793319903, "grad_norm": 0.6112278025863649, "learning_rate": 3.026115166261152e-06, "loss": 0.5832, "step": 29587 }, { "epoch": 0.8638579895477504, "grad_norm": 0.609512379951758, "learning_rate": 3.0254663422546636e-06, "loss": 0.5442, "step": 29588 }, { "epoch": 0.8638871857755978, "grad_norm": 0.6467196627914482, "learning_rate": 3.024817518248175e-06, "loss": 0.5402, "step": 29589 }, { "epoch": 0.8639163820034451, "grad_norm": 0.7166064999862815, "learning_rate": 3.0241686942416872e-06, "loss": 0.6001, "step": 29590 }, { "epoch": 0.8639455782312925, "grad_norm": 0.670357565004855, "learning_rate": 3.0235198702351993e-06, "loss": 0.5835, "step": 29591 }, { "epoch": 0.8639747744591398, "grad_norm": 0.6002188265871512, "learning_rate": 3.022871046228711e-06, "loss": 0.5159, "step": 29592 }, { "epoch": 0.8640039706869872, "grad_norm": 0.6175018323352252, "learning_rate": 3.0222222222222225e-06, "loss": 0.5473, "step": 29593 }, { "epoch": 0.8640331669148346, "grad_norm": 0.7084545232612295, "learning_rate": 3.021573398215734e-06, "loss": 0.6162, "step": 29594 }, { "epoch": 0.8640623631426819, "grad_norm": 0.6696576795902722, "learning_rate": 3.0209245742092457e-06, "loss": 0.6113, "step": 29595 }, { "epoch": 0.8640915593705293, "grad_norm": 0.6913247530846802, "learning_rate": 3.020275750202758e-06, "loss": 0.6489, "step": 29596 }, { "epoch": 0.8641207555983766, "grad_norm": 0.5974957916912115, "learning_rate": 3.0196269261962697e-06, "loss": 0.528, "step": 29597 }, { "epoch": 0.864149951826224, "grad_norm": 0.6824552879315374, "learning_rate": 3.0189781021897813e-06, "loss": 0.6384, "step": 29598 }, { "epoch": 0.8641791480540714, "grad_norm": 0.6006593834368187, "learning_rate": 3.018329278183293e-06, "loss": 0.5084, "step": 29599 }, { "epoch": 0.8642083442819187, "grad_norm": 0.6824804106161436, "learning_rate": 3.0176804541768045e-06, "loss": 0.6403, "step": 29600 }, { "epoch": 0.8642375405097661, "grad_norm": 0.6298650277906898, "learning_rate": 3.0170316301703166e-06, "loss": 0.5847, "step": 29601 }, { "epoch": 0.8642667367376135, "grad_norm": 0.6362177965182757, "learning_rate": 3.0163828061638286e-06, "loss": 0.5815, "step": 29602 }, { "epoch": 0.8642959329654608, "grad_norm": 0.6465238579561376, "learning_rate": 3.01573398215734e-06, "loss": 0.5904, "step": 29603 }, { "epoch": 0.8643251291933082, "grad_norm": 0.6828511332590285, "learning_rate": 3.015085158150852e-06, "loss": 0.6771, "step": 29604 }, { "epoch": 0.8643543254211555, "grad_norm": 0.6210129395986188, "learning_rate": 3.0144363341443634e-06, "loss": 0.5471, "step": 29605 }, { "epoch": 0.8643835216490029, "grad_norm": 0.6324358296426068, "learning_rate": 3.0137875101378754e-06, "loss": 0.515, "step": 29606 }, { "epoch": 0.8644127178768503, "grad_norm": 0.6372293772105133, "learning_rate": 3.013138686131387e-06, "loss": 0.5885, "step": 29607 }, { "epoch": 0.8644419141046977, "grad_norm": 0.579690800679788, "learning_rate": 3.0124898621248986e-06, "loss": 0.5054, "step": 29608 }, { "epoch": 0.8644711103325451, "grad_norm": 0.5967640737017105, "learning_rate": 3.0118410381184102e-06, "loss": 0.509, "step": 29609 }, { "epoch": 0.8645003065603925, "grad_norm": 0.6419110169748381, "learning_rate": 3.0111922141119227e-06, "loss": 0.6158, "step": 29610 }, { "epoch": 0.8645295027882398, "grad_norm": 0.6249453085756931, "learning_rate": 3.0105433901054343e-06, "loss": 0.5203, "step": 29611 }, { "epoch": 0.8645586990160872, "grad_norm": 0.6226883088438018, "learning_rate": 3.009894566098946e-06, "loss": 0.5511, "step": 29612 }, { "epoch": 0.8645878952439345, "grad_norm": 0.6593483201448666, "learning_rate": 3.0092457420924575e-06, "loss": 0.625, "step": 29613 }, { "epoch": 0.8646170914717819, "grad_norm": 0.6676232466686868, "learning_rate": 3.008596918085969e-06, "loss": 0.6278, "step": 29614 }, { "epoch": 0.8646462876996293, "grad_norm": 0.6673419485504347, "learning_rate": 3.0079480940794816e-06, "loss": 0.6582, "step": 29615 }, { "epoch": 0.8646754839274766, "grad_norm": 0.6414203288990361, "learning_rate": 3.007299270072993e-06, "loss": 0.6124, "step": 29616 }, { "epoch": 0.864704680155324, "grad_norm": 0.6532727120319708, "learning_rate": 3.0066504460665048e-06, "loss": 0.6174, "step": 29617 }, { "epoch": 0.8647338763831713, "grad_norm": 0.6030920016613968, "learning_rate": 3.0060016220600164e-06, "loss": 0.5113, "step": 29618 }, { "epoch": 0.8647630726110187, "grad_norm": 0.6380666885496493, "learning_rate": 3.005352798053528e-06, "loss": 0.5696, "step": 29619 }, { "epoch": 0.8647922688388661, "grad_norm": 0.6658965454437356, "learning_rate": 3.00470397404704e-06, "loss": 0.6541, "step": 29620 }, { "epoch": 0.8648214650667134, "grad_norm": 0.6672870188552107, "learning_rate": 3.004055150040552e-06, "loss": 0.6396, "step": 29621 }, { "epoch": 0.8648506612945608, "grad_norm": 0.7324885113809876, "learning_rate": 3.0034063260340636e-06, "loss": 0.6576, "step": 29622 }, { "epoch": 0.8648798575224081, "grad_norm": 0.6625034935797379, "learning_rate": 3.0027575020275752e-06, "loss": 0.6467, "step": 29623 }, { "epoch": 0.8649090537502555, "grad_norm": 0.6374403513763482, "learning_rate": 3.0021086780210873e-06, "loss": 0.5887, "step": 29624 }, { "epoch": 0.8649382499781029, "grad_norm": 0.6383067819214351, "learning_rate": 3.001459854014599e-06, "loss": 0.5648, "step": 29625 }, { "epoch": 0.8649674462059502, "grad_norm": 0.6471091321210962, "learning_rate": 3.0008110300081105e-06, "loss": 0.6059, "step": 29626 }, { "epoch": 0.8649966424337976, "grad_norm": 0.6918199780968106, "learning_rate": 3.000162206001622e-06, "loss": 0.5627, "step": 29627 }, { "epoch": 0.865025838661645, "grad_norm": 0.622742589084996, "learning_rate": 2.9995133819951337e-06, "loss": 0.5646, "step": 29628 }, { "epoch": 0.8650550348894923, "grad_norm": 0.6946019113973304, "learning_rate": 2.998864557988646e-06, "loss": 0.6878, "step": 29629 }, { "epoch": 0.8650842311173397, "grad_norm": 0.6446663062365741, "learning_rate": 2.9982157339821577e-06, "loss": 0.612, "step": 29630 }, { "epoch": 0.865113427345187, "grad_norm": 0.663178903154836, "learning_rate": 2.9975669099756693e-06, "loss": 0.6077, "step": 29631 }, { "epoch": 0.8651426235730344, "grad_norm": 0.6270904251235926, "learning_rate": 2.996918085969181e-06, "loss": 0.5888, "step": 29632 }, { "epoch": 0.8651718198008818, "grad_norm": 0.6566227641402316, "learning_rate": 2.9962692619626925e-06, "loss": 0.6254, "step": 29633 }, { "epoch": 0.8652010160287291, "grad_norm": 0.6808693945851446, "learning_rate": 2.995620437956205e-06, "loss": 0.6231, "step": 29634 }, { "epoch": 0.8652302122565765, "grad_norm": 0.6829825481994349, "learning_rate": 2.9949716139497166e-06, "loss": 0.6515, "step": 29635 }, { "epoch": 0.8652594084844238, "grad_norm": 0.67540696787933, "learning_rate": 2.994322789943228e-06, "loss": 0.6479, "step": 29636 }, { "epoch": 0.8652886047122712, "grad_norm": 0.6546013382704207, "learning_rate": 2.99367396593674e-06, "loss": 0.6281, "step": 29637 }, { "epoch": 0.8653178009401186, "grad_norm": 0.632780418925645, "learning_rate": 2.9930251419302514e-06, "loss": 0.6025, "step": 29638 }, { "epoch": 0.8653469971679659, "grad_norm": 0.6401455056662038, "learning_rate": 2.9923763179237634e-06, "loss": 0.5501, "step": 29639 }, { "epoch": 0.8653761933958133, "grad_norm": 0.6180573454098751, "learning_rate": 2.991727493917275e-06, "loss": 0.5527, "step": 29640 }, { "epoch": 0.8654053896236606, "grad_norm": 0.6034903428088805, "learning_rate": 2.991078669910787e-06, "loss": 0.5383, "step": 29641 }, { "epoch": 0.865434585851508, "grad_norm": 0.6125015606097269, "learning_rate": 2.9904298459042987e-06, "loss": 0.5063, "step": 29642 }, { "epoch": 0.8654637820793554, "grad_norm": 0.662785180489863, "learning_rate": 2.9897810218978107e-06, "loss": 0.5898, "step": 29643 }, { "epoch": 0.8654929783072027, "grad_norm": 0.7037157219238894, "learning_rate": 2.9891321978913223e-06, "loss": 0.7193, "step": 29644 }, { "epoch": 0.8655221745350501, "grad_norm": 0.6293176718371276, "learning_rate": 2.988483373884834e-06, "loss": 0.5651, "step": 29645 }, { "epoch": 0.8655513707628975, "grad_norm": 0.6726566921526999, "learning_rate": 2.9878345498783455e-06, "loss": 0.5628, "step": 29646 }, { "epoch": 0.8655805669907448, "grad_norm": 0.6386747629718038, "learning_rate": 2.987185725871857e-06, "loss": 0.5871, "step": 29647 }, { "epoch": 0.8656097632185922, "grad_norm": 0.6576623398329443, "learning_rate": 2.9865369018653696e-06, "loss": 0.6211, "step": 29648 }, { "epoch": 0.8656389594464395, "grad_norm": 0.6189174578702608, "learning_rate": 2.985888077858881e-06, "loss": 0.5518, "step": 29649 }, { "epoch": 0.8656681556742869, "grad_norm": 0.5975512908883412, "learning_rate": 2.9852392538523928e-06, "loss": 0.521, "step": 29650 }, { "epoch": 0.8656973519021343, "grad_norm": 0.6663137794670436, "learning_rate": 2.9845904298459044e-06, "loss": 0.5935, "step": 29651 }, { "epoch": 0.8657265481299816, "grad_norm": 0.5843465504593757, "learning_rate": 2.983941605839416e-06, "loss": 0.5385, "step": 29652 }, { "epoch": 0.865755744357829, "grad_norm": 0.6492642715948169, "learning_rate": 2.9832927818329284e-06, "loss": 0.6246, "step": 29653 }, { "epoch": 0.8657849405856763, "grad_norm": 0.6448419201019513, "learning_rate": 2.98264395782644e-06, "loss": 0.5462, "step": 29654 }, { "epoch": 0.8658141368135237, "grad_norm": 0.6227616493723171, "learning_rate": 2.9819951338199516e-06, "loss": 0.5247, "step": 29655 }, { "epoch": 0.8658433330413711, "grad_norm": 0.6095880789638953, "learning_rate": 2.9813463098134632e-06, "loss": 0.5131, "step": 29656 }, { "epoch": 0.8658725292692184, "grad_norm": 0.6288360550764353, "learning_rate": 2.9806974858069753e-06, "loss": 0.566, "step": 29657 }, { "epoch": 0.8659017254970658, "grad_norm": 0.589856133328143, "learning_rate": 2.980048661800487e-06, "loss": 0.5257, "step": 29658 }, { "epoch": 0.8659309217249131, "grad_norm": 0.6416813284903325, "learning_rate": 2.9793998377939985e-06, "loss": 0.5775, "step": 29659 }, { "epoch": 0.8659601179527605, "grad_norm": 0.6222540186384115, "learning_rate": 2.9787510137875105e-06, "loss": 0.524, "step": 29660 }, { "epoch": 0.8659893141806079, "grad_norm": 0.6179488308715554, "learning_rate": 2.978102189781022e-06, "loss": 0.5437, "step": 29661 }, { "epoch": 0.8660185104084552, "grad_norm": 0.674266690125759, "learning_rate": 2.977453365774534e-06, "loss": 0.6677, "step": 29662 }, { "epoch": 0.8660477066363026, "grad_norm": 0.6920649754408457, "learning_rate": 2.9768045417680457e-06, "loss": 0.6059, "step": 29663 }, { "epoch": 0.86607690286415, "grad_norm": 0.6470448669561065, "learning_rate": 2.9761557177615573e-06, "loss": 0.5732, "step": 29664 }, { "epoch": 0.8661060990919973, "grad_norm": 0.607691338737418, "learning_rate": 2.975506893755069e-06, "loss": 0.5405, "step": 29665 }, { "epoch": 0.8661352953198447, "grad_norm": 0.6013620993984803, "learning_rate": 2.9748580697485805e-06, "loss": 0.475, "step": 29666 }, { "epoch": 0.866164491547692, "grad_norm": 0.6564223828263462, "learning_rate": 2.974209245742093e-06, "loss": 0.6021, "step": 29667 }, { "epoch": 0.8661936877755394, "grad_norm": 0.6325235324241077, "learning_rate": 2.9735604217356046e-06, "loss": 0.5593, "step": 29668 }, { "epoch": 0.8662228840033868, "grad_norm": 0.6529259766640838, "learning_rate": 2.972911597729116e-06, "loss": 0.6058, "step": 29669 }, { "epoch": 0.8662520802312341, "grad_norm": 0.6140148444403625, "learning_rate": 2.972262773722628e-06, "loss": 0.524, "step": 29670 }, { "epoch": 0.8662812764590815, "grad_norm": 0.6648981990473536, "learning_rate": 2.9716139497161394e-06, "loss": 0.6125, "step": 29671 }, { "epoch": 0.8663104726869288, "grad_norm": 0.6729510715657887, "learning_rate": 2.970965125709652e-06, "loss": 0.5957, "step": 29672 }, { "epoch": 0.8663396689147762, "grad_norm": 0.6543447627379765, "learning_rate": 2.9703163017031635e-06, "loss": 0.6082, "step": 29673 }, { "epoch": 0.8663688651426236, "grad_norm": 0.6951285164269282, "learning_rate": 2.969667477696675e-06, "loss": 0.6269, "step": 29674 }, { "epoch": 0.8663980613704709, "grad_norm": 0.6556559836250514, "learning_rate": 2.9690186536901867e-06, "loss": 0.6281, "step": 29675 }, { "epoch": 0.8664272575983183, "grad_norm": 0.6955771911328262, "learning_rate": 2.9683698296836987e-06, "loss": 0.6986, "step": 29676 }, { "epoch": 0.8664564538261657, "grad_norm": 0.6481122047744243, "learning_rate": 2.9677210056772103e-06, "loss": 0.6288, "step": 29677 }, { "epoch": 0.866485650054013, "grad_norm": 0.631439294789546, "learning_rate": 2.967072181670722e-06, "loss": 0.5977, "step": 29678 }, { "epoch": 0.8665148462818604, "grad_norm": 0.5982435736359805, "learning_rate": 2.966423357664234e-06, "loss": 0.4953, "step": 29679 }, { "epoch": 0.8665440425097077, "grad_norm": 0.6552884700456951, "learning_rate": 2.9657745336577455e-06, "loss": 0.6173, "step": 29680 }, { "epoch": 0.8665732387375551, "grad_norm": 0.6396664326786315, "learning_rate": 2.9651257096512576e-06, "loss": 0.5621, "step": 29681 }, { "epoch": 0.8666024349654025, "grad_norm": 0.6447476166730318, "learning_rate": 2.964476885644769e-06, "loss": 0.6022, "step": 29682 }, { "epoch": 0.8666316311932498, "grad_norm": 0.7103144091941569, "learning_rate": 2.9638280616382808e-06, "loss": 0.6865, "step": 29683 }, { "epoch": 0.8666608274210972, "grad_norm": 0.6041595533199703, "learning_rate": 2.9631792376317924e-06, "loss": 0.5029, "step": 29684 }, { "epoch": 0.8666900236489445, "grad_norm": 0.630504478924801, "learning_rate": 2.962530413625304e-06, "loss": 0.5979, "step": 29685 }, { "epoch": 0.8667192198767919, "grad_norm": 0.643827929761739, "learning_rate": 2.9618815896188164e-06, "loss": 0.5891, "step": 29686 }, { "epoch": 0.8667484161046393, "grad_norm": 0.6240995118886933, "learning_rate": 2.961232765612328e-06, "loss": 0.595, "step": 29687 }, { "epoch": 0.8667776123324866, "grad_norm": 0.6088720776123973, "learning_rate": 2.9605839416058396e-06, "loss": 0.5254, "step": 29688 }, { "epoch": 0.866806808560334, "grad_norm": 0.619026652099713, "learning_rate": 2.9599351175993512e-06, "loss": 0.5329, "step": 29689 }, { "epoch": 0.8668360047881813, "grad_norm": 0.5882428490232766, "learning_rate": 2.959286293592863e-06, "loss": 0.4955, "step": 29690 }, { "epoch": 0.8668652010160287, "grad_norm": 0.6596302376391117, "learning_rate": 2.9586374695863753e-06, "loss": 0.6255, "step": 29691 }, { "epoch": 0.8668943972438761, "grad_norm": 0.7116782540108738, "learning_rate": 2.957988645579887e-06, "loss": 0.7172, "step": 29692 }, { "epoch": 0.8669235934717234, "grad_norm": 0.6217265058829788, "learning_rate": 2.9573398215733985e-06, "loss": 0.5589, "step": 29693 }, { "epoch": 0.8669527896995708, "grad_norm": 0.661641925523341, "learning_rate": 2.95669099756691e-06, "loss": 0.6418, "step": 29694 }, { "epoch": 0.8669819859274182, "grad_norm": 0.6474392074091866, "learning_rate": 2.956042173560422e-06, "loss": 0.5516, "step": 29695 }, { "epoch": 0.8670111821552655, "grad_norm": 0.6469169298305579, "learning_rate": 2.9553933495539337e-06, "loss": 0.5628, "step": 29696 }, { "epoch": 0.8670403783831129, "grad_norm": 0.6551796810773857, "learning_rate": 2.9547445255474453e-06, "loss": 0.6065, "step": 29697 }, { "epoch": 0.8670695746109602, "grad_norm": 0.6767659900284483, "learning_rate": 2.954095701540957e-06, "loss": 0.6214, "step": 29698 }, { "epoch": 0.8670987708388076, "grad_norm": 0.6459614163242006, "learning_rate": 2.953446877534469e-06, "loss": 0.5956, "step": 29699 }, { "epoch": 0.867127967066655, "grad_norm": 0.6658967419457137, "learning_rate": 2.952798053527981e-06, "loss": 0.6105, "step": 29700 }, { "epoch": 0.8671571632945023, "grad_norm": 0.5963452748417527, "learning_rate": 2.9521492295214926e-06, "loss": 0.5155, "step": 29701 }, { "epoch": 0.8671863595223497, "grad_norm": 0.5933260346486603, "learning_rate": 2.951500405515004e-06, "loss": 0.532, "step": 29702 }, { "epoch": 0.867215555750197, "grad_norm": 0.6250390066801664, "learning_rate": 2.950851581508516e-06, "loss": 0.5487, "step": 29703 }, { "epoch": 0.8672447519780444, "grad_norm": 0.6160298072306097, "learning_rate": 2.9502027575020274e-06, "loss": 0.5741, "step": 29704 }, { "epoch": 0.8672739482058918, "grad_norm": 0.6447435289590912, "learning_rate": 2.94955393349554e-06, "loss": 0.5863, "step": 29705 }, { "epoch": 0.8673031444337391, "grad_norm": 0.6420934817284462, "learning_rate": 2.9489051094890515e-06, "loss": 0.6191, "step": 29706 }, { "epoch": 0.8673323406615865, "grad_norm": 0.6323474236161135, "learning_rate": 2.948256285482563e-06, "loss": 0.5907, "step": 29707 }, { "epoch": 0.8673615368894338, "grad_norm": 0.6652812150865535, "learning_rate": 2.9476074614760747e-06, "loss": 0.5922, "step": 29708 }, { "epoch": 0.8673907331172812, "grad_norm": 0.6044503394586541, "learning_rate": 2.9469586374695867e-06, "loss": 0.5528, "step": 29709 }, { "epoch": 0.8674199293451286, "grad_norm": 0.6884755289962423, "learning_rate": 2.9463098134630987e-06, "loss": 0.635, "step": 29710 }, { "epoch": 0.8674491255729759, "grad_norm": 0.660235382832574, "learning_rate": 2.9456609894566103e-06, "loss": 0.599, "step": 29711 }, { "epoch": 0.8674783218008233, "grad_norm": 0.6667174892089868, "learning_rate": 2.945012165450122e-06, "loss": 0.6192, "step": 29712 }, { "epoch": 0.8675075180286707, "grad_norm": 0.6237838454516907, "learning_rate": 2.9443633414436335e-06, "loss": 0.5672, "step": 29713 }, { "epoch": 0.867536714256518, "grad_norm": 0.5633518442292952, "learning_rate": 2.9437145174371456e-06, "loss": 0.4659, "step": 29714 }, { "epoch": 0.8675659104843654, "grad_norm": 0.6272980084949964, "learning_rate": 2.943065693430657e-06, "loss": 0.5576, "step": 29715 }, { "epoch": 0.8675951067122127, "grad_norm": 0.5972938293164689, "learning_rate": 2.9424168694241688e-06, "loss": 0.4861, "step": 29716 }, { "epoch": 0.8676243029400601, "grad_norm": 0.6775969607394096, "learning_rate": 2.9417680454176804e-06, "loss": 0.6454, "step": 29717 }, { "epoch": 0.8676534991679075, "grad_norm": 0.6423840814213169, "learning_rate": 2.9411192214111924e-06, "loss": 0.5768, "step": 29718 }, { "epoch": 0.8676826953957548, "grad_norm": 0.5889310086617723, "learning_rate": 2.9404703974047044e-06, "loss": 0.4754, "step": 29719 }, { "epoch": 0.8677118916236022, "grad_norm": 0.6391042843970339, "learning_rate": 2.939821573398216e-06, "loss": 0.5927, "step": 29720 }, { "epoch": 0.8677410878514495, "grad_norm": 0.6310689961147036, "learning_rate": 2.9391727493917276e-06, "loss": 0.5718, "step": 29721 }, { "epoch": 0.8677702840792969, "grad_norm": 0.6079668093361602, "learning_rate": 2.9385239253852392e-06, "loss": 0.5319, "step": 29722 }, { "epoch": 0.8677994803071443, "grad_norm": 0.6454473743075547, "learning_rate": 2.937875101378751e-06, "loss": 0.5857, "step": 29723 }, { "epoch": 0.8678286765349916, "grad_norm": 0.6714432512388556, "learning_rate": 2.9372262773722633e-06, "loss": 0.6545, "step": 29724 }, { "epoch": 0.867857872762839, "grad_norm": 0.7372932600909333, "learning_rate": 2.936577453365775e-06, "loss": 0.6525, "step": 29725 }, { "epoch": 0.8678870689906864, "grad_norm": 0.8201267334018406, "learning_rate": 2.9359286293592865e-06, "loss": 0.6006, "step": 29726 }, { "epoch": 0.8679162652185337, "grad_norm": 0.6545493507489071, "learning_rate": 2.935279805352798e-06, "loss": 0.5737, "step": 29727 }, { "epoch": 0.8679454614463811, "grad_norm": 0.6385512827868958, "learning_rate": 2.93463098134631e-06, "loss": 0.566, "step": 29728 }, { "epoch": 0.8679746576742285, "grad_norm": 0.6309454698038316, "learning_rate": 2.9339821573398217e-06, "loss": 0.5413, "step": 29729 }, { "epoch": 0.8680038539020759, "grad_norm": 0.6676444971810864, "learning_rate": 2.9333333333333338e-06, "loss": 0.5957, "step": 29730 }, { "epoch": 0.8680330501299233, "grad_norm": 0.661711239817489, "learning_rate": 2.9326845093268454e-06, "loss": 0.6243, "step": 29731 }, { "epoch": 0.8680622463577706, "grad_norm": 0.6227469667590186, "learning_rate": 2.932035685320357e-06, "loss": 0.5716, "step": 29732 }, { "epoch": 0.868091442585618, "grad_norm": 0.7281572278536377, "learning_rate": 2.931386861313869e-06, "loss": 0.6611, "step": 29733 }, { "epoch": 0.8681206388134654, "grad_norm": 0.626687837386846, "learning_rate": 2.9307380373073806e-06, "loss": 0.5317, "step": 29734 }, { "epoch": 0.8681498350413127, "grad_norm": 0.6270270863554456, "learning_rate": 2.930089213300892e-06, "loss": 0.5723, "step": 29735 }, { "epoch": 0.8681790312691601, "grad_norm": 0.6029450520678082, "learning_rate": 2.929440389294404e-06, "loss": 0.4948, "step": 29736 }, { "epoch": 0.8682082274970074, "grad_norm": 0.6235002510734509, "learning_rate": 2.928791565287916e-06, "loss": 0.5093, "step": 29737 }, { "epoch": 0.8682374237248548, "grad_norm": 0.6656148090065803, "learning_rate": 2.928142741281428e-06, "loss": 0.5999, "step": 29738 }, { "epoch": 0.8682666199527022, "grad_norm": 0.655587668239454, "learning_rate": 2.9274939172749395e-06, "loss": 0.6008, "step": 29739 }, { "epoch": 0.8682958161805495, "grad_norm": 0.6060082023333188, "learning_rate": 2.926845093268451e-06, "loss": 0.5411, "step": 29740 }, { "epoch": 0.8683250124083969, "grad_norm": 0.6682600985663586, "learning_rate": 2.9261962692619627e-06, "loss": 0.5596, "step": 29741 }, { "epoch": 0.8683542086362442, "grad_norm": 0.605865227842157, "learning_rate": 2.925547445255475e-06, "loss": 0.5396, "step": 29742 }, { "epoch": 0.8683834048640916, "grad_norm": 0.7033647911424428, "learning_rate": 2.9248986212489867e-06, "loss": 0.6772, "step": 29743 }, { "epoch": 0.868412601091939, "grad_norm": 0.6320416412501496, "learning_rate": 2.9242497972424983e-06, "loss": 0.5628, "step": 29744 }, { "epoch": 0.8684417973197863, "grad_norm": 0.6208136749606716, "learning_rate": 2.92360097323601e-06, "loss": 0.5531, "step": 29745 }, { "epoch": 0.8684709935476337, "grad_norm": 0.5960222202074418, "learning_rate": 2.9229521492295215e-06, "loss": 0.5005, "step": 29746 }, { "epoch": 0.868500189775481, "grad_norm": 0.6384408443460182, "learning_rate": 2.9223033252230336e-06, "loss": 0.5969, "step": 29747 }, { "epoch": 0.8685293860033284, "grad_norm": 0.6394219006104196, "learning_rate": 2.921654501216545e-06, "loss": 0.5734, "step": 29748 }, { "epoch": 0.8685585822311758, "grad_norm": 0.6093385311629336, "learning_rate": 2.921005677210057e-06, "loss": 0.5273, "step": 29749 }, { "epoch": 0.8685877784590231, "grad_norm": 0.59099027345446, "learning_rate": 2.920356853203569e-06, "loss": 0.5398, "step": 29750 }, { "epoch": 0.8686169746868705, "grad_norm": 0.6056056547150758, "learning_rate": 2.9197080291970804e-06, "loss": 0.531, "step": 29751 }, { "epoch": 0.8686461709147179, "grad_norm": 0.6494847479127435, "learning_rate": 2.9190592051905924e-06, "loss": 0.6629, "step": 29752 }, { "epoch": 0.8686753671425652, "grad_norm": 0.6727729507055963, "learning_rate": 2.918410381184104e-06, "loss": 0.5836, "step": 29753 }, { "epoch": 0.8687045633704126, "grad_norm": 0.6757682108293787, "learning_rate": 2.9177615571776156e-06, "loss": 0.6279, "step": 29754 }, { "epoch": 0.8687337595982599, "grad_norm": 0.679119158850511, "learning_rate": 2.9171127331711272e-06, "loss": 0.6504, "step": 29755 }, { "epoch": 0.8687629558261073, "grad_norm": 0.6095933339580137, "learning_rate": 2.9164639091646393e-06, "loss": 0.5326, "step": 29756 }, { "epoch": 0.8687921520539547, "grad_norm": 0.6483994674067963, "learning_rate": 2.9158150851581513e-06, "loss": 0.5905, "step": 29757 }, { "epoch": 0.868821348281802, "grad_norm": 0.6723497171653412, "learning_rate": 2.915166261151663e-06, "loss": 0.6487, "step": 29758 }, { "epoch": 0.8688505445096494, "grad_norm": 0.6749364054821335, "learning_rate": 2.9145174371451745e-06, "loss": 0.6555, "step": 29759 }, { "epoch": 0.8688797407374967, "grad_norm": 0.6420878752374845, "learning_rate": 2.913868613138686e-06, "loss": 0.6004, "step": 29760 }, { "epoch": 0.8689089369653441, "grad_norm": 0.6889284991993736, "learning_rate": 2.9132197891321986e-06, "loss": 0.6962, "step": 29761 }, { "epoch": 0.8689381331931915, "grad_norm": 0.7225634783168947, "learning_rate": 2.91257096512571e-06, "loss": 0.7483, "step": 29762 }, { "epoch": 0.8689673294210388, "grad_norm": 0.6742164867287189, "learning_rate": 2.9119221411192218e-06, "loss": 0.6197, "step": 29763 }, { "epoch": 0.8689965256488862, "grad_norm": 0.6717768549201085, "learning_rate": 2.9112733171127334e-06, "loss": 0.6346, "step": 29764 }, { "epoch": 0.8690257218767335, "grad_norm": 0.6351489568773301, "learning_rate": 2.910624493106245e-06, "loss": 0.5619, "step": 29765 }, { "epoch": 0.8690549181045809, "grad_norm": 0.6260728240803216, "learning_rate": 2.909975669099757e-06, "loss": 0.5209, "step": 29766 }, { "epoch": 0.8690841143324283, "grad_norm": 0.6373047630701236, "learning_rate": 2.9093268450932686e-06, "loss": 0.5765, "step": 29767 }, { "epoch": 0.8691133105602756, "grad_norm": 0.6293342523204606, "learning_rate": 2.9086780210867806e-06, "loss": 0.5901, "step": 29768 }, { "epoch": 0.869142506788123, "grad_norm": 0.6878309097981384, "learning_rate": 2.9080291970802922e-06, "loss": 0.6505, "step": 29769 }, { "epoch": 0.8691717030159704, "grad_norm": 0.6908774889007522, "learning_rate": 2.907380373073804e-06, "loss": 0.6235, "step": 29770 }, { "epoch": 0.8692008992438177, "grad_norm": 0.6874426480089614, "learning_rate": 2.906731549067316e-06, "loss": 0.6933, "step": 29771 }, { "epoch": 0.8692300954716651, "grad_norm": 0.6240623286553267, "learning_rate": 2.9060827250608275e-06, "loss": 0.5429, "step": 29772 }, { "epoch": 0.8692592916995124, "grad_norm": 0.6363687663906565, "learning_rate": 2.905433901054339e-06, "loss": 0.5777, "step": 29773 }, { "epoch": 0.8692884879273598, "grad_norm": 0.6669337368405029, "learning_rate": 2.9047850770478507e-06, "loss": 0.6272, "step": 29774 }, { "epoch": 0.8693176841552072, "grad_norm": 0.6531520885218436, "learning_rate": 2.904136253041363e-06, "loss": 0.5984, "step": 29775 }, { "epoch": 0.8693468803830545, "grad_norm": 0.6266890020256597, "learning_rate": 2.9034874290348747e-06, "loss": 0.5561, "step": 29776 }, { "epoch": 0.8693760766109019, "grad_norm": 0.5912351090559597, "learning_rate": 2.9028386050283863e-06, "loss": 0.4801, "step": 29777 }, { "epoch": 0.8694052728387492, "grad_norm": 0.6542175675357805, "learning_rate": 2.902189781021898e-06, "loss": 0.5832, "step": 29778 }, { "epoch": 0.8694344690665966, "grad_norm": 0.6467131327486008, "learning_rate": 2.9015409570154095e-06, "loss": 0.5721, "step": 29779 }, { "epoch": 0.869463665294444, "grad_norm": 0.6174638734931835, "learning_rate": 2.900892133008922e-06, "loss": 0.5427, "step": 29780 }, { "epoch": 0.8694928615222913, "grad_norm": 0.6350109866583137, "learning_rate": 2.9002433090024336e-06, "loss": 0.5552, "step": 29781 }, { "epoch": 0.8695220577501387, "grad_norm": 0.6619613020701232, "learning_rate": 2.899594484995945e-06, "loss": 0.6084, "step": 29782 }, { "epoch": 0.869551253977986, "grad_norm": 0.6705585257374101, "learning_rate": 2.898945660989457e-06, "loss": 0.6159, "step": 29783 }, { "epoch": 0.8695804502058334, "grad_norm": 0.6963427019327781, "learning_rate": 2.8982968369829684e-06, "loss": 0.6825, "step": 29784 }, { "epoch": 0.8696096464336808, "grad_norm": 0.6562166462378989, "learning_rate": 2.8976480129764804e-06, "loss": 0.5933, "step": 29785 }, { "epoch": 0.8696388426615281, "grad_norm": 0.6801824388440109, "learning_rate": 2.896999188969992e-06, "loss": 0.6869, "step": 29786 }, { "epoch": 0.8696680388893755, "grad_norm": 0.641954123567597, "learning_rate": 2.896350364963504e-06, "loss": 0.5931, "step": 29787 }, { "epoch": 0.8696972351172229, "grad_norm": 0.6723365224618829, "learning_rate": 2.8957015409570157e-06, "loss": 0.6455, "step": 29788 }, { "epoch": 0.8697264313450702, "grad_norm": 0.6990555226752523, "learning_rate": 2.8950527169505273e-06, "loss": 0.702, "step": 29789 }, { "epoch": 0.8697556275729176, "grad_norm": 0.6251050529154135, "learning_rate": 2.8944038929440393e-06, "loss": 0.5699, "step": 29790 }, { "epoch": 0.8697848238007649, "grad_norm": 0.6635260780934485, "learning_rate": 2.893755068937551e-06, "loss": 0.673, "step": 29791 }, { "epoch": 0.8698140200286123, "grad_norm": 0.6188429994737754, "learning_rate": 2.8931062449310625e-06, "loss": 0.551, "step": 29792 }, { "epoch": 0.8698432162564597, "grad_norm": 0.6064261862396788, "learning_rate": 2.892457420924574e-06, "loss": 0.5306, "step": 29793 }, { "epoch": 0.869872412484307, "grad_norm": 0.6588745150599731, "learning_rate": 2.8918085969180866e-06, "loss": 0.5783, "step": 29794 }, { "epoch": 0.8699016087121544, "grad_norm": 0.6168410466707765, "learning_rate": 2.891159772911598e-06, "loss": 0.5733, "step": 29795 }, { "epoch": 0.8699308049400017, "grad_norm": 0.6777300914095624, "learning_rate": 2.8905109489051098e-06, "loss": 0.5867, "step": 29796 }, { "epoch": 0.8699600011678491, "grad_norm": 0.6455128180497718, "learning_rate": 2.8898621248986214e-06, "loss": 0.6003, "step": 29797 }, { "epoch": 0.8699891973956965, "grad_norm": 0.6809604385813304, "learning_rate": 2.889213300892133e-06, "loss": 0.6024, "step": 29798 }, { "epoch": 0.8700183936235438, "grad_norm": 0.6834795927341983, "learning_rate": 2.8885644768856454e-06, "loss": 0.6927, "step": 29799 }, { "epoch": 0.8700475898513912, "grad_norm": 0.6323634381884626, "learning_rate": 2.887915652879157e-06, "loss": 0.586, "step": 29800 }, { "epoch": 0.8700767860792386, "grad_norm": 0.700734713970625, "learning_rate": 2.8872668288726686e-06, "loss": 0.6767, "step": 29801 }, { "epoch": 0.8701059823070859, "grad_norm": 0.6465200527127197, "learning_rate": 2.8866180048661802e-06, "loss": 0.6198, "step": 29802 }, { "epoch": 0.8701351785349333, "grad_norm": 0.598582600471158, "learning_rate": 2.885969180859692e-06, "loss": 0.4917, "step": 29803 }, { "epoch": 0.8701643747627806, "grad_norm": 0.6063958159730842, "learning_rate": 2.885320356853204e-06, "loss": 0.5646, "step": 29804 }, { "epoch": 0.870193570990628, "grad_norm": 0.6817820111717016, "learning_rate": 2.8846715328467155e-06, "loss": 0.6741, "step": 29805 }, { "epoch": 0.8702227672184754, "grad_norm": 0.657827025549114, "learning_rate": 2.884022708840227e-06, "loss": 0.5989, "step": 29806 }, { "epoch": 0.8702519634463227, "grad_norm": 0.6634733500804213, "learning_rate": 2.883373884833739e-06, "loss": 0.6592, "step": 29807 }, { "epoch": 0.8702811596741701, "grad_norm": 0.6122582233100422, "learning_rate": 2.882725060827251e-06, "loss": 0.5464, "step": 29808 }, { "epoch": 0.8703103559020174, "grad_norm": 0.6700531476006327, "learning_rate": 2.8820762368207627e-06, "loss": 0.6442, "step": 29809 }, { "epoch": 0.8703395521298648, "grad_norm": 0.6841118934728219, "learning_rate": 2.8814274128142743e-06, "loss": 0.6403, "step": 29810 }, { "epoch": 0.8703687483577122, "grad_norm": 0.663461548636917, "learning_rate": 2.880778588807786e-06, "loss": 0.6148, "step": 29811 }, { "epoch": 0.8703979445855595, "grad_norm": 0.6389856065550109, "learning_rate": 2.8801297648012975e-06, "loss": 0.6117, "step": 29812 }, { "epoch": 0.8704271408134069, "grad_norm": 0.6341721243955643, "learning_rate": 2.87948094079481e-06, "loss": 0.5878, "step": 29813 }, { "epoch": 0.8704563370412542, "grad_norm": 0.5939776503456763, "learning_rate": 2.8788321167883216e-06, "loss": 0.5088, "step": 29814 }, { "epoch": 0.8704855332691016, "grad_norm": 0.6566035665169929, "learning_rate": 2.878183292781833e-06, "loss": 0.5935, "step": 29815 }, { "epoch": 0.870514729496949, "grad_norm": 0.6744617353888652, "learning_rate": 2.877534468775345e-06, "loss": 0.6125, "step": 29816 }, { "epoch": 0.8705439257247963, "grad_norm": 0.6568965627710361, "learning_rate": 2.8768856447688564e-06, "loss": 0.5486, "step": 29817 }, { "epoch": 0.8705731219526437, "grad_norm": 0.6341360041454538, "learning_rate": 2.8762368207623684e-06, "loss": 0.546, "step": 29818 }, { "epoch": 0.870602318180491, "grad_norm": 0.6395173550656137, "learning_rate": 2.8755879967558805e-06, "loss": 0.5809, "step": 29819 }, { "epoch": 0.8706315144083384, "grad_norm": 0.6479772685122289, "learning_rate": 2.874939172749392e-06, "loss": 0.5911, "step": 29820 }, { "epoch": 0.8706607106361858, "grad_norm": 0.6654412151113197, "learning_rate": 2.8742903487429037e-06, "loss": 0.5938, "step": 29821 }, { "epoch": 0.8706899068640331, "grad_norm": 0.6555523861277803, "learning_rate": 2.8736415247364153e-06, "loss": 0.5718, "step": 29822 }, { "epoch": 0.8707191030918805, "grad_norm": 0.6535949764610627, "learning_rate": 2.8729927007299273e-06, "loss": 0.6413, "step": 29823 }, { "epoch": 0.8707482993197279, "grad_norm": 0.5980966678032971, "learning_rate": 2.872343876723439e-06, "loss": 0.5204, "step": 29824 }, { "epoch": 0.8707774955475752, "grad_norm": 0.6370552218352036, "learning_rate": 2.8716950527169505e-06, "loss": 0.5115, "step": 29825 }, { "epoch": 0.8708066917754226, "grad_norm": 0.67672587585779, "learning_rate": 2.8710462287104625e-06, "loss": 0.5592, "step": 29826 }, { "epoch": 0.8708358880032699, "grad_norm": 0.6576866353007389, "learning_rate": 2.8703974047039746e-06, "loss": 0.5877, "step": 29827 }, { "epoch": 0.8708650842311173, "grad_norm": 0.6470229500745792, "learning_rate": 2.869748580697486e-06, "loss": 0.5937, "step": 29828 }, { "epoch": 0.8708942804589647, "grad_norm": 0.6781330987879813, "learning_rate": 2.8690997566909978e-06, "loss": 0.6027, "step": 29829 }, { "epoch": 0.870923476686812, "grad_norm": 0.6606558860269961, "learning_rate": 2.8684509326845094e-06, "loss": 0.6326, "step": 29830 }, { "epoch": 0.8709526729146594, "grad_norm": 0.6569670623967659, "learning_rate": 2.867802108678021e-06, "loss": 0.6109, "step": 29831 }, { "epoch": 0.8709818691425067, "grad_norm": 0.6622442539254393, "learning_rate": 2.8671532846715334e-06, "loss": 0.6376, "step": 29832 }, { "epoch": 0.8710110653703541, "grad_norm": 0.6427088072066598, "learning_rate": 2.866504460665045e-06, "loss": 0.5565, "step": 29833 }, { "epoch": 0.8710402615982015, "grad_norm": 0.6194222866826343, "learning_rate": 2.8658556366585566e-06, "loss": 0.5417, "step": 29834 }, { "epoch": 0.8710694578260488, "grad_norm": 0.6394223876142457, "learning_rate": 2.8652068126520682e-06, "loss": 0.58, "step": 29835 }, { "epoch": 0.8710986540538962, "grad_norm": 0.6398291992928157, "learning_rate": 2.86455798864558e-06, "loss": 0.5807, "step": 29836 }, { "epoch": 0.8711278502817436, "grad_norm": 0.5823856836825396, "learning_rate": 2.863909164639092e-06, "loss": 0.4934, "step": 29837 }, { "epoch": 0.8711570465095909, "grad_norm": 0.6267806781886741, "learning_rate": 2.863260340632604e-06, "loss": 0.5462, "step": 29838 }, { "epoch": 0.8711862427374383, "grad_norm": 0.6551713330477622, "learning_rate": 2.8626115166261155e-06, "loss": 0.6455, "step": 29839 }, { "epoch": 0.8712154389652856, "grad_norm": 0.6577615094776237, "learning_rate": 2.861962692619627e-06, "loss": 0.5975, "step": 29840 }, { "epoch": 0.871244635193133, "grad_norm": 0.6777557990394477, "learning_rate": 2.8613138686131387e-06, "loss": 0.5801, "step": 29841 }, { "epoch": 0.8712738314209804, "grad_norm": 0.6616269125168353, "learning_rate": 2.8606650446066507e-06, "loss": 0.5912, "step": 29842 }, { "epoch": 0.8713030276488277, "grad_norm": 0.6459229774311975, "learning_rate": 2.8600162206001623e-06, "loss": 0.5716, "step": 29843 }, { "epoch": 0.8713322238766751, "grad_norm": 0.6300723685171086, "learning_rate": 2.859367396593674e-06, "loss": 0.5736, "step": 29844 }, { "epoch": 0.8713614201045224, "grad_norm": 0.669682884814535, "learning_rate": 2.858718572587186e-06, "loss": 0.6478, "step": 29845 }, { "epoch": 0.8713906163323698, "grad_norm": 0.5885773907404102, "learning_rate": 2.858069748580698e-06, "loss": 0.5118, "step": 29846 }, { "epoch": 0.8714198125602172, "grad_norm": 0.6337389683561855, "learning_rate": 2.8574209245742096e-06, "loss": 0.5638, "step": 29847 }, { "epoch": 0.8714490087880645, "grad_norm": 0.6277000632189698, "learning_rate": 2.856772100567721e-06, "loss": 0.5394, "step": 29848 }, { "epoch": 0.8714782050159119, "grad_norm": 0.6905125365976027, "learning_rate": 2.856123276561233e-06, "loss": 0.674, "step": 29849 }, { "epoch": 0.8715074012437594, "grad_norm": 0.6073707026075137, "learning_rate": 2.8554744525547444e-06, "loss": 0.5219, "step": 29850 }, { "epoch": 0.8715365974716067, "grad_norm": 0.668648162180386, "learning_rate": 2.854825628548257e-06, "loss": 0.6792, "step": 29851 }, { "epoch": 0.8715657936994541, "grad_norm": 0.6136011414606748, "learning_rate": 2.8541768045417685e-06, "loss": 0.5386, "step": 29852 }, { "epoch": 0.8715949899273014, "grad_norm": 0.6130003566724372, "learning_rate": 2.85352798053528e-06, "loss": 0.5518, "step": 29853 }, { "epoch": 0.8716241861551488, "grad_norm": 0.5988454430980884, "learning_rate": 2.8528791565287917e-06, "loss": 0.5062, "step": 29854 }, { "epoch": 0.8716533823829962, "grad_norm": 0.6479156565950145, "learning_rate": 2.8522303325223033e-06, "loss": 0.6363, "step": 29855 }, { "epoch": 0.8716825786108435, "grad_norm": 0.7039799979510697, "learning_rate": 2.8515815085158153e-06, "loss": 0.6664, "step": 29856 }, { "epoch": 0.8717117748386909, "grad_norm": 0.6606725095964077, "learning_rate": 2.8509326845093273e-06, "loss": 0.5736, "step": 29857 }, { "epoch": 0.8717409710665383, "grad_norm": 0.6300491162930485, "learning_rate": 2.850283860502839e-06, "loss": 0.6003, "step": 29858 }, { "epoch": 0.8717701672943856, "grad_norm": 0.6471596089835692, "learning_rate": 2.8496350364963505e-06, "loss": 0.5728, "step": 29859 }, { "epoch": 0.871799363522233, "grad_norm": 0.6661977637180448, "learning_rate": 2.8489862124898626e-06, "loss": 0.5962, "step": 29860 }, { "epoch": 0.8718285597500803, "grad_norm": 0.6689899327638301, "learning_rate": 2.848337388483374e-06, "loss": 0.5839, "step": 29861 }, { "epoch": 0.8718577559779277, "grad_norm": 0.647675661697536, "learning_rate": 2.8476885644768858e-06, "loss": 0.5885, "step": 29862 }, { "epoch": 0.8718869522057751, "grad_norm": 0.6050488777886187, "learning_rate": 2.8470397404703974e-06, "loss": 0.5114, "step": 29863 }, { "epoch": 0.8719161484336224, "grad_norm": 0.5772580510870832, "learning_rate": 2.846390916463909e-06, "loss": 0.4951, "step": 29864 }, { "epoch": 0.8719453446614698, "grad_norm": 0.6872430083109841, "learning_rate": 2.8457420924574214e-06, "loss": 0.6293, "step": 29865 }, { "epoch": 0.8719745408893171, "grad_norm": 0.6385392123880946, "learning_rate": 2.845093268450933e-06, "loss": 0.5978, "step": 29866 }, { "epoch": 0.8720037371171645, "grad_norm": 0.616896188827514, "learning_rate": 2.8444444444444446e-06, "loss": 0.5596, "step": 29867 }, { "epoch": 0.8720329333450119, "grad_norm": 0.6580775892139533, "learning_rate": 2.8437956204379562e-06, "loss": 0.5731, "step": 29868 }, { "epoch": 0.8720621295728592, "grad_norm": 0.671416924483836, "learning_rate": 2.843146796431468e-06, "loss": 0.5839, "step": 29869 }, { "epoch": 0.8720913258007066, "grad_norm": 0.6910536397318778, "learning_rate": 2.8424979724249803e-06, "loss": 0.6722, "step": 29870 }, { "epoch": 0.872120522028554, "grad_norm": 0.6414114919568511, "learning_rate": 2.841849148418492e-06, "loss": 0.6315, "step": 29871 }, { "epoch": 0.8721497182564013, "grad_norm": 0.6524210877602218, "learning_rate": 2.8412003244120035e-06, "loss": 0.6295, "step": 29872 }, { "epoch": 0.8721789144842487, "grad_norm": 0.6112171682943679, "learning_rate": 2.840551500405515e-06, "loss": 0.5512, "step": 29873 }, { "epoch": 0.872208110712096, "grad_norm": 0.6359755753367862, "learning_rate": 2.8399026763990267e-06, "loss": 0.5305, "step": 29874 }, { "epoch": 0.8722373069399434, "grad_norm": 0.6437953956931539, "learning_rate": 2.8392538523925387e-06, "loss": 0.5889, "step": 29875 }, { "epoch": 0.8722665031677908, "grad_norm": 0.599233450534492, "learning_rate": 2.8386050283860508e-06, "loss": 0.4902, "step": 29876 }, { "epoch": 0.8722956993956381, "grad_norm": 0.6568763019656546, "learning_rate": 2.8379562043795624e-06, "loss": 0.599, "step": 29877 }, { "epoch": 0.8723248956234855, "grad_norm": 0.6469255134312318, "learning_rate": 2.837307380373074e-06, "loss": 0.5579, "step": 29878 }, { "epoch": 0.8723540918513328, "grad_norm": 0.6689633484055364, "learning_rate": 2.836658556366586e-06, "loss": 0.6604, "step": 29879 }, { "epoch": 0.8723832880791802, "grad_norm": 0.6722622033438722, "learning_rate": 2.8360097323600976e-06, "loss": 0.6494, "step": 29880 }, { "epoch": 0.8724124843070276, "grad_norm": 0.6431105705514369, "learning_rate": 2.8353609083536092e-06, "loss": 0.5734, "step": 29881 }, { "epoch": 0.8724416805348749, "grad_norm": 0.6915884336250074, "learning_rate": 2.834712084347121e-06, "loss": 0.7112, "step": 29882 }, { "epoch": 0.8724708767627223, "grad_norm": 1.042442468766603, "learning_rate": 2.8340632603406324e-06, "loss": 0.7378, "step": 29883 }, { "epoch": 0.8725000729905696, "grad_norm": 0.6572302309840105, "learning_rate": 2.833414436334145e-06, "loss": 0.6018, "step": 29884 }, { "epoch": 0.872529269218417, "grad_norm": 0.6227911004351772, "learning_rate": 2.8327656123276565e-06, "loss": 0.5628, "step": 29885 }, { "epoch": 0.8725584654462644, "grad_norm": 0.6369542442521968, "learning_rate": 2.832116788321168e-06, "loss": 0.6077, "step": 29886 }, { "epoch": 0.8725876616741117, "grad_norm": 0.5962883711948107, "learning_rate": 2.8314679643146797e-06, "loss": 0.5167, "step": 29887 }, { "epoch": 0.8726168579019591, "grad_norm": 0.6503268493247294, "learning_rate": 2.8308191403081913e-06, "loss": 0.5801, "step": 29888 }, { "epoch": 0.8726460541298064, "grad_norm": 0.6714713065428597, "learning_rate": 2.8301703163017037e-06, "loss": 0.625, "step": 29889 }, { "epoch": 0.8726752503576538, "grad_norm": 0.6732546384315125, "learning_rate": 2.8295214922952153e-06, "loss": 0.6162, "step": 29890 }, { "epoch": 0.8727044465855012, "grad_norm": 0.6814149347267444, "learning_rate": 2.828872668288727e-06, "loss": 0.6863, "step": 29891 }, { "epoch": 0.8727336428133485, "grad_norm": 0.6499073841540245, "learning_rate": 2.8282238442822385e-06, "loss": 0.6, "step": 29892 }, { "epoch": 0.8727628390411959, "grad_norm": 0.6698398818832063, "learning_rate": 2.8275750202757506e-06, "loss": 0.5988, "step": 29893 }, { "epoch": 0.8727920352690433, "grad_norm": 0.6042563192669875, "learning_rate": 2.826926196269262e-06, "loss": 0.5565, "step": 29894 }, { "epoch": 0.8728212314968906, "grad_norm": 0.6855071196955689, "learning_rate": 2.8262773722627738e-06, "loss": 0.6857, "step": 29895 }, { "epoch": 0.872850427724738, "grad_norm": 0.6681570035662252, "learning_rate": 2.825628548256286e-06, "loss": 0.5975, "step": 29896 }, { "epoch": 0.8728796239525853, "grad_norm": 0.6427874922175435, "learning_rate": 2.8249797242497974e-06, "loss": 0.5739, "step": 29897 }, { "epoch": 0.8729088201804327, "grad_norm": 0.7458795202306799, "learning_rate": 2.8243309002433094e-06, "loss": 0.6187, "step": 29898 }, { "epoch": 0.8729380164082801, "grad_norm": 0.5999834320444615, "learning_rate": 2.823682076236821e-06, "loss": 0.5128, "step": 29899 }, { "epoch": 0.8729672126361274, "grad_norm": 0.6324543186129213, "learning_rate": 2.8230332522303326e-06, "loss": 0.5503, "step": 29900 }, { "epoch": 0.8729964088639748, "grad_norm": 0.661503863768964, "learning_rate": 2.8223844282238443e-06, "loss": 0.6718, "step": 29901 }, { "epoch": 0.8730256050918221, "grad_norm": 0.6794957600280512, "learning_rate": 2.821735604217356e-06, "loss": 0.6691, "step": 29902 }, { "epoch": 0.8730548013196695, "grad_norm": 0.645419854817576, "learning_rate": 2.8210867802108683e-06, "loss": 0.5708, "step": 29903 }, { "epoch": 0.8730839975475169, "grad_norm": 0.5949173621201348, "learning_rate": 2.82043795620438e-06, "loss": 0.5219, "step": 29904 }, { "epoch": 0.8731131937753642, "grad_norm": 0.6677993098466249, "learning_rate": 2.8197891321978915e-06, "loss": 0.5632, "step": 29905 }, { "epoch": 0.8731423900032116, "grad_norm": 0.6644405160874283, "learning_rate": 2.819140308191403e-06, "loss": 0.6588, "step": 29906 }, { "epoch": 0.873171586231059, "grad_norm": 0.6447226057467164, "learning_rate": 2.8184914841849147e-06, "loss": 0.559, "step": 29907 }, { "epoch": 0.8732007824589063, "grad_norm": 0.6899158773572821, "learning_rate": 2.817842660178427e-06, "loss": 0.7025, "step": 29908 }, { "epoch": 0.8732299786867537, "grad_norm": 0.6750526697914312, "learning_rate": 2.8171938361719388e-06, "loss": 0.6642, "step": 29909 }, { "epoch": 0.873259174914601, "grad_norm": 0.6448910109284324, "learning_rate": 2.8165450121654504e-06, "loss": 0.6174, "step": 29910 }, { "epoch": 0.8732883711424484, "grad_norm": 0.7089495332265534, "learning_rate": 2.815896188158962e-06, "loss": 0.6971, "step": 29911 }, { "epoch": 0.8733175673702958, "grad_norm": 0.6875408067063594, "learning_rate": 2.815247364152474e-06, "loss": 0.6581, "step": 29912 }, { "epoch": 0.8733467635981431, "grad_norm": 0.6392839259934741, "learning_rate": 2.8145985401459856e-06, "loss": 0.6002, "step": 29913 }, { "epoch": 0.8733759598259905, "grad_norm": 0.6143082510727117, "learning_rate": 2.8139497161394972e-06, "loss": 0.5542, "step": 29914 }, { "epoch": 0.8734051560538378, "grad_norm": 0.6529874981853355, "learning_rate": 2.8133008921330092e-06, "loss": 0.6064, "step": 29915 }, { "epoch": 0.8734343522816852, "grad_norm": 0.6220608251166921, "learning_rate": 2.812652068126521e-06, "loss": 0.5638, "step": 29916 }, { "epoch": 0.8734635485095326, "grad_norm": 0.6139932967486798, "learning_rate": 2.812003244120033e-06, "loss": 0.5665, "step": 29917 }, { "epoch": 0.8734927447373799, "grad_norm": 0.6188110831756378, "learning_rate": 2.8113544201135445e-06, "loss": 0.5237, "step": 29918 }, { "epoch": 0.8735219409652273, "grad_norm": 0.6459521725920632, "learning_rate": 2.810705596107056e-06, "loss": 0.5907, "step": 29919 }, { "epoch": 0.8735511371930746, "grad_norm": 0.6400008897376964, "learning_rate": 2.8100567721005677e-06, "loss": 0.5741, "step": 29920 }, { "epoch": 0.873580333420922, "grad_norm": 0.6367435801841456, "learning_rate": 2.8094079480940793e-06, "loss": 0.563, "step": 29921 }, { "epoch": 0.8736095296487694, "grad_norm": 0.6497011287574633, "learning_rate": 2.8087591240875917e-06, "loss": 0.6327, "step": 29922 }, { "epoch": 0.8736387258766167, "grad_norm": 0.6339752308774231, "learning_rate": 2.8081103000811033e-06, "loss": 0.5915, "step": 29923 }, { "epoch": 0.8736679221044641, "grad_norm": 0.6223326258778172, "learning_rate": 2.807461476074615e-06, "loss": 0.5176, "step": 29924 }, { "epoch": 0.8736971183323115, "grad_norm": 0.6347984564921466, "learning_rate": 2.8068126520681266e-06, "loss": 0.5679, "step": 29925 }, { "epoch": 0.8737263145601588, "grad_norm": 0.6034782730325343, "learning_rate": 2.8061638280616386e-06, "loss": 0.5482, "step": 29926 }, { "epoch": 0.8737555107880062, "grad_norm": 0.6025927545113664, "learning_rate": 2.8055150040551506e-06, "loss": 0.4924, "step": 29927 }, { "epoch": 0.8737847070158535, "grad_norm": 0.65051629586828, "learning_rate": 2.804866180048662e-06, "loss": 0.5511, "step": 29928 }, { "epoch": 0.8738139032437009, "grad_norm": 0.5801814301100436, "learning_rate": 2.804217356042174e-06, "loss": 0.497, "step": 29929 }, { "epoch": 0.8738430994715483, "grad_norm": 0.6181128092103608, "learning_rate": 2.8035685320356854e-06, "loss": 0.5395, "step": 29930 }, { "epoch": 0.8738722956993956, "grad_norm": 0.6330086703974365, "learning_rate": 2.8029197080291974e-06, "loss": 0.5839, "step": 29931 }, { "epoch": 0.873901491927243, "grad_norm": 0.6582088748545531, "learning_rate": 2.802270884022709e-06, "loss": 0.58, "step": 29932 }, { "epoch": 0.8739306881550903, "grad_norm": 0.6141626872507047, "learning_rate": 2.8016220600162207e-06, "loss": 0.5488, "step": 29933 }, { "epoch": 0.8739598843829377, "grad_norm": 0.6802052887554861, "learning_rate": 2.8009732360097327e-06, "loss": 0.6434, "step": 29934 }, { "epoch": 0.8739890806107851, "grad_norm": 0.6330801745109266, "learning_rate": 2.8003244120032443e-06, "loss": 0.5694, "step": 29935 }, { "epoch": 0.8740182768386324, "grad_norm": 0.6202738632005279, "learning_rate": 2.7996755879967563e-06, "loss": 0.5853, "step": 29936 }, { "epoch": 0.8740474730664798, "grad_norm": 0.6480768337154692, "learning_rate": 2.799026763990268e-06, "loss": 0.5773, "step": 29937 }, { "epoch": 0.8740766692943271, "grad_norm": 0.655858685116556, "learning_rate": 2.7983779399837795e-06, "loss": 0.6055, "step": 29938 }, { "epoch": 0.8741058655221745, "grad_norm": 0.6797329944009246, "learning_rate": 2.797729115977291e-06, "loss": 0.6738, "step": 29939 }, { "epoch": 0.8741350617500219, "grad_norm": 0.6817845831136302, "learning_rate": 2.7970802919708027e-06, "loss": 0.6193, "step": 29940 }, { "epoch": 0.8741642579778692, "grad_norm": 0.5781302633981866, "learning_rate": 2.796431467964315e-06, "loss": 0.4985, "step": 29941 }, { "epoch": 0.8741934542057166, "grad_norm": 0.5797433272826654, "learning_rate": 2.7957826439578268e-06, "loss": 0.4985, "step": 29942 }, { "epoch": 0.874222650433564, "grad_norm": 0.6063238148158424, "learning_rate": 2.7951338199513384e-06, "loss": 0.5212, "step": 29943 }, { "epoch": 0.8742518466614113, "grad_norm": 0.6024431764157586, "learning_rate": 2.79448499594485e-06, "loss": 0.5057, "step": 29944 }, { "epoch": 0.8742810428892587, "grad_norm": 0.6670155819137913, "learning_rate": 2.793836171938362e-06, "loss": 0.604, "step": 29945 }, { "epoch": 0.874310239117106, "grad_norm": 0.599281543443176, "learning_rate": 2.793187347931874e-06, "loss": 0.5842, "step": 29946 }, { "epoch": 0.8743394353449534, "grad_norm": 0.5965911634224, "learning_rate": 2.7925385239253856e-06, "loss": 0.5044, "step": 29947 }, { "epoch": 0.8743686315728008, "grad_norm": 0.6370982973007182, "learning_rate": 2.7918896999188972e-06, "loss": 0.5642, "step": 29948 }, { "epoch": 0.8743978278006481, "grad_norm": 0.6285397508187978, "learning_rate": 2.791240875912409e-06, "loss": 0.5804, "step": 29949 }, { "epoch": 0.8744270240284955, "grad_norm": 0.6126171852198832, "learning_rate": 2.790592051905921e-06, "loss": 0.5502, "step": 29950 }, { "epoch": 0.8744562202563428, "grad_norm": 0.6409054486313882, "learning_rate": 2.7899432278994325e-06, "loss": 0.5847, "step": 29951 }, { "epoch": 0.8744854164841902, "grad_norm": 0.6400965794976775, "learning_rate": 2.789294403892944e-06, "loss": 0.5944, "step": 29952 }, { "epoch": 0.8745146127120376, "grad_norm": 0.6421198189020971, "learning_rate": 2.7886455798864557e-06, "loss": 0.584, "step": 29953 }, { "epoch": 0.8745438089398849, "grad_norm": 0.635023989168173, "learning_rate": 2.7879967558799677e-06, "loss": 0.5772, "step": 29954 }, { "epoch": 0.8745730051677323, "grad_norm": 0.6776581985793032, "learning_rate": 2.7873479318734797e-06, "loss": 0.6273, "step": 29955 }, { "epoch": 0.8746022013955796, "grad_norm": 0.6568048972900571, "learning_rate": 2.7866991078669913e-06, "loss": 0.6134, "step": 29956 }, { "epoch": 0.874631397623427, "grad_norm": 0.6332234355874737, "learning_rate": 2.786050283860503e-06, "loss": 0.5729, "step": 29957 }, { "epoch": 0.8746605938512744, "grad_norm": 0.6547352501367365, "learning_rate": 2.7854014598540146e-06, "loss": 0.5994, "step": 29958 }, { "epoch": 0.8746897900791217, "grad_norm": 0.6126198452611132, "learning_rate": 2.784752635847526e-06, "loss": 0.5489, "step": 29959 }, { "epoch": 0.8747189863069691, "grad_norm": 0.6607982558176131, "learning_rate": 2.7841038118410386e-06, "loss": 0.6094, "step": 29960 }, { "epoch": 0.8747481825348165, "grad_norm": 0.5254554209208039, "learning_rate": 2.7834549878345502e-06, "loss": 0.3793, "step": 29961 }, { "epoch": 0.8747773787626638, "grad_norm": 0.6402638229837562, "learning_rate": 2.782806163828062e-06, "loss": 0.5762, "step": 29962 }, { "epoch": 0.8748065749905112, "grad_norm": 0.636726574052673, "learning_rate": 2.7821573398215734e-06, "loss": 0.5827, "step": 29963 }, { "epoch": 0.8748357712183585, "grad_norm": 0.7071985596342919, "learning_rate": 2.7815085158150854e-06, "loss": 0.5899, "step": 29964 }, { "epoch": 0.8748649674462059, "grad_norm": 0.6878708435777295, "learning_rate": 2.7808596918085975e-06, "loss": 0.6383, "step": 29965 }, { "epoch": 0.8748941636740533, "grad_norm": 0.6554089376866626, "learning_rate": 2.780210867802109e-06, "loss": 0.601, "step": 29966 }, { "epoch": 0.8749233599019006, "grad_norm": 0.6011646922703414, "learning_rate": 2.7795620437956207e-06, "loss": 0.5401, "step": 29967 }, { "epoch": 0.874952556129748, "grad_norm": 0.6481271372716711, "learning_rate": 2.7789132197891323e-06, "loss": 0.5891, "step": 29968 }, { "epoch": 0.8749817523575953, "grad_norm": 0.6692670630226847, "learning_rate": 2.7782643957826443e-06, "loss": 0.6077, "step": 29969 }, { "epoch": 0.8750109485854428, "grad_norm": 0.6461555129729559, "learning_rate": 2.777615571776156e-06, "loss": 0.584, "step": 29970 }, { "epoch": 0.8750401448132902, "grad_norm": 0.660793889301373, "learning_rate": 2.7769667477696675e-06, "loss": 0.6065, "step": 29971 }, { "epoch": 0.8750693410411375, "grad_norm": 0.6237882449662852, "learning_rate": 2.776317923763179e-06, "loss": 0.5667, "step": 29972 }, { "epoch": 0.8750985372689849, "grad_norm": 0.6451130999131718, "learning_rate": 2.775669099756691e-06, "loss": 0.6082, "step": 29973 }, { "epoch": 0.8751277334968323, "grad_norm": 0.6798755400197104, "learning_rate": 2.775020275750203e-06, "loss": 0.6485, "step": 29974 }, { "epoch": 0.8751569297246796, "grad_norm": 0.6523608903124389, "learning_rate": 2.7743714517437148e-06, "loss": 0.6253, "step": 29975 }, { "epoch": 0.875186125952527, "grad_norm": 0.6740174193821951, "learning_rate": 2.7737226277372264e-06, "loss": 0.6622, "step": 29976 }, { "epoch": 0.8752153221803743, "grad_norm": 0.6802776080358391, "learning_rate": 2.773073803730738e-06, "loss": 0.6536, "step": 29977 }, { "epoch": 0.8752445184082217, "grad_norm": 0.6419931962184884, "learning_rate": 2.7724249797242504e-06, "loss": 0.5278, "step": 29978 }, { "epoch": 0.8752737146360691, "grad_norm": 0.6213422879682727, "learning_rate": 2.771776155717762e-06, "loss": 0.5382, "step": 29979 }, { "epoch": 0.8753029108639164, "grad_norm": 0.6050570629085247, "learning_rate": 2.7711273317112736e-06, "loss": 0.5195, "step": 29980 }, { "epoch": 0.8753321070917638, "grad_norm": 0.611825103715699, "learning_rate": 2.7704785077047853e-06, "loss": 0.5602, "step": 29981 }, { "epoch": 0.8753613033196112, "grad_norm": 0.6793799310883748, "learning_rate": 2.769829683698297e-06, "loss": 0.6135, "step": 29982 }, { "epoch": 0.8753904995474585, "grad_norm": 0.6323015280239388, "learning_rate": 2.769180859691809e-06, "loss": 0.6346, "step": 29983 }, { "epoch": 0.8754196957753059, "grad_norm": 0.6535987572338773, "learning_rate": 2.7685320356853205e-06, "loss": 0.5893, "step": 29984 }, { "epoch": 0.8754488920031532, "grad_norm": 0.6878616747267589, "learning_rate": 2.7678832116788325e-06, "loss": 0.6667, "step": 29985 }, { "epoch": 0.8754780882310006, "grad_norm": 0.6700789825208278, "learning_rate": 2.767234387672344e-06, "loss": 0.5889, "step": 29986 }, { "epoch": 0.875507284458848, "grad_norm": 0.6088427289465927, "learning_rate": 2.7665855636658557e-06, "loss": 0.4886, "step": 29987 }, { "epoch": 0.8755364806866953, "grad_norm": 0.6571706810837, "learning_rate": 2.7659367396593677e-06, "loss": 0.6602, "step": 29988 }, { "epoch": 0.8755656769145427, "grad_norm": 0.6138940428213419, "learning_rate": 2.7652879156528794e-06, "loss": 0.5191, "step": 29989 }, { "epoch": 0.87559487314239, "grad_norm": 0.62532853733586, "learning_rate": 2.764639091646391e-06, "loss": 0.5934, "step": 29990 }, { "epoch": 0.8756240693702374, "grad_norm": 0.6311093795742978, "learning_rate": 2.7639902676399026e-06, "loss": 0.5692, "step": 29991 }, { "epoch": 0.8756532655980848, "grad_norm": 0.6476958165649855, "learning_rate": 2.7633414436334146e-06, "loss": 0.6008, "step": 29992 }, { "epoch": 0.8756824618259321, "grad_norm": 0.6447958201220236, "learning_rate": 2.7626926196269266e-06, "loss": 0.5928, "step": 29993 }, { "epoch": 0.8757116580537795, "grad_norm": 0.6189129088667831, "learning_rate": 2.7620437956204382e-06, "loss": 0.5429, "step": 29994 }, { "epoch": 0.8757408542816268, "grad_norm": 0.6585894235192963, "learning_rate": 2.76139497161395e-06, "loss": 0.599, "step": 29995 }, { "epoch": 0.8757700505094742, "grad_norm": 0.5900795948676026, "learning_rate": 2.7607461476074614e-06, "loss": 0.5049, "step": 29996 }, { "epoch": 0.8757992467373216, "grad_norm": 0.6263118784502008, "learning_rate": 2.760097323600974e-06, "loss": 0.5569, "step": 29997 }, { "epoch": 0.8758284429651689, "grad_norm": 0.6419488760049824, "learning_rate": 2.7594484995944855e-06, "loss": 0.6069, "step": 29998 }, { "epoch": 0.8758576391930163, "grad_norm": 0.6377425525024817, "learning_rate": 2.758799675587997e-06, "loss": 0.5988, "step": 29999 }, { "epoch": 0.8758868354208637, "grad_norm": 1.0833197222684188, "learning_rate": 2.7581508515815087e-06, "loss": 0.6045, "step": 30000 }, { "epoch": 0.875916031648711, "grad_norm": 0.6291493592396944, "learning_rate": 2.7575020275750203e-06, "loss": 0.564, "step": 30001 }, { "epoch": 0.8759452278765584, "grad_norm": 0.6629103555172432, "learning_rate": 2.7568532035685323e-06, "loss": 0.6049, "step": 30002 }, { "epoch": 0.8759744241044057, "grad_norm": 0.6636618902495849, "learning_rate": 2.756204379562044e-06, "loss": 0.599, "step": 30003 }, { "epoch": 0.8760036203322531, "grad_norm": 0.6850593450235393, "learning_rate": 2.755555555555556e-06, "loss": 0.5827, "step": 30004 }, { "epoch": 0.8760328165601005, "grad_norm": 0.6700001958015929, "learning_rate": 2.7549067315490676e-06, "loss": 0.6477, "step": 30005 }, { "epoch": 0.8760620127879478, "grad_norm": 0.6444527992508539, "learning_rate": 2.754257907542579e-06, "loss": 0.5949, "step": 30006 }, { "epoch": 0.8760912090157952, "grad_norm": 0.688222692699516, "learning_rate": 2.753609083536091e-06, "loss": 0.6047, "step": 30007 }, { "epoch": 0.8761204052436425, "grad_norm": 0.7000650346655388, "learning_rate": 2.7529602595296028e-06, "loss": 0.693, "step": 30008 }, { "epoch": 0.8761496014714899, "grad_norm": 0.7023434759789758, "learning_rate": 2.7523114355231144e-06, "loss": 0.6644, "step": 30009 }, { "epoch": 0.8761787976993373, "grad_norm": 0.6413127298957276, "learning_rate": 2.751662611516626e-06, "loss": 0.5607, "step": 30010 }, { "epoch": 0.8762079939271846, "grad_norm": 0.692732584000538, "learning_rate": 2.7510137875101384e-06, "loss": 0.672, "step": 30011 }, { "epoch": 0.876237190155032, "grad_norm": 0.675815299352918, "learning_rate": 2.75036496350365e-06, "loss": 0.6622, "step": 30012 }, { "epoch": 0.8762663863828793, "grad_norm": 0.6480626743090796, "learning_rate": 2.7497161394971617e-06, "loss": 0.6143, "step": 30013 }, { "epoch": 0.8762955826107267, "grad_norm": 0.6630678316538356, "learning_rate": 2.7490673154906733e-06, "loss": 0.6111, "step": 30014 }, { "epoch": 0.8763247788385741, "grad_norm": 0.5731771486556411, "learning_rate": 2.748418491484185e-06, "loss": 0.5018, "step": 30015 }, { "epoch": 0.8763539750664214, "grad_norm": 0.6259096476831498, "learning_rate": 2.7477696674776973e-06, "loss": 0.6175, "step": 30016 }, { "epoch": 0.8763831712942688, "grad_norm": 0.6673251426403984, "learning_rate": 2.747120843471209e-06, "loss": 0.5749, "step": 30017 }, { "epoch": 0.8764123675221162, "grad_norm": 0.6835712852228115, "learning_rate": 2.7464720194647205e-06, "loss": 0.6434, "step": 30018 }, { "epoch": 0.8764415637499635, "grad_norm": 0.5912600811521395, "learning_rate": 2.745823195458232e-06, "loss": 0.4871, "step": 30019 }, { "epoch": 0.8764707599778109, "grad_norm": 0.7066002843986122, "learning_rate": 2.7451743714517437e-06, "loss": 0.7042, "step": 30020 }, { "epoch": 0.8764999562056582, "grad_norm": 0.6566101106848913, "learning_rate": 2.7445255474452558e-06, "loss": 0.6072, "step": 30021 }, { "epoch": 0.8765291524335056, "grad_norm": 0.6621177988293331, "learning_rate": 2.7438767234387674e-06, "loss": 0.6323, "step": 30022 }, { "epoch": 0.876558348661353, "grad_norm": 0.6252284531417848, "learning_rate": 2.7432278994322794e-06, "loss": 0.5664, "step": 30023 }, { "epoch": 0.8765875448892003, "grad_norm": 0.5903352633951756, "learning_rate": 2.742579075425791e-06, "loss": 0.5236, "step": 30024 }, { "epoch": 0.8766167411170477, "grad_norm": 0.6429225094312563, "learning_rate": 2.7419302514193026e-06, "loss": 0.5821, "step": 30025 }, { "epoch": 0.876645937344895, "grad_norm": 0.6460717191509178, "learning_rate": 2.7412814274128146e-06, "loss": 0.553, "step": 30026 }, { "epoch": 0.8766751335727424, "grad_norm": 0.6222052285220196, "learning_rate": 2.7406326034063262e-06, "loss": 0.5373, "step": 30027 }, { "epoch": 0.8767043298005898, "grad_norm": 0.6126953745201975, "learning_rate": 2.739983779399838e-06, "loss": 0.5832, "step": 30028 }, { "epoch": 0.8767335260284371, "grad_norm": 0.671165554418638, "learning_rate": 2.7393349553933494e-06, "loss": 0.5994, "step": 30029 }, { "epoch": 0.8767627222562845, "grad_norm": 0.636260493152526, "learning_rate": 2.738686131386862e-06, "loss": 0.5977, "step": 30030 }, { "epoch": 0.8767919184841318, "grad_norm": 0.6785405585056881, "learning_rate": 2.7380373073803735e-06, "loss": 0.6868, "step": 30031 }, { "epoch": 0.8768211147119792, "grad_norm": 0.6843493136039248, "learning_rate": 2.737388483373885e-06, "loss": 0.6997, "step": 30032 }, { "epoch": 0.8768503109398266, "grad_norm": 0.6812143471567809, "learning_rate": 2.7367396593673967e-06, "loss": 0.6253, "step": 30033 }, { "epoch": 0.8768795071676739, "grad_norm": 0.6391979653126872, "learning_rate": 2.7360908353609083e-06, "loss": 0.5775, "step": 30034 }, { "epoch": 0.8769087033955213, "grad_norm": 0.5952506539479058, "learning_rate": 2.7354420113544207e-06, "loss": 0.5199, "step": 30035 }, { "epoch": 0.8769378996233687, "grad_norm": 0.6307891553107715, "learning_rate": 2.7347931873479323e-06, "loss": 0.5776, "step": 30036 }, { "epoch": 0.876967095851216, "grad_norm": 0.6460973017480002, "learning_rate": 2.734144363341444e-06, "loss": 0.6172, "step": 30037 }, { "epoch": 0.8769962920790634, "grad_norm": 0.6304647987210569, "learning_rate": 2.7334955393349556e-06, "loss": 0.5684, "step": 30038 }, { "epoch": 0.8770254883069107, "grad_norm": 0.6308465100464561, "learning_rate": 2.732846715328467e-06, "loss": 0.5799, "step": 30039 }, { "epoch": 0.8770546845347581, "grad_norm": 0.5953392237806568, "learning_rate": 2.732197891321979e-06, "loss": 0.4823, "step": 30040 }, { "epoch": 0.8770838807626055, "grad_norm": 0.6716185196347899, "learning_rate": 2.731549067315491e-06, "loss": 0.6062, "step": 30041 }, { "epoch": 0.8771130769904528, "grad_norm": 0.6093390493239904, "learning_rate": 2.730900243309003e-06, "loss": 0.5567, "step": 30042 }, { "epoch": 0.8771422732183002, "grad_norm": 0.6718726956465488, "learning_rate": 2.7302514193025144e-06, "loss": 0.6131, "step": 30043 }, { "epoch": 0.8771714694461475, "grad_norm": 0.6176538987221997, "learning_rate": 2.7296025952960264e-06, "loss": 0.5322, "step": 30044 }, { "epoch": 0.8772006656739949, "grad_norm": 0.6759981220918139, "learning_rate": 2.728953771289538e-06, "loss": 0.6331, "step": 30045 }, { "epoch": 0.8772298619018423, "grad_norm": 0.6214846518363536, "learning_rate": 2.7283049472830497e-06, "loss": 0.5501, "step": 30046 }, { "epoch": 0.8772590581296896, "grad_norm": 0.6198144405479076, "learning_rate": 2.7276561232765613e-06, "loss": 0.5552, "step": 30047 }, { "epoch": 0.877288254357537, "grad_norm": 0.6265748838437706, "learning_rate": 2.727007299270073e-06, "loss": 0.5778, "step": 30048 }, { "epoch": 0.8773174505853844, "grad_norm": 0.6600603530183335, "learning_rate": 2.7263584752635853e-06, "loss": 0.6689, "step": 30049 }, { "epoch": 0.8773466468132317, "grad_norm": 0.6471564779089369, "learning_rate": 2.725709651257097e-06, "loss": 0.5893, "step": 30050 }, { "epoch": 0.8773758430410791, "grad_norm": 0.6041973279433966, "learning_rate": 2.7250608272506085e-06, "loss": 0.5374, "step": 30051 }, { "epoch": 0.8774050392689264, "grad_norm": 0.6297691021745069, "learning_rate": 2.72441200324412e-06, "loss": 0.5812, "step": 30052 }, { "epoch": 0.8774342354967738, "grad_norm": 0.6742727359308711, "learning_rate": 2.7237631792376317e-06, "loss": 0.6805, "step": 30053 }, { "epoch": 0.8774634317246212, "grad_norm": 0.6561292180197076, "learning_rate": 2.723114355231144e-06, "loss": 0.5929, "step": 30054 }, { "epoch": 0.8774926279524685, "grad_norm": 0.6000531472549936, "learning_rate": 2.7224655312246558e-06, "loss": 0.5293, "step": 30055 }, { "epoch": 0.8775218241803159, "grad_norm": 0.653058661031134, "learning_rate": 2.7218167072181674e-06, "loss": 0.584, "step": 30056 }, { "epoch": 0.8775510204081632, "grad_norm": 0.6367246995219101, "learning_rate": 2.721167883211679e-06, "loss": 0.5553, "step": 30057 }, { "epoch": 0.8775802166360106, "grad_norm": 0.6433271587537658, "learning_rate": 2.7205190592051906e-06, "loss": 0.5798, "step": 30058 }, { "epoch": 0.877609412863858, "grad_norm": 0.7309643580445925, "learning_rate": 2.7198702351987026e-06, "loss": 0.5555, "step": 30059 }, { "epoch": 0.8776386090917053, "grad_norm": 0.6309193789194045, "learning_rate": 2.7192214111922142e-06, "loss": 0.543, "step": 30060 }, { "epoch": 0.8776678053195527, "grad_norm": 0.681418514865015, "learning_rate": 2.718572587185726e-06, "loss": 0.6665, "step": 30061 }, { "epoch": 0.8776970015474, "grad_norm": 0.6167104007531946, "learning_rate": 2.717923763179238e-06, "loss": 0.5712, "step": 30062 }, { "epoch": 0.8777261977752474, "grad_norm": 0.6475362706559511, "learning_rate": 2.71727493917275e-06, "loss": 0.5798, "step": 30063 }, { "epoch": 0.8777553940030948, "grad_norm": 0.661810873864229, "learning_rate": 2.7166261151662615e-06, "loss": 0.6043, "step": 30064 }, { "epoch": 0.8777845902309421, "grad_norm": 0.6182701666112767, "learning_rate": 2.715977291159773e-06, "loss": 0.5602, "step": 30065 }, { "epoch": 0.8778137864587895, "grad_norm": 0.6410690949627118, "learning_rate": 2.7153284671532847e-06, "loss": 0.5448, "step": 30066 }, { "epoch": 0.8778429826866369, "grad_norm": 0.6913626831770854, "learning_rate": 2.7146796431467963e-06, "loss": 0.694, "step": 30067 }, { "epoch": 0.8778721789144842, "grad_norm": 0.6584530155147996, "learning_rate": 2.7140308191403087e-06, "loss": 0.6196, "step": 30068 }, { "epoch": 0.8779013751423316, "grad_norm": 0.5883342662218752, "learning_rate": 2.7133819951338204e-06, "loss": 0.5231, "step": 30069 }, { "epoch": 0.8779305713701789, "grad_norm": 0.6431031783219027, "learning_rate": 2.712733171127332e-06, "loss": 0.5814, "step": 30070 }, { "epoch": 0.8779597675980263, "grad_norm": 0.681865272476098, "learning_rate": 2.7120843471208436e-06, "loss": 0.6307, "step": 30071 }, { "epoch": 0.8779889638258737, "grad_norm": 0.7131651569421245, "learning_rate": 2.711435523114355e-06, "loss": 0.714, "step": 30072 }, { "epoch": 0.878018160053721, "grad_norm": 0.6138569507768882, "learning_rate": 2.7107866991078676e-06, "loss": 0.5503, "step": 30073 }, { "epoch": 0.8780473562815684, "grad_norm": 0.6263147817092934, "learning_rate": 2.7101378751013792e-06, "loss": 0.5388, "step": 30074 }, { "epoch": 0.8780765525094157, "grad_norm": 0.6278844107480918, "learning_rate": 2.709489051094891e-06, "loss": 0.6218, "step": 30075 }, { "epoch": 0.8781057487372631, "grad_norm": 0.6385309387004062, "learning_rate": 2.7088402270884024e-06, "loss": 0.5792, "step": 30076 }, { "epoch": 0.8781349449651105, "grad_norm": 0.6625524606001388, "learning_rate": 2.7081914030819145e-06, "loss": 0.6255, "step": 30077 }, { "epoch": 0.8781641411929578, "grad_norm": 0.6366575885994046, "learning_rate": 2.707542579075426e-06, "loss": 0.5941, "step": 30078 }, { "epoch": 0.8781933374208052, "grad_norm": 0.6495417370326143, "learning_rate": 2.7068937550689377e-06, "loss": 0.5911, "step": 30079 }, { "epoch": 0.8782225336486525, "grad_norm": 0.6445715209836395, "learning_rate": 2.7062449310624493e-06, "loss": 0.5776, "step": 30080 }, { "epoch": 0.8782517298764999, "grad_norm": 0.6530789985956804, "learning_rate": 2.7055961070559613e-06, "loss": 0.6037, "step": 30081 }, { "epoch": 0.8782809261043473, "grad_norm": 0.6216212403458292, "learning_rate": 2.7049472830494733e-06, "loss": 0.572, "step": 30082 }, { "epoch": 0.8783101223321946, "grad_norm": 0.6164277479733486, "learning_rate": 2.704298459042985e-06, "loss": 0.4923, "step": 30083 }, { "epoch": 0.878339318560042, "grad_norm": 0.581311866844086, "learning_rate": 2.7036496350364965e-06, "loss": 0.5117, "step": 30084 }, { "epoch": 0.8783685147878894, "grad_norm": 0.6829655587692939, "learning_rate": 2.703000811030008e-06, "loss": 0.6447, "step": 30085 }, { "epoch": 0.8783977110157367, "grad_norm": 0.7641782163514965, "learning_rate": 2.7023519870235197e-06, "loss": 0.6157, "step": 30086 }, { "epoch": 0.8784269072435841, "grad_norm": 0.6296739694834296, "learning_rate": 2.701703163017032e-06, "loss": 0.5602, "step": 30087 }, { "epoch": 0.8784561034714314, "grad_norm": 0.632925996861159, "learning_rate": 2.7010543390105438e-06, "loss": 0.6099, "step": 30088 }, { "epoch": 0.8784852996992788, "grad_norm": 0.6524696656251765, "learning_rate": 2.7004055150040554e-06, "loss": 0.556, "step": 30089 }, { "epoch": 0.8785144959271262, "grad_norm": 0.6287845458027371, "learning_rate": 2.699756690997567e-06, "loss": 0.5334, "step": 30090 }, { "epoch": 0.8785436921549736, "grad_norm": 0.6626613895826421, "learning_rate": 2.6991078669910786e-06, "loss": 0.6249, "step": 30091 }, { "epoch": 0.878572888382821, "grad_norm": 0.6016518719325881, "learning_rate": 2.6984590429845906e-06, "loss": 0.5437, "step": 30092 }, { "epoch": 0.8786020846106684, "grad_norm": 0.6235601718314754, "learning_rate": 2.6978102189781027e-06, "loss": 0.5462, "step": 30093 }, { "epoch": 0.8786312808385157, "grad_norm": 0.6432646468085582, "learning_rate": 2.6971613949716143e-06, "loss": 0.578, "step": 30094 }, { "epoch": 0.8786604770663631, "grad_norm": 0.6628943129646158, "learning_rate": 2.696512570965126e-06, "loss": 0.6184, "step": 30095 }, { "epoch": 0.8786896732942104, "grad_norm": 0.712813444097631, "learning_rate": 2.695863746958638e-06, "loss": 0.666, "step": 30096 }, { "epoch": 0.8787188695220578, "grad_norm": 0.6374173572878569, "learning_rate": 2.6952149229521495e-06, "loss": 0.5958, "step": 30097 }, { "epoch": 0.8787480657499052, "grad_norm": 0.6217915226717782, "learning_rate": 2.694566098945661e-06, "loss": 0.5957, "step": 30098 }, { "epoch": 0.8787772619777525, "grad_norm": 0.6198875541563058, "learning_rate": 2.6939172749391727e-06, "loss": 0.5466, "step": 30099 }, { "epoch": 0.8788064582055999, "grad_norm": 0.6033918807169043, "learning_rate": 2.6932684509326847e-06, "loss": 0.5433, "step": 30100 }, { "epoch": 0.8788356544334472, "grad_norm": 0.629923655813002, "learning_rate": 2.6926196269261968e-06, "loss": 0.5595, "step": 30101 }, { "epoch": 0.8788648506612946, "grad_norm": 0.6515441299113355, "learning_rate": 2.6919708029197084e-06, "loss": 0.5578, "step": 30102 }, { "epoch": 0.878894046889142, "grad_norm": 0.6457424940722365, "learning_rate": 2.69132197891322e-06, "loss": 0.6297, "step": 30103 }, { "epoch": 0.8789232431169893, "grad_norm": 0.67901533903683, "learning_rate": 2.6906731549067316e-06, "loss": 0.6033, "step": 30104 }, { "epoch": 0.8789524393448367, "grad_norm": 0.643938721651653, "learning_rate": 2.690024330900243e-06, "loss": 0.5879, "step": 30105 }, { "epoch": 0.878981635572684, "grad_norm": 0.6760261033386229, "learning_rate": 2.6893755068937556e-06, "loss": 0.6295, "step": 30106 }, { "epoch": 0.8790108318005314, "grad_norm": 0.6388828536515326, "learning_rate": 2.6887266828872672e-06, "loss": 0.6007, "step": 30107 }, { "epoch": 0.8790400280283788, "grad_norm": 0.6099069003354806, "learning_rate": 2.688077858880779e-06, "loss": 0.5537, "step": 30108 }, { "epoch": 0.8790692242562261, "grad_norm": 0.6409229537620843, "learning_rate": 2.6874290348742904e-06, "loss": 0.566, "step": 30109 }, { "epoch": 0.8790984204840735, "grad_norm": 0.6592403926201447, "learning_rate": 2.686780210867802e-06, "loss": 0.6075, "step": 30110 }, { "epoch": 0.8791276167119209, "grad_norm": 0.5714793793510926, "learning_rate": 2.686131386861314e-06, "loss": 0.4763, "step": 30111 }, { "epoch": 0.8791568129397682, "grad_norm": 0.6600547835154416, "learning_rate": 2.685482562854826e-06, "loss": 0.605, "step": 30112 }, { "epoch": 0.8791860091676156, "grad_norm": 0.6617049498445046, "learning_rate": 2.6848337388483377e-06, "loss": 0.6017, "step": 30113 }, { "epoch": 0.8792152053954629, "grad_norm": 0.6000100104932635, "learning_rate": 2.6841849148418493e-06, "loss": 0.5401, "step": 30114 }, { "epoch": 0.8792444016233103, "grad_norm": 0.6493052765635957, "learning_rate": 2.6835360908353613e-06, "loss": 0.5848, "step": 30115 }, { "epoch": 0.8792735978511577, "grad_norm": 0.7101103435134724, "learning_rate": 2.682887266828873e-06, "loss": 0.7481, "step": 30116 }, { "epoch": 0.879302794079005, "grad_norm": 0.6685126850399525, "learning_rate": 2.6822384428223845e-06, "loss": 0.6621, "step": 30117 }, { "epoch": 0.8793319903068524, "grad_norm": 0.6328319152234757, "learning_rate": 2.681589618815896e-06, "loss": 0.568, "step": 30118 }, { "epoch": 0.8793611865346997, "grad_norm": 0.6368400247153083, "learning_rate": 2.6809407948094077e-06, "loss": 0.6062, "step": 30119 }, { "epoch": 0.8793903827625471, "grad_norm": 0.6031825529943062, "learning_rate": 2.68029197080292e-06, "loss": 0.5449, "step": 30120 }, { "epoch": 0.8794195789903945, "grad_norm": 0.601209103453772, "learning_rate": 2.679643146796432e-06, "loss": 0.5305, "step": 30121 }, { "epoch": 0.8794487752182418, "grad_norm": 0.6124535338293166, "learning_rate": 2.6789943227899434e-06, "loss": 0.5301, "step": 30122 }, { "epoch": 0.8794779714460892, "grad_norm": 0.6539658334340959, "learning_rate": 2.678345498783455e-06, "loss": 0.6127, "step": 30123 }, { "epoch": 0.8795071676739366, "grad_norm": 0.7003482152363435, "learning_rate": 2.6776966747769666e-06, "loss": 0.6578, "step": 30124 }, { "epoch": 0.8795363639017839, "grad_norm": 0.6195504168248315, "learning_rate": 2.677047850770479e-06, "loss": 0.5624, "step": 30125 }, { "epoch": 0.8795655601296313, "grad_norm": 0.6360726944449727, "learning_rate": 2.6763990267639907e-06, "loss": 0.5975, "step": 30126 }, { "epoch": 0.8795947563574786, "grad_norm": 0.6240156318163994, "learning_rate": 2.6757502027575023e-06, "loss": 0.5458, "step": 30127 }, { "epoch": 0.879623952585326, "grad_norm": 0.6579796884095557, "learning_rate": 2.675101378751014e-06, "loss": 0.6414, "step": 30128 }, { "epoch": 0.8796531488131734, "grad_norm": 0.6601210725085682, "learning_rate": 2.674452554744526e-06, "loss": 0.6907, "step": 30129 }, { "epoch": 0.8796823450410207, "grad_norm": 0.6351164660397658, "learning_rate": 2.6738037307380375e-06, "loss": 0.5563, "step": 30130 }, { "epoch": 0.8797115412688681, "grad_norm": 0.5954972687727244, "learning_rate": 2.6731549067315495e-06, "loss": 0.527, "step": 30131 }, { "epoch": 0.8797407374967154, "grad_norm": 0.5866286343857547, "learning_rate": 2.672506082725061e-06, "loss": 0.4927, "step": 30132 }, { "epoch": 0.8797699337245628, "grad_norm": 0.6868743394456391, "learning_rate": 2.6718572587185727e-06, "loss": 0.6686, "step": 30133 }, { "epoch": 0.8797991299524102, "grad_norm": 0.6600033300411876, "learning_rate": 2.6712084347120848e-06, "loss": 0.6453, "step": 30134 }, { "epoch": 0.8798283261802575, "grad_norm": 0.6484868430876835, "learning_rate": 2.6705596107055964e-06, "loss": 0.6167, "step": 30135 }, { "epoch": 0.8798575224081049, "grad_norm": 0.690391006080514, "learning_rate": 2.669910786699108e-06, "loss": 0.7069, "step": 30136 }, { "epoch": 0.8798867186359522, "grad_norm": 0.6324962321876503, "learning_rate": 2.6692619626926196e-06, "loss": 0.5692, "step": 30137 }, { "epoch": 0.8799159148637996, "grad_norm": 0.5962522296174307, "learning_rate": 2.668613138686131e-06, "loss": 0.4754, "step": 30138 }, { "epoch": 0.879945111091647, "grad_norm": 0.6469291523080452, "learning_rate": 2.6679643146796436e-06, "loss": 0.5824, "step": 30139 }, { "epoch": 0.8799743073194943, "grad_norm": 0.6478200218972686, "learning_rate": 2.6673154906731552e-06, "loss": 0.6024, "step": 30140 }, { "epoch": 0.8800035035473417, "grad_norm": 0.6087718684944243, "learning_rate": 2.666666666666667e-06, "loss": 0.5292, "step": 30141 }, { "epoch": 0.880032699775189, "grad_norm": 0.6753887150502628, "learning_rate": 2.6660178426601784e-06, "loss": 0.6443, "step": 30142 }, { "epoch": 0.8800618960030364, "grad_norm": 0.7178603979689848, "learning_rate": 2.66536901865369e-06, "loss": 0.6174, "step": 30143 }, { "epoch": 0.8800910922308838, "grad_norm": 0.6300217656897019, "learning_rate": 2.6647201946472025e-06, "loss": 0.571, "step": 30144 }, { "epoch": 0.8801202884587311, "grad_norm": 0.6427588751530244, "learning_rate": 2.664071370640714e-06, "loss": 0.6129, "step": 30145 }, { "epoch": 0.8801494846865785, "grad_norm": 0.6472372320740443, "learning_rate": 2.6634225466342257e-06, "loss": 0.562, "step": 30146 }, { "epoch": 0.8801786809144259, "grad_norm": 0.6607513954430896, "learning_rate": 2.6627737226277373e-06, "loss": 0.6058, "step": 30147 }, { "epoch": 0.8802078771422732, "grad_norm": 0.6217008450330584, "learning_rate": 2.6621248986212493e-06, "loss": 0.5543, "step": 30148 }, { "epoch": 0.8802370733701206, "grad_norm": 0.625357839973491, "learning_rate": 2.661476074614761e-06, "loss": 0.5888, "step": 30149 }, { "epoch": 0.8802662695979679, "grad_norm": 0.6721128266144806, "learning_rate": 2.6608272506082725e-06, "loss": 0.6507, "step": 30150 }, { "epoch": 0.8802954658258153, "grad_norm": 0.6770344156384642, "learning_rate": 2.6601784266017846e-06, "loss": 0.6257, "step": 30151 }, { "epoch": 0.8803246620536627, "grad_norm": 0.6051651404270465, "learning_rate": 2.659529602595296e-06, "loss": 0.5128, "step": 30152 }, { "epoch": 0.88035385828151, "grad_norm": 0.6220360353516614, "learning_rate": 2.658880778588808e-06, "loss": 0.5486, "step": 30153 }, { "epoch": 0.8803830545093574, "grad_norm": 0.7320294803716476, "learning_rate": 2.65823195458232e-06, "loss": 0.5905, "step": 30154 }, { "epoch": 0.8804122507372047, "grad_norm": 0.5456681935874494, "learning_rate": 2.6575831305758314e-06, "loss": 0.4608, "step": 30155 }, { "epoch": 0.8804414469650521, "grad_norm": 0.6436993228098771, "learning_rate": 2.656934306569343e-06, "loss": 0.6004, "step": 30156 }, { "epoch": 0.8804706431928995, "grad_norm": 0.6234530279285714, "learning_rate": 2.6562854825628546e-06, "loss": 0.5877, "step": 30157 }, { "epoch": 0.8804998394207468, "grad_norm": 0.6536131761685748, "learning_rate": 2.655636658556367e-06, "loss": 0.579, "step": 30158 }, { "epoch": 0.8805290356485942, "grad_norm": 0.6261908292437411, "learning_rate": 2.6549878345498787e-06, "loss": 0.5423, "step": 30159 }, { "epoch": 0.8805582318764416, "grad_norm": 0.5968388490649565, "learning_rate": 2.6543390105433903e-06, "loss": 0.5013, "step": 30160 }, { "epoch": 0.8805874281042889, "grad_norm": 0.7183856644216731, "learning_rate": 2.653690186536902e-06, "loss": 0.7103, "step": 30161 }, { "epoch": 0.8806166243321363, "grad_norm": 0.7057143679878849, "learning_rate": 2.6530413625304143e-06, "loss": 0.6399, "step": 30162 }, { "epoch": 0.8806458205599836, "grad_norm": 0.6613287533324034, "learning_rate": 2.652392538523926e-06, "loss": 0.6492, "step": 30163 }, { "epoch": 0.880675016787831, "grad_norm": 0.643953201601279, "learning_rate": 2.6517437145174375e-06, "loss": 0.5897, "step": 30164 }, { "epoch": 0.8807042130156784, "grad_norm": 0.5582406656289487, "learning_rate": 2.651094890510949e-06, "loss": 0.4543, "step": 30165 }, { "epoch": 0.8807334092435257, "grad_norm": 0.6044841480019466, "learning_rate": 2.6504460665044607e-06, "loss": 0.5458, "step": 30166 }, { "epoch": 0.8807626054713731, "grad_norm": 0.6282444253918703, "learning_rate": 2.6497972424979728e-06, "loss": 0.5861, "step": 30167 }, { "epoch": 0.8807918016992204, "grad_norm": 0.6356487224859889, "learning_rate": 2.6491484184914844e-06, "loss": 0.5886, "step": 30168 }, { "epoch": 0.8808209979270678, "grad_norm": 0.622631052652967, "learning_rate": 2.648499594484996e-06, "loss": 0.5265, "step": 30169 }, { "epoch": 0.8808501941549152, "grad_norm": 0.6713886305862748, "learning_rate": 2.647850770478508e-06, "loss": 0.6326, "step": 30170 }, { "epoch": 0.8808793903827625, "grad_norm": 0.6371974421387767, "learning_rate": 2.6472019464720196e-06, "loss": 0.647, "step": 30171 }, { "epoch": 0.8809085866106099, "grad_norm": 0.8036412785750329, "learning_rate": 2.6465531224655316e-06, "loss": 0.7081, "step": 30172 }, { "epoch": 0.8809377828384573, "grad_norm": 0.6312552255577469, "learning_rate": 2.6459042984590432e-06, "loss": 0.5313, "step": 30173 }, { "epoch": 0.8809669790663046, "grad_norm": 0.6137104113212138, "learning_rate": 2.645255474452555e-06, "loss": 0.526, "step": 30174 }, { "epoch": 0.880996175294152, "grad_norm": 0.6214415794185882, "learning_rate": 2.6446066504460664e-06, "loss": 0.5683, "step": 30175 }, { "epoch": 0.8810253715219993, "grad_norm": 0.6764667336196072, "learning_rate": 2.643957826439578e-06, "loss": 0.6078, "step": 30176 }, { "epoch": 0.8810545677498467, "grad_norm": 0.6363582239638513, "learning_rate": 2.6433090024330905e-06, "loss": 0.5781, "step": 30177 }, { "epoch": 0.8810837639776941, "grad_norm": 0.6632339978125246, "learning_rate": 2.642660178426602e-06, "loss": 0.6128, "step": 30178 }, { "epoch": 0.8811129602055414, "grad_norm": 0.6667599466487444, "learning_rate": 2.6420113544201137e-06, "loss": 0.5899, "step": 30179 }, { "epoch": 0.8811421564333888, "grad_norm": 0.6174116116183622, "learning_rate": 2.6413625304136253e-06, "loss": 0.5506, "step": 30180 }, { "epoch": 0.8811713526612361, "grad_norm": 0.6571996792842139, "learning_rate": 2.6407137064071373e-06, "loss": 0.6176, "step": 30181 }, { "epoch": 0.8812005488890835, "grad_norm": 0.6738442752080291, "learning_rate": 2.6400648824006494e-06, "loss": 0.631, "step": 30182 }, { "epoch": 0.8812297451169309, "grad_norm": 0.6423491369316319, "learning_rate": 2.639416058394161e-06, "loss": 0.5905, "step": 30183 }, { "epoch": 0.8812589413447782, "grad_norm": 0.6316830092311123, "learning_rate": 2.6387672343876726e-06, "loss": 0.5851, "step": 30184 }, { "epoch": 0.8812881375726256, "grad_norm": 0.6515793487055077, "learning_rate": 2.638118410381184e-06, "loss": 0.6275, "step": 30185 }, { "epoch": 0.881317333800473, "grad_norm": 0.6657589217758106, "learning_rate": 2.637469586374696e-06, "loss": 0.6089, "step": 30186 }, { "epoch": 0.8813465300283203, "grad_norm": 0.6469664488811989, "learning_rate": 2.636820762368208e-06, "loss": 0.614, "step": 30187 }, { "epoch": 0.8813757262561677, "grad_norm": 0.6599961339215371, "learning_rate": 2.6361719383617194e-06, "loss": 0.5889, "step": 30188 }, { "epoch": 0.881404922484015, "grad_norm": 0.8941616932934872, "learning_rate": 2.6355231143552314e-06, "loss": 0.6939, "step": 30189 }, { "epoch": 0.8814341187118624, "grad_norm": 0.6732303135510657, "learning_rate": 2.634874290348743e-06, "loss": 0.6, "step": 30190 }, { "epoch": 0.8814633149397098, "grad_norm": 0.6649100520113772, "learning_rate": 2.634225466342255e-06, "loss": 0.6044, "step": 30191 }, { "epoch": 0.8814925111675571, "grad_norm": 0.7177162985778833, "learning_rate": 2.6335766423357667e-06, "loss": 0.6913, "step": 30192 }, { "epoch": 0.8815217073954045, "grad_norm": 0.6832861393430886, "learning_rate": 2.6329278183292783e-06, "loss": 0.6171, "step": 30193 }, { "epoch": 0.8815509036232518, "grad_norm": 0.6870368344556732, "learning_rate": 2.63227899432279e-06, "loss": 0.6006, "step": 30194 }, { "epoch": 0.8815800998510992, "grad_norm": 0.6620296658566627, "learning_rate": 2.6316301703163023e-06, "loss": 0.588, "step": 30195 }, { "epoch": 0.8816092960789466, "grad_norm": 0.6483131869595209, "learning_rate": 2.630981346309814e-06, "loss": 0.5968, "step": 30196 }, { "epoch": 0.8816384923067939, "grad_norm": 0.6331151782704063, "learning_rate": 2.6303325223033255e-06, "loss": 0.5936, "step": 30197 }, { "epoch": 0.8816676885346413, "grad_norm": 0.6609717366656993, "learning_rate": 2.629683698296837e-06, "loss": 0.566, "step": 30198 }, { "epoch": 0.8816968847624886, "grad_norm": 0.6382848221398068, "learning_rate": 2.6290348742903487e-06, "loss": 0.5143, "step": 30199 }, { "epoch": 0.881726080990336, "grad_norm": 0.6810447057743855, "learning_rate": 2.6283860502838608e-06, "loss": 0.6486, "step": 30200 }, { "epoch": 0.8817552772181834, "grad_norm": 0.6390021759193691, "learning_rate": 2.627737226277373e-06, "loss": 0.5744, "step": 30201 }, { "epoch": 0.8817844734460307, "grad_norm": 0.6776564707349557, "learning_rate": 2.6270884022708844e-06, "loss": 0.707, "step": 30202 }, { "epoch": 0.8818136696738781, "grad_norm": 0.7261192714429212, "learning_rate": 2.626439578264396e-06, "loss": 0.6615, "step": 30203 }, { "epoch": 0.8818428659017254, "grad_norm": 0.6292850652530779, "learning_rate": 2.6257907542579076e-06, "loss": 0.5766, "step": 30204 }, { "epoch": 0.8818720621295728, "grad_norm": 0.6662753953617258, "learning_rate": 2.6251419302514196e-06, "loss": 0.5902, "step": 30205 }, { "epoch": 0.8819012583574202, "grad_norm": 0.6377953662797714, "learning_rate": 2.6244931062449312e-06, "loss": 0.6058, "step": 30206 }, { "epoch": 0.8819304545852675, "grad_norm": 0.6218275875973281, "learning_rate": 2.623844282238443e-06, "loss": 0.5483, "step": 30207 }, { "epoch": 0.8819596508131149, "grad_norm": 0.6592092146006088, "learning_rate": 2.6231954582319544e-06, "loss": 0.5842, "step": 30208 }, { "epoch": 0.8819888470409623, "grad_norm": 0.6654813833228647, "learning_rate": 2.6225466342254665e-06, "loss": 0.6304, "step": 30209 }, { "epoch": 0.8820180432688096, "grad_norm": 0.6645617889350361, "learning_rate": 2.6218978102189785e-06, "loss": 0.6411, "step": 30210 }, { "epoch": 0.8820472394966571, "grad_norm": 0.6556215627656805, "learning_rate": 2.62124898621249e-06, "loss": 0.5517, "step": 30211 }, { "epoch": 0.8820764357245044, "grad_norm": 0.6504903769184844, "learning_rate": 2.6206001622060017e-06, "loss": 0.615, "step": 30212 }, { "epoch": 0.8821056319523518, "grad_norm": 0.6355042487337642, "learning_rate": 2.6199513381995133e-06, "loss": 0.6183, "step": 30213 }, { "epoch": 0.8821348281801992, "grad_norm": 0.6747154834266602, "learning_rate": 2.6193025141930258e-06, "loss": 0.6228, "step": 30214 }, { "epoch": 0.8821640244080465, "grad_norm": 0.6893220006759432, "learning_rate": 2.6186536901865374e-06, "loss": 0.6603, "step": 30215 }, { "epoch": 0.8821932206358939, "grad_norm": 0.631222473381398, "learning_rate": 2.618004866180049e-06, "loss": 0.5861, "step": 30216 }, { "epoch": 0.8822224168637413, "grad_norm": 0.7010655976169197, "learning_rate": 2.6173560421735606e-06, "loss": 0.6702, "step": 30217 }, { "epoch": 0.8822516130915886, "grad_norm": 0.6154636197795588, "learning_rate": 2.616707218167072e-06, "loss": 0.5755, "step": 30218 }, { "epoch": 0.882280809319436, "grad_norm": 0.6913490525059772, "learning_rate": 2.616058394160584e-06, "loss": 0.6131, "step": 30219 }, { "epoch": 0.8823100055472833, "grad_norm": 0.6566117941051458, "learning_rate": 2.6154095701540962e-06, "loss": 0.6105, "step": 30220 }, { "epoch": 0.8823392017751307, "grad_norm": 0.6277059396990139, "learning_rate": 2.614760746147608e-06, "loss": 0.573, "step": 30221 }, { "epoch": 0.8823683980029781, "grad_norm": 0.6439084653363195, "learning_rate": 2.6141119221411194e-06, "loss": 0.5712, "step": 30222 }, { "epoch": 0.8823975942308254, "grad_norm": 0.6615290728898184, "learning_rate": 2.613463098134631e-06, "loss": 0.5793, "step": 30223 }, { "epoch": 0.8824267904586728, "grad_norm": 0.6803120903750052, "learning_rate": 2.612814274128143e-06, "loss": 0.6517, "step": 30224 }, { "epoch": 0.8824559866865201, "grad_norm": 0.6882812264858251, "learning_rate": 2.6121654501216547e-06, "loss": 0.625, "step": 30225 }, { "epoch": 0.8824851829143675, "grad_norm": 0.6451494285139828, "learning_rate": 2.6115166261151663e-06, "loss": 0.5502, "step": 30226 }, { "epoch": 0.8825143791422149, "grad_norm": 0.6793296596602956, "learning_rate": 2.610867802108678e-06, "loss": 0.6395, "step": 30227 }, { "epoch": 0.8825435753700622, "grad_norm": 0.5754901324574876, "learning_rate": 2.6102189781021903e-06, "loss": 0.4819, "step": 30228 }, { "epoch": 0.8825727715979096, "grad_norm": 0.6719504534375234, "learning_rate": 2.609570154095702e-06, "loss": 0.607, "step": 30229 }, { "epoch": 0.882601967825757, "grad_norm": 0.5928249854629406, "learning_rate": 2.6089213300892135e-06, "loss": 0.5361, "step": 30230 }, { "epoch": 0.8826311640536043, "grad_norm": 0.6854754656491778, "learning_rate": 2.608272506082725e-06, "loss": 0.6648, "step": 30231 }, { "epoch": 0.8826603602814517, "grad_norm": 0.6758814690051793, "learning_rate": 2.6076236820762367e-06, "loss": 0.6544, "step": 30232 }, { "epoch": 0.882689556509299, "grad_norm": 0.6577270233448178, "learning_rate": 2.606974858069749e-06, "loss": 0.6348, "step": 30233 }, { "epoch": 0.8827187527371464, "grad_norm": 0.6437585370737783, "learning_rate": 2.606326034063261e-06, "loss": 0.5648, "step": 30234 }, { "epoch": 0.8827479489649938, "grad_norm": 0.6571480438227291, "learning_rate": 2.6056772100567724e-06, "loss": 0.6333, "step": 30235 }, { "epoch": 0.8827771451928411, "grad_norm": 0.667155769591068, "learning_rate": 2.605028386050284e-06, "loss": 0.6257, "step": 30236 }, { "epoch": 0.8828063414206885, "grad_norm": 0.6876340603155474, "learning_rate": 2.6043795620437956e-06, "loss": 0.6616, "step": 30237 }, { "epoch": 0.8828355376485358, "grad_norm": 0.6352149012641052, "learning_rate": 2.6037307380373076e-06, "loss": 0.5776, "step": 30238 }, { "epoch": 0.8828647338763832, "grad_norm": 0.609263930174077, "learning_rate": 2.6030819140308192e-06, "loss": 0.5436, "step": 30239 }, { "epoch": 0.8828939301042306, "grad_norm": 0.6085312037704785, "learning_rate": 2.6024330900243313e-06, "loss": 0.5347, "step": 30240 }, { "epoch": 0.8829231263320779, "grad_norm": 0.5887699047339504, "learning_rate": 2.601784266017843e-06, "loss": 0.4938, "step": 30241 }, { "epoch": 0.8829523225599253, "grad_norm": 0.6321522724236514, "learning_rate": 2.6011354420113545e-06, "loss": 0.5793, "step": 30242 }, { "epoch": 0.8829815187877726, "grad_norm": 0.6618406767676619, "learning_rate": 2.6004866180048665e-06, "loss": 0.6807, "step": 30243 }, { "epoch": 0.88301071501562, "grad_norm": 0.6637427996611794, "learning_rate": 2.599837793998378e-06, "loss": 0.5985, "step": 30244 }, { "epoch": 0.8830399112434674, "grad_norm": 0.6533644987119769, "learning_rate": 2.5991889699918897e-06, "loss": 0.6381, "step": 30245 }, { "epoch": 0.8830691074713147, "grad_norm": 0.6674053400982051, "learning_rate": 2.5985401459854013e-06, "loss": 0.6064, "step": 30246 }, { "epoch": 0.8830983036991621, "grad_norm": 0.668754550904833, "learning_rate": 2.5978913219789138e-06, "loss": 0.6874, "step": 30247 }, { "epoch": 0.8831274999270095, "grad_norm": 0.6394406338933171, "learning_rate": 2.5972424979724254e-06, "loss": 0.5635, "step": 30248 }, { "epoch": 0.8831566961548568, "grad_norm": 0.6706533848373052, "learning_rate": 2.596593673965937e-06, "loss": 0.6551, "step": 30249 }, { "epoch": 0.8831858923827042, "grad_norm": 0.6437304000013154, "learning_rate": 2.5959448499594486e-06, "loss": 0.5803, "step": 30250 }, { "epoch": 0.8832150886105515, "grad_norm": 0.5816170084627069, "learning_rate": 2.59529602595296e-06, "loss": 0.5164, "step": 30251 }, { "epoch": 0.8832442848383989, "grad_norm": 0.6370996351957913, "learning_rate": 2.5946472019464726e-06, "loss": 0.5993, "step": 30252 }, { "epoch": 0.8832734810662463, "grad_norm": 0.6391969227705164, "learning_rate": 2.5939983779399842e-06, "loss": 0.5688, "step": 30253 }, { "epoch": 0.8833026772940936, "grad_norm": 0.660320055486475, "learning_rate": 2.593349553933496e-06, "loss": 0.652, "step": 30254 }, { "epoch": 0.883331873521941, "grad_norm": 0.6938940551834961, "learning_rate": 2.5927007299270074e-06, "loss": 0.6815, "step": 30255 }, { "epoch": 0.8833610697497883, "grad_norm": 0.6311739398690526, "learning_rate": 2.592051905920519e-06, "loss": 0.5712, "step": 30256 }, { "epoch": 0.8833902659776357, "grad_norm": 0.6092810708649086, "learning_rate": 2.591403081914031e-06, "loss": 0.5412, "step": 30257 }, { "epoch": 0.8834194622054831, "grad_norm": 0.6522361709063081, "learning_rate": 2.5907542579075427e-06, "loss": 0.5999, "step": 30258 }, { "epoch": 0.8834486584333304, "grad_norm": 0.5930711180422354, "learning_rate": 2.5901054339010547e-06, "loss": 0.5309, "step": 30259 }, { "epoch": 0.8834778546611778, "grad_norm": 0.6628120200724086, "learning_rate": 2.5894566098945663e-06, "loss": 0.6455, "step": 30260 }, { "epoch": 0.8835070508890251, "grad_norm": 0.6754750555748305, "learning_rate": 2.588807785888078e-06, "loss": 0.6346, "step": 30261 }, { "epoch": 0.8835362471168725, "grad_norm": 0.6255992216085076, "learning_rate": 2.58815896188159e-06, "loss": 0.5736, "step": 30262 }, { "epoch": 0.8835654433447199, "grad_norm": 0.6579514910586225, "learning_rate": 2.5875101378751015e-06, "loss": 0.6247, "step": 30263 }, { "epoch": 0.8835946395725672, "grad_norm": 0.6620911307333337, "learning_rate": 2.586861313868613e-06, "loss": 0.6287, "step": 30264 }, { "epoch": 0.8836238358004146, "grad_norm": 0.6719389857058946, "learning_rate": 2.5862124898621247e-06, "loss": 0.6545, "step": 30265 }, { "epoch": 0.883653032028262, "grad_norm": 0.7188044837220401, "learning_rate": 2.585563665855637e-06, "loss": 0.6742, "step": 30266 }, { "epoch": 0.8836822282561093, "grad_norm": 0.7376903785608452, "learning_rate": 2.584914841849149e-06, "loss": 0.5857, "step": 30267 }, { "epoch": 0.8837114244839567, "grad_norm": 0.6432889562231393, "learning_rate": 2.5842660178426604e-06, "loss": 0.5641, "step": 30268 }, { "epoch": 0.883740620711804, "grad_norm": 0.7035446918219832, "learning_rate": 2.583617193836172e-06, "loss": 0.6655, "step": 30269 }, { "epoch": 0.8837698169396514, "grad_norm": 0.6419838560028808, "learning_rate": 2.5829683698296836e-06, "loss": 0.5947, "step": 30270 }, { "epoch": 0.8837990131674988, "grad_norm": 0.6315744448494685, "learning_rate": 2.582319545823196e-06, "loss": 0.5748, "step": 30271 }, { "epoch": 0.8838282093953461, "grad_norm": 0.6844046388274848, "learning_rate": 2.5816707218167077e-06, "loss": 0.6236, "step": 30272 }, { "epoch": 0.8838574056231935, "grad_norm": 0.6297859937084929, "learning_rate": 2.5810218978102193e-06, "loss": 0.5985, "step": 30273 }, { "epoch": 0.8838866018510408, "grad_norm": 0.6037755014697485, "learning_rate": 2.580373073803731e-06, "loss": 0.5437, "step": 30274 }, { "epoch": 0.8839157980788882, "grad_norm": 0.6515165993033402, "learning_rate": 2.5797242497972425e-06, "loss": 0.6418, "step": 30275 }, { "epoch": 0.8839449943067356, "grad_norm": 0.6114929555391634, "learning_rate": 2.5790754257907545e-06, "loss": 0.5584, "step": 30276 }, { "epoch": 0.8839741905345829, "grad_norm": 0.6435925617607082, "learning_rate": 2.578426601784266e-06, "loss": 0.6, "step": 30277 }, { "epoch": 0.8840033867624303, "grad_norm": 0.6384184141989208, "learning_rate": 2.577777777777778e-06, "loss": 0.5942, "step": 30278 }, { "epoch": 0.8840325829902776, "grad_norm": 0.669359564229197, "learning_rate": 2.5771289537712897e-06, "loss": 0.6754, "step": 30279 }, { "epoch": 0.884061779218125, "grad_norm": 0.6571591100783306, "learning_rate": 2.5764801297648018e-06, "loss": 0.5148, "step": 30280 }, { "epoch": 0.8840909754459724, "grad_norm": 0.6495365637786471, "learning_rate": 2.5758313057583134e-06, "loss": 0.6199, "step": 30281 }, { "epoch": 0.8841201716738197, "grad_norm": 0.6223447250242194, "learning_rate": 2.575182481751825e-06, "loss": 0.5413, "step": 30282 }, { "epoch": 0.8841493679016671, "grad_norm": 0.6431112732917028, "learning_rate": 2.5745336577453366e-06, "loss": 0.5173, "step": 30283 }, { "epoch": 0.8841785641295145, "grad_norm": 0.649480818002726, "learning_rate": 2.573884833738848e-06, "loss": 0.6296, "step": 30284 }, { "epoch": 0.8842077603573618, "grad_norm": 0.639765450755843, "learning_rate": 2.5732360097323606e-06, "loss": 0.5745, "step": 30285 }, { "epoch": 0.8842369565852092, "grad_norm": 0.6863472913972118, "learning_rate": 2.5725871857258722e-06, "loss": 0.6791, "step": 30286 }, { "epoch": 0.8842661528130565, "grad_norm": 0.6220743186939854, "learning_rate": 2.571938361719384e-06, "loss": 0.5946, "step": 30287 }, { "epoch": 0.8842953490409039, "grad_norm": 0.613080210918469, "learning_rate": 2.5712895377128954e-06, "loss": 0.5285, "step": 30288 }, { "epoch": 0.8843245452687513, "grad_norm": 0.5930759689583001, "learning_rate": 2.570640713706407e-06, "loss": 0.5121, "step": 30289 }, { "epoch": 0.8843537414965986, "grad_norm": 0.6388310163271211, "learning_rate": 2.5699918896999195e-06, "loss": 0.5799, "step": 30290 }, { "epoch": 0.884382937724446, "grad_norm": 0.6437089179544433, "learning_rate": 2.569343065693431e-06, "loss": 0.613, "step": 30291 }, { "epoch": 0.8844121339522933, "grad_norm": 0.6754041508563999, "learning_rate": 2.5686942416869427e-06, "loss": 0.6267, "step": 30292 }, { "epoch": 0.8844413301801407, "grad_norm": 0.6568722282717958, "learning_rate": 2.5680454176804543e-06, "loss": 0.6192, "step": 30293 }, { "epoch": 0.8844705264079881, "grad_norm": 0.6134631832767937, "learning_rate": 2.567396593673966e-06, "loss": 0.564, "step": 30294 }, { "epoch": 0.8844997226358354, "grad_norm": 0.6514782668205459, "learning_rate": 2.566747769667478e-06, "loss": 0.5687, "step": 30295 }, { "epoch": 0.8845289188636828, "grad_norm": 0.6101542370939338, "learning_rate": 2.5660989456609895e-06, "loss": 0.5492, "step": 30296 }, { "epoch": 0.8845581150915302, "grad_norm": 0.6181097072184081, "learning_rate": 2.5654501216545016e-06, "loss": 0.5766, "step": 30297 }, { "epoch": 0.8845873113193775, "grad_norm": 0.6187973597591343, "learning_rate": 2.564801297648013e-06, "loss": 0.5473, "step": 30298 }, { "epoch": 0.8846165075472249, "grad_norm": 0.6552637447448718, "learning_rate": 2.564152473641525e-06, "loss": 0.6138, "step": 30299 }, { "epoch": 0.8846457037750722, "grad_norm": 0.6209708834101221, "learning_rate": 2.563503649635037e-06, "loss": 0.5807, "step": 30300 }, { "epoch": 0.8846749000029196, "grad_norm": 0.6599784005130968, "learning_rate": 2.5628548256285484e-06, "loss": 0.5969, "step": 30301 }, { "epoch": 0.884704096230767, "grad_norm": 0.6812571178973655, "learning_rate": 2.56220600162206e-06, "loss": 0.6776, "step": 30302 }, { "epoch": 0.8847332924586143, "grad_norm": 0.6480519495931585, "learning_rate": 2.5615571776155716e-06, "loss": 0.5937, "step": 30303 }, { "epoch": 0.8847624886864617, "grad_norm": 0.7101979872402606, "learning_rate": 2.560908353609084e-06, "loss": 0.6454, "step": 30304 }, { "epoch": 0.884791684914309, "grad_norm": 0.6184587056795978, "learning_rate": 2.5602595296025957e-06, "loss": 0.5618, "step": 30305 }, { "epoch": 0.8848208811421564, "grad_norm": 0.6432462059520321, "learning_rate": 2.5596107055961073e-06, "loss": 0.5547, "step": 30306 }, { "epoch": 0.8848500773700038, "grad_norm": 0.7550236859299992, "learning_rate": 2.558961881589619e-06, "loss": 0.6429, "step": 30307 }, { "epoch": 0.8848792735978511, "grad_norm": 0.6248348132288827, "learning_rate": 2.5583130575831305e-06, "loss": 0.5925, "step": 30308 }, { "epoch": 0.8849084698256985, "grad_norm": 0.6816481930615599, "learning_rate": 2.557664233576643e-06, "loss": 0.6868, "step": 30309 }, { "epoch": 0.8849376660535458, "grad_norm": 0.6766851688107995, "learning_rate": 2.5570154095701545e-06, "loss": 0.5998, "step": 30310 }, { "epoch": 0.8849668622813932, "grad_norm": 0.6197036871320402, "learning_rate": 2.556366585563666e-06, "loss": 0.5532, "step": 30311 }, { "epoch": 0.8849960585092406, "grad_norm": 0.6457260936748246, "learning_rate": 2.5557177615571777e-06, "loss": 0.6245, "step": 30312 }, { "epoch": 0.8850252547370879, "grad_norm": 0.6238192586905098, "learning_rate": 2.5550689375506898e-06, "loss": 0.5408, "step": 30313 }, { "epoch": 0.8850544509649353, "grad_norm": 0.6404322673567773, "learning_rate": 2.5544201135442014e-06, "loss": 0.5902, "step": 30314 }, { "epoch": 0.8850836471927827, "grad_norm": 0.6722018267599541, "learning_rate": 2.553771289537713e-06, "loss": 0.6541, "step": 30315 }, { "epoch": 0.88511284342063, "grad_norm": 0.6251223877877832, "learning_rate": 2.5531224655312246e-06, "loss": 0.5837, "step": 30316 }, { "epoch": 0.8851420396484774, "grad_norm": 0.6617616773409074, "learning_rate": 2.5524736415247366e-06, "loss": 0.5889, "step": 30317 }, { "epoch": 0.8851712358763247, "grad_norm": 0.7522012875001595, "learning_rate": 2.5518248175182486e-06, "loss": 0.5467, "step": 30318 }, { "epoch": 0.8852004321041721, "grad_norm": 0.6657960255773334, "learning_rate": 2.5511759935117602e-06, "loss": 0.615, "step": 30319 }, { "epoch": 0.8852296283320195, "grad_norm": 0.6667360735039942, "learning_rate": 2.550527169505272e-06, "loss": 0.6522, "step": 30320 }, { "epoch": 0.8852588245598668, "grad_norm": 0.6275571025390863, "learning_rate": 2.5498783454987834e-06, "loss": 0.5835, "step": 30321 }, { "epoch": 0.8852880207877142, "grad_norm": 0.6650403172853944, "learning_rate": 2.549229521492295e-06, "loss": 0.6658, "step": 30322 }, { "epoch": 0.8853172170155615, "grad_norm": 0.6007875709487732, "learning_rate": 2.5485806974858075e-06, "loss": 0.5493, "step": 30323 }, { "epoch": 0.8853464132434089, "grad_norm": 0.6075572771573069, "learning_rate": 2.547931873479319e-06, "loss": 0.5471, "step": 30324 }, { "epoch": 0.8853756094712563, "grad_norm": 0.6155237529218262, "learning_rate": 2.5472830494728307e-06, "loss": 0.5795, "step": 30325 }, { "epoch": 0.8854048056991036, "grad_norm": 0.6348433507939153, "learning_rate": 2.5466342254663423e-06, "loss": 0.5703, "step": 30326 }, { "epoch": 0.885434001926951, "grad_norm": 0.6554339296931192, "learning_rate": 2.545985401459854e-06, "loss": 0.6118, "step": 30327 }, { "epoch": 0.8854631981547983, "grad_norm": 0.6716279503446628, "learning_rate": 2.5453365774533664e-06, "loss": 0.4985, "step": 30328 }, { "epoch": 0.8854923943826457, "grad_norm": 0.6316026221395109, "learning_rate": 2.544687753446878e-06, "loss": 0.5548, "step": 30329 }, { "epoch": 0.8855215906104931, "grad_norm": 0.6002963771319235, "learning_rate": 2.5440389294403896e-06, "loss": 0.5134, "step": 30330 }, { "epoch": 0.8855507868383404, "grad_norm": 0.639379681501127, "learning_rate": 2.543390105433901e-06, "loss": 0.5922, "step": 30331 }, { "epoch": 0.8855799830661879, "grad_norm": 0.613901015838734, "learning_rate": 2.542741281427413e-06, "loss": 0.5231, "step": 30332 }, { "epoch": 0.8856091792940353, "grad_norm": 0.6466362860284974, "learning_rate": 2.542092457420925e-06, "loss": 0.6313, "step": 30333 }, { "epoch": 0.8856383755218826, "grad_norm": 0.7149596290947561, "learning_rate": 2.5414436334144364e-06, "loss": 0.6794, "step": 30334 }, { "epoch": 0.88566757174973, "grad_norm": 0.6871397290026261, "learning_rate": 2.540794809407948e-06, "loss": 0.6433, "step": 30335 }, { "epoch": 0.8856967679775773, "grad_norm": 0.6134864084641288, "learning_rate": 2.54014598540146e-06, "loss": 0.5282, "step": 30336 }, { "epoch": 0.8857259642054247, "grad_norm": 0.6753020312071857, "learning_rate": 2.539497161394972e-06, "loss": 0.6673, "step": 30337 }, { "epoch": 0.8857551604332721, "grad_norm": 0.673072676581132, "learning_rate": 2.5388483373884837e-06, "loss": 0.5926, "step": 30338 }, { "epoch": 0.8857843566611194, "grad_norm": 0.6044771282943745, "learning_rate": 2.5381995133819953e-06, "loss": 0.5427, "step": 30339 }, { "epoch": 0.8858135528889668, "grad_norm": 0.6903545985459437, "learning_rate": 2.537550689375507e-06, "loss": 0.6444, "step": 30340 }, { "epoch": 0.8858427491168142, "grad_norm": 0.6381991056113983, "learning_rate": 2.5369018653690185e-06, "loss": 0.585, "step": 30341 }, { "epoch": 0.8858719453446615, "grad_norm": 0.6250066683424346, "learning_rate": 2.536253041362531e-06, "loss": 0.5343, "step": 30342 }, { "epoch": 0.8859011415725089, "grad_norm": 0.6674547886804878, "learning_rate": 2.5356042173560425e-06, "loss": 0.6269, "step": 30343 }, { "epoch": 0.8859303378003562, "grad_norm": 0.6362715122273114, "learning_rate": 2.534955393349554e-06, "loss": 0.6163, "step": 30344 }, { "epoch": 0.8859595340282036, "grad_norm": 0.6392373382264321, "learning_rate": 2.5343065693430657e-06, "loss": 0.5903, "step": 30345 }, { "epoch": 0.885988730256051, "grad_norm": 0.6268639231269003, "learning_rate": 2.5336577453365778e-06, "loss": 0.5862, "step": 30346 }, { "epoch": 0.8860179264838983, "grad_norm": 0.6507361575748615, "learning_rate": 2.5330089213300894e-06, "loss": 0.5903, "step": 30347 }, { "epoch": 0.8860471227117457, "grad_norm": 0.651901331015022, "learning_rate": 2.5323600973236014e-06, "loss": 0.632, "step": 30348 }, { "epoch": 0.886076318939593, "grad_norm": 0.6501463545430941, "learning_rate": 2.531711273317113e-06, "loss": 0.6273, "step": 30349 }, { "epoch": 0.8861055151674404, "grad_norm": 0.6832354845255444, "learning_rate": 2.5310624493106246e-06, "loss": 0.6707, "step": 30350 }, { "epoch": 0.8861347113952878, "grad_norm": 0.6326727921458234, "learning_rate": 2.5304136253041366e-06, "loss": 0.5601, "step": 30351 }, { "epoch": 0.8861639076231351, "grad_norm": 0.6471904725045958, "learning_rate": 2.5297648012976482e-06, "loss": 0.6212, "step": 30352 }, { "epoch": 0.8861931038509825, "grad_norm": 0.6495525927809441, "learning_rate": 2.52911597729116e-06, "loss": 0.613, "step": 30353 }, { "epoch": 0.8862223000788298, "grad_norm": 0.6604843688082397, "learning_rate": 2.5284671532846714e-06, "loss": 0.627, "step": 30354 }, { "epoch": 0.8862514963066772, "grad_norm": 0.6585196388963633, "learning_rate": 2.5278183292781835e-06, "loss": 0.5962, "step": 30355 }, { "epoch": 0.8862806925345246, "grad_norm": 0.6184282955754367, "learning_rate": 2.5271695052716955e-06, "loss": 0.5207, "step": 30356 }, { "epoch": 0.8863098887623719, "grad_norm": 0.6090767020303433, "learning_rate": 2.526520681265207e-06, "loss": 0.5451, "step": 30357 }, { "epoch": 0.8863390849902193, "grad_norm": 0.6949796045472415, "learning_rate": 2.5258718572587187e-06, "loss": 0.569, "step": 30358 }, { "epoch": 0.8863682812180667, "grad_norm": 0.6350068827752364, "learning_rate": 2.5252230332522303e-06, "loss": 0.5488, "step": 30359 }, { "epoch": 0.886397477445914, "grad_norm": 0.6556140926456431, "learning_rate": 2.524574209245742e-06, "loss": 0.6386, "step": 30360 }, { "epoch": 0.8864266736737614, "grad_norm": 0.6280543628980476, "learning_rate": 2.5239253852392544e-06, "loss": 0.5969, "step": 30361 }, { "epoch": 0.8864558699016087, "grad_norm": 0.6668649819438272, "learning_rate": 2.523276561232766e-06, "loss": 0.6136, "step": 30362 }, { "epoch": 0.8864850661294561, "grad_norm": 0.6595860248413674, "learning_rate": 2.5226277372262776e-06, "loss": 0.6149, "step": 30363 }, { "epoch": 0.8865142623573035, "grad_norm": 0.6177045010131281, "learning_rate": 2.521978913219789e-06, "loss": 0.5355, "step": 30364 }, { "epoch": 0.8865434585851508, "grad_norm": 0.6290679969343873, "learning_rate": 2.521330089213301e-06, "loss": 0.556, "step": 30365 }, { "epoch": 0.8865726548129982, "grad_norm": 0.6567498505998377, "learning_rate": 2.520681265206813e-06, "loss": 0.6175, "step": 30366 }, { "epoch": 0.8866018510408455, "grad_norm": 0.7722600775931558, "learning_rate": 2.520032441200325e-06, "loss": 0.7948, "step": 30367 }, { "epoch": 0.8866310472686929, "grad_norm": 0.6397026321726543, "learning_rate": 2.5193836171938364e-06, "loss": 0.6402, "step": 30368 }, { "epoch": 0.8866602434965403, "grad_norm": 0.6631296925563746, "learning_rate": 2.518734793187348e-06, "loss": 0.5747, "step": 30369 }, { "epoch": 0.8866894397243876, "grad_norm": 0.6477565914820097, "learning_rate": 2.51808596918086e-06, "loss": 0.6526, "step": 30370 }, { "epoch": 0.886718635952235, "grad_norm": 0.6199205569821733, "learning_rate": 2.5174371451743717e-06, "loss": 0.5648, "step": 30371 }, { "epoch": 0.8867478321800824, "grad_norm": 0.6500701597521179, "learning_rate": 2.5167883211678833e-06, "loss": 0.6273, "step": 30372 }, { "epoch": 0.8867770284079297, "grad_norm": 0.6800117331699054, "learning_rate": 2.516139497161395e-06, "loss": 0.6793, "step": 30373 }, { "epoch": 0.8868062246357771, "grad_norm": 0.6174544239222849, "learning_rate": 2.5154906731549065e-06, "loss": 0.5421, "step": 30374 }, { "epoch": 0.8868354208636244, "grad_norm": 0.6805150310856464, "learning_rate": 2.514841849148419e-06, "loss": 0.6492, "step": 30375 }, { "epoch": 0.8868646170914718, "grad_norm": 0.6459868138684709, "learning_rate": 2.5141930251419305e-06, "loss": 0.5953, "step": 30376 }, { "epoch": 0.8868938133193192, "grad_norm": 0.7068034427997436, "learning_rate": 2.513544201135442e-06, "loss": 0.6741, "step": 30377 }, { "epoch": 0.8869230095471665, "grad_norm": 0.6307815533563803, "learning_rate": 2.5128953771289537e-06, "loss": 0.564, "step": 30378 }, { "epoch": 0.8869522057750139, "grad_norm": 0.6382891046293421, "learning_rate": 2.512246553122466e-06, "loss": 0.5558, "step": 30379 }, { "epoch": 0.8869814020028612, "grad_norm": 0.6418560883377155, "learning_rate": 2.511597729115978e-06, "loss": 0.5774, "step": 30380 }, { "epoch": 0.8870105982307086, "grad_norm": 0.6061884514872187, "learning_rate": 2.5109489051094894e-06, "loss": 0.5523, "step": 30381 }, { "epoch": 0.887039794458556, "grad_norm": 0.6390073240280898, "learning_rate": 2.510300081103001e-06, "loss": 0.6044, "step": 30382 }, { "epoch": 0.8870689906864033, "grad_norm": 0.6485957786912164, "learning_rate": 2.5096512570965126e-06, "loss": 0.5999, "step": 30383 }, { "epoch": 0.8870981869142507, "grad_norm": 0.6528080152505302, "learning_rate": 2.5090024330900246e-06, "loss": 0.5512, "step": 30384 }, { "epoch": 0.887127383142098, "grad_norm": 0.6675997359158055, "learning_rate": 2.5083536090835362e-06, "loss": 0.6245, "step": 30385 }, { "epoch": 0.8871565793699454, "grad_norm": 0.6546108509276964, "learning_rate": 2.5077047850770483e-06, "loss": 0.5712, "step": 30386 }, { "epoch": 0.8871857755977928, "grad_norm": 0.6665060291184525, "learning_rate": 2.50705596107056e-06, "loss": 0.602, "step": 30387 }, { "epoch": 0.8872149718256401, "grad_norm": 0.6618829247273939, "learning_rate": 2.5064071370640715e-06, "loss": 0.6244, "step": 30388 }, { "epoch": 0.8872441680534875, "grad_norm": 0.5965279469410455, "learning_rate": 2.5057583130575835e-06, "loss": 0.5076, "step": 30389 }, { "epoch": 0.8872733642813349, "grad_norm": 0.691910149652211, "learning_rate": 2.505109489051095e-06, "loss": 0.7036, "step": 30390 }, { "epoch": 0.8873025605091822, "grad_norm": 0.669489516904968, "learning_rate": 2.5044606650446067e-06, "loss": 0.6327, "step": 30391 }, { "epoch": 0.8873317567370296, "grad_norm": 0.6190207137004036, "learning_rate": 2.5038118410381183e-06, "loss": 0.5841, "step": 30392 }, { "epoch": 0.8873609529648769, "grad_norm": 0.6552292799684822, "learning_rate": 2.50316301703163e-06, "loss": 0.6111, "step": 30393 }, { "epoch": 0.8873901491927243, "grad_norm": 0.6405132906929083, "learning_rate": 2.5025141930251424e-06, "loss": 0.6526, "step": 30394 }, { "epoch": 0.8874193454205717, "grad_norm": 0.6507702914367663, "learning_rate": 2.501865369018654e-06, "loss": 0.5579, "step": 30395 }, { "epoch": 0.887448541648419, "grad_norm": 0.7148026705119963, "learning_rate": 2.5012165450121656e-06, "loss": 0.5972, "step": 30396 }, { "epoch": 0.8874777378762664, "grad_norm": 0.5863226982347978, "learning_rate": 2.500567721005677e-06, "loss": 0.5113, "step": 30397 }, { "epoch": 0.8875069341041137, "grad_norm": 0.6351138239987996, "learning_rate": 2.499918896999189e-06, "loss": 0.5695, "step": 30398 }, { "epoch": 0.8875361303319611, "grad_norm": 0.6764923273188322, "learning_rate": 2.499270072992701e-06, "loss": 0.6396, "step": 30399 }, { "epoch": 0.8875653265598085, "grad_norm": 0.6373918347272703, "learning_rate": 2.498621248986213e-06, "loss": 0.6015, "step": 30400 }, { "epoch": 0.8875945227876558, "grad_norm": 0.6287923065339329, "learning_rate": 2.4979724249797244e-06, "loss": 0.5588, "step": 30401 }, { "epoch": 0.8876237190155032, "grad_norm": 0.6653537881890068, "learning_rate": 2.4973236009732365e-06, "loss": 0.5742, "step": 30402 }, { "epoch": 0.8876529152433505, "grad_norm": 0.585063021027628, "learning_rate": 2.496674776966748e-06, "loss": 0.4995, "step": 30403 }, { "epoch": 0.8876821114711979, "grad_norm": 0.6374554297633925, "learning_rate": 2.4960259529602597e-06, "loss": 0.6009, "step": 30404 }, { "epoch": 0.8877113076990453, "grad_norm": 0.6176854036499116, "learning_rate": 2.4953771289537713e-06, "loss": 0.5693, "step": 30405 }, { "epoch": 0.8877405039268926, "grad_norm": 0.6352560089988105, "learning_rate": 2.4947283049472833e-06, "loss": 0.5904, "step": 30406 }, { "epoch": 0.88776970015474, "grad_norm": 0.6560848407298175, "learning_rate": 2.494079480940795e-06, "loss": 0.6127, "step": 30407 }, { "epoch": 0.8877988963825874, "grad_norm": 0.6476205360562652, "learning_rate": 2.4934306569343065e-06, "loss": 0.6166, "step": 30408 }, { "epoch": 0.8878280926104347, "grad_norm": 0.6229635978227742, "learning_rate": 2.4927818329278185e-06, "loss": 0.5349, "step": 30409 }, { "epoch": 0.8878572888382821, "grad_norm": 0.7110632010857539, "learning_rate": 2.49213300892133e-06, "loss": 0.6382, "step": 30410 }, { "epoch": 0.8878864850661294, "grad_norm": 0.560668315322065, "learning_rate": 2.491484184914842e-06, "loss": 0.4869, "step": 30411 }, { "epoch": 0.8879156812939768, "grad_norm": 0.6440586808678924, "learning_rate": 2.4908353609083538e-06, "loss": 0.56, "step": 30412 }, { "epoch": 0.8879448775218242, "grad_norm": 0.6254537348504202, "learning_rate": 2.4901865369018654e-06, "loss": 0.5723, "step": 30413 }, { "epoch": 0.8879740737496715, "grad_norm": 0.6174556796589087, "learning_rate": 2.4895377128953774e-06, "loss": 0.5285, "step": 30414 }, { "epoch": 0.8880032699775189, "grad_norm": 0.6594133789075386, "learning_rate": 2.488888888888889e-06, "loss": 0.6369, "step": 30415 }, { "epoch": 0.8880324662053662, "grad_norm": 0.6271257617424121, "learning_rate": 2.488240064882401e-06, "loss": 0.5518, "step": 30416 }, { "epoch": 0.8880616624332136, "grad_norm": 0.6733473807982129, "learning_rate": 2.4875912408759126e-06, "loss": 0.669, "step": 30417 }, { "epoch": 0.888090858661061, "grad_norm": 0.6513794637208814, "learning_rate": 2.4869424168694247e-06, "loss": 0.5948, "step": 30418 }, { "epoch": 0.8881200548889083, "grad_norm": 0.6367155764704402, "learning_rate": 2.4862935928629363e-06, "loss": 0.5879, "step": 30419 }, { "epoch": 0.8881492511167557, "grad_norm": 0.6332754751526638, "learning_rate": 2.485644768856448e-06, "loss": 0.5741, "step": 30420 }, { "epoch": 0.888178447344603, "grad_norm": 0.6476652314570595, "learning_rate": 2.48499594484996e-06, "loss": 0.6006, "step": 30421 }, { "epoch": 0.8882076435724504, "grad_norm": 0.614271488584354, "learning_rate": 2.4843471208434715e-06, "loss": 0.5023, "step": 30422 }, { "epoch": 0.8882368398002978, "grad_norm": 0.6658689545307894, "learning_rate": 2.483698296836983e-06, "loss": 0.6367, "step": 30423 }, { "epoch": 0.8882660360281451, "grad_norm": 0.6331500643449325, "learning_rate": 2.4830494728304947e-06, "loss": 0.5406, "step": 30424 }, { "epoch": 0.8882952322559925, "grad_norm": 0.6358184523971462, "learning_rate": 2.4824006488240067e-06, "loss": 0.5719, "step": 30425 }, { "epoch": 0.8883244284838399, "grad_norm": 0.6314707545172484, "learning_rate": 2.4817518248175183e-06, "loss": 0.5436, "step": 30426 }, { "epoch": 0.8883536247116872, "grad_norm": 0.615698414392606, "learning_rate": 2.48110300081103e-06, "loss": 0.5638, "step": 30427 }, { "epoch": 0.8883828209395346, "grad_norm": 0.663703189867318, "learning_rate": 2.480454176804542e-06, "loss": 0.6509, "step": 30428 }, { "epoch": 0.8884120171673819, "grad_norm": 0.6788959016558398, "learning_rate": 2.4798053527980536e-06, "loss": 0.6164, "step": 30429 }, { "epoch": 0.8884412133952293, "grad_norm": 0.6514518222930573, "learning_rate": 2.4791565287915656e-06, "loss": 0.5887, "step": 30430 }, { "epoch": 0.8884704096230767, "grad_norm": 0.6480303178394019, "learning_rate": 2.478507704785077e-06, "loss": 0.6026, "step": 30431 }, { "epoch": 0.888499605850924, "grad_norm": 0.6055571698853357, "learning_rate": 2.477858880778589e-06, "loss": 0.5377, "step": 30432 }, { "epoch": 0.8885288020787714, "grad_norm": 0.6930846339111791, "learning_rate": 2.477210056772101e-06, "loss": 0.6783, "step": 30433 }, { "epoch": 0.8885579983066187, "grad_norm": 0.6526963971743027, "learning_rate": 2.4765612327656124e-06, "loss": 0.5738, "step": 30434 }, { "epoch": 0.8885871945344661, "grad_norm": 0.6343338461284795, "learning_rate": 2.4759124087591245e-06, "loss": 0.5287, "step": 30435 }, { "epoch": 0.8886163907623135, "grad_norm": 0.6648372592682079, "learning_rate": 2.475263584752636e-06, "loss": 0.6663, "step": 30436 }, { "epoch": 0.8886455869901608, "grad_norm": 0.6500060941160845, "learning_rate": 2.474614760746148e-06, "loss": 0.6074, "step": 30437 }, { "epoch": 0.8886747832180082, "grad_norm": 0.623110137882688, "learning_rate": 2.4739659367396597e-06, "loss": 0.588, "step": 30438 }, { "epoch": 0.8887039794458556, "grad_norm": 0.7002546376952058, "learning_rate": 2.4733171127331713e-06, "loss": 0.6988, "step": 30439 }, { "epoch": 0.8887331756737029, "grad_norm": 0.6927193129433363, "learning_rate": 2.4726682887266833e-06, "loss": 0.7224, "step": 30440 }, { "epoch": 0.8887623719015503, "grad_norm": 0.6549073674092143, "learning_rate": 2.472019464720195e-06, "loss": 0.5966, "step": 30441 }, { "epoch": 0.8887915681293976, "grad_norm": 0.6548225932353421, "learning_rate": 2.4713706407137065e-06, "loss": 0.5582, "step": 30442 }, { "epoch": 0.888820764357245, "grad_norm": 0.6795994040727906, "learning_rate": 2.470721816707218e-06, "loss": 0.6453, "step": 30443 }, { "epoch": 0.8888499605850924, "grad_norm": 0.6463075552523838, "learning_rate": 2.47007299270073e-06, "loss": 0.5895, "step": 30444 }, { "epoch": 0.8888791568129397, "grad_norm": 0.6515402283987587, "learning_rate": 2.4694241686942418e-06, "loss": 0.5602, "step": 30445 }, { "epoch": 0.8889083530407871, "grad_norm": 0.6379156222970965, "learning_rate": 2.4687753446877534e-06, "loss": 0.5988, "step": 30446 }, { "epoch": 0.8889375492686344, "grad_norm": 0.6017751868932346, "learning_rate": 2.4681265206812654e-06, "loss": 0.508, "step": 30447 }, { "epoch": 0.8889667454964818, "grad_norm": 0.6202654659884359, "learning_rate": 2.467477696674777e-06, "loss": 0.565, "step": 30448 }, { "epoch": 0.8889959417243292, "grad_norm": 0.6888801058295907, "learning_rate": 2.466828872668289e-06, "loss": 0.675, "step": 30449 }, { "epoch": 0.8890251379521765, "grad_norm": 0.5789650617252426, "learning_rate": 2.4661800486618006e-06, "loss": 0.4969, "step": 30450 }, { "epoch": 0.8890543341800239, "grad_norm": 0.6883937174224987, "learning_rate": 2.4655312246553127e-06, "loss": 0.6365, "step": 30451 }, { "epoch": 0.8890835304078712, "grad_norm": 0.6457031131687437, "learning_rate": 2.4648824006488243e-06, "loss": 0.5918, "step": 30452 }, { "epoch": 0.8891127266357187, "grad_norm": 0.6309564085584881, "learning_rate": 2.464233576642336e-06, "loss": 0.6327, "step": 30453 }, { "epoch": 0.8891419228635661, "grad_norm": 0.6353262935742984, "learning_rate": 2.463584752635848e-06, "loss": 0.5981, "step": 30454 }, { "epoch": 0.8891711190914134, "grad_norm": 0.6199892571550122, "learning_rate": 2.4629359286293595e-06, "loss": 0.5661, "step": 30455 }, { "epoch": 0.8892003153192608, "grad_norm": 0.6712553816843821, "learning_rate": 2.4622871046228715e-06, "loss": 0.6393, "step": 30456 }, { "epoch": 0.8892295115471082, "grad_norm": 0.6927452816362927, "learning_rate": 2.461638280616383e-06, "loss": 0.705, "step": 30457 }, { "epoch": 0.8892587077749555, "grad_norm": 0.6736229013027466, "learning_rate": 2.4609894566098947e-06, "loss": 0.6278, "step": 30458 }, { "epoch": 0.8892879040028029, "grad_norm": 0.6903355591723224, "learning_rate": 2.4603406326034063e-06, "loss": 0.6384, "step": 30459 }, { "epoch": 0.8893171002306502, "grad_norm": 0.6312497829404001, "learning_rate": 2.4596918085969184e-06, "loss": 0.6018, "step": 30460 }, { "epoch": 0.8893462964584976, "grad_norm": 0.6719934012485945, "learning_rate": 2.45904298459043e-06, "loss": 0.6187, "step": 30461 }, { "epoch": 0.889375492686345, "grad_norm": 0.6749239905450926, "learning_rate": 2.4583941605839416e-06, "loss": 0.5962, "step": 30462 }, { "epoch": 0.8894046889141923, "grad_norm": 0.7007992672027749, "learning_rate": 2.4577453365774536e-06, "loss": 0.6846, "step": 30463 }, { "epoch": 0.8894338851420397, "grad_norm": 0.6477241348882434, "learning_rate": 2.4570965125709652e-06, "loss": 0.5532, "step": 30464 }, { "epoch": 0.889463081369887, "grad_norm": 0.6620784607354822, "learning_rate": 2.456447688564477e-06, "loss": 0.5849, "step": 30465 }, { "epoch": 0.8894922775977344, "grad_norm": 0.6315959913043011, "learning_rate": 2.455798864557989e-06, "loss": 0.5686, "step": 30466 }, { "epoch": 0.8895214738255818, "grad_norm": 0.6889727960265221, "learning_rate": 2.4551500405515004e-06, "loss": 0.664, "step": 30467 }, { "epoch": 0.8895506700534291, "grad_norm": 0.6380247512599154, "learning_rate": 2.4545012165450125e-06, "loss": 0.5914, "step": 30468 }, { "epoch": 0.8895798662812765, "grad_norm": 0.738548390886971, "learning_rate": 2.453852392538524e-06, "loss": 0.6538, "step": 30469 }, { "epoch": 0.8896090625091239, "grad_norm": 0.6217711800894441, "learning_rate": 2.453203568532036e-06, "loss": 0.5311, "step": 30470 }, { "epoch": 0.8896382587369712, "grad_norm": 0.6776736743015559, "learning_rate": 2.4525547445255477e-06, "loss": 0.6497, "step": 30471 }, { "epoch": 0.8896674549648186, "grad_norm": 0.6534012571948667, "learning_rate": 2.4519059205190593e-06, "loss": 0.5669, "step": 30472 }, { "epoch": 0.8896966511926659, "grad_norm": 0.6145198611939462, "learning_rate": 2.4512570965125713e-06, "loss": 0.5597, "step": 30473 }, { "epoch": 0.8897258474205133, "grad_norm": 0.6446882024207398, "learning_rate": 2.450608272506083e-06, "loss": 0.6145, "step": 30474 }, { "epoch": 0.8897550436483607, "grad_norm": 0.5955597833030104, "learning_rate": 2.449959448499595e-06, "loss": 0.5292, "step": 30475 }, { "epoch": 0.889784239876208, "grad_norm": 0.6324355243259954, "learning_rate": 2.4493106244931066e-06, "loss": 0.5622, "step": 30476 }, { "epoch": 0.8898134361040554, "grad_norm": 0.6676440299237988, "learning_rate": 2.448661800486618e-06, "loss": 0.5871, "step": 30477 }, { "epoch": 0.8898426323319027, "grad_norm": 0.619809288602995, "learning_rate": 2.4480129764801298e-06, "loss": 0.5629, "step": 30478 }, { "epoch": 0.8898718285597501, "grad_norm": 0.6658677964663167, "learning_rate": 2.447364152473642e-06, "loss": 0.6244, "step": 30479 }, { "epoch": 0.8899010247875975, "grad_norm": 0.641985728015002, "learning_rate": 2.4467153284671534e-06, "loss": 0.5739, "step": 30480 }, { "epoch": 0.8899302210154448, "grad_norm": 0.6519956564975538, "learning_rate": 2.446066504460665e-06, "loss": 0.6231, "step": 30481 }, { "epoch": 0.8899594172432922, "grad_norm": 0.6155072089921071, "learning_rate": 2.445417680454177e-06, "loss": 0.5639, "step": 30482 }, { "epoch": 0.8899886134711396, "grad_norm": 0.6460120902424514, "learning_rate": 2.4447688564476886e-06, "loss": 0.6045, "step": 30483 }, { "epoch": 0.8900178096989869, "grad_norm": 0.6285513285220374, "learning_rate": 2.4441200324412007e-06, "loss": 0.5961, "step": 30484 }, { "epoch": 0.8900470059268343, "grad_norm": 0.6546771665148486, "learning_rate": 2.4434712084347123e-06, "loss": 0.6206, "step": 30485 }, { "epoch": 0.8900762021546816, "grad_norm": 0.6263052295754665, "learning_rate": 2.442822384428224e-06, "loss": 0.5312, "step": 30486 }, { "epoch": 0.890105398382529, "grad_norm": 0.6391669448735099, "learning_rate": 2.442173560421736e-06, "loss": 0.6082, "step": 30487 }, { "epoch": 0.8901345946103764, "grad_norm": 0.63333815574216, "learning_rate": 2.4415247364152475e-06, "loss": 0.5572, "step": 30488 }, { "epoch": 0.8901637908382237, "grad_norm": 0.6447739974569179, "learning_rate": 2.4408759124087595e-06, "loss": 0.6249, "step": 30489 }, { "epoch": 0.8901929870660711, "grad_norm": 0.635319333734909, "learning_rate": 2.440227088402271e-06, "loss": 0.5817, "step": 30490 }, { "epoch": 0.8902221832939184, "grad_norm": 0.6223310611155464, "learning_rate": 2.4395782643957827e-06, "loss": 0.5308, "step": 30491 }, { "epoch": 0.8902513795217658, "grad_norm": 0.640382947115473, "learning_rate": 2.4389294403892948e-06, "loss": 0.6223, "step": 30492 }, { "epoch": 0.8902805757496132, "grad_norm": 0.6879948769077155, "learning_rate": 2.4382806163828064e-06, "loss": 0.6469, "step": 30493 }, { "epoch": 0.8903097719774605, "grad_norm": 0.7155768827224273, "learning_rate": 2.437631792376318e-06, "loss": 0.6561, "step": 30494 }, { "epoch": 0.8903389682053079, "grad_norm": 0.6015783012515652, "learning_rate": 2.43698296836983e-06, "loss": 0.5004, "step": 30495 }, { "epoch": 0.8903681644331553, "grad_norm": 0.74883385149902, "learning_rate": 2.4363341443633416e-06, "loss": 0.694, "step": 30496 }, { "epoch": 0.8903973606610026, "grad_norm": 0.6592723992120985, "learning_rate": 2.4356853203568532e-06, "loss": 0.5711, "step": 30497 }, { "epoch": 0.89042655688885, "grad_norm": 0.6616521440234302, "learning_rate": 2.4350364963503652e-06, "loss": 0.5884, "step": 30498 }, { "epoch": 0.8904557531166973, "grad_norm": 0.6930010227657079, "learning_rate": 2.434387672343877e-06, "loss": 0.6623, "step": 30499 }, { "epoch": 0.8904849493445447, "grad_norm": 0.6806451905809802, "learning_rate": 2.4337388483373885e-06, "loss": 0.6543, "step": 30500 }, { "epoch": 0.8905141455723921, "grad_norm": 0.627844723682933, "learning_rate": 2.4330900243309005e-06, "loss": 0.5787, "step": 30501 }, { "epoch": 0.8905433418002394, "grad_norm": 0.6709110259878223, "learning_rate": 2.432441200324412e-06, "loss": 0.5723, "step": 30502 }, { "epoch": 0.8905725380280868, "grad_norm": 0.6118884694590498, "learning_rate": 2.431792376317924e-06, "loss": 0.5064, "step": 30503 }, { "epoch": 0.8906017342559341, "grad_norm": 0.6290781747538735, "learning_rate": 2.4311435523114357e-06, "loss": 0.5594, "step": 30504 }, { "epoch": 0.8906309304837815, "grad_norm": 0.6395937808243238, "learning_rate": 2.4304947283049473e-06, "loss": 0.5838, "step": 30505 }, { "epoch": 0.8906601267116289, "grad_norm": 0.6138041737126767, "learning_rate": 2.4298459042984593e-06, "loss": 0.5456, "step": 30506 }, { "epoch": 0.8906893229394762, "grad_norm": 0.6503396016674716, "learning_rate": 2.429197080291971e-06, "loss": 0.5599, "step": 30507 }, { "epoch": 0.8907185191673236, "grad_norm": 0.6279169487061004, "learning_rate": 2.428548256285483e-06, "loss": 0.5738, "step": 30508 }, { "epoch": 0.890747715395171, "grad_norm": 0.6448169326464259, "learning_rate": 2.4278994322789946e-06, "loss": 0.6027, "step": 30509 }, { "epoch": 0.8907769116230183, "grad_norm": 0.6317045309397925, "learning_rate": 2.4272506082725066e-06, "loss": 0.5675, "step": 30510 }, { "epoch": 0.8908061078508657, "grad_norm": 0.703712690242452, "learning_rate": 2.426601784266018e-06, "loss": 0.6479, "step": 30511 }, { "epoch": 0.890835304078713, "grad_norm": 0.6491789976311133, "learning_rate": 2.42595296025953e-06, "loss": 0.6099, "step": 30512 }, { "epoch": 0.8908645003065604, "grad_norm": 0.59021940481654, "learning_rate": 2.4253041362530414e-06, "loss": 0.5178, "step": 30513 }, { "epoch": 0.8908936965344078, "grad_norm": 0.6171386633717725, "learning_rate": 2.4246553122465534e-06, "loss": 0.5684, "step": 30514 }, { "epoch": 0.8909228927622551, "grad_norm": 0.6408236579072268, "learning_rate": 2.424006488240065e-06, "loss": 0.5743, "step": 30515 }, { "epoch": 0.8909520889901025, "grad_norm": 0.6767874140830782, "learning_rate": 2.4233576642335767e-06, "loss": 0.6074, "step": 30516 }, { "epoch": 0.8909812852179498, "grad_norm": 0.652252683557003, "learning_rate": 2.4227088402270883e-06, "loss": 0.5946, "step": 30517 }, { "epoch": 0.8910104814457972, "grad_norm": 0.6285777009444089, "learning_rate": 2.4220600162206003e-06, "loss": 0.5187, "step": 30518 }, { "epoch": 0.8910396776736446, "grad_norm": 0.6271591392226282, "learning_rate": 2.421411192214112e-06, "loss": 0.5984, "step": 30519 }, { "epoch": 0.8910688739014919, "grad_norm": 0.6654815693099065, "learning_rate": 2.420762368207624e-06, "loss": 0.6058, "step": 30520 }, { "epoch": 0.8910980701293393, "grad_norm": 0.6734057717557727, "learning_rate": 2.4201135442011355e-06, "loss": 0.5736, "step": 30521 }, { "epoch": 0.8911272663571866, "grad_norm": 0.6364371131532793, "learning_rate": 2.4194647201946475e-06, "loss": 0.5986, "step": 30522 }, { "epoch": 0.891156462585034, "grad_norm": 0.6705209854256248, "learning_rate": 2.418815896188159e-06, "loss": 0.6036, "step": 30523 }, { "epoch": 0.8911856588128814, "grad_norm": 0.6929236821827884, "learning_rate": 2.4181670721816708e-06, "loss": 0.7181, "step": 30524 }, { "epoch": 0.8912148550407287, "grad_norm": 0.6911182061537445, "learning_rate": 2.4175182481751828e-06, "loss": 0.6996, "step": 30525 }, { "epoch": 0.8912440512685761, "grad_norm": 0.6216683867876754, "learning_rate": 2.4168694241686944e-06, "loss": 0.5354, "step": 30526 }, { "epoch": 0.8912732474964234, "grad_norm": 0.6782867698021516, "learning_rate": 2.4162206001622064e-06, "loss": 0.6749, "step": 30527 }, { "epoch": 0.8913024437242708, "grad_norm": 0.6314707146443339, "learning_rate": 2.415571776155718e-06, "loss": 0.5445, "step": 30528 }, { "epoch": 0.8913316399521182, "grad_norm": 0.6543812570240776, "learning_rate": 2.41492295214923e-06, "loss": 0.6157, "step": 30529 }, { "epoch": 0.8913608361799655, "grad_norm": 0.6888728223306773, "learning_rate": 2.4142741281427416e-06, "loss": 0.6463, "step": 30530 }, { "epoch": 0.8913900324078129, "grad_norm": 0.6507554793208735, "learning_rate": 2.4136253041362532e-06, "loss": 0.5964, "step": 30531 }, { "epoch": 0.8914192286356603, "grad_norm": 0.6551528503440635, "learning_rate": 2.412976480129765e-06, "loss": 0.5983, "step": 30532 }, { "epoch": 0.8914484248635076, "grad_norm": 0.6108651746400326, "learning_rate": 2.412327656123277e-06, "loss": 0.5589, "step": 30533 }, { "epoch": 0.891477621091355, "grad_norm": 0.6330551645868729, "learning_rate": 2.4116788321167885e-06, "loss": 0.5676, "step": 30534 }, { "epoch": 0.8915068173192023, "grad_norm": 0.649524850125464, "learning_rate": 2.4110300081103e-06, "loss": 0.5613, "step": 30535 }, { "epoch": 0.8915360135470497, "grad_norm": 0.6712674551760465, "learning_rate": 2.410381184103812e-06, "loss": 0.655, "step": 30536 }, { "epoch": 0.8915652097748971, "grad_norm": 0.7286645472980179, "learning_rate": 2.4097323600973237e-06, "loss": 0.6212, "step": 30537 }, { "epoch": 0.8915944060027444, "grad_norm": 0.6163476847605427, "learning_rate": 2.4090835360908353e-06, "loss": 0.5825, "step": 30538 }, { "epoch": 0.8916236022305918, "grad_norm": 0.6744939503082166, "learning_rate": 2.4084347120843473e-06, "loss": 0.6066, "step": 30539 }, { "epoch": 0.8916527984584391, "grad_norm": 0.6265448973296077, "learning_rate": 2.407785888077859e-06, "loss": 0.5652, "step": 30540 }, { "epoch": 0.8916819946862865, "grad_norm": 0.6711244229203888, "learning_rate": 2.407137064071371e-06, "loss": 0.6614, "step": 30541 }, { "epoch": 0.8917111909141339, "grad_norm": 0.5786663115816898, "learning_rate": 2.4064882400648826e-06, "loss": 0.4987, "step": 30542 }, { "epoch": 0.8917403871419812, "grad_norm": 0.6046208025829125, "learning_rate": 2.4058394160583946e-06, "loss": 0.5407, "step": 30543 }, { "epoch": 0.8917695833698286, "grad_norm": 0.7074940905696712, "learning_rate": 2.4051905920519062e-06, "loss": 0.6657, "step": 30544 }, { "epoch": 0.891798779597676, "grad_norm": 0.612935532171159, "learning_rate": 2.404541768045418e-06, "loss": 0.549, "step": 30545 }, { "epoch": 0.8918279758255233, "grad_norm": 0.7267940971530358, "learning_rate": 2.40389294403893e-06, "loss": 0.6856, "step": 30546 }, { "epoch": 0.8918571720533707, "grad_norm": 0.6654542504790881, "learning_rate": 2.4032441200324414e-06, "loss": 0.5877, "step": 30547 }, { "epoch": 0.891886368281218, "grad_norm": 0.6256117631990975, "learning_rate": 2.402595296025953e-06, "loss": 0.5309, "step": 30548 }, { "epoch": 0.8919155645090654, "grad_norm": 0.5859293812547635, "learning_rate": 2.401946472019465e-06, "loss": 0.5224, "step": 30549 }, { "epoch": 0.8919447607369128, "grad_norm": 0.6544417173796828, "learning_rate": 2.4012976480129767e-06, "loss": 0.615, "step": 30550 }, { "epoch": 0.8919739569647601, "grad_norm": 0.6254889870740398, "learning_rate": 2.4006488240064883e-06, "loss": 0.541, "step": 30551 }, { "epoch": 0.8920031531926075, "grad_norm": 0.6922998639389737, "learning_rate": 2.4000000000000003e-06, "loss": 0.6683, "step": 30552 }, { "epoch": 0.8920323494204548, "grad_norm": 0.5873691121586084, "learning_rate": 2.399351175993512e-06, "loss": 0.498, "step": 30553 }, { "epoch": 0.8920615456483022, "grad_norm": 0.645899044610316, "learning_rate": 2.3987023519870235e-06, "loss": 0.5975, "step": 30554 }, { "epoch": 0.8920907418761496, "grad_norm": 0.6785987256451668, "learning_rate": 2.3980535279805355e-06, "loss": 0.6506, "step": 30555 }, { "epoch": 0.8921199381039969, "grad_norm": 0.60960224945666, "learning_rate": 2.397404703974047e-06, "loss": 0.5081, "step": 30556 }, { "epoch": 0.8921491343318443, "grad_norm": 0.6632141801721848, "learning_rate": 2.3967558799675588e-06, "loss": 0.6172, "step": 30557 }, { "epoch": 0.8921783305596916, "grad_norm": 0.5710133147621046, "learning_rate": 2.3961070559610708e-06, "loss": 0.4593, "step": 30558 }, { "epoch": 0.892207526787539, "grad_norm": 0.7245340712094066, "learning_rate": 2.3954582319545824e-06, "loss": 0.6947, "step": 30559 }, { "epoch": 0.8922367230153864, "grad_norm": 0.6767824220159002, "learning_rate": 2.3948094079480944e-06, "loss": 0.6587, "step": 30560 }, { "epoch": 0.8922659192432337, "grad_norm": 0.6069682845281728, "learning_rate": 2.394160583941606e-06, "loss": 0.5263, "step": 30561 }, { "epoch": 0.8922951154710811, "grad_norm": 0.6595185886283252, "learning_rate": 2.393511759935118e-06, "loss": 0.6501, "step": 30562 }, { "epoch": 0.8923243116989285, "grad_norm": 0.6432523554570826, "learning_rate": 2.3928629359286296e-06, "loss": 0.6132, "step": 30563 }, { "epoch": 0.8923535079267758, "grad_norm": 0.6263687735307224, "learning_rate": 2.3922141119221413e-06, "loss": 0.5402, "step": 30564 }, { "epoch": 0.8923827041546232, "grad_norm": 0.6407584302677776, "learning_rate": 2.3915652879156533e-06, "loss": 0.599, "step": 30565 }, { "epoch": 0.8924119003824705, "grad_norm": 0.6837385806054886, "learning_rate": 2.390916463909165e-06, "loss": 0.6487, "step": 30566 }, { "epoch": 0.8924410966103179, "grad_norm": 0.7062053215017264, "learning_rate": 2.3902676399026765e-06, "loss": 0.6827, "step": 30567 }, { "epoch": 0.8924702928381653, "grad_norm": 0.6670935117423997, "learning_rate": 2.3896188158961885e-06, "loss": 0.6089, "step": 30568 }, { "epoch": 0.8924994890660126, "grad_norm": 0.5868900924793768, "learning_rate": 2.3889699918897e-06, "loss": 0.5072, "step": 30569 }, { "epoch": 0.89252868529386, "grad_norm": 0.639398570201054, "learning_rate": 2.3883211678832117e-06, "loss": 0.5571, "step": 30570 }, { "epoch": 0.8925578815217073, "grad_norm": 0.665517237535639, "learning_rate": 2.3876723438767233e-06, "loss": 0.5896, "step": 30571 }, { "epoch": 0.8925870777495547, "grad_norm": 0.6072032081793792, "learning_rate": 2.3870235198702354e-06, "loss": 0.5327, "step": 30572 }, { "epoch": 0.8926162739774022, "grad_norm": 0.7158447985206139, "learning_rate": 2.386374695863747e-06, "loss": 0.7335, "step": 30573 }, { "epoch": 0.8926454702052495, "grad_norm": 0.6354149796102321, "learning_rate": 2.385725871857259e-06, "loss": 0.5268, "step": 30574 }, { "epoch": 0.8926746664330969, "grad_norm": 0.6436810797581918, "learning_rate": 2.3850770478507706e-06, "loss": 0.5899, "step": 30575 }, { "epoch": 0.8927038626609443, "grad_norm": 0.6698068473066568, "learning_rate": 2.384428223844282e-06, "loss": 0.6235, "step": 30576 }, { "epoch": 0.8927330588887916, "grad_norm": 0.6456539687263575, "learning_rate": 2.3837793998377942e-06, "loss": 0.6058, "step": 30577 }, { "epoch": 0.892762255116639, "grad_norm": 0.6386528145144246, "learning_rate": 2.383130575831306e-06, "loss": 0.6068, "step": 30578 }, { "epoch": 0.8927914513444863, "grad_norm": 0.6287695388105958, "learning_rate": 2.382481751824818e-06, "loss": 0.5587, "step": 30579 }, { "epoch": 0.8928206475723337, "grad_norm": 0.6154806745946995, "learning_rate": 2.3818329278183295e-06, "loss": 0.5594, "step": 30580 }, { "epoch": 0.8928498438001811, "grad_norm": 0.6179865677506455, "learning_rate": 2.3811841038118415e-06, "loss": 0.5373, "step": 30581 }, { "epoch": 0.8928790400280284, "grad_norm": 0.6162849212242427, "learning_rate": 2.380535279805353e-06, "loss": 0.567, "step": 30582 }, { "epoch": 0.8929082362558758, "grad_norm": 0.6618143047911327, "learning_rate": 2.3798864557988647e-06, "loss": 0.5792, "step": 30583 }, { "epoch": 0.8929374324837231, "grad_norm": 0.5894906245267855, "learning_rate": 2.3792376317923767e-06, "loss": 0.524, "step": 30584 }, { "epoch": 0.8929666287115705, "grad_norm": 0.6120849579824607, "learning_rate": 2.3785888077858883e-06, "loss": 0.5315, "step": 30585 }, { "epoch": 0.8929958249394179, "grad_norm": 0.6797027279427525, "learning_rate": 2.3779399837794e-06, "loss": 0.6877, "step": 30586 }, { "epoch": 0.8930250211672652, "grad_norm": 0.6453001670266307, "learning_rate": 2.377291159772912e-06, "loss": 0.5978, "step": 30587 }, { "epoch": 0.8930542173951126, "grad_norm": 0.6396161470837395, "learning_rate": 2.3766423357664236e-06, "loss": 0.6062, "step": 30588 }, { "epoch": 0.89308341362296, "grad_norm": 0.644173548020943, "learning_rate": 2.375993511759935e-06, "loss": 0.5721, "step": 30589 }, { "epoch": 0.8931126098508073, "grad_norm": 0.6745335379316114, "learning_rate": 2.3753446877534468e-06, "loss": 0.6649, "step": 30590 }, { "epoch": 0.8931418060786547, "grad_norm": 0.6949951926323756, "learning_rate": 2.3746958637469588e-06, "loss": 0.6664, "step": 30591 }, { "epoch": 0.893171002306502, "grad_norm": 0.6173884582799127, "learning_rate": 2.3740470397404704e-06, "loss": 0.5517, "step": 30592 }, { "epoch": 0.8932001985343494, "grad_norm": 0.6674553520431951, "learning_rate": 2.3733982157339824e-06, "loss": 0.6064, "step": 30593 }, { "epoch": 0.8932293947621968, "grad_norm": 0.6536053085311729, "learning_rate": 2.372749391727494e-06, "loss": 0.5804, "step": 30594 }, { "epoch": 0.8932585909900441, "grad_norm": 0.6164438078359528, "learning_rate": 2.372100567721006e-06, "loss": 0.5328, "step": 30595 }, { "epoch": 0.8932877872178915, "grad_norm": 0.6446919345559254, "learning_rate": 2.3714517437145177e-06, "loss": 0.5575, "step": 30596 }, { "epoch": 0.8933169834457388, "grad_norm": 0.6513497365571889, "learning_rate": 2.3708029197080293e-06, "loss": 0.6181, "step": 30597 }, { "epoch": 0.8933461796735862, "grad_norm": 0.6798562781756123, "learning_rate": 2.3701540957015413e-06, "loss": 0.6497, "step": 30598 }, { "epoch": 0.8933753759014336, "grad_norm": 0.7086576672857515, "learning_rate": 2.369505271695053e-06, "loss": 0.636, "step": 30599 }, { "epoch": 0.8934045721292809, "grad_norm": 0.5764417324885567, "learning_rate": 2.368856447688565e-06, "loss": 0.4813, "step": 30600 }, { "epoch": 0.8934337683571283, "grad_norm": 0.6613143824559176, "learning_rate": 2.3682076236820765e-06, "loss": 0.6366, "step": 30601 }, { "epoch": 0.8934629645849756, "grad_norm": 0.6358992528890361, "learning_rate": 2.367558799675588e-06, "loss": 0.5666, "step": 30602 }, { "epoch": 0.893492160812823, "grad_norm": 0.6622190033435753, "learning_rate": 2.3669099756691e-06, "loss": 0.6267, "step": 30603 }, { "epoch": 0.8935213570406704, "grad_norm": 0.7278985780244684, "learning_rate": 2.3662611516626118e-06, "loss": 0.7079, "step": 30604 }, { "epoch": 0.8935505532685177, "grad_norm": 0.6825964257012411, "learning_rate": 2.3656123276561234e-06, "loss": 0.6588, "step": 30605 }, { "epoch": 0.8935797494963651, "grad_norm": 0.6456018215869211, "learning_rate": 2.3649635036496354e-06, "loss": 0.5931, "step": 30606 }, { "epoch": 0.8936089457242125, "grad_norm": 0.6588691724867324, "learning_rate": 2.364314679643147e-06, "loss": 0.6463, "step": 30607 }, { "epoch": 0.8936381419520598, "grad_norm": 0.6359817113297882, "learning_rate": 2.3636658556366586e-06, "loss": 0.6168, "step": 30608 }, { "epoch": 0.8936673381799072, "grad_norm": 0.753727675722431, "learning_rate": 2.36301703163017e-06, "loss": 0.63, "step": 30609 }, { "epoch": 0.8936965344077545, "grad_norm": 0.8142520134081047, "learning_rate": 2.3623682076236822e-06, "loss": 0.6932, "step": 30610 }, { "epoch": 0.8937257306356019, "grad_norm": 0.6262395558978571, "learning_rate": 2.361719383617194e-06, "loss": 0.5751, "step": 30611 }, { "epoch": 0.8937549268634493, "grad_norm": 0.6502515592217463, "learning_rate": 2.361070559610706e-06, "loss": 0.5952, "step": 30612 }, { "epoch": 0.8937841230912966, "grad_norm": 0.6040923710083751, "learning_rate": 2.3604217356042175e-06, "loss": 0.5437, "step": 30613 }, { "epoch": 0.893813319319144, "grad_norm": 0.661215563449801, "learning_rate": 2.3597729115977295e-06, "loss": 0.5498, "step": 30614 }, { "epoch": 0.8938425155469913, "grad_norm": 0.6102491822764561, "learning_rate": 2.359124087591241e-06, "loss": 0.5502, "step": 30615 }, { "epoch": 0.8938717117748387, "grad_norm": 0.6801898833866402, "learning_rate": 2.3584752635847527e-06, "loss": 0.6441, "step": 30616 }, { "epoch": 0.8939009080026861, "grad_norm": 0.6138949755306755, "learning_rate": 2.3578264395782647e-06, "loss": 0.5524, "step": 30617 }, { "epoch": 0.8939301042305334, "grad_norm": 0.6872488713288434, "learning_rate": 2.3571776155717763e-06, "loss": 0.6382, "step": 30618 }, { "epoch": 0.8939593004583808, "grad_norm": 0.6349158689031533, "learning_rate": 2.3565287915652883e-06, "loss": 0.5809, "step": 30619 }, { "epoch": 0.8939884966862282, "grad_norm": 0.6946062064161858, "learning_rate": 2.3558799675588e-06, "loss": 0.5415, "step": 30620 }, { "epoch": 0.8940176929140755, "grad_norm": 0.6671315125865526, "learning_rate": 2.3552311435523116e-06, "loss": 0.6369, "step": 30621 }, { "epoch": 0.8940468891419229, "grad_norm": 0.6681102529139094, "learning_rate": 2.3545823195458236e-06, "loss": 0.5853, "step": 30622 }, { "epoch": 0.8940760853697702, "grad_norm": 0.6064066102793648, "learning_rate": 2.353933495539335e-06, "loss": 0.572, "step": 30623 }, { "epoch": 0.8941052815976176, "grad_norm": 0.6517895433869632, "learning_rate": 2.353284671532847e-06, "loss": 0.5383, "step": 30624 }, { "epoch": 0.894134477825465, "grad_norm": 0.6084083864244413, "learning_rate": 2.3526358475263584e-06, "loss": 0.5253, "step": 30625 }, { "epoch": 0.8941636740533123, "grad_norm": 0.6512792573765609, "learning_rate": 2.3519870235198704e-06, "loss": 0.5842, "step": 30626 }, { "epoch": 0.8941928702811597, "grad_norm": 0.625040726212978, "learning_rate": 2.351338199513382e-06, "loss": 0.5935, "step": 30627 }, { "epoch": 0.894222066509007, "grad_norm": 0.6086072981174745, "learning_rate": 2.350689375506894e-06, "loss": 0.5335, "step": 30628 }, { "epoch": 0.8942512627368544, "grad_norm": 0.681548809701648, "learning_rate": 2.3500405515004057e-06, "loss": 0.6503, "step": 30629 }, { "epoch": 0.8942804589647018, "grad_norm": 0.6415313558598342, "learning_rate": 2.3493917274939173e-06, "loss": 0.5905, "step": 30630 }, { "epoch": 0.8943096551925491, "grad_norm": 0.6116780219392507, "learning_rate": 2.3487429034874293e-06, "loss": 0.5269, "step": 30631 }, { "epoch": 0.8943388514203965, "grad_norm": 0.6561586360977906, "learning_rate": 2.348094079480941e-06, "loss": 0.6237, "step": 30632 }, { "epoch": 0.8943680476482438, "grad_norm": 0.6390707784209896, "learning_rate": 2.347445255474453e-06, "loss": 0.5843, "step": 30633 }, { "epoch": 0.8943972438760912, "grad_norm": 0.6353300007069145, "learning_rate": 2.3467964314679645e-06, "loss": 0.6082, "step": 30634 }, { "epoch": 0.8944264401039386, "grad_norm": 0.635254543702295, "learning_rate": 2.346147607461476e-06, "loss": 0.5342, "step": 30635 }, { "epoch": 0.8944556363317859, "grad_norm": 0.689415036028792, "learning_rate": 2.345498783454988e-06, "loss": 0.6271, "step": 30636 }, { "epoch": 0.8944848325596333, "grad_norm": 0.6721618183545781, "learning_rate": 2.3448499594484998e-06, "loss": 0.5639, "step": 30637 }, { "epoch": 0.8945140287874807, "grad_norm": 0.6712830488556032, "learning_rate": 2.3442011354420118e-06, "loss": 0.6517, "step": 30638 }, { "epoch": 0.894543225015328, "grad_norm": 0.6239810009937087, "learning_rate": 2.3435523114355234e-06, "loss": 0.5691, "step": 30639 }, { "epoch": 0.8945724212431754, "grad_norm": 0.6383063875654745, "learning_rate": 2.342903487429035e-06, "loss": 0.585, "step": 30640 }, { "epoch": 0.8946016174710227, "grad_norm": 0.6619132406013883, "learning_rate": 2.342254663422547e-06, "loss": 0.5912, "step": 30641 }, { "epoch": 0.8946308136988701, "grad_norm": 0.715902060811435, "learning_rate": 2.3416058394160586e-06, "loss": 0.6488, "step": 30642 }, { "epoch": 0.8946600099267175, "grad_norm": 0.6798287187821391, "learning_rate": 2.3409570154095702e-06, "loss": 0.6673, "step": 30643 }, { "epoch": 0.8946892061545648, "grad_norm": 0.6141932856004374, "learning_rate": 2.340308191403082e-06, "loss": 0.5435, "step": 30644 }, { "epoch": 0.8947184023824122, "grad_norm": 0.6367593200180341, "learning_rate": 2.339659367396594e-06, "loss": 0.585, "step": 30645 }, { "epoch": 0.8947475986102595, "grad_norm": 0.6077372822795765, "learning_rate": 2.3390105433901055e-06, "loss": 0.5517, "step": 30646 }, { "epoch": 0.8947767948381069, "grad_norm": 0.6366048259777702, "learning_rate": 2.3383617193836175e-06, "loss": 0.5936, "step": 30647 }, { "epoch": 0.8948059910659543, "grad_norm": 0.6203299409742968, "learning_rate": 2.337712895377129e-06, "loss": 0.538, "step": 30648 }, { "epoch": 0.8948351872938016, "grad_norm": 0.6254250376290182, "learning_rate": 2.3370640713706407e-06, "loss": 0.6066, "step": 30649 }, { "epoch": 0.894864383521649, "grad_norm": 0.674615245071547, "learning_rate": 2.3364152473641527e-06, "loss": 0.6556, "step": 30650 }, { "epoch": 0.8948935797494963, "grad_norm": 0.6929502978041857, "learning_rate": 2.3357664233576643e-06, "loss": 0.6539, "step": 30651 }, { "epoch": 0.8949227759773437, "grad_norm": 0.6291220140400168, "learning_rate": 2.3351175993511764e-06, "loss": 0.5886, "step": 30652 }, { "epoch": 0.8949519722051911, "grad_norm": 0.6924658119653313, "learning_rate": 2.334468775344688e-06, "loss": 0.6457, "step": 30653 }, { "epoch": 0.8949811684330384, "grad_norm": 0.6330605270679706, "learning_rate": 2.3338199513382e-06, "loss": 0.6336, "step": 30654 }, { "epoch": 0.8950103646608858, "grad_norm": 0.6604195630899905, "learning_rate": 2.3331711273317116e-06, "loss": 0.5592, "step": 30655 }, { "epoch": 0.8950395608887332, "grad_norm": 0.6305015716684207, "learning_rate": 2.332522303325223e-06, "loss": 0.5889, "step": 30656 }, { "epoch": 0.8950687571165805, "grad_norm": 0.6911038838633143, "learning_rate": 2.3318734793187352e-06, "loss": 0.7439, "step": 30657 }, { "epoch": 0.8950979533444279, "grad_norm": 0.6329843307960595, "learning_rate": 2.331224655312247e-06, "loss": 0.574, "step": 30658 }, { "epoch": 0.8951271495722752, "grad_norm": 0.6387934705523486, "learning_rate": 2.3305758313057584e-06, "loss": 0.5819, "step": 30659 }, { "epoch": 0.8951563458001226, "grad_norm": 0.6382790421926949, "learning_rate": 2.32992700729927e-06, "loss": 0.5887, "step": 30660 }, { "epoch": 0.89518554202797, "grad_norm": 0.6773845607895298, "learning_rate": 2.329278183292782e-06, "loss": 0.5808, "step": 30661 }, { "epoch": 0.8952147382558173, "grad_norm": 0.670086207844895, "learning_rate": 2.3286293592862937e-06, "loss": 0.6051, "step": 30662 }, { "epoch": 0.8952439344836647, "grad_norm": 0.646650522443794, "learning_rate": 2.3279805352798053e-06, "loss": 0.5734, "step": 30663 }, { "epoch": 0.895273130711512, "grad_norm": 0.6795260688722963, "learning_rate": 2.3273317112733173e-06, "loss": 0.6149, "step": 30664 }, { "epoch": 0.8953023269393594, "grad_norm": 0.6346546596310023, "learning_rate": 2.326682887266829e-06, "loss": 0.5782, "step": 30665 }, { "epoch": 0.8953315231672068, "grad_norm": 0.6217444973964268, "learning_rate": 2.326034063260341e-06, "loss": 0.5434, "step": 30666 }, { "epoch": 0.8953607193950541, "grad_norm": 0.6231639714953334, "learning_rate": 2.3253852392538525e-06, "loss": 0.5228, "step": 30667 }, { "epoch": 0.8953899156229015, "grad_norm": 0.637955604098416, "learning_rate": 2.324736415247364e-06, "loss": 0.5701, "step": 30668 }, { "epoch": 0.8954191118507489, "grad_norm": 0.6556418045032711, "learning_rate": 2.324087591240876e-06, "loss": 0.644, "step": 30669 }, { "epoch": 0.8954483080785962, "grad_norm": 0.6470099113536488, "learning_rate": 2.3234387672343878e-06, "loss": 0.5823, "step": 30670 }, { "epoch": 0.8954775043064436, "grad_norm": 0.6222227315948253, "learning_rate": 2.3227899432278998e-06, "loss": 0.5535, "step": 30671 }, { "epoch": 0.8955067005342909, "grad_norm": 0.6564607956744674, "learning_rate": 2.3221411192214114e-06, "loss": 0.6211, "step": 30672 }, { "epoch": 0.8955358967621383, "grad_norm": 0.6524129798327689, "learning_rate": 2.3214922952149234e-06, "loss": 0.5827, "step": 30673 }, { "epoch": 0.8955650929899857, "grad_norm": 0.6899177524835277, "learning_rate": 2.320843471208435e-06, "loss": 0.6769, "step": 30674 }, { "epoch": 0.895594289217833, "grad_norm": 0.6430127131141603, "learning_rate": 2.3201946472019466e-06, "loss": 0.5882, "step": 30675 }, { "epoch": 0.8956234854456804, "grad_norm": 0.6395228389881219, "learning_rate": 2.3195458231954587e-06, "loss": 0.5493, "step": 30676 }, { "epoch": 0.8956526816735277, "grad_norm": 0.5937255588876837, "learning_rate": 2.3188969991889703e-06, "loss": 0.4945, "step": 30677 }, { "epoch": 0.8956818779013751, "grad_norm": 0.6649775612442043, "learning_rate": 2.318248175182482e-06, "loss": 0.6187, "step": 30678 }, { "epoch": 0.8957110741292225, "grad_norm": 0.5910598133679748, "learning_rate": 2.3175993511759935e-06, "loss": 0.5303, "step": 30679 }, { "epoch": 0.8957402703570698, "grad_norm": 0.5983742992599846, "learning_rate": 2.3169505271695055e-06, "loss": 0.4956, "step": 30680 }, { "epoch": 0.8957694665849172, "grad_norm": 0.6025510368892995, "learning_rate": 2.316301703163017e-06, "loss": 0.5452, "step": 30681 }, { "epoch": 0.8957986628127645, "grad_norm": 0.7219603740341664, "learning_rate": 2.3156528791565287e-06, "loss": 0.6427, "step": 30682 }, { "epoch": 0.8958278590406119, "grad_norm": 0.6245244772994328, "learning_rate": 2.3150040551500407e-06, "loss": 0.5404, "step": 30683 }, { "epoch": 0.8958570552684593, "grad_norm": 0.6433820649016314, "learning_rate": 2.3143552311435523e-06, "loss": 0.6298, "step": 30684 }, { "epoch": 0.8958862514963066, "grad_norm": 0.6120145412505705, "learning_rate": 2.3137064071370644e-06, "loss": 0.5033, "step": 30685 }, { "epoch": 0.895915447724154, "grad_norm": 0.6721773059687646, "learning_rate": 2.313057583130576e-06, "loss": 0.6243, "step": 30686 }, { "epoch": 0.8959446439520014, "grad_norm": 0.6802610426708242, "learning_rate": 2.312408759124088e-06, "loss": 0.6121, "step": 30687 }, { "epoch": 0.8959738401798487, "grad_norm": 0.6106178522983282, "learning_rate": 2.3117599351175996e-06, "loss": 0.565, "step": 30688 }, { "epoch": 0.8960030364076961, "grad_norm": 0.5944390341856644, "learning_rate": 2.311111111111111e-06, "loss": 0.5183, "step": 30689 }, { "epoch": 0.8960322326355434, "grad_norm": 0.679400060666626, "learning_rate": 2.3104622871046232e-06, "loss": 0.6912, "step": 30690 }, { "epoch": 0.8960614288633908, "grad_norm": 0.696363168097293, "learning_rate": 2.309813463098135e-06, "loss": 0.6913, "step": 30691 }, { "epoch": 0.8960906250912382, "grad_norm": 0.7300514182022415, "learning_rate": 2.309164639091647e-06, "loss": 0.6719, "step": 30692 }, { "epoch": 0.8961198213190855, "grad_norm": 0.6624381227234392, "learning_rate": 2.3085158150851585e-06, "loss": 0.6021, "step": 30693 }, { "epoch": 0.896149017546933, "grad_norm": 0.6835989883191057, "learning_rate": 2.30786699107867e-06, "loss": 0.6853, "step": 30694 }, { "epoch": 0.8961782137747804, "grad_norm": 0.6466083840381408, "learning_rate": 2.307218167072182e-06, "loss": 0.5963, "step": 30695 }, { "epoch": 0.8962074100026277, "grad_norm": 0.6584020034248719, "learning_rate": 2.3065693430656937e-06, "loss": 0.6179, "step": 30696 }, { "epoch": 0.8962366062304751, "grad_norm": 0.6613533424951735, "learning_rate": 2.3059205190592053e-06, "loss": 0.5891, "step": 30697 }, { "epoch": 0.8962658024583224, "grad_norm": 0.631069974374697, "learning_rate": 2.305271695052717e-06, "loss": 0.5712, "step": 30698 }, { "epoch": 0.8962949986861698, "grad_norm": 0.6644059458010724, "learning_rate": 2.304622871046229e-06, "loss": 0.6205, "step": 30699 }, { "epoch": 0.8963241949140172, "grad_norm": 0.6086875121479035, "learning_rate": 2.3039740470397405e-06, "loss": 0.5372, "step": 30700 }, { "epoch": 0.8963533911418645, "grad_norm": 0.6732804661524092, "learning_rate": 2.303325223033252e-06, "loss": 0.6114, "step": 30701 }, { "epoch": 0.8963825873697119, "grad_norm": 0.6448746293414503, "learning_rate": 2.302676399026764e-06, "loss": 0.6165, "step": 30702 }, { "epoch": 0.8964117835975592, "grad_norm": 0.6231131703799158, "learning_rate": 2.3020275750202758e-06, "loss": 0.5448, "step": 30703 }, { "epoch": 0.8964409798254066, "grad_norm": 0.6812764350506145, "learning_rate": 2.301378751013788e-06, "loss": 0.6531, "step": 30704 }, { "epoch": 0.896470176053254, "grad_norm": 0.599162907535267, "learning_rate": 2.3007299270072994e-06, "loss": 0.5053, "step": 30705 }, { "epoch": 0.8964993722811013, "grad_norm": 0.6721647580579687, "learning_rate": 2.3000811030008114e-06, "loss": 0.6445, "step": 30706 }, { "epoch": 0.8965285685089487, "grad_norm": 0.6563674961959198, "learning_rate": 2.299432278994323e-06, "loss": 0.5979, "step": 30707 }, { "epoch": 0.896557764736796, "grad_norm": 0.6468649556023413, "learning_rate": 2.2987834549878346e-06, "loss": 0.6051, "step": 30708 }, { "epoch": 0.8965869609646434, "grad_norm": 0.6549211146839429, "learning_rate": 2.2981346309813467e-06, "loss": 0.5745, "step": 30709 }, { "epoch": 0.8966161571924908, "grad_norm": 0.6452957347803409, "learning_rate": 2.2974858069748583e-06, "loss": 0.5959, "step": 30710 }, { "epoch": 0.8966453534203381, "grad_norm": 0.633838510434199, "learning_rate": 2.2968369829683703e-06, "loss": 0.5642, "step": 30711 }, { "epoch": 0.8966745496481855, "grad_norm": 0.6083690808595609, "learning_rate": 2.296188158961882e-06, "loss": 0.5452, "step": 30712 }, { "epoch": 0.8967037458760329, "grad_norm": 0.6471937361559641, "learning_rate": 2.2955393349553935e-06, "loss": 0.5834, "step": 30713 }, { "epoch": 0.8967329421038802, "grad_norm": 0.6252747693798791, "learning_rate": 2.294890510948905e-06, "loss": 0.5297, "step": 30714 }, { "epoch": 0.8967621383317276, "grad_norm": 0.6446442472484686, "learning_rate": 2.294241686942417e-06, "loss": 0.5892, "step": 30715 }, { "epoch": 0.8967913345595749, "grad_norm": 0.7093292910753292, "learning_rate": 2.2935928629359287e-06, "loss": 0.6783, "step": 30716 }, { "epoch": 0.8968205307874223, "grad_norm": 0.7232045467826076, "learning_rate": 2.2929440389294403e-06, "loss": 0.6826, "step": 30717 }, { "epoch": 0.8968497270152697, "grad_norm": 0.7079735804407193, "learning_rate": 2.2922952149229524e-06, "loss": 0.7108, "step": 30718 }, { "epoch": 0.896878923243117, "grad_norm": 0.6282744270426518, "learning_rate": 2.291646390916464e-06, "loss": 0.5769, "step": 30719 }, { "epoch": 0.8969081194709644, "grad_norm": 0.705869400152656, "learning_rate": 2.290997566909976e-06, "loss": 0.5942, "step": 30720 }, { "epoch": 0.8969373156988117, "grad_norm": 0.6747625709340273, "learning_rate": 2.2903487429034876e-06, "loss": 0.6639, "step": 30721 }, { "epoch": 0.8969665119266591, "grad_norm": 0.6371850353842096, "learning_rate": 2.289699918896999e-06, "loss": 0.6299, "step": 30722 }, { "epoch": 0.8969957081545065, "grad_norm": 0.6543163422902446, "learning_rate": 2.2890510948905112e-06, "loss": 0.6013, "step": 30723 }, { "epoch": 0.8970249043823538, "grad_norm": 0.5915064123800193, "learning_rate": 2.288402270884023e-06, "loss": 0.5127, "step": 30724 }, { "epoch": 0.8970541006102012, "grad_norm": 0.6393280866197295, "learning_rate": 2.287753446877535e-06, "loss": 0.5821, "step": 30725 }, { "epoch": 0.8970832968380485, "grad_norm": 0.6084357889655005, "learning_rate": 2.2871046228710465e-06, "loss": 0.5402, "step": 30726 }, { "epoch": 0.8971124930658959, "grad_norm": 0.6728125676607415, "learning_rate": 2.286455798864558e-06, "loss": 0.6242, "step": 30727 }, { "epoch": 0.8971416892937433, "grad_norm": 0.6111864549708286, "learning_rate": 2.28580697485807e-06, "loss": 0.5351, "step": 30728 }, { "epoch": 0.8971708855215906, "grad_norm": 0.6389190840839435, "learning_rate": 2.2851581508515817e-06, "loss": 0.5967, "step": 30729 }, { "epoch": 0.897200081749438, "grad_norm": 0.6481622160350885, "learning_rate": 2.2845093268450937e-06, "loss": 0.5717, "step": 30730 }, { "epoch": 0.8972292779772854, "grad_norm": 0.615162444927542, "learning_rate": 2.2838605028386053e-06, "loss": 0.5781, "step": 30731 }, { "epoch": 0.8972584742051327, "grad_norm": 0.6286825696853999, "learning_rate": 2.283211678832117e-06, "loss": 0.5655, "step": 30732 }, { "epoch": 0.8972876704329801, "grad_norm": 0.6267870799829888, "learning_rate": 2.2825628548256285e-06, "loss": 0.5417, "step": 30733 }, { "epoch": 0.8973168666608274, "grad_norm": 0.6332175420431179, "learning_rate": 2.2819140308191406e-06, "loss": 0.5941, "step": 30734 }, { "epoch": 0.8973460628886748, "grad_norm": 0.687271387086524, "learning_rate": 2.281265206812652e-06, "loss": 0.6658, "step": 30735 }, { "epoch": 0.8973752591165222, "grad_norm": 0.6233062648400993, "learning_rate": 2.2806163828061638e-06, "loss": 0.5889, "step": 30736 }, { "epoch": 0.8974044553443695, "grad_norm": 0.6584809807948498, "learning_rate": 2.279967558799676e-06, "loss": 0.6434, "step": 30737 }, { "epoch": 0.8974336515722169, "grad_norm": 0.6518542538444675, "learning_rate": 2.2793187347931874e-06, "loss": 0.5889, "step": 30738 }, { "epoch": 0.8974628478000642, "grad_norm": 0.6382853781746862, "learning_rate": 2.2786699107866994e-06, "loss": 0.6083, "step": 30739 }, { "epoch": 0.8974920440279116, "grad_norm": 0.6087588752388643, "learning_rate": 2.278021086780211e-06, "loss": 0.5504, "step": 30740 }, { "epoch": 0.897521240255759, "grad_norm": 0.6201435828998841, "learning_rate": 2.2773722627737226e-06, "loss": 0.5643, "step": 30741 }, { "epoch": 0.8975504364836063, "grad_norm": 0.6366487012566598, "learning_rate": 2.2767234387672347e-06, "loss": 0.5717, "step": 30742 }, { "epoch": 0.8975796327114537, "grad_norm": 0.6650543055556812, "learning_rate": 2.2760746147607463e-06, "loss": 0.5901, "step": 30743 }, { "epoch": 0.897608828939301, "grad_norm": 0.6442672058519996, "learning_rate": 2.2754257907542583e-06, "loss": 0.5626, "step": 30744 }, { "epoch": 0.8976380251671484, "grad_norm": 0.6558118447096032, "learning_rate": 2.27477696674777e-06, "loss": 0.6082, "step": 30745 }, { "epoch": 0.8976672213949958, "grad_norm": 0.6275700699859056, "learning_rate": 2.274128142741282e-06, "loss": 0.5688, "step": 30746 }, { "epoch": 0.8976964176228431, "grad_norm": 0.6265168795867507, "learning_rate": 2.2734793187347935e-06, "loss": 0.5369, "step": 30747 }, { "epoch": 0.8977256138506905, "grad_norm": 0.6754200479184854, "learning_rate": 2.272830494728305e-06, "loss": 0.622, "step": 30748 }, { "epoch": 0.8977548100785379, "grad_norm": 0.661869868518324, "learning_rate": 2.272181670721817e-06, "loss": 0.6081, "step": 30749 }, { "epoch": 0.8977840063063852, "grad_norm": 0.6201601326392986, "learning_rate": 2.2715328467153288e-06, "loss": 0.5582, "step": 30750 }, { "epoch": 0.8978132025342326, "grad_norm": 0.6899433090402303, "learning_rate": 2.2708840227088404e-06, "loss": 0.6697, "step": 30751 }, { "epoch": 0.8978423987620799, "grad_norm": 0.7055745947483345, "learning_rate": 2.270235198702352e-06, "loss": 0.7472, "step": 30752 }, { "epoch": 0.8978715949899273, "grad_norm": 0.6903147641071952, "learning_rate": 2.269586374695864e-06, "loss": 0.6103, "step": 30753 }, { "epoch": 0.8979007912177747, "grad_norm": 0.6374434704434345, "learning_rate": 2.2689375506893756e-06, "loss": 0.554, "step": 30754 }, { "epoch": 0.897929987445622, "grad_norm": 0.6482684481083275, "learning_rate": 2.268288726682887e-06, "loss": 0.5756, "step": 30755 }, { "epoch": 0.8979591836734694, "grad_norm": 0.701557530163091, "learning_rate": 2.2676399026763992e-06, "loss": 0.6758, "step": 30756 }, { "epoch": 0.8979883799013167, "grad_norm": 0.696260594378675, "learning_rate": 2.266991078669911e-06, "loss": 0.6467, "step": 30757 }, { "epoch": 0.8980175761291641, "grad_norm": 0.6469620388442837, "learning_rate": 2.266342254663423e-06, "loss": 0.6013, "step": 30758 }, { "epoch": 0.8980467723570115, "grad_norm": 0.6818918133554062, "learning_rate": 2.2656934306569345e-06, "loss": 0.6487, "step": 30759 }, { "epoch": 0.8980759685848588, "grad_norm": 0.660182757206011, "learning_rate": 2.265044606650446e-06, "loss": 0.6156, "step": 30760 }, { "epoch": 0.8981051648127062, "grad_norm": 0.6303340089771099, "learning_rate": 2.264395782643958e-06, "loss": 0.5353, "step": 30761 }, { "epoch": 0.8981343610405536, "grad_norm": 0.6151251948117765, "learning_rate": 2.2637469586374697e-06, "loss": 0.5341, "step": 30762 }, { "epoch": 0.8981635572684009, "grad_norm": 0.6116343072818744, "learning_rate": 2.2630981346309817e-06, "loss": 0.5463, "step": 30763 }, { "epoch": 0.8981927534962483, "grad_norm": 0.6307762032001891, "learning_rate": 2.2624493106244933e-06, "loss": 0.5876, "step": 30764 }, { "epoch": 0.8982219497240956, "grad_norm": 0.6211865567634169, "learning_rate": 2.2618004866180054e-06, "loss": 0.5686, "step": 30765 }, { "epoch": 0.898251145951943, "grad_norm": 0.6315104912896601, "learning_rate": 2.261151662611517e-06, "loss": 0.5705, "step": 30766 }, { "epoch": 0.8982803421797904, "grad_norm": 0.7268192536639361, "learning_rate": 2.2605028386050286e-06, "loss": 0.6639, "step": 30767 }, { "epoch": 0.8983095384076377, "grad_norm": 0.6948082438539218, "learning_rate": 2.25985401459854e-06, "loss": 0.6776, "step": 30768 }, { "epoch": 0.8983387346354851, "grad_norm": 0.6840356623424094, "learning_rate": 2.259205190592052e-06, "loss": 0.6559, "step": 30769 }, { "epoch": 0.8983679308633324, "grad_norm": 0.6217617585406064, "learning_rate": 2.258556366585564e-06, "loss": 0.5401, "step": 30770 }, { "epoch": 0.8983971270911798, "grad_norm": 0.6749736690782937, "learning_rate": 2.2579075425790754e-06, "loss": 0.6282, "step": 30771 }, { "epoch": 0.8984263233190272, "grad_norm": 0.6284063046956897, "learning_rate": 2.2572587185725874e-06, "loss": 0.5905, "step": 30772 }, { "epoch": 0.8984555195468745, "grad_norm": 0.6516464428658355, "learning_rate": 2.256609894566099e-06, "loss": 0.6174, "step": 30773 }, { "epoch": 0.8984847157747219, "grad_norm": 0.650822400068229, "learning_rate": 2.2559610705596106e-06, "loss": 0.6004, "step": 30774 }, { "epoch": 0.8985139120025692, "grad_norm": 0.6622310026135563, "learning_rate": 2.2553122465531227e-06, "loss": 0.6264, "step": 30775 }, { "epoch": 0.8985431082304166, "grad_norm": 0.6130676861804252, "learning_rate": 2.2546634225466343e-06, "loss": 0.556, "step": 30776 }, { "epoch": 0.898572304458264, "grad_norm": 0.620509099001227, "learning_rate": 2.2540145985401463e-06, "loss": 0.5308, "step": 30777 }, { "epoch": 0.8986015006861113, "grad_norm": 0.6362284188574836, "learning_rate": 2.253365774533658e-06, "loss": 0.6082, "step": 30778 }, { "epoch": 0.8986306969139587, "grad_norm": 0.6715720405516941, "learning_rate": 2.25271695052717e-06, "loss": 0.6302, "step": 30779 }, { "epoch": 0.898659893141806, "grad_norm": 0.6433188965039279, "learning_rate": 2.2520681265206815e-06, "loss": 0.5996, "step": 30780 }, { "epoch": 0.8986890893696534, "grad_norm": 0.6431817789661481, "learning_rate": 2.251419302514193e-06, "loss": 0.6102, "step": 30781 }, { "epoch": 0.8987182855975008, "grad_norm": 0.6271221594963716, "learning_rate": 2.250770478507705e-06, "loss": 0.5649, "step": 30782 }, { "epoch": 0.8987474818253481, "grad_norm": 0.6189015161197031, "learning_rate": 2.2501216545012168e-06, "loss": 0.5425, "step": 30783 }, { "epoch": 0.8987766780531955, "grad_norm": 0.5964353484659102, "learning_rate": 2.249472830494729e-06, "loss": 0.5427, "step": 30784 }, { "epoch": 0.8988058742810429, "grad_norm": 0.663592953959272, "learning_rate": 2.2488240064882404e-06, "loss": 0.6144, "step": 30785 }, { "epoch": 0.8988350705088902, "grad_norm": 0.6571777613118104, "learning_rate": 2.248175182481752e-06, "loss": 0.5797, "step": 30786 }, { "epoch": 0.8988642667367376, "grad_norm": 0.6550508204260886, "learning_rate": 2.2475263584752636e-06, "loss": 0.6264, "step": 30787 }, { "epoch": 0.8988934629645849, "grad_norm": 0.5527690131939659, "learning_rate": 2.2468775344687756e-06, "loss": 0.4579, "step": 30788 }, { "epoch": 0.8989226591924323, "grad_norm": 0.6056645016003989, "learning_rate": 2.2462287104622872e-06, "loss": 0.5233, "step": 30789 }, { "epoch": 0.8989518554202797, "grad_norm": 0.7137155467111765, "learning_rate": 2.245579886455799e-06, "loss": 0.7012, "step": 30790 }, { "epoch": 0.898981051648127, "grad_norm": 0.6517611433596046, "learning_rate": 2.244931062449311e-06, "loss": 0.6211, "step": 30791 }, { "epoch": 0.8990102478759744, "grad_norm": 0.7696301398575568, "learning_rate": 2.2442822384428225e-06, "loss": 0.7422, "step": 30792 }, { "epoch": 0.8990394441038217, "grad_norm": 0.6107643541405878, "learning_rate": 2.243633414436334e-06, "loss": 0.5147, "step": 30793 }, { "epoch": 0.8990686403316691, "grad_norm": 0.6351023119764757, "learning_rate": 2.242984590429846e-06, "loss": 0.6108, "step": 30794 }, { "epoch": 0.8990978365595165, "grad_norm": 0.5999886534325365, "learning_rate": 2.2423357664233577e-06, "loss": 0.511, "step": 30795 }, { "epoch": 0.8991270327873638, "grad_norm": 0.6513728155180861, "learning_rate": 2.2416869424168697e-06, "loss": 0.6337, "step": 30796 }, { "epoch": 0.8991562290152112, "grad_norm": 0.6392167194557316, "learning_rate": 2.2410381184103813e-06, "loss": 0.6194, "step": 30797 }, { "epoch": 0.8991854252430586, "grad_norm": 0.633933877014072, "learning_rate": 2.2403892944038934e-06, "loss": 0.582, "step": 30798 }, { "epoch": 0.8992146214709059, "grad_norm": 0.6364806234676806, "learning_rate": 2.239740470397405e-06, "loss": 0.6056, "step": 30799 }, { "epoch": 0.8992438176987533, "grad_norm": 0.6714155720656944, "learning_rate": 2.2390916463909166e-06, "loss": 0.6392, "step": 30800 }, { "epoch": 0.8992730139266006, "grad_norm": 0.6921065788760111, "learning_rate": 2.2384428223844286e-06, "loss": 0.6846, "step": 30801 }, { "epoch": 0.899302210154448, "grad_norm": 0.6433936415954691, "learning_rate": 2.23779399837794e-06, "loss": 0.5638, "step": 30802 }, { "epoch": 0.8993314063822954, "grad_norm": 0.6678135073443274, "learning_rate": 2.237145174371452e-06, "loss": 0.6142, "step": 30803 }, { "epoch": 0.8993606026101427, "grad_norm": 0.7207660547712209, "learning_rate": 2.236496350364964e-06, "loss": 0.7161, "step": 30804 }, { "epoch": 0.8993897988379901, "grad_norm": 0.6973931398462239, "learning_rate": 2.2358475263584754e-06, "loss": 0.6535, "step": 30805 }, { "epoch": 0.8994189950658374, "grad_norm": 0.6691722559588252, "learning_rate": 2.235198702351987e-06, "loss": 0.5871, "step": 30806 }, { "epoch": 0.8994481912936848, "grad_norm": 0.6709204358946053, "learning_rate": 2.234549878345499e-06, "loss": 0.5952, "step": 30807 }, { "epoch": 0.8994773875215322, "grad_norm": 0.6543816287567383, "learning_rate": 2.2339010543390107e-06, "loss": 0.643, "step": 30808 }, { "epoch": 0.8995065837493795, "grad_norm": 0.6785776277994443, "learning_rate": 2.2332522303325223e-06, "loss": 0.6191, "step": 30809 }, { "epoch": 0.8995357799772269, "grad_norm": 0.6717278754317885, "learning_rate": 2.2326034063260343e-06, "loss": 0.6125, "step": 30810 }, { "epoch": 0.8995649762050743, "grad_norm": 0.68290337659403, "learning_rate": 2.231954582319546e-06, "loss": 0.5491, "step": 30811 }, { "epoch": 0.8995941724329216, "grad_norm": 0.700571199413776, "learning_rate": 2.231305758313058e-06, "loss": 0.6329, "step": 30812 }, { "epoch": 0.899623368660769, "grad_norm": 0.636460278304518, "learning_rate": 2.2306569343065695e-06, "loss": 0.5634, "step": 30813 }, { "epoch": 0.8996525648886164, "grad_norm": 0.6454140787886959, "learning_rate": 2.230008110300081e-06, "loss": 0.5732, "step": 30814 }, { "epoch": 0.8996817611164638, "grad_norm": 0.692367664352042, "learning_rate": 2.229359286293593e-06, "loss": 0.6542, "step": 30815 }, { "epoch": 0.8997109573443112, "grad_norm": 0.658877681096207, "learning_rate": 2.2287104622871048e-06, "loss": 0.6044, "step": 30816 }, { "epoch": 0.8997401535721585, "grad_norm": 0.6357110244463845, "learning_rate": 2.228061638280617e-06, "loss": 0.5927, "step": 30817 }, { "epoch": 0.8997693498000059, "grad_norm": 0.6468213331258208, "learning_rate": 2.2274128142741284e-06, "loss": 0.5883, "step": 30818 }, { "epoch": 0.8997985460278533, "grad_norm": 0.6325916787154059, "learning_rate": 2.22676399026764e-06, "loss": 0.5504, "step": 30819 }, { "epoch": 0.8998277422557006, "grad_norm": 0.5970028553015364, "learning_rate": 2.226115166261152e-06, "loss": 0.4948, "step": 30820 }, { "epoch": 0.899856938483548, "grad_norm": 0.6035549603726066, "learning_rate": 2.2254663422546636e-06, "loss": 0.4981, "step": 30821 }, { "epoch": 0.8998861347113953, "grad_norm": 0.685848429884253, "learning_rate": 2.2248175182481752e-06, "loss": 0.6448, "step": 30822 }, { "epoch": 0.8999153309392427, "grad_norm": 0.6219333898415371, "learning_rate": 2.2241686942416873e-06, "loss": 0.6022, "step": 30823 }, { "epoch": 0.8999445271670901, "grad_norm": 0.6394595206804036, "learning_rate": 2.223519870235199e-06, "loss": 0.6013, "step": 30824 }, { "epoch": 0.8999737233949374, "grad_norm": 0.6798567562776681, "learning_rate": 2.2228710462287105e-06, "loss": 0.5968, "step": 30825 }, { "epoch": 0.9000029196227848, "grad_norm": 0.6596543284520165, "learning_rate": 2.222222222222222e-06, "loss": 0.6103, "step": 30826 }, { "epoch": 0.9000321158506321, "grad_norm": 0.681798585176096, "learning_rate": 2.221573398215734e-06, "loss": 0.6221, "step": 30827 }, { "epoch": 0.9000613120784795, "grad_norm": 0.621570223865912, "learning_rate": 2.2209245742092457e-06, "loss": 0.5232, "step": 30828 }, { "epoch": 0.9000905083063269, "grad_norm": 0.6484376458723673, "learning_rate": 2.2202757502027577e-06, "loss": 0.5876, "step": 30829 }, { "epoch": 0.9001197045341742, "grad_norm": 0.6557395141729316, "learning_rate": 2.2196269261962693e-06, "loss": 0.6448, "step": 30830 }, { "epoch": 0.9001489007620216, "grad_norm": 0.6701375954244729, "learning_rate": 2.2189781021897814e-06, "loss": 0.6186, "step": 30831 }, { "epoch": 0.900178096989869, "grad_norm": 0.6851245823146359, "learning_rate": 2.218329278183293e-06, "loss": 0.6619, "step": 30832 }, { "epoch": 0.9002072932177163, "grad_norm": 0.7394360680135611, "learning_rate": 2.2176804541768046e-06, "loss": 0.6482, "step": 30833 }, { "epoch": 0.9002364894455637, "grad_norm": 0.6574589355109605, "learning_rate": 2.2170316301703166e-06, "loss": 0.6012, "step": 30834 }, { "epoch": 0.900265685673411, "grad_norm": 0.6342881538975697, "learning_rate": 2.216382806163828e-06, "loss": 0.5481, "step": 30835 }, { "epoch": 0.9002948819012584, "grad_norm": 0.6372996736323688, "learning_rate": 2.2157339821573402e-06, "loss": 0.6046, "step": 30836 }, { "epoch": 0.9003240781291058, "grad_norm": 0.6658422664639089, "learning_rate": 2.215085158150852e-06, "loss": 0.6372, "step": 30837 }, { "epoch": 0.9003532743569531, "grad_norm": 0.6039535054591659, "learning_rate": 2.214436334144364e-06, "loss": 0.5518, "step": 30838 }, { "epoch": 0.9003824705848005, "grad_norm": 0.651658284273479, "learning_rate": 2.2137875101378755e-06, "loss": 0.6174, "step": 30839 }, { "epoch": 0.9004116668126478, "grad_norm": 0.6556601345957774, "learning_rate": 2.213138686131387e-06, "loss": 0.5615, "step": 30840 }, { "epoch": 0.9004408630404952, "grad_norm": 0.6342620529468992, "learning_rate": 2.2124898621248987e-06, "loss": 0.6145, "step": 30841 }, { "epoch": 0.9004700592683426, "grad_norm": 0.6723789641815625, "learning_rate": 2.2118410381184107e-06, "loss": 0.6271, "step": 30842 }, { "epoch": 0.9004992554961899, "grad_norm": 0.6524888651855054, "learning_rate": 2.2111922141119223e-06, "loss": 0.5954, "step": 30843 }, { "epoch": 0.9005284517240373, "grad_norm": 0.665671313188785, "learning_rate": 2.210543390105434e-06, "loss": 0.5788, "step": 30844 }, { "epoch": 0.9005576479518846, "grad_norm": 0.6356977508943995, "learning_rate": 2.2098945660989455e-06, "loss": 0.5483, "step": 30845 }, { "epoch": 0.900586844179732, "grad_norm": 0.6340463887213237, "learning_rate": 2.2092457420924575e-06, "loss": 0.578, "step": 30846 }, { "epoch": 0.9006160404075794, "grad_norm": 0.6477569576649768, "learning_rate": 2.208596918085969e-06, "loss": 0.6148, "step": 30847 }, { "epoch": 0.9006452366354267, "grad_norm": 0.5989079956935321, "learning_rate": 2.207948094079481e-06, "loss": 0.4846, "step": 30848 }, { "epoch": 0.9006744328632741, "grad_norm": 0.7225992319829061, "learning_rate": 2.2072992700729928e-06, "loss": 0.6802, "step": 30849 }, { "epoch": 0.9007036290911214, "grad_norm": 0.6114991488634485, "learning_rate": 2.206650446066505e-06, "loss": 0.5272, "step": 30850 }, { "epoch": 0.9007328253189688, "grad_norm": 0.6736436458931491, "learning_rate": 2.2060016220600164e-06, "loss": 0.6861, "step": 30851 }, { "epoch": 0.9007620215468162, "grad_norm": 0.6818489822331947, "learning_rate": 2.205352798053528e-06, "loss": 0.6111, "step": 30852 }, { "epoch": 0.9007912177746635, "grad_norm": 0.6820879184710905, "learning_rate": 2.20470397404704e-06, "loss": 0.6504, "step": 30853 }, { "epoch": 0.9008204140025109, "grad_norm": 0.611385752341415, "learning_rate": 2.2040551500405516e-06, "loss": 0.5583, "step": 30854 }, { "epoch": 0.9008496102303583, "grad_norm": 0.6152722191482712, "learning_rate": 2.2034063260340637e-06, "loss": 0.5544, "step": 30855 }, { "epoch": 0.9008788064582056, "grad_norm": 0.6410168262507651, "learning_rate": 2.2027575020275753e-06, "loss": 0.6065, "step": 30856 }, { "epoch": 0.900908002686053, "grad_norm": 0.6316468269889759, "learning_rate": 2.202108678021087e-06, "loss": 0.5905, "step": 30857 }, { "epoch": 0.9009371989139003, "grad_norm": 0.643458740386989, "learning_rate": 2.201459854014599e-06, "loss": 0.5923, "step": 30858 }, { "epoch": 0.9009663951417477, "grad_norm": 0.6547931099020788, "learning_rate": 2.2008110300081105e-06, "loss": 0.6067, "step": 30859 }, { "epoch": 0.9009955913695951, "grad_norm": 0.6419591933512695, "learning_rate": 2.200162206001622e-06, "loss": 0.618, "step": 30860 }, { "epoch": 0.9010247875974424, "grad_norm": 0.686194104155048, "learning_rate": 2.199513381995134e-06, "loss": 0.6309, "step": 30861 }, { "epoch": 0.9010539838252898, "grad_norm": 0.6003408572672969, "learning_rate": 2.1988645579886457e-06, "loss": 0.5115, "step": 30862 }, { "epoch": 0.9010831800531371, "grad_norm": 0.655441091364749, "learning_rate": 2.1982157339821573e-06, "loss": 0.6094, "step": 30863 }, { "epoch": 0.9011123762809845, "grad_norm": 0.6455786417361545, "learning_rate": 2.1975669099756694e-06, "loss": 0.6079, "step": 30864 }, { "epoch": 0.9011415725088319, "grad_norm": 0.637504060641918, "learning_rate": 2.196918085969181e-06, "loss": 0.6014, "step": 30865 }, { "epoch": 0.9011707687366792, "grad_norm": 0.6470131909024678, "learning_rate": 2.1962692619626926e-06, "loss": 0.6261, "step": 30866 }, { "epoch": 0.9011999649645266, "grad_norm": 0.6608077182494182, "learning_rate": 2.1956204379562046e-06, "loss": 0.6213, "step": 30867 }, { "epoch": 0.901229161192374, "grad_norm": 0.6580227665098135, "learning_rate": 2.194971613949716e-06, "loss": 0.6312, "step": 30868 }, { "epoch": 0.9012583574202213, "grad_norm": 0.6809333034360615, "learning_rate": 2.1943227899432282e-06, "loss": 0.6404, "step": 30869 }, { "epoch": 0.9012875536480687, "grad_norm": 0.6163827576890137, "learning_rate": 2.19367396593674e-06, "loss": 0.5501, "step": 30870 }, { "epoch": 0.901316749875916, "grad_norm": 0.6507020774052792, "learning_rate": 2.193025141930252e-06, "loss": 0.5864, "step": 30871 }, { "epoch": 0.9013459461037634, "grad_norm": 0.6154592627690404, "learning_rate": 2.1923763179237635e-06, "loss": 0.54, "step": 30872 }, { "epoch": 0.9013751423316108, "grad_norm": 0.6551944853010476, "learning_rate": 2.191727493917275e-06, "loss": 0.6562, "step": 30873 }, { "epoch": 0.9014043385594581, "grad_norm": 0.6878941627686991, "learning_rate": 2.191078669910787e-06, "loss": 0.5886, "step": 30874 }, { "epoch": 0.9014335347873055, "grad_norm": 0.6840884404472046, "learning_rate": 2.1904298459042987e-06, "loss": 0.6397, "step": 30875 }, { "epoch": 0.9014627310151528, "grad_norm": 0.6300294989271975, "learning_rate": 2.1897810218978103e-06, "loss": 0.5507, "step": 30876 }, { "epoch": 0.9014919272430002, "grad_norm": 0.627260309055208, "learning_rate": 2.1891321978913223e-06, "loss": 0.561, "step": 30877 }, { "epoch": 0.9015211234708476, "grad_norm": 0.5963146084836647, "learning_rate": 2.188483373884834e-06, "loss": 0.5306, "step": 30878 }, { "epoch": 0.9015503196986949, "grad_norm": 0.6031057006875817, "learning_rate": 2.1878345498783455e-06, "loss": 0.4991, "step": 30879 }, { "epoch": 0.9015795159265423, "grad_norm": 0.6424517587645326, "learning_rate": 2.187185725871857e-06, "loss": 0.5765, "step": 30880 }, { "epoch": 0.9016087121543896, "grad_norm": 0.5987259453613561, "learning_rate": 2.186536901865369e-06, "loss": 0.5587, "step": 30881 }, { "epoch": 0.901637908382237, "grad_norm": 0.6048924497980506, "learning_rate": 2.1858880778588808e-06, "loss": 0.5741, "step": 30882 }, { "epoch": 0.9016671046100844, "grad_norm": 0.6679043929430167, "learning_rate": 2.185239253852393e-06, "loss": 0.6262, "step": 30883 }, { "epoch": 0.9016963008379317, "grad_norm": 0.6132694131253283, "learning_rate": 2.1845904298459044e-06, "loss": 0.5553, "step": 30884 }, { "epoch": 0.9017254970657791, "grad_norm": 0.6812158357464745, "learning_rate": 2.183941605839416e-06, "loss": 0.6558, "step": 30885 }, { "epoch": 0.9017546932936265, "grad_norm": 0.6345380475302745, "learning_rate": 2.183292781832928e-06, "loss": 0.5618, "step": 30886 }, { "epoch": 0.9017838895214738, "grad_norm": 0.6477419740117344, "learning_rate": 2.1826439578264396e-06, "loss": 0.6215, "step": 30887 }, { "epoch": 0.9018130857493212, "grad_norm": 0.6329275646995769, "learning_rate": 2.1819951338199517e-06, "loss": 0.5686, "step": 30888 }, { "epoch": 0.9018422819771685, "grad_norm": 0.6749411005731404, "learning_rate": 2.1813463098134633e-06, "loss": 0.6327, "step": 30889 }, { "epoch": 0.9018714782050159, "grad_norm": 0.6650962797038548, "learning_rate": 2.1806974858069753e-06, "loss": 0.6317, "step": 30890 }, { "epoch": 0.9019006744328633, "grad_norm": 0.6792540494010582, "learning_rate": 2.180048661800487e-06, "loss": 0.6486, "step": 30891 }, { "epoch": 0.9019298706607106, "grad_norm": 0.6824575208613246, "learning_rate": 2.1793998377939985e-06, "loss": 0.6477, "step": 30892 }, { "epoch": 0.901959066888558, "grad_norm": 0.5869521010467686, "learning_rate": 2.1787510137875105e-06, "loss": 0.5095, "step": 30893 }, { "epoch": 0.9019882631164053, "grad_norm": 0.667680696709517, "learning_rate": 2.178102189781022e-06, "loss": 0.5994, "step": 30894 }, { "epoch": 0.9020174593442527, "grad_norm": 0.6601568966554986, "learning_rate": 2.1774533657745337e-06, "loss": 0.5946, "step": 30895 }, { "epoch": 0.9020466555721001, "grad_norm": 0.5935942428492976, "learning_rate": 2.1768045417680458e-06, "loss": 0.5329, "step": 30896 }, { "epoch": 0.9020758517999474, "grad_norm": 0.621731739709907, "learning_rate": 2.1761557177615574e-06, "loss": 0.5619, "step": 30897 }, { "epoch": 0.9021050480277948, "grad_norm": 0.5866576877383805, "learning_rate": 2.175506893755069e-06, "loss": 0.509, "step": 30898 }, { "epoch": 0.9021342442556421, "grad_norm": 0.6797798957629687, "learning_rate": 2.1748580697485806e-06, "loss": 0.6436, "step": 30899 }, { "epoch": 0.9021634404834895, "grad_norm": 0.6745874215028832, "learning_rate": 2.1742092457420926e-06, "loss": 0.6345, "step": 30900 }, { "epoch": 0.9021926367113369, "grad_norm": 0.6726277113875679, "learning_rate": 2.173560421735604e-06, "loss": 0.676, "step": 30901 }, { "epoch": 0.9022218329391842, "grad_norm": 0.6456076113572213, "learning_rate": 2.1729115977291162e-06, "loss": 0.6008, "step": 30902 }, { "epoch": 0.9022510291670316, "grad_norm": 0.6491159297959539, "learning_rate": 2.172262773722628e-06, "loss": 0.5677, "step": 30903 }, { "epoch": 0.902280225394879, "grad_norm": 0.6290255017021978, "learning_rate": 2.17161394971614e-06, "loss": 0.6063, "step": 30904 }, { "epoch": 0.9023094216227263, "grad_norm": 0.6126678351422546, "learning_rate": 2.1709651257096515e-06, "loss": 0.5484, "step": 30905 }, { "epoch": 0.9023386178505737, "grad_norm": 0.5861406714657869, "learning_rate": 2.170316301703163e-06, "loss": 0.4923, "step": 30906 }, { "epoch": 0.902367814078421, "grad_norm": 0.6732028724124989, "learning_rate": 2.169667477696675e-06, "loss": 0.6355, "step": 30907 }, { "epoch": 0.9023970103062684, "grad_norm": 0.6529037293112224, "learning_rate": 2.1690186536901867e-06, "loss": 0.6448, "step": 30908 }, { "epoch": 0.9024262065341158, "grad_norm": 0.6396185656100339, "learning_rate": 2.1683698296836987e-06, "loss": 0.6185, "step": 30909 }, { "epoch": 0.9024554027619631, "grad_norm": 0.6202273250844982, "learning_rate": 2.1677210056772103e-06, "loss": 0.5815, "step": 30910 }, { "epoch": 0.9024845989898105, "grad_norm": 0.6856938837354654, "learning_rate": 2.167072181670722e-06, "loss": 0.694, "step": 30911 }, { "epoch": 0.9025137952176578, "grad_norm": 0.6373957571165945, "learning_rate": 2.166423357664234e-06, "loss": 0.5519, "step": 30912 }, { "epoch": 0.9025429914455052, "grad_norm": 0.6149159299565096, "learning_rate": 2.1657745336577456e-06, "loss": 0.5453, "step": 30913 }, { "epoch": 0.9025721876733526, "grad_norm": 0.6475555572894286, "learning_rate": 2.165125709651257e-06, "loss": 0.589, "step": 30914 }, { "epoch": 0.9026013839011999, "grad_norm": 0.6194854490633735, "learning_rate": 2.1644768856447688e-06, "loss": 0.5564, "step": 30915 }, { "epoch": 0.9026305801290473, "grad_norm": 0.639001132151402, "learning_rate": 2.163828061638281e-06, "loss": 0.5729, "step": 30916 }, { "epoch": 0.9026597763568946, "grad_norm": 0.6271008439245185, "learning_rate": 2.1631792376317924e-06, "loss": 0.5839, "step": 30917 }, { "epoch": 0.902688972584742, "grad_norm": 0.6186247707950007, "learning_rate": 2.162530413625304e-06, "loss": 0.5601, "step": 30918 }, { "epoch": 0.9027181688125894, "grad_norm": 0.6269537381952454, "learning_rate": 2.161881589618816e-06, "loss": 0.5487, "step": 30919 }, { "epoch": 0.9027473650404367, "grad_norm": 0.6181950120145506, "learning_rate": 2.1612327656123276e-06, "loss": 0.5558, "step": 30920 }, { "epoch": 0.9027765612682841, "grad_norm": 0.700618016231065, "learning_rate": 2.1605839416058397e-06, "loss": 0.6891, "step": 30921 }, { "epoch": 0.9028057574961315, "grad_norm": 0.6431501502896274, "learning_rate": 2.1599351175993513e-06, "loss": 0.5289, "step": 30922 }, { "epoch": 0.9028349537239788, "grad_norm": 0.6349727724075399, "learning_rate": 2.1592862935928633e-06, "loss": 0.5435, "step": 30923 }, { "epoch": 0.9028641499518262, "grad_norm": 0.6374515276173386, "learning_rate": 2.158637469586375e-06, "loss": 0.5984, "step": 30924 }, { "epoch": 0.9028933461796735, "grad_norm": 0.7154328247261842, "learning_rate": 2.1579886455798865e-06, "loss": 0.6837, "step": 30925 }, { "epoch": 0.9029225424075209, "grad_norm": 0.6414735771347171, "learning_rate": 2.1573398215733985e-06, "loss": 0.6143, "step": 30926 }, { "epoch": 0.9029517386353683, "grad_norm": 0.6149813833133411, "learning_rate": 2.15669099756691e-06, "loss": 0.5046, "step": 30927 }, { "epoch": 0.9029809348632156, "grad_norm": 0.5863583262458689, "learning_rate": 2.156042173560422e-06, "loss": 0.5142, "step": 30928 }, { "epoch": 0.903010131091063, "grad_norm": 0.6425388937507632, "learning_rate": 2.1553933495539338e-06, "loss": 0.5803, "step": 30929 }, { "epoch": 0.9030393273189103, "grad_norm": 0.6030518732661478, "learning_rate": 2.1547445255474454e-06, "loss": 0.5173, "step": 30930 }, { "epoch": 0.9030685235467577, "grad_norm": 0.6711455741940856, "learning_rate": 2.1540957015409574e-06, "loss": 0.6913, "step": 30931 }, { "epoch": 0.9030977197746051, "grad_norm": 0.6372852498110928, "learning_rate": 2.153446877534469e-06, "loss": 0.5974, "step": 30932 }, { "epoch": 0.9031269160024524, "grad_norm": 0.60092368005148, "learning_rate": 2.1527980535279806e-06, "loss": 0.5504, "step": 30933 }, { "epoch": 0.9031561122302998, "grad_norm": 0.6442092069876352, "learning_rate": 2.1521492295214922e-06, "loss": 0.5692, "step": 30934 }, { "epoch": 0.9031853084581473, "grad_norm": 0.6450779216666214, "learning_rate": 2.1515004055150042e-06, "loss": 0.5553, "step": 30935 }, { "epoch": 0.9032145046859946, "grad_norm": 0.603429564747936, "learning_rate": 2.150851581508516e-06, "loss": 0.4993, "step": 30936 }, { "epoch": 0.903243700913842, "grad_norm": 0.6985809002617284, "learning_rate": 2.1502027575020274e-06, "loss": 0.6214, "step": 30937 }, { "epoch": 0.9032728971416893, "grad_norm": 0.6545811531276166, "learning_rate": 2.1495539334955395e-06, "loss": 0.5964, "step": 30938 }, { "epoch": 0.9033020933695367, "grad_norm": 0.641635903930925, "learning_rate": 2.148905109489051e-06, "loss": 0.5371, "step": 30939 }, { "epoch": 0.9033312895973841, "grad_norm": 0.6416281154847028, "learning_rate": 2.148256285482563e-06, "loss": 0.5672, "step": 30940 }, { "epoch": 0.9033604858252314, "grad_norm": 0.6414414665605395, "learning_rate": 2.1476074614760747e-06, "loss": 0.5711, "step": 30941 }, { "epoch": 0.9033896820530788, "grad_norm": 0.6676877936192993, "learning_rate": 2.1469586374695867e-06, "loss": 0.6244, "step": 30942 }, { "epoch": 0.9034188782809262, "grad_norm": 0.6725894171417683, "learning_rate": 2.1463098134630983e-06, "loss": 0.6158, "step": 30943 }, { "epoch": 0.9034480745087735, "grad_norm": 0.6136824527249517, "learning_rate": 2.14566098945661e-06, "loss": 0.5838, "step": 30944 }, { "epoch": 0.9034772707366209, "grad_norm": 0.6413342782206233, "learning_rate": 2.145012165450122e-06, "loss": 0.5848, "step": 30945 }, { "epoch": 0.9035064669644682, "grad_norm": 0.6044922723917083, "learning_rate": 2.1443633414436336e-06, "loss": 0.5393, "step": 30946 }, { "epoch": 0.9035356631923156, "grad_norm": 0.6317336479858249, "learning_rate": 2.1437145174371456e-06, "loss": 0.5611, "step": 30947 }, { "epoch": 0.903564859420163, "grad_norm": 0.6258139445348202, "learning_rate": 2.143065693430657e-06, "loss": 0.6042, "step": 30948 }, { "epoch": 0.9035940556480103, "grad_norm": 0.6427545529165448, "learning_rate": 2.142416869424169e-06, "loss": 0.5748, "step": 30949 }, { "epoch": 0.9036232518758577, "grad_norm": 0.6780074807490561, "learning_rate": 2.141768045417681e-06, "loss": 0.6645, "step": 30950 }, { "epoch": 0.903652448103705, "grad_norm": 0.6237531475233459, "learning_rate": 2.1411192214111924e-06, "loss": 0.5774, "step": 30951 }, { "epoch": 0.9036816443315524, "grad_norm": 0.6455906800869936, "learning_rate": 2.140470397404704e-06, "loss": 0.5338, "step": 30952 }, { "epoch": 0.9037108405593998, "grad_norm": 0.6822884798714383, "learning_rate": 2.1398215733982156e-06, "loss": 0.6805, "step": 30953 }, { "epoch": 0.9037400367872471, "grad_norm": 0.6317469333544808, "learning_rate": 2.1391727493917277e-06, "loss": 0.5956, "step": 30954 }, { "epoch": 0.9037692330150945, "grad_norm": 0.6237120670282909, "learning_rate": 2.1385239253852393e-06, "loss": 0.5974, "step": 30955 }, { "epoch": 0.9037984292429418, "grad_norm": 0.6230813120277996, "learning_rate": 2.1378751013787513e-06, "loss": 0.6111, "step": 30956 }, { "epoch": 0.9038276254707892, "grad_norm": 0.6337622466631976, "learning_rate": 2.137226277372263e-06, "loss": 0.5695, "step": 30957 }, { "epoch": 0.9038568216986366, "grad_norm": 0.6345970199796266, "learning_rate": 2.1365774533657745e-06, "loss": 0.5901, "step": 30958 }, { "epoch": 0.9038860179264839, "grad_norm": 0.6265613500604353, "learning_rate": 2.1359286293592865e-06, "loss": 0.566, "step": 30959 }, { "epoch": 0.9039152141543313, "grad_norm": 0.6676137525955385, "learning_rate": 2.135279805352798e-06, "loss": 0.6552, "step": 30960 }, { "epoch": 0.9039444103821787, "grad_norm": 0.6037157907774213, "learning_rate": 2.13463098134631e-06, "loss": 0.519, "step": 30961 }, { "epoch": 0.903973606610026, "grad_norm": 0.6947798403720075, "learning_rate": 2.1339821573398218e-06, "loss": 0.6508, "step": 30962 }, { "epoch": 0.9040028028378734, "grad_norm": 0.6457846590279018, "learning_rate": 2.133333333333334e-06, "loss": 0.5772, "step": 30963 }, { "epoch": 0.9040319990657207, "grad_norm": 0.6430985869510575, "learning_rate": 2.1326845093268454e-06, "loss": 0.5875, "step": 30964 }, { "epoch": 0.9040611952935681, "grad_norm": 0.6471554947525354, "learning_rate": 2.132035685320357e-06, "loss": 0.584, "step": 30965 }, { "epoch": 0.9040903915214155, "grad_norm": 0.6300966309439929, "learning_rate": 2.131386861313869e-06, "loss": 0.5075, "step": 30966 }, { "epoch": 0.9041195877492628, "grad_norm": 0.6935853091558744, "learning_rate": 2.1307380373073806e-06, "loss": 0.6579, "step": 30967 }, { "epoch": 0.9041487839771102, "grad_norm": 0.6145461989696891, "learning_rate": 2.1300892133008922e-06, "loss": 0.5933, "step": 30968 }, { "epoch": 0.9041779802049575, "grad_norm": 0.6544669607093906, "learning_rate": 2.129440389294404e-06, "loss": 0.6087, "step": 30969 }, { "epoch": 0.9042071764328049, "grad_norm": 0.6262872645071991, "learning_rate": 2.128791565287916e-06, "loss": 0.5526, "step": 30970 }, { "epoch": 0.9042363726606523, "grad_norm": 0.6123529554928395, "learning_rate": 2.1281427412814275e-06, "loss": 0.5337, "step": 30971 }, { "epoch": 0.9042655688884996, "grad_norm": 0.6611235907487808, "learning_rate": 2.127493917274939e-06, "loss": 0.632, "step": 30972 }, { "epoch": 0.904294765116347, "grad_norm": 0.6873982023118459, "learning_rate": 2.126845093268451e-06, "loss": 0.6668, "step": 30973 }, { "epoch": 0.9043239613441943, "grad_norm": 0.6436350469842794, "learning_rate": 2.1261962692619627e-06, "loss": 0.5977, "step": 30974 }, { "epoch": 0.9043531575720417, "grad_norm": 0.6825317625732327, "learning_rate": 2.1255474452554747e-06, "loss": 0.6705, "step": 30975 }, { "epoch": 0.9043823537998891, "grad_norm": 0.681068404368501, "learning_rate": 2.1248986212489863e-06, "loss": 0.577, "step": 30976 }, { "epoch": 0.9044115500277364, "grad_norm": 0.5981288723446655, "learning_rate": 2.124249797242498e-06, "loss": 0.5644, "step": 30977 }, { "epoch": 0.9044407462555838, "grad_norm": 0.6862577175591095, "learning_rate": 2.12360097323601e-06, "loss": 0.6242, "step": 30978 }, { "epoch": 0.9044699424834312, "grad_norm": 0.6828572729910161, "learning_rate": 2.1229521492295216e-06, "loss": 0.6656, "step": 30979 }, { "epoch": 0.9044991387112785, "grad_norm": 0.6798734569115836, "learning_rate": 2.1223033252230336e-06, "loss": 0.6641, "step": 30980 }, { "epoch": 0.9045283349391259, "grad_norm": 0.6845639918647655, "learning_rate": 2.121654501216545e-06, "loss": 0.639, "step": 30981 }, { "epoch": 0.9045575311669732, "grad_norm": 0.6352468399472173, "learning_rate": 2.1210056772100572e-06, "loss": 0.6088, "step": 30982 }, { "epoch": 0.9045867273948206, "grad_norm": 0.6478211903540647, "learning_rate": 2.120356853203569e-06, "loss": 0.5765, "step": 30983 }, { "epoch": 0.904615923622668, "grad_norm": 0.6182898363307827, "learning_rate": 2.1197080291970804e-06, "loss": 0.5532, "step": 30984 }, { "epoch": 0.9046451198505153, "grad_norm": 0.6831874388056374, "learning_rate": 2.1190592051905925e-06, "loss": 0.6286, "step": 30985 }, { "epoch": 0.9046743160783627, "grad_norm": 0.6599910086264494, "learning_rate": 2.118410381184104e-06, "loss": 0.5898, "step": 30986 }, { "epoch": 0.90470351230621, "grad_norm": 0.5941312971474917, "learning_rate": 2.1177615571776157e-06, "loss": 0.5398, "step": 30987 }, { "epoch": 0.9047327085340574, "grad_norm": 0.6649882054716298, "learning_rate": 2.1171127331711273e-06, "loss": 0.5968, "step": 30988 }, { "epoch": 0.9047619047619048, "grad_norm": 0.6382599363997357, "learning_rate": 2.1164639091646393e-06, "loss": 0.552, "step": 30989 }, { "epoch": 0.9047911009897521, "grad_norm": 0.6683493112080728, "learning_rate": 2.115815085158151e-06, "loss": 0.6555, "step": 30990 }, { "epoch": 0.9048202972175995, "grad_norm": 0.7164512230504635, "learning_rate": 2.1151662611516625e-06, "loss": 0.7015, "step": 30991 }, { "epoch": 0.9048494934454469, "grad_norm": 0.6446678523183196, "learning_rate": 2.1145174371451745e-06, "loss": 0.5504, "step": 30992 }, { "epoch": 0.9048786896732942, "grad_norm": 0.6485175597908525, "learning_rate": 2.113868613138686e-06, "loss": 0.5633, "step": 30993 }, { "epoch": 0.9049078859011416, "grad_norm": 0.5907273002468532, "learning_rate": 2.113219789132198e-06, "loss": 0.4819, "step": 30994 }, { "epoch": 0.9049370821289889, "grad_norm": 0.6730343348754214, "learning_rate": 2.1125709651257098e-06, "loss": 0.6026, "step": 30995 }, { "epoch": 0.9049662783568363, "grad_norm": 0.6764922226547969, "learning_rate": 2.1119221411192214e-06, "loss": 0.6031, "step": 30996 }, { "epoch": 0.9049954745846837, "grad_norm": 0.6683068726322439, "learning_rate": 2.1112733171127334e-06, "loss": 0.5936, "step": 30997 }, { "epoch": 0.905024670812531, "grad_norm": 0.6399312404504308, "learning_rate": 2.110624493106245e-06, "loss": 0.5844, "step": 30998 }, { "epoch": 0.9050538670403784, "grad_norm": 0.61789345081235, "learning_rate": 2.109975669099757e-06, "loss": 0.5498, "step": 30999 }, { "epoch": 0.9050830632682257, "grad_norm": 0.6759269608675201, "learning_rate": 2.1093268450932686e-06, "loss": 0.6382, "step": 31000 }, { "epoch": 0.9051122594960731, "grad_norm": 0.7231672111736698, "learning_rate": 2.1086780210867807e-06, "loss": 0.6233, "step": 31001 }, { "epoch": 0.9051414557239205, "grad_norm": 0.6570231065481128, "learning_rate": 2.1080291970802923e-06, "loss": 0.6295, "step": 31002 }, { "epoch": 0.9051706519517678, "grad_norm": 0.6522659222972208, "learning_rate": 2.107380373073804e-06, "loss": 0.5983, "step": 31003 }, { "epoch": 0.9051998481796152, "grad_norm": 0.6755784963083667, "learning_rate": 2.106731549067316e-06, "loss": 0.6403, "step": 31004 }, { "epoch": 0.9052290444074625, "grad_norm": 0.6242330809583074, "learning_rate": 2.1060827250608275e-06, "loss": 0.5999, "step": 31005 }, { "epoch": 0.9052582406353099, "grad_norm": 0.6472556611922206, "learning_rate": 2.105433901054339e-06, "loss": 0.6476, "step": 31006 }, { "epoch": 0.9052874368631573, "grad_norm": 0.6710663287396321, "learning_rate": 2.1047850770478507e-06, "loss": 0.5948, "step": 31007 }, { "epoch": 0.9053166330910046, "grad_norm": 0.6375453967462514, "learning_rate": 2.1041362530413627e-06, "loss": 0.5634, "step": 31008 }, { "epoch": 0.905345829318852, "grad_norm": 0.6208775309153989, "learning_rate": 2.1034874290348743e-06, "loss": 0.5854, "step": 31009 }, { "epoch": 0.9053750255466994, "grad_norm": 0.7191130866996404, "learning_rate": 2.102838605028386e-06, "loss": 0.6339, "step": 31010 }, { "epoch": 0.9054042217745467, "grad_norm": 0.6883771402352628, "learning_rate": 2.102189781021898e-06, "loss": 0.6031, "step": 31011 }, { "epoch": 0.9054334180023941, "grad_norm": 0.6916402776820049, "learning_rate": 2.1015409570154096e-06, "loss": 0.6765, "step": 31012 }, { "epoch": 0.9054626142302414, "grad_norm": 0.6260117031536543, "learning_rate": 2.1008921330089216e-06, "loss": 0.5625, "step": 31013 }, { "epoch": 0.9054918104580888, "grad_norm": 0.6932282153144098, "learning_rate": 2.1002433090024332e-06, "loss": 0.6697, "step": 31014 }, { "epoch": 0.9055210066859362, "grad_norm": 0.6793257199787641, "learning_rate": 2.0995944849959452e-06, "loss": 0.6918, "step": 31015 }, { "epoch": 0.9055502029137835, "grad_norm": 0.6306675209332214, "learning_rate": 2.098945660989457e-06, "loss": 0.5715, "step": 31016 }, { "epoch": 0.9055793991416309, "grad_norm": 0.6338863963813172, "learning_rate": 2.0982968369829684e-06, "loss": 0.5907, "step": 31017 }, { "epoch": 0.9056085953694782, "grad_norm": 0.6443789225739024, "learning_rate": 2.0976480129764805e-06, "loss": 0.5875, "step": 31018 }, { "epoch": 0.9056377915973256, "grad_norm": 0.6723838234708206, "learning_rate": 2.096999188969992e-06, "loss": 0.6138, "step": 31019 }, { "epoch": 0.905666987825173, "grad_norm": 0.6149682314790725, "learning_rate": 2.096350364963504e-06, "loss": 0.5067, "step": 31020 }, { "epoch": 0.9056961840530203, "grad_norm": 0.6646018797358754, "learning_rate": 2.0957015409570157e-06, "loss": 0.6108, "step": 31021 }, { "epoch": 0.9057253802808677, "grad_norm": 0.6853432629244689, "learning_rate": 2.0950527169505273e-06, "loss": 0.67, "step": 31022 }, { "epoch": 0.905754576508715, "grad_norm": 0.6733130152858758, "learning_rate": 2.094403892944039e-06, "loss": 0.6554, "step": 31023 }, { "epoch": 0.9057837727365624, "grad_norm": 0.5997597033873222, "learning_rate": 2.093755068937551e-06, "loss": 0.5609, "step": 31024 }, { "epoch": 0.9058129689644098, "grad_norm": 0.6888461484946587, "learning_rate": 2.0931062449310625e-06, "loss": 0.7287, "step": 31025 }, { "epoch": 0.9058421651922571, "grad_norm": 0.6073190712717135, "learning_rate": 2.092457420924574e-06, "loss": 0.5198, "step": 31026 }, { "epoch": 0.9058713614201045, "grad_norm": 0.5819209962932635, "learning_rate": 2.091808596918086e-06, "loss": 0.5259, "step": 31027 }, { "epoch": 0.9059005576479519, "grad_norm": 0.6415856240590364, "learning_rate": 2.0911597729115978e-06, "loss": 0.6006, "step": 31028 }, { "epoch": 0.9059297538757992, "grad_norm": 0.6766272486356306, "learning_rate": 2.0905109489051094e-06, "loss": 0.5964, "step": 31029 }, { "epoch": 0.9059589501036466, "grad_norm": 0.6639828386032867, "learning_rate": 2.0898621248986214e-06, "loss": 0.6631, "step": 31030 }, { "epoch": 0.9059881463314939, "grad_norm": 0.6616198608283961, "learning_rate": 2.089213300892133e-06, "loss": 0.6392, "step": 31031 }, { "epoch": 0.9060173425593413, "grad_norm": 0.6383886193039645, "learning_rate": 2.088564476885645e-06, "loss": 0.6395, "step": 31032 }, { "epoch": 0.9060465387871887, "grad_norm": 0.6315128753167196, "learning_rate": 2.0879156528791566e-06, "loss": 0.592, "step": 31033 }, { "epoch": 0.906075735015036, "grad_norm": 0.6440742590876727, "learning_rate": 2.0872668288726687e-06, "loss": 0.6305, "step": 31034 }, { "epoch": 0.9061049312428834, "grad_norm": 0.675426974804259, "learning_rate": 2.0866180048661803e-06, "loss": 0.6054, "step": 31035 }, { "epoch": 0.9061341274707307, "grad_norm": 0.6119080939131465, "learning_rate": 2.085969180859692e-06, "loss": 0.511, "step": 31036 }, { "epoch": 0.9061633236985781, "grad_norm": 0.6239696289837519, "learning_rate": 2.085320356853204e-06, "loss": 0.5916, "step": 31037 }, { "epoch": 0.9061925199264255, "grad_norm": 0.6228865976344995, "learning_rate": 2.0846715328467155e-06, "loss": 0.5427, "step": 31038 }, { "epoch": 0.9062217161542728, "grad_norm": 0.6467838222383538, "learning_rate": 2.0840227088402275e-06, "loss": 0.603, "step": 31039 }, { "epoch": 0.9062509123821202, "grad_norm": 0.6717471328451354, "learning_rate": 2.083373884833739e-06, "loss": 0.6559, "step": 31040 }, { "epoch": 0.9062801086099675, "grad_norm": 0.6510649351533577, "learning_rate": 2.0827250608272507e-06, "loss": 0.6292, "step": 31041 }, { "epoch": 0.9063093048378149, "grad_norm": 0.5910554327295156, "learning_rate": 2.0820762368207624e-06, "loss": 0.5455, "step": 31042 }, { "epoch": 0.9063385010656623, "grad_norm": 0.6461974630530237, "learning_rate": 2.0814274128142744e-06, "loss": 0.5584, "step": 31043 }, { "epoch": 0.9063676972935096, "grad_norm": 0.5903324208887056, "learning_rate": 2.080778588807786e-06, "loss": 0.5162, "step": 31044 }, { "epoch": 0.906396893521357, "grad_norm": 0.6059422833704198, "learning_rate": 2.0801297648012976e-06, "loss": 0.4983, "step": 31045 }, { "epoch": 0.9064260897492044, "grad_norm": 0.6251654462611989, "learning_rate": 2.0794809407948096e-06, "loss": 0.5755, "step": 31046 }, { "epoch": 0.9064552859770517, "grad_norm": 0.6755068314132329, "learning_rate": 2.0788321167883212e-06, "loss": 0.5956, "step": 31047 }, { "epoch": 0.9064844822048991, "grad_norm": 0.6067870789454454, "learning_rate": 2.0781832927818332e-06, "loss": 0.5435, "step": 31048 }, { "epoch": 0.9065136784327464, "grad_norm": 0.6690861125155912, "learning_rate": 2.077534468775345e-06, "loss": 0.6115, "step": 31049 }, { "epoch": 0.9065428746605938, "grad_norm": 0.634416554865175, "learning_rate": 2.0768856447688565e-06, "loss": 0.5729, "step": 31050 }, { "epoch": 0.9065720708884412, "grad_norm": 0.6239719263244708, "learning_rate": 2.0762368207623685e-06, "loss": 0.5902, "step": 31051 }, { "epoch": 0.9066012671162885, "grad_norm": 0.6356276543147251, "learning_rate": 2.07558799675588e-06, "loss": 0.5828, "step": 31052 }, { "epoch": 0.9066304633441359, "grad_norm": 0.6753777421201375, "learning_rate": 2.074939172749392e-06, "loss": 0.6444, "step": 31053 }, { "epoch": 0.9066596595719832, "grad_norm": 0.6036834736627349, "learning_rate": 2.0742903487429037e-06, "loss": 0.5214, "step": 31054 }, { "epoch": 0.9066888557998306, "grad_norm": 0.7813984535446487, "learning_rate": 2.0736415247364153e-06, "loss": 0.6705, "step": 31055 }, { "epoch": 0.9067180520276781, "grad_norm": 0.6678570988628391, "learning_rate": 2.0729927007299273e-06, "loss": 0.6116, "step": 31056 }, { "epoch": 0.9067472482555254, "grad_norm": 0.7140950215796775, "learning_rate": 2.072343876723439e-06, "loss": 0.6811, "step": 31057 }, { "epoch": 0.9067764444833728, "grad_norm": 0.6473897783186779, "learning_rate": 2.0716950527169506e-06, "loss": 0.5936, "step": 31058 }, { "epoch": 0.9068056407112202, "grad_norm": 0.5906149875837252, "learning_rate": 2.0710462287104626e-06, "loss": 0.5472, "step": 31059 }, { "epoch": 0.9068348369390675, "grad_norm": 0.6291999499670164, "learning_rate": 2.070397404703974e-06, "loss": 0.5601, "step": 31060 }, { "epoch": 0.9068640331669149, "grad_norm": 0.6381694492919767, "learning_rate": 2.0697485806974858e-06, "loss": 0.5324, "step": 31061 }, { "epoch": 0.9068932293947622, "grad_norm": 0.6368962895509523, "learning_rate": 2.069099756690998e-06, "loss": 0.5649, "step": 31062 }, { "epoch": 0.9069224256226096, "grad_norm": 0.6829059262153809, "learning_rate": 2.0684509326845094e-06, "loss": 0.6084, "step": 31063 }, { "epoch": 0.906951621850457, "grad_norm": 0.6441743603292772, "learning_rate": 2.067802108678021e-06, "loss": 0.5988, "step": 31064 }, { "epoch": 0.9069808180783043, "grad_norm": 0.6354451413068599, "learning_rate": 2.067153284671533e-06, "loss": 0.5973, "step": 31065 }, { "epoch": 0.9070100143061517, "grad_norm": 0.6676576511952205, "learning_rate": 2.0665044606650447e-06, "loss": 0.6348, "step": 31066 }, { "epoch": 0.907039210533999, "grad_norm": 0.6710943797475087, "learning_rate": 2.0658556366585567e-06, "loss": 0.6709, "step": 31067 }, { "epoch": 0.9070684067618464, "grad_norm": 0.6685557609902917, "learning_rate": 2.0652068126520683e-06, "loss": 0.5897, "step": 31068 }, { "epoch": 0.9070976029896938, "grad_norm": 0.6980800582737837, "learning_rate": 2.06455798864558e-06, "loss": 0.7097, "step": 31069 }, { "epoch": 0.9071267992175411, "grad_norm": 0.6531931736114569, "learning_rate": 2.063909164639092e-06, "loss": 0.5982, "step": 31070 }, { "epoch": 0.9071559954453885, "grad_norm": 0.6152721527343804, "learning_rate": 2.0632603406326035e-06, "loss": 0.553, "step": 31071 }, { "epoch": 0.9071851916732359, "grad_norm": 0.582231690520578, "learning_rate": 2.0626115166261155e-06, "loss": 0.489, "step": 31072 }, { "epoch": 0.9072143879010832, "grad_norm": 0.6433772241033806, "learning_rate": 2.061962692619627e-06, "loss": 0.5621, "step": 31073 }, { "epoch": 0.9072435841289306, "grad_norm": 0.6811058695874623, "learning_rate": 2.061313868613139e-06, "loss": 0.616, "step": 31074 }, { "epoch": 0.9072727803567779, "grad_norm": 0.596344113557787, "learning_rate": 2.0606650446066508e-06, "loss": 0.5204, "step": 31075 }, { "epoch": 0.9073019765846253, "grad_norm": 0.6286190441555102, "learning_rate": 2.0600162206001624e-06, "loss": 0.5752, "step": 31076 }, { "epoch": 0.9073311728124727, "grad_norm": 0.6350125312597069, "learning_rate": 2.059367396593674e-06, "loss": 0.549, "step": 31077 }, { "epoch": 0.90736036904032, "grad_norm": 0.6476133387479832, "learning_rate": 2.058718572587186e-06, "loss": 0.5768, "step": 31078 }, { "epoch": 0.9073895652681674, "grad_norm": 0.6070374963206268, "learning_rate": 2.0580697485806976e-06, "loss": 0.5088, "step": 31079 }, { "epoch": 0.9074187614960147, "grad_norm": 0.597369001256653, "learning_rate": 2.0574209245742092e-06, "loss": 0.5147, "step": 31080 }, { "epoch": 0.9074479577238621, "grad_norm": 0.6257356409753612, "learning_rate": 2.0567721005677212e-06, "loss": 0.5364, "step": 31081 }, { "epoch": 0.9074771539517095, "grad_norm": 0.6302801505303598, "learning_rate": 2.056123276561233e-06, "loss": 0.5894, "step": 31082 }, { "epoch": 0.9075063501795568, "grad_norm": 0.6568412002375407, "learning_rate": 2.0554744525547445e-06, "loss": 0.5753, "step": 31083 }, { "epoch": 0.9075355464074042, "grad_norm": 0.6281944429061603, "learning_rate": 2.0548256285482565e-06, "loss": 0.5978, "step": 31084 }, { "epoch": 0.9075647426352516, "grad_norm": 0.6498999381082059, "learning_rate": 2.054176804541768e-06, "loss": 0.5961, "step": 31085 }, { "epoch": 0.9075939388630989, "grad_norm": 0.6587883810401145, "learning_rate": 2.05352798053528e-06, "loss": 0.6335, "step": 31086 }, { "epoch": 0.9076231350909463, "grad_norm": 0.7314790702013082, "learning_rate": 2.0528791565287917e-06, "loss": 0.7048, "step": 31087 }, { "epoch": 0.9076523313187936, "grad_norm": 0.5754768142782014, "learning_rate": 2.0522303325223033e-06, "loss": 0.4338, "step": 31088 }, { "epoch": 0.907681527546641, "grad_norm": 0.6739644072137564, "learning_rate": 2.0515815085158153e-06, "loss": 0.591, "step": 31089 }, { "epoch": 0.9077107237744884, "grad_norm": 0.641309921337802, "learning_rate": 2.050932684509327e-06, "loss": 0.6083, "step": 31090 }, { "epoch": 0.9077399200023357, "grad_norm": 0.6789374067559862, "learning_rate": 2.050283860502839e-06, "loss": 0.5704, "step": 31091 }, { "epoch": 0.9077691162301831, "grad_norm": 0.6045968884354606, "learning_rate": 2.0496350364963506e-06, "loss": 0.5087, "step": 31092 }, { "epoch": 0.9077983124580304, "grad_norm": 0.6247433634702148, "learning_rate": 2.0489862124898626e-06, "loss": 0.5844, "step": 31093 }, { "epoch": 0.9078275086858778, "grad_norm": 0.652849185360806, "learning_rate": 2.0483373884833742e-06, "loss": 0.5424, "step": 31094 }, { "epoch": 0.9078567049137252, "grad_norm": 0.6768026663505913, "learning_rate": 2.047688564476886e-06, "loss": 0.6423, "step": 31095 }, { "epoch": 0.9078859011415725, "grad_norm": 0.6087971741256498, "learning_rate": 2.0470397404703974e-06, "loss": 0.5077, "step": 31096 }, { "epoch": 0.9079150973694199, "grad_norm": 0.6100372949493948, "learning_rate": 2.0463909164639094e-06, "loss": 0.5046, "step": 31097 }, { "epoch": 0.9079442935972672, "grad_norm": 0.6594865424553975, "learning_rate": 2.045742092457421e-06, "loss": 0.5424, "step": 31098 }, { "epoch": 0.9079734898251146, "grad_norm": 0.6385160504009201, "learning_rate": 2.0450932684509327e-06, "loss": 0.5765, "step": 31099 }, { "epoch": 0.908002686052962, "grad_norm": 0.6220005241398522, "learning_rate": 2.0444444444444447e-06, "loss": 0.5847, "step": 31100 }, { "epoch": 0.9080318822808093, "grad_norm": 0.6817204565692405, "learning_rate": 2.0437956204379563e-06, "loss": 0.6536, "step": 31101 }, { "epoch": 0.9080610785086567, "grad_norm": 0.6903483235406794, "learning_rate": 2.043146796431468e-06, "loss": 0.6461, "step": 31102 }, { "epoch": 0.908090274736504, "grad_norm": 0.6850366610580109, "learning_rate": 2.04249797242498e-06, "loss": 0.6446, "step": 31103 }, { "epoch": 0.9081194709643514, "grad_norm": 0.6484578272212256, "learning_rate": 2.0418491484184915e-06, "loss": 0.6318, "step": 31104 }, { "epoch": 0.9081486671921988, "grad_norm": 0.6787673433457834, "learning_rate": 2.0412003244120035e-06, "loss": 0.5965, "step": 31105 }, { "epoch": 0.9081778634200461, "grad_norm": 0.6181448641359656, "learning_rate": 2.040551500405515e-06, "loss": 0.5706, "step": 31106 }, { "epoch": 0.9082070596478935, "grad_norm": 0.6554379511938205, "learning_rate": 2.039902676399027e-06, "loss": 0.5886, "step": 31107 }, { "epoch": 0.9082362558757409, "grad_norm": 0.6414106990177459, "learning_rate": 2.0392538523925388e-06, "loss": 0.6141, "step": 31108 }, { "epoch": 0.9082654521035882, "grad_norm": 0.6674370572932136, "learning_rate": 2.0386050283860504e-06, "loss": 0.6388, "step": 31109 }, { "epoch": 0.9082946483314356, "grad_norm": 0.6581577703587868, "learning_rate": 2.0379562043795624e-06, "loss": 0.6022, "step": 31110 }, { "epoch": 0.908323844559283, "grad_norm": 0.6359605548519623, "learning_rate": 2.037307380373074e-06, "loss": 0.5661, "step": 31111 }, { "epoch": 0.9083530407871303, "grad_norm": 0.6141806990141815, "learning_rate": 2.0366585563665856e-06, "loss": 0.5454, "step": 31112 }, { "epoch": 0.9083822370149777, "grad_norm": 0.688919702788035, "learning_rate": 2.0360097323600976e-06, "loss": 0.588, "step": 31113 }, { "epoch": 0.908411433242825, "grad_norm": 0.7040611406051529, "learning_rate": 2.0353609083536093e-06, "loss": 0.7168, "step": 31114 }, { "epoch": 0.9084406294706724, "grad_norm": 0.6575184694067676, "learning_rate": 2.034712084347121e-06, "loss": 0.6354, "step": 31115 }, { "epoch": 0.9084698256985198, "grad_norm": 0.627789490848782, "learning_rate": 2.034063260340633e-06, "loss": 0.5988, "step": 31116 }, { "epoch": 0.9084990219263671, "grad_norm": 0.6635852987386689, "learning_rate": 2.0334144363341445e-06, "loss": 0.5992, "step": 31117 }, { "epoch": 0.9085282181542145, "grad_norm": 0.632392806179203, "learning_rate": 2.032765612327656e-06, "loss": 0.5966, "step": 31118 }, { "epoch": 0.9085574143820618, "grad_norm": 0.6093062654604706, "learning_rate": 2.032116788321168e-06, "loss": 0.531, "step": 31119 }, { "epoch": 0.9085866106099092, "grad_norm": 0.6463250152121653, "learning_rate": 2.0314679643146797e-06, "loss": 0.5861, "step": 31120 }, { "epoch": 0.9086158068377566, "grad_norm": 0.5885921340559255, "learning_rate": 2.0308191403081913e-06, "loss": 0.5008, "step": 31121 }, { "epoch": 0.9086450030656039, "grad_norm": 0.6505752992306963, "learning_rate": 2.0301703163017034e-06, "loss": 0.5614, "step": 31122 }, { "epoch": 0.9086741992934513, "grad_norm": 0.5833333689191229, "learning_rate": 2.029521492295215e-06, "loss": 0.5189, "step": 31123 }, { "epoch": 0.9087033955212986, "grad_norm": 0.6601177117699972, "learning_rate": 2.028872668288727e-06, "loss": 0.5914, "step": 31124 }, { "epoch": 0.908732591749146, "grad_norm": 0.6841479425398915, "learning_rate": 2.0282238442822386e-06, "loss": 0.592, "step": 31125 }, { "epoch": 0.9087617879769934, "grad_norm": 0.6028664490618683, "learning_rate": 2.0275750202757506e-06, "loss": 0.5781, "step": 31126 }, { "epoch": 0.9087909842048407, "grad_norm": 0.6770441711354263, "learning_rate": 2.0269261962692622e-06, "loss": 0.601, "step": 31127 }, { "epoch": 0.9088201804326881, "grad_norm": 0.6887751354322962, "learning_rate": 2.026277372262774e-06, "loss": 0.6254, "step": 31128 }, { "epoch": 0.9088493766605354, "grad_norm": 0.6018349843418702, "learning_rate": 2.025628548256286e-06, "loss": 0.5162, "step": 31129 }, { "epoch": 0.9088785728883828, "grad_norm": 0.6383461955258459, "learning_rate": 2.0249797242497975e-06, "loss": 0.5775, "step": 31130 }, { "epoch": 0.9089077691162302, "grad_norm": 0.6621371235789244, "learning_rate": 2.024330900243309e-06, "loss": 0.6329, "step": 31131 }, { "epoch": 0.9089369653440775, "grad_norm": 0.6178204768468425, "learning_rate": 2.023682076236821e-06, "loss": 0.5488, "step": 31132 }, { "epoch": 0.9089661615719249, "grad_norm": 0.6179712622334197, "learning_rate": 2.0230332522303327e-06, "loss": 0.5388, "step": 31133 }, { "epoch": 0.9089953577997723, "grad_norm": 0.6237600212403641, "learning_rate": 2.0223844282238443e-06, "loss": 0.5633, "step": 31134 }, { "epoch": 0.9090245540276196, "grad_norm": 0.6330841266350413, "learning_rate": 2.021735604217356e-06, "loss": 0.5385, "step": 31135 }, { "epoch": 0.909053750255467, "grad_norm": 0.6478535579879477, "learning_rate": 2.021086780210868e-06, "loss": 0.5864, "step": 31136 }, { "epoch": 0.9090829464833143, "grad_norm": 0.6736156314515318, "learning_rate": 2.0204379562043795e-06, "loss": 0.6646, "step": 31137 }, { "epoch": 0.9091121427111617, "grad_norm": 0.627488635614382, "learning_rate": 2.0197891321978916e-06, "loss": 0.5821, "step": 31138 }, { "epoch": 0.9091413389390091, "grad_norm": 0.6687405716026881, "learning_rate": 2.019140308191403e-06, "loss": 0.6932, "step": 31139 }, { "epoch": 0.9091705351668564, "grad_norm": 0.6146365017638605, "learning_rate": 2.018491484184915e-06, "loss": 0.5479, "step": 31140 }, { "epoch": 0.9091997313947038, "grad_norm": 0.6466304223991979, "learning_rate": 2.0178426601784268e-06, "loss": 0.644, "step": 31141 }, { "epoch": 0.9092289276225511, "grad_norm": 0.6666670058892068, "learning_rate": 2.0171938361719384e-06, "loss": 0.651, "step": 31142 }, { "epoch": 0.9092581238503985, "grad_norm": 0.6790568094803923, "learning_rate": 2.0165450121654504e-06, "loss": 0.6269, "step": 31143 }, { "epoch": 0.9092873200782459, "grad_norm": 0.6739931233013272, "learning_rate": 2.015896188158962e-06, "loss": 0.6067, "step": 31144 }, { "epoch": 0.9093165163060932, "grad_norm": 0.6112324659174383, "learning_rate": 2.015247364152474e-06, "loss": 0.5312, "step": 31145 }, { "epoch": 0.9093457125339406, "grad_norm": 0.6432991692369541, "learning_rate": 2.0145985401459857e-06, "loss": 0.572, "step": 31146 }, { "epoch": 0.909374908761788, "grad_norm": 0.6405227472640789, "learning_rate": 2.0139497161394973e-06, "loss": 0.526, "step": 31147 }, { "epoch": 0.9094041049896353, "grad_norm": 0.6508763934023322, "learning_rate": 2.0133008921330093e-06, "loss": 0.602, "step": 31148 }, { "epoch": 0.9094333012174827, "grad_norm": 0.6569968679962913, "learning_rate": 2.012652068126521e-06, "loss": 0.6417, "step": 31149 }, { "epoch": 0.90946249744533, "grad_norm": 0.6383926554112227, "learning_rate": 2.0120032441200325e-06, "loss": 0.5907, "step": 31150 }, { "epoch": 0.9094916936731774, "grad_norm": 0.5966137573090137, "learning_rate": 2.0113544201135445e-06, "loss": 0.4829, "step": 31151 }, { "epoch": 0.9095208899010248, "grad_norm": 0.6824668706196244, "learning_rate": 2.010705596107056e-06, "loss": 0.6554, "step": 31152 }, { "epoch": 0.9095500861288721, "grad_norm": 0.6599353941472124, "learning_rate": 2.0100567721005677e-06, "loss": 0.6383, "step": 31153 }, { "epoch": 0.9095792823567195, "grad_norm": 0.6012977913269215, "learning_rate": 2.0094079480940793e-06, "loss": 0.5157, "step": 31154 }, { "epoch": 0.9096084785845668, "grad_norm": 0.6204301374120441, "learning_rate": 2.0087591240875914e-06, "loss": 0.5587, "step": 31155 }, { "epoch": 0.9096376748124142, "grad_norm": 0.6187116268618588, "learning_rate": 2.008110300081103e-06, "loss": 0.5583, "step": 31156 }, { "epoch": 0.9096668710402616, "grad_norm": 0.6971827241204561, "learning_rate": 2.007461476074615e-06, "loss": 0.6706, "step": 31157 }, { "epoch": 0.9096960672681089, "grad_norm": 0.6667581194200759, "learning_rate": 2.0068126520681266e-06, "loss": 0.6247, "step": 31158 }, { "epoch": 0.9097252634959563, "grad_norm": 0.654282506592879, "learning_rate": 2.0061638280616386e-06, "loss": 0.5986, "step": 31159 }, { "epoch": 0.9097544597238036, "grad_norm": 0.6672162904932224, "learning_rate": 2.0055150040551502e-06, "loss": 0.6158, "step": 31160 }, { "epoch": 0.909783655951651, "grad_norm": 0.5986864351227267, "learning_rate": 2.004866180048662e-06, "loss": 0.5408, "step": 31161 }, { "epoch": 0.9098128521794984, "grad_norm": 0.6786451163941408, "learning_rate": 2.004217356042174e-06, "loss": 0.6822, "step": 31162 }, { "epoch": 0.9098420484073457, "grad_norm": 0.757998588421431, "learning_rate": 2.0035685320356855e-06, "loss": 0.749, "step": 31163 }, { "epoch": 0.9098712446351931, "grad_norm": 0.6039982982849291, "learning_rate": 2.0029197080291975e-06, "loss": 0.5357, "step": 31164 }, { "epoch": 0.9099004408630404, "grad_norm": 0.5980541266241781, "learning_rate": 2.002270884022709e-06, "loss": 0.5165, "step": 31165 }, { "epoch": 0.9099296370908878, "grad_norm": 0.6340719152270774, "learning_rate": 2.0016220600162207e-06, "loss": 0.5593, "step": 31166 }, { "epoch": 0.9099588333187352, "grad_norm": 0.6914890821927664, "learning_rate": 2.0009732360097327e-06, "loss": 0.6267, "step": 31167 }, { "epoch": 0.9099880295465825, "grad_norm": 0.6747855647128919, "learning_rate": 2.0003244120032443e-06, "loss": 0.6398, "step": 31168 }, { "epoch": 0.9100172257744299, "grad_norm": 0.6569650862042725, "learning_rate": 1.999675587996756e-06, "loss": 0.5865, "step": 31169 }, { "epoch": 0.9100464220022773, "grad_norm": 0.6598740927028635, "learning_rate": 1.9990267639902675e-06, "loss": 0.6308, "step": 31170 }, { "epoch": 0.9100756182301246, "grad_norm": 0.6205376509650888, "learning_rate": 1.9983779399837796e-06, "loss": 0.5369, "step": 31171 }, { "epoch": 0.910104814457972, "grad_norm": 0.6678542048167286, "learning_rate": 1.997729115977291e-06, "loss": 0.6351, "step": 31172 }, { "epoch": 0.9101340106858193, "grad_norm": 0.6853775240604919, "learning_rate": 1.997080291970803e-06, "loss": 0.5819, "step": 31173 }, { "epoch": 0.9101632069136667, "grad_norm": 0.6400718908343878, "learning_rate": 1.996431467964315e-06, "loss": 0.6063, "step": 31174 }, { "epoch": 0.9101924031415141, "grad_norm": 0.6942466315510194, "learning_rate": 1.9957826439578264e-06, "loss": 0.6317, "step": 31175 }, { "epoch": 0.9102215993693615, "grad_norm": 0.6425375210600512, "learning_rate": 1.9951338199513384e-06, "loss": 0.6058, "step": 31176 }, { "epoch": 0.9102507955972089, "grad_norm": 0.6562586748644258, "learning_rate": 1.99448499594485e-06, "loss": 0.6106, "step": 31177 }, { "epoch": 0.9102799918250563, "grad_norm": 0.6630554299109509, "learning_rate": 1.993836171938362e-06, "loss": 0.6546, "step": 31178 }, { "epoch": 0.9103091880529036, "grad_norm": 0.651614680174617, "learning_rate": 1.9931873479318737e-06, "loss": 0.6018, "step": 31179 }, { "epoch": 0.910338384280751, "grad_norm": 0.6384515165279349, "learning_rate": 1.9925385239253853e-06, "loss": 0.582, "step": 31180 }, { "epoch": 0.9103675805085983, "grad_norm": 0.6425284819555579, "learning_rate": 1.9918896999188973e-06, "loss": 0.5919, "step": 31181 }, { "epoch": 0.9103967767364457, "grad_norm": 0.6266314456908172, "learning_rate": 1.991240875912409e-06, "loss": 0.5242, "step": 31182 }, { "epoch": 0.9104259729642931, "grad_norm": 0.6284325142131905, "learning_rate": 1.990592051905921e-06, "loss": 0.5693, "step": 31183 }, { "epoch": 0.9104551691921404, "grad_norm": 0.6242893490379274, "learning_rate": 1.9899432278994325e-06, "loss": 0.5478, "step": 31184 }, { "epoch": 0.9104843654199878, "grad_norm": 0.6332897572542797, "learning_rate": 1.989294403892944e-06, "loss": 0.5985, "step": 31185 }, { "epoch": 0.9105135616478351, "grad_norm": 0.681510001393117, "learning_rate": 1.988645579886456e-06, "loss": 0.6332, "step": 31186 }, { "epoch": 0.9105427578756825, "grad_norm": 0.6569201838466977, "learning_rate": 1.9879967558799678e-06, "loss": 0.5917, "step": 31187 }, { "epoch": 0.9105719541035299, "grad_norm": 0.6322745346732754, "learning_rate": 1.9873479318734794e-06, "loss": 0.603, "step": 31188 }, { "epoch": 0.9106011503313772, "grad_norm": 0.6661494614864663, "learning_rate": 1.986699107866991e-06, "loss": 0.6523, "step": 31189 }, { "epoch": 0.9106303465592246, "grad_norm": 0.6369144107273059, "learning_rate": 1.986050283860503e-06, "loss": 0.5782, "step": 31190 }, { "epoch": 0.910659542787072, "grad_norm": 0.5789512707250158, "learning_rate": 1.9854014598540146e-06, "loss": 0.479, "step": 31191 }, { "epoch": 0.9106887390149193, "grad_norm": 0.7086840994924962, "learning_rate": 1.9847526358475266e-06, "loss": 0.664, "step": 31192 }, { "epoch": 0.9107179352427667, "grad_norm": 0.634766840608346, "learning_rate": 1.9841038118410382e-06, "loss": 0.6181, "step": 31193 }, { "epoch": 0.910747131470614, "grad_norm": 0.6435832724665422, "learning_rate": 1.98345498783455e-06, "loss": 0.5896, "step": 31194 }, { "epoch": 0.9107763276984614, "grad_norm": 0.6237392971226092, "learning_rate": 1.982806163828062e-06, "loss": 0.5717, "step": 31195 }, { "epoch": 0.9108055239263088, "grad_norm": 0.7039124995472424, "learning_rate": 1.9821573398215735e-06, "loss": 0.6949, "step": 31196 }, { "epoch": 0.9108347201541561, "grad_norm": 0.6336209403510329, "learning_rate": 1.9815085158150855e-06, "loss": 0.5724, "step": 31197 }, { "epoch": 0.9108639163820035, "grad_norm": 0.6685714938952906, "learning_rate": 1.980859691808597e-06, "loss": 0.5943, "step": 31198 }, { "epoch": 0.9108931126098508, "grad_norm": 0.6205563790607702, "learning_rate": 1.980210867802109e-06, "loss": 0.5894, "step": 31199 }, { "epoch": 0.9109223088376982, "grad_norm": 0.659904769810871, "learning_rate": 1.9795620437956207e-06, "loss": 0.5736, "step": 31200 }, { "epoch": 0.9109515050655456, "grad_norm": 0.6611113883069085, "learning_rate": 1.9789132197891323e-06, "loss": 0.628, "step": 31201 }, { "epoch": 0.9109807012933929, "grad_norm": 0.7034058514090648, "learning_rate": 1.9782643957826444e-06, "loss": 0.6561, "step": 31202 }, { "epoch": 0.9110098975212403, "grad_norm": 0.703318294884894, "learning_rate": 1.977615571776156e-06, "loss": 0.678, "step": 31203 }, { "epoch": 0.9110390937490876, "grad_norm": 0.6377850018302738, "learning_rate": 1.9769667477696676e-06, "loss": 0.6295, "step": 31204 }, { "epoch": 0.911068289976935, "grad_norm": 0.6534662993742394, "learning_rate": 1.9763179237631796e-06, "loss": 0.5772, "step": 31205 }, { "epoch": 0.9110974862047824, "grad_norm": 0.6131492396870287, "learning_rate": 1.975669099756691e-06, "loss": 0.5141, "step": 31206 }, { "epoch": 0.9111266824326297, "grad_norm": 0.618862172295814, "learning_rate": 1.975020275750203e-06, "loss": 0.529, "step": 31207 }, { "epoch": 0.9111558786604771, "grad_norm": 0.721261957004652, "learning_rate": 1.9743714517437144e-06, "loss": 0.6714, "step": 31208 }, { "epoch": 0.9111850748883245, "grad_norm": 0.6278247502823717, "learning_rate": 1.9737226277372264e-06, "loss": 0.5679, "step": 31209 }, { "epoch": 0.9112142711161718, "grad_norm": 0.609606610689535, "learning_rate": 1.973073803730738e-06, "loss": 0.5486, "step": 31210 }, { "epoch": 0.9112434673440192, "grad_norm": 0.6563969191915556, "learning_rate": 1.97242497972425e-06, "loss": 0.6138, "step": 31211 }, { "epoch": 0.9112726635718665, "grad_norm": 0.6887715073732333, "learning_rate": 1.9717761557177617e-06, "loss": 0.6552, "step": 31212 }, { "epoch": 0.9113018597997139, "grad_norm": 0.6766623189530843, "learning_rate": 1.9711273317112733e-06, "loss": 0.6152, "step": 31213 }, { "epoch": 0.9113310560275613, "grad_norm": 0.684659176652926, "learning_rate": 1.9704785077047853e-06, "loss": 0.6541, "step": 31214 }, { "epoch": 0.9113602522554086, "grad_norm": 0.6663750209949254, "learning_rate": 1.969829683698297e-06, "loss": 0.5843, "step": 31215 }, { "epoch": 0.911389448483256, "grad_norm": 0.7028295518966488, "learning_rate": 1.969180859691809e-06, "loss": 0.673, "step": 31216 }, { "epoch": 0.9114186447111033, "grad_norm": 0.6291929214960413, "learning_rate": 1.9685320356853205e-06, "loss": 0.564, "step": 31217 }, { "epoch": 0.9114478409389507, "grad_norm": 0.7255615797408591, "learning_rate": 1.9678832116788326e-06, "loss": 0.7087, "step": 31218 }, { "epoch": 0.9114770371667981, "grad_norm": 0.615187519356941, "learning_rate": 1.967234387672344e-06, "loss": 0.5502, "step": 31219 }, { "epoch": 0.9115062333946454, "grad_norm": 0.6347897033510379, "learning_rate": 1.9665855636658558e-06, "loss": 0.554, "step": 31220 }, { "epoch": 0.9115354296224928, "grad_norm": 0.6384212232881231, "learning_rate": 1.9659367396593678e-06, "loss": 0.5457, "step": 31221 }, { "epoch": 0.9115646258503401, "grad_norm": 0.5972273272284383, "learning_rate": 1.9652879156528794e-06, "loss": 0.5543, "step": 31222 }, { "epoch": 0.9115938220781875, "grad_norm": 0.6522416553102844, "learning_rate": 1.964639091646391e-06, "loss": 0.6387, "step": 31223 }, { "epoch": 0.9116230183060349, "grad_norm": 0.6370469555088387, "learning_rate": 1.9639902676399026e-06, "loss": 0.6033, "step": 31224 }, { "epoch": 0.9116522145338822, "grad_norm": 0.6662304105959923, "learning_rate": 1.9633414436334146e-06, "loss": 0.6039, "step": 31225 }, { "epoch": 0.9116814107617296, "grad_norm": 0.6979583197568057, "learning_rate": 1.9626926196269262e-06, "loss": 0.6629, "step": 31226 }, { "epoch": 0.911710606989577, "grad_norm": 0.6217057101529619, "learning_rate": 1.962043795620438e-06, "loss": 0.5272, "step": 31227 }, { "epoch": 0.9117398032174243, "grad_norm": 0.6582877449762033, "learning_rate": 1.96139497161395e-06, "loss": 0.626, "step": 31228 }, { "epoch": 0.9117689994452717, "grad_norm": 0.5954406721936152, "learning_rate": 1.9607461476074615e-06, "loss": 0.5249, "step": 31229 }, { "epoch": 0.911798195673119, "grad_norm": 0.60338355542383, "learning_rate": 1.9600973236009735e-06, "loss": 0.4972, "step": 31230 }, { "epoch": 0.9118273919009664, "grad_norm": 0.6588670615989004, "learning_rate": 1.959448499594485e-06, "loss": 0.6208, "step": 31231 }, { "epoch": 0.9118565881288138, "grad_norm": 0.6206891738497552, "learning_rate": 1.958799675587997e-06, "loss": 0.5589, "step": 31232 }, { "epoch": 0.9118857843566611, "grad_norm": 0.6353898635277567, "learning_rate": 1.9581508515815087e-06, "loss": 0.5629, "step": 31233 }, { "epoch": 0.9119149805845085, "grad_norm": 0.6579115959351004, "learning_rate": 1.9575020275750203e-06, "loss": 0.5911, "step": 31234 }, { "epoch": 0.9119441768123558, "grad_norm": 0.6223376891742209, "learning_rate": 1.9568532035685324e-06, "loss": 0.5301, "step": 31235 }, { "epoch": 0.9119733730402032, "grad_norm": 0.6180248640413508, "learning_rate": 1.956204379562044e-06, "loss": 0.5559, "step": 31236 }, { "epoch": 0.9120025692680506, "grad_norm": 0.8470061194575836, "learning_rate": 1.955555555555556e-06, "loss": 0.6631, "step": 31237 }, { "epoch": 0.9120317654958979, "grad_norm": 0.6846754504783569, "learning_rate": 1.9549067315490676e-06, "loss": 0.6256, "step": 31238 }, { "epoch": 0.9120609617237453, "grad_norm": 0.6375277447024926, "learning_rate": 1.954257907542579e-06, "loss": 0.6006, "step": 31239 }, { "epoch": 0.9120901579515927, "grad_norm": 0.6655040972350511, "learning_rate": 1.9536090835360912e-06, "loss": 0.5976, "step": 31240 }, { "epoch": 0.91211935417944, "grad_norm": 0.6401852908294497, "learning_rate": 1.952960259529603e-06, "loss": 0.6205, "step": 31241 }, { "epoch": 0.9121485504072874, "grad_norm": 0.6224160048637262, "learning_rate": 1.9523114355231144e-06, "loss": 0.5427, "step": 31242 }, { "epoch": 0.9121777466351347, "grad_norm": 0.6942995660462693, "learning_rate": 1.951662611516626e-06, "loss": 0.691, "step": 31243 }, { "epoch": 0.9122069428629821, "grad_norm": 0.6352389055848988, "learning_rate": 1.951013787510138e-06, "loss": 0.5142, "step": 31244 }, { "epoch": 0.9122361390908295, "grad_norm": 0.5864160949609984, "learning_rate": 1.9503649635036497e-06, "loss": 0.4808, "step": 31245 }, { "epoch": 0.9122653353186768, "grad_norm": 0.6172625277747069, "learning_rate": 1.9497161394971613e-06, "loss": 0.577, "step": 31246 }, { "epoch": 0.9122945315465242, "grad_norm": 0.6600807680404359, "learning_rate": 1.9490673154906733e-06, "loss": 0.6059, "step": 31247 }, { "epoch": 0.9123237277743715, "grad_norm": 0.619700861309764, "learning_rate": 1.948418491484185e-06, "loss": 0.5656, "step": 31248 }, { "epoch": 0.9123529240022189, "grad_norm": 0.656715728655642, "learning_rate": 1.947769667477697e-06, "loss": 0.6048, "step": 31249 }, { "epoch": 0.9123821202300663, "grad_norm": 0.6635575117623423, "learning_rate": 1.9471208434712085e-06, "loss": 0.5782, "step": 31250 }, { "epoch": 0.9124113164579136, "grad_norm": 0.6840841645655548, "learning_rate": 1.9464720194647206e-06, "loss": 0.6411, "step": 31251 }, { "epoch": 0.912440512685761, "grad_norm": 0.6426558822895478, "learning_rate": 1.945823195458232e-06, "loss": 0.5765, "step": 31252 }, { "epoch": 0.9124697089136083, "grad_norm": 0.6934646838021868, "learning_rate": 1.9451743714517438e-06, "loss": 0.6408, "step": 31253 }, { "epoch": 0.9124989051414557, "grad_norm": 0.6632977221863288, "learning_rate": 1.944525547445256e-06, "loss": 0.6227, "step": 31254 }, { "epoch": 0.9125281013693031, "grad_norm": 0.6446105896346038, "learning_rate": 1.9438767234387674e-06, "loss": 0.5692, "step": 31255 }, { "epoch": 0.9125572975971504, "grad_norm": 0.663040191718174, "learning_rate": 1.9432278994322794e-06, "loss": 0.6523, "step": 31256 }, { "epoch": 0.9125864938249978, "grad_norm": 0.6553364340181023, "learning_rate": 1.942579075425791e-06, "loss": 0.6067, "step": 31257 }, { "epoch": 0.9126156900528452, "grad_norm": 0.6617707640962446, "learning_rate": 1.9419302514193026e-06, "loss": 0.5874, "step": 31258 }, { "epoch": 0.9126448862806925, "grad_norm": 0.6730015212815654, "learning_rate": 1.9412814274128147e-06, "loss": 0.6047, "step": 31259 }, { "epoch": 0.9126740825085399, "grad_norm": 0.5691724891946504, "learning_rate": 1.9406326034063263e-06, "loss": 0.4786, "step": 31260 }, { "epoch": 0.9127032787363872, "grad_norm": 0.6300028217805795, "learning_rate": 1.939983779399838e-06, "loss": 0.5738, "step": 31261 }, { "epoch": 0.9127324749642346, "grad_norm": 0.6820942605325809, "learning_rate": 1.9393349553933495e-06, "loss": 0.6366, "step": 31262 }, { "epoch": 0.912761671192082, "grad_norm": 0.8594065347393267, "learning_rate": 1.9386861313868615e-06, "loss": 0.7039, "step": 31263 }, { "epoch": 0.9127908674199293, "grad_norm": 0.642557210936393, "learning_rate": 1.938037307380373e-06, "loss": 0.5932, "step": 31264 }, { "epoch": 0.9128200636477767, "grad_norm": 0.6970633557636905, "learning_rate": 1.9373884833738847e-06, "loss": 0.6495, "step": 31265 }, { "epoch": 0.912849259875624, "grad_norm": 0.6442785045630794, "learning_rate": 1.9367396593673967e-06, "loss": 0.6117, "step": 31266 }, { "epoch": 0.9128784561034714, "grad_norm": 0.6511465290366186, "learning_rate": 1.9360908353609083e-06, "loss": 0.5734, "step": 31267 }, { "epoch": 0.9129076523313188, "grad_norm": 0.6761324082062896, "learning_rate": 1.9354420113544204e-06, "loss": 0.6053, "step": 31268 }, { "epoch": 0.9129368485591661, "grad_norm": 0.6703247470564733, "learning_rate": 1.934793187347932e-06, "loss": 0.615, "step": 31269 }, { "epoch": 0.9129660447870135, "grad_norm": 0.5972726816691217, "learning_rate": 1.934144363341444e-06, "loss": 0.497, "step": 31270 }, { "epoch": 0.9129952410148608, "grad_norm": 0.6085929796904064, "learning_rate": 1.9334955393349556e-06, "loss": 0.5098, "step": 31271 }, { "epoch": 0.9130244372427082, "grad_norm": 0.6087106217405568, "learning_rate": 1.932846715328467e-06, "loss": 0.5833, "step": 31272 }, { "epoch": 0.9130536334705556, "grad_norm": 0.6624412483099315, "learning_rate": 1.9321978913219792e-06, "loss": 0.6204, "step": 31273 }, { "epoch": 0.9130828296984029, "grad_norm": 0.5976695826771528, "learning_rate": 1.931549067315491e-06, "loss": 0.5115, "step": 31274 }, { "epoch": 0.9131120259262503, "grad_norm": 0.6338622324109641, "learning_rate": 1.930900243309003e-06, "loss": 0.5755, "step": 31275 }, { "epoch": 0.9131412221540977, "grad_norm": 0.6330697845346691, "learning_rate": 1.9302514193025145e-06, "loss": 0.5802, "step": 31276 }, { "epoch": 0.913170418381945, "grad_norm": 0.632131900497798, "learning_rate": 1.929602595296026e-06, "loss": 0.5528, "step": 31277 }, { "epoch": 0.9131996146097924, "grad_norm": 0.6430795354161538, "learning_rate": 1.9289537712895377e-06, "loss": 0.5985, "step": 31278 }, { "epoch": 0.9132288108376397, "grad_norm": 0.6244187079678063, "learning_rate": 1.9283049472830497e-06, "loss": 0.5676, "step": 31279 }, { "epoch": 0.9132580070654871, "grad_norm": 0.6611156581558224, "learning_rate": 1.9276561232765613e-06, "loss": 0.6017, "step": 31280 }, { "epoch": 0.9132872032933345, "grad_norm": 0.5859239294539648, "learning_rate": 1.927007299270073e-06, "loss": 0.4756, "step": 31281 }, { "epoch": 0.9133163995211818, "grad_norm": 0.6545013794663379, "learning_rate": 1.926358475263585e-06, "loss": 0.6018, "step": 31282 }, { "epoch": 0.9133455957490292, "grad_norm": 0.6277841702180398, "learning_rate": 1.9257096512570965e-06, "loss": 0.5778, "step": 31283 }, { "epoch": 0.9133747919768765, "grad_norm": 0.6295031790017364, "learning_rate": 1.9250608272506086e-06, "loss": 0.5594, "step": 31284 }, { "epoch": 0.9134039882047239, "grad_norm": 0.6025022198617629, "learning_rate": 1.92441200324412e-06, "loss": 0.54, "step": 31285 }, { "epoch": 0.9134331844325713, "grad_norm": 0.62169292443914, "learning_rate": 1.9237631792376318e-06, "loss": 0.5832, "step": 31286 }, { "epoch": 0.9134623806604186, "grad_norm": 0.5838341699171846, "learning_rate": 1.923114355231144e-06, "loss": 0.515, "step": 31287 }, { "epoch": 0.913491576888266, "grad_norm": 0.6740440856579344, "learning_rate": 1.9224655312246554e-06, "loss": 0.6358, "step": 31288 }, { "epoch": 0.9135207731161133, "grad_norm": 0.6180764029157845, "learning_rate": 1.9218167072181674e-06, "loss": 0.5373, "step": 31289 }, { "epoch": 0.9135499693439607, "grad_norm": 0.6246013486066861, "learning_rate": 1.921167883211679e-06, "loss": 0.5649, "step": 31290 }, { "epoch": 0.9135791655718081, "grad_norm": 0.6979402903605957, "learning_rate": 1.920519059205191e-06, "loss": 0.6326, "step": 31291 }, { "epoch": 0.9136083617996554, "grad_norm": 0.6996905439529618, "learning_rate": 1.9198702351987027e-06, "loss": 0.6312, "step": 31292 }, { "epoch": 0.9136375580275028, "grad_norm": 0.6111624875807236, "learning_rate": 1.9192214111922143e-06, "loss": 0.5505, "step": 31293 }, { "epoch": 0.9136667542553502, "grad_norm": 0.6447618418277632, "learning_rate": 1.9185725871857263e-06, "loss": 0.5664, "step": 31294 }, { "epoch": 0.9136959504831975, "grad_norm": 0.5972942929855455, "learning_rate": 1.917923763179238e-06, "loss": 0.4987, "step": 31295 }, { "epoch": 0.9137251467110449, "grad_norm": 0.6197824115972533, "learning_rate": 1.9172749391727495e-06, "loss": 0.5287, "step": 31296 }, { "epoch": 0.9137543429388923, "grad_norm": 0.6650060973169502, "learning_rate": 1.916626115166261e-06, "loss": 0.6082, "step": 31297 }, { "epoch": 0.9137835391667397, "grad_norm": 0.6457149583426885, "learning_rate": 1.915977291159773e-06, "loss": 0.623, "step": 31298 }, { "epoch": 0.9138127353945871, "grad_norm": 0.6950421073158196, "learning_rate": 1.9153284671532847e-06, "loss": 0.6811, "step": 31299 }, { "epoch": 0.9138419316224344, "grad_norm": 0.6700874232885858, "learning_rate": 1.9146796431467963e-06, "loss": 0.5994, "step": 31300 }, { "epoch": 0.9138711278502818, "grad_norm": 0.6048715868969167, "learning_rate": 1.9140308191403084e-06, "loss": 0.5211, "step": 31301 }, { "epoch": 0.9139003240781292, "grad_norm": 0.6855023781929405, "learning_rate": 1.91338199513382e-06, "loss": 0.6945, "step": 31302 }, { "epoch": 0.9139295203059765, "grad_norm": 0.6589021707586908, "learning_rate": 1.912733171127332e-06, "loss": 0.6203, "step": 31303 }, { "epoch": 0.9139587165338239, "grad_norm": 0.6257272200540522, "learning_rate": 1.9120843471208436e-06, "loss": 0.5521, "step": 31304 }, { "epoch": 0.9139879127616712, "grad_norm": 0.6671898476731113, "learning_rate": 1.911435523114355e-06, "loss": 0.6018, "step": 31305 }, { "epoch": 0.9140171089895186, "grad_norm": 0.6593260500341122, "learning_rate": 1.9107866991078672e-06, "loss": 0.5401, "step": 31306 }, { "epoch": 0.914046305217366, "grad_norm": 0.6921406652548167, "learning_rate": 1.910137875101379e-06, "loss": 0.6447, "step": 31307 }, { "epoch": 0.9140755014452133, "grad_norm": 0.6202149045691577, "learning_rate": 1.909489051094891e-06, "loss": 0.5566, "step": 31308 }, { "epoch": 0.9141046976730607, "grad_norm": 0.6946417477724814, "learning_rate": 1.9088402270884025e-06, "loss": 0.6346, "step": 31309 }, { "epoch": 0.914133893900908, "grad_norm": 0.6694288576879559, "learning_rate": 1.9081914030819145e-06, "loss": 0.5954, "step": 31310 }, { "epoch": 0.9141630901287554, "grad_norm": 0.6495256781340354, "learning_rate": 1.907542579075426e-06, "loss": 0.6207, "step": 31311 }, { "epoch": 0.9141922863566028, "grad_norm": 0.6261678365494819, "learning_rate": 1.9068937550689377e-06, "loss": 0.6028, "step": 31312 }, { "epoch": 0.9142214825844501, "grad_norm": 0.6327690164064009, "learning_rate": 1.9062449310624495e-06, "loss": 0.5631, "step": 31313 }, { "epoch": 0.9142506788122975, "grad_norm": 0.6730673617980822, "learning_rate": 1.9055961070559611e-06, "loss": 0.6625, "step": 31314 }, { "epoch": 0.9142798750401449, "grad_norm": 0.6749783581507792, "learning_rate": 1.9049472830494731e-06, "loss": 0.6013, "step": 31315 }, { "epoch": 0.9143090712679922, "grad_norm": 0.6761198264862193, "learning_rate": 1.9042984590429847e-06, "loss": 0.6338, "step": 31316 }, { "epoch": 0.9143382674958396, "grad_norm": 0.6211037111869876, "learning_rate": 1.9036496350364966e-06, "loss": 0.5719, "step": 31317 }, { "epoch": 0.9143674637236869, "grad_norm": 0.5988461359910154, "learning_rate": 1.9030008110300084e-06, "loss": 0.5002, "step": 31318 }, { "epoch": 0.9143966599515343, "grad_norm": 0.650093625502098, "learning_rate": 1.90235198702352e-06, "loss": 0.601, "step": 31319 }, { "epoch": 0.9144258561793817, "grad_norm": 0.6148676389501895, "learning_rate": 1.9017031630170318e-06, "loss": 0.5788, "step": 31320 }, { "epoch": 0.914455052407229, "grad_norm": 0.6344311971015129, "learning_rate": 1.9010543390105434e-06, "loss": 0.6175, "step": 31321 }, { "epoch": 0.9144842486350764, "grad_norm": 0.6356653676877281, "learning_rate": 1.9004055150040554e-06, "loss": 0.5397, "step": 31322 }, { "epoch": 0.9145134448629237, "grad_norm": 0.6719395370219751, "learning_rate": 1.899756690997567e-06, "loss": 0.5908, "step": 31323 }, { "epoch": 0.9145426410907711, "grad_norm": 0.6283226553478357, "learning_rate": 1.899107866991079e-06, "loss": 0.6137, "step": 31324 }, { "epoch": 0.9145718373186185, "grad_norm": 0.6316415824428456, "learning_rate": 1.8984590429845907e-06, "loss": 0.5578, "step": 31325 }, { "epoch": 0.9146010335464658, "grad_norm": 0.6501574166726313, "learning_rate": 1.8978102189781023e-06, "loss": 0.5767, "step": 31326 }, { "epoch": 0.9146302297743132, "grad_norm": 0.6553687166036314, "learning_rate": 1.897161394971614e-06, "loss": 0.6205, "step": 31327 }, { "epoch": 0.9146594260021605, "grad_norm": 0.5898748153448107, "learning_rate": 1.896512570965126e-06, "loss": 0.4907, "step": 31328 }, { "epoch": 0.9146886222300079, "grad_norm": 0.6884805358809898, "learning_rate": 1.8958637469586377e-06, "loss": 0.6042, "step": 31329 }, { "epoch": 0.9147178184578553, "grad_norm": 0.6797128038406748, "learning_rate": 1.8952149229521493e-06, "loss": 0.6196, "step": 31330 }, { "epoch": 0.9147470146857026, "grad_norm": 0.6071968740144027, "learning_rate": 1.894566098945661e-06, "loss": 0.5351, "step": 31331 }, { "epoch": 0.91477621091355, "grad_norm": 0.7377877444898989, "learning_rate": 1.893917274939173e-06, "loss": 0.6364, "step": 31332 }, { "epoch": 0.9148054071413974, "grad_norm": 0.6242146145429383, "learning_rate": 1.8932684509326845e-06, "loss": 0.585, "step": 31333 }, { "epoch": 0.9148346033692447, "grad_norm": 0.5963860481455285, "learning_rate": 1.8926196269261966e-06, "loss": 0.5214, "step": 31334 }, { "epoch": 0.9148637995970921, "grad_norm": 0.729388766411027, "learning_rate": 1.8919708029197082e-06, "loss": 0.6726, "step": 31335 }, { "epoch": 0.9148929958249394, "grad_norm": 0.601703408580852, "learning_rate": 1.89132197891322e-06, "loss": 0.5619, "step": 31336 }, { "epoch": 0.9149221920527868, "grad_norm": 0.6139482548797734, "learning_rate": 1.8906731549067316e-06, "loss": 0.56, "step": 31337 }, { "epoch": 0.9149513882806342, "grad_norm": 0.6346973521266296, "learning_rate": 1.8900243309002434e-06, "loss": 0.5392, "step": 31338 }, { "epoch": 0.9149805845084815, "grad_norm": 0.6376934999645686, "learning_rate": 1.8893755068937552e-06, "loss": 0.609, "step": 31339 }, { "epoch": 0.9150097807363289, "grad_norm": 0.6853021639461093, "learning_rate": 1.8887266828872668e-06, "loss": 0.6924, "step": 31340 }, { "epoch": 0.9150389769641762, "grad_norm": 0.6392394894611954, "learning_rate": 1.8880778588807789e-06, "loss": 0.5756, "step": 31341 }, { "epoch": 0.9150681731920236, "grad_norm": 0.6829074644586091, "learning_rate": 1.8874290348742905e-06, "loss": 0.6452, "step": 31342 }, { "epoch": 0.915097369419871, "grad_norm": 0.6980215279926085, "learning_rate": 1.8867802108678025e-06, "loss": 0.6588, "step": 31343 }, { "epoch": 0.9151265656477183, "grad_norm": 0.6735988966284246, "learning_rate": 1.886131386861314e-06, "loss": 0.6349, "step": 31344 }, { "epoch": 0.9151557618755657, "grad_norm": 0.6271204982062389, "learning_rate": 1.8854825628548257e-06, "loss": 0.5733, "step": 31345 }, { "epoch": 0.915184958103413, "grad_norm": 0.6404369199690512, "learning_rate": 1.8848337388483375e-06, "loss": 0.5822, "step": 31346 }, { "epoch": 0.9152141543312604, "grad_norm": 0.6249702360557473, "learning_rate": 1.8841849148418493e-06, "loss": 0.5792, "step": 31347 }, { "epoch": 0.9152433505591078, "grad_norm": 0.6475883820384061, "learning_rate": 1.8835360908353611e-06, "loss": 0.5746, "step": 31348 }, { "epoch": 0.9152725467869551, "grad_norm": 0.7052431038773888, "learning_rate": 1.8828872668288727e-06, "loss": 0.6319, "step": 31349 }, { "epoch": 0.9153017430148025, "grad_norm": 0.5984782316244404, "learning_rate": 1.8822384428223848e-06, "loss": 0.4828, "step": 31350 }, { "epoch": 0.9153309392426499, "grad_norm": 0.6237649016829897, "learning_rate": 1.8815896188158964e-06, "loss": 0.5657, "step": 31351 }, { "epoch": 0.9153601354704972, "grad_norm": 0.6346921824461298, "learning_rate": 1.880940794809408e-06, "loss": 0.5277, "step": 31352 }, { "epoch": 0.9153893316983446, "grad_norm": 0.6272160939693993, "learning_rate": 1.88029197080292e-06, "loss": 0.5116, "step": 31353 }, { "epoch": 0.9154185279261919, "grad_norm": 0.6360057015076979, "learning_rate": 1.8796431467964316e-06, "loss": 0.5613, "step": 31354 }, { "epoch": 0.9154477241540393, "grad_norm": 0.6085359592728629, "learning_rate": 1.8789943227899434e-06, "loss": 0.515, "step": 31355 }, { "epoch": 0.9154769203818867, "grad_norm": 0.6044838173658588, "learning_rate": 1.878345498783455e-06, "loss": 0.487, "step": 31356 }, { "epoch": 0.915506116609734, "grad_norm": 0.69649230939499, "learning_rate": 1.8776966747769668e-06, "loss": 0.7147, "step": 31357 }, { "epoch": 0.9155353128375814, "grad_norm": 0.6628406286221071, "learning_rate": 1.8770478507704787e-06, "loss": 0.6259, "step": 31358 }, { "epoch": 0.9155645090654287, "grad_norm": 0.6459471895741763, "learning_rate": 1.8763990267639903e-06, "loss": 0.6158, "step": 31359 }, { "epoch": 0.9155937052932761, "grad_norm": 0.7116100551982025, "learning_rate": 1.8757502027575023e-06, "loss": 0.5824, "step": 31360 }, { "epoch": 0.9156229015211235, "grad_norm": 0.6335695070861713, "learning_rate": 1.875101378751014e-06, "loss": 0.5467, "step": 31361 }, { "epoch": 0.9156520977489708, "grad_norm": 0.6789146994545945, "learning_rate": 1.8744525547445257e-06, "loss": 0.634, "step": 31362 }, { "epoch": 0.9156812939768182, "grad_norm": 0.7124112514079506, "learning_rate": 1.8738037307380375e-06, "loss": 0.7043, "step": 31363 }, { "epoch": 0.9157104902046656, "grad_norm": 0.6424288527245647, "learning_rate": 1.8731549067315491e-06, "loss": 0.5807, "step": 31364 }, { "epoch": 0.9157396864325129, "grad_norm": 0.715398278338596, "learning_rate": 1.872506082725061e-06, "loss": 0.7193, "step": 31365 }, { "epoch": 0.9157688826603603, "grad_norm": 0.5770465894404866, "learning_rate": 1.8718572587185726e-06, "loss": 0.4674, "step": 31366 }, { "epoch": 0.9157980788882076, "grad_norm": 0.6634187437023782, "learning_rate": 1.8712084347120846e-06, "loss": 0.6433, "step": 31367 }, { "epoch": 0.915827275116055, "grad_norm": 0.6650265762210119, "learning_rate": 1.8705596107055962e-06, "loss": 0.6611, "step": 31368 }, { "epoch": 0.9158564713439024, "grad_norm": 0.6094969001911232, "learning_rate": 1.8699107866991082e-06, "loss": 0.5552, "step": 31369 }, { "epoch": 0.9158856675717497, "grad_norm": 0.5803388468699368, "learning_rate": 1.8692619626926198e-06, "loss": 0.4995, "step": 31370 }, { "epoch": 0.9159148637995971, "grad_norm": 0.6844861733253906, "learning_rate": 1.8686131386861314e-06, "loss": 0.6316, "step": 31371 }, { "epoch": 0.9159440600274444, "grad_norm": 0.6568627612118249, "learning_rate": 1.8679643146796434e-06, "loss": 0.5872, "step": 31372 }, { "epoch": 0.9159732562552918, "grad_norm": 0.6275350587739233, "learning_rate": 1.867315490673155e-06, "loss": 0.6159, "step": 31373 }, { "epoch": 0.9160024524831392, "grad_norm": 0.5939599089180403, "learning_rate": 1.8666666666666669e-06, "loss": 0.5206, "step": 31374 }, { "epoch": 0.9160316487109865, "grad_norm": 0.6142193385052329, "learning_rate": 1.8660178426601785e-06, "loss": 0.5511, "step": 31375 }, { "epoch": 0.9160608449388339, "grad_norm": 0.6837190230549743, "learning_rate": 1.8653690186536905e-06, "loss": 0.6455, "step": 31376 }, { "epoch": 0.9160900411666812, "grad_norm": 0.6118567633450572, "learning_rate": 1.864720194647202e-06, "loss": 0.5443, "step": 31377 }, { "epoch": 0.9161192373945286, "grad_norm": 0.6761781424322455, "learning_rate": 1.8640713706407137e-06, "loss": 0.6559, "step": 31378 }, { "epoch": 0.916148433622376, "grad_norm": 0.6308098830673663, "learning_rate": 1.8634225466342257e-06, "loss": 0.59, "step": 31379 }, { "epoch": 0.9161776298502233, "grad_norm": 0.6328235817868559, "learning_rate": 1.8627737226277373e-06, "loss": 0.6001, "step": 31380 }, { "epoch": 0.9162068260780707, "grad_norm": 0.657575811647008, "learning_rate": 1.8621248986212491e-06, "loss": 0.5974, "step": 31381 }, { "epoch": 0.916236022305918, "grad_norm": 0.6897965944689826, "learning_rate": 1.861476074614761e-06, "loss": 0.5978, "step": 31382 }, { "epoch": 0.9162652185337654, "grad_norm": 0.6051268261270312, "learning_rate": 1.8608272506082728e-06, "loss": 0.5743, "step": 31383 }, { "epoch": 0.9162944147616128, "grad_norm": 0.6431445589155679, "learning_rate": 1.8601784266017844e-06, "loss": 0.5951, "step": 31384 }, { "epoch": 0.9163236109894601, "grad_norm": 0.6501106929413359, "learning_rate": 1.859529602595296e-06, "loss": 0.5804, "step": 31385 }, { "epoch": 0.9163528072173075, "grad_norm": 0.6735555868445992, "learning_rate": 1.858880778588808e-06, "loss": 0.6504, "step": 31386 }, { "epoch": 0.9163820034451549, "grad_norm": 0.6348689990609013, "learning_rate": 1.8582319545823196e-06, "loss": 0.5485, "step": 31387 }, { "epoch": 0.9164111996730022, "grad_norm": 0.6130728821559258, "learning_rate": 1.8575831305758316e-06, "loss": 0.5391, "step": 31388 }, { "epoch": 0.9164403959008496, "grad_norm": 0.6679715568274822, "learning_rate": 1.8569343065693432e-06, "loss": 0.6454, "step": 31389 }, { "epoch": 0.9164695921286969, "grad_norm": 0.6180496498616861, "learning_rate": 1.8562854825628549e-06, "loss": 0.5678, "step": 31390 }, { "epoch": 0.9164987883565443, "grad_norm": 0.6318886423654301, "learning_rate": 1.8556366585563667e-06, "loss": 0.5508, "step": 31391 }, { "epoch": 0.9165279845843917, "grad_norm": 0.6861696910497526, "learning_rate": 1.8549878345498785e-06, "loss": 0.6221, "step": 31392 }, { "epoch": 0.916557180812239, "grad_norm": 0.6150766498141961, "learning_rate": 1.8543390105433903e-06, "loss": 0.5219, "step": 31393 }, { "epoch": 0.9165863770400864, "grad_norm": 0.6550532509183592, "learning_rate": 1.853690186536902e-06, "loss": 0.6025, "step": 31394 }, { "epoch": 0.9166155732679337, "grad_norm": 0.6532459393506702, "learning_rate": 1.853041362530414e-06, "loss": 0.6005, "step": 31395 }, { "epoch": 0.9166447694957811, "grad_norm": 0.6483384374503262, "learning_rate": 1.8523925385239255e-06, "loss": 0.5992, "step": 31396 }, { "epoch": 0.9166739657236285, "grad_norm": 0.646625180462617, "learning_rate": 1.8517437145174371e-06, "loss": 0.6266, "step": 31397 }, { "epoch": 0.9167031619514758, "grad_norm": 0.6426022260316252, "learning_rate": 1.8510948905109492e-06, "loss": 0.5768, "step": 31398 }, { "epoch": 0.9167323581793232, "grad_norm": 0.6550411970038809, "learning_rate": 1.8504460665044608e-06, "loss": 0.623, "step": 31399 }, { "epoch": 0.9167615544071706, "grad_norm": 0.6072026738322445, "learning_rate": 1.8497972424979726e-06, "loss": 0.5101, "step": 31400 }, { "epoch": 0.9167907506350179, "grad_norm": 0.6523818120565946, "learning_rate": 1.8491484184914844e-06, "loss": 0.6282, "step": 31401 }, { "epoch": 0.9168199468628653, "grad_norm": 0.6265275204805303, "learning_rate": 1.8484995944849962e-06, "loss": 0.5994, "step": 31402 }, { "epoch": 0.9168491430907126, "grad_norm": 0.6219181222842044, "learning_rate": 1.8478507704785078e-06, "loss": 0.571, "step": 31403 }, { "epoch": 0.91687833931856, "grad_norm": 0.6833597887970706, "learning_rate": 1.8472019464720194e-06, "loss": 0.6802, "step": 31404 }, { "epoch": 0.9169075355464074, "grad_norm": 0.6655109329736165, "learning_rate": 1.8465531224655314e-06, "loss": 0.6022, "step": 31405 }, { "epoch": 0.9169367317742547, "grad_norm": 0.7153397758828476, "learning_rate": 1.845904298459043e-06, "loss": 0.7062, "step": 31406 }, { "epoch": 0.9169659280021021, "grad_norm": 0.624744914346824, "learning_rate": 1.845255474452555e-06, "loss": 0.5611, "step": 31407 }, { "epoch": 0.9169951242299494, "grad_norm": 0.6754765213230981, "learning_rate": 1.8446066504460667e-06, "loss": 0.6325, "step": 31408 }, { "epoch": 0.9170243204577968, "grad_norm": 0.5865209840068881, "learning_rate": 1.8439578264395785e-06, "loss": 0.4797, "step": 31409 }, { "epoch": 0.9170535166856442, "grad_norm": 0.6052323056440345, "learning_rate": 1.84330900243309e-06, "loss": 0.523, "step": 31410 }, { "epoch": 0.9170827129134915, "grad_norm": 0.6095405613378044, "learning_rate": 1.842660178426602e-06, "loss": 0.5373, "step": 31411 }, { "epoch": 0.9171119091413389, "grad_norm": 0.6521869747842494, "learning_rate": 1.8420113544201137e-06, "loss": 0.6053, "step": 31412 }, { "epoch": 0.9171411053691862, "grad_norm": 0.6714308044375845, "learning_rate": 1.8413625304136253e-06, "loss": 0.5953, "step": 31413 }, { "epoch": 0.9171703015970336, "grad_norm": 0.6504646357604851, "learning_rate": 1.8407137064071374e-06, "loss": 0.5968, "step": 31414 }, { "epoch": 0.917199497824881, "grad_norm": 0.6946164256705815, "learning_rate": 1.840064882400649e-06, "loss": 0.5885, "step": 31415 }, { "epoch": 0.9172286940527283, "grad_norm": 0.6260962410472015, "learning_rate": 1.8394160583941606e-06, "loss": 0.5657, "step": 31416 }, { "epoch": 0.9172578902805758, "grad_norm": 0.6803082430492688, "learning_rate": 1.8387672343876726e-06, "loss": 0.6227, "step": 31417 }, { "epoch": 0.9172870865084232, "grad_norm": 0.6350243852131058, "learning_rate": 1.8381184103811842e-06, "loss": 0.5484, "step": 31418 }, { "epoch": 0.9173162827362705, "grad_norm": 0.6827300547635177, "learning_rate": 1.837469586374696e-06, "loss": 0.6898, "step": 31419 }, { "epoch": 0.9173454789641179, "grad_norm": 0.5622088159176069, "learning_rate": 1.8368207623682076e-06, "loss": 0.4539, "step": 31420 }, { "epoch": 0.9173746751919652, "grad_norm": 0.6245273310779533, "learning_rate": 1.8361719383617196e-06, "loss": 0.5844, "step": 31421 }, { "epoch": 0.9174038714198126, "grad_norm": 0.6127070462823961, "learning_rate": 1.8355231143552313e-06, "loss": 0.5578, "step": 31422 }, { "epoch": 0.91743306764766, "grad_norm": 0.6847952487767599, "learning_rate": 1.8348742903487429e-06, "loss": 0.6676, "step": 31423 }, { "epoch": 0.9174622638755073, "grad_norm": 0.666273007205257, "learning_rate": 1.8342254663422549e-06, "loss": 0.6388, "step": 31424 }, { "epoch": 0.9174914601033547, "grad_norm": 0.639431814228268, "learning_rate": 1.8335766423357665e-06, "loss": 0.5831, "step": 31425 }, { "epoch": 0.9175206563312021, "grad_norm": 0.7844643146687329, "learning_rate": 1.8329278183292783e-06, "loss": 0.6715, "step": 31426 }, { "epoch": 0.9175498525590494, "grad_norm": 0.6843520427102402, "learning_rate": 1.8322789943227901e-06, "loss": 0.6166, "step": 31427 }, { "epoch": 0.9175790487868968, "grad_norm": 0.6599320177803317, "learning_rate": 1.831630170316302e-06, "loss": 0.6126, "step": 31428 }, { "epoch": 0.9176082450147441, "grad_norm": 0.6132373407988597, "learning_rate": 1.8309813463098135e-06, "loss": 0.5455, "step": 31429 }, { "epoch": 0.9176374412425915, "grad_norm": 0.7414373851449267, "learning_rate": 1.8303325223033254e-06, "loss": 0.6437, "step": 31430 }, { "epoch": 0.9176666374704389, "grad_norm": 0.5949912282830686, "learning_rate": 1.8296836982968372e-06, "loss": 0.5415, "step": 31431 }, { "epoch": 0.9176958336982862, "grad_norm": 0.5675967715406296, "learning_rate": 1.8290348742903488e-06, "loss": 0.5219, "step": 31432 }, { "epoch": 0.9177250299261336, "grad_norm": 0.6721599168736666, "learning_rate": 1.8283860502838608e-06, "loss": 0.6322, "step": 31433 }, { "epoch": 0.917754226153981, "grad_norm": 0.641215457180112, "learning_rate": 1.8277372262773724e-06, "loss": 0.5612, "step": 31434 }, { "epoch": 0.9177834223818283, "grad_norm": 0.6027919371216508, "learning_rate": 1.8270884022708842e-06, "loss": 0.5158, "step": 31435 }, { "epoch": 0.9178126186096757, "grad_norm": 0.6216033432060285, "learning_rate": 1.826439578264396e-06, "loss": 0.5888, "step": 31436 }, { "epoch": 0.917841814837523, "grad_norm": 0.6264463189131338, "learning_rate": 1.8257907542579076e-06, "loss": 0.5718, "step": 31437 }, { "epoch": 0.9178710110653704, "grad_norm": 0.6097193064999564, "learning_rate": 1.8251419302514195e-06, "loss": 0.568, "step": 31438 }, { "epoch": 0.9179002072932178, "grad_norm": 0.6410791660811354, "learning_rate": 1.824493106244931e-06, "loss": 0.5861, "step": 31439 }, { "epoch": 0.9179294035210651, "grad_norm": 0.6074102512602707, "learning_rate": 1.823844282238443e-06, "loss": 0.5161, "step": 31440 }, { "epoch": 0.9179585997489125, "grad_norm": 0.6779380443059729, "learning_rate": 1.8231954582319547e-06, "loss": 0.6534, "step": 31441 }, { "epoch": 0.9179877959767598, "grad_norm": 0.6622320246737449, "learning_rate": 1.8225466342254667e-06, "loss": 0.6267, "step": 31442 }, { "epoch": 0.9180169922046072, "grad_norm": 0.6885187677791862, "learning_rate": 1.8218978102189783e-06, "loss": 0.635, "step": 31443 }, { "epoch": 0.9180461884324546, "grad_norm": 0.6976183962031844, "learning_rate": 1.82124898621249e-06, "loss": 0.5993, "step": 31444 }, { "epoch": 0.9180753846603019, "grad_norm": 0.6723742253137334, "learning_rate": 1.8206001622060017e-06, "loss": 0.6058, "step": 31445 }, { "epoch": 0.9181045808881493, "grad_norm": 0.6108858681511709, "learning_rate": 1.8199513381995136e-06, "loss": 0.5463, "step": 31446 }, { "epoch": 0.9181337771159966, "grad_norm": 0.6567311600024397, "learning_rate": 1.8193025141930254e-06, "loss": 0.6019, "step": 31447 }, { "epoch": 0.918162973343844, "grad_norm": 0.6427001006746734, "learning_rate": 1.818653690186537e-06, "loss": 0.6218, "step": 31448 }, { "epoch": 0.9181921695716914, "grad_norm": 0.6113855727669935, "learning_rate": 1.8180048661800486e-06, "loss": 0.5234, "step": 31449 }, { "epoch": 0.9182213657995387, "grad_norm": 0.6755868034235354, "learning_rate": 1.8173560421735606e-06, "loss": 0.5885, "step": 31450 }, { "epoch": 0.9182505620273861, "grad_norm": 0.5889674579010638, "learning_rate": 1.8167072181670722e-06, "loss": 0.4927, "step": 31451 }, { "epoch": 0.9182797582552334, "grad_norm": 0.630370017548918, "learning_rate": 1.8160583941605842e-06, "loss": 0.5752, "step": 31452 }, { "epoch": 0.9183089544830808, "grad_norm": 0.7016584192621504, "learning_rate": 1.8154095701540958e-06, "loss": 0.6969, "step": 31453 }, { "epoch": 0.9183381507109282, "grad_norm": 0.6630717599515346, "learning_rate": 1.8147607461476077e-06, "loss": 0.6418, "step": 31454 }, { "epoch": 0.9183673469387755, "grad_norm": 0.6541930887392882, "learning_rate": 1.8141119221411195e-06, "loss": 0.5754, "step": 31455 }, { "epoch": 0.9183965431666229, "grad_norm": 0.5945086611986061, "learning_rate": 1.813463098134631e-06, "loss": 0.4613, "step": 31456 }, { "epoch": 0.9184257393944703, "grad_norm": 0.7080491081253258, "learning_rate": 1.8128142741281429e-06, "loss": 0.7005, "step": 31457 }, { "epoch": 0.9184549356223176, "grad_norm": 0.7385641409270889, "learning_rate": 1.8121654501216545e-06, "loss": 0.7112, "step": 31458 }, { "epoch": 0.918484131850165, "grad_norm": 0.6320680049655835, "learning_rate": 1.8115166261151665e-06, "loss": 0.5662, "step": 31459 }, { "epoch": 0.9185133280780123, "grad_norm": 0.7102173224030527, "learning_rate": 1.8108678021086781e-06, "loss": 0.607, "step": 31460 }, { "epoch": 0.9185425243058597, "grad_norm": 0.6634101570794599, "learning_rate": 1.8102189781021901e-06, "loss": 0.6154, "step": 31461 }, { "epoch": 0.9185717205337071, "grad_norm": 0.6420388559938182, "learning_rate": 1.8095701540957018e-06, "loss": 0.5755, "step": 31462 }, { "epoch": 0.9186009167615544, "grad_norm": 0.6892092957075778, "learning_rate": 1.8089213300892134e-06, "loss": 0.658, "step": 31463 }, { "epoch": 0.9186301129894018, "grad_norm": 0.6117052143837144, "learning_rate": 1.8082725060827252e-06, "loss": 0.5216, "step": 31464 }, { "epoch": 0.9186593092172491, "grad_norm": 0.6247549067974312, "learning_rate": 1.807623682076237e-06, "loss": 0.568, "step": 31465 }, { "epoch": 0.9186885054450965, "grad_norm": 0.7035522816131755, "learning_rate": 1.8069748580697488e-06, "loss": 0.6143, "step": 31466 }, { "epoch": 0.9187177016729439, "grad_norm": 0.6400918221370849, "learning_rate": 1.8063260340632604e-06, "loss": 0.5832, "step": 31467 }, { "epoch": 0.9187468979007912, "grad_norm": 0.6046176989701736, "learning_rate": 1.8056772100567724e-06, "loss": 0.5211, "step": 31468 }, { "epoch": 0.9187760941286386, "grad_norm": 0.6690916739876327, "learning_rate": 1.805028386050284e-06, "loss": 0.6192, "step": 31469 }, { "epoch": 0.918805290356486, "grad_norm": 0.6392989591448285, "learning_rate": 1.8043795620437956e-06, "loss": 0.5962, "step": 31470 }, { "epoch": 0.9188344865843333, "grad_norm": 0.61883306911764, "learning_rate": 1.8037307380373077e-06, "loss": 0.5382, "step": 31471 }, { "epoch": 0.9188636828121807, "grad_norm": 0.6153368804523777, "learning_rate": 1.8030819140308193e-06, "loss": 0.5937, "step": 31472 }, { "epoch": 0.918892879040028, "grad_norm": 0.6815786240563466, "learning_rate": 1.802433090024331e-06, "loss": 0.6627, "step": 31473 }, { "epoch": 0.9189220752678754, "grad_norm": 0.6283625043385281, "learning_rate": 1.8017842660178427e-06, "loss": 0.5758, "step": 31474 }, { "epoch": 0.9189512714957228, "grad_norm": 0.6313385695690702, "learning_rate": 1.8011354420113545e-06, "loss": 0.5811, "step": 31475 }, { "epoch": 0.9189804677235701, "grad_norm": 0.6443332202461246, "learning_rate": 1.8004866180048663e-06, "loss": 0.6218, "step": 31476 }, { "epoch": 0.9190096639514175, "grad_norm": 0.6997829479693504, "learning_rate": 1.799837793998378e-06, "loss": 0.6138, "step": 31477 }, { "epoch": 0.9190388601792648, "grad_norm": 0.5988380503673082, "learning_rate": 1.79918896999189e-06, "loss": 0.5371, "step": 31478 }, { "epoch": 0.9190680564071122, "grad_norm": 0.6206747752740256, "learning_rate": 1.7985401459854016e-06, "loss": 0.5352, "step": 31479 }, { "epoch": 0.9190972526349596, "grad_norm": 0.6351403109802042, "learning_rate": 1.7978913219789134e-06, "loss": 0.563, "step": 31480 }, { "epoch": 0.9191264488628069, "grad_norm": 0.6486393412783195, "learning_rate": 1.7972424979724252e-06, "loss": 0.589, "step": 31481 }, { "epoch": 0.9191556450906543, "grad_norm": 0.5985720899860014, "learning_rate": 1.7965936739659368e-06, "loss": 0.5106, "step": 31482 }, { "epoch": 0.9191848413185016, "grad_norm": 0.715220498663624, "learning_rate": 1.7959448499594486e-06, "loss": 0.6734, "step": 31483 }, { "epoch": 0.919214037546349, "grad_norm": 0.6431511906097154, "learning_rate": 1.7952960259529604e-06, "loss": 0.5806, "step": 31484 }, { "epoch": 0.9192432337741964, "grad_norm": 0.66345755263405, "learning_rate": 1.7946472019464722e-06, "loss": 0.6178, "step": 31485 }, { "epoch": 0.9192724300020437, "grad_norm": 0.6775630272851456, "learning_rate": 1.7939983779399838e-06, "loss": 0.6544, "step": 31486 }, { "epoch": 0.9193016262298911, "grad_norm": 0.5909766760227599, "learning_rate": 1.7933495539334959e-06, "loss": 0.5077, "step": 31487 }, { "epoch": 0.9193308224577385, "grad_norm": 0.6464174657079566, "learning_rate": 1.7927007299270075e-06, "loss": 0.5895, "step": 31488 }, { "epoch": 0.9193600186855858, "grad_norm": 0.67988911465951, "learning_rate": 1.792051905920519e-06, "loss": 0.6434, "step": 31489 }, { "epoch": 0.9193892149134332, "grad_norm": 0.6880856436800354, "learning_rate": 1.791403081914031e-06, "loss": 0.6486, "step": 31490 }, { "epoch": 0.9194184111412805, "grad_norm": 0.6355492760115982, "learning_rate": 1.7907542579075427e-06, "loss": 0.592, "step": 31491 }, { "epoch": 0.9194476073691279, "grad_norm": 0.5951106174886115, "learning_rate": 1.7901054339010545e-06, "loss": 0.5144, "step": 31492 }, { "epoch": 0.9194768035969753, "grad_norm": 0.6486777104761, "learning_rate": 1.7894566098945661e-06, "loss": 0.541, "step": 31493 }, { "epoch": 0.9195059998248226, "grad_norm": 0.6035212963044225, "learning_rate": 1.7888077858880782e-06, "loss": 0.5405, "step": 31494 }, { "epoch": 0.91953519605267, "grad_norm": 0.6530822729234975, "learning_rate": 1.7881589618815898e-06, "loss": 0.5728, "step": 31495 }, { "epoch": 0.9195643922805173, "grad_norm": 0.6561718299531087, "learning_rate": 1.7875101378751014e-06, "loss": 0.6568, "step": 31496 }, { "epoch": 0.9195935885083647, "grad_norm": 0.7073518596911571, "learning_rate": 1.7868613138686134e-06, "loss": 0.6914, "step": 31497 }, { "epoch": 0.9196227847362121, "grad_norm": 0.6446090838836216, "learning_rate": 1.786212489862125e-06, "loss": 0.5934, "step": 31498 }, { "epoch": 0.9196519809640594, "grad_norm": 0.6303653552898356, "learning_rate": 1.7855636658556368e-06, "loss": 0.5761, "step": 31499 }, { "epoch": 0.9196811771919068, "grad_norm": 0.6837093438477023, "learning_rate": 1.7849148418491486e-06, "loss": 0.6206, "step": 31500 }, { "epoch": 0.9197103734197541, "grad_norm": 0.6519282937067544, "learning_rate": 1.7842660178426604e-06, "loss": 0.5622, "step": 31501 }, { "epoch": 0.9197395696476015, "grad_norm": 0.6843010591034967, "learning_rate": 1.783617193836172e-06, "loss": 0.6889, "step": 31502 }, { "epoch": 0.9197687658754489, "grad_norm": 0.6659905449069521, "learning_rate": 1.7829683698296836e-06, "loss": 0.6072, "step": 31503 }, { "epoch": 0.9197979621032962, "grad_norm": 0.7032841465907885, "learning_rate": 1.7823195458231957e-06, "loss": 0.677, "step": 31504 }, { "epoch": 0.9198271583311436, "grad_norm": 0.6445178842263714, "learning_rate": 1.7816707218167073e-06, "loss": 0.5467, "step": 31505 }, { "epoch": 0.919856354558991, "grad_norm": 0.673023691941837, "learning_rate": 1.7810218978102193e-06, "loss": 0.6399, "step": 31506 }, { "epoch": 0.9198855507868383, "grad_norm": 0.6536239297616031, "learning_rate": 1.780373073803731e-06, "loss": 0.5972, "step": 31507 }, { "epoch": 0.9199147470146857, "grad_norm": 0.6341398809030417, "learning_rate": 1.7797242497972425e-06, "loss": 0.5509, "step": 31508 }, { "epoch": 0.919943943242533, "grad_norm": 0.6939937054393853, "learning_rate": 1.7790754257907543e-06, "loss": 0.6695, "step": 31509 }, { "epoch": 0.9199731394703804, "grad_norm": 0.695585258819692, "learning_rate": 1.7784266017842661e-06, "loss": 0.5493, "step": 31510 }, { "epoch": 0.9200023356982278, "grad_norm": 0.6372815647383667, "learning_rate": 1.777777777777778e-06, "loss": 0.5631, "step": 31511 }, { "epoch": 0.9200315319260751, "grad_norm": 0.6173248628568235, "learning_rate": 1.7771289537712896e-06, "loss": 0.5601, "step": 31512 }, { "epoch": 0.9200607281539225, "grad_norm": 0.6287098330204108, "learning_rate": 1.7764801297648016e-06, "loss": 0.5294, "step": 31513 }, { "epoch": 0.9200899243817698, "grad_norm": 0.6932059146929491, "learning_rate": 1.7758313057583132e-06, "loss": 0.6314, "step": 31514 }, { "epoch": 0.9201191206096172, "grad_norm": 0.6352025642390353, "learning_rate": 1.7751824817518248e-06, "loss": 0.596, "step": 31515 }, { "epoch": 0.9201483168374646, "grad_norm": 0.6308057849900826, "learning_rate": 1.7745336577453368e-06, "loss": 0.5924, "step": 31516 }, { "epoch": 0.9201775130653119, "grad_norm": 0.6359894364638102, "learning_rate": 1.7738848337388484e-06, "loss": 0.5717, "step": 31517 }, { "epoch": 0.9202067092931593, "grad_norm": 0.6453406584809839, "learning_rate": 1.7732360097323602e-06, "loss": 0.5976, "step": 31518 }, { "epoch": 0.9202359055210066, "grad_norm": 0.6131632511774916, "learning_rate": 1.772587185725872e-06, "loss": 0.4965, "step": 31519 }, { "epoch": 0.920265101748854, "grad_norm": 0.6566746758044945, "learning_rate": 1.7719383617193839e-06, "loss": 0.5827, "step": 31520 }, { "epoch": 0.9202942979767014, "grad_norm": 0.6743458440676001, "learning_rate": 1.7712895377128955e-06, "loss": 0.6296, "step": 31521 }, { "epoch": 0.9203234942045487, "grad_norm": 0.6694763598759873, "learning_rate": 1.770640713706407e-06, "loss": 0.6505, "step": 31522 }, { "epoch": 0.9203526904323961, "grad_norm": 0.6258518560967137, "learning_rate": 1.7699918896999191e-06, "loss": 0.5298, "step": 31523 }, { "epoch": 0.9203818866602435, "grad_norm": 0.5783458588766957, "learning_rate": 1.7693430656934307e-06, "loss": 0.5178, "step": 31524 }, { "epoch": 0.9204110828880908, "grad_norm": 0.6100677861304653, "learning_rate": 1.7686942416869427e-06, "loss": 0.4926, "step": 31525 }, { "epoch": 0.9204402791159382, "grad_norm": 0.6704779915825172, "learning_rate": 1.7680454176804543e-06, "loss": 0.5836, "step": 31526 }, { "epoch": 0.9204694753437855, "grad_norm": 0.6308486547062551, "learning_rate": 1.7673965936739662e-06, "loss": 0.5797, "step": 31527 }, { "epoch": 0.9204986715716329, "grad_norm": 0.6331676588977035, "learning_rate": 1.7667477696674778e-06, "loss": 0.6011, "step": 31528 }, { "epoch": 0.9205278677994803, "grad_norm": 0.6113916576998475, "learning_rate": 1.7660989456609896e-06, "loss": 0.5221, "step": 31529 }, { "epoch": 0.9205570640273276, "grad_norm": 0.6643252242037451, "learning_rate": 1.7654501216545014e-06, "loss": 0.6174, "step": 31530 }, { "epoch": 0.920586260255175, "grad_norm": 0.680873807350711, "learning_rate": 1.764801297648013e-06, "loss": 0.6078, "step": 31531 }, { "epoch": 0.9206154564830223, "grad_norm": 0.6400873379901384, "learning_rate": 1.764152473641525e-06, "loss": 0.578, "step": 31532 }, { "epoch": 0.9206446527108697, "grad_norm": 0.6781306363279037, "learning_rate": 1.7635036496350366e-06, "loss": 0.6124, "step": 31533 }, { "epoch": 0.9206738489387171, "grad_norm": 0.6728760378908883, "learning_rate": 1.7628548256285484e-06, "loss": 0.6708, "step": 31534 }, { "epoch": 0.9207030451665644, "grad_norm": 0.6670142764147927, "learning_rate": 1.7622060016220603e-06, "loss": 0.5938, "step": 31535 }, { "epoch": 0.9207322413944118, "grad_norm": 0.6676901291766443, "learning_rate": 1.7615571776155719e-06, "loss": 0.6221, "step": 31536 }, { "epoch": 0.9207614376222591, "grad_norm": 0.6027860628714607, "learning_rate": 1.7609083536090837e-06, "loss": 0.5329, "step": 31537 }, { "epoch": 0.9207906338501066, "grad_norm": 0.6008550337672963, "learning_rate": 1.7602595296025955e-06, "loss": 0.5448, "step": 31538 }, { "epoch": 0.920819830077954, "grad_norm": 0.6458742881184107, "learning_rate": 1.7596107055961073e-06, "loss": 0.5719, "step": 31539 }, { "epoch": 0.9208490263058013, "grad_norm": 0.623938688165799, "learning_rate": 1.758961881589619e-06, "loss": 0.5585, "step": 31540 }, { "epoch": 0.9208782225336487, "grad_norm": 0.6956577982886707, "learning_rate": 1.7583130575831305e-06, "loss": 0.5977, "step": 31541 }, { "epoch": 0.9209074187614961, "grad_norm": 0.6464880345773096, "learning_rate": 1.7576642335766425e-06, "loss": 0.6184, "step": 31542 }, { "epoch": 0.9209366149893434, "grad_norm": 0.6728558555406103, "learning_rate": 1.7570154095701541e-06, "loss": 0.5928, "step": 31543 }, { "epoch": 0.9209658112171908, "grad_norm": 0.6195584897875749, "learning_rate": 1.7563665855636662e-06, "loss": 0.5496, "step": 31544 }, { "epoch": 0.9209950074450381, "grad_norm": 0.6507105718076065, "learning_rate": 1.7557177615571778e-06, "loss": 0.6137, "step": 31545 }, { "epoch": 0.9210242036728855, "grad_norm": 0.6841474808596338, "learning_rate": 1.7550689375506896e-06, "loss": 0.6573, "step": 31546 }, { "epoch": 0.9210533999007329, "grad_norm": 0.7096497125283004, "learning_rate": 1.7544201135442012e-06, "loss": 0.6289, "step": 31547 }, { "epoch": 0.9210825961285802, "grad_norm": 0.7162149726699286, "learning_rate": 1.753771289537713e-06, "loss": 0.7061, "step": 31548 }, { "epoch": 0.9211117923564276, "grad_norm": 0.6921123962793236, "learning_rate": 1.7531224655312248e-06, "loss": 0.6879, "step": 31549 }, { "epoch": 0.921140988584275, "grad_norm": 0.6608769009649306, "learning_rate": 1.7524736415247364e-06, "loss": 0.6303, "step": 31550 }, { "epoch": 0.9211701848121223, "grad_norm": 0.6584537298599787, "learning_rate": 1.7518248175182485e-06, "loss": 0.5811, "step": 31551 }, { "epoch": 0.9211993810399697, "grad_norm": 0.6100738596156122, "learning_rate": 1.75117599351176e-06, "loss": 0.5612, "step": 31552 }, { "epoch": 0.921228577267817, "grad_norm": 0.639597051852302, "learning_rate": 1.7505271695052719e-06, "loss": 0.6043, "step": 31553 }, { "epoch": 0.9212577734956644, "grad_norm": 0.6535951014473451, "learning_rate": 1.7498783454987837e-06, "loss": 0.5444, "step": 31554 }, { "epoch": 0.9212869697235118, "grad_norm": 0.6438759076786463, "learning_rate": 1.7492295214922953e-06, "loss": 0.5627, "step": 31555 }, { "epoch": 0.9213161659513591, "grad_norm": 0.6970586346595226, "learning_rate": 1.7485806974858071e-06, "loss": 0.6431, "step": 31556 }, { "epoch": 0.9213453621792065, "grad_norm": 0.6364860980691612, "learning_rate": 1.7479318734793187e-06, "loss": 0.5725, "step": 31557 }, { "epoch": 0.9213745584070538, "grad_norm": 0.685798703163397, "learning_rate": 1.7472830494728307e-06, "loss": 0.6894, "step": 31558 }, { "epoch": 0.9214037546349012, "grad_norm": 0.6779292314050861, "learning_rate": 1.7466342254663423e-06, "loss": 0.6405, "step": 31559 }, { "epoch": 0.9214329508627486, "grad_norm": 0.6311841238130491, "learning_rate": 1.7459854014598544e-06, "loss": 0.5871, "step": 31560 }, { "epoch": 0.9214621470905959, "grad_norm": 0.6757876810335953, "learning_rate": 1.745336577453366e-06, "loss": 0.6244, "step": 31561 }, { "epoch": 0.9214913433184433, "grad_norm": 0.6574881761190741, "learning_rate": 1.7446877534468776e-06, "loss": 0.6529, "step": 31562 }, { "epoch": 0.9215205395462907, "grad_norm": 0.6216096947111253, "learning_rate": 1.7440389294403894e-06, "loss": 0.559, "step": 31563 }, { "epoch": 0.921549735774138, "grad_norm": 0.5940004786332331, "learning_rate": 1.7433901054339012e-06, "loss": 0.5034, "step": 31564 }, { "epoch": 0.9215789320019854, "grad_norm": 0.6068529834811963, "learning_rate": 1.742741281427413e-06, "loss": 0.5349, "step": 31565 }, { "epoch": 0.9216081282298327, "grad_norm": 0.6326484475336097, "learning_rate": 1.7420924574209246e-06, "loss": 0.5845, "step": 31566 }, { "epoch": 0.9216373244576801, "grad_norm": 0.6387736685770721, "learning_rate": 1.7414436334144364e-06, "loss": 0.5777, "step": 31567 }, { "epoch": 0.9216665206855275, "grad_norm": 0.6505703582652062, "learning_rate": 1.7407948094079483e-06, "loss": 0.6298, "step": 31568 }, { "epoch": 0.9216957169133748, "grad_norm": 0.6547467509767327, "learning_rate": 1.7401459854014599e-06, "loss": 0.594, "step": 31569 }, { "epoch": 0.9217249131412222, "grad_norm": 0.6645082348191481, "learning_rate": 1.7394971613949719e-06, "loss": 0.621, "step": 31570 }, { "epoch": 0.9217541093690695, "grad_norm": 0.68953551680184, "learning_rate": 1.7388483373884835e-06, "loss": 0.6259, "step": 31571 }, { "epoch": 0.9217833055969169, "grad_norm": 0.5936372640507827, "learning_rate": 1.7381995133819953e-06, "loss": 0.5042, "step": 31572 }, { "epoch": 0.9218125018247643, "grad_norm": 0.64736575366913, "learning_rate": 1.7375506893755071e-06, "loss": 0.5953, "step": 31573 }, { "epoch": 0.9218416980526116, "grad_norm": 0.613851776597941, "learning_rate": 1.7369018653690187e-06, "loss": 0.5171, "step": 31574 }, { "epoch": 0.921870894280459, "grad_norm": 0.5989012226110254, "learning_rate": 1.7362530413625305e-06, "loss": 0.539, "step": 31575 }, { "epoch": 0.9219000905083063, "grad_norm": 0.6375223235351567, "learning_rate": 1.7356042173560421e-06, "loss": 0.5473, "step": 31576 }, { "epoch": 0.9219292867361537, "grad_norm": 0.7082554717424966, "learning_rate": 1.7349553933495542e-06, "loss": 0.6465, "step": 31577 }, { "epoch": 0.9219584829640011, "grad_norm": 0.6019786791065185, "learning_rate": 1.7343065693430658e-06, "loss": 0.5222, "step": 31578 }, { "epoch": 0.9219876791918484, "grad_norm": 0.6837042788155093, "learning_rate": 1.7336577453365778e-06, "loss": 0.6324, "step": 31579 }, { "epoch": 0.9220168754196958, "grad_norm": 0.6854646677467005, "learning_rate": 1.7330089213300894e-06, "loss": 0.6627, "step": 31580 }, { "epoch": 0.9220460716475432, "grad_norm": 0.6598906306077822, "learning_rate": 1.732360097323601e-06, "loss": 0.6132, "step": 31581 }, { "epoch": 0.9220752678753905, "grad_norm": 0.6400645652359908, "learning_rate": 1.7317112733171128e-06, "loss": 0.6161, "step": 31582 }, { "epoch": 0.9221044641032379, "grad_norm": 0.6461719283240382, "learning_rate": 1.7310624493106246e-06, "loss": 0.5693, "step": 31583 }, { "epoch": 0.9221336603310852, "grad_norm": 0.6844810368188183, "learning_rate": 1.7304136253041365e-06, "loss": 0.6158, "step": 31584 }, { "epoch": 0.9221628565589326, "grad_norm": 0.6384241958541942, "learning_rate": 1.729764801297648e-06, "loss": 0.5948, "step": 31585 }, { "epoch": 0.92219205278678, "grad_norm": 0.6455567610264474, "learning_rate": 1.72911597729116e-06, "loss": 0.5678, "step": 31586 }, { "epoch": 0.9222212490146273, "grad_norm": 0.6373996925972807, "learning_rate": 1.7284671532846717e-06, "loss": 0.5736, "step": 31587 }, { "epoch": 0.9222504452424747, "grad_norm": 0.6698204385581693, "learning_rate": 1.7278183292781833e-06, "loss": 0.6348, "step": 31588 }, { "epoch": 0.922279641470322, "grad_norm": 0.6163255424155301, "learning_rate": 1.7271695052716953e-06, "loss": 0.5416, "step": 31589 }, { "epoch": 0.9223088376981694, "grad_norm": 0.6085099352907097, "learning_rate": 1.726520681265207e-06, "loss": 0.5535, "step": 31590 }, { "epoch": 0.9223380339260168, "grad_norm": 0.6657174268540254, "learning_rate": 1.7258718572587187e-06, "loss": 0.5881, "step": 31591 }, { "epoch": 0.9223672301538641, "grad_norm": 0.6478096808057221, "learning_rate": 1.7252230332522303e-06, "loss": 0.5949, "step": 31592 }, { "epoch": 0.9223964263817115, "grad_norm": 0.6682124126883258, "learning_rate": 1.7245742092457424e-06, "loss": 0.5269, "step": 31593 }, { "epoch": 0.9224256226095588, "grad_norm": 0.6018005113603437, "learning_rate": 1.723925385239254e-06, "loss": 0.5388, "step": 31594 }, { "epoch": 0.9224548188374062, "grad_norm": 0.6638550284070837, "learning_rate": 1.7232765612327656e-06, "loss": 0.6193, "step": 31595 }, { "epoch": 0.9224840150652536, "grad_norm": 0.6234267889790807, "learning_rate": 1.7226277372262776e-06, "loss": 0.547, "step": 31596 }, { "epoch": 0.9225132112931009, "grad_norm": 0.635499930973611, "learning_rate": 1.7219789132197892e-06, "loss": 0.532, "step": 31597 }, { "epoch": 0.9225424075209483, "grad_norm": 0.6706270199930857, "learning_rate": 1.7213300892133012e-06, "loss": 0.6448, "step": 31598 }, { "epoch": 0.9225716037487957, "grad_norm": 0.6419495269916919, "learning_rate": 1.7206812652068128e-06, "loss": 0.5517, "step": 31599 }, { "epoch": 0.922600799976643, "grad_norm": 0.6561368119594196, "learning_rate": 1.7200324412003244e-06, "loss": 0.6008, "step": 31600 }, { "epoch": 0.9226299962044904, "grad_norm": 0.6269336761558018, "learning_rate": 1.7193836171938363e-06, "loss": 0.5628, "step": 31601 }, { "epoch": 0.9226591924323377, "grad_norm": 0.6735012514781672, "learning_rate": 1.718734793187348e-06, "loss": 0.6211, "step": 31602 }, { "epoch": 0.9226883886601851, "grad_norm": 0.6458298932557466, "learning_rate": 1.71808596918086e-06, "loss": 0.5869, "step": 31603 }, { "epoch": 0.9227175848880325, "grad_norm": 0.6555607280518321, "learning_rate": 1.7174371451743715e-06, "loss": 0.6277, "step": 31604 }, { "epoch": 0.9227467811158798, "grad_norm": 0.6566298726338162, "learning_rate": 1.7167883211678835e-06, "loss": 0.5633, "step": 31605 }, { "epoch": 0.9227759773437272, "grad_norm": 0.634747020103817, "learning_rate": 1.7161394971613951e-06, "loss": 0.5325, "step": 31606 }, { "epoch": 0.9228051735715745, "grad_norm": 0.7073528871694925, "learning_rate": 1.7154906731549067e-06, "loss": 0.6858, "step": 31607 }, { "epoch": 0.9228343697994219, "grad_norm": 0.6559533314096364, "learning_rate": 1.7148418491484188e-06, "loss": 0.5009, "step": 31608 }, { "epoch": 0.9228635660272693, "grad_norm": 0.6228428162698846, "learning_rate": 1.7141930251419304e-06, "loss": 0.5463, "step": 31609 }, { "epoch": 0.9228927622551166, "grad_norm": 0.6075299715527958, "learning_rate": 1.7135442011354422e-06, "loss": 0.5295, "step": 31610 }, { "epoch": 0.922921958482964, "grad_norm": 0.6685310100840743, "learning_rate": 1.7128953771289538e-06, "loss": 0.5897, "step": 31611 }, { "epoch": 0.9229511547108114, "grad_norm": 0.6521791240238736, "learning_rate": 1.7122465531224658e-06, "loss": 0.6204, "step": 31612 }, { "epoch": 0.9229803509386587, "grad_norm": 0.6445374564818525, "learning_rate": 1.7115977291159774e-06, "loss": 0.5941, "step": 31613 }, { "epoch": 0.9230095471665061, "grad_norm": 0.6380016937563551, "learning_rate": 1.710948905109489e-06, "loss": 0.5482, "step": 31614 }, { "epoch": 0.9230387433943534, "grad_norm": 0.6201012611957009, "learning_rate": 1.710300081103001e-06, "loss": 0.5628, "step": 31615 }, { "epoch": 0.9230679396222008, "grad_norm": 0.6315221842709942, "learning_rate": 1.7096512570965126e-06, "loss": 0.5882, "step": 31616 }, { "epoch": 0.9230971358500482, "grad_norm": 0.6466401015795461, "learning_rate": 1.7090024330900245e-06, "loss": 0.5508, "step": 31617 }, { "epoch": 0.9231263320778955, "grad_norm": 0.6797806944941555, "learning_rate": 1.7083536090835363e-06, "loss": 0.6461, "step": 31618 }, { "epoch": 0.9231555283057429, "grad_norm": 0.6944986289777902, "learning_rate": 1.707704785077048e-06, "loss": 0.6878, "step": 31619 }, { "epoch": 0.9231847245335902, "grad_norm": 0.6429070055901399, "learning_rate": 1.7070559610705597e-06, "loss": 0.593, "step": 31620 }, { "epoch": 0.9232139207614376, "grad_norm": 0.6528807039206509, "learning_rate": 1.7064071370640713e-06, "loss": 0.5771, "step": 31621 }, { "epoch": 0.923243116989285, "grad_norm": 0.6803963085248598, "learning_rate": 1.7057583130575833e-06, "loss": 0.6359, "step": 31622 }, { "epoch": 0.9232723132171323, "grad_norm": 0.7450207760612327, "learning_rate": 1.705109489051095e-06, "loss": 0.6411, "step": 31623 }, { "epoch": 0.9233015094449797, "grad_norm": 0.6068943362945589, "learning_rate": 1.704460665044607e-06, "loss": 0.5225, "step": 31624 }, { "epoch": 0.923330705672827, "grad_norm": 0.6259792085683028, "learning_rate": 1.7038118410381186e-06, "loss": 0.5739, "step": 31625 }, { "epoch": 0.9233599019006744, "grad_norm": 0.6568482469683676, "learning_rate": 1.7031630170316302e-06, "loss": 0.6437, "step": 31626 }, { "epoch": 0.9233890981285218, "grad_norm": 0.6292346533849513, "learning_rate": 1.7025141930251422e-06, "loss": 0.5841, "step": 31627 }, { "epoch": 0.9234182943563691, "grad_norm": 0.6344106834761499, "learning_rate": 1.7018653690186538e-06, "loss": 0.5938, "step": 31628 }, { "epoch": 0.9234474905842165, "grad_norm": 0.6403702423221965, "learning_rate": 1.7012165450121656e-06, "loss": 0.576, "step": 31629 }, { "epoch": 0.9234766868120639, "grad_norm": 0.6476666937161297, "learning_rate": 1.7005677210056772e-06, "loss": 0.6377, "step": 31630 }, { "epoch": 0.9235058830399112, "grad_norm": 0.6556472024267707, "learning_rate": 1.6999188969991892e-06, "loss": 0.6239, "step": 31631 }, { "epoch": 0.9235350792677586, "grad_norm": 0.6369409295648983, "learning_rate": 1.6992700729927008e-06, "loss": 0.5708, "step": 31632 }, { "epoch": 0.9235642754956059, "grad_norm": 0.6536700040459316, "learning_rate": 1.6986212489862125e-06, "loss": 0.6052, "step": 31633 }, { "epoch": 0.9235934717234533, "grad_norm": 0.6459812964905914, "learning_rate": 1.6979724249797245e-06, "loss": 0.5734, "step": 31634 }, { "epoch": 0.9236226679513007, "grad_norm": 0.6472482430183563, "learning_rate": 1.697323600973236e-06, "loss": 0.6246, "step": 31635 }, { "epoch": 0.923651864179148, "grad_norm": 0.6272030251403599, "learning_rate": 1.696674776966748e-06, "loss": 0.5642, "step": 31636 }, { "epoch": 0.9236810604069954, "grad_norm": 0.6991504221179149, "learning_rate": 1.6960259529602597e-06, "loss": 0.6898, "step": 31637 }, { "epoch": 0.9237102566348427, "grad_norm": 0.6430930514549849, "learning_rate": 1.6953771289537715e-06, "loss": 0.6062, "step": 31638 }, { "epoch": 0.9237394528626901, "grad_norm": 0.6601297125196507, "learning_rate": 1.6947283049472831e-06, "loss": 0.5789, "step": 31639 }, { "epoch": 0.9237686490905375, "grad_norm": 0.6774173714711664, "learning_rate": 1.6940794809407947e-06, "loss": 0.6055, "step": 31640 }, { "epoch": 0.9237978453183848, "grad_norm": 0.6321157110235661, "learning_rate": 1.6934306569343068e-06, "loss": 0.5379, "step": 31641 }, { "epoch": 0.9238270415462322, "grad_norm": 0.6448094766973981, "learning_rate": 1.6927818329278184e-06, "loss": 0.5709, "step": 31642 }, { "epoch": 0.9238562377740795, "grad_norm": 0.6509779423103713, "learning_rate": 1.6921330089213304e-06, "loss": 0.6112, "step": 31643 }, { "epoch": 0.9238854340019269, "grad_norm": 0.6340436971035698, "learning_rate": 1.691484184914842e-06, "loss": 0.5786, "step": 31644 }, { "epoch": 0.9239146302297743, "grad_norm": 0.6846125382134524, "learning_rate": 1.6908353609083538e-06, "loss": 0.6177, "step": 31645 }, { "epoch": 0.9239438264576216, "grad_norm": 0.60831398928229, "learning_rate": 1.6901865369018654e-06, "loss": 0.5138, "step": 31646 }, { "epoch": 0.923973022685469, "grad_norm": 0.6711825579955746, "learning_rate": 1.6895377128953772e-06, "loss": 0.639, "step": 31647 }, { "epoch": 0.9240022189133164, "grad_norm": 0.6313857903831798, "learning_rate": 1.688888888888889e-06, "loss": 0.5682, "step": 31648 }, { "epoch": 0.9240314151411637, "grad_norm": 0.6010938923371607, "learning_rate": 1.6882400648824007e-06, "loss": 0.5136, "step": 31649 }, { "epoch": 0.9240606113690111, "grad_norm": 0.6153725526980856, "learning_rate": 1.6875912408759127e-06, "loss": 0.5521, "step": 31650 }, { "epoch": 0.9240898075968584, "grad_norm": 0.7310124867391391, "learning_rate": 1.6869424168694243e-06, "loss": 0.6683, "step": 31651 }, { "epoch": 0.9241190038247058, "grad_norm": 0.670545252024143, "learning_rate": 1.686293592862936e-06, "loss": 0.5556, "step": 31652 }, { "epoch": 0.9241482000525532, "grad_norm": 0.6386166125473358, "learning_rate": 1.685644768856448e-06, "loss": 0.5749, "step": 31653 }, { "epoch": 0.9241773962804005, "grad_norm": 0.5957872056575764, "learning_rate": 1.6849959448499595e-06, "loss": 0.5363, "step": 31654 }, { "epoch": 0.9242065925082479, "grad_norm": 0.651968785756016, "learning_rate": 1.6843471208434713e-06, "loss": 0.6456, "step": 31655 }, { "epoch": 0.9242357887360952, "grad_norm": 0.6192225733393871, "learning_rate": 1.6836982968369831e-06, "loss": 0.5321, "step": 31656 }, { "epoch": 0.9242649849639426, "grad_norm": 0.6039790125878848, "learning_rate": 1.683049472830495e-06, "loss": 0.5495, "step": 31657 }, { "epoch": 0.92429418119179, "grad_norm": 0.7688028754164511, "learning_rate": 1.6824006488240066e-06, "loss": 0.6541, "step": 31658 }, { "epoch": 0.9243233774196374, "grad_norm": 0.6287239091021379, "learning_rate": 1.6817518248175182e-06, "loss": 0.6057, "step": 31659 }, { "epoch": 0.9243525736474848, "grad_norm": 0.6844452644824003, "learning_rate": 1.6811030008110302e-06, "loss": 0.6173, "step": 31660 }, { "epoch": 0.9243817698753322, "grad_norm": 0.5912106847014196, "learning_rate": 1.6804541768045418e-06, "loss": 0.5359, "step": 31661 }, { "epoch": 0.9244109661031795, "grad_norm": 0.7207162746252395, "learning_rate": 1.6798053527980538e-06, "loss": 0.7232, "step": 31662 }, { "epoch": 0.9244401623310269, "grad_norm": 0.6669491230733452, "learning_rate": 1.6791565287915654e-06, "loss": 0.5933, "step": 31663 }, { "epoch": 0.9244693585588742, "grad_norm": 0.6475083566342218, "learning_rate": 1.6785077047850772e-06, "loss": 0.6017, "step": 31664 }, { "epoch": 0.9244985547867216, "grad_norm": 0.6386079638455442, "learning_rate": 1.6778588807785889e-06, "loss": 0.5575, "step": 31665 }, { "epoch": 0.924527751014569, "grad_norm": 0.6475487954243628, "learning_rate": 1.6772100567721007e-06, "loss": 0.5772, "step": 31666 }, { "epoch": 0.9245569472424163, "grad_norm": 0.5929395647175911, "learning_rate": 1.6765612327656125e-06, "loss": 0.5139, "step": 31667 }, { "epoch": 0.9245861434702637, "grad_norm": 0.675901468805677, "learning_rate": 1.675912408759124e-06, "loss": 0.5935, "step": 31668 }, { "epoch": 0.924615339698111, "grad_norm": 0.6972162736474856, "learning_rate": 1.6752635847526361e-06, "loss": 0.5617, "step": 31669 }, { "epoch": 0.9246445359259584, "grad_norm": 0.67704951110205, "learning_rate": 1.6746147607461477e-06, "loss": 0.6605, "step": 31670 }, { "epoch": 0.9246737321538058, "grad_norm": 0.6502517202512802, "learning_rate": 1.6739659367396595e-06, "loss": 0.6285, "step": 31671 }, { "epoch": 0.9247029283816531, "grad_norm": 0.6443275493881904, "learning_rate": 1.6733171127331713e-06, "loss": 0.5868, "step": 31672 }, { "epoch": 0.9247321246095005, "grad_norm": 0.6406897735305409, "learning_rate": 1.672668288726683e-06, "loss": 0.5566, "step": 31673 }, { "epoch": 0.9247613208373479, "grad_norm": 0.649271564746447, "learning_rate": 1.6720194647201948e-06, "loss": 0.583, "step": 31674 }, { "epoch": 0.9247905170651952, "grad_norm": 0.5827372571244305, "learning_rate": 1.6713706407137064e-06, "loss": 0.5, "step": 31675 }, { "epoch": 0.9248197132930426, "grad_norm": 0.6711430520539594, "learning_rate": 1.6707218167072184e-06, "loss": 0.6016, "step": 31676 }, { "epoch": 0.9248489095208899, "grad_norm": 0.6666420789714265, "learning_rate": 1.67007299270073e-06, "loss": 0.6531, "step": 31677 }, { "epoch": 0.9248781057487373, "grad_norm": 0.6472746998933984, "learning_rate": 1.669424168694242e-06, "loss": 0.5915, "step": 31678 }, { "epoch": 0.9249073019765847, "grad_norm": 0.6776110612793299, "learning_rate": 1.6687753446877536e-06, "loss": 0.635, "step": 31679 }, { "epoch": 0.924936498204432, "grad_norm": 0.6781736126525545, "learning_rate": 1.6681265206812652e-06, "loss": 0.6158, "step": 31680 }, { "epoch": 0.9249656944322794, "grad_norm": 0.6582926879249829, "learning_rate": 1.6674776966747773e-06, "loss": 0.6188, "step": 31681 }, { "epoch": 0.9249948906601267, "grad_norm": 0.679934018838866, "learning_rate": 1.6668288726682889e-06, "loss": 0.6091, "step": 31682 }, { "epoch": 0.9250240868879741, "grad_norm": 0.6253154166942522, "learning_rate": 1.6661800486618007e-06, "loss": 0.5796, "step": 31683 }, { "epoch": 0.9250532831158215, "grad_norm": 0.6116933660185683, "learning_rate": 1.6655312246553123e-06, "loss": 0.5293, "step": 31684 }, { "epoch": 0.9250824793436688, "grad_norm": 0.6237945133429706, "learning_rate": 1.664882400648824e-06, "loss": 0.5402, "step": 31685 }, { "epoch": 0.9251116755715162, "grad_norm": 0.6718625187676901, "learning_rate": 1.664233576642336e-06, "loss": 0.6302, "step": 31686 }, { "epoch": 0.9251408717993636, "grad_norm": 0.6898391317292559, "learning_rate": 1.6635847526358475e-06, "loss": 0.6327, "step": 31687 }, { "epoch": 0.9251700680272109, "grad_norm": 0.6359791350138776, "learning_rate": 1.6629359286293595e-06, "loss": 0.5553, "step": 31688 }, { "epoch": 0.9251992642550583, "grad_norm": 0.6274081002161704, "learning_rate": 1.6622871046228712e-06, "loss": 0.5749, "step": 31689 }, { "epoch": 0.9252284604829056, "grad_norm": 0.6358007600281472, "learning_rate": 1.661638280616383e-06, "loss": 0.5974, "step": 31690 }, { "epoch": 0.925257656710753, "grad_norm": 0.6378259274682035, "learning_rate": 1.6609894566098948e-06, "loss": 0.5901, "step": 31691 }, { "epoch": 0.9252868529386004, "grad_norm": 0.6382473576346365, "learning_rate": 1.6603406326034064e-06, "loss": 0.6029, "step": 31692 }, { "epoch": 0.9253160491664477, "grad_norm": 0.6307616356042944, "learning_rate": 1.6596918085969182e-06, "loss": 0.5651, "step": 31693 }, { "epoch": 0.9253452453942951, "grad_norm": 0.6106112379187236, "learning_rate": 1.6590429845904298e-06, "loss": 0.5276, "step": 31694 }, { "epoch": 0.9253744416221424, "grad_norm": 0.6127881756306212, "learning_rate": 1.6583941605839418e-06, "loss": 0.5353, "step": 31695 }, { "epoch": 0.9254036378499898, "grad_norm": 0.7003948265232783, "learning_rate": 1.6577453365774534e-06, "loss": 0.5834, "step": 31696 }, { "epoch": 0.9254328340778372, "grad_norm": 0.6730865635527282, "learning_rate": 1.6570965125709655e-06, "loss": 0.6587, "step": 31697 }, { "epoch": 0.9254620303056845, "grad_norm": 0.6139825176113379, "learning_rate": 1.656447688564477e-06, "loss": 0.5306, "step": 31698 }, { "epoch": 0.9254912265335319, "grad_norm": 0.6650275740848447, "learning_rate": 1.6557988645579887e-06, "loss": 0.6212, "step": 31699 }, { "epoch": 0.9255204227613792, "grad_norm": 0.6936336381708065, "learning_rate": 1.6551500405515005e-06, "loss": 0.639, "step": 31700 }, { "epoch": 0.9255496189892266, "grad_norm": 0.6408491758641628, "learning_rate": 1.6545012165450123e-06, "loss": 0.5967, "step": 31701 }, { "epoch": 0.925578815217074, "grad_norm": 0.6332208347421318, "learning_rate": 1.6538523925385241e-06, "loss": 0.5612, "step": 31702 }, { "epoch": 0.9256080114449213, "grad_norm": 0.6451246029119224, "learning_rate": 1.6532035685320357e-06, "loss": 0.595, "step": 31703 }, { "epoch": 0.9256372076727687, "grad_norm": 0.6412768410943381, "learning_rate": 1.6525547445255477e-06, "loss": 0.5922, "step": 31704 }, { "epoch": 0.925666403900616, "grad_norm": 0.6723320746351895, "learning_rate": 1.6519059205190594e-06, "loss": 0.6034, "step": 31705 }, { "epoch": 0.9256956001284634, "grad_norm": 0.65610411232986, "learning_rate": 1.651257096512571e-06, "loss": 0.6537, "step": 31706 }, { "epoch": 0.9257247963563108, "grad_norm": 0.6612066355869053, "learning_rate": 1.650608272506083e-06, "loss": 0.6154, "step": 31707 }, { "epoch": 0.9257539925841581, "grad_norm": 0.7307851857713829, "learning_rate": 1.6499594484995946e-06, "loss": 0.7421, "step": 31708 }, { "epoch": 0.9257831888120055, "grad_norm": 0.6756293166698357, "learning_rate": 1.6493106244931064e-06, "loss": 0.5996, "step": 31709 }, { "epoch": 0.9258123850398529, "grad_norm": 0.660072365729403, "learning_rate": 1.6486618004866182e-06, "loss": 0.6335, "step": 31710 }, { "epoch": 0.9258415812677002, "grad_norm": 0.6884995250549992, "learning_rate": 1.64801297648013e-06, "loss": 0.6291, "step": 31711 }, { "epoch": 0.9258707774955476, "grad_norm": 0.6587230311438022, "learning_rate": 1.6473641524736416e-06, "loss": 0.5703, "step": 31712 }, { "epoch": 0.9258999737233949, "grad_norm": 0.6301967704380281, "learning_rate": 1.6467153284671532e-06, "loss": 0.5638, "step": 31713 }, { "epoch": 0.9259291699512423, "grad_norm": 0.6498125123600393, "learning_rate": 1.6460665044606653e-06, "loss": 0.5961, "step": 31714 }, { "epoch": 0.9259583661790897, "grad_norm": 0.6361455021661052, "learning_rate": 1.6454176804541769e-06, "loss": 0.5887, "step": 31715 }, { "epoch": 0.925987562406937, "grad_norm": 0.6326475258821407, "learning_rate": 1.644768856447689e-06, "loss": 0.5617, "step": 31716 }, { "epoch": 0.9260167586347844, "grad_norm": 0.6082661762089108, "learning_rate": 1.6441200324412005e-06, "loss": 0.5363, "step": 31717 }, { "epoch": 0.9260459548626317, "grad_norm": 0.6878136775532112, "learning_rate": 1.643471208434712e-06, "loss": 0.6775, "step": 31718 }, { "epoch": 0.9260751510904791, "grad_norm": 0.6577769297615054, "learning_rate": 1.642822384428224e-06, "loss": 0.6085, "step": 31719 }, { "epoch": 0.9261043473183265, "grad_norm": 0.6432247420622152, "learning_rate": 1.6421735604217357e-06, "loss": 0.5477, "step": 31720 }, { "epoch": 0.9261335435461738, "grad_norm": 0.650089889454721, "learning_rate": 1.6415247364152476e-06, "loss": 0.611, "step": 31721 }, { "epoch": 0.9261627397740212, "grad_norm": 0.6120340936628922, "learning_rate": 1.6408759124087592e-06, "loss": 0.5592, "step": 31722 }, { "epoch": 0.9261919360018686, "grad_norm": 0.6518134866539187, "learning_rate": 1.6402270884022712e-06, "loss": 0.6459, "step": 31723 }, { "epoch": 0.9262211322297159, "grad_norm": 0.6633850159463948, "learning_rate": 1.6395782643957828e-06, "loss": 0.6269, "step": 31724 }, { "epoch": 0.9262503284575633, "grad_norm": 0.6857633572956713, "learning_rate": 1.6389294403892944e-06, "loss": 0.651, "step": 31725 }, { "epoch": 0.9262795246854106, "grad_norm": 0.6421212149265413, "learning_rate": 1.6382806163828064e-06, "loss": 0.5803, "step": 31726 }, { "epoch": 0.926308720913258, "grad_norm": 0.6098654501100141, "learning_rate": 1.637631792376318e-06, "loss": 0.5357, "step": 31727 }, { "epoch": 0.9263379171411054, "grad_norm": 0.6830642714201304, "learning_rate": 1.6369829683698298e-06, "loss": 0.6296, "step": 31728 }, { "epoch": 0.9263671133689527, "grad_norm": 0.6650744466592685, "learning_rate": 1.6363341443633414e-06, "loss": 0.6145, "step": 31729 }, { "epoch": 0.9263963095968001, "grad_norm": 0.6864572301400941, "learning_rate": 1.6356853203568535e-06, "loss": 0.6865, "step": 31730 }, { "epoch": 0.9264255058246474, "grad_norm": 0.6112686269167571, "learning_rate": 1.635036496350365e-06, "loss": 0.5619, "step": 31731 }, { "epoch": 0.9264547020524948, "grad_norm": 0.6669599794972297, "learning_rate": 1.6343876723438767e-06, "loss": 0.6013, "step": 31732 }, { "epoch": 0.9264838982803422, "grad_norm": 0.6409955077341877, "learning_rate": 1.6337388483373887e-06, "loss": 0.5805, "step": 31733 }, { "epoch": 0.9265130945081895, "grad_norm": 0.7166871445485814, "learning_rate": 1.6330900243309003e-06, "loss": 0.7001, "step": 31734 }, { "epoch": 0.9265422907360369, "grad_norm": 0.6739404853573582, "learning_rate": 1.6324412003244121e-06, "loss": 0.5402, "step": 31735 }, { "epoch": 0.9265714869638842, "grad_norm": 0.6352516542613177, "learning_rate": 1.631792376317924e-06, "loss": 0.5346, "step": 31736 }, { "epoch": 0.9266006831917316, "grad_norm": 0.6116014289244214, "learning_rate": 1.6311435523114358e-06, "loss": 0.5438, "step": 31737 }, { "epoch": 0.926629879419579, "grad_norm": 0.617128994374235, "learning_rate": 1.6304947283049474e-06, "loss": 0.5773, "step": 31738 }, { "epoch": 0.9266590756474263, "grad_norm": 0.6997136732718612, "learning_rate": 1.6298459042984592e-06, "loss": 0.6373, "step": 31739 }, { "epoch": 0.9266882718752737, "grad_norm": 0.7060656301559808, "learning_rate": 1.629197080291971e-06, "loss": 0.6764, "step": 31740 }, { "epoch": 0.9267174681031211, "grad_norm": 0.6618581777930193, "learning_rate": 1.6285482562854826e-06, "loss": 0.6568, "step": 31741 }, { "epoch": 0.9267466643309684, "grad_norm": 0.6152445670773146, "learning_rate": 1.6278994322789946e-06, "loss": 0.5243, "step": 31742 }, { "epoch": 0.9267758605588158, "grad_norm": 0.6194360810636499, "learning_rate": 1.6272506082725062e-06, "loss": 0.542, "step": 31743 }, { "epoch": 0.9268050567866631, "grad_norm": 0.6282580635527838, "learning_rate": 1.626601784266018e-06, "loss": 0.5624, "step": 31744 }, { "epoch": 0.9268342530145105, "grad_norm": 0.6772855533200984, "learning_rate": 1.6259529602595299e-06, "loss": 0.6323, "step": 31745 }, { "epoch": 0.9268634492423579, "grad_norm": 0.6546522418305852, "learning_rate": 1.6253041362530415e-06, "loss": 0.5892, "step": 31746 }, { "epoch": 0.9268926454702052, "grad_norm": 0.6572222805594415, "learning_rate": 1.6246553122465533e-06, "loss": 0.5627, "step": 31747 }, { "epoch": 0.9269218416980526, "grad_norm": 0.6445530966744345, "learning_rate": 1.6240064882400649e-06, "loss": 0.5964, "step": 31748 }, { "epoch": 0.9269510379259, "grad_norm": 0.7034497056937151, "learning_rate": 1.623357664233577e-06, "loss": 0.7205, "step": 31749 }, { "epoch": 0.9269802341537473, "grad_norm": 0.6121349658379032, "learning_rate": 1.6227088402270885e-06, "loss": 0.5539, "step": 31750 }, { "epoch": 0.9270094303815947, "grad_norm": 0.6644618365930688, "learning_rate": 1.6220600162206001e-06, "loss": 0.6187, "step": 31751 }, { "epoch": 0.927038626609442, "grad_norm": 0.6424597217780523, "learning_rate": 1.6214111922141121e-06, "loss": 0.5729, "step": 31752 }, { "epoch": 0.9270678228372894, "grad_norm": 0.6365266714946278, "learning_rate": 1.6207623682076237e-06, "loss": 0.6151, "step": 31753 }, { "epoch": 0.9270970190651368, "grad_norm": 0.6749347784249347, "learning_rate": 1.6201135442011356e-06, "loss": 0.6573, "step": 31754 }, { "epoch": 0.9271262152929841, "grad_norm": 0.6462134279654349, "learning_rate": 1.6194647201946474e-06, "loss": 0.6127, "step": 31755 }, { "epoch": 0.9271554115208315, "grad_norm": 0.6115958723518267, "learning_rate": 1.6188158961881592e-06, "loss": 0.5675, "step": 31756 }, { "epoch": 0.9271846077486788, "grad_norm": 0.6992036810498614, "learning_rate": 1.6181670721816708e-06, "loss": 0.6943, "step": 31757 }, { "epoch": 0.9272138039765262, "grad_norm": 0.6328942503383596, "learning_rate": 1.6175182481751824e-06, "loss": 0.5991, "step": 31758 }, { "epoch": 0.9272430002043736, "grad_norm": 0.6723138720366035, "learning_rate": 1.6168694241686944e-06, "loss": 0.627, "step": 31759 }, { "epoch": 0.9272721964322209, "grad_norm": 0.6632922639142822, "learning_rate": 1.616220600162206e-06, "loss": 0.6093, "step": 31760 }, { "epoch": 0.9273013926600683, "grad_norm": 0.6391586636801686, "learning_rate": 1.615571776155718e-06, "loss": 0.5627, "step": 31761 }, { "epoch": 0.9273305888879156, "grad_norm": 0.6759847049049854, "learning_rate": 1.6149229521492297e-06, "loss": 0.6436, "step": 31762 }, { "epoch": 0.927359785115763, "grad_norm": 0.6569986831792957, "learning_rate": 1.6142741281427415e-06, "loss": 0.6426, "step": 31763 }, { "epoch": 0.9273889813436104, "grad_norm": 0.6260268280582849, "learning_rate": 1.613625304136253e-06, "loss": 0.5458, "step": 31764 }, { "epoch": 0.9274181775714577, "grad_norm": 0.6962667685707176, "learning_rate": 1.6129764801297649e-06, "loss": 0.7066, "step": 31765 }, { "epoch": 0.9274473737993051, "grad_norm": 0.6205219853416187, "learning_rate": 1.6123276561232767e-06, "loss": 0.5527, "step": 31766 }, { "epoch": 0.9274765700271524, "grad_norm": 0.5665699864063614, "learning_rate": 1.6116788321167883e-06, "loss": 0.4591, "step": 31767 }, { "epoch": 0.9275057662549998, "grad_norm": 0.6563812304534987, "learning_rate": 1.6110300081103003e-06, "loss": 0.6052, "step": 31768 }, { "epoch": 0.9275349624828472, "grad_norm": 0.6128894138147285, "learning_rate": 1.610381184103812e-06, "loss": 0.5569, "step": 31769 }, { "epoch": 0.9275641587106945, "grad_norm": 0.6819414954015924, "learning_rate": 1.609732360097324e-06, "loss": 0.6732, "step": 31770 }, { "epoch": 0.9275933549385419, "grad_norm": 0.6563387214032348, "learning_rate": 1.6090835360908356e-06, "loss": 0.621, "step": 31771 }, { "epoch": 0.9276225511663893, "grad_norm": 0.6208471758850511, "learning_rate": 1.6084347120843472e-06, "loss": 0.5551, "step": 31772 }, { "epoch": 0.9276517473942366, "grad_norm": 0.6453369970290372, "learning_rate": 1.607785888077859e-06, "loss": 0.5537, "step": 31773 }, { "epoch": 0.927680943622084, "grad_norm": 0.6770801627850385, "learning_rate": 1.6071370640713708e-06, "loss": 0.6344, "step": 31774 }, { "epoch": 0.9277101398499313, "grad_norm": 0.6149499040352047, "learning_rate": 1.6064882400648826e-06, "loss": 0.4966, "step": 31775 }, { "epoch": 0.9277393360777787, "grad_norm": 0.6273566742240095, "learning_rate": 1.6058394160583942e-06, "loss": 0.5449, "step": 31776 }, { "epoch": 0.9277685323056261, "grad_norm": 0.606221082687264, "learning_rate": 1.6051905920519058e-06, "loss": 0.5573, "step": 31777 }, { "epoch": 0.9277977285334734, "grad_norm": 0.6365691807581848, "learning_rate": 1.6045417680454179e-06, "loss": 0.6076, "step": 31778 }, { "epoch": 0.9278269247613209, "grad_norm": 0.6416068375494821, "learning_rate": 1.6038929440389295e-06, "loss": 0.5969, "step": 31779 }, { "epoch": 0.9278561209891683, "grad_norm": 0.6260496759117707, "learning_rate": 1.6032441200324415e-06, "loss": 0.5473, "step": 31780 }, { "epoch": 0.9278853172170156, "grad_norm": 0.6386117421310226, "learning_rate": 1.602595296025953e-06, "loss": 0.5708, "step": 31781 }, { "epoch": 0.927914513444863, "grad_norm": 0.6153468616627161, "learning_rate": 1.601946472019465e-06, "loss": 0.4845, "step": 31782 }, { "epoch": 0.9279437096727103, "grad_norm": 0.680169397081482, "learning_rate": 1.6012976480129765e-06, "loss": 0.6205, "step": 31783 }, { "epoch": 0.9279729059005577, "grad_norm": 0.6845195160014316, "learning_rate": 1.6006488240064883e-06, "loss": 0.6805, "step": 31784 }, { "epoch": 0.9280021021284051, "grad_norm": 0.6374776608490749, "learning_rate": 1.6000000000000001e-06, "loss": 0.5674, "step": 31785 }, { "epoch": 0.9280312983562524, "grad_norm": 0.6797342225990776, "learning_rate": 1.5993511759935117e-06, "loss": 0.608, "step": 31786 }, { "epoch": 0.9280604945840998, "grad_norm": 0.6830711847829909, "learning_rate": 1.5987023519870238e-06, "loss": 0.6222, "step": 31787 }, { "epoch": 0.9280896908119471, "grad_norm": 0.6842964714998715, "learning_rate": 1.5980535279805354e-06, "loss": 0.6821, "step": 31788 }, { "epoch": 0.9281188870397945, "grad_norm": 0.6858814219853842, "learning_rate": 1.5974047039740472e-06, "loss": 0.634, "step": 31789 }, { "epoch": 0.9281480832676419, "grad_norm": 0.6395818539174636, "learning_rate": 1.596755879967559e-06, "loss": 0.597, "step": 31790 }, { "epoch": 0.9281772794954892, "grad_norm": 0.6021885976874743, "learning_rate": 1.5961070559610706e-06, "loss": 0.5043, "step": 31791 }, { "epoch": 0.9282064757233366, "grad_norm": 0.6333031871316521, "learning_rate": 1.5954582319545824e-06, "loss": 0.5765, "step": 31792 }, { "epoch": 0.928235671951184, "grad_norm": 0.6330330420001307, "learning_rate": 1.5948094079480942e-06, "loss": 0.5616, "step": 31793 }, { "epoch": 0.9282648681790313, "grad_norm": 0.6079727476265747, "learning_rate": 1.594160583941606e-06, "loss": 0.5105, "step": 31794 }, { "epoch": 0.9282940644068787, "grad_norm": 0.6645078506441999, "learning_rate": 1.5935117599351177e-06, "loss": 0.5768, "step": 31795 }, { "epoch": 0.928323260634726, "grad_norm": 0.6340535266677463, "learning_rate": 1.5928629359286297e-06, "loss": 0.5836, "step": 31796 }, { "epoch": 0.9283524568625734, "grad_norm": 0.6587162614986741, "learning_rate": 1.5922141119221413e-06, "loss": 0.6164, "step": 31797 }, { "epoch": 0.9283816530904208, "grad_norm": 0.6522127624777049, "learning_rate": 1.5915652879156529e-06, "loss": 0.5394, "step": 31798 }, { "epoch": 0.9284108493182681, "grad_norm": 0.6714953489642764, "learning_rate": 1.590916463909165e-06, "loss": 0.594, "step": 31799 }, { "epoch": 0.9284400455461155, "grad_norm": 0.6676799319700036, "learning_rate": 1.5902676399026765e-06, "loss": 0.5961, "step": 31800 }, { "epoch": 0.9284692417739628, "grad_norm": 0.6570332968152959, "learning_rate": 1.5896188158961883e-06, "loss": 0.6352, "step": 31801 }, { "epoch": 0.9284984380018102, "grad_norm": 0.6222594596243451, "learning_rate": 1.5889699918897e-06, "loss": 0.5087, "step": 31802 }, { "epoch": 0.9285276342296576, "grad_norm": 0.6297076008055114, "learning_rate": 1.588321167883212e-06, "loss": 0.5555, "step": 31803 }, { "epoch": 0.9285568304575049, "grad_norm": 0.6205670754573196, "learning_rate": 1.5876723438767236e-06, "loss": 0.5738, "step": 31804 }, { "epoch": 0.9285860266853523, "grad_norm": 0.6323839542907174, "learning_rate": 1.5870235198702352e-06, "loss": 0.5868, "step": 31805 }, { "epoch": 0.9286152229131996, "grad_norm": 0.6609532526259762, "learning_rate": 1.5863746958637472e-06, "loss": 0.6262, "step": 31806 }, { "epoch": 0.928644419141047, "grad_norm": 0.624393965440042, "learning_rate": 1.5857258718572588e-06, "loss": 0.5677, "step": 31807 }, { "epoch": 0.9286736153688944, "grad_norm": 0.6251878868986197, "learning_rate": 1.5850770478507706e-06, "loss": 0.5705, "step": 31808 }, { "epoch": 0.9287028115967417, "grad_norm": 0.6823506865998296, "learning_rate": 1.5844282238442824e-06, "loss": 0.6177, "step": 31809 }, { "epoch": 0.9287320078245891, "grad_norm": 0.6373439061918912, "learning_rate": 1.583779399837794e-06, "loss": 0.5474, "step": 31810 }, { "epoch": 0.9287612040524365, "grad_norm": 0.6972289039998705, "learning_rate": 1.5831305758313059e-06, "loss": 0.6729, "step": 31811 }, { "epoch": 0.9287904002802838, "grad_norm": 0.6416035956680377, "learning_rate": 1.5824817518248175e-06, "loss": 0.5877, "step": 31812 }, { "epoch": 0.9288195965081312, "grad_norm": 0.6275523499858848, "learning_rate": 1.5818329278183295e-06, "loss": 0.567, "step": 31813 }, { "epoch": 0.9288487927359785, "grad_norm": 0.6780200155772085, "learning_rate": 1.581184103811841e-06, "loss": 0.5948, "step": 31814 }, { "epoch": 0.9288779889638259, "grad_norm": 0.6573616822127069, "learning_rate": 1.5805352798053531e-06, "loss": 0.6085, "step": 31815 }, { "epoch": 0.9289071851916733, "grad_norm": 0.6326051328347698, "learning_rate": 1.5798864557988647e-06, "loss": 0.5314, "step": 31816 }, { "epoch": 0.9289363814195206, "grad_norm": 0.6581413746819444, "learning_rate": 1.5792376317923763e-06, "loss": 0.6015, "step": 31817 }, { "epoch": 0.928965577647368, "grad_norm": 0.6378394194706793, "learning_rate": 1.5785888077858881e-06, "loss": 0.5723, "step": 31818 }, { "epoch": 0.9289947738752153, "grad_norm": 0.6494376840549944, "learning_rate": 1.5779399837794e-06, "loss": 0.6111, "step": 31819 }, { "epoch": 0.9290239701030627, "grad_norm": 0.6214621689452616, "learning_rate": 1.5772911597729118e-06, "loss": 0.5632, "step": 31820 }, { "epoch": 0.9290531663309101, "grad_norm": 0.6633519381073346, "learning_rate": 1.5766423357664234e-06, "loss": 0.6225, "step": 31821 }, { "epoch": 0.9290823625587574, "grad_norm": 0.6146413947356415, "learning_rate": 1.5759935117599354e-06, "loss": 0.5481, "step": 31822 }, { "epoch": 0.9291115587866048, "grad_norm": 0.6652931949446876, "learning_rate": 1.575344687753447e-06, "loss": 0.5816, "step": 31823 }, { "epoch": 0.9291407550144521, "grad_norm": 0.6439871933563256, "learning_rate": 1.5746958637469586e-06, "loss": 0.6072, "step": 31824 }, { "epoch": 0.9291699512422995, "grad_norm": 0.6469829075596893, "learning_rate": 1.5740470397404706e-06, "loss": 0.5793, "step": 31825 }, { "epoch": 0.9291991474701469, "grad_norm": 0.7022194589500788, "learning_rate": 1.5733982157339822e-06, "loss": 0.6533, "step": 31826 }, { "epoch": 0.9292283436979942, "grad_norm": 0.6821582502134796, "learning_rate": 1.572749391727494e-06, "loss": 0.6352, "step": 31827 }, { "epoch": 0.9292575399258416, "grad_norm": 0.6482779820779271, "learning_rate": 1.5721005677210059e-06, "loss": 0.5927, "step": 31828 }, { "epoch": 0.929286736153689, "grad_norm": 0.7405248733113545, "learning_rate": 1.5714517437145177e-06, "loss": 0.6626, "step": 31829 }, { "epoch": 0.9293159323815363, "grad_norm": 0.6767674526842529, "learning_rate": 1.5708029197080293e-06, "loss": 0.637, "step": 31830 }, { "epoch": 0.9293451286093837, "grad_norm": 0.6569308166303606, "learning_rate": 1.570154095701541e-06, "loss": 0.6138, "step": 31831 }, { "epoch": 0.929374324837231, "grad_norm": 0.6859738606620743, "learning_rate": 1.569505271695053e-06, "loss": 0.6143, "step": 31832 }, { "epoch": 0.9294035210650784, "grad_norm": 0.6075868441176936, "learning_rate": 1.5688564476885645e-06, "loss": 0.5379, "step": 31833 }, { "epoch": 0.9294327172929258, "grad_norm": 0.6741440972750654, "learning_rate": 1.5682076236820766e-06, "loss": 0.6399, "step": 31834 }, { "epoch": 0.9294619135207731, "grad_norm": 0.6579005218021035, "learning_rate": 1.5675587996755882e-06, "loss": 0.5976, "step": 31835 }, { "epoch": 0.9294911097486205, "grad_norm": 0.6330443888892839, "learning_rate": 1.5669099756690998e-06, "loss": 0.5426, "step": 31836 }, { "epoch": 0.9295203059764678, "grad_norm": 0.65247452921759, "learning_rate": 1.5662611516626116e-06, "loss": 0.5926, "step": 31837 }, { "epoch": 0.9295495022043152, "grad_norm": 0.6471967788913828, "learning_rate": 1.5656123276561234e-06, "loss": 0.537, "step": 31838 }, { "epoch": 0.9295786984321626, "grad_norm": 0.6764320137983472, "learning_rate": 1.5649635036496352e-06, "loss": 0.6444, "step": 31839 }, { "epoch": 0.9296078946600099, "grad_norm": 0.6395063125810373, "learning_rate": 1.5643146796431468e-06, "loss": 0.618, "step": 31840 }, { "epoch": 0.9296370908878573, "grad_norm": 0.6253792975772847, "learning_rate": 1.5636658556366588e-06, "loss": 0.6092, "step": 31841 }, { "epoch": 0.9296662871157046, "grad_norm": 0.6922932767398271, "learning_rate": 1.5630170316301704e-06, "loss": 0.6456, "step": 31842 }, { "epoch": 0.929695483343552, "grad_norm": 0.6304279536243895, "learning_rate": 1.562368207623682e-06, "loss": 0.5883, "step": 31843 }, { "epoch": 0.9297246795713994, "grad_norm": 0.6235601377740617, "learning_rate": 1.561719383617194e-06, "loss": 0.5857, "step": 31844 }, { "epoch": 0.9297538757992467, "grad_norm": 0.6664468544037531, "learning_rate": 1.5610705596107057e-06, "loss": 0.6383, "step": 31845 }, { "epoch": 0.9297830720270941, "grad_norm": 0.6507882552076584, "learning_rate": 1.5604217356042175e-06, "loss": 0.5877, "step": 31846 }, { "epoch": 0.9298122682549415, "grad_norm": 0.6137266562331629, "learning_rate": 1.559772911597729e-06, "loss": 0.5062, "step": 31847 }, { "epoch": 0.9298414644827888, "grad_norm": 0.6370042149972576, "learning_rate": 1.5591240875912411e-06, "loss": 0.5641, "step": 31848 }, { "epoch": 0.9298706607106362, "grad_norm": 0.6267015348147704, "learning_rate": 1.5584752635847527e-06, "loss": 0.5853, "step": 31849 }, { "epoch": 0.9298998569384835, "grad_norm": 0.6533461847159003, "learning_rate": 1.5578264395782643e-06, "loss": 0.5831, "step": 31850 }, { "epoch": 0.9299290531663309, "grad_norm": 0.6352098622450396, "learning_rate": 1.5571776155717764e-06, "loss": 0.5582, "step": 31851 }, { "epoch": 0.9299582493941783, "grad_norm": 0.5998707559328457, "learning_rate": 1.556528791565288e-06, "loss": 0.4938, "step": 31852 }, { "epoch": 0.9299874456220256, "grad_norm": 0.6051515985538476, "learning_rate": 1.5558799675588e-06, "loss": 0.5472, "step": 31853 }, { "epoch": 0.930016641849873, "grad_norm": 0.6655019153513915, "learning_rate": 1.5552311435523116e-06, "loss": 0.6528, "step": 31854 }, { "epoch": 0.9300458380777203, "grad_norm": 0.6591120651923901, "learning_rate": 1.5545823195458234e-06, "loss": 0.6609, "step": 31855 }, { "epoch": 0.9300750343055677, "grad_norm": 0.6444741703110853, "learning_rate": 1.553933495539335e-06, "loss": 0.6023, "step": 31856 }, { "epoch": 0.9301042305334151, "grad_norm": 0.6003477998657631, "learning_rate": 1.5532846715328468e-06, "loss": 0.5262, "step": 31857 }, { "epoch": 0.9301334267612624, "grad_norm": 0.6589454078206011, "learning_rate": 1.5526358475263586e-06, "loss": 0.6071, "step": 31858 }, { "epoch": 0.9301626229891098, "grad_norm": 0.6310518604206133, "learning_rate": 1.5519870235198702e-06, "loss": 0.5815, "step": 31859 }, { "epoch": 0.9301918192169571, "grad_norm": 0.6463627647208441, "learning_rate": 1.5513381995133823e-06, "loss": 0.6186, "step": 31860 }, { "epoch": 0.9302210154448045, "grad_norm": 0.6213585048528708, "learning_rate": 1.5506893755068939e-06, "loss": 0.511, "step": 31861 }, { "epoch": 0.9302502116726519, "grad_norm": 0.6601274131232211, "learning_rate": 1.5500405515004057e-06, "loss": 0.5865, "step": 31862 }, { "epoch": 0.9302794079004992, "grad_norm": 0.6013734735918765, "learning_rate": 1.5493917274939175e-06, "loss": 0.5399, "step": 31863 }, { "epoch": 0.9303086041283466, "grad_norm": 0.621991579784175, "learning_rate": 1.5487429034874291e-06, "loss": 0.5786, "step": 31864 }, { "epoch": 0.930337800356194, "grad_norm": 0.6834790959680443, "learning_rate": 1.548094079480941e-06, "loss": 0.692, "step": 31865 }, { "epoch": 0.9303669965840413, "grad_norm": 0.648843151414612, "learning_rate": 1.5474452554744525e-06, "loss": 0.5643, "step": 31866 }, { "epoch": 0.9303961928118887, "grad_norm": 0.6369479120676744, "learning_rate": 1.5467964314679646e-06, "loss": 0.6021, "step": 31867 }, { "epoch": 0.930425389039736, "grad_norm": 0.6251717740302637, "learning_rate": 1.5461476074614762e-06, "loss": 0.5497, "step": 31868 }, { "epoch": 0.9304545852675834, "grad_norm": 0.6636788963432527, "learning_rate": 1.5454987834549878e-06, "loss": 0.6058, "step": 31869 }, { "epoch": 0.9304837814954308, "grad_norm": 0.6264004888515097, "learning_rate": 1.5448499594484998e-06, "loss": 0.5889, "step": 31870 }, { "epoch": 0.9305129777232781, "grad_norm": 0.628461682975135, "learning_rate": 1.5442011354420114e-06, "loss": 0.5817, "step": 31871 }, { "epoch": 0.9305421739511255, "grad_norm": 0.6496361702301996, "learning_rate": 1.5435523114355232e-06, "loss": 0.5931, "step": 31872 }, { "epoch": 0.9305713701789728, "grad_norm": 0.6442584607866093, "learning_rate": 1.542903487429035e-06, "loss": 0.5837, "step": 31873 }, { "epoch": 0.9306005664068202, "grad_norm": 0.690465805773537, "learning_rate": 1.5422546634225468e-06, "loss": 0.6354, "step": 31874 }, { "epoch": 0.9306297626346676, "grad_norm": 0.6263880288935354, "learning_rate": 1.5416058394160584e-06, "loss": 0.5665, "step": 31875 }, { "epoch": 0.9306589588625149, "grad_norm": 0.6915498264415775, "learning_rate": 1.5409570154095703e-06, "loss": 0.6578, "step": 31876 }, { "epoch": 0.9306881550903623, "grad_norm": 0.6541008959459258, "learning_rate": 1.540308191403082e-06, "loss": 0.5591, "step": 31877 }, { "epoch": 0.9307173513182097, "grad_norm": 0.645570635565975, "learning_rate": 1.5396593673965937e-06, "loss": 0.6012, "step": 31878 }, { "epoch": 0.930746547546057, "grad_norm": 0.6579952658340145, "learning_rate": 1.5390105433901057e-06, "loss": 0.6124, "step": 31879 }, { "epoch": 0.9307757437739044, "grad_norm": 0.6466411884915433, "learning_rate": 1.5383617193836173e-06, "loss": 0.5812, "step": 31880 }, { "epoch": 0.9308049400017517, "grad_norm": 0.6609025226865601, "learning_rate": 1.5377128953771291e-06, "loss": 0.6088, "step": 31881 }, { "epoch": 0.9308341362295991, "grad_norm": 0.7461879798115256, "learning_rate": 1.537064071370641e-06, "loss": 0.6728, "step": 31882 }, { "epoch": 0.9308633324574465, "grad_norm": 0.5995265667229722, "learning_rate": 1.5364152473641525e-06, "loss": 0.536, "step": 31883 }, { "epoch": 0.9308925286852938, "grad_norm": 0.6508193252510714, "learning_rate": 1.5357664233576644e-06, "loss": 0.5824, "step": 31884 }, { "epoch": 0.9309217249131412, "grad_norm": 0.6995575781069457, "learning_rate": 1.535117599351176e-06, "loss": 0.6549, "step": 31885 }, { "epoch": 0.9309509211409885, "grad_norm": 0.6526695371727133, "learning_rate": 1.534468775344688e-06, "loss": 0.5719, "step": 31886 }, { "epoch": 0.9309801173688359, "grad_norm": 0.6678764147133266, "learning_rate": 1.5338199513381996e-06, "loss": 0.5913, "step": 31887 }, { "epoch": 0.9310093135966833, "grad_norm": 0.6148826685296837, "learning_rate": 1.5331711273317116e-06, "loss": 0.543, "step": 31888 }, { "epoch": 0.9310385098245306, "grad_norm": 0.6459520677408962, "learning_rate": 1.5325223033252232e-06, "loss": 0.5655, "step": 31889 }, { "epoch": 0.931067706052378, "grad_norm": 0.6304954067477997, "learning_rate": 1.5318734793187348e-06, "loss": 0.5268, "step": 31890 }, { "epoch": 0.9310969022802253, "grad_norm": 0.6722207417671737, "learning_rate": 1.5312246553122466e-06, "loss": 0.6351, "step": 31891 }, { "epoch": 0.9311260985080727, "grad_norm": 0.6237253604170582, "learning_rate": 1.5305758313057585e-06, "loss": 0.5812, "step": 31892 }, { "epoch": 0.9311552947359201, "grad_norm": 0.6210425385860792, "learning_rate": 1.5299270072992703e-06, "loss": 0.5141, "step": 31893 }, { "epoch": 0.9311844909637674, "grad_norm": 0.6347539509762763, "learning_rate": 1.5292781832927819e-06, "loss": 0.5634, "step": 31894 }, { "epoch": 0.9312136871916148, "grad_norm": 0.6549338327589443, "learning_rate": 1.5286293592862935e-06, "loss": 0.6438, "step": 31895 }, { "epoch": 0.9312428834194622, "grad_norm": 0.6126962862938418, "learning_rate": 1.5279805352798055e-06, "loss": 0.5592, "step": 31896 }, { "epoch": 0.9312720796473095, "grad_norm": 0.6427983717368573, "learning_rate": 1.5273317112733171e-06, "loss": 0.5837, "step": 31897 }, { "epoch": 0.9313012758751569, "grad_norm": 0.7019631126975812, "learning_rate": 1.5266828872668291e-06, "loss": 0.6476, "step": 31898 }, { "epoch": 0.9313304721030042, "grad_norm": 0.651037194161453, "learning_rate": 1.5260340632603407e-06, "loss": 0.5856, "step": 31899 }, { "epoch": 0.9313596683308517, "grad_norm": 0.6545948653070776, "learning_rate": 1.5253852392538526e-06, "loss": 0.5681, "step": 31900 }, { "epoch": 0.9313888645586991, "grad_norm": 0.6695897579786615, "learning_rate": 1.5247364152473642e-06, "loss": 0.6273, "step": 31901 }, { "epoch": 0.9314180607865464, "grad_norm": 0.646701476010855, "learning_rate": 1.524087591240876e-06, "loss": 0.5611, "step": 31902 }, { "epoch": 0.9314472570143938, "grad_norm": 0.5898842192565986, "learning_rate": 1.5234387672343878e-06, "loss": 0.5133, "step": 31903 }, { "epoch": 0.9314764532422412, "grad_norm": 0.647292411611985, "learning_rate": 1.5227899432278994e-06, "loss": 0.6149, "step": 31904 }, { "epoch": 0.9315056494700885, "grad_norm": 0.6078799910246337, "learning_rate": 1.5221411192214114e-06, "loss": 0.5508, "step": 31905 }, { "epoch": 0.9315348456979359, "grad_norm": 0.6576038146493045, "learning_rate": 1.521492295214923e-06, "loss": 0.6092, "step": 31906 }, { "epoch": 0.9315640419257832, "grad_norm": 0.6794645633779256, "learning_rate": 1.5208434712084348e-06, "loss": 0.6213, "step": 31907 }, { "epoch": 0.9315932381536306, "grad_norm": 0.6425043077504624, "learning_rate": 1.5201946472019467e-06, "loss": 0.5784, "step": 31908 }, { "epoch": 0.931622434381478, "grad_norm": 0.6388106915165178, "learning_rate": 1.5195458231954583e-06, "loss": 0.5941, "step": 31909 }, { "epoch": 0.9316516306093253, "grad_norm": 0.6558481821749842, "learning_rate": 1.51889699918897e-06, "loss": 0.6035, "step": 31910 }, { "epoch": 0.9316808268371727, "grad_norm": 0.5990637831083911, "learning_rate": 1.518248175182482e-06, "loss": 0.5327, "step": 31911 }, { "epoch": 0.93171002306502, "grad_norm": 0.6188624138895401, "learning_rate": 1.5175993511759937e-06, "loss": 0.5657, "step": 31912 }, { "epoch": 0.9317392192928674, "grad_norm": 0.6601045925036497, "learning_rate": 1.5169505271695053e-06, "loss": 0.609, "step": 31913 }, { "epoch": 0.9317684155207148, "grad_norm": 0.6795494129212254, "learning_rate": 1.5163017031630173e-06, "loss": 0.6295, "step": 31914 }, { "epoch": 0.9317976117485621, "grad_norm": 0.6187008565344165, "learning_rate": 1.515652879156529e-06, "loss": 0.566, "step": 31915 }, { "epoch": 0.9318268079764095, "grad_norm": 0.6318430676317395, "learning_rate": 1.5150040551500406e-06, "loss": 0.5925, "step": 31916 }, { "epoch": 0.9318560042042568, "grad_norm": 0.6809207612733477, "learning_rate": 1.5143552311435526e-06, "loss": 0.6712, "step": 31917 }, { "epoch": 0.9318852004321042, "grad_norm": 0.6110075653101492, "learning_rate": 1.5137064071370642e-06, "loss": 0.5317, "step": 31918 }, { "epoch": 0.9319143966599516, "grad_norm": 0.6215680209116397, "learning_rate": 1.513057583130576e-06, "loss": 0.5995, "step": 31919 }, { "epoch": 0.9319435928877989, "grad_norm": 0.6859111758873223, "learning_rate": 1.5124087591240876e-06, "loss": 0.6711, "step": 31920 }, { "epoch": 0.9319727891156463, "grad_norm": 0.70095948318, "learning_rate": 1.5117599351175996e-06, "loss": 0.5833, "step": 31921 }, { "epoch": 0.9320019853434937, "grad_norm": 0.6355024164994614, "learning_rate": 1.5111111111111112e-06, "loss": 0.5988, "step": 31922 }, { "epoch": 0.932031181571341, "grad_norm": 0.6613279976533518, "learning_rate": 1.5104622871046228e-06, "loss": 0.6062, "step": 31923 }, { "epoch": 0.9320603777991884, "grad_norm": 0.645511603803773, "learning_rate": 1.5098134630981349e-06, "loss": 0.5782, "step": 31924 }, { "epoch": 0.9320895740270357, "grad_norm": 0.6835392940807125, "learning_rate": 1.5091646390916465e-06, "loss": 0.608, "step": 31925 }, { "epoch": 0.9321187702548831, "grad_norm": 1.2555711077388512, "learning_rate": 1.5085158150851583e-06, "loss": 0.6899, "step": 31926 }, { "epoch": 0.9321479664827305, "grad_norm": 0.6415104689066918, "learning_rate": 1.50786699107867e-06, "loss": 0.569, "step": 31927 }, { "epoch": 0.9321771627105778, "grad_norm": 0.5957963342924364, "learning_rate": 1.5072181670721817e-06, "loss": 0.5066, "step": 31928 }, { "epoch": 0.9322063589384252, "grad_norm": 0.6604413157668875, "learning_rate": 1.5065693430656935e-06, "loss": 0.6427, "step": 31929 }, { "epoch": 0.9322355551662725, "grad_norm": 0.6390914116497329, "learning_rate": 1.5059205190592051e-06, "loss": 0.5736, "step": 31930 }, { "epoch": 0.9322647513941199, "grad_norm": 0.6066658176002402, "learning_rate": 1.5052716950527171e-06, "loss": 0.5071, "step": 31931 }, { "epoch": 0.9322939476219673, "grad_norm": 0.6185970709863681, "learning_rate": 1.5046228710462288e-06, "loss": 0.5524, "step": 31932 }, { "epoch": 0.9323231438498146, "grad_norm": 0.6662088261860479, "learning_rate": 1.5039740470397408e-06, "loss": 0.6433, "step": 31933 }, { "epoch": 0.932352340077662, "grad_norm": 0.6254548838260555, "learning_rate": 1.5033252230332524e-06, "loss": 0.5694, "step": 31934 }, { "epoch": 0.9323815363055094, "grad_norm": 0.6498346805200559, "learning_rate": 1.502676399026764e-06, "loss": 0.5927, "step": 31935 }, { "epoch": 0.9324107325333567, "grad_norm": 0.6764616457948115, "learning_rate": 1.502027575020276e-06, "loss": 0.6319, "step": 31936 }, { "epoch": 0.9324399287612041, "grad_norm": 0.6282476938948878, "learning_rate": 1.5013787510137876e-06, "loss": 0.5649, "step": 31937 }, { "epoch": 0.9324691249890514, "grad_norm": 0.585071572944116, "learning_rate": 1.5007299270072994e-06, "loss": 0.5, "step": 31938 }, { "epoch": 0.9324983212168988, "grad_norm": 0.6648312779321797, "learning_rate": 1.500081103000811e-06, "loss": 0.6308, "step": 31939 }, { "epoch": 0.9325275174447462, "grad_norm": 0.6271467870916763, "learning_rate": 1.499432278994323e-06, "loss": 0.5977, "step": 31940 }, { "epoch": 0.9325567136725935, "grad_norm": 0.65076732552023, "learning_rate": 1.4987834549878347e-06, "loss": 0.6279, "step": 31941 }, { "epoch": 0.9325859099004409, "grad_norm": 0.6224518295079904, "learning_rate": 1.4981346309813463e-06, "loss": 0.5504, "step": 31942 }, { "epoch": 0.9326151061282882, "grad_norm": 0.6479951699347879, "learning_rate": 1.4974858069748583e-06, "loss": 0.6011, "step": 31943 }, { "epoch": 0.9326443023561356, "grad_norm": 0.6006489505372863, "learning_rate": 1.49683698296837e-06, "loss": 0.5525, "step": 31944 }, { "epoch": 0.932673498583983, "grad_norm": 0.6488135280587484, "learning_rate": 1.4961881589618817e-06, "loss": 0.588, "step": 31945 }, { "epoch": 0.9327026948118303, "grad_norm": 0.6282362998788346, "learning_rate": 1.4955393349553935e-06, "loss": 0.5742, "step": 31946 }, { "epoch": 0.9327318910396777, "grad_norm": 0.6534693344937264, "learning_rate": 1.4948905109489053e-06, "loss": 0.527, "step": 31947 }, { "epoch": 0.932761087267525, "grad_norm": 0.6433662894739587, "learning_rate": 1.494241686942417e-06, "loss": 0.5697, "step": 31948 }, { "epoch": 0.9327902834953724, "grad_norm": 0.6162267866883036, "learning_rate": 1.4935928629359286e-06, "loss": 0.5384, "step": 31949 }, { "epoch": 0.9328194797232198, "grad_norm": 0.6270282517296333, "learning_rate": 1.4929440389294406e-06, "loss": 0.5577, "step": 31950 }, { "epoch": 0.9328486759510671, "grad_norm": 0.6815999262034147, "learning_rate": 1.4922952149229522e-06, "loss": 0.6511, "step": 31951 }, { "epoch": 0.9328778721789145, "grad_norm": 0.6424366398596859, "learning_rate": 1.4916463909164642e-06, "loss": 0.6089, "step": 31952 }, { "epoch": 0.9329070684067619, "grad_norm": 0.7267017762099071, "learning_rate": 1.4909975669099758e-06, "loss": 0.6817, "step": 31953 }, { "epoch": 0.9329362646346092, "grad_norm": 0.6811078649003943, "learning_rate": 1.4903487429034876e-06, "loss": 0.668, "step": 31954 }, { "epoch": 0.9329654608624566, "grad_norm": 0.6646912650741075, "learning_rate": 1.4896999188969992e-06, "loss": 0.613, "step": 31955 }, { "epoch": 0.9329946570903039, "grad_norm": 0.6191762288071258, "learning_rate": 1.489051094890511e-06, "loss": 0.5417, "step": 31956 }, { "epoch": 0.9330238533181513, "grad_norm": 0.5652877772856264, "learning_rate": 1.4884022708840229e-06, "loss": 0.4915, "step": 31957 }, { "epoch": 0.9330530495459987, "grad_norm": 0.6504349046618866, "learning_rate": 1.4877534468775345e-06, "loss": 0.6068, "step": 31958 }, { "epoch": 0.933082245773846, "grad_norm": 0.6726831618028208, "learning_rate": 1.4871046228710465e-06, "loss": 0.6408, "step": 31959 }, { "epoch": 0.9331114420016934, "grad_norm": 0.6385668761437336, "learning_rate": 1.486455798864558e-06, "loss": 0.5765, "step": 31960 }, { "epoch": 0.9331406382295407, "grad_norm": 0.7153601049394162, "learning_rate": 1.4858069748580697e-06, "loss": 0.6677, "step": 31961 }, { "epoch": 0.9331698344573881, "grad_norm": 0.6730083288732157, "learning_rate": 1.4851581508515817e-06, "loss": 0.5871, "step": 31962 }, { "epoch": 0.9331990306852355, "grad_norm": 0.6300144357252079, "learning_rate": 1.4845093268450933e-06, "loss": 0.5899, "step": 31963 }, { "epoch": 0.9332282269130828, "grad_norm": 0.5953755653514026, "learning_rate": 1.4838605028386051e-06, "loss": 0.5268, "step": 31964 }, { "epoch": 0.9332574231409302, "grad_norm": 0.6725553205943302, "learning_rate": 1.483211678832117e-06, "loss": 0.6361, "step": 31965 }, { "epoch": 0.9332866193687775, "grad_norm": 0.6252553250544354, "learning_rate": 1.4825628548256288e-06, "loss": 0.5663, "step": 31966 }, { "epoch": 0.9333158155966249, "grad_norm": 0.6768739504246224, "learning_rate": 1.4819140308191404e-06, "loss": 0.6665, "step": 31967 }, { "epoch": 0.9333450118244723, "grad_norm": 0.6347580693961833, "learning_rate": 1.481265206812652e-06, "loss": 0.5815, "step": 31968 }, { "epoch": 0.9333742080523196, "grad_norm": 0.6914241002576295, "learning_rate": 1.480616382806164e-06, "loss": 0.6456, "step": 31969 }, { "epoch": 0.933403404280167, "grad_norm": 0.6537311812922025, "learning_rate": 1.4799675587996756e-06, "loss": 0.6053, "step": 31970 }, { "epoch": 0.9334326005080144, "grad_norm": 0.624602072445103, "learning_rate": 1.4793187347931876e-06, "loss": 0.544, "step": 31971 }, { "epoch": 0.9334617967358617, "grad_norm": 0.6837784345528641, "learning_rate": 1.4786699107866992e-06, "loss": 0.6116, "step": 31972 }, { "epoch": 0.9334909929637091, "grad_norm": 0.6538116017105189, "learning_rate": 1.478021086780211e-06, "loss": 0.6116, "step": 31973 }, { "epoch": 0.9335201891915564, "grad_norm": 0.5839427270096649, "learning_rate": 1.4773722627737227e-06, "loss": 0.5033, "step": 31974 }, { "epoch": 0.9335493854194038, "grad_norm": 0.6804544116630272, "learning_rate": 1.4767234387672345e-06, "loss": 0.6416, "step": 31975 }, { "epoch": 0.9335785816472512, "grad_norm": 0.6659270122404382, "learning_rate": 1.4760746147607463e-06, "loss": 0.6284, "step": 31976 }, { "epoch": 0.9336077778750985, "grad_norm": 0.6178539845418203, "learning_rate": 1.475425790754258e-06, "loss": 0.5474, "step": 31977 }, { "epoch": 0.9336369741029459, "grad_norm": 0.6261112861887191, "learning_rate": 1.47477696674777e-06, "loss": 0.5599, "step": 31978 }, { "epoch": 0.9336661703307932, "grad_norm": 0.7023666053397885, "learning_rate": 1.4741281427412815e-06, "loss": 0.661, "step": 31979 }, { "epoch": 0.9336953665586406, "grad_norm": 0.5990188658200424, "learning_rate": 1.4734793187347933e-06, "loss": 0.4975, "step": 31980 }, { "epoch": 0.933724562786488, "grad_norm": 0.692766710211881, "learning_rate": 1.4728304947283052e-06, "loss": 0.6336, "step": 31981 }, { "epoch": 0.9337537590143353, "grad_norm": 0.6507784446325735, "learning_rate": 1.4721816707218168e-06, "loss": 0.5709, "step": 31982 }, { "epoch": 0.9337829552421827, "grad_norm": 0.6499865060958807, "learning_rate": 1.4715328467153286e-06, "loss": 0.5921, "step": 31983 }, { "epoch": 0.93381215147003, "grad_norm": 0.6620386229888797, "learning_rate": 1.4708840227088402e-06, "loss": 0.6402, "step": 31984 }, { "epoch": 0.9338413476978774, "grad_norm": 0.6634474771724646, "learning_rate": 1.4702351987023522e-06, "loss": 0.6275, "step": 31985 }, { "epoch": 0.9338705439257248, "grad_norm": 0.6565170391811254, "learning_rate": 1.4695863746958638e-06, "loss": 0.6213, "step": 31986 }, { "epoch": 0.9338997401535721, "grad_norm": 0.5688275256056304, "learning_rate": 1.4689375506893754e-06, "loss": 0.4735, "step": 31987 }, { "epoch": 0.9339289363814195, "grad_norm": 0.6182489988274739, "learning_rate": 1.4682887266828874e-06, "loss": 0.521, "step": 31988 }, { "epoch": 0.9339581326092669, "grad_norm": 0.6687841789295137, "learning_rate": 1.467639902676399e-06, "loss": 0.6468, "step": 31989 }, { "epoch": 0.9339873288371142, "grad_norm": 0.6618461408148839, "learning_rate": 1.4669910786699109e-06, "loss": 0.6339, "step": 31990 }, { "epoch": 0.9340165250649616, "grad_norm": 0.6699928369788145, "learning_rate": 1.4663422546634227e-06, "loss": 0.5697, "step": 31991 }, { "epoch": 0.9340457212928089, "grad_norm": 0.5808981373479826, "learning_rate": 1.4656934306569345e-06, "loss": 0.4948, "step": 31992 }, { "epoch": 0.9340749175206563, "grad_norm": 0.614079976188019, "learning_rate": 1.465044606650446e-06, "loss": 0.5731, "step": 31993 }, { "epoch": 0.9341041137485037, "grad_norm": 0.6951241948620273, "learning_rate": 1.464395782643958e-06, "loss": 0.6355, "step": 31994 }, { "epoch": 0.934133309976351, "grad_norm": 0.6291122571205062, "learning_rate": 1.4637469586374697e-06, "loss": 0.5774, "step": 31995 }, { "epoch": 0.9341625062041984, "grad_norm": 0.6765775251174677, "learning_rate": 1.4630981346309813e-06, "loss": 0.6504, "step": 31996 }, { "epoch": 0.9341917024320457, "grad_norm": 0.6650417207505691, "learning_rate": 1.4624493106244934e-06, "loss": 0.6295, "step": 31997 }, { "epoch": 0.9342208986598931, "grad_norm": 0.6485222078438254, "learning_rate": 1.461800486618005e-06, "loss": 0.5997, "step": 31998 }, { "epoch": 0.9342500948877405, "grad_norm": 0.6228709815123692, "learning_rate": 1.4611516626115168e-06, "loss": 0.5736, "step": 31999 }, { "epoch": 0.9342792911155878, "grad_norm": 0.6376963831721094, "learning_rate": 1.4605028386050286e-06, "loss": 0.5947, "step": 32000 }, { "epoch": 0.9343084873434352, "grad_norm": 0.6287685429119442, "learning_rate": 1.4598540145985402e-06, "loss": 0.5696, "step": 32001 }, { "epoch": 0.9343376835712826, "grad_norm": 0.5910955921756721, "learning_rate": 1.459205190592052e-06, "loss": 0.5147, "step": 32002 }, { "epoch": 0.9343668797991299, "grad_norm": 0.5920422796288641, "learning_rate": 1.4585563665855636e-06, "loss": 0.5164, "step": 32003 }, { "epoch": 0.9343960760269773, "grad_norm": 0.684459322565579, "learning_rate": 1.4579075425790756e-06, "loss": 0.6379, "step": 32004 }, { "epoch": 0.9344252722548246, "grad_norm": 0.61607941437236, "learning_rate": 1.4572587185725873e-06, "loss": 0.5492, "step": 32005 }, { "epoch": 0.934454468482672, "grad_norm": 0.637442343715986, "learning_rate": 1.4566098945660993e-06, "loss": 0.5474, "step": 32006 }, { "epoch": 0.9344836647105194, "grad_norm": 0.6228784976382719, "learning_rate": 1.4559610705596109e-06, "loss": 0.5373, "step": 32007 }, { "epoch": 0.9345128609383667, "grad_norm": 0.6340920053445777, "learning_rate": 1.4553122465531225e-06, "loss": 0.5863, "step": 32008 }, { "epoch": 0.9345420571662141, "grad_norm": 0.6399563999024834, "learning_rate": 1.4546634225466343e-06, "loss": 0.6018, "step": 32009 }, { "epoch": 0.9345712533940614, "grad_norm": 0.6115361179722244, "learning_rate": 1.4540145985401461e-06, "loss": 0.5473, "step": 32010 }, { "epoch": 0.9346004496219088, "grad_norm": 0.5974612824579678, "learning_rate": 1.453365774533658e-06, "loss": 0.5157, "step": 32011 }, { "epoch": 0.9346296458497562, "grad_norm": 0.6814242971167284, "learning_rate": 1.4527169505271695e-06, "loss": 0.6716, "step": 32012 }, { "epoch": 0.9346588420776035, "grad_norm": 0.5969079864682867, "learning_rate": 1.4520681265206816e-06, "loss": 0.5048, "step": 32013 }, { "epoch": 0.9346880383054509, "grad_norm": 0.6553651018654109, "learning_rate": 1.4514193025141932e-06, "loss": 0.6255, "step": 32014 }, { "epoch": 0.9347172345332982, "grad_norm": 0.6645704580862309, "learning_rate": 1.4507704785077048e-06, "loss": 0.6059, "step": 32015 }, { "epoch": 0.9347464307611456, "grad_norm": 0.6128960709535823, "learning_rate": 1.4501216545012168e-06, "loss": 0.5551, "step": 32016 }, { "epoch": 0.934775626988993, "grad_norm": 0.6806342330663371, "learning_rate": 1.4494728304947284e-06, "loss": 0.5678, "step": 32017 }, { "epoch": 0.9348048232168403, "grad_norm": 0.6280953628582475, "learning_rate": 1.4488240064882402e-06, "loss": 0.5683, "step": 32018 }, { "epoch": 0.9348340194446877, "grad_norm": 0.6706442549571516, "learning_rate": 1.448175182481752e-06, "loss": 0.6637, "step": 32019 }, { "epoch": 0.9348632156725352, "grad_norm": 0.6792234149934566, "learning_rate": 1.4475263584752636e-06, "loss": 0.6346, "step": 32020 }, { "epoch": 0.9348924119003825, "grad_norm": 0.6983109712573985, "learning_rate": 1.4468775344687755e-06, "loss": 0.6278, "step": 32021 }, { "epoch": 0.9349216081282299, "grad_norm": 0.6363800928476945, "learning_rate": 1.446228710462287e-06, "loss": 0.5264, "step": 32022 }, { "epoch": 0.9349508043560772, "grad_norm": 0.67944130638665, "learning_rate": 1.445579886455799e-06, "loss": 0.6516, "step": 32023 }, { "epoch": 0.9349800005839246, "grad_norm": 0.7069743031356639, "learning_rate": 1.4449310624493107e-06, "loss": 0.546, "step": 32024 }, { "epoch": 0.935009196811772, "grad_norm": 0.603826679757611, "learning_rate": 1.4442822384428227e-06, "loss": 0.5794, "step": 32025 }, { "epoch": 0.9350383930396193, "grad_norm": 0.6461999249578899, "learning_rate": 1.4436334144363343e-06, "loss": 0.5684, "step": 32026 }, { "epoch": 0.9350675892674667, "grad_norm": 0.6303997005953087, "learning_rate": 1.442984590429846e-06, "loss": 0.5598, "step": 32027 }, { "epoch": 0.935096785495314, "grad_norm": 0.6949649759150555, "learning_rate": 1.4423357664233577e-06, "loss": 0.6843, "step": 32028 }, { "epoch": 0.9351259817231614, "grad_norm": 0.5878184202919602, "learning_rate": 1.4416869424168696e-06, "loss": 0.5296, "step": 32029 }, { "epoch": 0.9351551779510088, "grad_norm": 0.6388621782114803, "learning_rate": 1.4410381184103814e-06, "loss": 0.5722, "step": 32030 }, { "epoch": 0.9351843741788561, "grad_norm": 0.6547404065127824, "learning_rate": 1.440389294403893e-06, "loss": 0.5896, "step": 32031 }, { "epoch": 0.9352135704067035, "grad_norm": 0.6509064498245006, "learning_rate": 1.439740470397405e-06, "loss": 0.6219, "step": 32032 }, { "epoch": 0.9352427666345509, "grad_norm": 0.6060092955067371, "learning_rate": 1.4390916463909166e-06, "loss": 0.5329, "step": 32033 }, { "epoch": 0.9352719628623982, "grad_norm": 0.6410536480385503, "learning_rate": 1.4384428223844282e-06, "loss": 0.5523, "step": 32034 }, { "epoch": 0.9353011590902456, "grad_norm": 0.676468528930479, "learning_rate": 1.4377939983779402e-06, "loss": 0.66, "step": 32035 }, { "epoch": 0.9353303553180929, "grad_norm": 0.5846249152228901, "learning_rate": 1.4371451743714518e-06, "loss": 0.4925, "step": 32036 }, { "epoch": 0.9353595515459403, "grad_norm": 0.6980723123650765, "learning_rate": 1.4364963503649637e-06, "loss": 0.701, "step": 32037 }, { "epoch": 0.9353887477737877, "grad_norm": 0.6472331327608949, "learning_rate": 1.4358475263584753e-06, "loss": 0.5897, "step": 32038 }, { "epoch": 0.935417944001635, "grad_norm": 0.6135197367319472, "learning_rate": 1.4351987023519873e-06, "loss": 0.5587, "step": 32039 }, { "epoch": 0.9354471402294824, "grad_norm": 0.6501266801080853, "learning_rate": 1.4345498783454989e-06, "loss": 0.5976, "step": 32040 }, { "epoch": 0.9354763364573297, "grad_norm": 0.649465886849632, "learning_rate": 1.4339010543390105e-06, "loss": 0.617, "step": 32041 }, { "epoch": 0.9355055326851771, "grad_norm": 0.6402325174724556, "learning_rate": 1.4332522303325225e-06, "loss": 0.583, "step": 32042 }, { "epoch": 0.9355347289130245, "grad_norm": 0.6164927270556982, "learning_rate": 1.4326034063260341e-06, "loss": 0.5268, "step": 32043 }, { "epoch": 0.9355639251408718, "grad_norm": 0.6422334918479137, "learning_rate": 1.431954582319546e-06, "loss": 0.5972, "step": 32044 }, { "epoch": 0.9355931213687192, "grad_norm": 0.66679503647487, "learning_rate": 1.4313057583130578e-06, "loss": 0.6109, "step": 32045 }, { "epoch": 0.9356223175965666, "grad_norm": 0.7253505796786132, "learning_rate": 1.4306569343065694e-06, "loss": 0.7007, "step": 32046 }, { "epoch": 0.9356515138244139, "grad_norm": 0.6527192193410537, "learning_rate": 1.4300081103000812e-06, "loss": 0.5794, "step": 32047 }, { "epoch": 0.9356807100522613, "grad_norm": 0.6799196759122849, "learning_rate": 1.429359286293593e-06, "loss": 0.6375, "step": 32048 }, { "epoch": 0.9357099062801086, "grad_norm": 0.6529715247151875, "learning_rate": 1.4287104622871048e-06, "loss": 0.5958, "step": 32049 }, { "epoch": 0.935739102507956, "grad_norm": 0.6946753687780898, "learning_rate": 1.4280616382806164e-06, "loss": 0.6246, "step": 32050 }, { "epoch": 0.9357682987358034, "grad_norm": 0.6605676938501885, "learning_rate": 1.4274128142741284e-06, "loss": 0.6015, "step": 32051 }, { "epoch": 0.9357974949636507, "grad_norm": 0.6121993164831304, "learning_rate": 1.42676399026764e-06, "loss": 0.5559, "step": 32052 }, { "epoch": 0.9358266911914981, "grad_norm": 0.6323201424199966, "learning_rate": 1.4261151662611516e-06, "loss": 0.5489, "step": 32053 }, { "epoch": 0.9358558874193454, "grad_norm": 0.6468047753036993, "learning_rate": 1.4254663422546637e-06, "loss": 0.5855, "step": 32054 }, { "epoch": 0.9358850836471928, "grad_norm": 0.6469830995741295, "learning_rate": 1.4248175182481753e-06, "loss": 0.6194, "step": 32055 }, { "epoch": 0.9359142798750402, "grad_norm": 0.6347188962242581, "learning_rate": 1.424168694241687e-06, "loss": 0.5451, "step": 32056 }, { "epoch": 0.9359434761028875, "grad_norm": 0.6773691912305558, "learning_rate": 1.4235198702351987e-06, "loss": 0.6204, "step": 32057 }, { "epoch": 0.9359726723307349, "grad_norm": 0.6013853515371478, "learning_rate": 1.4228710462287107e-06, "loss": 0.5558, "step": 32058 }, { "epoch": 0.9360018685585823, "grad_norm": 0.6306143065212976, "learning_rate": 1.4222222222222223e-06, "loss": 0.6058, "step": 32059 }, { "epoch": 0.9360310647864296, "grad_norm": 0.6524958678501506, "learning_rate": 1.421573398215734e-06, "loss": 0.6161, "step": 32060 }, { "epoch": 0.936060261014277, "grad_norm": 0.596032006427774, "learning_rate": 1.420924574209246e-06, "loss": 0.5364, "step": 32061 }, { "epoch": 0.9360894572421243, "grad_norm": 0.6431610181109888, "learning_rate": 1.4202757502027576e-06, "loss": 0.6045, "step": 32062 }, { "epoch": 0.9361186534699717, "grad_norm": 0.6568991303551913, "learning_rate": 1.4196269261962694e-06, "loss": 0.5965, "step": 32063 }, { "epoch": 0.9361478496978191, "grad_norm": 0.6550301136092153, "learning_rate": 1.4189781021897812e-06, "loss": 0.605, "step": 32064 }, { "epoch": 0.9361770459256664, "grad_norm": 0.6574566959464305, "learning_rate": 1.418329278183293e-06, "loss": 0.5914, "step": 32065 }, { "epoch": 0.9362062421535138, "grad_norm": 0.6233219836110334, "learning_rate": 1.4176804541768046e-06, "loss": 0.5972, "step": 32066 }, { "epoch": 0.9362354383813611, "grad_norm": 0.65538307045315, "learning_rate": 1.4170316301703162e-06, "loss": 0.6401, "step": 32067 }, { "epoch": 0.9362646346092085, "grad_norm": 0.639167314078636, "learning_rate": 1.4163828061638282e-06, "loss": 0.6222, "step": 32068 }, { "epoch": 0.9362938308370559, "grad_norm": 0.6459927344602747, "learning_rate": 1.4157339821573398e-06, "loss": 0.5845, "step": 32069 }, { "epoch": 0.9363230270649032, "grad_norm": 0.6582008241787678, "learning_rate": 1.4150851581508519e-06, "loss": 0.6599, "step": 32070 }, { "epoch": 0.9363522232927506, "grad_norm": 0.6235479268020501, "learning_rate": 1.4144363341443635e-06, "loss": 0.5418, "step": 32071 }, { "epoch": 0.936381419520598, "grad_norm": 0.6579457392271512, "learning_rate": 1.4137875101378753e-06, "loss": 0.577, "step": 32072 }, { "epoch": 0.9364106157484453, "grad_norm": 0.7084817243416851, "learning_rate": 1.4131386861313869e-06, "loss": 0.6845, "step": 32073 }, { "epoch": 0.9364398119762927, "grad_norm": 0.6783851328037838, "learning_rate": 1.4124898621248987e-06, "loss": 0.6846, "step": 32074 }, { "epoch": 0.93646900820414, "grad_norm": 0.6569077829023289, "learning_rate": 1.4118410381184105e-06, "loss": 0.6339, "step": 32075 }, { "epoch": 0.9364982044319874, "grad_norm": 0.6870662483056963, "learning_rate": 1.4111922141119221e-06, "loss": 0.7135, "step": 32076 }, { "epoch": 0.9365274006598348, "grad_norm": 0.6640333406091922, "learning_rate": 1.4105433901054342e-06, "loss": 0.6065, "step": 32077 }, { "epoch": 0.9365565968876821, "grad_norm": 0.6342804036026719, "learning_rate": 1.4098945660989458e-06, "loss": 0.6081, "step": 32078 }, { "epoch": 0.9365857931155295, "grad_norm": 0.6993827593697409, "learning_rate": 1.4092457420924574e-06, "loss": 0.7179, "step": 32079 }, { "epoch": 0.9366149893433768, "grad_norm": 0.6404158675286453, "learning_rate": 1.4085969180859694e-06, "loss": 0.572, "step": 32080 }, { "epoch": 0.9366441855712242, "grad_norm": 0.6608781653490405, "learning_rate": 1.407948094079481e-06, "loss": 0.5529, "step": 32081 }, { "epoch": 0.9366733817990716, "grad_norm": 0.6433080672575068, "learning_rate": 1.4072992700729928e-06, "loss": 0.5935, "step": 32082 }, { "epoch": 0.9367025780269189, "grad_norm": 0.6634061745627065, "learning_rate": 1.4066504460665046e-06, "loss": 0.6188, "step": 32083 }, { "epoch": 0.9367317742547663, "grad_norm": 0.6636022798162222, "learning_rate": 1.4060016220600164e-06, "loss": 0.6274, "step": 32084 }, { "epoch": 0.9367609704826136, "grad_norm": 0.671705662688353, "learning_rate": 1.405352798053528e-06, "loss": 0.6222, "step": 32085 }, { "epoch": 0.936790166710461, "grad_norm": 0.6830954555570352, "learning_rate": 1.4047039740470396e-06, "loss": 0.6448, "step": 32086 }, { "epoch": 0.9368193629383084, "grad_norm": 0.6664642554909999, "learning_rate": 1.4040551500405517e-06, "loss": 0.6298, "step": 32087 }, { "epoch": 0.9368485591661557, "grad_norm": 0.647990513716107, "learning_rate": 1.4034063260340633e-06, "loss": 0.5791, "step": 32088 }, { "epoch": 0.9368777553940031, "grad_norm": 0.6293196957253318, "learning_rate": 1.4027575020275753e-06, "loss": 0.5789, "step": 32089 }, { "epoch": 0.9369069516218504, "grad_norm": 0.678059816580222, "learning_rate": 1.402108678021087e-06, "loss": 0.6566, "step": 32090 }, { "epoch": 0.9369361478496978, "grad_norm": 0.6594718230678517, "learning_rate": 1.4014598540145987e-06, "loss": 0.6104, "step": 32091 }, { "epoch": 0.9369653440775452, "grad_norm": 0.6555561624309705, "learning_rate": 1.4008110300081103e-06, "loss": 0.592, "step": 32092 }, { "epoch": 0.9369945403053925, "grad_norm": 0.6737129977344556, "learning_rate": 1.4001622060016221e-06, "loss": 0.6338, "step": 32093 }, { "epoch": 0.9370237365332399, "grad_norm": 0.634372955234078, "learning_rate": 1.399513381995134e-06, "loss": 0.601, "step": 32094 }, { "epoch": 0.9370529327610873, "grad_norm": 0.673063605654882, "learning_rate": 1.3988645579886456e-06, "loss": 0.6938, "step": 32095 }, { "epoch": 0.9370821289889346, "grad_norm": 0.636272083123316, "learning_rate": 1.3982157339821576e-06, "loss": 0.6126, "step": 32096 }, { "epoch": 0.937111325216782, "grad_norm": 0.6994816887133505, "learning_rate": 1.3975669099756692e-06, "loss": 0.6478, "step": 32097 }, { "epoch": 0.9371405214446293, "grad_norm": 0.6089763803060891, "learning_rate": 1.396918085969181e-06, "loss": 0.5296, "step": 32098 }, { "epoch": 0.9371697176724767, "grad_norm": 0.6269223549560473, "learning_rate": 1.3962692619626928e-06, "loss": 0.5369, "step": 32099 }, { "epoch": 0.9371989139003241, "grad_norm": 0.6322693107627103, "learning_rate": 1.3956204379562044e-06, "loss": 0.567, "step": 32100 }, { "epoch": 0.9372281101281714, "grad_norm": 0.6697492607739519, "learning_rate": 1.3949716139497162e-06, "loss": 0.6341, "step": 32101 }, { "epoch": 0.9372573063560188, "grad_norm": 0.6534797466425287, "learning_rate": 1.3943227899432278e-06, "loss": 0.5972, "step": 32102 }, { "epoch": 0.9372865025838661, "grad_norm": 0.641556191709865, "learning_rate": 1.3936739659367399e-06, "loss": 0.6058, "step": 32103 }, { "epoch": 0.9373156988117135, "grad_norm": 0.6456136411773423, "learning_rate": 1.3930251419302515e-06, "loss": 0.591, "step": 32104 }, { "epoch": 0.9373448950395609, "grad_norm": 0.6286286072561238, "learning_rate": 1.392376317923763e-06, "loss": 0.5998, "step": 32105 }, { "epoch": 0.9373740912674082, "grad_norm": 0.6334192739599156, "learning_rate": 1.3917274939172751e-06, "loss": 0.5966, "step": 32106 }, { "epoch": 0.9374032874952556, "grad_norm": 0.6323632756590887, "learning_rate": 1.3910786699107867e-06, "loss": 0.5943, "step": 32107 }, { "epoch": 0.937432483723103, "grad_norm": 0.687582199686161, "learning_rate": 1.3904298459042987e-06, "loss": 0.6511, "step": 32108 }, { "epoch": 0.9374616799509503, "grad_norm": 0.7312378118680019, "learning_rate": 1.3897810218978103e-06, "loss": 0.7238, "step": 32109 }, { "epoch": 0.9374908761787977, "grad_norm": 0.6457229428843937, "learning_rate": 1.3891321978913222e-06, "loss": 0.5638, "step": 32110 }, { "epoch": 0.937520072406645, "grad_norm": 0.645491751704348, "learning_rate": 1.3884833738848338e-06, "loss": 0.5603, "step": 32111 }, { "epoch": 0.9375492686344924, "grad_norm": 0.6136538657365322, "learning_rate": 1.3878345498783456e-06, "loss": 0.5136, "step": 32112 }, { "epoch": 0.9375784648623398, "grad_norm": 0.6903021300441393, "learning_rate": 1.3871857258718574e-06, "loss": 0.632, "step": 32113 }, { "epoch": 0.9376076610901871, "grad_norm": 0.6270283628621526, "learning_rate": 1.386536901865369e-06, "loss": 0.5558, "step": 32114 }, { "epoch": 0.9376368573180345, "grad_norm": 0.6295290781450567, "learning_rate": 1.385888077858881e-06, "loss": 0.5585, "step": 32115 }, { "epoch": 0.9376660535458818, "grad_norm": 0.6575209789441366, "learning_rate": 1.3852392538523926e-06, "loss": 0.6487, "step": 32116 }, { "epoch": 0.9376952497737292, "grad_norm": 0.5860738314762031, "learning_rate": 1.3845904298459044e-06, "loss": 0.498, "step": 32117 }, { "epoch": 0.9377244460015766, "grad_norm": 0.6549379111135466, "learning_rate": 1.3839416058394163e-06, "loss": 0.574, "step": 32118 }, { "epoch": 0.9377536422294239, "grad_norm": 0.6658311153098984, "learning_rate": 1.3832927818329279e-06, "loss": 0.6601, "step": 32119 }, { "epoch": 0.9377828384572713, "grad_norm": 0.7019318012864627, "learning_rate": 1.3826439578264397e-06, "loss": 0.6671, "step": 32120 }, { "epoch": 0.9378120346851186, "grad_norm": 0.6149636329854317, "learning_rate": 1.3819951338199513e-06, "loss": 0.5229, "step": 32121 }, { "epoch": 0.937841230912966, "grad_norm": 0.6283718172823397, "learning_rate": 1.3813463098134633e-06, "loss": 0.552, "step": 32122 }, { "epoch": 0.9378704271408134, "grad_norm": 0.647705360401013, "learning_rate": 1.380697485806975e-06, "loss": 0.6079, "step": 32123 }, { "epoch": 0.9378996233686607, "grad_norm": 0.6504342614669822, "learning_rate": 1.380048661800487e-06, "loss": 0.5905, "step": 32124 }, { "epoch": 0.9379288195965081, "grad_norm": 0.6402131440415758, "learning_rate": 1.3793998377939985e-06, "loss": 0.5904, "step": 32125 }, { "epoch": 0.9379580158243555, "grad_norm": 0.6822075588196261, "learning_rate": 1.3787510137875101e-06, "loss": 0.631, "step": 32126 }, { "epoch": 0.9379872120522028, "grad_norm": 0.639553565490426, "learning_rate": 1.378102189781022e-06, "loss": 0.5762, "step": 32127 }, { "epoch": 0.9380164082800502, "grad_norm": 0.7025440842210834, "learning_rate": 1.3774533657745338e-06, "loss": 0.7119, "step": 32128 }, { "epoch": 0.9380456045078975, "grad_norm": 0.622444450990859, "learning_rate": 1.3768045417680456e-06, "loss": 0.5702, "step": 32129 }, { "epoch": 0.9380748007357449, "grad_norm": 0.6684485580238564, "learning_rate": 1.3761557177615572e-06, "loss": 0.5837, "step": 32130 }, { "epoch": 0.9381039969635923, "grad_norm": 0.6844529898063805, "learning_rate": 1.3755068937550692e-06, "loss": 0.6532, "step": 32131 }, { "epoch": 0.9381331931914396, "grad_norm": 0.6783159020180086, "learning_rate": 1.3748580697485808e-06, "loss": 0.6578, "step": 32132 }, { "epoch": 0.938162389419287, "grad_norm": 0.5927033315826377, "learning_rate": 1.3742092457420924e-06, "loss": 0.5266, "step": 32133 }, { "epoch": 0.9381915856471343, "grad_norm": 0.6634997015786758, "learning_rate": 1.3735604217356045e-06, "loss": 0.6727, "step": 32134 }, { "epoch": 0.9382207818749817, "grad_norm": 0.6630161875414756, "learning_rate": 1.372911597729116e-06, "loss": 0.62, "step": 32135 }, { "epoch": 0.9382499781028291, "grad_norm": 0.6016199067754452, "learning_rate": 1.3722627737226279e-06, "loss": 0.5042, "step": 32136 }, { "epoch": 0.9382791743306764, "grad_norm": 0.658925318021752, "learning_rate": 1.3716139497161397e-06, "loss": 0.5846, "step": 32137 }, { "epoch": 0.9383083705585238, "grad_norm": 0.6294777180269456, "learning_rate": 1.3709651257096513e-06, "loss": 0.5751, "step": 32138 }, { "epoch": 0.9383375667863711, "grad_norm": 0.6424179578504156, "learning_rate": 1.3703163017031631e-06, "loss": 0.5839, "step": 32139 }, { "epoch": 0.9383667630142185, "grad_norm": 0.6437547241899263, "learning_rate": 1.3696674776966747e-06, "loss": 0.5745, "step": 32140 }, { "epoch": 0.938395959242066, "grad_norm": 0.6729182294721986, "learning_rate": 1.3690186536901867e-06, "loss": 0.6582, "step": 32141 }, { "epoch": 0.9384251554699133, "grad_norm": 0.6509015264964055, "learning_rate": 1.3683698296836983e-06, "loss": 0.644, "step": 32142 }, { "epoch": 0.9384543516977607, "grad_norm": 0.6721553654444935, "learning_rate": 1.3677210056772104e-06, "loss": 0.6194, "step": 32143 }, { "epoch": 0.9384835479256081, "grad_norm": 0.6658785943025278, "learning_rate": 1.367072181670722e-06, "loss": 0.6027, "step": 32144 }, { "epoch": 0.9385127441534554, "grad_norm": 0.7397487204472197, "learning_rate": 1.3664233576642336e-06, "loss": 0.7077, "step": 32145 }, { "epoch": 0.9385419403813028, "grad_norm": 0.6498841696334385, "learning_rate": 1.3657745336577454e-06, "loss": 0.6161, "step": 32146 }, { "epoch": 0.9385711366091501, "grad_norm": 0.6158919462042883, "learning_rate": 1.3651257096512572e-06, "loss": 0.5445, "step": 32147 }, { "epoch": 0.9386003328369975, "grad_norm": 0.654584158522575, "learning_rate": 1.364476885644769e-06, "loss": 0.6249, "step": 32148 }, { "epoch": 0.9386295290648449, "grad_norm": 0.712989360213648, "learning_rate": 1.3638280616382806e-06, "loss": 0.6137, "step": 32149 }, { "epoch": 0.9386587252926922, "grad_norm": 0.6667626417784943, "learning_rate": 1.3631792376317927e-06, "loss": 0.5868, "step": 32150 }, { "epoch": 0.9386879215205396, "grad_norm": 0.6007685492653696, "learning_rate": 1.3625304136253043e-06, "loss": 0.5548, "step": 32151 }, { "epoch": 0.938717117748387, "grad_norm": 0.6475760654135517, "learning_rate": 1.3618815896188159e-06, "loss": 0.5956, "step": 32152 }, { "epoch": 0.9387463139762343, "grad_norm": 0.6286606395080513, "learning_rate": 1.3612327656123279e-06, "loss": 0.5649, "step": 32153 }, { "epoch": 0.9387755102040817, "grad_norm": 0.5876103651410397, "learning_rate": 1.3605839416058395e-06, "loss": 0.5127, "step": 32154 }, { "epoch": 0.938804706431929, "grad_norm": 0.6229370978865648, "learning_rate": 1.3599351175993513e-06, "loss": 0.5743, "step": 32155 }, { "epoch": 0.9388339026597764, "grad_norm": 0.6367038236780853, "learning_rate": 1.359286293592863e-06, "loss": 0.5306, "step": 32156 }, { "epoch": 0.9388630988876238, "grad_norm": 0.6366084876819659, "learning_rate": 1.358637469586375e-06, "loss": 0.557, "step": 32157 }, { "epoch": 0.9388922951154711, "grad_norm": 0.6255704448059594, "learning_rate": 1.3579886455798865e-06, "loss": 0.5627, "step": 32158 }, { "epoch": 0.9389214913433185, "grad_norm": 0.6642502702283005, "learning_rate": 1.3573398215733981e-06, "loss": 0.6392, "step": 32159 }, { "epoch": 0.9389506875711658, "grad_norm": 0.6038949998262699, "learning_rate": 1.3566909975669102e-06, "loss": 0.5644, "step": 32160 }, { "epoch": 0.9389798837990132, "grad_norm": 0.6492829642807887, "learning_rate": 1.3560421735604218e-06, "loss": 0.6485, "step": 32161 }, { "epoch": 0.9390090800268606, "grad_norm": 0.6321465227623182, "learning_rate": 1.3553933495539338e-06, "loss": 0.5902, "step": 32162 }, { "epoch": 0.9390382762547079, "grad_norm": 0.624417114457324, "learning_rate": 1.3547445255474454e-06, "loss": 0.5434, "step": 32163 }, { "epoch": 0.9390674724825553, "grad_norm": 0.669921080291617, "learning_rate": 1.3540957015409572e-06, "loss": 0.6201, "step": 32164 }, { "epoch": 0.9390966687104026, "grad_norm": 0.708770507159327, "learning_rate": 1.3534468775344688e-06, "loss": 0.6902, "step": 32165 }, { "epoch": 0.93912586493825, "grad_norm": 0.6291986336258005, "learning_rate": 1.3527980535279806e-06, "loss": 0.5954, "step": 32166 }, { "epoch": 0.9391550611660974, "grad_norm": 0.6482253740279721, "learning_rate": 1.3521492295214925e-06, "loss": 0.5648, "step": 32167 }, { "epoch": 0.9391842573939447, "grad_norm": 0.6062782091149617, "learning_rate": 1.351500405515004e-06, "loss": 0.5176, "step": 32168 }, { "epoch": 0.9392134536217921, "grad_norm": 0.6524376230787959, "learning_rate": 1.350851581508516e-06, "loss": 0.6088, "step": 32169 }, { "epoch": 0.9392426498496395, "grad_norm": 0.607893207254948, "learning_rate": 1.3502027575020277e-06, "loss": 0.5063, "step": 32170 }, { "epoch": 0.9392718460774868, "grad_norm": 0.666203061900293, "learning_rate": 1.3495539334955393e-06, "loss": 0.6589, "step": 32171 }, { "epoch": 0.9393010423053342, "grad_norm": 0.6539500816069597, "learning_rate": 1.3489051094890513e-06, "loss": 0.6097, "step": 32172 }, { "epoch": 0.9393302385331815, "grad_norm": 0.6512941114739448, "learning_rate": 1.348256285482563e-06, "loss": 0.6391, "step": 32173 }, { "epoch": 0.9393594347610289, "grad_norm": 0.6547509881607484, "learning_rate": 1.3476074614760747e-06, "loss": 0.6043, "step": 32174 }, { "epoch": 0.9393886309888763, "grad_norm": 0.7000391804909248, "learning_rate": 1.3469586374695863e-06, "loss": 0.6334, "step": 32175 }, { "epoch": 0.9394178272167236, "grad_norm": 0.6630946536926373, "learning_rate": 1.3463098134630984e-06, "loss": 0.6194, "step": 32176 }, { "epoch": 0.939447023444571, "grad_norm": 0.675073137731079, "learning_rate": 1.34566098945661e-06, "loss": 0.6106, "step": 32177 }, { "epoch": 0.9394762196724183, "grad_norm": 0.5834348867682334, "learning_rate": 1.3450121654501216e-06, "loss": 0.526, "step": 32178 }, { "epoch": 0.9395054159002657, "grad_norm": 0.6198648595540108, "learning_rate": 1.3443633414436336e-06, "loss": 0.5714, "step": 32179 }, { "epoch": 0.9395346121281131, "grad_norm": 0.6441849366188369, "learning_rate": 1.3437145174371452e-06, "loss": 0.5469, "step": 32180 }, { "epoch": 0.9395638083559604, "grad_norm": 0.6749286405794869, "learning_rate": 1.343065693430657e-06, "loss": 0.6242, "step": 32181 }, { "epoch": 0.9395930045838078, "grad_norm": 0.6579713769567611, "learning_rate": 1.3424168694241688e-06, "loss": 0.6122, "step": 32182 }, { "epoch": 0.9396222008116552, "grad_norm": 0.6561547911545307, "learning_rate": 1.3417680454176807e-06, "loss": 0.6065, "step": 32183 }, { "epoch": 0.9396513970395025, "grad_norm": 0.6713302043210321, "learning_rate": 1.3411192214111923e-06, "loss": 0.6086, "step": 32184 }, { "epoch": 0.9396805932673499, "grad_norm": 0.6459205138722682, "learning_rate": 1.3404703974047039e-06, "loss": 0.5798, "step": 32185 }, { "epoch": 0.9397097894951972, "grad_norm": 0.6282137995091663, "learning_rate": 1.339821573398216e-06, "loss": 0.6095, "step": 32186 }, { "epoch": 0.9397389857230446, "grad_norm": 0.6534755127163235, "learning_rate": 1.3391727493917275e-06, "loss": 0.6084, "step": 32187 }, { "epoch": 0.939768181950892, "grad_norm": 0.5966648597065872, "learning_rate": 1.3385239253852395e-06, "loss": 0.5171, "step": 32188 }, { "epoch": 0.9397973781787393, "grad_norm": 0.6294544981044976, "learning_rate": 1.3378751013787511e-06, "loss": 0.5717, "step": 32189 }, { "epoch": 0.9398265744065867, "grad_norm": 0.6506974496784438, "learning_rate": 1.337226277372263e-06, "loss": 0.5871, "step": 32190 }, { "epoch": 0.939855770634434, "grad_norm": 0.6669289877605249, "learning_rate": 1.3365774533657748e-06, "loss": 0.5923, "step": 32191 }, { "epoch": 0.9398849668622814, "grad_norm": 0.6695915553872575, "learning_rate": 1.3359286293592864e-06, "loss": 0.646, "step": 32192 }, { "epoch": 0.9399141630901288, "grad_norm": 0.6477685691045222, "learning_rate": 1.3352798053527982e-06, "loss": 0.62, "step": 32193 }, { "epoch": 0.9399433593179761, "grad_norm": 0.6830398904465793, "learning_rate": 1.3346309813463098e-06, "loss": 0.6334, "step": 32194 }, { "epoch": 0.9399725555458235, "grad_norm": 0.5834293746965943, "learning_rate": 1.3339821573398218e-06, "loss": 0.5271, "step": 32195 }, { "epoch": 0.9400017517736708, "grad_norm": 0.6526825370970148, "learning_rate": 1.3333333333333334e-06, "loss": 0.5803, "step": 32196 }, { "epoch": 0.9400309480015182, "grad_norm": 0.63146670276132, "learning_rate": 1.332684509326845e-06, "loss": 0.5873, "step": 32197 }, { "epoch": 0.9400601442293656, "grad_norm": 0.6246426168823219, "learning_rate": 1.332035685320357e-06, "loss": 0.5668, "step": 32198 }, { "epoch": 0.9400893404572129, "grad_norm": 0.6065134392149482, "learning_rate": 1.3313868613138686e-06, "loss": 0.5187, "step": 32199 }, { "epoch": 0.9401185366850603, "grad_norm": 0.6404734046105142, "learning_rate": 1.3307380373073805e-06, "loss": 0.5498, "step": 32200 }, { "epoch": 0.9401477329129077, "grad_norm": 0.6876925486327611, "learning_rate": 1.3300892133008923e-06, "loss": 0.613, "step": 32201 }, { "epoch": 0.940176929140755, "grad_norm": 0.6564620743532106, "learning_rate": 1.329440389294404e-06, "loss": 0.6509, "step": 32202 }, { "epoch": 0.9402061253686024, "grad_norm": 0.654896105770279, "learning_rate": 1.3287915652879157e-06, "loss": 0.6106, "step": 32203 }, { "epoch": 0.9402353215964497, "grad_norm": 0.6603490170485299, "learning_rate": 1.3281427412814273e-06, "loss": 0.6499, "step": 32204 }, { "epoch": 0.9402645178242971, "grad_norm": 0.6082398070935735, "learning_rate": 1.3274939172749393e-06, "loss": 0.515, "step": 32205 }, { "epoch": 0.9402937140521445, "grad_norm": 0.5831052564074816, "learning_rate": 1.326845093268451e-06, "loss": 0.5167, "step": 32206 }, { "epoch": 0.9403229102799918, "grad_norm": 0.6258086157215369, "learning_rate": 1.326196269261963e-06, "loss": 0.567, "step": 32207 }, { "epoch": 0.9403521065078392, "grad_norm": 0.6857271464438874, "learning_rate": 1.3255474452554746e-06, "loss": 0.6504, "step": 32208 }, { "epoch": 0.9403813027356865, "grad_norm": 0.6192444969978796, "learning_rate": 1.3248986212489864e-06, "loss": 0.5742, "step": 32209 }, { "epoch": 0.9404104989635339, "grad_norm": 0.6810546653178341, "learning_rate": 1.324249797242498e-06, "loss": 0.6682, "step": 32210 }, { "epoch": 0.9404396951913813, "grad_norm": 0.6986182702722528, "learning_rate": 1.3236009732360098e-06, "loss": 0.6475, "step": 32211 }, { "epoch": 0.9404688914192286, "grad_norm": 0.6348258200878993, "learning_rate": 1.3229521492295216e-06, "loss": 0.5549, "step": 32212 }, { "epoch": 0.940498087647076, "grad_norm": 0.6457690508965793, "learning_rate": 1.3223033252230332e-06, "loss": 0.6117, "step": 32213 }, { "epoch": 0.9405272838749233, "grad_norm": 0.6489735712261487, "learning_rate": 1.3216545012165452e-06, "loss": 0.5731, "step": 32214 }, { "epoch": 0.9405564801027707, "grad_norm": 0.6172156258345556, "learning_rate": 1.3210056772100568e-06, "loss": 0.5511, "step": 32215 }, { "epoch": 0.9405856763306181, "grad_norm": 0.6763522744005965, "learning_rate": 1.3203568532035687e-06, "loss": 0.6326, "step": 32216 }, { "epoch": 0.9406148725584654, "grad_norm": 0.6777679959745903, "learning_rate": 1.3197080291970805e-06, "loss": 0.6453, "step": 32217 }, { "epoch": 0.9406440687863128, "grad_norm": 0.6968936710987303, "learning_rate": 1.319059205190592e-06, "loss": 0.6306, "step": 32218 }, { "epoch": 0.9406732650141602, "grad_norm": 0.652507241737835, "learning_rate": 1.318410381184104e-06, "loss": 0.5931, "step": 32219 }, { "epoch": 0.9407024612420075, "grad_norm": 0.6196625407815683, "learning_rate": 1.3177615571776157e-06, "loss": 0.5069, "step": 32220 }, { "epoch": 0.9407316574698549, "grad_norm": 0.7046534644919065, "learning_rate": 1.3171127331711275e-06, "loss": 0.7097, "step": 32221 }, { "epoch": 0.9407608536977022, "grad_norm": 0.6412778533529196, "learning_rate": 1.3164639091646391e-06, "loss": 0.5873, "step": 32222 }, { "epoch": 0.9407900499255496, "grad_norm": 0.6515696818939689, "learning_rate": 1.3158150851581512e-06, "loss": 0.6081, "step": 32223 }, { "epoch": 0.940819246153397, "grad_norm": 0.6867764432624137, "learning_rate": 1.3151662611516628e-06, "loss": 0.6842, "step": 32224 }, { "epoch": 0.9408484423812443, "grad_norm": 0.6232312657640092, "learning_rate": 1.3145174371451744e-06, "loss": 0.5491, "step": 32225 }, { "epoch": 0.9408776386090917, "grad_norm": 0.7237089185592128, "learning_rate": 1.3138686131386864e-06, "loss": 0.6814, "step": 32226 }, { "epoch": 0.940906834836939, "grad_norm": 0.6888780022328704, "learning_rate": 1.313219789132198e-06, "loss": 0.6421, "step": 32227 }, { "epoch": 0.9409360310647864, "grad_norm": 0.582669115775031, "learning_rate": 1.3125709651257098e-06, "loss": 0.4989, "step": 32228 }, { "epoch": 0.9409652272926338, "grad_norm": 0.6747025489608536, "learning_rate": 1.3119221411192214e-06, "loss": 0.6497, "step": 32229 }, { "epoch": 0.9409944235204811, "grad_norm": 0.6431238612698456, "learning_rate": 1.3112733171127332e-06, "loss": 0.4785, "step": 32230 }, { "epoch": 0.9410236197483285, "grad_norm": 0.6875753693828452, "learning_rate": 1.310624493106245e-06, "loss": 0.6314, "step": 32231 }, { "epoch": 0.9410528159761758, "grad_norm": 0.718334666471877, "learning_rate": 1.3099756690997567e-06, "loss": 0.6985, "step": 32232 }, { "epoch": 0.9410820122040232, "grad_norm": 0.6590968002516028, "learning_rate": 1.3093268450932687e-06, "loss": 0.6224, "step": 32233 }, { "epoch": 0.9411112084318706, "grad_norm": 0.6738610183609256, "learning_rate": 1.3086780210867803e-06, "loss": 0.6108, "step": 32234 }, { "epoch": 0.9411404046597179, "grad_norm": 0.6824347421245299, "learning_rate": 1.308029197080292e-06, "loss": 0.6428, "step": 32235 }, { "epoch": 0.9411696008875653, "grad_norm": 0.6218938433959895, "learning_rate": 1.307380373073804e-06, "loss": 0.5832, "step": 32236 }, { "epoch": 0.9411987971154127, "grad_norm": 0.6609918390058964, "learning_rate": 1.3067315490673155e-06, "loss": 0.6074, "step": 32237 }, { "epoch": 0.94122799334326, "grad_norm": 0.6915508146967257, "learning_rate": 1.3060827250608273e-06, "loss": 0.6595, "step": 32238 }, { "epoch": 0.9412571895711074, "grad_norm": 0.630618381172342, "learning_rate": 1.305433901054339e-06, "loss": 0.5724, "step": 32239 }, { "epoch": 0.9412863857989547, "grad_norm": 0.6410620621064049, "learning_rate": 1.304785077047851e-06, "loss": 0.61, "step": 32240 }, { "epoch": 0.9413155820268021, "grad_norm": 0.6425997229863334, "learning_rate": 1.3041362530413626e-06, "loss": 0.6239, "step": 32241 }, { "epoch": 0.9413447782546495, "grad_norm": 0.7029220688370023, "learning_rate": 1.3034874290348746e-06, "loss": 0.6605, "step": 32242 }, { "epoch": 0.9413739744824968, "grad_norm": 0.5809540080019184, "learning_rate": 1.3028386050283862e-06, "loss": 0.5106, "step": 32243 }, { "epoch": 0.9414031707103442, "grad_norm": 0.6429140798312544, "learning_rate": 1.3021897810218978e-06, "loss": 0.572, "step": 32244 }, { "epoch": 0.9414323669381915, "grad_norm": 0.6571725839375813, "learning_rate": 1.3015409570154096e-06, "loss": 0.5868, "step": 32245 }, { "epoch": 0.9414615631660389, "grad_norm": 0.6302407403284425, "learning_rate": 1.3008921330089214e-06, "loss": 0.5727, "step": 32246 }, { "epoch": 0.9414907593938863, "grad_norm": 0.6560482926665677, "learning_rate": 1.3002433090024332e-06, "loss": 0.5469, "step": 32247 }, { "epoch": 0.9415199556217336, "grad_norm": 0.7155227917890121, "learning_rate": 1.2995944849959449e-06, "loss": 0.727, "step": 32248 }, { "epoch": 0.941549151849581, "grad_norm": 0.6243502966602957, "learning_rate": 1.2989456609894569e-06, "loss": 0.5723, "step": 32249 }, { "epoch": 0.9415783480774284, "grad_norm": 0.6629919226169525, "learning_rate": 1.2982968369829685e-06, "loss": 0.6349, "step": 32250 }, { "epoch": 0.9416075443052757, "grad_norm": 0.6273957748455115, "learning_rate": 1.29764801297648e-06, "loss": 0.5676, "step": 32251 }, { "epoch": 0.9416367405331231, "grad_norm": 0.6099240834070399, "learning_rate": 1.2969991889699921e-06, "loss": 0.5238, "step": 32252 }, { "epoch": 0.9416659367609704, "grad_norm": 0.6263158641815701, "learning_rate": 1.2963503649635037e-06, "loss": 0.5754, "step": 32253 }, { "epoch": 0.9416951329888178, "grad_norm": 0.6785112832435076, "learning_rate": 1.2957015409570155e-06, "loss": 0.6236, "step": 32254 }, { "epoch": 0.9417243292166652, "grad_norm": 0.6229427591219596, "learning_rate": 1.2950527169505273e-06, "loss": 0.5519, "step": 32255 }, { "epoch": 0.9417535254445125, "grad_norm": 0.7021760914256048, "learning_rate": 1.294403892944039e-06, "loss": 0.6782, "step": 32256 }, { "epoch": 0.9417827216723599, "grad_norm": 0.6372846226281599, "learning_rate": 1.2937550689375508e-06, "loss": 0.6208, "step": 32257 }, { "epoch": 0.9418119179002072, "grad_norm": 0.6781806190936215, "learning_rate": 1.2931062449310624e-06, "loss": 0.6644, "step": 32258 }, { "epoch": 0.9418411141280546, "grad_norm": 0.6357912184702428, "learning_rate": 1.2924574209245744e-06, "loss": 0.5573, "step": 32259 }, { "epoch": 0.941870310355902, "grad_norm": 0.6238934601438936, "learning_rate": 1.291808596918086e-06, "loss": 0.5704, "step": 32260 }, { "epoch": 0.9418995065837493, "grad_norm": 0.6161078422808548, "learning_rate": 1.291159772911598e-06, "loss": 0.5363, "step": 32261 }, { "epoch": 0.9419287028115968, "grad_norm": 0.6360073209966579, "learning_rate": 1.2905109489051096e-06, "loss": 0.5749, "step": 32262 }, { "epoch": 0.9419578990394442, "grad_norm": 0.6326494930240741, "learning_rate": 1.2898621248986212e-06, "loss": 0.5751, "step": 32263 }, { "epoch": 0.9419870952672915, "grad_norm": 0.6688739029800956, "learning_rate": 1.289213300892133e-06, "loss": 0.5943, "step": 32264 }, { "epoch": 0.9420162914951389, "grad_norm": 0.6491889103192214, "learning_rate": 1.2885644768856449e-06, "loss": 0.6269, "step": 32265 }, { "epoch": 0.9420454877229862, "grad_norm": 0.605842163849695, "learning_rate": 1.2879156528791567e-06, "loss": 0.5225, "step": 32266 }, { "epoch": 0.9420746839508336, "grad_norm": 0.6832634327449312, "learning_rate": 1.2872668288726683e-06, "loss": 0.7021, "step": 32267 }, { "epoch": 0.942103880178681, "grad_norm": 0.6668276352180308, "learning_rate": 1.2866180048661803e-06, "loss": 0.6089, "step": 32268 }, { "epoch": 0.9421330764065283, "grad_norm": 0.6802047723592973, "learning_rate": 1.285969180859692e-06, "loss": 0.625, "step": 32269 }, { "epoch": 0.9421622726343757, "grad_norm": 0.6469437425134046, "learning_rate": 1.2853203568532035e-06, "loss": 0.6212, "step": 32270 }, { "epoch": 0.942191468862223, "grad_norm": 0.6439268048819897, "learning_rate": 1.2846715328467155e-06, "loss": 0.5308, "step": 32271 }, { "epoch": 0.9422206650900704, "grad_norm": 0.6622727944598252, "learning_rate": 1.2840227088402272e-06, "loss": 0.581, "step": 32272 }, { "epoch": 0.9422498613179178, "grad_norm": 0.6615822638177439, "learning_rate": 1.283373884833739e-06, "loss": 0.6089, "step": 32273 }, { "epoch": 0.9422790575457651, "grad_norm": 0.6306611093848669, "learning_rate": 1.2827250608272508e-06, "loss": 0.5723, "step": 32274 }, { "epoch": 0.9423082537736125, "grad_norm": 0.6873564228326939, "learning_rate": 1.2820762368207626e-06, "loss": 0.6043, "step": 32275 }, { "epoch": 0.9423374500014599, "grad_norm": 0.6998510963350186, "learning_rate": 1.2814274128142742e-06, "loss": 0.6724, "step": 32276 }, { "epoch": 0.9423666462293072, "grad_norm": 0.5694051184223913, "learning_rate": 1.2807785888077858e-06, "loss": 0.4875, "step": 32277 }, { "epoch": 0.9423958424571546, "grad_norm": 0.7209374713788065, "learning_rate": 1.2801297648012978e-06, "loss": 0.6956, "step": 32278 }, { "epoch": 0.9424250386850019, "grad_norm": 0.6703518620894692, "learning_rate": 1.2794809407948094e-06, "loss": 0.6164, "step": 32279 }, { "epoch": 0.9424542349128493, "grad_norm": 0.6161438899110186, "learning_rate": 1.2788321167883215e-06, "loss": 0.5424, "step": 32280 }, { "epoch": 0.9424834311406967, "grad_norm": 0.678469870280894, "learning_rate": 1.278183292781833e-06, "loss": 0.6388, "step": 32281 }, { "epoch": 0.942512627368544, "grad_norm": 0.6041854317403991, "learning_rate": 1.2775344687753449e-06, "loss": 0.5444, "step": 32282 }, { "epoch": 0.9425418235963914, "grad_norm": 0.5952962122956124, "learning_rate": 1.2768856447688565e-06, "loss": 0.5357, "step": 32283 }, { "epoch": 0.9425710198242387, "grad_norm": 0.671808199847624, "learning_rate": 1.2762368207623683e-06, "loss": 0.5975, "step": 32284 }, { "epoch": 0.9426002160520861, "grad_norm": 0.6666008880617819, "learning_rate": 1.2755879967558801e-06, "loss": 0.6154, "step": 32285 }, { "epoch": 0.9426294122799335, "grad_norm": 0.6568990828410398, "learning_rate": 1.2749391727493917e-06, "loss": 0.6219, "step": 32286 }, { "epoch": 0.9426586085077808, "grad_norm": 0.7360780194305442, "learning_rate": 1.2742903487429037e-06, "loss": 0.7342, "step": 32287 }, { "epoch": 0.9426878047356282, "grad_norm": 0.6506070713216522, "learning_rate": 1.2736415247364154e-06, "loss": 0.5759, "step": 32288 }, { "epoch": 0.9427170009634755, "grad_norm": 0.6367453626373446, "learning_rate": 1.272992700729927e-06, "loss": 0.5816, "step": 32289 }, { "epoch": 0.9427461971913229, "grad_norm": 0.6476714965364743, "learning_rate": 1.272343876723439e-06, "loss": 0.6118, "step": 32290 }, { "epoch": 0.9427753934191703, "grad_norm": 0.6506969970101064, "learning_rate": 1.2716950527169506e-06, "loss": 0.5776, "step": 32291 }, { "epoch": 0.9428045896470176, "grad_norm": 0.6234867285523049, "learning_rate": 1.2710462287104624e-06, "loss": 0.5459, "step": 32292 }, { "epoch": 0.942833785874865, "grad_norm": 0.6571478394530486, "learning_rate": 1.270397404703974e-06, "loss": 0.6395, "step": 32293 }, { "epoch": 0.9428629821027124, "grad_norm": 0.6446019672752963, "learning_rate": 1.269748580697486e-06, "loss": 0.5349, "step": 32294 }, { "epoch": 0.9428921783305597, "grad_norm": 0.6500985778875734, "learning_rate": 1.2690997566909976e-06, "loss": 0.5956, "step": 32295 }, { "epoch": 0.9429213745584071, "grad_norm": 0.6667503000230369, "learning_rate": 1.2684509326845092e-06, "loss": 0.6337, "step": 32296 }, { "epoch": 0.9429505707862544, "grad_norm": 0.6945251985544197, "learning_rate": 1.2678021086780213e-06, "loss": 0.6836, "step": 32297 }, { "epoch": 0.9429797670141018, "grad_norm": 0.5921017461424871, "learning_rate": 1.2671532846715329e-06, "loss": 0.4937, "step": 32298 }, { "epoch": 0.9430089632419492, "grad_norm": 0.6517546938840081, "learning_rate": 1.2665044606650447e-06, "loss": 0.6456, "step": 32299 }, { "epoch": 0.9430381594697965, "grad_norm": 0.6143883829960416, "learning_rate": 1.2658556366585565e-06, "loss": 0.5464, "step": 32300 }, { "epoch": 0.9430673556976439, "grad_norm": 0.6714512952766467, "learning_rate": 1.2652068126520683e-06, "loss": 0.645, "step": 32301 }, { "epoch": 0.9430965519254912, "grad_norm": 0.6170783957321682, "learning_rate": 1.26455798864558e-06, "loss": 0.5615, "step": 32302 }, { "epoch": 0.9431257481533386, "grad_norm": 0.655832618294761, "learning_rate": 1.2639091646390917e-06, "loss": 0.6144, "step": 32303 }, { "epoch": 0.943154944381186, "grad_norm": 0.6861319891597782, "learning_rate": 1.2632603406326036e-06, "loss": 0.6529, "step": 32304 }, { "epoch": 0.9431841406090333, "grad_norm": 0.6985620180785641, "learning_rate": 1.2626115166261152e-06, "loss": 0.6362, "step": 32305 }, { "epoch": 0.9432133368368807, "grad_norm": 0.5989854951728177, "learning_rate": 1.2619626926196272e-06, "loss": 0.5331, "step": 32306 }, { "epoch": 0.943242533064728, "grad_norm": 0.6834545373177633, "learning_rate": 1.2613138686131388e-06, "loss": 0.6372, "step": 32307 }, { "epoch": 0.9432717292925754, "grad_norm": 0.6395313874558454, "learning_rate": 1.2606650446066506e-06, "loss": 0.5395, "step": 32308 }, { "epoch": 0.9433009255204228, "grad_norm": 0.6053741763437341, "learning_rate": 1.2600162206001624e-06, "loss": 0.5147, "step": 32309 }, { "epoch": 0.9433301217482701, "grad_norm": 0.6875081967825241, "learning_rate": 1.259367396593674e-06, "loss": 0.6134, "step": 32310 }, { "epoch": 0.9433593179761175, "grad_norm": 0.6689663085436627, "learning_rate": 1.2587185725871858e-06, "loss": 0.6555, "step": 32311 }, { "epoch": 0.9433885142039649, "grad_norm": 0.6651704958399342, "learning_rate": 1.2580697485806974e-06, "loss": 0.598, "step": 32312 }, { "epoch": 0.9434177104318122, "grad_norm": 0.6175231191328813, "learning_rate": 1.2574209245742095e-06, "loss": 0.5574, "step": 32313 }, { "epoch": 0.9434469066596596, "grad_norm": 0.6850589232763403, "learning_rate": 1.256772100567721e-06, "loss": 0.6496, "step": 32314 }, { "epoch": 0.9434761028875069, "grad_norm": 0.6535599230263874, "learning_rate": 1.256123276561233e-06, "loss": 0.5749, "step": 32315 }, { "epoch": 0.9435052991153543, "grad_norm": 0.6634076691910878, "learning_rate": 1.2554744525547447e-06, "loss": 0.5744, "step": 32316 }, { "epoch": 0.9435344953432017, "grad_norm": 0.6777123471192196, "learning_rate": 1.2548256285482563e-06, "loss": 0.6487, "step": 32317 }, { "epoch": 0.943563691571049, "grad_norm": 0.7002525597556684, "learning_rate": 1.2541768045417681e-06, "loss": 0.6936, "step": 32318 }, { "epoch": 0.9435928877988964, "grad_norm": 0.6533997727073565, "learning_rate": 1.25352798053528e-06, "loss": 0.5591, "step": 32319 }, { "epoch": 0.9436220840267437, "grad_norm": 0.7089248357118423, "learning_rate": 1.2528791565287918e-06, "loss": 0.6005, "step": 32320 }, { "epoch": 0.9436512802545911, "grad_norm": 0.619859637195571, "learning_rate": 1.2522303325223034e-06, "loss": 0.5556, "step": 32321 }, { "epoch": 0.9436804764824385, "grad_norm": 0.6758248120927406, "learning_rate": 1.251581508515815e-06, "loss": 0.5817, "step": 32322 }, { "epoch": 0.9437096727102858, "grad_norm": 0.7184552791873052, "learning_rate": 1.250932684509327e-06, "loss": 0.635, "step": 32323 }, { "epoch": 0.9437388689381332, "grad_norm": 0.6681575150270411, "learning_rate": 1.2502838605028386e-06, "loss": 0.5999, "step": 32324 }, { "epoch": 0.9437680651659806, "grad_norm": 0.6568368036331382, "learning_rate": 1.2496350364963504e-06, "loss": 0.6277, "step": 32325 }, { "epoch": 0.9437972613938279, "grad_norm": 0.60523234563476, "learning_rate": 1.2489862124898622e-06, "loss": 0.5464, "step": 32326 }, { "epoch": 0.9438264576216753, "grad_norm": 0.6533132361267722, "learning_rate": 1.248337388483374e-06, "loss": 0.6096, "step": 32327 }, { "epoch": 0.9438556538495226, "grad_norm": 0.6418727482229035, "learning_rate": 1.2476885644768856e-06, "loss": 0.5951, "step": 32328 }, { "epoch": 0.94388485007737, "grad_norm": 0.6526903827904552, "learning_rate": 1.2470397404703975e-06, "loss": 0.6, "step": 32329 }, { "epoch": 0.9439140463052174, "grad_norm": 0.6125949396838443, "learning_rate": 1.2463909164639093e-06, "loss": 0.5215, "step": 32330 }, { "epoch": 0.9439432425330647, "grad_norm": 0.5962806277312426, "learning_rate": 1.245742092457421e-06, "loss": 0.5316, "step": 32331 }, { "epoch": 0.9439724387609121, "grad_norm": 0.6831831998792451, "learning_rate": 1.2450932684509327e-06, "loss": 0.601, "step": 32332 }, { "epoch": 0.9440016349887594, "grad_norm": 0.6469088080321549, "learning_rate": 1.2444444444444445e-06, "loss": 0.5696, "step": 32333 }, { "epoch": 0.9440308312166068, "grad_norm": 0.6721550298849099, "learning_rate": 1.2437956204379563e-06, "loss": 0.6488, "step": 32334 }, { "epoch": 0.9440600274444542, "grad_norm": 0.5760985651402458, "learning_rate": 1.2431467964314681e-06, "loss": 0.4799, "step": 32335 }, { "epoch": 0.9440892236723015, "grad_norm": 0.6629269328976496, "learning_rate": 1.24249797242498e-06, "loss": 0.5964, "step": 32336 }, { "epoch": 0.9441184199001489, "grad_norm": 0.6720871381953201, "learning_rate": 1.2418491484184916e-06, "loss": 0.656, "step": 32337 }, { "epoch": 0.9441476161279962, "grad_norm": 0.6416791152586642, "learning_rate": 1.2412003244120034e-06, "loss": 0.5638, "step": 32338 }, { "epoch": 0.9441768123558436, "grad_norm": 0.7185135389711348, "learning_rate": 1.240551500405515e-06, "loss": 0.7295, "step": 32339 }, { "epoch": 0.944206008583691, "grad_norm": 0.6405777465350307, "learning_rate": 1.2399026763990268e-06, "loss": 0.5756, "step": 32340 }, { "epoch": 0.9442352048115383, "grad_norm": 0.645763300607515, "learning_rate": 1.2392538523925386e-06, "loss": 0.6121, "step": 32341 }, { "epoch": 0.9442644010393857, "grad_norm": 0.6790288921379353, "learning_rate": 1.2386050283860504e-06, "loss": 0.6816, "step": 32342 }, { "epoch": 0.944293597267233, "grad_norm": 0.6349231547493663, "learning_rate": 1.2379562043795622e-06, "loss": 0.6022, "step": 32343 }, { "epoch": 0.9443227934950804, "grad_norm": 0.6260025498339036, "learning_rate": 1.237307380373074e-06, "loss": 0.5351, "step": 32344 }, { "epoch": 0.9443519897229278, "grad_norm": 0.6281401220587068, "learning_rate": 1.2366585563665857e-06, "loss": 0.5775, "step": 32345 }, { "epoch": 0.9443811859507751, "grad_norm": 0.6144858826675521, "learning_rate": 1.2360097323600975e-06, "loss": 0.5529, "step": 32346 }, { "epoch": 0.9444103821786225, "grad_norm": 0.6356041968362927, "learning_rate": 1.235360908353609e-06, "loss": 0.5858, "step": 32347 }, { "epoch": 0.9444395784064699, "grad_norm": 0.6285603416709825, "learning_rate": 1.2347120843471209e-06, "loss": 0.508, "step": 32348 }, { "epoch": 0.9444687746343172, "grad_norm": 0.6176607871115118, "learning_rate": 1.2340632603406327e-06, "loss": 0.5528, "step": 32349 }, { "epoch": 0.9444979708621646, "grad_norm": 0.632184298424144, "learning_rate": 1.2334144363341445e-06, "loss": 0.5391, "step": 32350 }, { "epoch": 0.9445271670900119, "grad_norm": 0.6746146240818987, "learning_rate": 1.2327656123276563e-06, "loss": 0.6342, "step": 32351 }, { "epoch": 0.9445563633178593, "grad_norm": 0.6776721735929162, "learning_rate": 1.232116788321168e-06, "loss": 0.6206, "step": 32352 }, { "epoch": 0.9445855595457067, "grad_norm": 0.6685072051298421, "learning_rate": 1.2314679643146798e-06, "loss": 0.5893, "step": 32353 }, { "epoch": 0.944614755773554, "grad_norm": 0.6066455319577243, "learning_rate": 1.2308191403081916e-06, "loss": 0.4958, "step": 32354 }, { "epoch": 0.9446439520014014, "grad_norm": 0.604957104743898, "learning_rate": 1.2301703163017032e-06, "loss": 0.5264, "step": 32355 }, { "epoch": 0.9446731482292487, "grad_norm": 0.6009993886738084, "learning_rate": 1.229521492295215e-06, "loss": 0.5368, "step": 32356 }, { "epoch": 0.9447023444570961, "grad_norm": 0.6340027334888635, "learning_rate": 1.2288726682887268e-06, "loss": 0.6097, "step": 32357 }, { "epoch": 0.9447315406849435, "grad_norm": 0.6543147907396416, "learning_rate": 1.2282238442822384e-06, "loss": 0.6153, "step": 32358 }, { "epoch": 0.9447607369127908, "grad_norm": 0.674758706542367, "learning_rate": 1.2275750202757502e-06, "loss": 0.6839, "step": 32359 }, { "epoch": 0.9447899331406382, "grad_norm": 0.6517867540478804, "learning_rate": 1.226926196269262e-06, "loss": 0.5917, "step": 32360 }, { "epoch": 0.9448191293684856, "grad_norm": 0.6593367884867635, "learning_rate": 1.2262773722627739e-06, "loss": 0.6091, "step": 32361 }, { "epoch": 0.9448483255963329, "grad_norm": 0.6304445236947035, "learning_rate": 1.2256285482562857e-06, "loss": 0.553, "step": 32362 }, { "epoch": 0.9448775218241803, "grad_norm": 0.6822658433630774, "learning_rate": 1.2249797242497975e-06, "loss": 0.5876, "step": 32363 }, { "epoch": 0.9449067180520276, "grad_norm": 0.6481841881528121, "learning_rate": 1.224330900243309e-06, "loss": 0.6006, "step": 32364 }, { "epoch": 0.944935914279875, "grad_norm": 0.6213635005080892, "learning_rate": 1.223682076236821e-06, "loss": 0.5635, "step": 32365 }, { "epoch": 0.9449651105077224, "grad_norm": 0.603574936322772, "learning_rate": 1.2230332522303325e-06, "loss": 0.535, "step": 32366 }, { "epoch": 0.9449943067355697, "grad_norm": 0.6399900586893742, "learning_rate": 1.2223844282238443e-06, "loss": 0.67, "step": 32367 }, { "epoch": 0.9450235029634171, "grad_norm": 0.6660294778416233, "learning_rate": 1.2217356042173561e-06, "loss": 0.6227, "step": 32368 }, { "epoch": 0.9450526991912644, "grad_norm": 0.6570650207754234, "learning_rate": 1.221086780210868e-06, "loss": 0.5898, "step": 32369 }, { "epoch": 0.9450818954191118, "grad_norm": 0.6547717800865966, "learning_rate": 1.2204379562043798e-06, "loss": 0.6436, "step": 32370 }, { "epoch": 0.9451110916469592, "grad_norm": 0.6390261427961219, "learning_rate": 1.2197891321978914e-06, "loss": 0.5457, "step": 32371 }, { "epoch": 0.9451402878748065, "grad_norm": 0.6585908596471847, "learning_rate": 1.2191403081914032e-06, "loss": 0.5683, "step": 32372 }, { "epoch": 0.9451694841026539, "grad_norm": 0.6143151013908874, "learning_rate": 1.218491484184915e-06, "loss": 0.5063, "step": 32373 }, { "epoch": 0.9451986803305013, "grad_norm": 0.5927094141223165, "learning_rate": 1.2178426601784266e-06, "loss": 0.4924, "step": 32374 }, { "epoch": 0.9452278765583486, "grad_norm": 0.6611421921940247, "learning_rate": 1.2171938361719384e-06, "loss": 0.6477, "step": 32375 }, { "epoch": 0.945257072786196, "grad_norm": 0.6376611580295611, "learning_rate": 1.2165450121654502e-06, "loss": 0.583, "step": 32376 }, { "epoch": 0.9452862690140433, "grad_norm": 0.63677077987041, "learning_rate": 1.215896188158962e-06, "loss": 0.5929, "step": 32377 }, { "epoch": 0.9453154652418907, "grad_norm": 0.6475358083967482, "learning_rate": 1.2152473641524737e-06, "loss": 0.6082, "step": 32378 }, { "epoch": 0.9453446614697381, "grad_norm": 0.7033500102421882, "learning_rate": 1.2145985401459855e-06, "loss": 0.6193, "step": 32379 }, { "epoch": 0.9453738576975854, "grad_norm": 0.6980094616059408, "learning_rate": 1.2139497161394973e-06, "loss": 0.6908, "step": 32380 }, { "epoch": 0.9454030539254328, "grad_norm": 0.6595720415157013, "learning_rate": 1.213300892133009e-06, "loss": 0.5749, "step": 32381 }, { "epoch": 0.9454322501532803, "grad_norm": 0.6059213545409738, "learning_rate": 1.2126520681265207e-06, "loss": 0.5332, "step": 32382 }, { "epoch": 0.9454614463811276, "grad_norm": 0.6356247713440808, "learning_rate": 1.2120032441200325e-06, "loss": 0.5541, "step": 32383 }, { "epoch": 0.945490642608975, "grad_norm": 0.640489123415132, "learning_rate": 1.2113544201135441e-06, "loss": 0.5528, "step": 32384 }, { "epoch": 0.9455198388368223, "grad_norm": 0.6784429631496458, "learning_rate": 1.210705596107056e-06, "loss": 0.5969, "step": 32385 }, { "epoch": 0.9455490350646697, "grad_norm": 0.6452497205211601, "learning_rate": 1.2100567721005678e-06, "loss": 0.5749, "step": 32386 }, { "epoch": 0.9455782312925171, "grad_norm": 0.650878342697228, "learning_rate": 1.2094079480940796e-06, "loss": 0.6039, "step": 32387 }, { "epoch": 0.9456074275203644, "grad_norm": 0.6340492342502675, "learning_rate": 1.2087591240875914e-06, "loss": 0.5655, "step": 32388 }, { "epoch": 0.9456366237482118, "grad_norm": 0.6441032710681978, "learning_rate": 1.2081103000811032e-06, "loss": 0.5776, "step": 32389 }, { "epoch": 0.9456658199760591, "grad_norm": 0.6541801179623864, "learning_rate": 1.207461476074615e-06, "loss": 0.5888, "step": 32390 }, { "epoch": 0.9456950162039065, "grad_norm": 0.7027958880087226, "learning_rate": 1.2068126520681266e-06, "loss": 0.582, "step": 32391 }, { "epoch": 0.9457242124317539, "grad_norm": 0.7319945089818308, "learning_rate": 1.2061638280616384e-06, "loss": 0.6648, "step": 32392 }, { "epoch": 0.9457534086596012, "grad_norm": 0.5812769628528311, "learning_rate": 1.20551500405515e-06, "loss": 0.4418, "step": 32393 }, { "epoch": 0.9457826048874486, "grad_norm": 0.6646793158256239, "learning_rate": 1.2048661800486619e-06, "loss": 0.5266, "step": 32394 }, { "epoch": 0.945811801115296, "grad_norm": 0.6589685230676171, "learning_rate": 1.2042173560421737e-06, "loss": 0.6208, "step": 32395 }, { "epoch": 0.9458409973431433, "grad_norm": 0.6908962572355672, "learning_rate": 1.2035685320356855e-06, "loss": 0.6794, "step": 32396 }, { "epoch": 0.9458701935709907, "grad_norm": 0.6581870971127178, "learning_rate": 1.2029197080291973e-06, "loss": 0.5992, "step": 32397 }, { "epoch": 0.945899389798838, "grad_norm": 0.6607851846638253, "learning_rate": 1.202270884022709e-06, "loss": 0.6624, "step": 32398 }, { "epoch": 0.9459285860266854, "grad_norm": 0.709754383653645, "learning_rate": 1.2016220600162207e-06, "loss": 0.6394, "step": 32399 }, { "epoch": 0.9459577822545328, "grad_norm": 0.6663095639712743, "learning_rate": 1.2009732360097325e-06, "loss": 0.6212, "step": 32400 }, { "epoch": 0.9459869784823801, "grad_norm": 0.6700381575768961, "learning_rate": 1.2003244120032441e-06, "loss": 0.65, "step": 32401 }, { "epoch": 0.9460161747102275, "grad_norm": 0.8267602205213758, "learning_rate": 1.199675587996756e-06, "loss": 0.6268, "step": 32402 }, { "epoch": 0.9460453709380748, "grad_norm": 0.7007619086530719, "learning_rate": 1.1990267639902678e-06, "loss": 0.6474, "step": 32403 }, { "epoch": 0.9460745671659222, "grad_norm": 0.6493771903114122, "learning_rate": 1.1983779399837794e-06, "loss": 0.5744, "step": 32404 }, { "epoch": 0.9461037633937696, "grad_norm": 0.6453638857527928, "learning_rate": 1.1977291159772912e-06, "loss": 0.5798, "step": 32405 }, { "epoch": 0.9461329596216169, "grad_norm": 0.6411915440991316, "learning_rate": 1.197080291970803e-06, "loss": 0.5975, "step": 32406 }, { "epoch": 0.9461621558494643, "grad_norm": 0.6833501734952733, "learning_rate": 1.1964314679643148e-06, "loss": 0.7005, "step": 32407 }, { "epoch": 0.9461913520773116, "grad_norm": 0.6547225934868384, "learning_rate": 1.1957826439578266e-06, "loss": 0.6399, "step": 32408 }, { "epoch": 0.946220548305159, "grad_norm": 0.614111947906936, "learning_rate": 1.1951338199513382e-06, "loss": 0.5184, "step": 32409 }, { "epoch": 0.9462497445330064, "grad_norm": 0.6442730341680291, "learning_rate": 1.19448499594485e-06, "loss": 0.5865, "step": 32410 }, { "epoch": 0.9462789407608537, "grad_norm": 0.632918780391916, "learning_rate": 1.1938361719383617e-06, "loss": 0.6081, "step": 32411 }, { "epoch": 0.9463081369887011, "grad_norm": 0.6602310414615371, "learning_rate": 1.1931873479318735e-06, "loss": 0.6029, "step": 32412 }, { "epoch": 0.9463373332165484, "grad_norm": 0.5826080184167394, "learning_rate": 1.1925385239253853e-06, "loss": 0.5155, "step": 32413 }, { "epoch": 0.9463665294443958, "grad_norm": 0.6615423964791625, "learning_rate": 1.1918896999188971e-06, "loss": 0.6105, "step": 32414 }, { "epoch": 0.9463957256722432, "grad_norm": 0.673201879934116, "learning_rate": 1.191240875912409e-06, "loss": 0.5801, "step": 32415 }, { "epoch": 0.9464249219000905, "grad_norm": 0.6459351585611536, "learning_rate": 1.1905920519059207e-06, "loss": 0.6228, "step": 32416 }, { "epoch": 0.9464541181279379, "grad_norm": 0.6388689634251165, "learning_rate": 1.1899432278994323e-06, "loss": 0.5475, "step": 32417 }, { "epoch": 0.9464833143557853, "grad_norm": 0.6438877572053797, "learning_rate": 1.1892944038929442e-06, "loss": 0.5908, "step": 32418 }, { "epoch": 0.9465125105836326, "grad_norm": 0.6686687489092527, "learning_rate": 1.188645579886456e-06, "loss": 0.6447, "step": 32419 }, { "epoch": 0.94654170681148, "grad_norm": 0.6768794217247204, "learning_rate": 1.1879967558799676e-06, "loss": 0.65, "step": 32420 }, { "epoch": 0.9465709030393273, "grad_norm": 0.6359402105506541, "learning_rate": 1.1873479318734794e-06, "loss": 0.5841, "step": 32421 }, { "epoch": 0.9466000992671747, "grad_norm": 0.6758351599133968, "learning_rate": 1.1866991078669912e-06, "loss": 0.5842, "step": 32422 }, { "epoch": 0.9466292954950221, "grad_norm": 0.6407498065557479, "learning_rate": 1.186050283860503e-06, "loss": 0.5853, "step": 32423 }, { "epoch": 0.9466584917228694, "grad_norm": 0.6610857875381682, "learning_rate": 1.1854014598540146e-06, "loss": 0.6104, "step": 32424 }, { "epoch": 0.9466876879507168, "grad_norm": 0.6955091039759856, "learning_rate": 1.1847526358475264e-06, "loss": 0.6784, "step": 32425 }, { "epoch": 0.9467168841785641, "grad_norm": 0.6685954236738149, "learning_rate": 1.1841038118410383e-06, "loss": 0.6285, "step": 32426 }, { "epoch": 0.9467460804064115, "grad_norm": 0.608314638652691, "learning_rate": 1.18345498783455e-06, "loss": 0.5449, "step": 32427 }, { "epoch": 0.9467752766342589, "grad_norm": 0.6185683921227098, "learning_rate": 1.1828061638280617e-06, "loss": 0.5952, "step": 32428 }, { "epoch": 0.9468044728621062, "grad_norm": 0.6540325098201294, "learning_rate": 1.1821573398215735e-06, "loss": 0.5889, "step": 32429 }, { "epoch": 0.9468336690899536, "grad_norm": 0.5763301178667606, "learning_rate": 1.181508515815085e-06, "loss": 0.4681, "step": 32430 }, { "epoch": 0.946862865317801, "grad_norm": 0.5921783217136726, "learning_rate": 1.180859691808597e-06, "loss": 0.517, "step": 32431 }, { "epoch": 0.9468920615456483, "grad_norm": 0.6677518565597055, "learning_rate": 1.1802108678021087e-06, "loss": 0.642, "step": 32432 }, { "epoch": 0.9469212577734957, "grad_norm": 0.6033850665996762, "learning_rate": 1.1795620437956205e-06, "loss": 0.5242, "step": 32433 }, { "epoch": 0.946950454001343, "grad_norm": 0.6122756940580986, "learning_rate": 1.1789132197891324e-06, "loss": 0.5356, "step": 32434 }, { "epoch": 0.9469796502291904, "grad_norm": 0.6383075197192435, "learning_rate": 1.1782643957826442e-06, "loss": 0.5903, "step": 32435 }, { "epoch": 0.9470088464570378, "grad_norm": 0.6442121737172078, "learning_rate": 1.1776155717761558e-06, "loss": 0.5937, "step": 32436 }, { "epoch": 0.9470380426848851, "grad_norm": 0.643431268277063, "learning_rate": 1.1769667477696676e-06, "loss": 0.585, "step": 32437 }, { "epoch": 0.9470672389127325, "grad_norm": 0.6596793373476338, "learning_rate": 1.1763179237631792e-06, "loss": 0.5405, "step": 32438 }, { "epoch": 0.9470964351405798, "grad_norm": 0.6244016127579061, "learning_rate": 1.175669099756691e-06, "loss": 0.5771, "step": 32439 }, { "epoch": 0.9471256313684272, "grad_norm": 0.6033715300122824, "learning_rate": 1.1750202757502028e-06, "loss": 0.5556, "step": 32440 }, { "epoch": 0.9471548275962746, "grad_norm": 0.5549069524746605, "learning_rate": 1.1743714517437146e-06, "loss": 0.4646, "step": 32441 }, { "epoch": 0.9471840238241219, "grad_norm": 0.67420988089303, "learning_rate": 1.1737226277372265e-06, "loss": 0.673, "step": 32442 }, { "epoch": 0.9472132200519693, "grad_norm": 0.6919676021552015, "learning_rate": 1.173073803730738e-06, "loss": 0.6146, "step": 32443 }, { "epoch": 0.9472424162798166, "grad_norm": 0.6151336927032973, "learning_rate": 1.1724249797242499e-06, "loss": 0.5472, "step": 32444 }, { "epoch": 0.947271612507664, "grad_norm": 0.6041330257821926, "learning_rate": 1.1717761557177617e-06, "loss": 0.5403, "step": 32445 }, { "epoch": 0.9473008087355114, "grad_norm": 0.6739777563626452, "learning_rate": 1.1711273317112735e-06, "loss": 0.6162, "step": 32446 }, { "epoch": 0.9473300049633587, "grad_norm": 0.6234720200726568, "learning_rate": 1.1704785077047851e-06, "loss": 0.5847, "step": 32447 }, { "epoch": 0.9473592011912061, "grad_norm": 0.6612963646485941, "learning_rate": 1.169829683698297e-06, "loss": 0.5933, "step": 32448 }, { "epoch": 0.9473883974190535, "grad_norm": 0.6725556489952532, "learning_rate": 1.1691808596918087e-06, "loss": 0.6698, "step": 32449 }, { "epoch": 0.9474175936469008, "grad_norm": 0.6398260846692656, "learning_rate": 1.1685320356853203e-06, "loss": 0.5474, "step": 32450 }, { "epoch": 0.9474467898747482, "grad_norm": 0.6538519704593537, "learning_rate": 1.1678832116788322e-06, "loss": 0.5627, "step": 32451 }, { "epoch": 0.9474759861025955, "grad_norm": 0.7061279041629618, "learning_rate": 1.167234387672344e-06, "loss": 0.621, "step": 32452 }, { "epoch": 0.9475051823304429, "grad_norm": 0.5957972588550382, "learning_rate": 1.1665855636658558e-06, "loss": 0.5461, "step": 32453 }, { "epoch": 0.9475343785582903, "grad_norm": 0.6555708399302934, "learning_rate": 1.1659367396593676e-06, "loss": 0.5563, "step": 32454 }, { "epoch": 0.9475635747861376, "grad_norm": 0.6514481147777699, "learning_rate": 1.1652879156528792e-06, "loss": 0.5806, "step": 32455 }, { "epoch": 0.947592771013985, "grad_norm": 0.6566679185694058, "learning_rate": 1.164639091646391e-06, "loss": 0.6195, "step": 32456 }, { "epoch": 0.9476219672418323, "grad_norm": 0.6662758460208994, "learning_rate": 1.1639902676399026e-06, "loss": 0.6287, "step": 32457 }, { "epoch": 0.9476511634696797, "grad_norm": 0.6236742244886555, "learning_rate": 1.1633414436334144e-06, "loss": 0.5418, "step": 32458 }, { "epoch": 0.9476803596975271, "grad_norm": 0.631998651115158, "learning_rate": 1.1626926196269263e-06, "loss": 0.5623, "step": 32459 }, { "epoch": 0.9477095559253744, "grad_norm": 0.6491845016638707, "learning_rate": 1.162043795620438e-06, "loss": 0.5634, "step": 32460 }, { "epoch": 0.9477387521532218, "grad_norm": 0.6630834687884265, "learning_rate": 1.1613949716139499e-06, "loss": 0.6481, "step": 32461 }, { "epoch": 0.9477679483810691, "grad_norm": 0.6799515434851189, "learning_rate": 1.1607461476074617e-06, "loss": 0.6203, "step": 32462 }, { "epoch": 0.9477971446089165, "grad_norm": 0.611202997639258, "learning_rate": 1.1600973236009733e-06, "loss": 0.5379, "step": 32463 }, { "epoch": 0.9478263408367639, "grad_norm": 0.6730957140444707, "learning_rate": 1.1594484995944851e-06, "loss": 0.6672, "step": 32464 }, { "epoch": 0.9478555370646112, "grad_norm": 0.6451975394899983, "learning_rate": 1.1587996755879967e-06, "loss": 0.6117, "step": 32465 }, { "epoch": 0.9478847332924586, "grad_norm": 0.6764395280690962, "learning_rate": 1.1581508515815085e-06, "loss": 0.6285, "step": 32466 }, { "epoch": 0.947913929520306, "grad_norm": 0.6269241339415093, "learning_rate": 1.1575020275750204e-06, "loss": 0.5679, "step": 32467 }, { "epoch": 0.9479431257481533, "grad_norm": 0.6357879306608453, "learning_rate": 1.1568532035685322e-06, "loss": 0.5538, "step": 32468 }, { "epoch": 0.9479723219760007, "grad_norm": 0.6372827389259589, "learning_rate": 1.156204379562044e-06, "loss": 0.5834, "step": 32469 }, { "epoch": 0.948001518203848, "grad_norm": 0.6523077370516038, "learning_rate": 1.1555555555555556e-06, "loss": 0.6153, "step": 32470 }, { "epoch": 0.9480307144316954, "grad_norm": 0.6478407380303213, "learning_rate": 1.1549067315490674e-06, "loss": 0.6249, "step": 32471 }, { "epoch": 0.9480599106595428, "grad_norm": 0.6426641608079291, "learning_rate": 1.1542579075425792e-06, "loss": 0.5565, "step": 32472 }, { "epoch": 0.9480891068873901, "grad_norm": 0.6052820846866175, "learning_rate": 1.153609083536091e-06, "loss": 0.5365, "step": 32473 }, { "epoch": 0.9481183031152375, "grad_norm": 0.6536865751131689, "learning_rate": 1.1529602595296026e-06, "loss": 0.6332, "step": 32474 }, { "epoch": 0.9481474993430848, "grad_norm": 0.7058858771700918, "learning_rate": 1.1523114355231145e-06, "loss": 0.6505, "step": 32475 }, { "epoch": 0.9481766955709322, "grad_norm": 0.6302544228903175, "learning_rate": 1.151662611516626e-06, "loss": 0.5679, "step": 32476 }, { "epoch": 0.9482058917987796, "grad_norm": 0.6633761071144995, "learning_rate": 1.1510137875101379e-06, "loss": 0.5921, "step": 32477 }, { "epoch": 0.9482350880266269, "grad_norm": 0.6280461877125731, "learning_rate": 1.1503649635036497e-06, "loss": 0.5406, "step": 32478 }, { "epoch": 0.9482642842544743, "grad_norm": 0.6638098276064989, "learning_rate": 1.1497161394971615e-06, "loss": 0.6095, "step": 32479 }, { "epoch": 0.9482934804823216, "grad_norm": 0.7255481160793109, "learning_rate": 1.1490673154906733e-06, "loss": 0.6591, "step": 32480 }, { "epoch": 0.948322676710169, "grad_norm": 0.6383045325781764, "learning_rate": 1.1484184914841851e-06, "loss": 0.6133, "step": 32481 }, { "epoch": 0.9483518729380164, "grad_norm": 0.656686275423725, "learning_rate": 1.1477696674776967e-06, "loss": 0.6198, "step": 32482 }, { "epoch": 0.9483810691658637, "grad_norm": 0.6740346494943882, "learning_rate": 1.1471208434712086e-06, "loss": 0.6048, "step": 32483 }, { "epoch": 0.9484102653937111, "grad_norm": 0.659423292898631, "learning_rate": 1.1464720194647202e-06, "loss": 0.6174, "step": 32484 }, { "epoch": 0.9484394616215585, "grad_norm": 0.7014845976913765, "learning_rate": 1.145823195458232e-06, "loss": 0.7273, "step": 32485 }, { "epoch": 0.9484686578494058, "grad_norm": 0.6113246842815473, "learning_rate": 1.1451743714517438e-06, "loss": 0.5328, "step": 32486 }, { "epoch": 0.9484978540772532, "grad_norm": 0.6359317510833489, "learning_rate": 1.1445255474452556e-06, "loss": 0.5528, "step": 32487 }, { "epoch": 0.9485270503051005, "grad_norm": 0.6915987163658492, "learning_rate": 1.1438767234387674e-06, "loss": 0.6258, "step": 32488 }, { "epoch": 0.9485562465329479, "grad_norm": 0.6592259857265435, "learning_rate": 1.143227899432279e-06, "loss": 0.6404, "step": 32489 }, { "epoch": 0.9485854427607953, "grad_norm": 0.6266701310851922, "learning_rate": 1.1425790754257908e-06, "loss": 0.5834, "step": 32490 }, { "epoch": 0.9486146389886426, "grad_norm": 0.639706964837039, "learning_rate": 1.1419302514193027e-06, "loss": 0.6052, "step": 32491 }, { "epoch": 0.94864383521649, "grad_norm": 0.6791855311615279, "learning_rate": 1.1412814274128143e-06, "loss": 0.6463, "step": 32492 }, { "epoch": 0.9486730314443373, "grad_norm": 0.629363487254741, "learning_rate": 1.140632603406326e-06, "loss": 0.5369, "step": 32493 }, { "epoch": 0.9487022276721847, "grad_norm": 0.5912739003837046, "learning_rate": 1.139983779399838e-06, "loss": 0.514, "step": 32494 }, { "epoch": 0.9487314239000321, "grad_norm": 0.6040277018322818, "learning_rate": 1.1393349553933497e-06, "loss": 0.5073, "step": 32495 }, { "epoch": 0.9487606201278794, "grad_norm": 0.6525285359781917, "learning_rate": 1.1386861313868613e-06, "loss": 0.6121, "step": 32496 }, { "epoch": 0.9487898163557268, "grad_norm": 0.6682221403792117, "learning_rate": 1.1380373073803731e-06, "loss": 0.5823, "step": 32497 }, { "epoch": 0.9488190125835742, "grad_norm": 0.665728089251023, "learning_rate": 1.137388483373885e-06, "loss": 0.6619, "step": 32498 }, { "epoch": 0.9488482088114215, "grad_norm": 0.6512916017941864, "learning_rate": 1.1367396593673968e-06, "loss": 0.6403, "step": 32499 }, { "epoch": 0.9488774050392689, "grad_norm": 0.6352151399471976, "learning_rate": 1.1360908353609086e-06, "loss": 0.5787, "step": 32500 }, { "epoch": 0.9489066012671162, "grad_norm": 0.6667605679629282, "learning_rate": 1.1354420113544202e-06, "loss": 0.5945, "step": 32501 }, { "epoch": 0.9489357974949636, "grad_norm": 0.632885465388448, "learning_rate": 1.134793187347932e-06, "loss": 0.6052, "step": 32502 }, { "epoch": 0.9489649937228111, "grad_norm": 0.6843248289355595, "learning_rate": 1.1341443633414436e-06, "loss": 0.6511, "step": 32503 }, { "epoch": 0.9489941899506584, "grad_norm": 0.5891644157657059, "learning_rate": 1.1334955393349554e-06, "loss": 0.5362, "step": 32504 }, { "epoch": 0.9490233861785058, "grad_norm": 0.651974750260481, "learning_rate": 1.1328467153284672e-06, "loss": 0.5881, "step": 32505 }, { "epoch": 0.9490525824063532, "grad_norm": 0.6593215671322591, "learning_rate": 1.132197891321979e-06, "loss": 0.5859, "step": 32506 }, { "epoch": 0.9490817786342005, "grad_norm": 0.6764541319838177, "learning_rate": 1.1315490673154909e-06, "loss": 0.6341, "step": 32507 }, { "epoch": 0.9491109748620479, "grad_norm": 0.6623195104718971, "learning_rate": 1.1309002433090027e-06, "loss": 0.623, "step": 32508 }, { "epoch": 0.9491401710898952, "grad_norm": 0.6210350980807621, "learning_rate": 1.1302514193025143e-06, "loss": 0.5259, "step": 32509 }, { "epoch": 0.9491693673177426, "grad_norm": 0.6228841863421547, "learning_rate": 1.129602595296026e-06, "loss": 0.5389, "step": 32510 }, { "epoch": 0.94919856354559, "grad_norm": 0.5745715996320931, "learning_rate": 1.1289537712895377e-06, "loss": 0.4778, "step": 32511 }, { "epoch": 0.9492277597734373, "grad_norm": 0.6543745365865248, "learning_rate": 1.1283049472830495e-06, "loss": 0.5994, "step": 32512 }, { "epoch": 0.9492569560012847, "grad_norm": 0.6356650969564374, "learning_rate": 1.1276561232765613e-06, "loss": 0.5809, "step": 32513 }, { "epoch": 0.949286152229132, "grad_norm": 0.6522902763029876, "learning_rate": 1.1270072992700731e-06, "loss": 0.6223, "step": 32514 }, { "epoch": 0.9493153484569794, "grad_norm": 0.596612515020503, "learning_rate": 1.126358475263585e-06, "loss": 0.4878, "step": 32515 }, { "epoch": 0.9493445446848268, "grad_norm": 0.6312724184266431, "learning_rate": 1.1257096512570966e-06, "loss": 0.5396, "step": 32516 }, { "epoch": 0.9493737409126741, "grad_norm": 0.6187142396111167, "learning_rate": 1.1250608272506084e-06, "loss": 0.5691, "step": 32517 }, { "epoch": 0.9494029371405215, "grad_norm": 0.6658058565988757, "learning_rate": 1.1244120032441202e-06, "loss": 0.6513, "step": 32518 }, { "epoch": 0.9494321333683688, "grad_norm": 0.7247410199318478, "learning_rate": 1.1237631792376318e-06, "loss": 0.7527, "step": 32519 }, { "epoch": 0.9494613295962162, "grad_norm": 0.5993730542847212, "learning_rate": 1.1231143552311436e-06, "loss": 0.5, "step": 32520 }, { "epoch": 0.9494905258240636, "grad_norm": 0.643261714482575, "learning_rate": 1.1224655312246554e-06, "loss": 0.5897, "step": 32521 }, { "epoch": 0.9495197220519109, "grad_norm": 0.7600129820908667, "learning_rate": 1.121816707218167e-06, "loss": 0.7139, "step": 32522 }, { "epoch": 0.9495489182797583, "grad_norm": 0.6646252574584949, "learning_rate": 1.1211678832116789e-06, "loss": 0.688, "step": 32523 }, { "epoch": 0.9495781145076057, "grad_norm": 0.6190916974666343, "learning_rate": 1.1205190592051907e-06, "loss": 0.573, "step": 32524 }, { "epoch": 0.949607310735453, "grad_norm": 0.6565226953695819, "learning_rate": 1.1198702351987025e-06, "loss": 0.627, "step": 32525 }, { "epoch": 0.9496365069633004, "grad_norm": 0.6322408505533873, "learning_rate": 1.1192214111922143e-06, "loss": 0.5846, "step": 32526 }, { "epoch": 0.9496657031911477, "grad_norm": 0.6437223407154321, "learning_rate": 1.118572587185726e-06, "loss": 0.5961, "step": 32527 }, { "epoch": 0.9496948994189951, "grad_norm": 0.6384803313842443, "learning_rate": 1.1179237631792377e-06, "loss": 0.5639, "step": 32528 }, { "epoch": 0.9497240956468425, "grad_norm": 0.653897872167771, "learning_rate": 1.1172749391727495e-06, "loss": 0.613, "step": 32529 }, { "epoch": 0.9497532918746898, "grad_norm": 0.6501915855996642, "learning_rate": 1.1166261151662611e-06, "loss": 0.5586, "step": 32530 }, { "epoch": 0.9497824881025372, "grad_norm": 0.6286937755215548, "learning_rate": 1.115977291159773e-06, "loss": 0.5697, "step": 32531 }, { "epoch": 0.9498116843303845, "grad_norm": 0.6479199205614038, "learning_rate": 1.1153284671532848e-06, "loss": 0.5892, "step": 32532 }, { "epoch": 0.9498408805582319, "grad_norm": 0.6609530567391078, "learning_rate": 1.1146796431467966e-06, "loss": 0.6631, "step": 32533 }, { "epoch": 0.9498700767860793, "grad_norm": 0.7052563822951163, "learning_rate": 1.1140308191403084e-06, "loss": 0.7076, "step": 32534 }, { "epoch": 0.9498992730139266, "grad_norm": 0.5977518389335881, "learning_rate": 1.11338199513382e-06, "loss": 0.5154, "step": 32535 }, { "epoch": 0.949928469241774, "grad_norm": 0.5995331817853008, "learning_rate": 1.1127331711273318e-06, "loss": 0.5165, "step": 32536 }, { "epoch": 0.9499576654696213, "grad_norm": 0.6298501458103755, "learning_rate": 1.1120843471208436e-06, "loss": 0.6124, "step": 32537 }, { "epoch": 0.9499868616974687, "grad_norm": 0.6770040177901102, "learning_rate": 1.1114355231143552e-06, "loss": 0.628, "step": 32538 }, { "epoch": 0.9500160579253161, "grad_norm": 0.6623981849152056, "learning_rate": 1.110786699107867e-06, "loss": 0.6309, "step": 32539 }, { "epoch": 0.9500452541531634, "grad_norm": 0.6398457767669214, "learning_rate": 1.1101378751013789e-06, "loss": 0.5891, "step": 32540 }, { "epoch": 0.9500744503810108, "grad_norm": 0.6417913749111142, "learning_rate": 1.1094890510948907e-06, "loss": 0.5891, "step": 32541 }, { "epoch": 0.9501036466088582, "grad_norm": 0.625657366232028, "learning_rate": 1.1088402270884023e-06, "loss": 0.5589, "step": 32542 }, { "epoch": 0.9501328428367055, "grad_norm": 0.6165495771526935, "learning_rate": 1.108191403081914e-06, "loss": 0.5247, "step": 32543 }, { "epoch": 0.9501620390645529, "grad_norm": 0.6297354749859739, "learning_rate": 1.107542579075426e-06, "loss": 0.5703, "step": 32544 }, { "epoch": 0.9501912352924002, "grad_norm": 0.641013644454486, "learning_rate": 1.1068937550689377e-06, "loss": 0.5543, "step": 32545 }, { "epoch": 0.9502204315202476, "grad_norm": 0.676344168006102, "learning_rate": 1.1062449310624493e-06, "loss": 0.6481, "step": 32546 }, { "epoch": 0.950249627748095, "grad_norm": 0.6501531179754118, "learning_rate": 1.1055961070559612e-06, "loss": 0.6236, "step": 32547 }, { "epoch": 0.9502788239759423, "grad_norm": 0.6174086924230029, "learning_rate": 1.1049472830494728e-06, "loss": 0.552, "step": 32548 }, { "epoch": 0.9503080202037897, "grad_norm": 0.6682051671715716, "learning_rate": 1.1042984590429846e-06, "loss": 0.6184, "step": 32549 }, { "epoch": 0.950337216431637, "grad_norm": 0.6566283121394001, "learning_rate": 1.1036496350364964e-06, "loss": 0.6271, "step": 32550 }, { "epoch": 0.9503664126594844, "grad_norm": 0.6373706907062979, "learning_rate": 1.1030008110300082e-06, "loss": 0.5709, "step": 32551 }, { "epoch": 0.9503956088873318, "grad_norm": 0.6581196030975007, "learning_rate": 1.10235198702352e-06, "loss": 0.6233, "step": 32552 }, { "epoch": 0.9504248051151791, "grad_norm": 0.6661020879085863, "learning_rate": 1.1017031630170318e-06, "loss": 0.5863, "step": 32553 }, { "epoch": 0.9504540013430265, "grad_norm": 0.6887655375708716, "learning_rate": 1.1010543390105434e-06, "loss": 0.695, "step": 32554 }, { "epoch": 0.9504831975708739, "grad_norm": 0.6619573174927672, "learning_rate": 1.1004055150040553e-06, "loss": 0.6261, "step": 32555 }, { "epoch": 0.9505123937987212, "grad_norm": 0.6585354216128034, "learning_rate": 1.099756690997567e-06, "loss": 0.5736, "step": 32556 }, { "epoch": 0.9505415900265686, "grad_norm": 0.6045110761698219, "learning_rate": 1.0991078669910787e-06, "loss": 0.5421, "step": 32557 }, { "epoch": 0.9505707862544159, "grad_norm": 0.6458578929456785, "learning_rate": 1.0984590429845905e-06, "loss": 0.6383, "step": 32558 }, { "epoch": 0.9505999824822633, "grad_norm": 0.6007588762712868, "learning_rate": 1.0978102189781023e-06, "loss": 0.5313, "step": 32559 }, { "epoch": 0.9506291787101107, "grad_norm": 0.6110636493367111, "learning_rate": 1.0971613949716141e-06, "loss": 0.5445, "step": 32560 }, { "epoch": 0.950658374937958, "grad_norm": 0.6328855436286925, "learning_rate": 1.096512570965126e-06, "loss": 0.5821, "step": 32561 }, { "epoch": 0.9506875711658054, "grad_norm": 0.6383464228346966, "learning_rate": 1.0958637469586375e-06, "loss": 0.5704, "step": 32562 }, { "epoch": 0.9507167673936527, "grad_norm": 0.6444874567489105, "learning_rate": 1.0952149229521494e-06, "loss": 0.5815, "step": 32563 }, { "epoch": 0.9507459636215001, "grad_norm": 0.6423040668081088, "learning_rate": 1.0945660989456612e-06, "loss": 0.598, "step": 32564 }, { "epoch": 0.9507751598493475, "grad_norm": 0.6351488185333948, "learning_rate": 1.0939172749391728e-06, "loss": 0.5863, "step": 32565 }, { "epoch": 0.9508043560771948, "grad_norm": 0.6575031940855058, "learning_rate": 1.0932684509326846e-06, "loss": 0.5896, "step": 32566 }, { "epoch": 0.9508335523050422, "grad_norm": 0.6970306261121167, "learning_rate": 1.0926196269261964e-06, "loss": 0.6861, "step": 32567 }, { "epoch": 0.9508627485328895, "grad_norm": 0.6308388350888547, "learning_rate": 1.091970802919708e-06, "loss": 0.5251, "step": 32568 }, { "epoch": 0.9508919447607369, "grad_norm": 0.6348363857838412, "learning_rate": 1.0913219789132198e-06, "loss": 0.6176, "step": 32569 }, { "epoch": 0.9509211409885843, "grad_norm": 0.6317633345973656, "learning_rate": 1.0906731549067316e-06, "loss": 0.5871, "step": 32570 }, { "epoch": 0.9509503372164316, "grad_norm": 0.6500756478603258, "learning_rate": 1.0900243309002435e-06, "loss": 0.5922, "step": 32571 }, { "epoch": 0.950979533444279, "grad_norm": 0.626292608197918, "learning_rate": 1.0893755068937553e-06, "loss": 0.5581, "step": 32572 }, { "epoch": 0.9510087296721264, "grad_norm": 0.6692945339931506, "learning_rate": 1.0887266828872669e-06, "loss": 0.6262, "step": 32573 }, { "epoch": 0.9510379258999737, "grad_norm": 0.6352923162416445, "learning_rate": 1.0880778588807787e-06, "loss": 0.5811, "step": 32574 }, { "epoch": 0.9510671221278211, "grad_norm": 0.6179345417913547, "learning_rate": 1.0874290348742903e-06, "loss": 0.5691, "step": 32575 }, { "epoch": 0.9510963183556684, "grad_norm": 0.6922512223300357, "learning_rate": 1.086780210867802e-06, "loss": 0.6322, "step": 32576 }, { "epoch": 0.9511255145835158, "grad_norm": 0.6290257317612347, "learning_rate": 1.086131386861314e-06, "loss": 0.5312, "step": 32577 }, { "epoch": 0.9511547108113632, "grad_norm": 0.6688371692676846, "learning_rate": 1.0854825628548257e-06, "loss": 0.6307, "step": 32578 }, { "epoch": 0.9511839070392105, "grad_norm": 0.6526402281194504, "learning_rate": 1.0848337388483376e-06, "loss": 0.5566, "step": 32579 }, { "epoch": 0.9512131032670579, "grad_norm": 0.6595262481369569, "learning_rate": 1.0841849148418494e-06, "loss": 0.5476, "step": 32580 }, { "epoch": 0.9512422994949052, "grad_norm": 0.5986225299201356, "learning_rate": 1.083536090835361e-06, "loss": 0.5235, "step": 32581 }, { "epoch": 0.9512714957227526, "grad_norm": 0.635328578539008, "learning_rate": 1.0828872668288728e-06, "loss": 0.5904, "step": 32582 }, { "epoch": 0.9513006919506, "grad_norm": 0.6517960544452788, "learning_rate": 1.0822384428223844e-06, "loss": 0.5922, "step": 32583 }, { "epoch": 0.9513298881784473, "grad_norm": 0.6984979110564838, "learning_rate": 1.0815896188158962e-06, "loss": 0.6035, "step": 32584 }, { "epoch": 0.9513590844062947, "grad_norm": 0.6595840621199525, "learning_rate": 1.080940794809408e-06, "loss": 0.6001, "step": 32585 }, { "epoch": 0.951388280634142, "grad_norm": 0.6677847065232486, "learning_rate": 1.0802919708029198e-06, "loss": 0.6287, "step": 32586 }, { "epoch": 0.9514174768619894, "grad_norm": 0.6357314169730612, "learning_rate": 1.0796431467964317e-06, "loss": 0.5305, "step": 32587 }, { "epoch": 0.9514466730898368, "grad_norm": 0.6499738094735737, "learning_rate": 1.0789943227899433e-06, "loss": 0.5949, "step": 32588 }, { "epoch": 0.9514758693176841, "grad_norm": 0.6682381219874065, "learning_rate": 1.078345498783455e-06, "loss": 0.6067, "step": 32589 }, { "epoch": 0.9515050655455315, "grad_norm": 0.6322328072843026, "learning_rate": 1.0776966747769669e-06, "loss": 0.5697, "step": 32590 }, { "epoch": 0.9515342617733789, "grad_norm": 0.6106891901118116, "learning_rate": 1.0770478507704787e-06, "loss": 0.548, "step": 32591 }, { "epoch": 0.9515634580012262, "grad_norm": 0.718514926347993, "learning_rate": 1.0763990267639903e-06, "loss": 0.6276, "step": 32592 }, { "epoch": 0.9515926542290736, "grad_norm": 0.654065458232861, "learning_rate": 1.0757502027575021e-06, "loss": 0.6156, "step": 32593 }, { "epoch": 0.9516218504569209, "grad_norm": 0.5834478469882268, "learning_rate": 1.0751013787510137e-06, "loss": 0.4988, "step": 32594 }, { "epoch": 0.9516510466847683, "grad_norm": 0.9001567834196561, "learning_rate": 1.0744525547445255e-06, "loss": 0.7081, "step": 32595 }, { "epoch": 0.9516802429126157, "grad_norm": 0.7035767565831206, "learning_rate": 1.0738037307380374e-06, "loss": 0.6429, "step": 32596 }, { "epoch": 0.951709439140463, "grad_norm": 0.6172692049591508, "learning_rate": 1.0731549067315492e-06, "loss": 0.518, "step": 32597 }, { "epoch": 0.9517386353683104, "grad_norm": 0.6377084004311009, "learning_rate": 1.072506082725061e-06, "loss": 0.6021, "step": 32598 }, { "epoch": 0.9517678315961577, "grad_norm": 0.6376665836723084, "learning_rate": 1.0718572587185728e-06, "loss": 0.5777, "step": 32599 }, { "epoch": 0.9517970278240051, "grad_norm": 0.6907008307476741, "learning_rate": 1.0712084347120844e-06, "loss": 0.6151, "step": 32600 }, { "epoch": 0.9518262240518525, "grad_norm": 0.6922179724237889, "learning_rate": 1.0705596107055962e-06, "loss": 0.5845, "step": 32601 }, { "epoch": 0.9518554202796998, "grad_norm": 0.6191506073249754, "learning_rate": 1.0699107866991078e-06, "loss": 0.5364, "step": 32602 }, { "epoch": 0.9518846165075472, "grad_norm": 0.6991957027106507, "learning_rate": 1.0692619626926196e-06, "loss": 0.6912, "step": 32603 }, { "epoch": 0.9519138127353945, "grad_norm": 0.6758613182271214, "learning_rate": 1.0686131386861315e-06, "loss": 0.6006, "step": 32604 }, { "epoch": 0.9519430089632419, "grad_norm": 0.5717122292581137, "learning_rate": 1.0679643146796433e-06, "loss": 0.4921, "step": 32605 }, { "epoch": 0.9519722051910893, "grad_norm": 0.6852027460069079, "learning_rate": 1.067315490673155e-06, "loss": 0.6954, "step": 32606 }, { "epoch": 0.9520014014189366, "grad_norm": 0.6202800726941451, "learning_rate": 1.066666666666667e-06, "loss": 0.5582, "step": 32607 }, { "epoch": 0.952030597646784, "grad_norm": 0.6335507465683806, "learning_rate": 1.0660178426601785e-06, "loss": 0.5185, "step": 32608 }, { "epoch": 0.9520597938746314, "grad_norm": 0.6297465041255614, "learning_rate": 1.0653690186536903e-06, "loss": 0.5537, "step": 32609 }, { "epoch": 0.9520889901024787, "grad_norm": 0.6659502409131328, "learning_rate": 1.064720194647202e-06, "loss": 0.6132, "step": 32610 }, { "epoch": 0.9521181863303261, "grad_norm": 0.7000913755754128, "learning_rate": 1.0640713706407137e-06, "loss": 0.6565, "step": 32611 }, { "epoch": 0.9521473825581734, "grad_norm": 0.7172355098582432, "learning_rate": 1.0634225466342256e-06, "loss": 0.663, "step": 32612 }, { "epoch": 0.9521765787860208, "grad_norm": 0.6066591448776505, "learning_rate": 1.0627737226277374e-06, "loss": 0.5481, "step": 32613 }, { "epoch": 0.9522057750138682, "grad_norm": 0.6984534329321597, "learning_rate": 1.062124898621249e-06, "loss": 0.6991, "step": 32614 }, { "epoch": 0.9522349712417155, "grad_norm": 0.5582038393719635, "learning_rate": 1.0614760746147608e-06, "loss": 0.4719, "step": 32615 }, { "epoch": 0.9522641674695629, "grad_norm": 0.6749135164215578, "learning_rate": 1.0608272506082726e-06, "loss": 0.625, "step": 32616 }, { "epoch": 0.9522933636974102, "grad_norm": 0.6305723977117113, "learning_rate": 1.0601784266017844e-06, "loss": 0.5951, "step": 32617 }, { "epoch": 0.9523225599252576, "grad_norm": 0.6203506569524265, "learning_rate": 1.0595296025952962e-06, "loss": 0.5974, "step": 32618 }, { "epoch": 0.952351756153105, "grad_norm": 0.6472826696324379, "learning_rate": 1.0588807785888078e-06, "loss": 0.6077, "step": 32619 }, { "epoch": 0.9523809523809523, "grad_norm": 0.6059113339611519, "learning_rate": 1.0582319545823197e-06, "loss": 0.5532, "step": 32620 }, { "epoch": 0.9524101486087997, "grad_norm": 0.6259937544939996, "learning_rate": 1.0575831305758313e-06, "loss": 0.5797, "step": 32621 }, { "epoch": 0.952439344836647, "grad_norm": 0.7049346455548147, "learning_rate": 1.056934306569343e-06, "loss": 0.6496, "step": 32622 }, { "epoch": 0.9524685410644945, "grad_norm": 0.7352315477013337, "learning_rate": 1.0562854825628549e-06, "loss": 0.7092, "step": 32623 }, { "epoch": 0.9524977372923419, "grad_norm": 0.5991625127959184, "learning_rate": 1.0556366585563667e-06, "loss": 0.5119, "step": 32624 }, { "epoch": 0.9525269335201892, "grad_norm": 0.6652629498965201, "learning_rate": 1.0549878345498785e-06, "loss": 0.5871, "step": 32625 }, { "epoch": 0.9525561297480366, "grad_norm": 0.6183247837677858, "learning_rate": 1.0543390105433903e-06, "loss": 0.5056, "step": 32626 }, { "epoch": 0.952585325975884, "grad_norm": 0.7018569720426863, "learning_rate": 1.053690186536902e-06, "loss": 0.6137, "step": 32627 }, { "epoch": 0.9526145222037313, "grad_norm": 0.619011421075468, "learning_rate": 1.0530413625304138e-06, "loss": 0.5672, "step": 32628 }, { "epoch": 0.9526437184315787, "grad_norm": 0.6600535746109341, "learning_rate": 1.0523925385239254e-06, "loss": 0.5956, "step": 32629 }, { "epoch": 0.952672914659426, "grad_norm": 0.6631200338722529, "learning_rate": 1.0517437145174372e-06, "loss": 0.6031, "step": 32630 }, { "epoch": 0.9527021108872734, "grad_norm": 0.6726645644170184, "learning_rate": 1.051094890510949e-06, "loss": 0.5825, "step": 32631 }, { "epoch": 0.9527313071151208, "grad_norm": 0.6398578013482006, "learning_rate": 1.0504460665044608e-06, "loss": 0.6074, "step": 32632 }, { "epoch": 0.9527605033429681, "grad_norm": 0.615069217089255, "learning_rate": 1.0497972424979726e-06, "loss": 0.589, "step": 32633 }, { "epoch": 0.9527896995708155, "grad_norm": 0.6776091029118142, "learning_rate": 1.0491484184914842e-06, "loss": 0.6447, "step": 32634 }, { "epoch": 0.9528188957986629, "grad_norm": 0.641537922580583, "learning_rate": 1.048499594484996e-06, "loss": 0.5987, "step": 32635 }, { "epoch": 0.9528480920265102, "grad_norm": 0.6163017968711036, "learning_rate": 1.0478507704785079e-06, "loss": 0.5224, "step": 32636 }, { "epoch": 0.9528772882543576, "grad_norm": 0.6118061181560579, "learning_rate": 1.0472019464720195e-06, "loss": 0.5447, "step": 32637 }, { "epoch": 0.9529064844822049, "grad_norm": 0.6284285915465242, "learning_rate": 1.0465531224655313e-06, "loss": 0.5696, "step": 32638 }, { "epoch": 0.9529356807100523, "grad_norm": 0.6588268717363867, "learning_rate": 1.045904298459043e-06, "loss": 0.6416, "step": 32639 }, { "epoch": 0.9529648769378997, "grad_norm": 0.6484405194321751, "learning_rate": 1.0452554744525547e-06, "loss": 0.6255, "step": 32640 }, { "epoch": 0.952994073165747, "grad_norm": 0.6587370744292197, "learning_rate": 1.0446066504460665e-06, "loss": 0.662, "step": 32641 }, { "epoch": 0.9530232693935944, "grad_norm": 0.6840723575289333, "learning_rate": 1.0439578264395783e-06, "loss": 0.6847, "step": 32642 }, { "epoch": 0.9530524656214417, "grad_norm": 0.6609220175724619, "learning_rate": 1.0433090024330901e-06, "loss": 0.6238, "step": 32643 }, { "epoch": 0.9530816618492891, "grad_norm": 0.6302718200075947, "learning_rate": 1.042660178426602e-06, "loss": 0.5583, "step": 32644 }, { "epoch": 0.9531108580771365, "grad_norm": 0.6477632842112501, "learning_rate": 1.0420113544201138e-06, "loss": 0.6256, "step": 32645 }, { "epoch": 0.9531400543049838, "grad_norm": 0.6392287588703497, "learning_rate": 1.0413625304136254e-06, "loss": 0.5918, "step": 32646 }, { "epoch": 0.9531692505328312, "grad_norm": 0.5891968107002092, "learning_rate": 1.0407137064071372e-06, "loss": 0.4859, "step": 32647 }, { "epoch": 0.9531984467606786, "grad_norm": 0.6671486739553159, "learning_rate": 1.0400648824006488e-06, "loss": 0.6104, "step": 32648 }, { "epoch": 0.9532276429885259, "grad_norm": 0.6630525162192479, "learning_rate": 1.0394160583941606e-06, "loss": 0.61, "step": 32649 }, { "epoch": 0.9532568392163733, "grad_norm": 0.6378008170823597, "learning_rate": 1.0387672343876724e-06, "loss": 0.5817, "step": 32650 }, { "epoch": 0.9532860354442206, "grad_norm": 0.6390029702454045, "learning_rate": 1.0381184103811842e-06, "loss": 0.6013, "step": 32651 }, { "epoch": 0.953315231672068, "grad_norm": 0.6414793291327414, "learning_rate": 1.037469586374696e-06, "loss": 0.6003, "step": 32652 }, { "epoch": 0.9533444278999154, "grad_norm": 0.600846998421975, "learning_rate": 1.0368207623682077e-06, "loss": 0.5088, "step": 32653 }, { "epoch": 0.9533736241277627, "grad_norm": 0.615356141197148, "learning_rate": 1.0361719383617195e-06, "loss": 0.5419, "step": 32654 }, { "epoch": 0.9534028203556101, "grad_norm": 0.6409309082270959, "learning_rate": 1.0355231143552313e-06, "loss": 0.6194, "step": 32655 }, { "epoch": 0.9534320165834574, "grad_norm": 0.6365173147983089, "learning_rate": 1.0348742903487429e-06, "loss": 0.5823, "step": 32656 }, { "epoch": 0.9534612128113048, "grad_norm": 0.6198167806522775, "learning_rate": 1.0342254663422547e-06, "loss": 0.5539, "step": 32657 }, { "epoch": 0.9534904090391522, "grad_norm": 0.6812969576449913, "learning_rate": 1.0335766423357665e-06, "loss": 0.655, "step": 32658 }, { "epoch": 0.9535196052669995, "grad_norm": 0.6241466016828056, "learning_rate": 1.0329278183292783e-06, "loss": 0.5971, "step": 32659 }, { "epoch": 0.9535488014948469, "grad_norm": 0.6887207946385997, "learning_rate": 1.03227899432279e-06, "loss": 0.7152, "step": 32660 }, { "epoch": 0.9535779977226942, "grad_norm": 0.6329452658397453, "learning_rate": 1.0316301703163018e-06, "loss": 0.5697, "step": 32661 }, { "epoch": 0.9536071939505416, "grad_norm": 0.6260986145357442, "learning_rate": 1.0309813463098136e-06, "loss": 0.5696, "step": 32662 }, { "epoch": 0.953636390178389, "grad_norm": 0.6053026619125323, "learning_rate": 1.0303325223033254e-06, "loss": 0.5406, "step": 32663 }, { "epoch": 0.9536655864062363, "grad_norm": 0.6056698544794419, "learning_rate": 1.029683698296837e-06, "loss": 0.5599, "step": 32664 }, { "epoch": 0.9536947826340837, "grad_norm": 0.6108518448368642, "learning_rate": 1.0290348742903488e-06, "loss": 0.5191, "step": 32665 }, { "epoch": 0.953723978861931, "grad_norm": 0.655020565609728, "learning_rate": 1.0283860502838606e-06, "loss": 0.6069, "step": 32666 }, { "epoch": 0.9537531750897784, "grad_norm": 0.6104609518496882, "learning_rate": 1.0277372262773722e-06, "loss": 0.5331, "step": 32667 }, { "epoch": 0.9537823713176258, "grad_norm": 0.7309399087377618, "learning_rate": 1.027088402270884e-06, "loss": 0.5974, "step": 32668 }, { "epoch": 0.9538115675454731, "grad_norm": 0.6297608000926773, "learning_rate": 1.0264395782643959e-06, "loss": 0.5258, "step": 32669 }, { "epoch": 0.9538407637733205, "grad_norm": 0.6309192078117564, "learning_rate": 1.0257907542579077e-06, "loss": 0.5872, "step": 32670 }, { "epoch": 0.9538699600011679, "grad_norm": 0.6620612151834571, "learning_rate": 1.0251419302514195e-06, "loss": 0.6569, "step": 32671 }, { "epoch": 0.9538991562290152, "grad_norm": 0.6896385584190102, "learning_rate": 1.0244931062449313e-06, "loss": 0.6324, "step": 32672 }, { "epoch": 0.9539283524568626, "grad_norm": 0.655412542966501, "learning_rate": 1.023844282238443e-06, "loss": 0.6388, "step": 32673 }, { "epoch": 0.9539575486847099, "grad_norm": 0.6525246508562877, "learning_rate": 1.0231954582319547e-06, "loss": 0.5516, "step": 32674 }, { "epoch": 0.9539867449125573, "grad_norm": 0.6677949929261544, "learning_rate": 1.0225466342254663e-06, "loss": 0.6313, "step": 32675 }, { "epoch": 0.9540159411404047, "grad_norm": 0.6778011113750846, "learning_rate": 1.0218978102189781e-06, "loss": 0.6356, "step": 32676 }, { "epoch": 0.954045137368252, "grad_norm": 0.6447678373967828, "learning_rate": 1.02124898621249e-06, "loss": 0.5649, "step": 32677 }, { "epoch": 0.9540743335960994, "grad_norm": 0.6629180634351433, "learning_rate": 1.0206001622060018e-06, "loss": 0.6466, "step": 32678 }, { "epoch": 0.9541035298239467, "grad_norm": 0.6228205012424343, "learning_rate": 1.0199513381995136e-06, "loss": 0.5597, "step": 32679 }, { "epoch": 0.9541327260517941, "grad_norm": 0.6787351337109738, "learning_rate": 1.0193025141930252e-06, "loss": 0.6507, "step": 32680 }, { "epoch": 0.9541619222796415, "grad_norm": 0.6560793177082499, "learning_rate": 1.018653690186537e-06, "loss": 0.6569, "step": 32681 }, { "epoch": 0.9541911185074888, "grad_norm": 0.6047401406020123, "learning_rate": 1.0180048661800488e-06, "loss": 0.5017, "step": 32682 }, { "epoch": 0.9542203147353362, "grad_norm": 0.6549849154825917, "learning_rate": 1.0173560421735604e-06, "loss": 0.6181, "step": 32683 }, { "epoch": 0.9542495109631836, "grad_norm": 0.6608081045768738, "learning_rate": 1.0167072181670722e-06, "loss": 0.6122, "step": 32684 }, { "epoch": 0.9542787071910309, "grad_norm": 0.6310442791165567, "learning_rate": 1.016058394160584e-06, "loss": 0.5648, "step": 32685 }, { "epoch": 0.9543079034188783, "grad_norm": 0.7082792634214226, "learning_rate": 1.0154095701540957e-06, "loss": 0.6618, "step": 32686 }, { "epoch": 0.9543370996467256, "grad_norm": 0.6579660439054056, "learning_rate": 1.0147607461476075e-06, "loss": 0.6353, "step": 32687 }, { "epoch": 0.954366295874573, "grad_norm": 0.5978845630919108, "learning_rate": 1.0141119221411193e-06, "loss": 0.5292, "step": 32688 }, { "epoch": 0.9543954921024204, "grad_norm": 0.6562498616993496, "learning_rate": 1.0134630981346311e-06, "loss": 0.566, "step": 32689 }, { "epoch": 0.9544246883302677, "grad_norm": 0.5731897566841325, "learning_rate": 1.012814274128143e-06, "loss": 0.5112, "step": 32690 }, { "epoch": 0.9544538845581151, "grad_norm": 0.6646558201158503, "learning_rate": 1.0121654501216545e-06, "loss": 0.6363, "step": 32691 }, { "epoch": 0.9544830807859624, "grad_norm": 0.6342766419290007, "learning_rate": 1.0115166261151663e-06, "loss": 0.5051, "step": 32692 }, { "epoch": 0.9545122770138098, "grad_norm": 0.6405108619470716, "learning_rate": 1.010867802108678e-06, "loss": 0.5535, "step": 32693 }, { "epoch": 0.9545414732416572, "grad_norm": 0.6307790277325457, "learning_rate": 1.0102189781021898e-06, "loss": 0.5808, "step": 32694 }, { "epoch": 0.9545706694695045, "grad_norm": 0.626657590168994, "learning_rate": 1.0095701540957016e-06, "loss": 0.5477, "step": 32695 }, { "epoch": 0.9545998656973519, "grad_norm": 0.6389587599755767, "learning_rate": 1.0089213300892134e-06, "loss": 0.5634, "step": 32696 }, { "epoch": 0.9546290619251993, "grad_norm": 0.6605500419898338, "learning_rate": 1.0082725060827252e-06, "loss": 0.6287, "step": 32697 }, { "epoch": 0.9546582581530466, "grad_norm": 0.6284966087952912, "learning_rate": 1.007623682076237e-06, "loss": 0.5907, "step": 32698 }, { "epoch": 0.954687454380894, "grad_norm": 0.6056982622170048, "learning_rate": 1.0069748580697486e-06, "loss": 0.5147, "step": 32699 }, { "epoch": 0.9547166506087413, "grad_norm": 0.6895404025875421, "learning_rate": 1.0063260340632604e-06, "loss": 0.6991, "step": 32700 }, { "epoch": 0.9547458468365887, "grad_norm": 0.5925430427236618, "learning_rate": 1.0056772100567723e-06, "loss": 0.5051, "step": 32701 }, { "epoch": 0.9547750430644361, "grad_norm": 0.6744884732044624, "learning_rate": 1.0050283860502839e-06, "loss": 0.6484, "step": 32702 }, { "epoch": 0.9548042392922834, "grad_norm": 0.652909155674193, "learning_rate": 1.0043795620437957e-06, "loss": 0.6101, "step": 32703 }, { "epoch": 0.9548334355201308, "grad_norm": 0.6654165524437649, "learning_rate": 1.0037307380373075e-06, "loss": 0.5915, "step": 32704 }, { "epoch": 0.9548626317479781, "grad_norm": 0.6518773649814853, "learning_rate": 1.0030819140308193e-06, "loss": 0.6025, "step": 32705 }, { "epoch": 0.9548918279758255, "grad_norm": 0.7104426628754835, "learning_rate": 1.002433090024331e-06, "loss": 0.7017, "step": 32706 }, { "epoch": 0.9549210242036729, "grad_norm": 0.6464665988274357, "learning_rate": 1.0017842660178427e-06, "loss": 0.5968, "step": 32707 }, { "epoch": 0.9549502204315202, "grad_norm": 0.6240013561333019, "learning_rate": 1.0011354420113545e-06, "loss": 0.5401, "step": 32708 }, { "epoch": 0.9549794166593676, "grad_norm": 0.5963965544875053, "learning_rate": 1.0004866180048664e-06, "loss": 0.502, "step": 32709 }, { "epoch": 0.955008612887215, "grad_norm": 0.6366250088312186, "learning_rate": 9.99837793998378e-07, "loss": 0.5731, "step": 32710 }, { "epoch": 0.9550378091150623, "grad_norm": 0.601968498778837, "learning_rate": 9.991889699918898e-07, "loss": 0.5328, "step": 32711 }, { "epoch": 0.9550670053429097, "grad_norm": 0.6257556924486514, "learning_rate": 9.985401459854016e-07, "loss": 0.5354, "step": 32712 }, { "epoch": 0.955096201570757, "grad_norm": 0.6178602062286396, "learning_rate": 9.978913219789132e-07, "loss": 0.5682, "step": 32713 }, { "epoch": 0.9551253977986044, "grad_norm": 0.6164267456176086, "learning_rate": 9.97242497972425e-07, "loss": 0.5841, "step": 32714 }, { "epoch": 0.9551545940264518, "grad_norm": 0.6902229352954327, "learning_rate": 9.965936739659368e-07, "loss": 0.6947, "step": 32715 }, { "epoch": 0.9551837902542991, "grad_norm": 0.659818497607347, "learning_rate": 9.959448499594486e-07, "loss": 0.6184, "step": 32716 }, { "epoch": 0.9552129864821465, "grad_norm": 0.7929590933489515, "learning_rate": 9.952960259529605e-07, "loss": 0.6215, "step": 32717 }, { "epoch": 0.9552421827099938, "grad_norm": 0.6489966534086647, "learning_rate": 9.94647201946472e-07, "loss": 0.6092, "step": 32718 }, { "epoch": 0.9552713789378412, "grad_norm": 0.6547830747900778, "learning_rate": 9.939983779399839e-07, "loss": 0.5809, "step": 32719 }, { "epoch": 0.9553005751656886, "grad_norm": 0.7053522435800058, "learning_rate": 9.933495539334955e-07, "loss": 0.6319, "step": 32720 }, { "epoch": 0.9553297713935359, "grad_norm": 0.6850299144013093, "learning_rate": 9.927007299270073e-07, "loss": 0.6297, "step": 32721 }, { "epoch": 0.9553589676213833, "grad_norm": 0.643980069291524, "learning_rate": 9.920519059205191e-07, "loss": 0.5673, "step": 32722 }, { "epoch": 0.9553881638492306, "grad_norm": 0.6507038491259205, "learning_rate": 9.91403081914031e-07, "loss": 0.5758, "step": 32723 }, { "epoch": 0.955417360077078, "grad_norm": 0.6314981913400797, "learning_rate": 9.907542579075427e-07, "loss": 0.5802, "step": 32724 }, { "epoch": 0.9554465563049254, "grad_norm": 0.6282722500907869, "learning_rate": 9.901054339010546e-07, "loss": 0.542, "step": 32725 }, { "epoch": 0.9554757525327727, "grad_norm": 0.6651128202293255, "learning_rate": 9.894566098945662e-07, "loss": 0.5716, "step": 32726 }, { "epoch": 0.9555049487606201, "grad_norm": 0.6176452766901017, "learning_rate": 9.88807785888078e-07, "loss": 0.5129, "step": 32727 }, { "epoch": 0.9555341449884674, "grad_norm": 0.6770218193129628, "learning_rate": 9.881589618815898e-07, "loss": 0.6553, "step": 32728 }, { "epoch": 0.9555633412163148, "grad_norm": 0.6317292523928854, "learning_rate": 9.875101378751014e-07, "loss": 0.5549, "step": 32729 }, { "epoch": 0.9555925374441622, "grad_norm": 0.6496907375702897, "learning_rate": 9.868613138686132e-07, "loss": 0.6228, "step": 32730 }, { "epoch": 0.9556217336720095, "grad_norm": 0.6477907040448051, "learning_rate": 9.86212489862125e-07, "loss": 0.592, "step": 32731 }, { "epoch": 0.9556509298998569, "grad_norm": 0.6375957677904237, "learning_rate": 9.855636658556366e-07, "loss": 0.5328, "step": 32732 }, { "epoch": 0.9556801261277043, "grad_norm": 0.6780997956770922, "learning_rate": 9.849148418491484e-07, "loss": 0.6125, "step": 32733 }, { "epoch": 0.9557093223555516, "grad_norm": 0.6017079176728819, "learning_rate": 9.842660178426603e-07, "loss": 0.5433, "step": 32734 }, { "epoch": 0.955738518583399, "grad_norm": 0.6180020820014065, "learning_rate": 9.83617193836172e-07, "loss": 0.5267, "step": 32735 }, { "epoch": 0.9557677148112463, "grad_norm": 0.6253736553513245, "learning_rate": 9.829683698296839e-07, "loss": 0.5744, "step": 32736 }, { "epoch": 0.9557969110390937, "grad_norm": 0.6686087786635856, "learning_rate": 9.823195458231955e-07, "loss": 0.6412, "step": 32737 }, { "epoch": 0.9558261072669411, "grad_norm": 0.6396761830655758, "learning_rate": 9.816707218167073e-07, "loss": 0.5454, "step": 32738 }, { "epoch": 0.9558553034947884, "grad_norm": 0.6563853556311021, "learning_rate": 9.81021897810219e-07, "loss": 0.6017, "step": 32739 }, { "epoch": 0.9558844997226358, "grad_norm": 0.660978679337511, "learning_rate": 9.803730738037307e-07, "loss": 0.5883, "step": 32740 }, { "epoch": 0.9559136959504831, "grad_norm": 0.6697988076150279, "learning_rate": 9.797242497972425e-07, "loss": 0.6134, "step": 32741 }, { "epoch": 0.9559428921783305, "grad_norm": 0.6172210578181649, "learning_rate": 9.790754257907544e-07, "loss": 0.5508, "step": 32742 }, { "epoch": 0.9559720884061779, "grad_norm": 0.6219163652124639, "learning_rate": 9.784266017842662e-07, "loss": 0.5407, "step": 32743 }, { "epoch": 0.9560012846340253, "grad_norm": 0.6347937241683445, "learning_rate": 9.77777777777778e-07, "loss": 0.5784, "step": 32744 }, { "epoch": 0.9560304808618727, "grad_norm": 0.6707845664772661, "learning_rate": 9.771289537712896e-07, "loss": 0.618, "step": 32745 }, { "epoch": 0.9560596770897201, "grad_norm": 0.6896304653328841, "learning_rate": 9.764801297648014e-07, "loss": 0.6413, "step": 32746 }, { "epoch": 0.9560888733175674, "grad_norm": 0.6468570254165019, "learning_rate": 9.75831305758313e-07, "loss": 0.599, "step": 32747 }, { "epoch": 0.9561180695454148, "grad_norm": 0.6190075818799681, "learning_rate": 9.751824817518248e-07, "loss": 0.5806, "step": 32748 }, { "epoch": 0.9561472657732621, "grad_norm": 0.7024900802295878, "learning_rate": 9.745336577453366e-07, "loss": 0.6589, "step": 32749 }, { "epoch": 0.9561764620011095, "grad_norm": 0.6827173859173542, "learning_rate": 9.738848337388485e-07, "loss": 0.5925, "step": 32750 }, { "epoch": 0.9562056582289569, "grad_norm": 0.6572127799249046, "learning_rate": 9.732360097323603e-07, "loss": 0.5048, "step": 32751 }, { "epoch": 0.9562348544568042, "grad_norm": 0.6528959554253374, "learning_rate": 9.725871857258719e-07, "loss": 0.6254, "step": 32752 }, { "epoch": 0.9562640506846516, "grad_norm": 0.6777464506832719, "learning_rate": 9.719383617193837e-07, "loss": 0.6464, "step": 32753 }, { "epoch": 0.956293246912499, "grad_norm": 0.6642164624547724, "learning_rate": 9.712895377128955e-07, "loss": 0.6391, "step": 32754 }, { "epoch": 0.9563224431403463, "grad_norm": 0.6577019017210898, "learning_rate": 9.706407137064073e-07, "loss": 0.6269, "step": 32755 }, { "epoch": 0.9563516393681937, "grad_norm": 0.6958652209659066, "learning_rate": 9.69991889699919e-07, "loss": 0.6822, "step": 32756 }, { "epoch": 0.956380835596041, "grad_norm": 0.6604412037316135, "learning_rate": 9.693430656934307e-07, "loss": 0.6122, "step": 32757 }, { "epoch": 0.9564100318238884, "grad_norm": 0.6373291729488394, "learning_rate": 9.686942416869423e-07, "loss": 0.6058, "step": 32758 }, { "epoch": 0.9564392280517358, "grad_norm": 0.7115420326847655, "learning_rate": 9.680454176804542e-07, "loss": 0.7431, "step": 32759 }, { "epoch": 0.9564684242795831, "grad_norm": 0.60325543454635, "learning_rate": 9.67396593673966e-07, "loss": 0.5081, "step": 32760 }, { "epoch": 0.9564976205074305, "grad_norm": 0.7300836917781135, "learning_rate": 9.667477696674778e-07, "loss": 0.75, "step": 32761 }, { "epoch": 0.9565268167352778, "grad_norm": 0.5729036220306126, "learning_rate": 9.660989456609896e-07, "loss": 0.4846, "step": 32762 }, { "epoch": 0.9565560129631252, "grad_norm": 0.6732888011094867, "learning_rate": 9.654501216545014e-07, "loss": 0.6014, "step": 32763 }, { "epoch": 0.9565852091909726, "grad_norm": 0.6560121726639523, "learning_rate": 9.64801297648013e-07, "loss": 0.6296, "step": 32764 }, { "epoch": 0.9566144054188199, "grad_norm": 0.6939364212682914, "learning_rate": 9.641524736415248e-07, "loss": 0.6135, "step": 32765 }, { "epoch": 0.9566436016466673, "grad_norm": 0.654639955109274, "learning_rate": 9.635036496350364e-07, "loss": 0.5819, "step": 32766 }, { "epoch": 0.9566727978745146, "grad_norm": 0.6242358193601747, "learning_rate": 9.628548256285483e-07, "loss": 0.59, "step": 32767 }, { "epoch": 0.956701994102362, "grad_norm": 0.6366119872637036, "learning_rate": 9.6220600162206e-07, "loss": 0.56, "step": 32768 }, { "epoch": 0.9567311903302094, "grad_norm": 0.6742064953608599, "learning_rate": 9.61557177615572e-07, "loss": 0.6612, "step": 32769 }, { "epoch": 0.9567603865580567, "grad_norm": 0.6709394991839529, "learning_rate": 9.609083536090837e-07, "loss": 0.6322, "step": 32770 }, { "epoch": 0.9567895827859041, "grad_norm": 0.6435778842135048, "learning_rate": 9.602595296025955e-07, "loss": 0.6041, "step": 32771 }, { "epoch": 0.9568187790137515, "grad_norm": 0.6305705184209954, "learning_rate": 9.596107055961071e-07, "loss": 0.5544, "step": 32772 }, { "epoch": 0.9568479752415988, "grad_norm": 0.6025256285259487, "learning_rate": 9.58961881589619e-07, "loss": 0.5185, "step": 32773 }, { "epoch": 0.9568771714694462, "grad_norm": 0.6267247564715569, "learning_rate": 9.583130575831305e-07, "loss": 0.5728, "step": 32774 }, { "epoch": 0.9569063676972935, "grad_norm": 0.6977746988855051, "learning_rate": 9.576642335766424e-07, "loss": 0.6735, "step": 32775 }, { "epoch": 0.9569355639251409, "grad_norm": 0.6692156673314551, "learning_rate": 9.570154095701542e-07, "loss": 0.6427, "step": 32776 }, { "epoch": 0.9569647601529883, "grad_norm": 0.5901147180627072, "learning_rate": 9.56366585563666e-07, "loss": 0.5278, "step": 32777 }, { "epoch": 0.9569939563808356, "grad_norm": 0.6442171423394888, "learning_rate": 9.557177615571776e-07, "loss": 0.607, "step": 32778 }, { "epoch": 0.957023152608683, "grad_norm": 0.6157417668703393, "learning_rate": 9.550689375506894e-07, "loss": 0.5522, "step": 32779 }, { "epoch": 0.9570523488365303, "grad_norm": 0.6578697446928587, "learning_rate": 9.544201135442012e-07, "loss": 0.5962, "step": 32780 }, { "epoch": 0.9570815450643777, "grad_norm": 0.670823206327889, "learning_rate": 9.53771289537713e-07, "loss": 0.6382, "step": 32781 }, { "epoch": 0.9571107412922251, "grad_norm": 0.6459848998603974, "learning_rate": 9.531224655312248e-07, "loss": 0.5635, "step": 32782 }, { "epoch": 0.9571399375200724, "grad_norm": 0.6719607277104773, "learning_rate": 9.524736415247366e-07, "loss": 0.6015, "step": 32783 }, { "epoch": 0.9571691337479198, "grad_norm": 0.640961709380405, "learning_rate": 9.518248175182483e-07, "loss": 0.5695, "step": 32784 }, { "epoch": 0.9571983299757671, "grad_norm": 0.6315625733934425, "learning_rate": 9.5117599351176e-07, "loss": 0.5813, "step": 32785 }, { "epoch": 0.9572275262036145, "grad_norm": 0.6230596639069597, "learning_rate": 9.505271695052717e-07, "loss": 0.5645, "step": 32786 }, { "epoch": 0.9572567224314619, "grad_norm": 0.6384688336493366, "learning_rate": 9.498783454987835e-07, "loss": 0.6165, "step": 32787 }, { "epoch": 0.9572859186593092, "grad_norm": 0.6856954096025842, "learning_rate": 9.492295214922953e-07, "loss": 0.5593, "step": 32788 }, { "epoch": 0.9573151148871566, "grad_norm": 0.6253479392004407, "learning_rate": 9.48580697485807e-07, "loss": 0.5399, "step": 32789 }, { "epoch": 0.957344311115004, "grad_norm": 0.6699479088365398, "learning_rate": 9.479318734793189e-07, "loss": 0.6314, "step": 32790 }, { "epoch": 0.9573735073428513, "grad_norm": 0.6665591065473851, "learning_rate": 9.472830494728305e-07, "loss": 0.6194, "step": 32791 }, { "epoch": 0.9574027035706987, "grad_norm": 0.6876175961495873, "learning_rate": 9.466342254663423e-07, "loss": 0.6664, "step": 32792 }, { "epoch": 0.957431899798546, "grad_norm": 0.6280431363621196, "learning_rate": 9.459854014598541e-07, "loss": 0.5091, "step": 32793 }, { "epoch": 0.9574610960263934, "grad_norm": 0.6875797377199844, "learning_rate": 9.453365774533658e-07, "loss": 0.6801, "step": 32794 }, { "epoch": 0.9574902922542408, "grad_norm": 0.6622859895725517, "learning_rate": 9.446877534468776e-07, "loss": 0.5847, "step": 32795 }, { "epoch": 0.9575194884820881, "grad_norm": 0.6138706786054461, "learning_rate": 9.440389294403894e-07, "loss": 0.5494, "step": 32796 }, { "epoch": 0.9575486847099355, "grad_norm": 0.7236060550438376, "learning_rate": 9.433901054339012e-07, "loss": 0.6121, "step": 32797 }, { "epoch": 0.9575778809377828, "grad_norm": 0.610417362691097, "learning_rate": 9.427412814274128e-07, "loss": 0.5444, "step": 32798 }, { "epoch": 0.9576070771656302, "grad_norm": 0.6826832418411068, "learning_rate": 9.420924574209247e-07, "loss": 0.6475, "step": 32799 }, { "epoch": 0.9576362733934776, "grad_norm": 0.6461411344185433, "learning_rate": 9.414436334144364e-07, "loss": 0.5694, "step": 32800 }, { "epoch": 0.9576654696213249, "grad_norm": 0.6162398079140748, "learning_rate": 9.407948094079482e-07, "loss": 0.5277, "step": 32801 }, { "epoch": 0.9576946658491723, "grad_norm": 0.6501850884905492, "learning_rate": 9.4014598540146e-07, "loss": 0.5894, "step": 32802 }, { "epoch": 0.9577238620770196, "grad_norm": 0.6419729808673963, "learning_rate": 9.394971613949717e-07, "loss": 0.584, "step": 32803 }, { "epoch": 0.957753058304867, "grad_norm": 0.6560168276352446, "learning_rate": 9.388483373884834e-07, "loss": 0.6165, "step": 32804 }, { "epoch": 0.9577822545327144, "grad_norm": 0.6600874599673827, "learning_rate": 9.381995133819951e-07, "loss": 0.6446, "step": 32805 }, { "epoch": 0.9578114507605617, "grad_norm": 0.6597648023228654, "learning_rate": 9.37550689375507e-07, "loss": 0.6106, "step": 32806 }, { "epoch": 0.9578406469884091, "grad_norm": 0.6308828821013561, "learning_rate": 9.369018653690188e-07, "loss": 0.6063, "step": 32807 }, { "epoch": 0.9578698432162565, "grad_norm": 0.6462699402965701, "learning_rate": 9.362530413625305e-07, "loss": 0.5782, "step": 32808 }, { "epoch": 0.9578990394441038, "grad_norm": 0.6411251040409768, "learning_rate": 9.356042173560423e-07, "loss": 0.5513, "step": 32809 }, { "epoch": 0.9579282356719512, "grad_norm": 0.617764266500621, "learning_rate": 9.349553933495541e-07, "loss": 0.5637, "step": 32810 }, { "epoch": 0.9579574318997985, "grad_norm": 0.6538717735975961, "learning_rate": 9.343065693430657e-07, "loss": 0.588, "step": 32811 }, { "epoch": 0.9579866281276459, "grad_norm": 0.6613001687618494, "learning_rate": 9.336577453365775e-07, "loss": 0.6159, "step": 32812 }, { "epoch": 0.9580158243554933, "grad_norm": 0.7277908848648633, "learning_rate": 9.330089213300892e-07, "loss": 0.6749, "step": 32813 }, { "epoch": 0.9580450205833406, "grad_norm": 0.6043731109920113, "learning_rate": 9.32360097323601e-07, "loss": 0.5387, "step": 32814 }, { "epoch": 0.958074216811188, "grad_norm": 0.6366548759522992, "learning_rate": 9.317112733171129e-07, "loss": 0.571, "step": 32815 }, { "epoch": 0.9581034130390353, "grad_norm": 0.7138145309658394, "learning_rate": 9.310624493106246e-07, "loss": 0.6478, "step": 32816 }, { "epoch": 0.9581326092668827, "grad_norm": 0.6804640937281317, "learning_rate": 9.304136253041364e-07, "loss": 0.5823, "step": 32817 }, { "epoch": 0.9581618054947301, "grad_norm": 0.6156151823707081, "learning_rate": 9.29764801297648e-07, "loss": 0.5538, "step": 32818 }, { "epoch": 0.9581910017225774, "grad_norm": 0.6267826544030868, "learning_rate": 9.291159772911598e-07, "loss": 0.5935, "step": 32819 }, { "epoch": 0.9582201979504248, "grad_norm": 0.6106056425521106, "learning_rate": 9.284671532846716e-07, "loss": 0.5466, "step": 32820 }, { "epoch": 0.9582493941782722, "grad_norm": 0.6465372737927597, "learning_rate": 9.278183292781833e-07, "loss": 0.5912, "step": 32821 }, { "epoch": 0.9582785904061195, "grad_norm": 0.7127248243888854, "learning_rate": 9.271695052716951e-07, "loss": 0.6583, "step": 32822 }, { "epoch": 0.9583077866339669, "grad_norm": 0.6771958373056819, "learning_rate": 9.26520681265207e-07, "loss": 0.6159, "step": 32823 }, { "epoch": 0.9583369828618142, "grad_norm": 0.643583019660795, "learning_rate": 9.258718572587186e-07, "loss": 0.5587, "step": 32824 }, { "epoch": 0.9583661790896616, "grad_norm": 0.638043815419954, "learning_rate": 9.252230332522304e-07, "loss": 0.6035, "step": 32825 }, { "epoch": 0.958395375317509, "grad_norm": 0.5830536900774739, "learning_rate": 9.245742092457422e-07, "loss": 0.5117, "step": 32826 }, { "epoch": 0.9584245715453563, "grad_norm": 0.6105764454574734, "learning_rate": 9.239253852392539e-07, "loss": 0.5298, "step": 32827 }, { "epoch": 0.9584537677732037, "grad_norm": 0.610064204234763, "learning_rate": 9.232765612327657e-07, "loss": 0.487, "step": 32828 }, { "epoch": 0.958482964001051, "grad_norm": 0.6436851637265969, "learning_rate": 9.226277372262775e-07, "loss": 0.53, "step": 32829 }, { "epoch": 0.9585121602288984, "grad_norm": 0.6550861712865821, "learning_rate": 9.219789132197892e-07, "loss": 0.5511, "step": 32830 }, { "epoch": 0.9585413564567458, "grad_norm": 0.6797600516476219, "learning_rate": 9.21330089213301e-07, "loss": 0.5922, "step": 32831 }, { "epoch": 0.9585705526845931, "grad_norm": 0.6909597067420574, "learning_rate": 9.206812652068127e-07, "loss": 0.6868, "step": 32832 }, { "epoch": 0.9585997489124405, "grad_norm": 0.6398815573225387, "learning_rate": 9.200324412003245e-07, "loss": 0.5544, "step": 32833 }, { "epoch": 0.9586289451402878, "grad_norm": 0.6003712725321595, "learning_rate": 9.193836171938363e-07, "loss": 0.5259, "step": 32834 }, { "epoch": 0.9586581413681352, "grad_norm": 0.7018290347954201, "learning_rate": 9.18734793187348e-07, "loss": 0.6797, "step": 32835 }, { "epoch": 0.9586873375959826, "grad_norm": 0.6281649757479103, "learning_rate": 9.180859691808598e-07, "loss": 0.5832, "step": 32836 }, { "epoch": 0.9587165338238299, "grad_norm": 0.6607688847527396, "learning_rate": 9.174371451743714e-07, "loss": 0.5673, "step": 32837 }, { "epoch": 0.9587457300516773, "grad_norm": 0.6101789972287309, "learning_rate": 9.167883211678832e-07, "loss": 0.539, "step": 32838 }, { "epoch": 0.9587749262795247, "grad_norm": 0.634729227334957, "learning_rate": 9.161394971613951e-07, "loss": 0.5435, "step": 32839 }, { "epoch": 0.958804122507372, "grad_norm": 0.6380588780525454, "learning_rate": 9.154906731549068e-07, "loss": 0.5733, "step": 32840 }, { "epoch": 0.9588333187352194, "grad_norm": 0.6523562823140928, "learning_rate": 9.148418491484186e-07, "loss": 0.603, "step": 32841 }, { "epoch": 0.9588625149630667, "grad_norm": 0.6764802399466504, "learning_rate": 9.141930251419304e-07, "loss": 0.5974, "step": 32842 }, { "epoch": 0.9588917111909141, "grad_norm": 0.6387314658650273, "learning_rate": 9.135442011354421e-07, "loss": 0.5546, "step": 32843 }, { "epoch": 0.9589209074187615, "grad_norm": 0.6543326673768054, "learning_rate": 9.128953771289538e-07, "loss": 0.6474, "step": 32844 }, { "epoch": 0.9589501036466088, "grad_norm": 0.6291514439533924, "learning_rate": 9.122465531224655e-07, "loss": 0.5476, "step": 32845 }, { "epoch": 0.9589792998744562, "grad_norm": 0.639451175109988, "learning_rate": 9.115977291159773e-07, "loss": 0.6136, "step": 32846 }, { "epoch": 0.9590084961023035, "grad_norm": 0.6250705065323761, "learning_rate": 9.109489051094892e-07, "loss": 0.5699, "step": 32847 }, { "epoch": 0.9590376923301509, "grad_norm": 0.6411346249194048, "learning_rate": 9.103000811030009e-07, "loss": 0.6198, "step": 32848 }, { "epoch": 0.9590668885579983, "grad_norm": 0.6939516519256287, "learning_rate": 9.096512570965127e-07, "loss": 0.6388, "step": 32849 }, { "epoch": 0.9590960847858456, "grad_norm": 0.602446773310722, "learning_rate": 9.090024330900243e-07, "loss": 0.5586, "step": 32850 }, { "epoch": 0.959125281013693, "grad_norm": 0.6649481702765005, "learning_rate": 9.083536090835361e-07, "loss": 0.6974, "step": 32851 }, { "epoch": 0.9591544772415403, "grad_norm": 0.681586214967684, "learning_rate": 9.077047850770479e-07, "loss": 0.6288, "step": 32852 }, { "epoch": 0.9591836734693877, "grad_norm": 0.7101602759446265, "learning_rate": 9.070559610705597e-07, "loss": 0.6327, "step": 32853 }, { "epoch": 0.9592128696972351, "grad_norm": 0.6551849750599941, "learning_rate": 9.064071370640714e-07, "loss": 0.5782, "step": 32854 }, { "epoch": 0.9592420659250824, "grad_norm": 0.6127487216390426, "learning_rate": 9.057583130575833e-07, "loss": 0.536, "step": 32855 }, { "epoch": 0.9592712621529298, "grad_norm": 0.651519317126642, "learning_rate": 9.051094890510951e-07, "loss": 0.6043, "step": 32856 }, { "epoch": 0.9593004583807772, "grad_norm": 0.7091339269761413, "learning_rate": 9.044606650446067e-07, "loss": 0.6527, "step": 32857 }, { "epoch": 0.9593296546086245, "grad_norm": 0.6821336706636726, "learning_rate": 9.038118410381185e-07, "loss": 0.6694, "step": 32858 }, { "epoch": 0.9593588508364719, "grad_norm": 0.6549980737173654, "learning_rate": 9.031630170316302e-07, "loss": 0.5865, "step": 32859 }, { "epoch": 0.9593880470643192, "grad_norm": 0.6749297356062558, "learning_rate": 9.02514193025142e-07, "loss": 0.6573, "step": 32860 }, { "epoch": 0.9594172432921666, "grad_norm": 0.6577662408751072, "learning_rate": 9.018653690186538e-07, "loss": 0.6236, "step": 32861 }, { "epoch": 0.959446439520014, "grad_norm": 0.669475103107589, "learning_rate": 9.012165450121655e-07, "loss": 0.6234, "step": 32862 }, { "epoch": 0.9594756357478613, "grad_norm": 0.6252450703805309, "learning_rate": 9.005677210056773e-07, "loss": 0.5748, "step": 32863 }, { "epoch": 0.9595048319757087, "grad_norm": 0.6217238927650192, "learning_rate": 8.99918896999189e-07, "loss": 0.5635, "step": 32864 }, { "epoch": 0.9595340282035562, "grad_norm": 0.679294826100383, "learning_rate": 8.992700729927008e-07, "loss": 0.6325, "step": 32865 }, { "epoch": 0.9595632244314035, "grad_norm": 0.6839404441992484, "learning_rate": 8.986212489862126e-07, "loss": 0.6269, "step": 32866 }, { "epoch": 0.9595924206592509, "grad_norm": 0.6940969073855237, "learning_rate": 8.979724249797243e-07, "loss": 0.6443, "step": 32867 }, { "epoch": 0.9596216168870982, "grad_norm": 0.669731865339869, "learning_rate": 8.973236009732361e-07, "loss": 0.6189, "step": 32868 }, { "epoch": 0.9596508131149456, "grad_norm": 0.6650045347407302, "learning_rate": 8.966747769667479e-07, "loss": 0.6618, "step": 32869 }, { "epoch": 0.959680009342793, "grad_norm": 0.6274956396858771, "learning_rate": 8.960259529602595e-07, "loss": 0.5412, "step": 32870 }, { "epoch": 0.9597092055706403, "grad_norm": 0.6352535422453004, "learning_rate": 8.953771289537714e-07, "loss": 0.5542, "step": 32871 }, { "epoch": 0.9597384017984877, "grad_norm": 0.684985100428211, "learning_rate": 8.947283049472831e-07, "loss": 0.6354, "step": 32872 }, { "epoch": 0.959767598026335, "grad_norm": 0.6450541648425077, "learning_rate": 8.940794809407949e-07, "loss": 0.5897, "step": 32873 }, { "epoch": 0.9597967942541824, "grad_norm": 0.676827826904377, "learning_rate": 8.934306569343067e-07, "loss": 0.5975, "step": 32874 }, { "epoch": 0.9598259904820298, "grad_norm": 0.6529778305144973, "learning_rate": 8.927818329278184e-07, "loss": 0.6029, "step": 32875 }, { "epoch": 0.9598551867098771, "grad_norm": 0.6107224921602769, "learning_rate": 8.921330089213302e-07, "loss": 0.4992, "step": 32876 }, { "epoch": 0.9598843829377245, "grad_norm": 0.6588246912262808, "learning_rate": 8.914841849148418e-07, "loss": 0.6392, "step": 32877 }, { "epoch": 0.9599135791655719, "grad_norm": 0.6387273980145084, "learning_rate": 8.908353609083536e-07, "loss": 0.5937, "step": 32878 }, { "epoch": 0.9599427753934192, "grad_norm": 0.6179210855585456, "learning_rate": 8.901865369018655e-07, "loss": 0.5517, "step": 32879 }, { "epoch": 0.9599719716212666, "grad_norm": 0.6890270723820374, "learning_rate": 8.895377128953772e-07, "loss": 0.6332, "step": 32880 }, { "epoch": 0.9600011678491139, "grad_norm": 0.6506544185117015, "learning_rate": 8.88888888888889e-07, "loss": 0.5925, "step": 32881 }, { "epoch": 0.9600303640769613, "grad_norm": 0.6672658019155396, "learning_rate": 8.882400648824008e-07, "loss": 0.6341, "step": 32882 }, { "epoch": 0.9600595603048087, "grad_norm": 0.7193010611380654, "learning_rate": 8.875912408759124e-07, "loss": 0.514, "step": 32883 }, { "epoch": 0.960088756532656, "grad_norm": 0.6518202650375546, "learning_rate": 8.869424168694242e-07, "loss": 0.5769, "step": 32884 }, { "epoch": 0.9601179527605034, "grad_norm": 0.5993581231673364, "learning_rate": 8.86293592862936e-07, "loss": 0.5346, "step": 32885 }, { "epoch": 0.9601471489883507, "grad_norm": 0.5920788844221146, "learning_rate": 8.856447688564477e-07, "loss": 0.5178, "step": 32886 }, { "epoch": 0.9601763452161981, "grad_norm": 0.6654463264117155, "learning_rate": 8.849959448499596e-07, "loss": 0.5813, "step": 32887 }, { "epoch": 0.9602055414440455, "grad_norm": 0.6406311704012303, "learning_rate": 8.843471208434714e-07, "loss": 0.6242, "step": 32888 }, { "epoch": 0.9602347376718928, "grad_norm": 0.6350943016852373, "learning_rate": 8.836982968369831e-07, "loss": 0.6006, "step": 32889 }, { "epoch": 0.9602639338997402, "grad_norm": 0.683566178021813, "learning_rate": 8.830494728304948e-07, "loss": 0.6104, "step": 32890 }, { "epoch": 0.9602931301275875, "grad_norm": 0.6354733067734969, "learning_rate": 8.824006488240065e-07, "loss": 0.5723, "step": 32891 }, { "epoch": 0.9603223263554349, "grad_norm": 0.665079012595288, "learning_rate": 8.817518248175183e-07, "loss": 0.6402, "step": 32892 }, { "epoch": 0.9603515225832823, "grad_norm": 0.630882503505507, "learning_rate": 8.811030008110301e-07, "loss": 0.521, "step": 32893 }, { "epoch": 0.9603807188111296, "grad_norm": 0.6436949581613138, "learning_rate": 8.804541768045418e-07, "loss": 0.5963, "step": 32894 }, { "epoch": 0.960409915038977, "grad_norm": 0.6585101339851455, "learning_rate": 8.798053527980537e-07, "loss": 0.555, "step": 32895 }, { "epoch": 0.9604391112668244, "grad_norm": 0.6443590516772456, "learning_rate": 8.791565287915653e-07, "loss": 0.5851, "step": 32896 }, { "epoch": 0.9604683074946717, "grad_norm": 0.6429203468828166, "learning_rate": 8.785077047850771e-07, "loss": 0.5958, "step": 32897 }, { "epoch": 0.9604975037225191, "grad_norm": 0.6436414302604956, "learning_rate": 8.778588807785889e-07, "loss": 0.6295, "step": 32898 }, { "epoch": 0.9605266999503664, "grad_norm": 0.651805219345814, "learning_rate": 8.772100567721006e-07, "loss": 0.581, "step": 32899 }, { "epoch": 0.9605558961782138, "grad_norm": 0.670711850798343, "learning_rate": 8.765612327656124e-07, "loss": 0.6117, "step": 32900 }, { "epoch": 0.9605850924060612, "grad_norm": 0.6783846838611663, "learning_rate": 8.759124087591242e-07, "loss": 0.6145, "step": 32901 }, { "epoch": 0.9606142886339085, "grad_norm": 0.6256556434434457, "learning_rate": 8.752635847526359e-07, "loss": 0.5367, "step": 32902 }, { "epoch": 0.9606434848617559, "grad_norm": 0.6550643828171794, "learning_rate": 8.746147607461476e-07, "loss": 0.5797, "step": 32903 }, { "epoch": 0.9606726810896032, "grad_norm": 0.5616505249938156, "learning_rate": 8.739659367396594e-07, "loss": 0.4812, "step": 32904 }, { "epoch": 0.9607018773174506, "grad_norm": 0.5824259227370396, "learning_rate": 8.733171127331712e-07, "loss": 0.5143, "step": 32905 }, { "epoch": 0.960731073545298, "grad_norm": 0.6547467301522479, "learning_rate": 8.72668288726683e-07, "loss": 0.5738, "step": 32906 }, { "epoch": 0.9607602697731453, "grad_norm": 0.5843739496174565, "learning_rate": 8.720194647201947e-07, "loss": 0.5145, "step": 32907 }, { "epoch": 0.9607894660009927, "grad_norm": 0.6545551213894666, "learning_rate": 8.713706407137065e-07, "loss": 0.5907, "step": 32908 }, { "epoch": 0.96081866222884, "grad_norm": 0.6790950143710209, "learning_rate": 8.707218167072182e-07, "loss": 0.6266, "step": 32909 }, { "epoch": 0.9608478584566874, "grad_norm": 0.6147519776133402, "learning_rate": 8.700729927007299e-07, "loss": 0.5277, "step": 32910 }, { "epoch": 0.9608770546845348, "grad_norm": 0.586304982717519, "learning_rate": 8.694241686942417e-07, "loss": 0.5014, "step": 32911 }, { "epoch": 0.9609062509123821, "grad_norm": 0.6057958199372584, "learning_rate": 8.687753446877536e-07, "loss": 0.544, "step": 32912 }, { "epoch": 0.9609354471402295, "grad_norm": 0.6317752927458092, "learning_rate": 8.681265206812653e-07, "loss": 0.5877, "step": 32913 }, { "epoch": 0.9609646433680769, "grad_norm": 0.6479721556585955, "learning_rate": 8.674776966747771e-07, "loss": 0.5725, "step": 32914 }, { "epoch": 0.9609938395959242, "grad_norm": 0.5888835257354711, "learning_rate": 8.668288726682889e-07, "loss": 0.495, "step": 32915 }, { "epoch": 0.9610230358237716, "grad_norm": 0.6349004901338063, "learning_rate": 8.661800486618005e-07, "loss": 0.5536, "step": 32916 }, { "epoch": 0.9610522320516189, "grad_norm": 0.6835378516964344, "learning_rate": 8.655312246553123e-07, "loss": 0.6142, "step": 32917 }, { "epoch": 0.9610814282794663, "grad_norm": 0.6306587426834601, "learning_rate": 8.64882400648824e-07, "loss": 0.564, "step": 32918 }, { "epoch": 0.9611106245073137, "grad_norm": 0.6278698635531595, "learning_rate": 8.642335766423358e-07, "loss": 0.5562, "step": 32919 }, { "epoch": 0.961139820735161, "grad_norm": 0.6258560474659528, "learning_rate": 8.635847526358477e-07, "loss": 0.5902, "step": 32920 }, { "epoch": 0.9611690169630084, "grad_norm": 0.6384256979953609, "learning_rate": 8.629359286293594e-07, "loss": 0.5595, "step": 32921 }, { "epoch": 0.9611982131908557, "grad_norm": 0.6353782942727721, "learning_rate": 8.622871046228712e-07, "loss": 0.5504, "step": 32922 }, { "epoch": 0.9612274094187031, "grad_norm": 0.7229440385744498, "learning_rate": 8.616382806163828e-07, "loss": 0.7369, "step": 32923 }, { "epoch": 0.9612566056465505, "grad_norm": 0.6252425506643317, "learning_rate": 8.609894566098946e-07, "loss": 0.5629, "step": 32924 }, { "epoch": 0.9612858018743978, "grad_norm": 0.6521351029275689, "learning_rate": 8.603406326034064e-07, "loss": 0.6126, "step": 32925 }, { "epoch": 0.9613149981022452, "grad_norm": 0.6983280466884118, "learning_rate": 8.596918085969181e-07, "loss": 0.6501, "step": 32926 }, { "epoch": 0.9613441943300925, "grad_norm": 0.6195066236410481, "learning_rate": 8.5904298459043e-07, "loss": 0.5486, "step": 32927 }, { "epoch": 0.9613733905579399, "grad_norm": 0.6350913389763817, "learning_rate": 8.583941605839418e-07, "loss": 0.6034, "step": 32928 }, { "epoch": 0.9614025867857873, "grad_norm": 0.6370136368135055, "learning_rate": 8.577453365774534e-07, "loss": 0.5753, "step": 32929 }, { "epoch": 0.9614317830136346, "grad_norm": 0.6274106351234884, "learning_rate": 8.570965125709652e-07, "loss": 0.5559, "step": 32930 }, { "epoch": 0.961460979241482, "grad_norm": 0.6525807344052281, "learning_rate": 8.564476885644769e-07, "loss": 0.6341, "step": 32931 }, { "epoch": 0.9614901754693294, "grad_norm": 0.6663419996585722, "learning_rate": 8.557988645579887e-07, "loss": 0.6798, "step": 32932 }, { "epoch": 0.9615193716971767, "grad_norm": 0.6802793552101828, "learning_rate": 8.551500405515005e-07, "loss": 0.6787, "step": 32933 }, { "epoch": 0.9615485679250241, "grad_norm": 0.6087939453351631, "learning_rate": 8.545012165450122e-07, "loss": 0.5545, "step": 32934 }, { "epoch": 0.9615777641528714, "grad_norm": 0.6833419555872535, "learning_rate": 8.53852392538524e-07, "loss": 0.6511, "step": 32935 }, { "epoch": 0.9616069603807188, "grad_norm": 0.6744509130663204, "learning_rate": 8.532035685320357e-07, "loss": 0.6324, "step": 32936 }, { "epoch": 0.9616361566085662, "grad_norm": 0.6570024513785843, "learning_rate": 8.525547445255475e-07, "loss": 0.608, "step": 32937 }, { "epoch": 0.9616653528364135, "grad_norm": 0.6467661131106184, "learning_rate": 8.519059205190593e-07, "loss": 0.5676, "step": 32938 }, { "epoch": 0.9616945490642609, "grad_norm": 0.645330459068114, "learning_rate": 8.512570965125711e-07, "loss": 0.6088, "step": 32939 }, { "epoch": 0.9617237452921082, "grad_norm": 0.6137812890529161, "learning_rate": 8.506082725060828e-07, "loss": 0.5404, "step": 32940 }, { "epoch": 0.9617529415199556, "grad_norm": 0.6678722520839026, "learning_rate": 8.499594484995946e-07, "loss": 0.6024, "step": 32941 }, { "epoch": 0.961782137747803, "grad_norm": 0.6240532022912645, "learning_rate": 8.493106244931062e-07, "loss": 0.5727, "step": 32942 }, { "epoch": 0.9618113339756503, "grad_norm": 0.6571125571441373, "learning_rate": 8.48661800486618e-07, "loss": 0.5987, "step": 32943 }, { "epoch": 0.9618405302034977, "grad_norm": 0.6244035583297202, "learning_rate": 8.480129764801299e-07, "loss": 0.5392, "step": 32944 }, { "epoch": 0.961869726431345, "grad_norm": 0.5922689728624064, "learning_rate": 8.473641524736416e-07, "loss": 0.4988, "step": 32945 }, { "epoch": 0.9618989226591924, "grad_norm": 0.6042125298701524, "learning_rate": 8.467153284671534e-07, "loss": 0.5426, "step": 32946 }, { "epoch": 0.9619281188870398, "grad_norm": 0.6433608282337359, "learning_rate": 8.460665044606652e-07, "loss": 0.5625, "step": 32947 }, { "epoch": 0.9619573151148871, "grad_norm": 0.6640568826728954, "learning_rate": 8.454176804541769e-07, "loss": 0.6444, "step": 32948 }, { "epoch": 0.9619865113427345, "grad_norm": 0.6960374877340302, "learning_rate": 8.447688564476886e-07, "loss": 0.7277, "step": 32949 }, { "epoch": 0.9620157075705819, "grad_norm": 0.6547002718782748, "learning_rate": 8.441200324412003e-07, "loss": 0.5664, "step": 32950 }, { "epoch": 0.9620449037984292, "grad_norm": 0.6332080223887112, "learning_rate": 8.434712084347121e-07, "loss": 0.562, "step": 32951 }, { "epoch": 0.9620741000262766, "grad_norm": 0.671733376931817, "learning_rate": 8.42822384428224e-07, "loss": 0.6167, "step": 32952 }, { "epoch": 0.9621032962541239, "grad_norm": 0.637054345454341, "learning_rate": 8.421735604217357e-07, "loss": 0.5776, "step": 32953 }, { "epoch": 0.9621324924819713, "grad_norm": 0.6292352508791643, "learning_rate": 8.415247364152475e-07, "loss": 0.5823, "step": 32954 }, { "epoch": 0.9621616887098187, "grad_norm": 0.6390259228453824, "learning_rate": 8.408759124087591e-07, "loss": 0.5536, "step": 32955 }, { "epoch": 0.962190884937666, "grad_norm": 0.6153930752037565, "learning_rate": 8.402270884022709e-07, "loss": 0.5658, "step": 32956 }, { "epoch": 0.9622200811655134, "grad_norm": 0.652604502517551, "learning_rate": 8.395782643957827e-07, "loss": 0.6125, "step": 32957 }, { "epoch": 0.9622492773933607, "grad_norm": 0.6519953942485816, "learning_rate": 8.389294403892944e-07, "loss": 0.6121, "step": 32958 }, { "epoch": 0.9622784736212081, "grad_norm": 0.6491010992670031, "learning_rate": 8.382806163828062e-07, "loss": 0.5885, "step": 32959 }, { "epoch": 0.9623076698490555, "grad_norm": 0.6427541791848328, "learning_rate": 8.376317923763181e-07, "loss": 0.6441, "step": 32960 }, { "epoch": 0.9623368660769028, "grad_norm": 0.6736428143177691, "learning_rate": 8.369829683698298e-07, "loss": 0.5998, "step": 32961 }, { "epoch": 0.9623660623047502, "grad_norm": 0.6963794075760436, "learning_rate": 8.363341443633415e-07, "loss": 0.6078, "step": 32962 }, { "epoch": 0.9623952585325976, "grad_norm": 0.6208803569467252, "learning_rate": 8.356853203568532e-07, "loss": 0.5611, "step": 32963 }, { "epoch": 0.9624244547604449, "grad_norm": 0.6241495204917826, "learning_rate": 8.35036496350365e-07, "loss": 0.5998, "step": 32964 }, { "epoch": 0.9624536509882923, "grad_norm": 0.6406825302942172, "learning_rate": 8.343876723438768e-07, "loss": 0.6068, "step": 32965 }, { "epoch": 0.9624828472161396, "grad_norm": 0.6161012284074103, "learning_rate": 8.337388483373886e-07, "loss": 0.5359, "step": 32966 }, { "epoch": 0.962512043443987, "grad_norm": 0.6645554266287229, "learning_rate": 8.330900243309003e-07, "loss": 0.6361, "step": 32967 }, { "epoch": 0.9625412396718344, "grad_norm": 0.6001013096456663, "learning_rate": 8.32441200324412e-07, "loss": 0.4969, "step": 32968 }, { "epoch": 0.9625704358996817, "grad_norm": 0.693667297638422, "learning_rate": 8.317923763179238e-07, "loss": 0.6294, "step": 32969 }, { "epoch": 0.9625996321275291, "grad_norm": 0.67520385696705, "learning_rate": 8.311435523114356e-07, "loss": 0.6735, "step": 32970 }, { "epoch": 0.9626288283553764, "grad_norm": 0.6166986869323673, "learning_rate": 8.304947283049474e-07, "loss": 0.5456, "step": 32971 }, { "epoch": 0.9626580245832238, "grad_norm": 0.649354227701602, "learning_rate": 8.298459042984591e-07, "loss": 0.5768, "step": 32972 }, { "epoch": 0.9626872208110712, "grad_norm": 0.6212253267908547, "learning_rate": 8.291970802919709e-07, "loss": 0.5568, "step": 32973 }, { "epoch": 0.9627164170389185, "grad_norm": 0.6517042710020775, "learning_rate": 8.285482562854827e-07, "loss": 0.6138, "step": 32974 }, { "epoch": 0.9627456132667659, "grad_norm": 0.6168478598373893, "learning_rate": 8.278994322789943e-07, "loss": 0.5344, "step": 32975 }, { "epoch": 0.9627748094946132, "grad_norm": 0.6056320678384743, "learning_rate": 8.272506082725062e-07, "loss": 0.5217, "step": 32976 }, { "epoch": 0.9628040057224606, "grad_norm": 0.6809161082180402, "learning_rate": 8.266017842660179e-07, "loss": 0.6408, "step": 32977 }, { "epoch": 0.962833201950308, "grad_norm": 0.6553846869572618, "learning_rate": 8.259529602595297e-07, "loss": 0.6111, "step": 32978 }, { "epoch": 0.9628623981781553, "grad_norm": 0.6237270867753313, "learning_rate": 8.253041362530415e-07, "loss": 0.5637, "step": 32979 }, { "epoch": 0.9628915944060027, "grad_norm": 0.6078703208716192, "learning_rate": 8.246553122465532e-07, "loss": 0.5339, "step": 32980 }, { "epoch": 0.96292079063385, "grad_norm": 0.6689683778909182, "learning_rate": 8.24006488240065e-07, "loss": 0.6222, "step": 32981 }, { "epoch": 0.9629499868616974, "grad_norm": 0.6549239744640929, "learning_rate": 8.233576642335766e-07, "loss": 0.5297, "step": 32982 }, { "epoch": 0.9629791830895448, "grad_norm": 0.6566804650776669, "learning_rate": 8.227088402270884e-07, "loss": 0.6272, "step": 32983 }, { "epoch": 0.9630083793173921, "grad_norm": 0.6360754280097407, "learning_rate": 8.220600162206003e-07, "loss": 0.5983, "step": 32984 }, { "epoch": 0.9630375755452396, "grad_norm": 0.6240669908990499, "learning_rate": 8.21411192214112e-07, "loss": 0.5793, "step": 32985 }, { "epoch": 0.963066771773087, "grad_norm": 0.641870449050645, "learning_rate": 8.207623682076238e-07, "loss": 0.6152, "step": 32986 }, { "epoch": 0.9630959680009343, "grad_norm": 0.6269582482184217, "learning_rate": 8.201135442011356e-07, "loss": 0.5944, "step": 32987 }, { "epoch": 0.9631251642287817, "grad_norm": 0.613404918002975, "learning_rate": 8.194647201946472e-07, "loss": 0.5592, "step": 32988 }, { "epoch": 0.963154360456629, "grad_norm": 0.683338601939784, "learning_rate": 8.18815896188159e-07, "loss": 0.5204, "step": 32989 }, { "epoch": 0.9631835566844764, "grad_norm": 0.6570465939528217, "learning_rate": 8.181670721816707e-07, "loss": 0.6069, "step": 32990 }, { "epoch": 0.9632127529123238, "grad_norm": 0.6624397546126939, "learning_rate": 8.175182481751825e-07, "loss": 0.616, "step": 32991 }, { "epoch": 0.9632419491401711, "grad_norm": 0.6446893995204301, "learning_rate": 8.168694241686944e-07, "loss": 0.5745, "step": 32992 }, { "epoch": 0.9632711453680185, "grad_norm": 0.6334918231796242, "learning_rate": 8.162206001622061e-07, "loss": 0.5915, "step": 32993 }, { "epoch": 0.9633003415958659, "grad_norm": 0.642820371274076, "learning_rate": 8.155717761557179e-07, "loss": 0.6138, "step": 32994 }, { "epoch": 0.9633295378237132, "grad_norm": 0.6774681180995729, "learning_rate": 8.149229521492296e-07, "loss": 0.6604, "step": 32995 }, { "epoch": 0.9633587340515606, "grad_norm": 0.6638689769601855, "learning_rate": 8.142741281427413e-07, "loss": 0.6461, "step": 32996 }, { "epoch": 0.963387930279408, "grad_norm": 0.623481930476911, "learning_rate": 8.136253041362531e-07, "loss": 0.5535, "step": 32997 }, { "epoch": 0.9634171265072553, "grad_norm": 0.7533233123279426, "learning_rate": 8.129764801297649e-07, "loss": 0.6284, "step": 32998 }, { "epoch": 0.9634463227351027, "grad_norm": 0.6350444484246239, "learning_rate": 8.123276561232766e-07, "loss": 0.5867, "step": 32999 }, { "epoch": 0.96347551896295, "grad_norm": 0.6799438950908678, "learning_rate": 8.116788321167885e-07, "loss": 0.6382, "step": 33000 }, { "epoch": 0.9635047151907974, "grad_norm": 0.648236877068025, "learning_rate": 8.110300081103001e-07, "loss": 0.5967, "step": 33001 }, { "epoch": 0.9635339114186448, "grad_norm": 0.6177755878325866, "learning_rate": 8.103811841038119e-07, "loss": 0.5756, "step": 33002 }, { "epoch": 0.9635631076464921, "grad_norm": 0.6449316443352209, "learning_rate": 8.097323600973237e-07, "loss": 0.5584, "step": 33003 }, { "epoch": 0.9635923038743395, "grad_norm": 0.6865523862106174, "learning_rate": 8.090835360908354e-07, "loss": 0.6435, "step": 33004 }, { "epoch": 0.9636215001021868, "grad_norm": 0.687815429622935, "learning_rate": 8.084347120843472e-07, "loss": 0.6421, "step": 33005 }, { "epoch": 0.9636506963300342, "grad_norm": 0.6290889213057369, "learning_rate": 8.07785888077859e-07, "loss": 0.5237, "step": 33006 }, { "epoch": 0.9636798925578816, "grad_norm": 0.6247366401653404, "learning_rate": 8.071370640713707e-07, "loss": 0.5721, "step": 33007 }, { "epoch": 0.9637090887857289, "grad_norm": 0.6443015583626569, "learning_rate": 8.064882400648824e-07, "loss": 0.5995, "step": 33008 }, { "epoch": 0.9637382850135763, "grad_norm": 0.686922727609007, "learning_rate": 8.058394160583942e-07, "loss": 0.6068, "step": 33009 }, { "epoch": 0.9637674812414236, "grad_norm": 0.6219729360996018, "learning_rate": 8.05190592051906e-07, "loss": 0.5572, "step": 33010 }, { "epoch": 0.963796677469271, "grad_norm": 0.9320508724208333, "learning_rate": 8.045417680454178e-07, "loss": 0.5162, "step": 33011 }, { "epoch": 0.9638258736971184, "grad_norm": 0.6236953594521092, "learning_rate": 8.038929440389295e-07, "loss": 0.5605, "step": 33012 }, { "epoch": 0.9638550699249657, "grad_norm": 0.5940404661205488, "learning_rate": 8.032441200324413e-07, "loss": 0.5108, "step": 33013 }, { "epoch": 0.9638842661528131, "grad_norm": 0.6443199124489676, "learning_rate": 8.025952960259529e-07, "loss": 0.6182, "step": 33014 }, { "epoch": 0.9639134623806604, "grad_norm": 0.6671946201615834, "learning_rate": 8.019464720194647e-07, "loss": 0.588, "step": 33015 }, { "epoch": 0.9639426586085078, "grad_norm": 0.6773828638342613, "learning_rate": 8.012976480129765e-07, "loss": 0.6245, "step": 33016 }, { "epoch": 0.9639718548363552, "grad_norm": 0.586165395601557, "learning_rate": 8.006488240064883e-07, "loss": 0.489, "step": 33017 }, { "epoch": 0.9640010510642025, "grad_norm": 0.6382557157997809, "learning_rate": 8.000000000000001e-07, "loss": 0.6111, "step": 33018 }, { "epoch": 0.9640302472920499, "grad_norm": 0.6739399063051098, "learning_rate": 7.993511759935119e-07, "loss": 0.6751, "step": 33019 }, { "epoch": 0.9640594435198973, "grad_norm": 0.5939688264022563, "learning_rate": 7.987023519870236e-07, "loss": 0.4941, "step": 33020 }, { "epoch": 0.9640886397477446, "grad_norm": 0.6943527715503599, "learning_rate": 7.980535279805353e-07, "loss": 0.6622, "step": 33021 }, { "epoch": 0.964117835975592, "grad_norm": 0.6440332482479004, "learning_rate": 7.974047039740471e-07, "loss": 0.5908, "step": 33022 }, { "epoch": 0.9641470322034393, "grad_norm": 0.6135040639447604, "learning_rate": 7.967558799675588e-07, "loss": 0.5736, "step": 33023 }, { "epoch": 0.9641762284312867, "grad_norm": 0.7199368144583351, "learning_rate": 7.961070559610706e-07, "loss": 0.6706, "step": 33024 }, { "epoch": 0.9642054246591341, "grad_norm": 0.7533743279989455, "learning_rate": 7.954582319545825e-07, "loss": 0.6436, "step": 33025 }, { "epoch": 0.9642346208869814, "grad_norm": 0.6186034272604162, "learning_rate": 7.948094079480942e-07, "loss": 0.5455, "step": 33026 }, { "epoch": 0.9642638171148288, "grad_norm": 0.6462542544422762, "learning_rate": 7.94160583941606e-07, "loss": 0.58, "step": 33027 }, { "epoch": 0.9642930133426761, "grad_norm": 0.6217212193093321, "learning_rate": 7.935117599351176e-07, "loss": 0.5847, "step": 33028 }, { "epoch": 0.9643222095705235, "grad_norm": 0.6527770284963664, "learning_rate": 7.928629359286294e-07, "loss": 0.6113, "step": 33029 }, { "epoch": 0.9643514057983709, "grad_norm": 0.6639617764662322, "learning_rate": 7.922141119221412e-07, "loss": 0.545, "step": 33030 }, { "epoch": 0.9643806020262182, "grad_norm": 0.6096891993355028, "learning_rate": 7.915652879156529e-07, "loss": 0.5423, "step": 33031 }, { "epoch": 0.9644097982540656, "grad_norm": 0.6504166854819253, "learning_rate": 7.909164639091647e-07, "loss": 0.5687, "step": 33032 }, { "epoch": 0.964438994481913, "grad_norm": 0.6524380410227574, "learning_rate": 7.902676399026766e-07, "loss": 0.5984, "step": 33033 }, { "epoch": 0.9644681907097603, "grad_norm": 0.6630931034277153, "learning_rate": 7.896188158961882e-07, "loss": 0.6593, "step": 33034 }, { "epoch": 0.9644973869376077, "grad_norm": 0.689718926350049, "learning_rate": 7.889699918897e-07, "loss": 0.6654, "step": 33035 }, { "epoch": 0.964526583165455, "grad_norm": 0.6841293677386661, "learning_rate": 7.883211678832117e-07, "loss": 0.6872, "step": 33036 }, { "epoch": 0.9645557793933024, "grad_norm": 0.6545514454655598, "learning_rate": 7.876723438767235e-07, "loss": 0.6077, "step": 33037 }, { "epoch": 0.9645849756211498, "grad_norm": 0.5978026397965999, "learning_rate": 7.870235198702353e-07, "loss": 0.5415, "step": 33038 }, { "epoch": 0.9646141718489971, "grad_norm": 0.6915964151036726, "learning_rate": 7.86374695863747e-07, "loss": 0.6669, "step": 33039 }, { "epoch": 0.9646433680768445, "grad_norm": 0.6688396046366017, "learning_rate": 7.857258718572588e-07, "loss": 0.6055, "step": 33040 }, { "epoch": 0.9646725643046918, "grad_norm": 0.634656288785834, "learning_rate": 7.850770478507704e-07, "loss": 0.5928, "step": 33041 }, { "epoch": 0.9647017605325392, "grad_norm": 0.6377181908502048, "learning_rate": 7.844282238442823e-07, "loss": 0.5602, "step": 33042 }, { "epoch": 0.9647309567603866, "grad_norm": 0.5981036166743203, "learning_rate": 7.837793998377941e-07, "loss": 0.5317, "step": 33043 }, { "epoch": 0.9647601529882339, "grad_norm": 0.6442574656731173, "learning_rate": 7.831305758313058e-07, "loss": 0.5423, "step": 33044 }, { "epoch": 0.9647893492160813, "grad_norm": 0.6722932151299609, "learning_rate": 7.824817518248176e-07, "loss": 0.6205, "step": 33045 }, { "epoch": 0.9648185454439286, "grad_norm": 0.6839291952402453, "learning_rate": 7.818329278183294e-07, "loss": 0.6565, "step": 33046 }, { "epoch": 0.964847741671776, "grad_norm": 0.6550953119461972, "learning_rate": 7.81184103811841e-07, "loss": 0.6172, "step": 33047 }, { "epoch": 0.9648769378996234, "grad_norm": 0.6518263030781367, "learning_rate": 7.805352798053528e-07, "loss": 0.6148, "step": 33048 }, { "epoch": 0.9649061341274707, "grad_norm": 0.6524001931655476, "learning_rate": 7.798864557988645e-07, "loss": 0.5771, "step": 33049 }, { "epoch": 0.9649353303553181, "grad_norm": 0.6513166235684301, "learning_rate": 7.792376317923764e-07, "loss": 0.5905, "step": 33050 }, { "epoch": 0.9649645265831654, "grad_norm": 0.6544442706876196, "learning_rate": 7.785888077858882e-07, "loss": 0.6357, "step": 33051 }, { "epoch": 0.9649937228110128, "grad_norm": 0.6405328636139921, "learning_rate": 7.779399837794e-07, "loss": 0.5379, "step": 33052 }, { "epoch": 0.9650229190388602, "grad_norm": 0.6327653014268512, "learning_rate": 7.772911597729117e-07, "loss": 0.5604, "step": 33053 }, { "epoch": 0.9650521152667075, "grad_norm": 0.6362019856209286, "learning_rate": 7.766423357664234e-07, "loss": 0.6125, "step": 33054 }, { "epoch": 0.9650813114945549, "grad_norm": 0.6092488146903428, "learning_rate": 7.759935117599351e-07, "loss": 0.5244, "step": 33055 }, { "epoch": 0.9651105077224023, "grad_norm": 0.6512666184591509, "learning_rate": 7.753446877534469e-07, "loss": 0.5944, "step": 33056 }, { "epoch": 0.9651397039502496, "grad_norm": 0.6306693642257842, "learning_rate": 7.746958637469588e-07, "loss": 0.6041, "step": 33057 }, { "epoch": 0.965168900178097, "grad_norm": 0.6875417321093101, "learning_rate": 7.740470397404705e-07, "loss": 0.6456, "step": 33058 }, { "epoch": 0.9651980964059443, "grad_norm": 0.6189774194999873, "learning_rate": 7.733982157339823e-07, "loss": 0.5703, "step": 33059 }, { "epoch": 0.9652272926337917, "grad_norm": 0.6682114148539113, "learning_rate": 7.727493917274939e-07, "loss": 0.6423, "step": 33060 }, { "epoch": 0.9652564888616391, "grad_norm": 0.6270642737034416, "learning_rate": 7.721005677210057e-07, "loss": 0.5675, "step": 33061 }, { "epoch": 0.9652856850894864, "grad_norm": 0.6794223534261437, "learning_rate": 7.714517437145175e-07, "loss": 0.6806, "step": 33062 }, { "epoch": 0.9653148813173338, "grad_norm": 0.6743160985664585, "learning_rate": 7.708029197080292e-07, "loss": 0.5705, "step": 33063 }, { "epoch": 0.9653440775451811, "grad_norm": 0.6055485951024663, "learning_rate": 7.70154095701541e-07, "loss": 0.5326, "step": 33064 }, { "epoch": 0.9653732737730285, "grad_norm": 0.6592865419958017, "learning_rate": 7.695052716950529e-07, "loss": 0.5813, "step": 33065 }, { "epoch": 0.9654024700008759, "grad_norm": 0.6389491668029305, "learning_rate": 7.688564476885646e-07, "loss": 0.5407, "step": 33066 }, { "epoch": 0.9654316662287232, "grad_norm": 0.6196048461707396, "learning_rate": 7.682076236820763e-07, "loss": 0.5426, "step": 33067 }, { "epoch": 0.9654608624565706, "grad_norm": 0.6531244336981914, "learning_rate": 7.67558799675588e-07, "loss": 0.6211, "step": 33068 }, { "epoch": 0.965490058684418, "grad_norm": 0.6114683690057109, "learning_rate": 7.669099756690998e-07, "loss": 0.5642, "step": 33069 }, { "epoch": 0.9655192549122653, "grad_norm": 0.6111774260227874, "learning_rate": 7.662611516626116e-07, "loss": 0.556, "step": 33070 }, { "epoch": 0.9655484511401127, "grad_norm": 0.632144479632573, "learning_rate": 7.656123276561233e-07, "loss": 0.5871, "step": 33071 }, { "epoch": 0.96557764736796, "grad_norm": 0.6274116526241802, "learning_rate": 7.649635036496351e-07, "loss": 0.5815, "step": 33072 }, { "epoch": 0.9656068435958074, "grad_norm": 0.6238924306298551, "learning_rate": 7.643146796431467e-07, "loss": 0.5567, "step": 33073 }, { "epoch": 0.9656360398236548, "grad_norm": 0.6427922680093394, "learning_rate": 7.636658556366586e-07, "loss": 0.6026, "step": 33074 }, { "epoch": 0.9656652360515021, "grad_norm": 0.612196457310038, "learning_rate": 7.630170316301704e-07, "loss": 0.5623, "step": 33075 }, { "epoch": 0.9656944322793495, "grad_norm": 0.655030827921961, "learning_rate": 7.623682076236821e-07, "loss": 0.5718, "step": 33076 }, { "epoch": 0.9657236285071968, "grad_norm": 0.6140477764070391, "learning_rate": 7.617193836171939e-07, "loss": 0.5102, "step": 33077 }, { "epoch": 0.9657528247350442, "grad_norm": 0.5903612876734657, "learning_rate": 7.610705596107057e-07, "loss": 0.5205, "step": 33078 }, { "epoch": 0.9657820209628916, "grad_norm": 0.6321026155938467, "learning_rate": 7.604217356042174e-07, "loss": 0.523, "step": 33079 }, { "epoch": 0.9658112171907389, "grad_norm": 0.6239504175721708, "learning_rate": 7.597729115977291e-07, "loss": 0.5747, "step": 33080 }, { "epoch": 0.9658404134185863, "grad_norm": 0.7320206129150638, "learning_rate": 7.59124087591241e-07, "loss": 0.7196, "step": 33081 }, { "epoch": 0.9658696096464336, "grad_norm": 0.6287385760806472, "learning_rate": 7.584752635847527e-07, "loss": 0.578, "step": 33082 }, { "epoch": 0.965898805874281, "grad_norm": 0.6496595026996795, "learning_rate": 7.578264395782645e-07, "loss": 0.5897, "step": 33083 }, { "epoch": 0.9659280021021284, "grad_norm": 0.6380513339542745, "learning_rate": 7.571776155717763e-07, "loss": 0.6118, "step": 33084 }, { "epoch": 0.9659571983299757, "grad_norm": 0.633606176586515, "learning_rate": 7.56528791565288e-07, "loss": 0.531, "step": 33085 }, { "epoch": 0.9659863945578231, "grad_norm": 0.6449833379047597, "learning_rate": 7.558799675587998e-07, "loss": 0.5976, "step": 33086 }, { "epoch": 0.9660155907856705, "grad_norm": 0.6140985562755651, "learning_rate": 7.552311435523114e-07, "loss": 0.5383, "step": 33087 }, { "epoch": 0.9660447870135178, "grad_norm": 0.5994914751639933, "learning_rate": 7.545823195458232e-07, "loss": 0.5471, "step": 33088 }, { "epoch": 0.9660739832413652, "grad_norm": 0.6140643898224082, "learning_rate": 7.53933495539335e-07, "loss": 0.545, "step": 33089 }, { "epoch": 0.9661031794692125, "grad_norm": 0.6262883292264281, "learning_rate": 7.532846715328468e-07, "loss": 0.5424, "step": 33090 }, { "epoch": 0.9661323756970599, "grad_norm": 0.6504145718563802, "learning_rate": 7.526358475263586e-07, "loss": 0.6332, "step": 33091 }, { "epoch": 0.9661615719249073, "grad_norm": 0.6016839467626676, "learning_rate": 7.519870235198704e-07, "loss": 0.5391, "step": 33092 }, { "epoch": 0.9661907681527546, "grad_norm": 0.6420212518701539, "learning_rate": 7.51338199513382e-07, "loss": 0.5657, "step": 33093 }, { "epoch": 0.966219964380602, "grad_norm": 0.6687708558932722, "learning_rate": 7.506893755068938e-07, "loss": 0.6132, "step": 33094 }, { "epoch": 0.9662491606084493, "grad_norm": 0.663926859009792, "learning_rate": 7.500405515004055e-07, "loss": 0.6063, "step": 33095 }, { "epoch": 0.9662783568362967, "grad_norm": 0.7214926056266343, "learning_rate": 7.493917274939173e-07, "loss": 0.6108, "step": 33096 }, { "epoch": 0.9663075530641441, "grad_norm": 0.6786934558179558, "learning_rate": 7.487429034874291e-07, "loss": 0.656, "step": 33097 }, { "epoch": 0.9663367492919914, "grad_norm": 0.6758135116710338, "learning_rate": 7.480940794809409e-07, "loss": 0.6668, "step": 33098 }, { "epoch": 0.9663659455198388, "grad_norm": 0.6274289159467494, "learning_rate": 7.474452554744527e-07, "loss": 0.5535, "step": 33099 }, { "epoch": 0.9663951417476861, "grad_norm": 0.609495738362258, "learning_rate": 7.467964314679643e-07, "loss": 0.5504, "step": 33100 }, { "epoch": 0.9664243379755335, "grad_norm": 0.6239437292569567, "learning_rate": 7.461476074614761e-07, "loss": 0.5382, "step": 33101 }, { "epoch": 0.9664535342033809, "grad_norm": 0.6678857592613928, "learning_rate": 7.454987834549879e-07, "loss": 0.6123, "step": 33102 }, { "epoch": 0.9664827304312282, "grad_norm": 0.621570895114462, "learning_rate": 7.448499594484996e-07, "loss": 0.5711, "step": 33103 }, { "epoch": 0.9665119266590756, "grad_norm": 0.590555917130252, "learning_rate": 7.442011354420114e-07, "loss": 0.5181, "step": 33104 }, { "epoch": 0.966541122886923, "grad_norm": 0.650667663672475, "learning_rate": 7.435523114355232e-07, "loss": 0.5894, "step": 33105 }, { "epoch": 0.9665703191147704, "grad_norm": 0.6884038192816829, "learning_rate": 7.429034874290349e-07, "loss": 0.6023, "step": 33106 }, { "epoch": 0.9665995153426178, "grad_norm": 0.6617778264205563, "learning_rate": 7.422546634225467e-07, "loss": 0.6567, "step": 33107 }, { "epoch": 0.9666287115704651, "grad_norm": 0.6736345091857757, "learning_rate": 7.416058394160585e-07, "loss": 0.6004, "step": 33108 }, { "epoch": 0.9666579077983125, "grad_norm": 0.6595673353625882, "learning_rate": 7.409570154095702e-07, "loss": 0.6559, "step": 33109 }, { "epoch": 0.9666871040261599, "grad_norm": 0.6222921003196291, "learning_rate": 7.40308191403082e-07, "loss": 0.5644, "step": 33110 }, { "epoch": 0.9667163002540072, "grad_norm": 0.6516575770286409, "learning_rate": 7.396593673965938e-07, "loss": 0.6094, "step": 33111 }, { "epoch": 0.9667454964818546, "grad_norm": 0.6570641611210983, "learning_rate": 7.390105433901055e-07, "loss": 0.6242, "step": 33112 }, { "epoch": 0.966774692709702, "grad_norm": 0.6627580890711275, "learning_rate": 7.383617193836172e-07, "loss": 0.5945, "step": 33113 }, { "epoch": 0.9668038889375493, "grad_norm": 0.6495208681741613, "learning_rate": 7.37712895377129e-07, "loss": 0.5927, "step": 33114 }, { "epoch": 0.9668330851653967, "grad_norm": 0.6775536351618094, "learning_rate": 7.370640713706408e-07, "loss": 0.6424, "step": 33115 }, { "epoch": 0.966862281393244, "grad_norm": 0.6051837613642643, "learning_rate": 7.364152473641526e-07, "loss": 0.5203, "step": 33116 }, { "epoch": 0.9668914776210914, "grad_norm": 0.6629361455972897, "learning_rate": 7.357664233576643e-07, "loss": 0.6147, "step": 33117 }, { "epoch": 0.9669206738489388, "grad_norm": 0.7231013328572384, "learning_rate": 7.351175993511761e-07, "loss": 0.6566, "step": 33118 }, { "epoch": 0.9669498700767861, "grad_norm": 0.6433451984918279, "learning_rate": 7.344687753446877e-07, "loss": 0.568, "step": 33119 }, { "epoch": 0.9669790663046335, "grad_norm": 0.6053442708229059, "learning_rate": 7.338199513381995e-07, "loss": 0.5357, "step": 33120 }, { "epoch": 0.9670082625324808, "grad_norm": 0.6601508741050879, "learning_rate": 7.331711273317113e-07, "loss": 0.6028, "step": 33121 }, { "epoch": 0.9670374587603282, "grad_norm": 0.6855770203975853, "learning_rate": 7.32522303325223e-07, "loss": 0.6575, "step": 33122 }, { "epoch": 0.9670666549881756, "grad_norm": 0.6252035867503836, "learning_rate": 7.318734793187349e-07, "loss": 0.5364, "step": 33123 }, { "epoch": 0.9670958512160229, "grad_norm": 0.6546325246157123, "learning_rate": 7.312246553122467e-07, "loss": 0.5869, "step": 33124 }, { "epoch": 0.9671250474438703, "grad_norm": 0.6522601444618388, "learning_rate": 7.305758313057584e-07, "loss": 0.6056, "step": 33125 }, { "epoch": 0.9671542436717177, "grad_norm": 0.6472292547995617, "learning_rate": 7.299270072992701e-07, "loss": 0.5678, "step": 33126 }, { "epoch": 0.967183439899565, "grad_norm": 0.6278618654739943, "learning_rate": 7.292781832927818e-07, "loss": 0.5543, "step": 33127 }, { "epoch": 0.9672126361274124, "grad_norm": 0.6436946708390663, "learning_rate": 7.286293592862936e-07, "loss": 0.5982, "step": 33128 }, { "epoch": 0.9672418323552597, "grad_norm": 0.6415634009704803, "learning_rate": 7.279805352798054e-07, "loss": 0.5685, "step": 33129 }, { "epoch": 0.9672710285831071, "grad_norm": 0.6494910068560757, "learning_rate": 7.273317112733172e-07, "loss": 0.6034, "step": 33130 }, { "epoch": 0.9673002248109545, "grad_norm": 0.5874307632481834, "learning_rate": 7.26682887266829e-07, "loss": 0.5023, "step": 33131 }, { "epoch": 0.9673294210388018, "grad_norm": 0.6636564248018636, "learning_rate": 7.260340632603408e-07, "loss": 0.5607, "step": 33132 }, { "epoch": 0.9673586172666492, "grad_norm": 0.6717394516595789, "learning_rate": 7.253852392538524e-07, "loss": 0.6182, "step": 33133 }, { "epoch": 0.9673878134944965, "grad_norm": 0.6225886292239314, "learning_rate": 7.247364152473642e-07, "loss": 0.6072, "step": 33134 }, { "epoch": 0.9674170097223439, "grad_norm": 0.6475690567000227, "learning_rate": 7.24087591240876e-07, "loss": 0.6317, "step": 33135 }, { "epoch": 0.9674462059501913, "grad_norm": 0.6503312493026878, "learning_rate": 7.234387672343877e-07, "loss": 0.6597, "step": 33136 }, { "epoch": 0.9674754021780386, "grad_norm": 0.6406769369212729, "learning_rate": 7.227899432278995e-07, "loss": 0.6064, "step": 33137 }, { "epoch": 0.967504598405886, "grad_norm": 0.6551346195286066, "learning_rate": 7.221411192214114e-07, "loss": 0.6357, "step": 33138 }, { "epoch": 0.9675337946337333, "grad_norm": 0.6499855300785481, "learning_rate": 7.21492295214923e-07, "loss": 0.6085, "step": 33139 }, { "epoch": 0.9675629908615807, "grad_norm": 0.6513575757110913, "learning_rate": 7.208434712084348e-07, "loss": 0.5802, "step": 33140 }, { "epoch": 0.9675921870894281, "grad_norm": 0.6185448955078637, "learning_rate": 7.201946472019465e-07, "loss": 0.5126, "step": 33141 }, { "epoch": 0.9676213833172754, "grad_norm": 0.6198108538779467, "learning_rate": 7.195458231954583e-07, "loss": 0.5459, "step": 33142 }, { "epoch": 0.9676505795451228, "grad_norm": 0.6442962261262666, "learning_rate": 7.188969991889701e-07, "loss": 0.5785, "step": 33143 }, { "epoch": 0.9676797757729702, "grad_norm": 0.6325190001888773, "learning_rate": 7.182481751824818e-07, "loss": 0.5676, "step": 33144 }, { "epoch": 0.9677089720008175, "grad_norm": 0.6482990213482722, "learning_rate": 7.175993511759936e-07, "loss": 0.5829, "step": 33145 }, { "epoch": 0.9677381682286649, "grad_norm": 0.6616986211837462, "learning_rate": 7.169505271695052e-07, "loss": 0.6074, "step": 33146 }, { "epoch": 0.9677673644565122, "grad_norm": 0.6669369761071043, "learning_rate": 7.163017031630171e-07, "loss": 0.6617, "step": 33147 }, { "epoch": 0.9677965606843596, "grad_norm": 0.6349372768875648, "learning_rate": 7.156528791565289e-07, "loss": 0.5857, "step": 33148 }, { "epoch": 0.967825756912207, "grad_norm": 0.7052725105883608, "learning_rate": 7.150040551500406e-07, "loss": 0.6648, "step": 33149 }, { "epoch": 0.9678549531400543, "grad_norm": 0.6056666563847916, "learning_rate": 7.143552311435524e-07, "loss": 0.5462, "step": 33150 }, { "epoch": 0.9678841493679017, "grad_norm": 0.6636894653034163, "learning_rate": 7.137064071370642e-07, "loss": 0.6144, "step": 33151 }, { "epoch": 0.967913345595749, "grad_norm": 0.6435711114639862, "learning_rate": 7.130575831305758e-07, "loss": 0.5893, "step": 33152 }, { "epoch": 0.9679425418235964, "grad_norm": 0.6029167041349146, "learning_rate": 7.124087591240876e-07, "loss": 0.5293, "step": 33153 }, { "epoch": 0.9679717380514438, "grad_norm": 2.693534283393232, "learning_rate": 7.117599351175993e-07, "loss": 0.5558, "step": 33154 }, { "epoch": 0.9680009342792911, "grad_norm": 0.5963666338973423, "learning_rate": 7.111111111111112e-07, "loss": 0.5196, "step": 33155 }, { "epoch": 0.9680301305071385, "grad_norm": 0.6334256776871102, "learning_rate": 7.10462287104623e-07, "loss": 0.5437, "step": 33156 }, { "epoch": 0.9680593267349858, "grad_norm": 0.6058865912500878, "learning_rate": 7.098134630981347e-07, "loss": 0.5459, "step": 33157 }, { "epoch": 0.9680885229628332, "grad_norm": 0.6366477183527139, "learning_rate": 7.091646390916465e-07, "loss": 0.6057, "step": 33158 }, { "epoch": 0.9681177191906806, "grad_norm": 0.6688838318159186, "learning_rate": 7.085158150851581e-07, "loss": 0.6357, "step": 33159 }, { "epoch": 0.9681469154185279, "grad_norm": 0.6563028285378372, "learning_rate": 7.078669910786699e-07, "loss": 0.6057, "step": 33160 }, { "epoch": 0.9681761116463753, "grad_norm": 0.6275500244566877, "learning_rate": 7.072181670721817e-07, "loss": 0.5668, "step": 33161 }, { "epoch": 0.9682053078742227, "grad_norm": 0.6607590994575934, "learning_rate": 7.065693430656934e-07, "loss": 0.6301, "step": 33162 }, { "epoch": 0.96823450410207, "grad_norm": 0.6465179205785478, "learning_rate": 7.059205190592053e-07, "loss": 0.5837, "step": 33163 }, { "epoch": 0.9682637003299174, "grad_norm": 0.6401596109494219, "learning_rate": 7.052716950527171e-07, "loss": 0.6133, "step": 33164 }, { "epoch": 0.9682928965577647, "grad_norm": 0.6525067416576745, "learning_rate": 7.046228710462287e-07, "loss": 0.6162, "step": 33165 }, { "epoch": 0.9683220927856121, "grad_norm": 0.6853878447019032, "learning_rate": 7.039740470397405e-07, "loss": 0.6296, "step": 33166 }, { "epoch": 0.9683512890134595, "grad_norm": 0.5998558226698091, "learning_rate": 7.033252230332523e-07, "loss": 0.5417, "step": 33167 }, { "epoch": 0.9683804852413068, "grad_norm": 0.6359197099423339, "learning_rate": 7.02676399026764e-07, "loss": 0.5891, "step": 33168 }, { "epoch": 0.9684096814691542, "grad_norm": 0.6565159343418113, "learning_rate": 7.020275750202758e-07, "loss": 0.6464, "step": 33169 }, { "epoch": 0.9684388776970015, "grad_norm": 0.6698421481136978, "learning_rate": 7.013787510137877e-07, "loss": 0.6125, "step": 33170 }, { "epoch": 0.9684680739248489, "grad_norm": 0.6215827942368183, "learning_rate": 7.007299270072994e-07, "loss": 0.56, "step": 33171 }, { "epoch": 0.9684972701526963, "grad_norm": 0.5811397586675525, "learning_rate": 7.000811030008111e-07, "loss": 0.5044, "step": 33172 }, { "epoch": 0.9685264663805436, "grad_norm": 0.6402675827512286, "learning_rate": 6.994322789943228e-07, "loss": 0.5748, "step": 33173 }, { "epoch": 0.968555662608391, "grad_norm": 0.6225826178121082, "learning_rate": 6.987834549878346e-07, "loss": 0.5653, "step": 33174 }, { "epoch": 0.9685848588362383, "grad_norm": 0.6276244207167888, "learning_rate": 6.981346309813464e-07, "loss": 0.5713, "step": 33175 }, { "epoch": 0.9686140550640857, "grad_norm": 0.6345970432006692, "learning_rate": 6.974858069748581e-07, "loss": 0.5909, "step": 33176 }, { "epoch": 0.9686432512919331, "grad_norm": 0.6484232919579802, "learning_rate": 6.968369829683699e-07, "loss": 0.6135, "step": 33177 }, { "epoch": 0.9686724475197804, "grad_norm": 0.6433168500085782, "learning_rate": 6.961881589618815e-07, "loss": 0.5902, "step": 33178 }, { "epoch": 0.9687016437476278, "grad_norm": 0.645537555846732, "learning_rate": 6.955393349553934e-07, "loss": 0.6273, "step": 33179 }, { "epoch": 0.9687308399754752, "grad_norm": 0.6725818306116215, "learning_rate": 6.948905109489052e-07, "loss": 0.6408, "step": 33180 }, { "epoch": 0.9687600362033225, "grad_norm": 0.609802262763253, "learning_rate": 6.942416869424169e-07, "loss": 0.5574, "step": 33181 }, { "epoch": 0.9687892324311699, "grad_norm": 0.6762049182553913, "learning_rate": 6.935928629359287e-07, "loss": 0.6583, "step": 33182 }, { "epoch": 0.9688184286590172, "grad_norm": 0.6088064560411585, "learning_rate": 6.929440389294405e-07, "loss": 0.5305, "step": 33183 }, { "epoch": 0.9688476248868646, "grad_norm": 0.5824837263204556, "learning_rate": 6.922952149229522e-07, "loss": 0.5319, "step": 33184 }, { "epoch": 0.968876821114712, "grad_norm": 0.7370937031950933, "learning_rate": 6.916463909164639e-07, "loss": 0.7219, "step": 33185 }, { "epoch": 0.9689060173425593, "grad_norm": 0.6322809936353994, "learning_rate": 6.909975669099756e-07, "loss": 0.5915, "step": 33186 }, { "epoch": 0.9689352135704067, "grad_norm": 0.7503324615946275, "learning_rate": 6.903487429034875e-07, "loss": 0.6752, "step": 33187 }, { "epoch": 0.968964409798254, "grad_norm": 0.6395127284007817, "learning_rate": 6.896999188969993e-07, "loss": 0.6099, "step": 33188 }, { "epoch": 0.9689936060261014, "grad_norm": 0.6740770726570584, "learning_rate": 6.89051094890511e-07, "loss": 0.6353, "step": 33189 }, { "epoch": 0.9690228022539488, "grad_norm": 0.6339023809202258, "learning_rate": 6.884022708840228e-07, "loss": 0.5964, "step": 33190 }, { "epoch": 0.9690519984817961, "grad_norm": 0.6343893665235832, "learning_rate": 6.877534468775346e-07, "loss": 0.589, "step": 33191 }, { "epoch": 0.9690811947096435, "grad_norm": 0.7010942174698479, "learning_rate": 6.871046228710462e-07, "loss": 0.6536, "step": 33192 }, { "epoch": 0.9691103909374909, "grad_norm": 0.5909450870557902, "learning_rate": 6.86455798864558e-07, "loss": 0.5112, "step": 33193 }, { "epoch": 0.9691395871653382, "grad_norm": 0.6445249573448185, "learning_rate": 6.858069748580698e-07, "loss": 0.5704, "step": 33194 }, { "epoch": 0.9691687833931856, "grad_norm": 0.7153607927827501, "learning_rate": 6.851581508515816e-07, "loss": 0.6871, "step": 33195 }, { "epoch": 0.9691979796210329, "grad_norm": 0.6919025970213437, "learning_rate": 6.845093268450934e-07, "loss": 0.6429, "step": 33196 }, { "epoch": 0.9692271758488803, "grad_norm": 0.6078612804112272, "learning_rate": 6.838605028386052e-07, "loss": 0.5519, "step": 33197 }, { "epoch": 0.9692563720767277, "grad_norm": 0.6387314642557288, "learning_rate": 6.832116788321168e-07, "loss": 0.5658, "step": 33198 }, { "epoch": 0.969285568304575, "grad_norm": 0.6198664488235784, "learning_rate": 6.825628548256286e-07, "loss": 0.5251, "step": 33199 }, { "epoch": 0.9693147645324224, "grad_norm": 0.6109452749302708, "learning_rate": 6.819140308191403e-07, "loss": 0.5415, "step": 33200 }, { "epoch": 0.9693439607602697, "grad_norm": 0.6554419375734041, "learning_rate": 6.812652068126521e-07, "loss": 0.6323, "step": 33201 }, { "epoch": 0.9693731569881171, "grad_norm": 0.6087578825682056, "learning_rate": 6.806163828061639e-07, "loss": 0.5301, "step": 33202 }, { "epoch": 0.9694023532159645, "grad_norm": 0.6735283580745294, "learning_rate": 6.799675587996757e-07, "loss": 0.6156, "step": 33203 }, { "epoch": 0.9694315494438118, "grad_norm": 0.6428065777555432, "learning_rate": 6.793187347931875e-07, "loss": 0.594, "step": 33204 }, { "epoch": 0.9694607456716592, "grad_norm": 0.6505461481422931, "learning_rate": 6.786699107866991e-07, "loss": 0.5973, "step": 33205 }, { "epoch": 0.9694899418995065, "grad_norm": 0.5991079386145947, "learning_rate": 6.780210867802109e-07, "loss": 0.5047, "step": 33206 }, { "epoch": 0.9695191381273539, "grad_norm": 0.6676098883009526, "learning_rate": 6.773722627737227e-07, "loss": 0.6713, "step": 33207 }, { "epoch": 0.9695483343552013, "grad_norm": 0.6497713091681458, "learning_rate": 6.767234387672344e-07, "loss": 0.5989, "step": 33208 }, { "epoch": 0.9695775305830486, "grad_norm": 0.6366217403773915, "learning_rate": 6.760746147607462e-07, "loss": 0.5908, "step": 33209 }, { "epoch": 0.969606726810896, "grad_norm": 0.6287309173406421, "learning_rate": 6.75425790754258e-07, "loss": 0.5874, "step": 33210 }, { "epoch": 0.9696359230387434, "grad_norm": 0.6816420741827298, "learning_rate": 6.747769667477696e-07, "loss": 0.6634, "step": 33211 }, { "epoch": 0.9696651192665907, "grad_norm": 0.6855134040266914, "learning_rate": 6.741281427412815e-07, "loss": 0.6824, "step": 33212 }, { "epoch": 0.9696943154944381, "grad_norm": 0.6202864346534175, "learning_rate": 6.734793187347932e-07, "loss": 0.5798, "step": 33213 }, { "epoch": 0.9697235117222854, "grad_norm": 0.6650634294232902, "learning_rate": 6.72830494728305e-07, "loss": 0.5646, "step": 33214 }, { "epoch": 0.9697527079501328, "grad_norm": 0.6916864047506158, "learning_rate": 6.721816707218168e-07, "loss": 0.6246, "step": 33215 }, { "epoch": 0.9697819041779802, "grad_norm": 0.6591330843305783, "learning_rate": 6.715328467153285e-07, "loss": 0.6265, "step": 33216 }, { "epoch": 0.9698111004058275, "grad_norm": 0.6392218210173556, "learning_rate": 6.708840227088403e-07, "loss": 0.5563, "step": 33217 }, { "epoch": 0.9698402966336749, "grad_norm": 0.7090544542369104, "learning_rate": 6.702351987023519e-07, "loss": 0.7132, "step": 33218 }, { "epoch": 0.9698694928615222, "grad_norm": 0.6688012639966974, "learning_rate": 6.695863746958637e-07, "loss": 0.6616, "step": 33219 }, { "epoch": 0.9698986890893696, "grad_norm": 0.6332481428355441, "learning_rate": 6.689375506893756e-07, "loss": 0.5584, "step": 33220 }, { "epoch": 0.969927885317217, "grad_norm": 0.6278687236341377, "learning_rate": 6.682887266828874e-07, "loss": 0.6024, "step": 33221 }, { "epoch": 0.9699570815450643, "grad_norm": 0.6348820382154514, "learning_rate": 6.676399026763991e-07, "loss": 0.5793, "step": 33222 }, { "epoch": 0.9699862777729117, "grad_norm": 0.6388953485144628, "learning_rate": 6.669910786699109e-07, "loss": 0.5819, "step": 33223 }, { "epoch": 0.970015474000759, "grad_norm": 0.6686973421059994, "learning_rate": 6.663422546634225e-07, "loss": 0.661, "step": 33224 }, { "epoch": 0.9700446702286064, "grad_norm": 0.5895925615842891, "learning_rate": 6.656934306569343e-07, "loss": 0.5004, "step": 33225 }, { "epoch": 0.9700738664564539, "grad_norm": 0.6667105788440392, "learning_rate": 6.650446066504461e-07, "loss": 0.5952, "step": 33226 }, { "epoch": 0.9701030626843012, "grad_norm": 0.6844030036501659, "learning_rate": 6.643957826439578e-07, "loss": 0.6794, "step": 33227 }, { "epoch": 0.9701322589121486, "grad_norm": 0.6745142459778564, "learning_rate": 6.637469586374697e-07, "loss": 0.6269, "step": 33228 }, { "epoch": 0.970161455139996, "grad_norm": 0.6207717900166088, "learning_rate": 6.630981346309815e-07, "loss": 0.5533, "step": 33229 }, { "epoch": 0.9701906513678433, "grad_norm": 0.6411482728688942, "learning_rate": 6.624493106244932e-07, "loss": 0.6177, "step": 33230 }, { "epoch": 0.9702198475956907, "grad_norm": 0.5880620659948756, "learning_rate": 6.618004866180049e-07, "loss": 0.5109, "step": 33231 }, { "epoch": 0.970249043823538, "grad_norm": 0.6142396912762795, "learning_rate": 6.611516626115166e-07, "loss": 0.5515, "step": 33232 }, { "epoch": 0.9702782400513854, "grad_norm": 0.6384759585339564, "learning_rate": 6.605028386050284e-07, "loss": 0.6038, "step": 33233 }, { "epoch": 0.9703074362792328, "grad_norm": 0.6646494573283503, "learning_rate": 6.598540145985402e-07, "loss": 0.6532, "step": 33234 }, { "epoch": 0.9703366325070801, "grad_norm": 0.696146504208348, "learning_rate": 6.59205190592052e-07, "loss": 0.6705, "step": 33235 }, { "epoch": 0.9703658287349275, "grad_norm": 0.6620422375930206, "learning_rate": 6.585563665855638e-07, "loss": 0.6355, "step": 33236 }, { "epoch": 0.9703950249627749, "grad_norm": 0.627268439076021, "learning_rate": 6.579075425790756e-07, "loss": 0.5628, "step": 33237 }, { "epoch": 0.9704242211906222, "grad_norm": 0.632436944871228, "learning_rate": 6.572587185725872e-07, "loss": 0.5697, "step": 33238 }, { "epoch": 0.9704534174184696, "grad_norm": 0.6564328869659616, "learning_rate": 6.56609894566099e-07, "loss": 0.6179, "step": 33239 }, { "epoch": 0.9704826136463169, "grad_norm": 0.6288941703638109, "learning_rate": 6.559610705596107e-07, "loss": 0.5547, "step": 33240 }, { "epoch": 0.9705118098741643, "grad_norm": 0.6263473520524268, "learning_rate": 6.553122465531225e-07, "loss": 0.524, "step": 33241 }, { "epoch": 0.9705410061020117, "grad_norm": 0.6384307317803953, "learning_rate": 6.546634225466343e-07, "loss": 0.6225, "step": 33242 }, { "epoch": 0.970570202329859, "grad_norm": 0.689957350546849, "learning_rate": 6.54014598540146e-07, "loss": 0.6664, "step": 33243 }, { "epoch": 0.9705993985577064, "grad_norm": 0.7079284347157426, "learning_rate": 6.533657745336578e-07, "loss": 0.5993, "step": 33244 }, { "epoch": 0.9706285947855537, "grad_norm": 0.6397272964411197, "learning_rate": 6.527169505271695e-07, "loss": 0.5835, "step": 33245 }, { "epoch": 0.9706577910134011, "grad_norm": 0.6465502780728004, "learning_rate": 6.520681265206813e-07, "loss": 0.592, "step": 33246 }, { "epoch": 0.9706869872412485, "grad_norm": 0.6618988722596639, "learning_rate": 6.514193025141931e-07, "loss": 0.6132, "step": 33247 }, { "epoch": 0.9707161834690958, "grad_norm": 0.6784828144135872, "learning_rate": 6.507704785077048e-07, "loss": 0.6138, "step": 33248 }, { "epoch": 0.9707453796969432, "grad_norm": 0.5949131827574384, "learning_rate": 6.501216545012166e-07, "loss": 0.4929, "step": 33249 }, { "epoch": 0.9707745759247906, "grad_norm": 0.6402360854212449, "learning_rate": 6.494728304947284e-07, "loss": 0.5572, "step": 33250 }, { "epoch": 0.9708037721526379, "grad_norm": 0.6868954221979765, "learning_rate": 6.4882400648824e-07, "loss": 0.6195, "step": 33251 }, { "epoch": 0.9708329683804853, "grad_norm": 0.6651115763010943, "learning_rate": 6.481751824817519e-07, "loss": 0.5857, "step": 33252 }, { "epoch": 0.9708621646083326, "grad_norm": 0.6825341880204584, "learning_rate": 6.475263584752637e-07, "loss": 0.6605, "step": 33253 }, { "epoch": 0.97089136083618, "grad_norm": 0.5871610378304326, "learning_rate": 6.468775344687754e-07, "loss": 0.4706, "step": 33254 }, { "epoch": 0.9709205570640274, "grad_norm": 0.6494153349108939, "learning_rate": 6.462287104622872e-07, "loss": 0.6235, "step": 33255 }, { "epoch": 0.9709497532918747, "grad_norm": 0.6369716117778439, "learning_rate": 6.45579886455799e-07, "loss": 0.5804, "step": 33256 }, { "epoch": 0.9709789495197221, "grad_norm": 0.6433246203971179, "learning_rate": 6.449310624493106e-07, "loss": 0.6074, "step": 33257 }, { "epoch": 0.9710081457475694, "grad_norm": 0.6290278907759048, "learning_rate": 6.442822384428224e-07, "loss": 0.5844, "step": 33258 }, { "epoch": 0.9710373419754168, "grad_norm": 0.6036557229502113, "learning_rate": 6.436334144363341e-07, "loss": 0.5176, "step": 33259 }, { "epoch": 0.9710665382032642, "grad_norm": 0.656777063134979, "learning_rate": 6.42984590429846e-07, "loss": 0.6058, "step": 33260 }, { "epoch": 0.9710957344311115, "grad_norm": 0.6596422363721614, "learning_rate": 6.423357664233578e-07, "loss": 0.6032, "step": 33261 }, { "epoch": 0.9711249306589589, "grad_norm": 0.7441090937077307, "learning_rate": 6.416869424168695e-07, "loss": 0.7263, "step": 33262 }, { "epoch": 0.9711541268868062, "grad_norm": 0.6273537048617781, "learning_rate": 6.410381184103813e-07, "loss": 0.5766, "step": 33263 }, { "epoch": 0.9711833231146536, "grad_norm": 0.6294094711013004, "learning_rate": 6.403892944038929e-07, "loss": 0.5681, "step": 33264 }, { "epoch": 0.971212519342501, "grad_norm": 0.6880442925884757, "learning_rate": 6.397404703974047e-07, "loss": 0.6442, "step": 33265 }, { "epoch": 0.9712417155703483, "grad_norm": 0.6557965995535401, "learning_rate": 6.390916463909165e-07, "loss": 0.6303, "step": 33266 }, { "epoch": 0.9712709117981957, "grad_norm": 0.6729129252365244, "learning_rate": 6.384428223844282e-07, "loss": 0.6211, "step": 33267 }, { "epoch": 0.971300108026043, "grad_norm": 0.6193099767355523, "learning_rate": 6.377939983779401e-07, "loss": 0.6021, "step": 33268 }, { "epoch": 0.9713293042538904, "grad_norm": 0.6652609601322809, "learning_rate": 6.371451743714519e-07, "loss": 0.6304, "step": 33269 }, { "epoch": 0.9713585004817378, "grad_norm": 0.6106329848186446, "learning_rate": 6.364963503649635e-07, "loss": 0.5237, "step": 33270 }, { "epoch": 0.9713876967095851, "grad_norm": 0.6401960756122861, "learning_rate": 6.358475263584753e-07, "loss": 0.5679, "step": 33271 }, { "epoch": 0.9714168929374325, "grad_norm": 0.6029153619590757, "learning_rate": 6.35198702351987e-07, "loss": 0.4649, "step": 33272 }, { "epoch": 0.9714460891652799, "grad_norm": 0.5929072200236566, "learning_rate": 6.345498783454988e-07, "loss": 0.5114, "step": 33273 }, { "epoch": 0.9714752853931272, "grad_norm": 0.6358345873705402, "learning_rate": 6.339010543390106e-07, "loss": 0.6217, "step": 33274 }, { "epoch": 0.9715044816209746, "grad_norm": 0.6367525086250041, "learning_rate": 6.332522303325223e-07, "loss": 0.5482, "step": 33275 }, { "epoch": 0.9715336778488219, "grad_norm": 0.6272392951536647, "learning_rate": 6.326034063260342e-07, "loss": 0.5597, "step": 33276 }, { "epoch": 0.9715628740766693, "grad_norm": 0.6482314669483192, "learning_rate": 6.319545823195459e-07, "loss": 0.5974, "step": 33277 }, { "epoch": 0.9715920703045167, "grad_norm": 0.6380727722586826, "learning_rate": 6.313057583130576e-07, "loss": 0.5861, "step": 33278 }, { "epoch": 0.971621266532364, "grad_norm": 0.6301019231533654, "learning_rate": 6.306569343065694e-07, "loss": 0.5987, "step": 33279 }, { "epoch": 0.9716504627602114, "grad_norm": 0.6752624604212508, "learning_rate": 6.300081103000812e-07, "loss": 0.6433, "step": 33280 }, { "epoch": 0.9716796589880587, "grad_norm": 0.6664654172308733, "learning_rate": 6.293592862935929e-07, "loss": 0.6472, "step": 33281 }, { "epoch": 0.9717088552159061, "grad_norm": 0.6256897238468098, "learning_rate": 6.287104622871047e-07, "loss": 0.5173, "step": 33282 }, { "epoch": 0.9717380514437535, "grad_norm": 0.6445636539516361, "learning_rate": 6.280616382806165e-07, "loss": 0.654, "step": 33283 }, { "epoch": 0.9717672476716008, "grad_norm": 0.5988051620891414, "learning_rate": 6.274128142741282e-07, "loss": 0.5356, "step": 33284 }, { "epoch": 0.9717964438994482, "grad_norm": 0.655679522967888, "learning_rate": 6.2676399026764e-07, "loss": 0.5881, "step": 33285 }, { "epoch": 0.9718256401272956, "grad_norm": 0.6561971796246059, "learning_rate": 6.261151662611517e-07, "loss": 0.6528, "step": 33286 }, { "epoch": 0.9718548363551429, "grad_norm": 0.636872812073094, "learning_rate": 6.254663422546635e-07, "loss": 0.5754, "step": 33287 }, { "epoch": 0.9718840325829903, "grad_norm": 0.6297871955187481, "learning_rate": 6.248175182481752e-07, "loss": 0.5826, "step": 33288 }, { "epoch": 0.9719132288108376, "grad_norm": 0.6577052224265968, "learning_rate": 6.24168694241687e-07, "loss": 0.6288, "step": 33289 }, { "epoch": 0.971942425038685, "grad_norm": 0.6896682750091834, "learning_rate": 6.235198702351987e-07, "loss": 0.6649, "step": 33290 }, { "epoch": 0.9719716212665324, "grad_norm": 0.6488640328676036, "learning_rate": 6.228710462287105e-07, "loss": 0.5844, "step": 33291 }, { "epoch": 0.9720008174943797, "grad_norm": 0.6324654857463641, "learning_rate": 6.222222222222223e-07, "loss": 0.5918, "step": 33292 }, { "epoch": 0.9720300137222271, "grad_norm": 0.6274468422583769, "learning_rate": 6.215733982157341e-07, "loss": 0.5826, "step": 33293 }, { "epoch": 0.9720592099500744, "grad_norm": 0.6619695390910063, "learning_rate": 6.209245742092458e-07, "loss": 0.6211, "step": 33294 }, { "epoch": 0.9720884061779218, "grad_norm": 0.6354186363247932, "learning_rate": 6.202757502027575e-07, "loss": 0.6043, "step": 33295 }, { "epoch": 0.9721176024057692, "grad_norm": 0.6146371573458304, "learning_rate": 6.196269261962693e-07, "loss": 0.5668, "step": 33296 }, { "epoch": 0.9721467986336165, "grad_norm": 0.6902711916309233, "learning_rate": 6.189781021897811e-07, "loss": 0.6658, "step": 33297 }, { "epoch": 0.9721759948614639, "grad_norm": 0.6406961322839125, "learning_rate": 6.183292781832928e-07, "loss": 0.5962, "step": 33298 }, { "epoch": 0.9722051910893112, "grad_norm": 0.6697862204356947, "learning_rate": 6.176804541768045e-07, "loss": 0.6632, "step": 33299 }, { "epoch": 0.9722343873171586, "grad_norm": 0.5991094440896846, "learning_rate": 6.170316301703164e-07, "loss": 0.543, "step": 33300 }, { "epoch": 0.972263583545006, "grad_norm": 0.6913266910742238, "learning_rate": 6.163828061638282e-07, "loss": 0.7081, "step": 33301 }, { "epoch": 0.9722927797728533, "grad_norm": 0.6456367580731595, "learning_rate": 6.157339821573399e-07, "loss": 0.5932, "step": 33302 }, { "epoch": 0.9723219760007007, "grad_norm": 0.6393840523687152, "learning_rate": 6.150851581508516e-07, "loss": 0.5568, "step": 33303 }, { "epoch": 0.972351172228548, "grad_norm": 0.6563871190143553, "learning_rate": 6.144363341443634e-07, "loss": 0.5937, "step": 33304 }, { "epoch": 0.9723803684563954, "grad_norm": 0.624104128437373, "learning_rate": 6.137875101378751e-07, "loss": 0.5413, "step": 33305 }, { "epoch": 0.9724095646842428, "grad_norm": 0.6347596727264044, "learning_rate": 6.131386861313869e-07, "loss": 0.6303, "step": 33306 }, { "epoch": 0.9724387609120901, "grad_norm": 0.6399441568085469, "learning_rate": 6.124898621248987e-07, "loss": 0.5409, "step": 33307 }, { "epoch": 0.9724679571399375, "grad_norm": 0.6563439165748329, "learning_rate": 6.118410381184105e-07, "loss": 0.5373, "step": 33308 }, { "epoch": 0.9724971533677849, "grad_norm": 0.626433866954912, "learning_rate": 6.111922141119222e-07, "loss": 0.5354, "step": 33309 }, { "epoch": 0.9725263495956322, "grad_norm": 0.6159055746909426, "learning_rate": 6.10543390105434e-07, "loss": 0.5473, "step": 33310 }, { "epoch": 0.9725555458234796, "grad_norm": 0.6015883455486344, "learning_rate": 6.098945660989457e-07, "loss": 0.5061, "step": 33311 }, { "epoch": 0.972584742051327, "grad_norm": 0.6319488414099109, "learning_rate": 6.092457420924575e-07, "loss": 0.5705, "step": 33312 }, { "epoch": 0.9726139382791743, "grad_norm": 0.6349508214339371, "learning_rate": 6.085969180859692e-07, "loss": 0.6147, "step": 33313 }, { "epoch": 0.9726431345070217, "grad_norm": 0.688926551194697, "learning_rate": 6.07948094079481e-07, "loss": 0.6767, "step": 33314 }, { "epoch": 0.972672330734869, "grad_norm": 0.6770162055250742, "learning_rate": 6.072992700729927e-07, "loss": 0.6512, "step": 33315 }, { "epoch": 0.9727015269627164, "grad_norm": 0.6168473549704098, "learning_rate": 6.066504460665046e-07, "loss": 0.528, "step": 33316 }, { "epoch": 0.9727307231905638, "grad_norm": 0.6094729529053742, "learning_rate": 6.060016220600163e-07, "loss": 0.5398, "step": 33317 }, { "epoch": 0.9727599194184111, "grad_norm": 0.6916128811586274, "learning_rate": 6.05352798053528e-07, "loss": 0.6854, "step": 33318 }, { "epoch": 0.9727891156462585, "grad_norm": 0.6330746144112627, "learning_rate": 6.047039740470398e-07, "loss": 0.5668, "step": 33319 }, { "epoch": 0.9728183118741058, "grad_norm": 0.6779577551731278, "learning_rate": 6.040551500405516e-07, "loss": 0.6495, "step": 33320 }, { "epoch": 0.9728475081019532, "grad_norm": 0.6772478410205593, "learning_rate": 6.034063260340633e-07, "loss": 0.6818, "step": 33321 }, { "epoch": 0.9728767043298006, "grad_norm": 0.6330522991071902, "learning_rate": 6.02757502027575e-07, "loss": 0.5983, "step": 33322 }, { "epoch": 0.9729059005576479, "grad_norm": 0.6611382231555964, "learning_rate": 6.021086780210868e-07, "loss": 0.6107, "step": 33323 }, { "epoch": 0.9729350967854953, "grad_norm": 0.6727572232289561, "learning_rate": 6.014598540145987e-07, "loss": 0.6011, "step": 33324 }, { "epoch": 0.9729642930133426, "grad_norm": 0.6103529540947155, "learning_rate": 6.008110300081104e-07, "loss": 0.521, "step": 33325 }, { "epoch": 0.97299348924119, "grad_norm": 0.6183364872083463, "learning_rate": 6.001622060016221e-07, "loss": 0.5737, "step": 33326 }, { "epoch": 0.9730226854690374, "grad_norm": 0.6703982028370536, "learning_rate": 5.995133819951339e-07, "loss": 0.6598, "step": 33327 }, { "epoch": 0.9730518816968847, "grad_norm": 0.6467733143971509, "learning_rate": 5.988645579886456e-07, "loss": 0.5875, "step": 33328 }, { "epoch": 0.9730810779247321, "grad_norm": 0.6707941006280116, "learning_rate": 5.982157339821574e-07, "loss": 0.5497, "step": 33329 }, { "epoch": 0.9731102741525794, "grad_norm": 0.6188864268164719, "learning_rate": 5.975669099756691e-07, "loss": 0.542, "step": 33330 }, { "epoch": 0.9731394703804268, "grad_norm": 0.6128406663273104, "learning_rate": 5.969180859691808e-07, "loss": 0.5043, "step": 33331 }, { "epoch": 0.9731686666082742, "grad_norm": 0.6707714770033515, "learning_rate": 5.962692619626926e-07, "loss": 0.6269, "step": 33332 }, { "epoch": 0.9731978628361215, "grad_norm": 0.6574671225764769, "learning_rate": 5.956204379562045e-07, "loss": 0.6469, "step": 33333 }, { "epoch": 0.9732270590639689, "grad_norm": 0.6779725307832973, "learning_rate": 5.949716139497162e-07, "loss": 0.593, "step": 33334 }, { "epoch": 0.9732562552918163, "grad_norm": 0.642683888540177, "learning_rate": 5.94322789943228e-07, "loss": 0.5848, "step": 33335 }, { "epoch": 0.9732854515196636, "grad_norm": 0.6642129159154525, "learning_rate": 5.936739659367397e-07, "loss": 0.5963, "step": 33336 }, { "epoch": 0.973314647747511, "grad_norm": 0.6048514788170493, "learning_rate": 5.930251419302515e-07, "loss": 0.5283, "step": 33337 }, { "epoch": 0.9733438439753583, "grad_norm": 0.6849248236063936, "learning_rate": 5.923763179237632e-07, "loss": 0.6091, "step": 33338 }, { "epoch": 0.9733730402032057, "grad_norm": 0.6805743685788778, "learning_rate": 5.91727493917275e-07, "loss": 0.6091, "step": 33339 }, { "epoch": 0.9734022364310531, "grad_norm": 0.6557414089944305, "learning_rate": 5.910786699107867e-07, "loss": 0.569, "step": 33340 }, { "epoch": 0.9734314326589004, "grad_norm": 0.6421772377707341, "learning_rate": 5.904298459042985e-07, "loss": 0.5888, "step": 33341 }, { "epoch": 0.9734606288867478, "grad_norm": 0.6217826976533903, "learning_rate": 5.897810218978103e-07, "loss": 0.5514, "step": 33342 }, { "epoch": 0.9734898251145951, "grad_norm": 0.6559208224913432, "learning_rate": 5.891321978913221e-07, "loss": 0.6297, "step": 33343 }, { "epoch": 0.9735190213424425, "grad_norm": 0.66625044141926, "learning_rate": 5.884833738848338e-07, "loss": 0.6416, "step": 33344 }, { "epoch": 0.9735482175702899, "grad_norm": 0.6800900669488431, "learning_rate": 5.878345498783455e-07, "loss": 0.6128, "step": 33345 }, { "epoch": 0.9735774137981372, "grad_norm": 0.6564556750188363, "learning_rate": 5.871857258718573e-07, "loss": 0.6197, "step": 33346 }, { "epoch": 0.9736066100259847, "grad_norm": 0.6183899521917252, "learning_rate": 5.86536901865369e-07, "loss": 0.5483, "step": 33347 }, { "epoch": 0.9736358062538321, "grad_norm": 0.6015570564405144, "learning_rate": 5.858880778588808e-07, "loss": 0.5638, "step": 33348 }, { "epoch": 0.9736650024816794, "grad_norm": 0.5938177736173822, "learning_rate": 5.852392538523926e-07, "loss": 0.5134, "step": 33349 }, { "epoch": 0.9736941987095268, "grad_norm": 0.6088168876314513, "learning_rate": 5.845904298459044e-07, "loss": 0.5525, "step": 33350 }, { "epoch": 0.9737233949373741, "grad_norm": 0.6242072709531209, "learning_rate": 5.839416058394161e-07, "loss": 0.5329, "step": 33351 }, { "epoch": 0.9737525911652215, "grad_norm": 0.6233386265032056, "learning_rate": 5.832927818329279e-07, "loss": 0.5665, "step": 33352 }, { "epoch": 0.9737817873930689, "grad_norm": 0.6792460457227998, "learning_rate": 5.826439578264396e-07, "loss": 0.6335, "step": 33353 }, { "epoch": 0.9738109836209162, "grad_norm": 0.6302912488917882, "learning_rate": 5.819951338199513e-07, "loss": 0.5679, "step": 33354 }, { "epoch": 0.9738401798487636, "grad_norm": 0.5886687522703606, "learning_rate": 5.813463098134631e-07, "loss": 0.5047, "step": 33355 }, { "epoch": 0.973869376076611, "grad_norm": 0.6347625699950343, "learning_rate": 5.806974858069749e-07, "loss": 0.5399, "step": 33356 }, { "epoch": 0.9738985723044583, "grad_norm": 0.5926689762928706, "learning_rate": 5.800486618004867e-07, "loss": 0.5305, "step": 33357 }, { "epoch": 0.9739277685323057, "grad_norm": 0.6488590085610653, "learning_rate": 5.793998377939984e-07, "loss": 0.6301, "step": 33358 }, { "epoch": 0.973956964760153, "grad_norm": 0.6495859021573474, "learning_rate": 5.787510137875102e-07, "loss": 0.6392, "step": 33359 }, { "epoch": 0.9739861609880004, "grad_norm": 0.6316183383181316, "learning_rate": 5.78102189781022e-07, "loss": 0.5662, "step": 33360 }, { "epoch": 0.9740153572158478, "grad_norm": 0.6378589889760593, "learning_rate": 5.774533657745337e-07, "loss": 0.5422, "step": 33361 }, { "epoch": 0.9740445534436951, "grad_norm": 0.636684416966848, "learning_rate": 5.768045417680455e-07, "loss": 0.56, "step": 33362 }, { "epoch": 0.9740737496715425, "grad_norm": 0.6132682238605537, "learning_rate": 5.761557177615572e-07, "loss": 0.5168, "step": 33363 }, { "epoch": 0.9741029458993898, "grad_norm": 0.6065788945192386, "learning_rate": 5.755068937550689e-07, "loss": 0.5172, "step": 33364 }, { "epoch": 0.9741321421272372, "grad_norm": 0.6592292827528361, "learning_rate": 5.748580697485808e-07, "loss": 0.6226, "step": 33365 }, { "epoch": 0.9741613383550846, "grad_norm": 0.6532649839394308, "learning_rate": 5.742092457420926e-07, "loss": 0.5739, "step": 33366 }, { "epoch": 0.9741905345829319, "grad_norm": 0.6319626146355675, "learning_rate": 5.735604217356043e-07, "loss": 0.5508, "step": 33367 }, { "epoch": 0.9742197308107793, "grad_norm": 0.6567325785038425, "learning_rate": 5.72911597729116e-07, "loss": 0.5867, "step": 33368 }, { "epoch": 0.9742489270386266, "grad_norm": 0.5959953910174426, "learning_rate": 5.722627737226278e-07, "loss": 0.5314, "step": 33369 }, { "epoch": 0.974278123266474, "grad_norm": 0.6353740712701261, "learning_rate": 5.716139497161395e-07, "loss": 0.5476, "step": 33370 }, { "epoch": 0.9743073194943214, "grad_norm": 0.6262356619426837, "learning_rate": 5.709651257096513e-07, "loss": 0.5427, "step": 33371 }, { "epoch": 0.9743365157221687, "grad_norm": 0.6527590146408939, "learning_rate": 5.70316301703163e-07, "loss": 0.6195, "step": 33372 }, { "epoch": 0.9743657119500161, "grad_norm": 0.6011167905201962, "learning_rate": 5.696674776966749e-07, "loss": 0.5363, "step": 33373 }, { "epoch": 0.9743949081778635, "grad_norm": 0.6161892145829146, "learning_rate": 5.690186536901866e-07, "loss": 0.5302, "step": 33374 }, { "epoch": 0.9744241044057108, "grad_norm": 0.6695756430250758, "learning_rate": 5.683698296836984e-07, "loss": 0.6208, "step": 33375 }, { "epoch": 0.9744533006335582, "grad_norm": 0.6524708514937312, "learning_rate": 5.677210056772101e-07, "loss": 0.6063, "step": 33376 }, { "epoch": 0.9744824968614055, "grad_norm": 0.6725393681091246, "learning_rate": 5.670721816707218e-07, "loss": 0.6359, "step": 33377 }, { "epoch": 0.9745116930892529, "grad_norm": 0.6619001387289908, "learning_rate": 5.664233576642336e-07, "loss": 0.6292, "step": 33378 }, { "epoch": 0.9745408893171003, "grad_norm": 0.6311883387907613, "learning_rate": 5.657745336577454e-07, "loss": 0.5924, "step": 33379 }, { "epoch": 0.9745700855449476, "grad_norm": 0.642918194866968, "learning_rate": 5.651257096512571e-07, "loss": 0.5745, "step": 33380 }, { "epoch": 0.974599281772795, "grad_norm": 0.6287466821033629, "learning_rate": 5.644768856447689e-07, "loss": 0.5775, "step": 33381 }, { "epoch": 0.9746284780006423, "grad_norm": 0.6295326172875415, "learning_rate": 5.638280616382807e-07, "loss": 0.5682, "step": 33382 }, { "epoch": 0.9746576742284897, "grad_norm": 0.6670652149357532, "learning_rate": 5.631792376317925e-07, "loss": 0.6332, "step": 33383 }, { "epoch": 0.9746868704563371, "grad_norm": 0.6214228523258005, "learning_rate": 5.625304136253042e-07, "loss": 0.5891, "step": 33384 }, { "epoch": 0.9747160666841844, "grad_norm": 0.6853608597003528, "learning_rate": 5.618815896188159e-07, "loss": 0.6331, "step": 33385 }, { "epoch": 0.9747452629120318, "grad_norm": 0.6342145951927686, "learning_rate": 5.612327656123277e-07, "loss": 0.597, "step": 33386 }, { "epoch": 0.9747744591398791, "grad_norm": 0.6781006725412684, "learning_rate": 5.605839416058394e-07, "loss": 0.5862, "step": 33387 }, { "epoch": 0.9748036553677265, "grad_norm": 0.6539192559849542, "learning_rate": 5.599351175993512e-07, "loss": 0.5918, "step": 33388 }, { "epoch": 0.9748328515955739, "grad_norm": 0.692705303423029, "learning_rate": 5.59286293592863e-07, "loss": 0.6867, "step": 33389 }, { "epoch": 0.9748620478234212, "grad_norm": 0.7046116444726193, "learning_rate": 5.586374695863748e-07, "loss": 0.6443, "step": 33390 }, { "epoch": 0.9748912440512686, "grad_norm": 0.64639650364009, "learning_rate": 5.579886455798865e-07, "loss": 0.5987, "step": 33391 }, { "epoch": 0.974920440279116, "grad_norm": 0.6573792294010811, "learning_rate": 5.573398215733983e-07, "loss": 0.6489, "step": 33392 }, { "epoch": 0.9749496365069633, "grad_norm": 0.6517971846891785, "learning_rate": 5.5669099756691e-07, "loss": 0.6029, "step": 33393 }, { "epoch": 0.9749788327348107, "grad_norm": 0.6534365812936873, "learning_rate": 5.560421735604218e-07, "loss": 0.607, "step": 33394 }, { "epoch": 0.975008028962658, "grad_norm": 0.5982901976285598, "learning_rate": 5.553933495539335e-07, "loss": 0.5494, "step": 33395 }, { "epoch": 0.9750372251905054, "grad_norm": 0.632482302406373, "learning_rate": 5.547445255474453e-07, "loss": 0.6002, "step": 33396 }, { "epoch": 0.9750664214183528, "grad_norm": 0.6683755833989921, "learning_rate": 5.54095701540957e-07, "loss": 0.646, "step": 33397 }, { "epoch": 0.9750956176462001, "grad_norm": 0.6537044899074637, "learning_rate": 5.534468775344689e-07, "loss": 0.5916, "step": 33398 }, { "epoch": 0.9751248138740475, "grad_norm": 0.6678386852394195, "learning_rate": 5.527980535279806e-07, "loss": 0.6069, "step": 33399 }, { "epoch": 0.9751540101018948, "grad_norm": 0.7133754818298481, "learning_rate": 5.521492295214923e-07, "loss": 0.6951, "step": 33400 }, { "epoch": 0.9751832063297422, "grad_norm": 0.655044175284168, "learning_rate": 5.515004055150041e-07, "loss": 0.5678, "step": 33401 }, { "epoch": 0.9752124025575896, "grad_norm": 0.6809800077804368, "learning_rate": 5.508515815085159e-07, "loss": 0.6204, "step": 33402 }, { "epoch": 0.9752415987854369, "grad_norm": 0.6275598476961028, "learning_rate": 5.502027575020276e-07, "loss": 0.5194, "step": 33403 }, { "epoch": 0.9752707950132843, "grad_norm": 0.584003833168098, "learning_rate": 5.495539334955393e-07, "loss": 0.5176, "step": 33404 }, { "epoch": 0.9752999912411316, "grad_norm": 0.6304357034815464, "learning_rate": 5.489051094890512e-07, "loss": 0.5516, "step": 33405 }, { "epoch": 0.975329187468979, "grad_norm": 0.6867056662777689, "learning_rate": 5.48256285482563e-07, "loss": 0.6587, "step": 33406 }, { "epoch": 0.9753583836968264, "grad_norm": 0.6644348210310154, "learning_rate": 5.476074614760747e-07, "loss": 0.6566, "step": 33407 }, { "epoch": 0.9753875799246737, "grad_norm": 0.6525451877753033, "learning_rate": 5.469586374695864e-07, "loss": 0.6031, "step": 33408 }, { "epoch": 0.9754167761525211, "grad_norm": 0.6374077965706413, "learning_rate": 5.463098134630982e-07, "loss": 0.613, "step": 33409 }, { "epoch": 0.9754459723803685, "grad_norm": 0.6792143364117988, "learning_rate": 5.456609894566099e-07, "loss": 0.6081, "step": 33410 }, { "epoch": 0.9754751686082158, "grad_norm": 0.6632351027122921, "learning_rate": 5.450121654501217e-07, "loss": 0.6029, "step": 33411 }, { "epoch": 0.9755043648360632, "grad_norm": 0.7036961681457111, "learning_rate": 5.443633414436334e-07, "loss": 0.6673, "step": 33412 }, { "epoch": 0.9755335610639105, "grad_norm": 0.6192599458330926, "learning_rate": 5.437145174371451e-07, "loss": 0.5532, "step": 33413 }, { "epoch": 0.9755627572917579, "grad_norm": 0.6293152540767027, "learning_rate": 5.43065693430657e-07, "loss": 0.6005, "step": 33414 }, { "epoch": 0.9755919535196053, "grad_norm": 0.5921654239599254, "learning_rate": 5.424168694241688e-07, "loss": 0.4966, "step": 33415 }, { "epoch": 0.9756211497474526, "grad_norm": 0.6348211481619824, "learning_rate": 5.417680454176805e-07, "loss": 0.5981, "step": 33416 }, { "epoch": 0.9756503459753, "grad_norm": 0.6582181685504426, "learning_rate": 5.411192214111922e-07, "loss": 0.5832, "step": 33417 }, { "epoch": 0.9756795422031473, "grad_norm": 0.653764194946789, "learning_rate": 5.40470397404704e-07, "loss": 0.6272, "step": 33418 }, { "epoch": 0.9757087384309947, "grad_norm": 0.6340339958452543, "learning_rate": 5.398215733982158e-07, "loss": 0.547, "step": 33419 }, { "epoch": 0.9757379346588421, "grad_norm": 0.6829560079467186, "learning_rate": 5.391727493917275e-07, "loss": 0.6591, "step": 33420 }, { "epoch": 0.9757671308866894, "grad_norm": 1.1629387715868096, "learning_rate": 5.385239253852394e-07, "loss": 0.6172, "step": 33421 }, { "epoch": 0.9757963271145368, "grad_norm": 0.6653839875020432, "learning_rate": 5.378751013787511e-07, "loss": 0.6636, "step": 33422 }, { "epoch": 0.9758255233423841, "grad_norm": 0.6421609234937201, "learning_rate": 5.372262773722628e-07, "loss": 0.5346, "step": 33423 }, { "epoch": 0.9758547195702315, "grad_norm": 0.630845699538634, "learning_rate": 5.365774533657746e-07, "loss": 0.5827, "step": 33424 }, { "epoch": 0.9758839157980789, "grad_norm": 0.6378241213465081, "learning_rate": 5.359286293592864e-07, "loss": 0.5953, "step": 33425 }, { "epoch": 0.9759131120259262, "grad_norm": 0.70202711621975, "learning_rate": 5.352798053527981e-07, "loss": 0.6754, "step": 33426 }, { "epoch": 0.9759423082537736, "grad_norm": 0.6509304892231439, "learning_rate": 5.346309813463098e-07, "loss": 0.6131, "step": 33427 }, { "epoch": 0.975971504481621, "grad_norm": 0.6234212742043258, "learning_rate": 5.339821573398216e-07, "loss": 0.5519, "step": 33428 }, { "epoch": 0.9760007007094683, "grad_norm": 0.6297472041734405, "learning_rate": 5.333333333333335e-07, "loss": 0.5436, "step": 33429 }, { "epoch": 0.9760298969373157, "grad_norm": 0.6527827604343984, "learning_rate": 5.326845093268452e-07, "loss": 0.6258, "step": 33430 }, { "epoch": 0.976059093165163, "grad_norm": 0.8708254564784126, "learning_rate": 5.320356853203569e-07, "loss": 0.6226, "step": 33431 }, { "epoch": 0.9760882893930104, "grad_norm": 0.6504681452849557, "learning_rate": 5.313868613138687e-07, "loss": 0.6217, "step": 33432 }, { "epoch": 0.9761174856208578, "grad_norm": 0.6402291199054738, "learning_rate": 5.307380373073804e-07, "loss": 0.5563, "step": 33433 }, { "epoch": 0.9761466818487051, "grad_norm": 0.6552643020483969, "learning_rate": 5.300892133008922e-07, "loss": 0.6256, "step": 33434 }, { "epoch": 0.9761758780765525, "grad_norm": 0.6638908004836197, "learning_rate": 5.294403892944039e-07, "loss": 0.6368, "step": 33435 }, { "epoch": 0.9762050743043998, "grad_norm": 0.6620763705986407, "learning_rate": 5.287915652879156e-07, "loss": 0.5761, "step": 33436 }, { "epoch": 0.9762342705322472, "grad_norm": 0.607322930153761, "learning_rate": 5.281427412814274e-07, "loss": 0.5257, "step": 33437 }, { "epoch": 0.9762634667600946, "grad_norm": 0.6737083906946636, "learning_rate": 5.274939172749393e-07, "loss": 0.6271, "step": 33438 }, { "epoch": 0.9762926629879419, "grad_norm": 0.6264755402440548, "learning_rate": 5.26845093268451e-07, "loss": 0.5617, "step": 33439 }, { "epoch": 0.9763218592157893, "grad_norm": 0.6189887719471636, "learning_rate": 5.261962692619627e-07, "loss": 0.514, "step": 33440 }, { "epoch": 0.9763510554436367, "grad_norm": 0.6585314834853994, "learning_rate": 5.255474452554745e-07, "loss": 0.6124, "step": 33441 }, { "epoch": 0.976380251671484, "grad_norm": 0.6146880815007232, "learning_rate": 5.248986212489863e-07, "loss": 0.5446, "step": 33442 }, { "epoch": 0.9764094478993314, "grad_norm": 0.67397308313968, "learning_rate": 5.24249797242498e-07, "loss": 0.6303, "step": 33443 }, { "epoch": 0.9764386441271787, "grad_norm": 0.6171441797359376, "learning_rate": 5.236009732360097e-07, "loss": 0.4915, "step": 33444 }, { "epoch": 0.9764678403550261, "grad_norm": 0.6473340119017641, "learning_rate": 5.229521492295215e-07, "loss": 0.5694, "step": 33445 }, { "epoch": 0.9764970365828735, "grad_norm": 0.6444383893320719, "learning_rate": 5.223033252230333e-07, "loss": 0.5809, "step": 33446 }, { "epoch": 0.9765262328107208, "grad_norm": 0.6474210065366834, "learning_rate": 5.216545012165451e-07, "loss": 0.6021, "step": 33447 }, { "epoch": 0.9765554290385682, "grad_norm": 0.62805603045135, "learning_rate": 5.210056772100569e-07, "loss": 0.5846, "step": 33448 }, { "epoch": 0.9765846252664155, "grad_norm": 0.6711825584635766, "learning_rate": 5.203568532035686e-07, "loss": 0.6474, "step": 33449 }, { "epoch": 0.9766138214942629, "grad_norm": 0.6549625322231863, "learning_rate": 5.197080291970803e-07, "loss": 0.639, "step": 33450 }, { "epoch": 0.9766430177221103, "grad_norm": 0.6552065566934099, "learning_rate": 5.190592051905921e-07, "loss": 0.6381, "step": 33451 }, { "epoch": 0.9766722139499576, "grad_norm": 0.5964844546427563, "learning_rate": 5.184103811841038e-07, "loss": 0.5249, "step": 33452 }, { "epoch": 0.976701410177805, "grad_norm": 0.6104987359403942, "learning_rate": 5.177615571776156e-07, "loss": 0.5361, "step": 33453 }, { "epoch": 0.9767306064056523, "grad_norm": 0.6703151310618768, "learning_rate": 5.171127331711274e-07, "loss": 0.556, "step": 33454 }, { "epoch": 0.9767598026334997, "grad_norm": 0.6672747064223211, "learning_rate": 5.164639091646392e-07, "loss": 0.6156, "step": 33455 }, { "epoch": 0.9767889988613471, "grad_norm": 0.7068966554585007, "learning_rate": 5.158150851581509e-07, "loss": 0.6816, "step": 33456 }, { "epoch": 0.9768181950891944, "grad_norm": 0.6762497125729341, "learning_rate": 5.151662611516627e-07, "loss": 0.6355, "step": 33457 }, { "epoch": 0.9768473913170418, "grad_norm": 0.669516352942968, "learning_rate": 5.145174371451744e-07, "loss": 0.6776, "step": 33458 }, { "epoch": 0.9768765875448892, "grad_norm": 0.5992324926516067, "learning_rate": 5.138686131386861e-07, "loss": 0.5465, "step": 33459 }, { "epoch": 0.9769057837727365, "grad_norm": 0.6139247967160943, "learning_rate": 5.132197891321979e-07, "loss": 0.5159, "step": 33460 }, { "epoch": 0.9769349800005839, "grad_norm": 0.6484368031761777, "learning_rate": 5.125709651257097e-07, "loss": 0.6162, "step": 33461 }, { "epoch": 0.9769641762284312, "grad_norm": 0.6851668720234523, "learning_rate": 5.119221411192215e-07, "loss": 0.5693, "step": 33462 }, { "epoch": 0.9769933724562786, "grad_norm": 0.614841808661534, "learning_rate": 5.112733171127332e-07, "loss": 0.5043, "step": 33463 }, { "epoch": 0.977022568684126, "grad_norm": 0.5960355601358152, "learning_rate": 5.10624493106245e-07, "loss": 0.5163, "step": 33464 }, { "epoch": 0.9770517649119733, "grad_norm": 0.5966356380144046, "learning_rate": 5.099756690997568e-07, "loss": 0.527, "step": 33465 }, { "epoch": 0.9770809611398207, "grad_norm": 0.6514972362396336, "learning_rate": 5.093268450932685e-07, "loss": 0.6241, "step": 33466 }, { "epoch": 0.977110157367668, "grad_norm": 0.6547208615881845, "learning_rate": 5.086780210867802e-07, "loss": 0.6112, "step": 33467 }, { "epoch": 0.9771393535955155, "grad_norm": 0.6879207752642633, "learning_rate": 5.08029197080292e-07, "loss": 0.613, "step": 33468 }, { "epoch": 0.9771685498233629, "grad_norm": 0.6473330987585294, "learning_rate": 5.073803730738037e-07, "loss": 0.5999, "step": 33469 }, { "epoch": 0.9771977460512102, "grad_norm": 0.7190976449987032, "learning_rate": 5.067315490673156e-07, "loss": 0.5919, "step": 33470 }, { "epoch": 0.9772269422790576, "grad_norm": 0.6988046504298884, "learning_rate": 5.060827250608273e-07, "loss": 0.6771, "step": 33471 }, { "epoch": 0.977256138506905, "grad_norm": 0.6441954303135146, "learning_rate": 5.05433901054339e-07, "loss": 0.6082, "step": 33472 }, { "epoch": 0.9772853347347523, "grad_norm": 0.5941038786416696, "learning_rate": 5.047850770478508e-07, "loss": 0.5183, "step": 33473 }, { "epoch": 0.9773145309625997, "grad_norm": 0.643849931449267, "learning_rate": 5.041362530413626e-07, "loss": 0.6158, "step": 33474 }, { "epoch": 0.977343727190447, "grad_norm": 0.6034608117318193, "learning_rate": 5.034874290348743e-07, "loss": 0.5018, "step": 33475 }, { "epoch": 0.9773729234182944, "grad_norm": 0.6166466550345054, "learning_rate": 5.028386050283861e-07, "loss": 0.5339, "step": 33476 }, { "epoch": 0.9774021196461418, "grad_norm": 0.6241724432948218, "learning_rate": 5.021897810218978e-07, "loss": 0.5737, "step": 33477 }, { "epoch": 0.9774313158739891, "grad_norm": 0.6601490617835016, "learning_rate": 5.015409570154097e-07, "loss": 0.6302, "step": 33478 }, { "epoch": 0.9774605121018365, "grad_norm": 0.616561168283818, "learning_rate": 5.008921330089214e-07, "loss": 0.5279, "step": 33479 }, { "epoch": 0.9774897083296838, "grad_norm": 0.6892895878230488, "learning_rate": 5.002433090024332e-07, "loss": 0.659, "step": 33480 }, { "epoch": 0.9775189045575312, "grad_norm": 0.6351623617806269, "learning_rate": 4.995944849959449e-07, "loss": 0.575, "step": 33481 }, { "epoch": 0.9775481007853786, "grad_norm": 0.6939638952038282, "learning_rate": 4.989456609894566e-07, "loss": 0.6549, "step": 33482 }, { "epoch": 0.9775772970132259, "grad_norm": 0.6322748457486569, "learning_rate": 4.982968369829684e-07, "loss": 0.5656, "step": 33483 }, { "epoch": 0.9776064932410733, "grad_norm": 0.6379667915313632, "learning_rate": 4.976480129764802e-07, "loss": 0.555, "step": 33484 }, { "epoch": 0.9776356894689207, "grad_norm": 0.6992498270159018, "learning_rate": 4.969991889699919e-07, "loss": 0.6733, "step": 33485 }, { "epoch": 0.977664885696768, "grad_norm": 0.6803760077765272, "learning_rate": 4.963503649635036e-07, "loss": 0.6429, "step": 33486 }, { "epoch": 0.9776940819246154, "grad_norm": 0.636174664634321, "learning_rate": 4.957015409570155e-07, "loss": 0.6151, "step": 33487 }, { "epoch": 0.9777232781524627, "grad_norm": 0.6524701319010854, "learning_rate": 4.950527169505273e-07, "loss": 0.5871, "step": 33488 }, { "epoch": 0.9777524743803101, "grad_norm": 0.6462836715641408, "learning_rate": 4.94403892944039e-07, "loss": 0.5453, "step": 33489 }, { "epoch": 0.9777816706081575, "grad_norm": 0.6103052999943285, "learning_rate": 4.937550689375507e-07, "loss": 0.5194, "step": 33490 }, { "epoch": 0.9778108668360048, "grad_norm": 0.6786722923593985, "learning_rate": 4.931062449310625e-07, "loss": 0.6257, "step": 33491 }, { "epoch": 0.9778400630638522, "grad_norm": 0.698723470060054, "learning_rate": 4.924574209245742e-07, "loss": 0.674, "step": 33492 }, { "epoch": 0.9778692592916995, "grad_norm": 0.6516724776875447, "learning_rate": 4.91808596918086e-07, "loss": 0.5642, "step": 33493 }, { "epoch": 0.9778984555195469, "grad_norm": 0.609408433625895, "learning_rate": 4.911597729115977e-07, "loss": 0.5538, "step": 33494 }, { "epoch": 0.9779276517473943, "grad_norm": 0.6999657148645435, "learning_rate": 4.905109489051095e-07, "loss": 0.5989, "step": 33495 }, { "epoch": 0.9779568479752416, "grad_norm": 0.6686493455151019, "learning_rate": 4.898621248986213e-07, "loss": 0.6292, "step": 33496 }, { "epoch": 0.977986044203089, "grad_norm": 0.6638702864875882, "learning_rate": 4.892133008921331e-07, "loss": 0.5968, "step": 33497 }, { "epoch": 0.9780152404309364, "grad_norm": 0.6631327896914544, "learning_rate": 4.885644768856448e-07, "loss": 0.6259, "step": 33498 }, { "epoch": 0.9780444366587837, "grad_norm": 0.6244295084394614, "learning_rate": 4.879156528791565e-07, "loss": 0.5621, "step": 33499 }, { "epoch": 0.9780736328866311, "grad_norm": 0.6673011811969295, "learning_rate": 4.872668288726683e-07, "loss": 0.6133, "step": 33500 }, { "epoch": 0.9781028291144784, "grad_norm": 0.6047993545280198, "learning_rate": 4.866180048661801e-07, "loss": 0.5309, "step": 33501 }, { "epoch": 0.9781320253423258, "grad_norm": 0.6810552267503112, "learning_rate": 4.859691808596918e-07, "loss": 0.669, "step": 33502 }, { "epoch": 0.9781612215701732, "grad_norm": 0.681677950825889, "learning_rate": 4.853203568532037e-07, "loss": 0.6254, "step": 33503 }, { "epoch": 0.9781904177980205, "grad_norm": 0.6131193934771142, "learning_rate": 4.846715328467154e-07, "loss": 0.5803, "step": 33504 }, { "epoch": 0.9782196140258679, "grad_norm": 0.6752946800142748, "learning_rate": 4.840227088402271e-07, "loss": 0.6652, "step": 33505 }, { "epoch": 0.9782488102537152, "grad_norm": 0.6242492089583728, "learning_rate": 4.833738848337389e-07, "loss": 0.4939, "step": 33506 }, { "epoch": 0.9782780064815626, "grad_norm": 0.6484879219107265, "learning_rate": 4.827250608272507e-07, "loss": 0.6212, "step": 33507 }, { "epoch": 0.97830720270941, "grad_norm": 0.6640296693867225, "learning_rate": 4.820762368207624e-07, "loss": 0.6198, "step": 33508 }, { "epoch": 0.9783363989372573, "grad_norm": 0.6542370625215553, "learning_rate": 4.814274128142741e-07, "loss": 0.6069, "step": 33509 }, { "epoch": 0.9783655951651047, "grad_norm": 0.6582852686324168, "learning_rate": 4.80778588807786e-07, "loss": 0.6045, "step": 33510 }, { "epoch": 0.978394791392952, "grad_norm": 0.6728019254702555, "learning_rate": 4.801297648012978e-07, "loss": 0.6652, "step": 33511 }, { "epoch": 0.9784239876207994, "grad_norm": 0.6360940833826865, "learning_rate": 4.794809407948095e-07, "loss": 0.5662, "step": 33512 }, { "epoch": 0.9784531838486468, "grad_norm": 0.6263433053397194, "learning_rate": 4.788321167883212e-07, "loss": 0.5638, "step": 33513 }, { "epoch": 0.9784823800764941, "grad_norm": 0.6817771381413118, "learning_rate": 4.78183292781833e-07, "loss": 0.6376, "step": 33514 }, { "epoch": 0.9785115763043415, "grad_norm": 0.6188311361042711, "learning_rate": 4.775344687753447e-07, "loss": 0.5581, "step": 33515 }, { "epoch": 0.9785407725321889, "grad_norm": 0.6237744765192369, "learning_rate": 4.768856447688565e-07, "loss": 0.5528, "step": 33516 }, { "epoch": 0.9785699687600362, "grad_norm": 0.6590170541029461, "learning_rate": 4.762368207623683e-07, "loss": 0.6076, "step": 33517 }, { "epoch": 0.9785991649878836, "grad_norm": 0.69174802883231, "learning_rate": 4.7558799675588e-07, "loss": 0.6729, "step": 33518 }, { "epoch": 0.9786283612157309, "grad_norm": 0.56661841799213, "learning_rate": 4.7493917274939176e-07, "loss": 0.4849, "step": 33519 }, { "epoch": 0.9786575574435783, "grad_norm": 0.6273835789986287, "learning_rate": 4.742903487429035e-07, "loss": 0.5305, "step": 33520 }, { "epoch": 0.9786867536714257, "grad_norm": 0.614986843141394, "learning_rate": 4.7364152473641523e-07, "loss": 0.5276, "step": 33521 }, { "epoch": 0.978715949899273, "grad_norm": 0.6286975675710291, "learning_rate": 4.7299270072992705e-07, "loss": 0.5949, "step": 33522 }, { "epoch": 0.9787451461271204, "grad_norm": 0.6454161523784264, "learning_rate": 4.723438767234388e-07, "loss": 0.6331, "step": 33523 }, { "epoch": 0.9787743423549677, "grad_norm": 0.6687070892069105, "learning_rate": 4.716950527169506e-07, "loss": 0.6059, "step": 33524 }, { "epoch": 0.9788035385828151, "grad_norm": 0.6646574354682082, "learning_rate": 4.7104622871046233e-07, "loss": 0.6226, "step": 33525 }, { "epoch": 0.9788327348106625, "grad_norm": 0.6440133886244943, "learning_rate": 4.703974047039741e-07, "loss": 0.5845, "step": 33526 }, { "epoch": 0.9788619310385098, "grad_norm": 0.6730212721115085, "learning_rate": 4.6974858069748586e-07, "loss": 0.6668, "step": 33527 }, { "epoch": 0.9788911272663572, "grad_norm": 0.6160385377229709, "learning_rate": 4.6909975669099757e-07, "loss": 0.5257, "step": 33528 }, { "epoch": 0.9789203234942045, "grad_norm": 0.6162724512786855, "learning_rate": 4.684509326845094e-07, "loss": 0.553, "step": 33529 }, { "epoch": 0.9789495197220519, "grad_norm": 0.6234083445235477, "learning_rate": 4.6780210867802114e-07, "loss": 0.5576, "step": 33530 }, { "epoch": 0.9789787159498993, "grad_norm": 0.7428305467509096, "learning_rate": 4.6715328467153285e-07, "loss": 0.7345, "step": 33531 }, { "epoch": 0.9790079121777466, "grad_norm": 0.6409490009962325, "learning_rate": 4.665044606650446e-07, "loss": 0.6143, "step": 33532 }, { "epoch": 0.979037108405594, "grad_norm": 0.6433438783896386, "learning_rate": 4.6585563665855643e-07, "loss": 0.5868, "step": 33533 }, { "epoch": 0.9790663046334414, "grad_norm": 0.6948831812110227, "learning_rate": 4.652068126520682e-07, "loss": 0.6507, "step": 33534 }, { "epoch": 0.9790955008612887, "grad_norm": 0.6175445674529886, "learning_rate": 4.645579886455799e-07, "loss": 0.5424, "step": 33535 }, { "epoch": 0.9791246970891361, "grad_norm": 0.613697867598867, "learning_rate": 4.6390916463909167e-07, "loss": 0.4769, "step": 33536 }, { "epoch": 0.9791538933169834, "grad_norm": 0.6059959277575594, "learning_rate": 4.632603406326035e-07, "loss": 0.5151, "step": 33537 }, { "epoch": 0.9791830895448308, "grad_norm": 0.658099778186248, "learning_rate": 4.626115166261152e-07, "loss": 0.6462, "step": 33538 }, { "epoch": 0.9792122857726782, "grad_norm": 0.6675314401743797, "learning_rate": 4.6196269261962695e-07, "loss": 0.697, "step": 33539 }, { "epoch": 0.9792414820005255, "grad_norm": 0.6359398343521656, "learning_rate": 4.6131386861313877e-07, "loss": 0.5876, "step": 33540 }, { "epoch": 0.9792706782283729, "grad_norm": 0.629263383634466, "learning_rate": 4.606650446066505e-07, "loss": 0.599, "step": 33541 }, { "epoch": 0.9792998744562202, "grad_norm": 0.6326111756708209, "learning_rate": 4.6001622060016224e-07, "loss": 0.6137, "step": 33542 }, { "epoch": 0.9793290706840676, "grad_norm": 0.6672216360257278, "learning_rate": 4.59367396593674e-07, "loss": 0.6148, "step": 33543 }, { "epoch": 0.979358266911915, "grad_norm": 0.6217652817408891, "learning_rate": 4.587185725871857e-07, "loss": 0.5419, "step": 33544 }, { "epoch": 0.9793874631397623, "grad_norm": 0.6472497444941523, "learning_rate": 4.5806974858069753e-07, "loss": 0.5784, "step": 33545 }, { "epoch": 0.9794166593676097, "grad_norm": 0.6510222656709618, "learning_rate": 4.574209245742093e-07, "loss": 0.5957, "step": 33546 }, { "epoch": 0.979445855595457, "grad_norm": 0.6452974438636215, "learning_rate": 4.5677210056772105e-07, "loss": 0.5845, "step": 33547 }, { "epoch": 0.9794750518233044, "grad_norm": 0.6850982341750457, "learning_rate": 4.5612327656123276e-07, "loss": 0.6302, "step": 33548 }, { "epoch": 0.9795042480511518, "grad_norm": 0.72686047370238, "learning_rate": 4.554744525547446e-07, "loss": 0.7186, "step": 33549 }, { "epoch": 0.9795334442789991, "grad_norm": 0.6855378393334749, "learning_rate": 4.5482562854825634e-07, "loss": 0.6281, "step": 33550 }, { "epoch": 0.9795626405068465, "grad_norm": 0.609716294946611, "learning_rate": 4.5417680454176805e-07, "loss": 0.5712, "step": 33551 }, { "epoch": 0.9795918367346939, "grad_norm": 0.6670502209772454, "learning_rate": 4.5352798053527987e-07, "loss": 0.5874, "step": 33552 }, { "epoch": 0.9796210329625412, "grad_norm": 0.6588016909991277, "learning_rate": 4.5287915652879163e-07, "loss": 0.6134, "step": 33553 }, { "epoch": 0.9796502291903886, "grad_norm": 0.646447720658217, "learning_rate": 4.5223033252230334e-07, "loss": 0.5835, "step": 33554 }, { "epoch": 0.9796794254182359, "grad_norm": 0.6286139846639833, "learning_rate": 4.515815085158151e-07, "loss": 0.5839, "step": 33555 }, { "epoch": 0.9797086216460833, "grad_norm": 0.6022737665659748, "learning_rate": 4.509326845093269e-07, "loss": 0.5248, "step": 33556 }, { "epoch": 0.9797378178739307, "grad_norm": 0.6430241360073897, "learning_rate": 4.5028386050283863e-07, "loss": 0.5892, "step": 33557 }, { "epoch": 0.979767014101778, "grad_norm": 0.5932089588013005, "learning_rate": 4.496350364963504e-07, "loss": 0.4671, "step": 33558 }, { "epoch": 0.9797962103296254, "grad_norm": 0.6848402914697607, "learning_rate": 4.4898621248986215e-07, "loss": 0.6508, "step": 33559 }, { "epoch": 0.9798254065574727, "grad_norm": 0.6500983463309308, "learning_rate": 4.4833738848337397e-07, "loss": 0.5806, "step": 33560 }, { "epoch": 0.9798546027853201, "grad_norm": 0.6441716477994047, "learning_rate": 4.476885644768857e-07, "loss": 0.5783, "step": 33561 }, { "epoch": 0.9798837990131675, "grad_norm": 0.638189978730272, "learning_rate": 4.4703974047039744e-07, "loss": 0.5877, "step": 33562 }, { "epoch": 0.9799129952410148, "grad_norm": 0.648471561013344, "learning_rate": 4.463909164639092e-07, "loss": 0.564, "step": 33563 }, { "epoch": 0.9799421914688622, "grad_norm": 0.6504836660434699, "learning_rate": 4.457420924574209e-07, "loss": 0.5957, "step": 33564 }, { "epoch": 0.9799713876967096, "grad_norm": 0.6553730649437022, "learning_rate": 4.4509326845093273e-07, "loss": 0.6146, "step": 33565 }, { "epoch": 0.9800005839245569, "grad_norm": 0.6575458973890198, "learning_rate": 4.444444444444445e-07, "loss": 0.6237, "step": 33566 }, { "epoch": 0.9800297801524043, "grad_norm": 0.6721056839539996, "learning_rate": 4.437956204379562e-07, "loss": 0.6736, "step": 33567 }, { "epoch": 0.9800589763802516, "grad_norm": 0.6460966373671302, "learning_rate": 4.43146796431468e-07, "loss": 0.5768, "step": 33568 }, { "epoch": 0.980088172608099, "grad_norm": 0.6181219191820703, "learning_rate": 4.424979724249798e-07, "loss": 0.5482, "step": 33569 }, { "epoch": 0.9801173688359464, "grad_norm": 0.6792825156468878, "learning_rate": 4.4184914841849154e-07, "loss": 0.63, "step": 33570 }, { "epoch": 0.9801465650637937, "grad_norm": 0.6819967144093846, "learning_rate": 4.4120032441200325e-07, "loss": 0.6515, "step": 33571 }, { "epoch": 0.9801757612916411, "grad_norm": 0.6271301280614194, "learning_rate": 4.4055150040551506e-07, "loss": 0.5264, "step": 33572 }, { "epoch": 0.9802049575194884, "grad_norm": 0.603209441798135, "learning_rate": 4.3990267639902683e-07, "loss": 0.5319, "step": 33573 }, { "epoch": 0.9802341537473358, "grad_norm": 0.6430465626375069, "learning_rate": 4.3925385239253854e-07, "loss": 0.5636, "step": 33574 }, { "epoch": 0.9802633499751832, "grad_norm": 0.6123393356203513, "learning_rate": 4.386050283860503e-07, "loss": 0.5184, "step": 33575 }, { "epoch": 0.9802925462030305, "grad_norm": 0.6655924189571797, "learning_rate": 4.379562043795621e-07, "loss": 0.6107, "step": 33576 }, { "epoch": 0.9803217424308779, "grad_norm": 0.726872080494898, "learning_rate": 4.373073803730738e-07, "loss": 0.6689, "step": 33577 }, { "epoch": 0.9803509386587252, "grad_norm": 0.6260833994642127, "learning_rate": 4.366585563665856e-07, "loss": 0.5157, "step": 33578 }, { "epoch": 0.9803801348865726, "grad_norm": 0.6255763771715972, "learning_rate": 4.3600973236009735e-07, "loss": 0.5407, "step": 33579 }, { "epoch": 0.98040933111442, "grad_norm": 0.7468043088169185, "learning_rate": 4.353609083536091e-07, "loss": 0.6101, "step": 33580 }, { "epoch": 0.9804385273422673, "grad_norm": 0.6285419056174187, "learning_rate": 4.347120843471209e-07, "loss": 0.5828, "step": 33581 }, { "epoch": 0.9804677235701147, "grad_norm": 0.6343682449367702, "learning_rate": 4.3406326034063264e-07, "loss": 0.572, "step": 33582 }, { "epoch": 0.980496919797962, "grad_norm": 0.6720941139293539, "learning_rate": 4.3341443633414445e-07, "loss": 0.67, "step": 33583 }, { "epoch": 0.9805261160258094, "grad_norm": 0.6626312099251098, "learning_rate": 4.3276561232765616e-07, "loss": 0.6171, "step": 33584 }, { "epoch": 0.9805553122536568, "grad_norm": 0.62431057149772, "learning_rate": 4.321167883211679e-07, "loss": 0.5825, "step": 33585 }, { "epoch": 0.9805845084815041, "grad_norm": 0.6274730090837606, "learning_rate": 4.314679643146797e-07, "loss": 0.5982, "step": 33586 }, { "epoch": 0.9806137047093515, "grad_norm": 0.6425573508006017, "learning_rate": 4.308191403081914e-07, "loss": 0.5544, "step": 33587 }, { "epoch": 0.980642900937199, "grad_norm": 0.6300145220020837, "learning_rate": 4.301703163017032e-07, "loss": 0.5752, "step": 33588 }, { "epoch": 0.9806720971650463, "grad_norm": 0.6427240079294412, "learning_rate": 4.29521492295215e-07, "loss": 0.5738, "step": 33589 }, { "epoch": 0.9807012933928937, "grad_norm": 0.6353267088664567, "learning_rate": 4.288726682887267e-07, "loss": 0.5331, "step": 33590 }, { "epoch": 0.980730489620741, "grad_norm": 0.6333904529426029, "learning_rate": 4.2822384428223845e-07, "loss": 0.551, "step": 33591 }, { "epoch": 0.9807596858485884, "grad_norm": 0.6595078861052044, "learning_rate": 4.2757502027575026e-07, "loss": 0.5974, "step": 33592 }, { "epoch": 0.9807888820764358, "grad_norm": 0.6724905791666093, "learning_rate": 4.26926196269262e-07, "loss": 0.6463, "step": 33593 }, { "epoch": 0.9808180783042831, "grad_norm": 0.7021623264998622, "learning_rate": 4.2627737226277373e-07, "loss": 0.5777, "step": 33594 }, { "epoch": 0.9808472745321305, "grad_norm": 0.6411645422623453, "learning_rate": 4.2562854825628555e-07, "loss": 0.5989, "step": 33595 }, { "epoch": 0.9808764707599779, "grad_norm": 0.6313323245331176, "learning_rate": 4.249797242497973e-07, "loss": 0.5792, "step": 33596 }, { "epoch": 0.9809056669878252, "grad_norm": 0.6443483357090711, "learning_rate": 4.24330900243309e-07, "loss": 0.6506, "step": 33597 }, { "epoch": 0.9809348632156726, "grad_norm": 0.5965047053831682, "learning_rate": 4.236820762368208e-07, "loss": 0.5797, "step": 33598 }, { "epoch": 0.9809640594435199, "grad_norm": 0.6462464644619785, "learning_rate": 4.230332522303326e-07, "loss": 0.637, "step": 33599 }, { "epoch": 0.9809932556713673, "grad_norm": 0.6829208907354702, "learning_rate": 4.223844282238443e-07, "loss": 0.6281, "step": 33600 }, { "epoch": 0.9810224518992147, "grad_norm": 0.6415956831530772, "learning_rate": 4.2173560421735607e-07, "loss": 0.5775, "step": 33601 }, { "epoch": 0.981051648127062, "grad_norm": 0.6139930037671437, "learning_rate": 4.2108678021086783e-07, "loss": 0.552, "step": 33602 }, { "epoch": 0.9810808443549094, "grad_norm": 0.6514284477900307, "learning_rate": 4.2043795620437954e-07, "loss": 0.6085, "step": 33603 }, { "epoch": 0.9811100405827567, "grad_norm": 0.6393683397760342, "learning_rate": 4.1978913219789136e-07, "loss": 0.5919, "step": 33604 }, { "epoch": 0.9811392368106041, "grad_norm": 0.6896565576706297, "learning_rate": 4.191403081914031e-07, "loss": 0.6324, "step": 33605 }, { "epoch": 0.9811684330384515, "grad_norm": 0.715104406562055, "learning_rate": 4.184914841849149e-07, "loss": 0.6821, "step": 33606 }, { "epoch": 0.9811976292662988, "grad_norm": 0.6295345506145512, "learning_rate": 4.178426601784266e-07, "loss": 0.5811, "step": 33607 }, { "epoch": 0.9812268254941462, "grad_norm": 0.6353262667918432, "learning_rate": 4.171938361719384e-07, "loss": 0.5939, "step": 33608 }, { "epoch": 0.9812560217219936, "grad_norm": 0.6097926906406435, "learning_rate": 4.1654501216545017e-07, "loss": 0.4907, "step": 33609 }, { "epoch": 0.9812852179498409, "grad_norm": 0.6393652768320296, "learning_rate": 4.158961881589619e-07, "loss": 0.5835, "step": 33610 }, { "epoch": 0.9813144141776883, "grad_norm": 0.6440291294985251, "learning_rate": 4.152473641524737e-07, "loss": 0.5835, "step": 33611 }, { "epoch": 0.9813436104055356, "grad_norm": 0.6524306881220939, "learning_rate": 4.1459854014598546e-07, "loss": 0.5403, "step": 33612 }, { "epoch": 0.981372806633383, "grad_norm": 0.6622785971756248, "learning_rate": 4.1394971613949717e-07, "loss": 0.5927, "step": 33613 }, { "epoch": 0.9814020028612304, "grad_norm": 0.6181732507972098, "learning_rate": 4.1330089213300893e-07, "loss": 0.5292, "step": 33614 }, { "epoch": 0.9814311990890777, "grad_norm": 0.6249794833074426, "learning_rate": 4.1265206812652075e-07, "loss": 0.5589, "step": 33615 }, { "epoch": 0.9814603953169251, "grad_norm": 0.6113605342116128, "learning_rate": 4.120032441200325e-07, "loss": 0.5317, "step": 33616 }, { "epoch": 0.9814895915447724, "grad_norm": 0.6355692418060925, "learning_rate": 4.113544201135442e-07, "loss": 0.5733, "step": 33617 }, { "epoch": 0.9815187877726198, "grad_norm": 0.7330863112845162, "learning_rate": 4.10705596107056e-07, "loss": 0.6769, "step": 33618 }, { "epoch": 0.9815479840004672, "grad_norm": 0.6549305865052116, "learning_rate": 4.100567721005678e-07, "loss": 0.5882, "step": 33619 }, { "epoch": 0.9815771802283145, "grad_norm": 0.6340790956618256, "learning_rate": 4.094079480940795e-07, "loss": 0.536, "step": 33620 }, { "epoch": 0.9816063764561619, "grad_norm": 0.633053140337191, "learning_rate": 4.0875912408759127e-07, "loss": 0.5456, "step": 33621 }, { "epoch": 0.9816355726840092, "grad_norm": 0.6334202269335942, "learning_rate": 4.0811030008110303e-07, "loss": 0.5645, "step": 33622 }, { "epoch": 0.9816647689118566, "grad_norm": 0.6131681379885888, "learning_rate": 4.074614760746148e-07, "loss": 0.5734, "step": 33623 }, { "epoch": 0.981693965139704, "grad_norm": 0.6406303328342977, "learning_rate": 4.0681265206812656e-07, "loss": 0.6069, "step": 33624 }, { "epoch": 0.9817231613675513, "grad_norm": 0.6733402754738141, "learning_rate": 4.061638280616383e-07, "loss": 0.6418, "step": 33625 }, { "epoch": 0.9817523575953987, "grad_norm": 0.6560595896309995, "learning_rate": 4.0551500405515003e-07, "loss": 0.5891, "step": 33626 }, { "epoch": 0.9817815538232461, "grad_norm": 0.6638017962461233, "learning_rate": 4.0486618004866184e-07, "loss": 0.6568, "step": 33627 }, { "epoch": 0.9818107500510934, "grad_norm": 0.6277943642731731, "learning_rate": 4.042173560421736e-07, "loss": 0.5792, "step": 33628 }, { "epoch": 0.9818399462789408, "grad_norm": 0.6144223468255777, "learning_rate": 4.0356853203568537e-07, "loss": 0.5874, "step": 33629 }, { "epoch": 0.9818691425067881, "grad_norm": 0.639319971655176, "learning_rate": 4.029197080291971e-07, "loss": 0.6092, "step": 33630 }, { "epoch": 0.9818983387346355, "grad_norm": 0.6155958415487692, "learning_rate": 4.022708840227089e-07, "loss": 0.5673, "step": 33631 }, { "epoch": 0.9819275349624829, "grad_norm": 0.6677309377482542, "learning_rate": 4.0162206001622066e-07, "loss": 0.6217, "step": 33632 }, { "epoch": 0.9819567311903302, "grad_norm": 0.6557528461719002, "learning_rate": 4.0097323600973236e-07, "loss": 0.5915, "step": 33633 }, { "epoch": 0.9819859274181776, "grad_norm": 0.6500004584688688, "learning_rate": 4.0032441200324413e-07, "loss": 0.6057, "step": 33634 }, { "epoch": 0.982015123646025, "grad_norm": 0.6693216022502266, "learning_rate": 3.9967558799675594e-07, "loss": 0.6377, "step": 33635 }, { "epoch": 0.9820443198738723, "grad_norm": 0.6272421422378149, "learning_rate": 3.9902676399026765e-07, "loss": 0.5531, "step": 33636 }, { "epoch": 0.9820735161017197, "grad_norm": 0.581103544379933, "learning_rate": 3.983779399837794e-07, "loss": 0.5061, "step": 33637 }, { "epoch": 0.982102712329567, "grad_norm": 0.6158862499088269, "learning_rate": 3.9772911597729123e-07, "loss": 0.5391, "step": 33638 }, { "epoch": 0.9821319085574144, "grad_norm": 0.6547976670401053, "learning_rate": 3.97080291970803e-07, "loss": 0.6292, "step": 33639 }, { "epoch": 0.9821611047852618, "grad_norm": 0.6230522697396672, "learning_rate": 3.964314679643147e-07, "loss": 0.5364, "step": 33640 }, { "epoch": 0.9821903010131091, "grad_norm": 0.6787956832376595, "learning_rate": 3.9578264395782646e-07, "loss": 0.6288, "step": 33641 }, { "epoch": 0.9822194972409565, "grad_norm": 0.6319276681796304, "learning_rate": 3.951338199513383e-07, "loss": 0.572, "step": 33642 }, { "epoch": 0.9822486934688038, "grad_norm": 0.6730187233569774, "learning_rate": 3.9448499594485e-07, "loss": 0.6413, "step": 33643 }, { "epoch": 0.9822778896966512, "grad_norm": 0.711158712534142, "learning_rate": 3.9383617193836175e-07, "loss": 0.6952, "step": 33644 }, { "epoch": 0.9823070859244986, "grad_norm": 0.6882933916513582, "learning_rate": 3.931873479318735e-07, "loss": 0.6577, "step": 33645 }, { "epoch": 0.9823362821523459, "grad_norm": 0.6944691686236426, "learning_rate": 3.925385239253852e-07, "loss": 0.7031, "step": 33646 }, { "epoch": 0.9823654783801933, "grad_norm": 0.6305202951023288, "learning_rate": 3.9188969991889704e-07, "loss": 0.552, "step": 33647 }, { "epoch": 0.9823946746080406, "grad_norm": 0.6654108636077868, "learning_rate": 3.912408759124088e-07, "loss": 0.6037, "step": 33648 }, { "epoch": 0.982423870835888, "grad_norm": 0.6103522121652111, "learning_rate": 3.905920519059205e-07, "loss": 0.5551, "step": 33649 }, { "epoch": 0.9824530670637354, "grad_norm": 0.6813826861098994, "learning_rate": 3.899432278994323e-07, "loss": 0.6722, "step": 33650 }, { "epoch": 0.9824822632915827, "grad_norm": 0.6415659726928007, "learning_rate": 3.892944038929441e-07, "loss": 0.5945, "step": 33651 }, { "epoch": 0.9825114595194301, "grad_norm": 0.6309236549016436, "learning_rate": 3.8864557988645585e-07, "loss": 0.5752, "step": 33652 }, { "epoch": 0.9825406557472774, "grad_norm": 0.6590453187226585, "learning_rate": 3.8799675587996756e-07, "loss": 0.6056, "step": 33653 }, { "epoch": 0.9825698519751248, "grad_norm": 0.8589934413478912, "learning_rate": 3.873479318734794e-07, "loss": 0.6582, "step": 33654 }, { "epoch": 0.9825990482029722, "grad_norm": 0.6334909018843599, "learning_rate": 3.8669910786699114e-07, "loss": 0.5859, "step": 33655 }, { "epoch": 0.9826282444308195, "grad_norm": 0.6831542362111869, "learning_rate": 3.8605028386050285e-07, "loss": 0.6864, "step": 33656 }, { "epoch": 0.9826574406586669, "grad_norm": 0.6426326971735472, "learning_rate": 3.854014598540146e-07, "loss": 0.5634, "step": 33657 }, { "epoch": 0.9826866368865143, "grad_norm": 0.692609435836489, "learning_rate": 3.8475263584752643e-07, "loss": 0.6166, "step": 33658 }, { "epoch": 0.9827158331143616, "grad_norm": 0.6408096203593834, "learning_rate": 3.8410381184103814e-07, "loss": 0.5831, "step": 33659 }, { "epoch": 0.982745029342209, "grad_norm": 0.6874427239449509, "learning_rate": 3.834549878345499e-07, "loss": 0.6677, "step": 33660 }, { "epoch": 0.9827742255700563, "grad_norm": 0.6107132453608382, "learning_rate": 3.8280616382806166e-07, "loss": 0.5363, "step": 33661 }, { "epoch": 0.9828034217979037, "grad_norm": 0.6970479846843122, "learning_rate": 3.8215733982157337e-07, "loss": 0.6962, "step": 33662 }, { "epoch": 0.9828326180257511, "grad_norm": 0.6394330398506732, "learning_rate": 3.815085158150852e-07, "loss": 0.5892, "step": 33663 }, { "epoch": 0.9828618142535984, "grad_norm": 0.6129804649234093, "learning_rate": 3.8085969180859695e-07, "loss": 0.4992, "step": 33664 }, { "epoch": 0.9828910104814458, "grad_norm": 0.6631482227885882, "learning_rate": 3.802108678021087e-07, "loss": 0.5899, "step": 33665 }, { "epoch": 0.9829202067092931, "grad_norm": 0.678828380234319, "learning_rate": 3.795620437956205e-07, "loss": 0.6363, "step": 33666 }, { "epoch": 0.9829494029371405, "grad_norm": 0.6274982515021555, "learning_rate": 3.7891321978913224e-07, "loss": 0.5543, "step": 33667 }, { "epoch": 0.9829785991649879, "grad_norm": 0.6585534550562352, "learning_rate": 3.78264395782644e-07, "loss": 0.5935, "step": 33668 }, { "epoch": 0.9830077953928352, "grad_norm": 0.6403263394829033, "learning_rate": 3.776155717761557e-07, "loss": 0.586, "step": 33669 }, { "epoch": 0.9830369916206826, "grad_norm": 0.6168599619556694, "learning_rate": 3.769667477696675e-07, "loss": 0.5363, "step": 33670 }, { "epoch": 0.98306618784853, "grad_norm": 0.6779119095350764, "learning_rate": 3.763179237631793e-07, "loss": 0.6011, "step": 33671 }, { "epoch": 0.9830953840763773, "grad_norm": 0.6380239713919311, "learning_rate": 3.75669099756691e-07, "loss": 0.5868, "step": 33672 }, { "epoch": 0.9831245803042247, "grad_norm": 0.6630384160050699, "learning_rate": 3.7502027575020276e-07, "loss": 0.6037, "step": 33673 }, { "epoch": 0.983153776532072, "grad_norm": 0.6269311796220853, "learning_rate": 3.743714517437146e-07, "loss": 0.5478, "step": 33674 }, { "epoch": 0.9831829727599194, "grad_norm": 0.6314401261713491, "learning_rate": 3.7372262773722634e-07, "loss": 0.5569, "step": 33675 }, { "epoch": 0.9832121689877668, "grad_norm": 0.6336813830244477, "learning_rate": 3.7307380373073805e-07, "loss": 0.5468, "step": 33676 }, { "epoch": 0.9832413652156141, "grad_norm": 0.6599557996361237, "learning_rate": 3.724249797242498e-07, "loss": 0.6241, "step": 33677 }, { "epoch": 0.9832705614434615, "grad_norm": 0.6510366385702138, "learning_rate": 3.717761557177616e-07, "loss": 0.5768, "step": 33678 }, { "epoch": 0.9832997576713088, "grad_norm": 0.5948493252182586, "learning_rate": 3.7112733171127333e-07, "loss": 0.5205, "step": 33679 }, { "epoch": 0.9833289538991562, "grad_norm": 0.691743044066897, "learning_rate": 3.704785077047851e-07, "loss": 0.6293, "step": 33680 }, { "epoch": 0.9833581501270036, "grad_norm": 0.6701055307595922, "learning_rate": 3.698296836982969e-07, "loss": 0.6156, "step": 33681 }, { "epoch": 0.9833873463548509, "grad_norm": 0.6738138726983229, "learning_rate": 3.691808596918086e-07, "loss": 0.5746, "step": 33682 }, { "epoch": 0.9834165425826983, "grad_norm": 0.570581247608901, "learning_rate": 3.685320356853204e-07, "loss": 0.4581, "step": 33683 }, { "epoch": 0.9834457388105456, "grad_norm": 0.6244163688698852, "learning_rate": 3.6788321167883215e-07, "loss": 0.5595, "step": 33684 }, { "epoch": 0.983474935038393, "grad_norm": 0.6497421685491725, "learning_rate": 3.6723438767234386e-07, "loss": 0.5756, "step": 33685 }, { "epoch": 0.9835041312662404, "grad_norm": 0.6500474399611402, "learning_rate": 3.6658556366585567e-07, "loss": 0.5965, "step": 33686 }, { "epoch": 0.9835333274940877, "grad_norm": 0.6261474094830795, "learning_rate": 3.6593673965936743e-07, "loss": 0.5721, "step": 33687 }, { "epoch": 0.9835625237219351, "grad_norm": 0.7112148568266724, "learning_rate": 3.652879156528792e-07, "loss": 0.7, "step": 33688 }, { "epoch": 0.9835917199497825, "grad_norm": 0.6928086962036009, "learning_rate": 3.646390916463909e-07, "loss": 0.6793, "step": 33689 }, { "epoch": 0.9836209161776298, "grad_norm": 0.631544666757392, "learning_rate": 3.639902676399027e-07, "loss": 0.5603, "step": 33690 }, { "epoch": 0.9836501124054772, "grad_norm": 0.6380401202169071, "learning_rate": 3.633414436334145e-07, "loss": 0.5697, "step": 33691 }, { "epoch": 0.9836793086333245, "grad_norm": 0.5715976503426543, "learning_rate": 3.626926196269262e-07, "loss": 0.489, "step": 33692 }, { "epoch": 0.9837085048611719, "grad_norm": 0.6779403726326207, "learning_rate": 3.62043795620438e-07, "loss": 0.6551, "step": 33693 }, { "epoch": 0.9837377010890193, "grad_norm": 0.6357024940425193, "learning_rate": 3.6139497161394977e-07, "loss": 0.5853, "step": 33694 }, { "epoch": 0.9837668973168666, "grad_norm": 0.5840809609414183, "learning_rate": 3.607461476074615e-07, "loss": 0.49, "step": 33695 }, { "epoch": 0.983796093544714, "grad_norm": 0.6341967521455005, "learning_rate": 3.6009732360097324e-07, "loss": 0.5711, "step": 33696 }, { "epoch": 0.9838252897725613, "grad_norm": 0.5978941807900691, "learning_rate": 3.5944849959448506e-07, "loss": 0.4475, "step": 33697 }, { "epoch": 0.9838544860004087, "grad_norm": 0.6401210331688196, "learning_rate": 3.587996755879968e-07, "loss": 0.595, "step": 33698 }, { "epoch": 0.9838836822282561, "grad_norm": 0.6325512663885311, "learning_rate": 3.5815085158150853e-07, "loss": 0.5766, "step": 33699 }, { "epoch": 0.9839128784561034, "grad_norm": 0.6399209894176769, "learning_rate": 3.575020275750203e-07, "loss": 0.5874, "step": 33700 }, { "epoch": 0.9839420746839508, "grad_norm": 0.63670561797754, "learning_rate": 3.568532035685321e-07, "loss": 0.5629, "step": 33701 }, { "epoch": 0.9839712709117981, "grad_norm": 0.6355473950358641, "learning_rate": 3.562043795620438e-07, "loss": 0.5498, "step": 33702 }, { "epoch": 0.9840004671396455, "grad_norm": 0.5906877617474482, "learning_rate": 3.555555555555556e-07, "loss": 0.5129, "step": 33703 }, { "epoch": 0.9840296633674929, "grad_norm": 0.7042234372974853, "learning_rate": 3.5490673154906734e-07, "loss": 0.722, "step": 33704 }, { "epoch": 0.9840588595953402, "grad_norm": 0.6068167188373524, "learning_rate": 3.5425790754257905e-07, "loss": 0.5119, "step": 33705 }, { "epoch": 0.9840880558231876, "grad_norm": 0.6532161153948364, "learning_rate": 3.5360908353609087e-07, "loss": 0.61, "step": 33706 }, { "epoch": 0.984117252051035, "grad_norm": 0.6500700660689641, "learning_rate": 3.5296025952960263e-07, "loss": 0.5951, "step": 33707 }, { "epoch": 0.9841464482788823, "grad_norm": 0.6176864725082422, "learning_rate": 3.5231143552311434e-07, "loss": 0.5515, "step": 33708 }, { "epoch": 0.9841756445067298, "grad_norm": 0.7110600605999474, "learning_rate": 3.5166261151662616e-07, "loss": 0.6543, "step": 33709 }, { "epoch": 0.9842048407345771, "grad_norm": 0.60022959248256, "learning_rate": 3.510137875101379e-07, "loss": 0.5269, "step": 33710 }, { "epoch": 0.9842340369624245, "grad_norm": 0.6480362909541721, "learning_rate": 3.503649635036497e-07, "loss": 0.6102, "step": 33711 }, { "epoch": 0.9842632331902719, "grad_norm": 0.6749371280108093, "learning_rate": 3.497161394971614e-07, "loss": 0.5988, "step": 33712 }, { "epoch": 0.9842924294181192, "grad_norm": 0.684799163998402, "learning_rate": 3.490673154906732e-07, "loss": 0.641, "step": 33713 }, { "epoch": 0.9843216256459666, "grad_norm": 0.7084008478763765, "learning_rate": 3.4841849148418497e-07, "loss": 0.5962, "step": 33714 }, { "epoch": 0.984350821873814, "grad_norm": 0.6151715017501564, "learning_rate": 3.477696674776967e-07, "loss": 0.5572, "step": 33715 }, { "epoch": 0.9843800181016613, "grad_norm": 0.6912045442083361, "learning_rate": 3.4712084347120844e-07, "loss": 0.6606, "step": 33716 }, { "epoch": 0.9844092143295087, "grad_norm": 0.6553294009425532, "learning_rate": 3.4647201946472026e-07, "loss": 0.6336, "step": 33717 }, { "epoch": 0.984438410557356, "grad_norm": 0.6470350461724556, "learning_rate": 3.4582319545823197e-07, "loss": 0.6037, "step": 33718 }, { "epoch": 0.9844676067852034, "grad_norm": 0.6298633820398711, "learning_rate": 3.4517437145174373e-07, "loss": 0.5899, "step": 33719 }, { "epoch": 0.9844968030130508, "grad_norm": 0.6607774349541955, "learning_rate": 3.445255474452555e-07, "loss": 0.6051, "step": 33720 }, { "epoch": 0.9845259992408981, "grad_norm": 0.6486634828592017, "learning_rate": 3.438767234387673e-07, "loss": 0.6231, "step": 33721 }, { "epoch": 0.9845551954687455, "grad_norm": 0.718858067594877, "learning_rate": 3.43227899432279e-07, "loss": 0.5704, "step": 33722 }, { "epoch": 0.9845843916965928, "grad_norm": 0.6697255457180699, "learning_rate": 3.425790754257908e-07, "loss": 0.6117, "step": 33723 }, { "epoch": 0.9846135879244402, "grad_norm": 0.6469583380802721, "learning_rate": 3.419302514193026e-07, "loss": 0.5822, "step": 33724 }, { "epoch": 0.9846427841522876, "grad_norm": 0.625337840414232, "learning_rate": 3.412814274128143e-07, "loss": 0.5772, "step": 33725 }, { "epoch": 0.9846719803801349, "grad_norm": 0.6464957247365456, "learning_rate": 3.4063260340632607e-07, "loss": 0.5932, "step": 33726 }, { "epoch": 0.9847011766079823, "grad_norm": 0.6248535048787774, "learning_rate": 3.3998377939983783e-07, "loss": 0.5579, "step": 33727 }, { "epoch": 0.9847303728358296, "grad_norm": 0.6433574689202103, "learning_rate": 3.3933495539334954e-07, "loss": 0.6182, "step": 33728 }, { "epoch": 0.984759569063677, "grad_norm": 0.6219290436291057, "learning_rate": 3.3868613138686135e-07, "loss": 0.5547, "step": 33729 }, { "epoch": 0.9847887652915244, "grad_norm": 0.6501876348956657, "learning_rate": 3.380373073803731e-07, "loss": 0.6475, "step": 33730 }, { "epoch": 0.9848179615193717, "grad_norm": 0.6867318199702653, "learning_rate": 3.373884833738848e-07, "loss": 0.6533, "step": 33731 }, { "epoch": 0.9848471577472191, "grad_norm": 0.6077015624194851, "learning_rate": 3.367396593673966e-07, "loss": 0.5754, "step": 33732 }, { "epoch": 0.9848763539750665, "grad_norm": 0.6909665007614307, "learning_rate": 3.360908353609084e-07, "loss": 0.6479, "step": 33733 }, { "epoch": 0.9849055502029138, "grad_norm": 0.5934746564375709, "learning_rate": 3.3544201135442017e-07, "loss": 0.5035, "step": 33734 }, { "epoch": 0.9849347464307612, "grad_norm": 0.6323494031044563, "learning_rate": 3.347931873479319e-07, "loss": 0.5516, "step": 33735 }, { "epoch": 0.9849639426586085, "grad_norm": 0.6056038122288128, "learning_rate": 3.341443633414437e-07, "loss": 0.5468, "step": 33736 }, { "epoch": 0.9849931388864559, "grad_norm": 0.637177165749168, "learning_rate": 3.3349553933495545e-07, "loss": 0.5566, "step": 33737 }, { "epoch": 0.9850223351143033, "grad_norm": 0.6814188774529172, "learning_rate": 3.3284671532846716e-07, "loss": 0.6089, "step": 33738 }, { "epoch": 0.9850515313421506, "grad_norm": 0.6730875251879924, "learning_rate": 3.321978913219789e-07, "loss": 0.6374, "step": 33739 }, { "epoch": 0.985080727569998, "grad_norm": 0.6479815393195519, "learning_rate": 3.3154906731549074e-07, "loss": 0.6141, "step": 33740 }, { "epoch": 0.9851099237978453, "grad_norm": 0.6332322549476179, "learning_rate": 3.3090024330900245e-07, "loss": 0.5763, "step": 33741 }, { "epoch": 0.9851391200256927, "grad_norm": 0.6473586361001737, "learning_rate": 3.302514193025142e-07, "loss": 0.6201, "step": 33742 }, { "epoch": 0.9851683162535401, "grad_norm": 0.6319396643427453, "learning_rate": 3.29602595296026e-07, "loss": 0.5825, "step": 33743 }, { "epoch": 0.9851975124813874, "grad_norm": 0.6613238725508397, "learning_rate": 3.289537712895378e-07, "loss": 0.6368, "step": 33744 }, { "epoch": 0.9852267087092348, "grad_norm": 0.6290358025831234, "learning_rate": 3.283049472830495e-07, "loss": 0.561, "step": 33745 }, { "epoch": 0.9852559049370821, "grad_norm": 0.6591004376525863, "learning_rate": 3.2765612327656126e-07, "loss": 0.5935, "step": 33746 }, { "epoch": 0.9852851011649295, "grad_norm": 0.697404534671171, "learning_rate": 3.27007299270073e-07, "loss": 0.5845, "step": 33747 }, { "epoch": 0.9853142973927769, "grad_norm": 0.6043043544259175, "learning_rate": 3.2635847526358473e-07, "loss": 0.5448, "step": 33748 }, { "epoch": 0.9853434936206242, "grad_norm": 0.6807766127972266, "learning_rate": 3.2570965125709655e-07, "loss": 0.5933, "step": 33749 }, { "epoch": 0.9853726898484716, "grad_norm": 0.627830877670239, "learning_rate": 3.250608272506083e-07, "loss": 0.5553, "step": 33750 }, { "epoch": 0.985401886076319, "grad_norm": 0.6487243892509633, "learning_rate": 3.2441200324412e-07, "loss": 0.5911, "step": 33751 }, { "epoch": 0.9854310823041663, "grad_norm": 0.6354135417532032, "learning_rate": 3.2376317923763184e-07, "loss": 0.5835, "step": 33752 }, { "epoch": 0.9854602785320137, "grad_norm": 0.6993971900425678, "learning_rate": 3.231143552311436e-07, "loss": 0.6472, "step": 33753 }, { "epoch": 0.985489474759861, "grad_norm": 0.6467110272235159, "learning_rate": 3.224655312246553e-07, "loss": 0.5616, "step": 33754 }, { "epoch": 0.9855186709877084, "grad_norm": 0.6143737235267662, "learning_rate": 3.2181670721816707e-07, "loss": 0.5457, "step": 33755 }, { "epoch": 0.9855478672155558, "grad_norm": 0.6524530694725136, "learning_rate": 3.211678832116789e-07, "loss": 0.6112, "step": 33756 }, { "epoch": 0.9855770634434031, "grad_norm": 0.6309751475514636, "learning_rate": 3.2051905920519065e-07, "loss": 0.5541, "step": 33757 }, { "epoch": 0.9856062596712505, "grad_norm": 0.6516765483022524, "learning_rate": 3.1987023519870236e-07, "loss": 0.5468, "step": 33758 }, { "epoch": 0.9856354558990978, "grad_norm": 0.6238853228861876, "learning_rate": 3.192214111922141e-07, "loss": 0.5216, "step": 33759 }, { "epoch": 0.9856646521269452, "grad_norm": 0.6479030063269067, "learning_rate": 3.1857258718572594e-07, "loss": 0.5785, "step": 33760 }, { "epoch": 0.9856938483547926, "grad_norm": 0.6258956225864895, "learning_rate": 3.1792376317923765e-07, "loss": 0.5934, "step": 33761 }, { "epoch": 0.9857230445826399, "grad_norm": 0.6578335036698215, "learning_rate": 3.172749391727494e-07, "loss": 0.5889, "step": 33762 }, { "epoch": 0.9857522408104873, "grad_norm": 0.6566646692009711, "learning_rate": 3.1662611516626117e-07, "loss": 0.5997, "step": 33763 }, { "epoch": 0.9857814370383347, "grad_norm": 0.6681577196735289, "learning_rate": 3.1597729115977293e-07, "loss": 0.6203, "step": 33764 }, { "epoch": 0.985810633266182, "grad_norm": 0.6555874445718105, "learning_rate": 3.153284671532847e-07, "loss": 0.6072, "step": 33765 }, { "epoch": 0.9858398294940294, "grad_norm": 0.6554628821000594, "learning_rate": 3.1467964314679646e-07, "loss": 0.5811, "step": 33766 }, { "epoch": 0.9858690257218767, "grad_norm": 0.6511402936672566, "learning_rate": 3.140308191403083e-07, "loss": 0.6249, "step": 33767 }, { "epoch": 0.9858982219497241, "grad_norm": 0.6432136188534358, "learning_rate": 3.1338199513382e-07, "loss": 0.5689, "step": 33768 }, { "epoch": 0.9859274181775715, "grad_norm": 0.6311555696880974, "learning_rate": 3.1273317112733175e-07, "loss": 0.5742, "step": 33769 }, { "epoch": 0.9859566144054188, "grad_norm": 0.6694415935840312, "learning_rate": 3.120843471208435e-07, "loss": 0.6052, "step": 33770 }, { "epoch": 0.9859858106332662, "grad_norm": 0.6140384827892246, "learning_rate": 3.1143552311435527e-07, "loss": 0.5611, "step": 33771 }, { "epoch": 0.9860150068611135, "grad_norm": 0.5970977083476782, "learning_rate": 3.1078669910786703e-07, "loss": 0.5267, "step": 33772 }, { "epoch": 0.9860442030889609, "grad_norm": 0.6429953825231222, "learning_rate": 3.1013787510137874e-07, "loss": 0.604, "step": 33773 }, { "epoch": 0.9860733993168083, "grad_norm": 0.6684835921295237, "learning_rate": 3.0948905109489056e-07, "loss": 0.62, "step": 33774 }, { "epoch": 0.9861025955446556, "grad_norm": 0.6999318491628675, "learning_rate": 3.0884022708840227e-07, "loss": 0.6712, "step": 33775 }, { "epoch": 0.986131791772503, "grad_norm": 0.6473086392434603, "learning_rate": 3.081914030819141e-07, "loss": 0.5938, "step": 33776 }, { "epoch": 0.9861609880003503, "grad_norm": 0.6474807902751234, "learning_rate": 3.075425790754258e-07, "loss": 0.6028, "step": 33777 }, { "epoch": 0.9861901842281977, "grad_norm": 0.6651122065646001, "learning_rate": 3.0689375506893756e-07, "loss": 0.6159, "step": 33778 }, { "epoch": 0.9862193804560451, "grad_norm": 0.6263782891931213, "learning_rate": 3.0624493106244937e-07, "loss": 0.5385, "step": 33779 }, { "epoch": 0.9862485766838924, "grad_norm": 0.6198814294435232, "learning_rate": 3.055961070559611e-07, "loss": 0.5625, "step": 33780 }, { "epoch": 0.9862777729117398, "grad_norm": 0.6323576073262498, "learning_rate": 3.0494728304947284e-07, "loss": 0.6296, "step": 33781 }, { "epoch": 0.9863069691395872, "grad_norm": 0.6372671676087905, "learning_rate": 3.042984590429846e-07, "loss": 0.5219, "step": 33782 }, { "epoch": 0.9863361653674345, "grad_norm": 0.6386572365304652, "learning_rate": 3.0364963503649637e-07, "loss": 0.5957, "step": 33783 }, { "epoch": 0.9863653615952819, "grad_norm": 0.6555720698156322, "learning_rate": 3.0300081103000813e-07, "loss": 0.6145, "step": 33784 }, { "epoch": 0.9863945578231292, "grad_norm": 0.6044298254763761, "learning_rate": 3.023519870235199e-07, "loss": 0.5503, "step": 33785 }, { "epoch": 0.9864237540509766, "grad_norm": 0.6885259494257981, "learning_rate": 3.0170316301703166e-07, "loss": 0.636, "step": 33786 }, { "epoch": 0.986452950278824, "grad_norm": 0.6196316383377747, "learning_rate": 3.010543390105434e-07, "loss": 0.548, "step": 33787 }, { "epoch": 0.9864821465066713, "grad_norm": 0.7118843858931868, "learning_rate": 3.004055150040552e-07, "loss": 0.7132, "step": 33788 }, { "epoch": 0.9865113427345187, "grad_norm": 0.6544086371591131, "learning_rate": 2.9975669099756694e-07, "loss": 0.6045, "step": 33789 }, { "epoch": 0.986540538962366, "grad_norm": 0.6274946950156939, "learning_rate": 2.991078669910787e-07, "loss": 0.5841, "step": 33790 }, { "epoch": 0.9865697351902134, "grad_norm": 0.6629165843667734, "learning_rate": 2.984590429845904e-07, "loss": 0.6232, "step": 33791 }, { "epoch": 0.9865989314180608, "grad_norm": 0.6478350532090745, "learning_rate": 2.9781021897810223e-07, "loss": 0.6137, "step": 33792 }, { "epoch": 0.9866281276459081, "grad_norm": 0.6049009634527637, "learning_rate": 2.97161394971614e-07, "loss": 0.5216, "step": 33793 }, { "epoch": 0.9866573238737555, "grad_norm": 0.657742999068128, "learning_rate": 2.9651257096512576e-07, "loss": 0.6391, "step": 33794 }, { "epoch": 0.9866865201016028, "grad_norm": 0.6841600889297415, "learning_rate": 2.958637469586375e-07, "loss": 0.6126, "step": 33795 }, { "epoch": 0.9867157163294502, "grad_norm": 0.606301354053534, "learning_rate": 2.9521492295214923e-07, "loss": 0.5669, "step": 33796 }, { "epoch": 0.9867449125572976, "grad_norm": 0.6143038103150039, "learning_rate": 2.9456609894566104e-07, "loss": 0.5368, "step": 33797 }, { "epoch": 0.9867741087851449, "grad_norm": 0.6563472036924818, "learning_rate": 2.9391727493917275e-07, "loss": 0.614, "step": 33798 }, { "epoch": 0.9868033050129923, "grad_norm": 0.6356782807391931, "learning_rate": 2.932684509326845e-07, "loss": 0.5576, "step": 33799 }, { "epoch": 0.9868325012408397, "grad_norm": 0.6551659952217047, "learning_rate": 2.926196269261963e-07, "loss": 0.61, "step": 33800 }, { "epoch": 0.986861697468687, "grad_norm": 0.6744588645160167, "learning_rate": 2.9197080291970804e-07, "loss": 0.5773, "step": 33801 }, { "epoch": 0.9868908936965344, "grad_norm": 0.6747153655571481, "learning_rate": 2.913219789132198e-07, "loss": 0.5955, "step": 33802 }, { "epoch": 0.9869200899243817, "grad_norm": 0.6527594606143862, "learning_rate": 2.9067315490673157e-07, "loss": 0.5871, "step": 33803 }, { "epoch": 0.9869492861522291, "grad_norm": 0.6500787903196829, "learning_rate": 2.9002433090024333e-07, "loss": 0.6302, "step": 33804 }, { "epoch": 0.9869784823800765, "grad_norm": 0.629660438153355, "learning_rate": 2.893755068937551e-07, "loss": 0.5797, "step": 33805 }, { "epoch": 0.9870076786079238, "grad_norm": 0.6601913686886499, "learning_rate": 2.8872668288726685e-07, "loss": 0.6365, "step": 33806 }, { "epoch": 0.9870368748357712, "grad_norm": 0.6199774824875112, "learning_rate": 2.880778588807786e-07, "loss": 0.5465, "step": 33807 }, { "epoch": 0.9870660710636185, "grad_norm": 0.6547590965780055, "learning_rate": 2.874290348742904e-07, "loss": 0.5778, "step": 33808 }, { "epoch": 0.9870952672914659, "grad_norm": 0.6075758870939456, "learning_rate": 2.8678021086780214e-07, "loss": 0.5206, "step": 33809 }, { "epoch": 0.9871244635193133, "grad_norm": 0.6305319331463304, "learning_rate": 2.861313868613139e-07, "loss": 0.5506, "step": 33810 }, { "epoch": 0.9871536597471606, "grad_norm": 0.8404078544569598, "learning_rate": 2.8548256285482567e-07, "loss": 0.6447, "step": 33811 }, { "epoch": 0.987182855975008, "grad_norm": 0.6360315270597473, "learning_rate": 2.8483373884833743e-07, "loss": 0.5915, "step": 33812 }, { "epoch": 0.9872120522028554, "grad_norm": 0.6130020915550821, "learning_rate": 2.841849148418492e-07, "loss": 0.5339, "step": 33813 }, { "epoch": 0.9872412484307027, "grad_norm": 0.6322211068370913, "learning_rate": 2.835360908353609e-07, "loss": 0.5461, "step": 33814 }, { "epoch": 0.9872704446585501, "grad_norm": 0.6535116571399847, "learning_rate": 2.828872668288727e-07, "loss": 0.604, "step": 33815 }, { "epoch": 0.9872996408863974, "grad_norm": 0.6174108855826486, "learning_rate": 2.822384428223844e-07, "loss": 0.5572, "step": 33816 }, { "epoch": 0.9873288371142448, "grad_norm": 0.6396489689332087, "learning_rate": 2.8158961881589624e-07, "loss": 0.5486, "step": 33817 }, { "epoch": 0.9873580333420922, "grad_norm": 0.6481824586267392, "learning_rate": 2.8094079480940795e-07, "loss": 0.5876, "step": 33818 }, { "epoch": 0.9873872295699395, "grad_norm": 0.6263754422425922, "learning_rate": 2.802919708029197e-07, "loss": 0.5868, "step": 33819 }, { "epoch": 0.9874164257977869, "grad_norm": 0.6519282453374727, "learning_rate": 2.796431467964315e-07, "loss": 0.6213, "step": 33820 }, { "epoch": 0.9874456220256342, "grad_norm": 0.6349910725417998, "learning_rate": 2.7899432278994324e-07, "loss": 0.5606, "step": 33821 }, { "epoch": 0.9874748182534816, "grad_norm": 0.6357271534007906, "learning_rate": 2.78345498783455e-07, "loss": 0.553, "step": 33822 }, { "epoch": 0.987504014481329, "grad_norm": 0.6569414866007788, "learning_rate": 2.7769667477696676e-07, "loss": 0.5983, "step": 33823 }, { "epoch": 0.9875332107091763, "grad_norm": 0.600459136767333, "learning_rate": 2.770478507704785e-07, "loss": 0.5213, "step": 33824 }, { "epoch": 0.9875624069370237, "grad_norm": 0.615478576346292, "learning_rate": 2.763990267639903e-07, "loss": 0.532, "step": 33825 }, { "epoch": 0.987591603164871, "grad_norm": 0.6105731776700076, "learning_rate": 2.7575020275750205e-07, "loss": 0.5454, "step": 33826 }, { "epoch": 0.9876207993927184, "grad_norm": 0.6421334019519159, "learning_rate": 2.751013787510138e-07, "loss": 0.5683, "step": 33827 }, { "epoch": 0.9876499956205658, "grad_norm": 0.6121717422342291, "learning_rate": 2.744525547445256e-07, "loss": 0.5596, "step": 33828 }, { "epoch": 0.9876791918484132, "grad_norm": 0.6182715284155618, "learning_rate": 2.7380373073803734e-07, "loss": 0.5799, "step": 33829 }, { "epoch": 0.9877083880762606, "grad_norm": 0.6358041169997095, "learning_rate": 2.731549067315491e-07, "loss": 0.6085, "step": 33830 }, { "epoch": 0.987737584304108, "grad_norm": 0.656135081698324, "learning_rate": 2.7250608272506086e-07, "loss": 0.648, "step": 33831 }, { "epoch": 0.9877667805319553, "grad_norm": 0.6635283566622553, "learning_rate": 2.7185725871857257e-07, "loss": 0.6107, "step": 33832 }, { "epoch": 0.9877959767598027, "grad_norm": 0.6339286665967413, "learning_rate": 2.712084347120844e-07, "loss": 0.5473, "step": 33833 }, { "epoch": 0.98782517298765, "grad_norm": 0.6588423323284607, "learning_rate": 2.705596107055961e-07, "loss": 0.6118, "step": 33834 }, { "epoch": 0.9878543692154974, "grad_norm": 0.7056292562898439, "learning_rate": 2.699107866991079e-07, "loss": 0.699, "step": 33835 }, { "epoch": 0.9878835654433448, "grad_norm": 0.6521753012585562, "learning_rate": 2.692619626926197e-07, "loss": 0.5906, "step": 33836 }, { "epoch": 0.9879127616711921, "grad_norm": 0.618918803560497, "learning_rate": 2.686131386861314e-07, "loss": 0.5518, "step": 33837 }, { "epoch": 0.9879419578990395, "grad_norm": 0.6711057880182706, "learning_rate": 2.679643146796432e-07, "loss": 0.6151, "step": 33838 }, { "epoch": 0.9879711541268869, "grad_norm": 0.6474466477960532, "learning_rate": 2.673154906731549e-07, "loss": 0.5789, "step": 33839 }, { "epoch": 0.9880003503547342, "grad_norm": 0.6094531921176968, "learning_rate": 2.666666666666667e-07, "loss": 0.5311, "step": 33840 }, { "epoch": 0.9880295465825816, "grad_norm": 0.6716856570194167, "learning_rate": 2.6601784266017843e-07, "loss": 0.6463, "step": 33841 }, { "epoch": 0.9880587428104289, "grad_norm": 0.6294864468538778, "learning_rate": 2.653690186536902e-07, "loss": 0.5817, "step": 33842 }, { "epoch": 0.9880879390382763, "grad_norm": 0.6711084888849499, "learning_rate": 2.6472019464720196e-07, "loss": 0.643, "step": 33843 }, { "epoch": 0.9881171352661237, "grad_norm": 0.6580496004682639, "learning_rate": 2.640713706407137e-07, "loss": 0.5702, "step": 33844 }, { "epoch": 0.988146331493971, "grad_norm": 0.7020120225248336, "learning_rate": 2.634225466342255e-07, "loss": 0.661, "step": 33845 }, { "epoch": 0.9881755277218184, "grad_norm": 0.6493347098616935, "learning_rate": 2.6277372262773725e-07, "loss": 0.5442, "step": 33846 }, { "epoch": 0.9882047239496657, "grad_norm": 0.6212059811762711, "learning_rate": 2.62124898621249e-07, "loss": 0.5735, "step": 33847 }, { "epoch": 0.9882339201775131, "grad_norm": 0.6350847513677104, "learning_rate": 2.6147607461476077e-07, "loss": 0.5571, "step": 33848 }, { "epoch": 0.9882631164053605, "grad_norm": 0.6773712697658486, "learning_rate": 2.6082725060827253e-07, "loss": 0.5561, "step": 33849 }, { "epoch": 0.9882923126332078, "grad_norm": 0.6631734045973621, "learning_rate": 2.601784266017843e-07, "loss": 0.6338, "step": 33850 }, { "epoch": 0.9883215088610552, "grad_norm": 0.6339765004648552, "learning_rate": 2.5952960259529606e-07, "loss": 0.5878, "step": 33851 }, { "epoch": 0.9883507050889025, "grad_norm": 0.5838628340091331, "learning_rate": 2.588807785888078e-07, "loss": 0.4975, "step": 33852 }, { "epoch": 0.9883799013167499, "grad_norm": 0.6690611168418721, "learning_rate": 2.582319545823196e-07, "loss": 0.6075, "step": 33853 }, { "epoch": 0.9884090975445973, "grad_norm": 0.6215282295641443, "learning_rate": 2.5758313057583135e-07, "loss": 0.5788, "step": 33854 }, { "epoch": 0.9884382937724446, "grad_norm": 0.6285304685034866, "learning_rate": 2.5693430656934306e-07, "loss": 0.5697, "step": 33855 }, { "epoch": 0.988467490000292, "grad_norm": 0.6489264937613378, "learning_rate": 2.5628548256285487e-07, "loss": 0.6124, "step": 33856 }, { "epoch": 0.9884966862281394, "grad_norm": 0.6580018152650093, "learning_rate": 2.556366585563666e-07, "loss": 0.6245, "step": 33857 }, { "epoch": 0.9885258824559867, "grad_norm": 0.6304641927929666, "learning_rate": 2.549878345498784e-07, "loss": 0.5564, "step": 33858 }, { "epoch": 0.9885550786838341, "grad_norm": 0.6247647147238876, "learning_rate": 2.543390105433901e-07, "loss": 0.5026, "step": 33859 }, { "epoch": 0.9885842749116814, "grad_norm": 0.6135752343245517, "learning_rate": 2.5369018653690187e-07, "loss": 0.5469, "step": 33860 }, { "epoch": 0.9886134711395288, "grad_norm": 0.6502202726636892, "learning_rate": 2.5304136253041363e-07, "loss": 0.574, "step": 33861 }, { "epoch": 0.9886426673673762, "grad_norm": 0.6363504003395574, "learning_rate": 2.523925385239254e-07, "loss": 0.5465, "step": 33862 }, { "epoch": 0.9886718635952235, "grad_norm": 0.6652462373888308, "learning_rate": 2.5174371451743716e-07, "loss": 0.6243, "step": 33863 }, { "epoch": 0.9887010598230709, "grad_norm": 0.6636203812587922, "learning_rate": 2.510948905109489e-07, "loss": 0.6448, "step": 33864 }, { "epoch": 0.9887302560509182, "grad_norm": 0.6451435234498402, "learning_rate": 2.504460665044607e-07, "loss": 0.5796, "step": 33865 }, { "epoch": 0.9887594522787656, "grad_norm": 0.6373473049682279, "learning_rate": 2.4979724249797244e-07, "loss": 0.5914, "step": 33866 }, { "epoch": 0.988788648506613, "grad_norm": 0.6313134997918741, "learning_rate": 2.491484184914842e-07, "loss": 0.5533, "step": 33867 }, { "epoch": 0.9888178447344603, "grad_norm": 0.7518806524098631, "learning_rate": 2.4849959448499597e-07, "loss": 0.636, "step": 33868 }, { "epoch": 0.9888470409623077, "grad_norm": 0.6034619180496817, "learning_rate": 2.4785077047850773e-07, "loss": 0.5151, "step": 33869 }, { "epoch": 0.988876237190155, "grad_norm": 0.6504667478351656, "learning_rate": 2.472019464720195e-07, "loss": 0.5926, "step": 33870 }, { "epoch": 0.9889054334180024, "grad_norm": 0.645512032739405, "learning_rate": 2.4655312246553126e-07, "loss": 0.5973, "step": 33871 }, { "epoch": 0.9889346296458498, "grad_norm": 0.6614462620503891, "learning_rate": 2.45904298459043e-07, "loss": 0.6062, "step": 33872 }, { "epoch": 0.9889638258736971, "grad_norm": 0.6810814844257733, "learning_rate": 2.4525547445255473e-07, "loss": 0.6423, "step": 33873 }, { "epoch": 0.9889930221015445, "grad_norm": 0.6512915120747113, "learning_rate": 2.4460665044606654e-07, "loss": 0.6427, "step": 33874 }, { "epoch": 0.9890222183293919, "grad_norm": 0.6266183556927711, "learning_rate": 2.4395782643957825e-07, "loss": 0.5605, "step": 33875 }, { "epoch": 0.9890514145572392, "grad_norm": 0.6174154814281633, "learning_rate": 2.4330900243309007e-07, "loss": 0.5156, "step": 33876 }, { "epoch": 0.9890806107850866, "grad_norm": 0.6865904223242946, "learning_rate": 2.4266017842660183e-07, "loss": 0.6489, "step": 33877 }, { "epoch": 0.9891098070129339, "grad_norm": 0.6320614832577914, "learning_rate": 2.4201135442011354e-07, "loss": 0.5675, "step": 33878 }, { "epoch": 0.9891390032407813, "grad_norm": 0.6511805167609779, "learning_rate": 2.4136253041362536e-07, "loss": 0.6085, "step": 33879 }, { "epoch": 0.9891681994686287, "grad_norm": 0.6678805993838418, "learning_rate": 2.4071370640713707e-07, "loss": 0.6054, "step": 33880 }, { "epoch": 0.989197395696476, "grad_norm": 0.688798048582061, "learning_rate": 2.400648824006489e-07, "loss": 0.6925, "step": 33881 }, { "epoch": 0.9892265919243234, "grad_norm": 0.6312840972809189, "learning_rate": 2.394160583941606e-07, "loss": 0.5364, "step": 33882 }, { "epoch": 0.9892557881521707, "grad_norm": 0.602770861476068, "learning_rate": 2.3876723438767235e-07, "loss": 0.5086, "step": 33883 }, { "epoch": 0.9892849843800181, "grad_norm": 0.6557916554286775, "learning_rate": 2.3811841038118414e-07, "loss": 0.614, "step": 33884 }, { "epoch": 0.9893141806078655, "grad_norm": 0.6290098136308938, "learning_rate": 2.3746958637469588e-07, "loss": 0.5476, "step": 33885 }, { "epoch": 0.9893433768357128, "grad_norm": 0.6407727069712095, "learning_rate": 2.3682076236820761e-07, "loss": 0.613, "step": 33886 }, { "epoch": 0.9893725730635602, "grad_norm": 0.6378247785719742, "learning_rate": 2.361719383617194e-07, "loss": 0.591, "step": 33887 }, { "epoch": 0.9894017692914076, "grad_norm": 0.6110330084273592, "learning_rate": 2.3552311435523117e-07, "loss": 0.5525, "step": 33888 }, { "epoch": 0.9894309655192549, "grad_norm": 0.679279985381938, "learning_rate": 2.3487429034874293e-07, "loss": 0.6836, "step": 33889 }, { "epoch": 0.9894601617471023, "grad_norm": 0.6531305647490904, "learning_rate": 2.342254663422547e-07, "loss": 0.6012, "step": 33890 }, { "epoch": 0.9894893579749496, "grad_norm": 0.6491277078137057, "learning_rate": 2.3357664233576643e-07, "loss": 0.6175, "step": 33891 }, { "epoch": 0.989518554202797, "grad_norm": 0.6516903595788764, "learning_rate": 2.3292781832927822e-07, "loss": 0.6045, "step": 33892 }, { "epoch": 0.9895477504306444, "grad_norm": 0.6447956498206938, "learning_rate": 2.3227899432278995e-07, "loss": 0.5819, "step": 33893 }, { "epoch": 0.9895769466584917, "grad_norm": 0.7131787132800008, "learning_rate": 2.3163017031630174e-07, "loss": 0.7469, "step": 33894 }, { "epoch": 0.9896061428863391, "grad_norm": 0.7355343020469145, "learning_rate": 2.3098134630981348e-07, "loss": 0.6991, "step": 33895 }, { "epoch": 0.9896353391141864, "grad_norm": 0.6529140482910016, "learning_rate": 2.3033252230332524e-07, "loss": 0.6304, "step": 33896 }, { "epoch": 0.9896645353420338, "grad_norm": 0.640188922108875, "learning_rate": 2.29683698296837e-07, "loss": 0.5781, "step": 33897 }, { "epoch": 0.9896937315698812, "grad_norm": 0.6676727559116425, "learning_rate": 2.2903487429034876e-07, "loss": 0.5497, "step": 33898 }, { "epoch": 0.9897229277977285, "grad_norm": 0.6137075587608122, "learning_rate": 2.2838605028386053e-07, "loss": 0.5466, "step": 33899 }, { "epoch": 0.9897521240255759, "grad_norm": 0.6222913550429177, "learning_rate": 2.277372262773723e-07, "loss": 0.5522, "step": 33900 }, { "epoch": 0.9897813202534232, "grad_norm": 1.1698766059890575, "learning_rate": 2.2708840227088403e-07, "loss": 0.7526, "step": 33901 }, { "epoch": 0.9898105164812706, "grad_norm": 0.6459744227285501, "learning_rate": 2.2643957826439581e-07, "loss": 0.5542, "step": 33902 }, { "epoch": 0.989839712709118, "grad_norm": 0.6540214136213054, "learning_rate": 2.2579075425790755e-07, "loss": 0.6384, "step": 33903 }, { "epoch": 0.9898689089369653, "grad_norm": 0.6907529894941187, "learning_rate": 2.2514193025141931e-07, "loss": 0.6494, "step": 33904 }, { "epoch": 0.9898981051648127, "grad_norm": 0.6908327671643262, "learning_rate": 2.2449310624493108e-07, "loss": 0.6058, "step": 33905 }, { "epoch": 0.98992730139266, "grad_norm": 0.7050834329504793, "learning_rate": 2.2384428223844284e-07, "loss": 0.6504, "step": 33906 }, { "epoch": 0.9899564976205074, "grad_norm": 0.6423128712020608, "learning_rate": 2.231954582319546e-07, "loss": 0.5846, "step": 33907 }, { "epoch": 0.9899856938483548, "grad_norm": 0.6692003607806783, "learning_rate": 2.2254663422546636e-07, "loss": 0.6691, "step": 33908 }, { "epoch": 0.9900148900762021, "grad_norm": 0.685736456929692, "learning_rate": 2.218978102189781e-07, "loss": 0.6031, "step": 33909 }, { "epoch": 0.9900440863040495, "grad_norm": 0.69766408453199, "learning_rate": 2.212489862124899e-07, "loss": 0.7103, "step": 33910 }, { "epoch": 0.9900732825318969, "grad_norm": 0.6292828318395851, "learning_rate": 2.2060016220600162e-07, "loss": 0.6054, "step": 33911 }, { "epoch": 0.9901024787597442, "grad_norm": 0.6385062239780152, "learning_rate": 2.1995133819951341e-07, "loss": 0.5678, "step": 33912 }, { "epoch": 0.9901316749875916, "grad_norm": 0.6558030954856862, "learning_rate": 2.1930251419302515e-07, "loss": 0.6721, "step": 33913 }, { "epoch": 0.9901608712154389, "grad_norm": 0.6231644734779204, "learning_rate": 2.186536901865369e-07, "loss": 0.5695, "step": 33914 }, { "epoch": 0.9901900674432863, "grad_norm": 0.662569754178516, "learning_rate": 2.1800486618004867e-07, "loss": 0.5601, "step": 33915 }, { "epoch": 0.9902192636711337, "grad_norm": 0.6391015353203268, "learning_rate": 2.1735604217356044e-07, "loss": 0.5475, "step": 33916 }, { "epoch": 0.990248459898981, "grad_norm": 0.6488664307975954, "learning_rate": 2.1670721816707223e-07, "loss": 0.6079, "step": 33917 }, { "epoch": 0.9902776561268284, "grad_norm": 0.6621658083127758, "learning_rate": 2.1605839416058396e-07, "loss": 0.6112, "step": 33918 }, { "epoch": 0.9903068523546757, "grad_norm": 0.6289731410158578, "learning_rate": 2.154095701540957e-07, "loss": 0.5602, "step": 33919 }, { "epoch": 0.9903360485825231, "grad_norm": 0.6169591587332535, "learning_rate": 2.147607461476075e-07, "loss": 0.5405, "step": 33920 }, { "epoch": 0.9903652448103705, "grad_norm": 0.651211028114376, "learning_rate": 2.1411192214111922e-07, "loss": 0.5576, "step": 33921 }, { "epoch": 0.9903944410382178, "grad_norm": 0.6635146080064812, "learning_rate": 2.13463098134631e-07, "loss": 0.654, "step": 33922 }, { "epoch": 0.9904236372660652, "grad_norm": 0.6076701280111355, "learning_rate": 2.1281427412814277e-07, "loss": 0.5403, "step": 33923 }, { "epoch": 0.9904528334939126, "grad_norm": 0.6025475701345778, "learning_rate": 2.121654501216545e-07, "loss": 0.5338, "step": 33924 }, { "epoch": 0.9904820297217599, "grad_norm": 0.5976755544659758, "learning_rate": 2.115166261151663e-07, "loss": 0.5168, "step": 33925 }, { "epoch": 0.9905112259496073, "grad_norm": 0.6536400794920497, "learning_rate": 2.1086780210867804e-07, "loss": 0.6196, "step": 33926 }, { "epoch": 0.9905404221774546, "grad_norm": 0.6946422494190668, "learning_rate": 2.1021897810218977e-07, "loss": 0.6472, "step": 33927 }, { "epoch": 0.990569618405302, "grad_norm": 0.6894695223006264, "learning_rate": 2.0957015409570156e-07, "loss": 0.6462, "step": 33928 }, { "epoch": 0.9905988146331494, "grad_norm": 0.6361810374860876, "learning_rate": 2.089213300892133e-07, "loss": 0.5672, "step": 33929 }, { "epoch": 0.9906280108609967, "grad_norm": 0.6666634534737954, "learning_rate": 2.0827250608272509e-07, "loss": 0.6368, "step": 33930 }, { "epoch": 0.9906572070888441, "grad_norm": 0.6214918297170728, "learning_rate": 2.0762368207623685e-07, "loss": 0.582, "step": 33931 }, { "epoch": 0.9906864033166914, "grad_norm": 0.6520407761682477, "learning_rate": 2.0697485806974858e-07, "loss": 0.5631, "step": 33932 }, { "epoch": 0.9907155995445388, "grad_norm": 0.6561271016555061, "learning_rate": 2.0632603406326037e-07, "loss": 0.6339, "step": 33933 }, { "epoch": 0.9907447957723862, "grad_norm": 0.6023711516488273, "learning_rate": 2.056772100567721e-07, "loss": 0.5344, "step": 33934 }, { "epoch": 0.9907739920002335, "grad_norm": 0.654547383258162, "learning_rate": 2.050283860502839e-07, "loss": 0.6295, "step": 33935 }, { "epoch": 0.9908031882280809, "grad_norm": 0.6649399269466721, "learning_rate": 2.0437956204379563e-07, "loss": 0.5878, "step": 33936 }, { "epoch": 0.9908323844559283, "grad_norm": 0.6276422234727581, "learning_rate": 2.037307380373074e-07, "loss": 0.5506, "step": 33937 }, { "epoch": 0.9908615806837756, "grad_norm": 0.63312854518299, "learning_rate": 2.0308191403081916e-07, "loss": 0.5729, "step": 33938 }, { "epoch": 0.990890776911623, "grad_norm": 0.6166302103063914, "learning_rate": 2.0243309002433092e-07, "loss": 0.5484, "step": 33939 }, { "epoch": 0.9909199731394703, "grad_norm": 0.6217796069464346, "learning_rate": 2.0178426601784268e-07, "loss": 0.5715, "step": 33940 }, { "epoch": 0.9909491693673177, "grad_norm": 0.6834158810244133, "learning_rate": 2.0113544201135445e-07, "loss": 0.5942, "step": 33941 }, { "epoch": 0.9909783655951651, "grad_norm": 0.6916674790171355, "learning_rate": 2.0048661800486618e-07, "loss": 0.5931, "step": 33942 }, { "epoch": 0.9910075618230124, "grad_norm": 0.6376686561741093, "learning_rate": 1.9983779399837797e-07, "loss": 0.5902, "step": 33943 }, { "epoch": 0.9910367580508598, "grad_norm": 0.5828742141346047, "learning_rate": 1.991889699918897e-07, "loss": 0.5126, "step": 33944 }, { "epoch": 0.9910659542787071, "grad_norm": 0.6581965791960959, "learning_rate": 1.985401459854015e-07, "loss": 0.6176, "step": 33945 }, { "epoch": 0.9910951505065545, "grad_norm": 0.691303936721026, "learning_rate": 1.9789132197891323e-07, "loss": 0.6798, "step": 33946 }, { "epoch": 0.9911243467344019, "grad_norm": 0.6865471666401138, "learning_rate": 1.97242497972425e-07, "loss": 0.6686, "step": 33947 }, { "epoch": 0.9911535429622492, "grad_norm": 0.710597296525224, "learning_rate": 1.9659367396593676e-07, "loss": 0.6539, "step": 33948 }, { "epoch": 0.9911827391900966, "grad_norm": 0.7107466467191824, "learning_rate": 1.9594484995944852e-07, "loss": 0.5857, "step": 33949 }, { "epoch": 0.9912119354179441, "grad_norm": 0.6466495264025122, "learning_rate": 1.9529602595296026e-07, "loss": 0.6366, "step": 33950 }, { "epoch": 0.9912411316457914, "grad_norm": 0.6256385583059411, "learning_rate": 1.9464720194647204e-07, "loss": 0.5077, "step": 33951 }, { "epoch": 0.9912703278736388, "grad_norm": 0.6683372673229971, "learning_rate": 1.9399837793998378e-07, "loss": 0.656, "step": 33952 }, { "epoch": 0.9912995241014861, "grad_norm": 0.6034549274627652, "learning_rate": 1.9334955393349557e-07, "loss": 0.5364, "step": 33953 }, { "epoch": 0.9913287203293335, "grad_norm": 0.6559429961433078, "learning_rate": 1.927007299270073e-07, "loss": 0.5734, "step": 33954 }, { "epoch": 0.9913579165571809, "grad_norm": 0.6998786274951911, "learning_rate": 1.9205190592051907e-07, "loss": 0.6389, "step": 33955 }, { "epoch": 0.9913871127850282, "grad_norm": 0.6548325635093065, "learning_rate": 1.9140308191403083e-07, "loss": 0.5848, "step": 33956 }, { "epoch": 0.9914163090128756, "grad_norm": 0.6461228299908724, "learning_rate": 1.907542579075426e-07, "loss": 0.5971, "step": 33957 }, { "epoch": 0.991445505240723, "grad_norm": 0.6518262251609133, "learning_rate": 1.9010543390105436e-07, "loss": 0.5885, "step": 33958 }, { "epoch": 0.9914747014685703, "grad_norm": 0.654943951470257, "learning_rate": 1.8945660989456612e-07, "loss": 0.623, "step": 33959 }, { "epoch": 0.9915038976964177, "grad_norm": 0.6782962327511456, "learning_rate": 1.8880778588807785e-07, "loss": 0.6411, "step": 33960 }, { "epoch": 0.991533093924265, "grad_norm": 0.6068006285293913, "learning_rate": 1.8815896188158964e-07, "loss": 0.5372, "step": 33961 }, { "epoch": 0.9915622901521124, "grad_norm": 0.6258983512392138, "learning_rate": 1.8751013787510138e-07, "loss": 0.5331, "step": 33962 }, { "epoch": 0.9915914863799598, "grad_norm": 0.6135543983911699, "learning_rate": 1.8686131386861317e-07, "loss": 0.557, "step": 33963 }, { "epoch": 0.9916206826078071, "grad_norm": 0.654923349307428, "learning_rate": 1.862124898621249e-07, "loss": 0.6528, "step": 33964 }, { "epoch": 0.9916498788356545, "grad_norm": 0.6528107784448642, "learning_rate": 1.8556366585563667e-07, "loss": 0.6008, "step": 33965 }, { "epoch": 0.9916790750635018, "grad_norm": 0.6088575092586245, "learning_rate": 1.8491484184914846e-07, "loss": 0.5673, "step": 33966 }, { "epoch": 0.9917082712913492, "grad_norm": 0.7266506686388482, "learning_rate": 1.842660178426602e-07, "loss": 0.6607, "step": 33967 }, { "epoch": 0.9917374675191966, "grad_norm": 0.6273694076861919, "learning_rate": 1.8361719383617193e-07, "loss": 0.5424, "step": 33968 }, { "epoch": 0.9917666637470439, "grad_norm": 0.6687114941176443, "learning_rate": 1.8296836982968372e-07, "loss": 0.5963, "step": 33969 }, { "epoch": 0.9917958599748913, "grad_norm": 0.6304539345014837, "learning_rate": 1.8231954582319545e-07, "loss": 0.611, "step": 33970 }, { "epoch": 0.9918250562027386, "grad_norm": 0.6892998492766345, "learning_rate": 1.8167072181670724e-07, "loss": 0.6438, "step": 33971 }, { "epoch": 0.991854252430586, "grad_norm": 0.6114485890019168, "learning_rate": 1.81021897810219e-07, "loss": 0.5551, "step": 33972 }, { "epoch": 0.9918834486584334, "grad_norm": 0.6090080710281265, "learning_rate": 1.8037307380373074e-07, "loss": 0.5198, "step": 33973 }, { "epoch": 0.9919126448862807, "grad_norm": 0.6517672558312165, "learning_rate": 1.7972424979724253e-07, "loss": 0.5891, "step": 33974 }, { "epoch": 0.9919418411141281, "grad_norm": 0.8581762444647503, "learning_rate": 1.7907542579075427e-07, "loss": 0.6047, "step": 33975 }, { "epoch": 0.9919710373419754, "grad_norm": 0.6399536301434516, "learning_rate": 1.7842660178426605e-07, "loss": 0.5956, "step": 33976 }, { "epoch": 0.9920002335698228, "grad_norm": 0.601266339181744, "learning_rate": 1.777777777777778e-07, "loss": 0.5172, "step": 33977 }, { "epoch": 0.9920294297976702, "grad_norm": 0.67051053409806, "learning_rate": 1.7712895377128953e-07, "loss": 0.56, "step": 33978 }, { "epoch": 0.9920586260255175, "grad_norm": 0.6309611079626027, "learning_rate": 1.7648012976480132e-07, "loss": 0.5312, "step": 33979 }, { "epoch": 0.9920878222533649, "grad_norm": 0.6636216297308436, "learning_rate": 1.7583130575831308e-07, "loss": 0.622, "step": 33980 }, { "epoch": 0.9921170184812123, "grad_norm": 0.639724188285178, "learning_rate": 1.7518248175182484e-07, "loss": 0.541, "step": 33981 }, { "epoch": 0.9921462147090596, "grad_norm": 0.6878690111771136, "learning_rate": 1.745336577453366e-07, "loss": 0.6218, "step": 33982 }, { "epoch": 0.992175410936907, "grad_norm": 0.6285964900452511, "learning_rate": 1.7388483373884834e-07, "loss": 0.5452, "step": 33983 }, { "epoch": 0.9922046071647543, "grad_norm": 0.6365320181398543, "learning_rate": 1.7323600973236013e-07, "loss": 0.5826, "step": 33984 }, { "epoch": 0.9922338033926017, "grad_norm": 0.6283487057818998, "learning_rate": 1.7258718572587186e-07, "loss": 0.5591, "step": 33985 }, { "epoch": 0.9922629996204491, "grad_norm": 0.6555872469569697, "learning_rate": 1.7193836171938365e-07, "loss": 0.6209, "step": 33986 }, { "epoch": 0.9922921958482964, "grad_norm": 0.6306727109157446, "learning_rate": 1.712895377128954e-07, "loss": 0.5468, "step": 33987 }, { "epoch": 0.9923213920761438, "grad_norm": 0.5777964822626905, "learning_rate": 1.7064071370640715e-07, "loss": 0.479, "step": 33988 }, { "epoch": 0.9923505883039911, "grad_norm": 0.6332738399356926, "learning_rate": 1.6999188969991891e-07, "loss": 0.5719, "step": 33989 }, { "epoch": 0.9923797845318385, "grad_norm": 0.6677848492666175, "learning_rate": 1.6934306569343068e-07, "loss": 0.6224, "step": 33990 }, { "epoch": 0.9924089807596859, "grad_norm": 0.6304805333177601, "learning_rate": 1.686942416869424e-07, "loss": 0.5961, "step": 33991 }, { "epoch": 0.9924381769875332, "grad_norm": 0.5953847241758351, "learning_rate": 1.680454176804542e-07, "loss": 0.5039, "step": 33992 }, { "epoch": 0.9924673732153806, "grad_norm": 0.6268162675550912, "learning_rate": 1.6739659367396594e-07, "loss": 0.5547, "step": 33993 }, { "epoch": 0.992496569443228, "grad_norm": 0.6248980406856969, "learning_rate": 1.6674776966747773e-07, "loss": 0.5755, "step": 33994 }, { "epoch": 0.9925257656710753, "grad_norm": 0.6792445268240906, "learning_rate": 1.6609894566098946e-07, "loss": 0.641, "step": 33995 }, { "epoch": 0.9925549618989227, "grad_norm": 0.6299872689354186, "learning_rate": 1.6545012165450122e-07, "loss": 0.5684, "step": 33996 }, { "epoch": 0.99258415812677, "grad_norm": 0.641981486964029, "learning_rate": 1.64801297648013e-07, "loss": 0.5394, "step": 33997 }, { "epoch": 0.9926133543546174, "grad_norm": 0.6418145096359734, "learning_rate": 1.6415247364152475e-07, "loss": 0.5768, "step": 33998 }, { "epoch": 0.9926425505824648, "grad_norm": 0.6626997366770275, "learning_rate": 1.635036496350365e-07, "loss": 0.5938, "step": 33999 }, { "epoch": 0.9926717468103121, "grad_norm": 0.6787080726534427, "learning_rate": 1.6285482562854827e-07, "loss": 0.6358, "step": 34000 }, { "epoch": 0.9927009430381595, "grad_norm": 0.6410148359329172, "learning_rate": 1.6220600162206e-07, "loss": 0.5987, "step": 34001 }, { "epoch": 0.9927301392660068, "grad_norm": 0.6077461989359244, "learning_rate": 1.615571776155718e-07, "loss": 0.5139, "step": 34002 }, { "epoch": 0.9927593354938542, "grad_norm": 0.6365210764917418, "learning_rate": 1.6090835360908354e-07, "loss": 0.5963, "step": 34003 }, { "epoch": 0.9927885317217016, "grad_norm": 0.670488836427407, "learning_rate": 1.6025952960259532e-07, "loss": 0.6235, "step": 34004 }, { "epoch": 0.9928177279495489, "grad_norm": 0.6916600764562343, "learning_rate": 1.5961070559610706e-07, "loss": 0.6162, "step": 34005 }, { "epoch": 0.9928469241773963, "grad_norm": 0.6671560088132904, "learning_rate": 1.5896188158961882e-07, "loss": 0.6145, "step": 34006 }, { "epoch": 0.9928761204052436, "grad_norm": 0.6419643746550856, "learning_rate": 1.5831305758313059e-07, "loss": 0.5812, "step": 34007 }, { "epoch": 0.992905316633091, "grad_norm": 0.6465840374123832, "learning_rate": 1.5766423357664235e-07, "loss": 0.577, "step": 34008 }, { "epoch": 0.9929345128609384, "grad_norm": 0.6498434149226183, "learning_rate": 1.5701540957015414e-07, "loss": 0.5778, "step": 34009 }, { "epoch": 0.9929637090887857, "grad_norm": 0.6314474985899863, "learning_rate": 1.5636658556366587e-07, "loss": 0.62, "step": 34010 }, { "epoch": 0.9929929053166331, "grad_norm": 0.6170909145741234, "learning_rate": 1.5571776155717764e-07, "loss": 0.5404, "step": 34011 }, { "epoch": 0.9930221015444805, "grad_norm": 0.6652883620951225, "learning_rate": 1.5506893755068937e-07, "loss": 0.6593, "step": 34012 }, { "epoch": 0.9930512977723278, "grad_norm": 0.6432430042668539, "learning_rate": 1.5442011354420113e-07, "loss": 0.5682, "step": 34013 }, { "epoch": 0.9930804940001752, "grad_norm": 0.6258451252090279, "learning_rate": 1.537712895377129e-07, "loss": 0.5685, "step": 34014 }, { "epoch": 0.9931096902280225, "grad_norm": 0.6407067191240011, "learning_rate": 1.5312246553122469e-07, "loss": 0.5979, "step": 34015 }, { "epoch": 0.9931388864558699, "grad_norm": 0.6960389602435234, "learning_rate": 1.5247364152473642e-07, "loss": 0.6393, "step": 34016 }, { "epoch": 0.9931680826837173, "grad_norm": 0.6529520295739296, "learning_rate": 1.5182481751824818e-07, "loss": 0.6025, "step": 34017 }, { "epoch": 0.9931972789115646, "grad_norm": 0.6092413895073913, "learning_rate": 1.5117599351175995e-07, "loss": 0.5616, "step": 34018 }, { "epoch": 0.993226475139412, "grad_norm": 0.6458202123367295, "learning_rate": 1.505271695052717e-07, "loss": 0.5532, "step": 34019 }, { "epoch": 0.9932556713672593, "grad_norm": 0.7351527500707545, "learning_rate": 1.4987834549878347e-07, "loss": 0.7303, "step": 34020 }, { "epoch": 0.9932848675951067, "grad_norm": 0.6330869424586606, "learning_rate": 1.492295214922952e-07, "loss": 0.5458, "step": 34021 }, { "epoch": 0.9933140638229541, "grad_norm": 0.6078727697034477, "learning_rate": 1.48580697485807e-07, "loss": 0.5405, "step": 34022 }, { "epoch": 0.9933432600508014, "grad_norm": 0.6358101783329473, "learning_rate": 1.4793187347931876e-07, "loss": 0.571, "step": 34023 }, { "epoch": 0.9933724562786488, "grad_norm": 0.6729866030768378, "learning_rate": 1.4728304947283052e-07, "loss": 0.6602, "step": 34024 }, { "epoch": 0.9934016525064961, "grad_norm": 0.6516131116413547, "learning_rate": 1.4663422546634226e-07, "loss": 0.6103, "step": 34025 }, { "epoch": 0.9934308487343435, "grad_norm": 0.6322026357948608, "learning_rate": 1.4598540145985402e-07, "loss": 0.5519, "step": 34026 }, { "epoch": 0.9934600449621909, "grad_norm": 0.7092589939813434, "learning_rate": 1.4533657745336578e-07, "loss": 0.6714, "step": 34027 }, { "epoch": 0.9934892411900382, "grad_norm": 0.6673222189549778, "learning_rate": 1.4468775344687755e-07, "loss": 0.6134, "step": 34028 }, { "epoch": 0.9935184374178856, "grad_norm": 0.6640587753739379, "learning_rate": 1.440389294403893e-07, "loss": 0.6142, "step": 34029 }, { "epoch": 0.993547633645733, "grad_norm": 0.6420902957546781, "learning_rate": 1.4339010543390107e-07, "loss": 0.5878, "step": 34030 }, { "epoch": 0.9935768298735803, "grad_norm": 0.6634844309341023, "learning_rate": 1.4274128142741283e-07, "loss": 0.5958, "step": 34031 }, { "epoch": 0.9936060261014277, "grad_norm": 0.6349385204907411, "learning_rate": 1.420924574209246e-07, "loss": 0.5738, "step": 34032 }, { "epoch": 0.993635222329275, "grad_norm": 0.6427970438572012, "learning_rate": 1.4144363341443636e-07, "loss": 0.5833, "step": 34033 }, { "epoch": 0.9936644185571224, "grad_norm": 0.6125468034233029, "learning_rate": 1.4079480940794812e-07, "loss": 0.5505, "step": 34034 }, { "epoch": 0.9936936147849698, "grad_norm": 0.6322655989205886, "learning_rate": 1.4014598540145986e-07, "loss": 0.5868, "step": 34035 }, { "epoch": 0.9937228110128171, "grad_norm": 0.6183554665982676, "learning_rate": 1.3949716139497162e-07, "loss": 0.5427, "step": 34036 }, { "epoch": 0.9937520072406645, "grad_norm": 0.640141613137736, "learning_rate": 1.3884833738848338e-07, "loss": 0.5446, "step": 34037 }, { "epoch": 0.9937812034685118, "grad_norm": 0.6992612783644331, "learning_rate": 1.3819951338199514e-07, "loss": 0.655, "step": 34038 }, { "epoch": 0.9938103996963592, "grad_norm": 0.6702045233193, "learning_rate": 1.375506893755069e-07, "loss": 0.6437, "step": 34039 }, { "epoch": 0.9938395959242066, "grad_norm": 0.6504142948982496, "learning_rate": 1.3690186536901867e-07, "loss": 0.6246, "step": 34040 }, { "epoch": 0.9938687921520539, "grad_norm": 0.6740110659427266, "learning_rate": 1.3625304136253043e-07, "loss": 0.5995, "step": 34041 }, { "epoch": 0.9938979883799013, "grad_norm": 0.6681165109855645, "learning_rate": 1.356042173560422e-07, "loss": 0.617, "step": 34042 }, { "epoch": 0.9939271846077486, "grad_norm": 0.7182979323754061, "learning_rate": 1.3495539334955396e-07, "loss": 0.6945, "step": 34043 }, { "epoch": 0.993956380835596, "grad_norm": 0.6403944482148335, "learning_rate": 1.343065693430657e-07, "loss": 0.6088, "step": 34044 }, { "epoch": 0.9939855770634434, "grad_norm": 0.5881292921767994, "learning_rate": 1.3365774533657745e-07, "loss": 0.5086, "step": 34045 }, { "epoch": 0.9940147732912907, "grad_norm": 0.615846921182304, "learning_rate": 1.3300892133008922e-07, "loss": 0.5528, "step": 34046 }, { "epoch": 0.9940439695191381, "grad_norm": 0.6062076780764202, "learning_rate": 1.3236009732360098e-07, "loss": 0.5382, "step": 34047 }, { "epoch": 0.9940731657469855, "grad_norm": 0.6323602696752371, "learning_rate": 1.3171127331711274e-07, "loss": 0.5513, "step": 34048 }, { "epoch": 0.9941023619748328, "grad_norm": 0.6068318260971586, "learning_rate": 1.310624493106245e-07, "loss": 0.5411, "step": 34049 }, { "epoch": 0.9941315582026802, "grad_norm": 0.6114016173422049, "learning_rate": 1.3041362530413627e-07, "loss": 0.5552, "step": 34050 }, { "epoch": 0.9941607544305275, "grad_norm": 0.7165972637288605, "learning_rate": 1.2976480129764803e-07, "loss": 0.6523, "step": 34051 }, { "epoch": 0.9941899506583749, "grad_norm": 0.6987419274048128, "learning_rate": 1.291159772911598e-07, "loss": 0.6001, "step": 34052 }, { "epoch": 0.9942191468862223, "grad_norm": 0.5936011466387572, "learning_rate": 1.2846715328467153e-07, "loss": 0.4829, "step": 34053 }, { "epoch": 0.9942483431140696, "grad_norm": 0.6288217532028385, "learning_rate": 1.278183292781833e-07, "loss": 0.5768, "step": 34054 }, { "epoch": 0.994277539341917, "grad_norm": 0.6422143198426085, "learning_rate": 1.2716950527169505e-07, "loss": 0.5501, "step": 34055 }, { "epoch": 0.9943067355697643, "grad_norm": 0.6508394225100491, "learning_rate": 1.2652068126520682e-07, "loss": 0.5946, "step": 34056 }, { "epoch": 0.9943359317976117, "grad_norm": 0.6570295282099107, "learning_rate": 1.2587185725871858e-07, "loss": 0.5891, "step": 34057 }, { "epoch": 0.9943651280254591, "grad_norm": 0.6583195926705547, "learning_rate": 1.2522303325223034e-07, "loss": 0.6286, "step": 34058 }, { "epoch": 0.9943943242533064, "grad_norm": 0.6594550974916316, "learning_rate": 1.245742092457421e-07, "loss": 0.6297, "step": 34059 }, { "epoch": 0.9944235204811538, "grad_norm": 0.6072712306647241, "learning_rate": 1.2392538523925387e-07, "loss": 0.544, "step": 34060 }, { "epoch": 0.9944527167090011, "grad_norm": 0.6366895669084375, "learning_rate": 1.2327656123276563e-07, "loss": 0.5913, "step": 34061 }, { "epoch": 0.9944819129368485, "grad_norm": 0.7069380487636737, "learning_rate": 1.2262773722627736e-07, "loss": 0.6766, "step": 34062 }, { "epoch": 0.9945111091646959, "grad_norm": 0.6164073263646882, "learning_rate": 1.2197891321978913e-07, "loss": 0.5215, "step": 34063 }, { "epoch": 0.9945403053925432, "grad_norm": 0.6709605590792985, "learning_rate": 1.2133008921330092e-07, "loss": 0.6098, "step": 34064 }, { "epoch": 0.9945695016203906, "grad_norm": 0.6311629396461232, "learning_rate": 1.2068126520681268e-07, "loss": 0.5683, "step": 34065 }, { "epoch": 0.994598697848238, "grad_norm": 0.6388494226254803, "learning_rate": 1.2003244120032444e-07, "loss": 0.609, "step": 34066 }, { "epoch": 0.9946278940760853, "grad_norm": 0.6165793531213067, "learning_rate": 1.1938361719383618e-07, "loss": 0.5252, "step": 34067 }, { "epoch": 0.9946570903039327, "grad_norm": 0.6446848726251759, "learning_rate": 1.1873479318734794e-07, "loss": 0.5682, "step": 34068 }, { "epoch": 0.99468628653178, "grad_norm": 0.6277264852625237, "learning_rate": 1.180859691808597e-07, "loss": 0.5517, "step": 34069 }, { "epoch": 0.9947154827596274, "grad_norm": 0.6581287764750479, "learning_rate": 1.1743714517437146e-07, "loss": 0.5833, "step": 34070 }, { "epoch": 0.9947446789874749, "grad_norm": 0.6620396401488884, "learning_rate": 1.1678832116788321e-07, "loss": 0.6012, "step": 34071 }, { "epoch": 0.9947738752153222, "grad_norm": 0.6276395935172778, "learning_rate": 1.1613949716139498e-07, "loss": 0.5442, "step": 34072 }, { "epoch": 0.9948030714431696, "grad_norm": 0.6311643356383924, "learning_rate": 1.1549067315490674e-07, "loss": 0.5807, "step": 34073 }, { "epoch": 0.994832267671017, "grad_norm": 0.6233852067322649, "learning_rate": 1.148418491484185e-07, "loss": 0.5626, "step": 34074 }, { "epoch": 0.9948614638988643, "grad_norm": 0.6706306501512949, "learning_rate": 1.1419302514193026e-07, "loss": 0.6258, "step": 34075 }, { "epoch": 0.9948906601267117, "grad_norm": 0.6775457301916468, "learning_rate": 1.1354420113544201e-07, "loss": 0.6846, "step": 34076 }, { "epoch": 0.994919856354559, "grad_norm": 0.649263988602028, "learning_rate": 1.1289537712895378e-07, "loss": 0.6009, "step": 34077 }, { "epoch": 0.9949490525824064, "grad_norm": 0.6150396540288318, "learning_rate": 1.1224655312246554e-07, "loss": 0.524, "step": 34078 }, { "epoch": 0.9949782488102538, "grad_norm": 0.675721630537234, "learning_rate": 1.115977291159773e-07, "loss": 0.6412, "step": 34079 }, { "epoch": 0.9950074450381011, "grad_norm": 0.635270396820835, "learning_rate": 1.1094890510948905e-07, "loss": 0.5857, "step": 34080 }, { "epoch": 0.9950366412659485, "grad_norm": 0.6656255936848685, "learning_rate": 1.1030008110300081e-07, "loss": 0.637, "step": 34081 }, { "epoch": 0.9950658374937958, "grad_norm": 0.6684641063453579, "learning_rate": 1.0965125709651257e-07, "loss": 0.6275, "step": 34082 }, { "epoch": 0.9950950337216432, "grad_norm": 0.6376686892507607, "learning_rate": 1.0900243309002434e-07, "loss": 0.5479, "step": 34083 }, { "epoch": 0.9951242299494906, "grad_norm": 0.6687083806624309, "learning_rate": 1.0835360908353611e-07, "loss": 0.6928, "step": 34084 }, { "epoch": 0.9951534261773379, "grad_norm": 0.6806671188641854, "learning_rate": 1.0770478507704785e-07, "loss": 0.6057, "step": 34085 }, { "epoch": 0.9951826224051853, "grad_norm": 0.6570241569160806, "learning_rate": 1.0705596107055961e-07, "loss": 0.6038, "step": 34086 }, { "epoch": 0.9952118186330327, "grad_norm": 0.6356385888441421, "learning_rate": 1.0640713706407139e-07, "loss": 0.5905, "step": 34087 }, { "epoch": 0.99524101486088, "grad_norm": 0.6448534629192467, "learning_rate": 1.0575831305758315e-07, "loss": 0.5763, "step": 34088 }, { "epoch": 0.9952702110887274, "grad_norm": 0.6876857715939145, "learning_rate": 1.0510948905109489e-07, "loss": 0.5877, "step": 34089 }, { "epoch": 0.9952994073165747, "grad_norm": 0.6458860544016134, "learning_rate": 1.0446066504460665e-07, "loss": 0.6078, "step": 34090 }, { "epoch": 0.9953286035444221, "grad_norm": 0.6048815350656072, "learning_rate": 1.0381184103811842e-07, "loss": 0.5325, "step": 34091 }, { "epoch": 0.9953577997722695, "grad_norm": 0.6713608193290542, "learning_rate": 1.0316301703163019e-07, "loss": 0.6419, "step": 34092 }, { "epoch": 0.9953869960001168, "grad_norm": 0.6848318446066194, "learning_rate": 1.0251419302514195e-07, "loss": 0.6679, "step": 34093 }, { "epoch": 0.9954161922279642, "grad_norm": 0.6724587906632536, "learning_rate": 1.018653690186537e-07, "loss": 0.6341, "step": 34094 }, { "epoch": 0.9954453884558115, "grad_norm": 0.6667349392474918, "learning_rate": 1.0121654501216546e-07, "loss": 0.6245, "step": 34095 }, { "epoch": 0.9954745846836589, "grad_norm": 0.6477423537848344, "learning_rate": 1.0056772100567722e-07, "loss": 0.6002, "step": 34096 }, { "epoch": 0.9955037809115063, "grad_norm": 0.6356660791519271, "learning_rate": 9.991889699918899e-08, "loss": 0.6031, "step": 34097 }, { "epoch": 0.9955329771393536, "grad_norm": 0.6455893139431049, "learning_rate": 9.927007299270075e-08, "loss": 0.5516, "step": 34098 }, { "epoch": 0.995562173367201, "grad_norm": 0.6472515732974599, "learning_rate": 9.86212489862125e-08, "loss": 0.5636, "step": 34099 }, { "epoch": 0.9955913695950483, "grad_norm": 0.6260069388406238, "learning_rate": 9.797242497972426e-08, "loss": 0.555, "step": 34100 }, { "epoch": 0.9956205658228957, "grad_norm": 0.6181522350410918, "learning_rate": 9.732360097323602e-08, "loss": 0.5722, "step": 34101 }, { "epoch": 0.9956497620507431, "grad_norm": 0.6744377011441625, "learning_rate": 9.667477696674778e-08, "loss": 0.6295, "step": 34102 }, { "epoch": 0.9956789582785904, "grad_norm": 0.6147421967627231, "learning_rate": 9.602595296025953e-08, "loss": 0.5525, "step": 34103 }, { "epoch": 0.9957081545064378, "grad_norm": 0.6196651989193352, "learning_rate": 9.53771289537713e-08, "loss": 0.5259, "step": 34104 }, { "epoch": 0.9957373507342852, "grad_norm": 0.6426610424781656, "learning_rate": 9.472830494728306e-08, "loss": 0.6003, "step": 34105 }, { "epoch": 0.9957665469621325, "grad_norm": 0.6484224568119743, "learning_rate": 9.407948094079482e-08, "loss": 0.5745, "step": 34106 }, { "epoch": 0.9957957431899799, "grad_norm": 0.7061078104560912, "learning_rate": 9.343065693430658e-08, "loss": 0.6696, "step": 34107 }, { "epoch": 0.9958249394178272, "grad_norm": 0.7024451124717043, "learning_rate": 9.278183292781833e-08, "loss": 0.6565, "step": 34108 }, { "epoch": 0.9958541356456746, "grad_norm": 0.5952565622532506, "learning_rate": 9.21330089213301e-08, "loss": 0.4896, "step": 34109 }, { "epoch": 0.995883331873522, "grad_norm": 0.6196502463433248, "learning_rate": 9.148418491484186e-08, "loss": 0.5351, "step": 34110 }, { "epoch": 0.9959125281013693, "grad_norm": 0.6656121606562148, "learning_rate": 9.083536090835362e-08, "loss": 0.6128, "step": 34111 }, { "epoch": 0.9959417243292167, "grad_norm": 0.6689888362974309, "learning_rate": 9.018653690186537e-08, "loss": 0.579, "step": 34112 }, { "epoch": 0.995970920557064, "grad_norm": 0.6012266595740439, "learning_rate": 8.953771289537713e-08, "loss": 0.4957, "step": 34113 }, { "epoch": 0.9960001167849114, "grad_norm": 0.6270817138781826, "learning_rate": 8.88888888888889e-08, "loss": 0.592, "step": 34114 }, { "epoch": 0.9960293130127588, "grad_norm": 0.6611263689975679, "learning_rate": 8.824006488240066e-08, "loss": 0.649, "step": 34115 }, { "epoch": 0.9960585092406061, "grad_norm": 0.6176218256632992, "learning_rate": 8.759124087591242e-08, "loss": 0.5405, "step": 34116 }, { "epoch": 0.9960877054684535, "grad_norm": 0.6761028371548823, "learning_rate": 8.694241686942417e-08, "loss": 0.6132, "step": 34117 }, { "epoch": 0.9961169016963008, "grad_norm": 0.5883995167868251, "learning_rate": 8.629359286293593e-08, "loss": 0.5043, "step": 34118 }, { "epoch": 0.9961460979241482, "grad_norm": 0.6556196279841222, "learning_rate": 8.56447688564477e-08, "loss": 0.5692, "step": 34119 }, { "epoch": 0.9961752941519956, "grad_norm": 0.6199047683411711, "learning_rate": 8.499594484995946e-08, "loss": 0.5507, "step": 34120 }, { "epoch": 0.9962044903798429, "grad_norm": 0.6227723359990067, "learning_rate": 8.43471208434712e-08, "loss": 0.561, "step": 34121 }, { "epoch": 0.9962336866076903, "grad_norm": 0.6860839178771438, "learning_rate": 8.369829683698297e-08, "loss": 0.6638, "step": 34122 }, { "epoch": 0.9962628828355377, "grad_norm": 0.667199535750848, "learning_rate": 8.304947283049473e-08, "loss": 0.6496, "step": 34123 }, { "epoch": 0.996292079063385, "grad_norm": 0.6845843257092689, "learning_rate": 8.24006488240065e-08, "loss": 0.623, "step": 34124 }, { "epoch": 0.9963212752912324, "grad_norm": 0.6399663767891067, "learning_rate": 8.175182481751826e-08, "loss": 0.6146, "step": 34125 }, { "epoch": 0.9963504715190797, "grad_norm": 0.6695021485240468, "learning_rate": 8.110300081103e-08, "loss": 0.6722, "step": 34126 }, { "epoch": 0.9963796677469271, "grad_norm": 0.6238704135424629, "learning_rate": 8.045417680454177e-08, "loss": 0.5984, "step": 34127 }, { "epoch": 0.9964088639747745, "grad_norm": 0.6614887486494897, "learning_rate": 7.980535279805353e-08, "loss": 0.6281, "step": 34128 }, { "epoch": 0.9964380602026218, "grad_norm": 0.6540580880633654, "learning_rate": 7.915652879156529e-08, "loss": 0.5582, "step": 34129 }, { "epoch": 0.9964672564304692, "grad_norm": 0.6518539812622259, "learning_rate": 7.850770478507707e-08, "loss": 0.5975, "step": 34130 }, { "epoch": 0.9964964526583165, "grad_norm": 0.640363321272438, "learning_rate": 7.785888077858882e-08, "loss": 0.5435, "step": 34131 }, { "epoch": 0.9965256488861639, "grad_norm": 0.6118937284001912, "learning_rate": 7.721005677210057e-08, "loss": 0.5529, "step": 34132 }, { "epoch": 0.9965548451140113, "grad_norm": 0.6253465314845423, "learning_rate": 7.656123276561234e-08, "loss": 0.5562, "step": 34133 }, { "epoch": 0.9965840413418586, "grad_norm": 0.6593867249286415, "learning_rate": 7.591240875912409e-08, "loss": 0.6075, "step": 34134 }, { "epoch": 0.996613237569706, "grad_norm": 0.6973754531870584, "learning_rate": 7.526358475263585e-08, "loss": 0.6201, "step": 34135 }, { "epoch": 0.9966424337975534, "grad_norm": 0.6694216807995506, "learning_rate": 7.46147607461476e-08, "loss": 0.6091, "step": 34136 }, { "epoch": 0.9966716300254007, "grad_norm": 0.6397601031814693, "learning_rate": 7.396593673965938e-08, "loss": 0.5803, "step": 34137 }, { "epoch": 0.9967008262532481, "grad_norm": 0.6377382046459706, "learning_rate": 7.331711273317113e-08, "loss": 0.5931, "step": 34138 }, { "epoch": 0.9967300224810954, "grad_norm": 0.6480548744688264, "learning_rate": 7.266828872668289e-08, "loss": 0.5793, "step": 34139 }, { "epoch": 0.9967592187089428, "grad_norm": 0.6318517034335406, "learning_rate": 7.201946472019465e-08, "loss": 0.5932, "step": 34140 }, { "epoch": 0.9967884149367902, "grad_norm": 0.6083956987532549, "learning_rate": 7.137064071370642e-08, "loss": 0.5362, "step": 34141 }, { "epoch": 0.9968176111646375, "grad_norm": 0.6489511440316655, "learning_rate": 7.072181670721818e-08, "loss": 0.5908, "step": 34142 }, { "epoch": 0.9968468073924849, "grad_norm": 0.638950812454855, "learning_rate": 7.007299270072993e-08, "loss": 0.5426, "step": 34143 }, { "epoch": 0.9968760036203322, "grad_norm": 0.6872633844161379, "learning_rate": 6.942416869424169e-08, "loss": 0.6129, "step": 34144 }, { "epoch": 0.9969051998481796, "grad_norm": 0.6466962173136368, "learning_rate": 6.877534468775345e-08, "loss": 0.6192, "step": 34145 }, { "epoch": 0.996934396076027, "grad_norm": 0.6300369881522552, "learning_rate": 6.812652068126522e-08, "loss": 0.5669, "step": 34146 }, { "epoch": 0.9969635923038743, "grad_norm": 0.6378921007501825, "learning_rate": 6.747769667477698e-08, "loss": 0.5889, "step": 34147 }, { "epoch": 0.9969927885317217, "grad_norm": 0.6606154413867587, "learning_rate": 6.682887266828873e-08, "loss": 0.6239, "step": 34148 }, { "epoch": 0.997021984759569, "grad_norm": 0.629288502048009, "learning_rate": 6.618004866180049e-08, "loss": 0.5598, "step": 34149 }, { "epoch": 0.9970511809874164, "grad_norm": 0.6284866582508948, "learning_rate": 6.553122465531225e-08, "loss": 0.5608, "step": 34150 }, { "epoch": 0.9970803772152638, "grad_norm": 0.7288156981963706, "learning_rate": 6.488240064882401e-08, "loss": 0.6986, "step": 34151 }, { "epoch": 0.9971095734431111, "grad_norm": 0.6325571687893807, "learning_rate": 6.423357664233576e-08, "loss": 0.5892, "step": 34152 }, { "epoch": 0.9971387696709585, "grad_norm": 0.6329590971526958, "learning_rate": 6.358475263584753e-08, "loss": 0.5717, "step": 34153 }, { "epoch": 0.9971679658988059, "grad_norm": 0.6112201242805666, "learning_rate": 6.293592862935929e-08, "loss": 0.5399, "step": 34154 }, { "epoch": 0.9971971621266532, "grad_norm": 0.6650182951504638, "learning_rate": 6.228710462287105e-08, "loss": 0.6132, "step": 34155 }, { "epoch": 0.9972263583545006, "grad_norm": 0.652000288956728, "learning_rate": 6.163828061638281e-08, "loss": 0.6153, "step": 34156 }, { "epoch": 0.9972555545823479, "grad_norm": 0.6219013820480849, "learning_rate": 6.098945660989456e-08, "loss": 0.5695, "step": 34157 }, { "epoch": 0.9972847508101953, "grad_norm": 0.6677212009026203, "learning_rate": 6.034063260340634e-08, "loss": 0.6618, "step": 34158 }, { "epoch": 0.9973139470380427, "grad_norm": 0.6312834530748135, "learning_rate": 5.969180859691809e-08, "loss": 0.6287, "step": 34159 }, { "epoch": 0.99734314326589, "grad_norm": 0.6441981683497704, "learning_rate": 5.904298459042985e-08, "loss": 0.6072, "step": 34160 }, { "epoch": 0.9973723394937374, "grad_norm": 0.6733098004208958, "learning_rate": 5.839416058394161e-08, "loss": 0.6234, "step": 34161 }, { "epoch": 0.9974015357215847, "grad_norm": 0.7053368936650671, "learning_rate": 5.774533657745337e-08, "loss": 0.6442, "step": 34162 }, { "epoch": 0.9974307319494321, "grad_norm": 0.644180860897677, "learning_rate": 5.709651257096513e-08, "loss": 0.5957, "step": 34163 }, { "epoch": 0.9974599281772795, "grad_norm": 0.662241300004584, "learning_rate": 5.644768856447689e-08, "loss": 0.6075, "step": 34164 }, { "epoch": 0.9974891244051268, "grad_norm": 0.7384096118497264, "learning_rate": 5.579886455798865e-08, "loss": 0.7023, "step": 34165 }, { "epoch": 0.9975183206329742, "grad_norm": 0.6701782117551841, "learning_rate": 5.5150040551500406e-08, "loss": 0.6804, "step": 34166 }, { "epoch": 0.9975475168608215, "grad_norm": 0.6375388149245337, "learning_rate": 5.450121654501217e-08, "loss": 0.5774, "step": 34167 }, { "epoch": 0.9975767130886689, "grad_norm": 0.645778572483729, "learning_rate": 5.3852392538523924e-08, "loss": 0.625, "step": 34168 }, { "epoch": 0.9976059093165163, "grad_norm": 0.6005649338003816, "learning_rate": 5.3203568532035694e-08, "loss": 0.4713, "step": 34169 }, { "epoch": 0.9976351055443636, "grad_norm": 0.679396355071661, "learning_rate": 5.255474452554744e-08, "loss": 0.5949, "step": 34170 }, { "epoch": 0.997664301772211, "grad_norm": 0.6897024593921601, "learning_rate": 5.190592051905921e-08, "loss": 0.6272, "step": 34171 }, { "epoch": 0.9976934980000584, "grad_norm": 0.6963594847763098, "learning_rate": 5.1257096512570974e-08, "loss": 0.683, "step": 34172 }, { "epoch": 0.9977226942279057, "grad_norm": 0.6660255249353669, "learning_rate": 5.060827250608273e-08, "loss": 0.6467, "step": 34173 }, { "epoch": 0.9977518904557531, "grad_norm": 0.6612344016154982, "learning_rate": 4.995944849959449e-08, "loss": 0.6364, "step": 34174 }, { "epoch": 0.9977810866836004, "grad_norm": 0.6512985411787446, "learning_rate": 4.931062449310625e-08, "loss": 0.5938, "step": 34175 }, { "epoch": 0.9978102829114478, "grad_norm": 0.6389324808298595, "learning_rate": 4.866180048661801e-08, "loss": 0.5521, "step": 34176 }, { "epoch": 0.9978394791392952, "grad_norm": 0.6740007504784805, "learning_rate": 4.801297648012977e-08, "loss": 0.6442, "step": 34177 }, { "epoch": 0.9978686753671425, "grad_norm": 0.6480237356803388, "learning_rate": 4.736415247364153e-08, "loss": 0.6275, "step": 34178 }, { "epoch": 0.9978978715949899, "grad_norm": 0.6579203048719288, "learning_rate": 4.671532846715329e-08, "loss": 0.5988, "step": 34179 }, { "epoch": 0.9979270678228372, "grad_norm": 0.6215063057879447, "learning_rate": 4.606650446066505e-08, "loss": 0.5742, "step": 34180 }, { "epoch": 0.9979562640506846, "grad_norm": 0.6358449451429695, "learning_rate": 4.541768045417681e-08, "loss": 0.5993, "step": 34181 }, { "epoch": 0.997985460278532, "grad_norm": 0.660295655350842, "learning_rate": 4.4768856447688566e-08, "loss": 0.5268, "step": 34182 }, { "epoch": 0.9980146565063793, "grad_norm": 0.6511866612756028, "learning_rate": 4.412003244120033e-08, "loss": 0.5769, "step": 34183 }, { "epoch": 0.9980438527342267, "grad_norm": 0.6664675701515372, "learning_rate": 4.3471208434712085e-08, "loss": 0.6192, "step": 34184 }, { "epoch": 0.998073048962074, "grad_norm": 0.6370656572946493, "learning_rate": 4.282238442822385e-08, "loss": 0.579, "step": 34185 }, { "epoch": 0.9981022451899214, "grad_norm": 0.6187177738570834, "learning_rate": 4.21735604217356e-08, "loss": 0.5669, "step": 34186 }, { "epoch": 0.9981314414177688, "grad_norm": 0.6526714805447915, "learning_rate": 4.1524736415247366e-08, "loss": 0.6008, "step": 34187 }, { "epoch": 0.9981606376456161, "grad_norm": 0.581730592714396, "learning_rate": 4.087591240875913e-08, "loss": 0.4911, "step": 34188 }, { "epoch": 0.9981898338734635, "grad_norm": 0.638846715104442, "learning_rate": 4.0227088402270884e-08, "loss": 0.5759, "step": 34189 }, { "epoch": 0.9982190301013109, "grad_norm": 0.6151794470251207, "learning_rate": 3.9578264395782646e-08, "loss": 0.5363, "step": 34190 }, { "epoch": 0.9982482263291583, "grad_norm": 0.6493053956572448, "learning_rate": 3.892944038929441e-08, "loss": 0.5995, "step": 34191 }, { "epoch": 0.9982774225570057, "grad_norm": 0.6667568587236516, "learning_rate": 3.828061638280617e-08, "loss": 0.6332, "step": 34192 }, { "epoch": 0.998306618784853, "grad_norm": 0.6777275052567652, "learning_rate": 3.763179237631793e-08, "loss": 0.6314, "step": 34193 }, { "epoch": 0.9983358150127004, "grad_norm": 0.6379656151232124, "learning_rate": 3.698296836982969e-08, "loss": 0.5564, "step": 34194 }, { "epoch": 0.9983650112405478, "grad_norm": 0.6282331687878743, "learning_rate": 3.6334144363341446e-08, "loss": 0.5926, "step": 34195 }, { "epoch": 0.9983942074683951, "grad_norm": 0.6404448847325519, "learning_rate": 3.568532035685321e-08, "loss": 0.5652, "step": 34196 }, { "epoch": 0.9984234036962425, "grad_norm": 0.6198541860082537, "learning_rate": 3.5036496350364964e-08, "loss": 0.5396, "step": 34197 }, { "epoch": 0.9984525999240899, "grad_norm": 0.6580432217800225, "learning_rate": 3.4387672343876727e-08, "loss": 0.6187, "step": 34198 }, { "epoch": 0.9984817961519372, "grad_norm": 0.7129512144067832, "learning_rate": 3.373884833738849e-08, "loss": 0.6942, "step": 34199 }, { "epoch": 0.9985109923797846, "grad_norm": 0.6561198383910687, "learning_rate": 3.3090024330900245e-08, "loss": 0.635, "step": 34200 }, { "epoch": 0.9985401886076319, "grad_norm": 0.6349810585306603, "learning_rate": 3.244120032441201e-08, "loss": 0.5739, "step": 34201 }, { "epoch": 0.9985693848354793, "grad_norm": 0.6315109128797093, "learning_rate": 3.1792376317923763e-08, "loss": 0.5583, "step": 34202 }, { "epoch": 0.9985985810633267, "grad_norm": 0.6202776024255527, "learning_rate": 3.1143552311435526e-08, "loss": 0.5558, "step": 34203 }, { "epoch": 0.998627777291174, "grad_norm": 0.6491310083249845, "learning_rate": 3.049472830494728e-08, "loss": 0.6153, "step": 34204 }, { "epoch": 0.9986569735190214, "grad_norm": 0.6650084872050499, "learning_rate": 2.9845904298459044e-08, "loss": 0.6163, "step": 34205 }, { "epoch": 0.9986861697468687, "grad_norm": 0.6449202998682414, "learning_rate": 2.9197080291970803e-08, "loss": 0.5862, "step": 34206 }, { "epoch": 0.9987153659747161, "grad_norm": 0.6210683767786426, "learning_rate": 2.8548256285482566e-08, "loss": 0.5466, "step": 34207 }, { "epoch": 0.9987445622025635, "grad_norm": 0.6622355797733804, "learning_rate": 2.7899432278994325e-08, "loss": 0.5904, "step": 34208 }, { "epoch": 0.9987737584304108, "grad_norm": 0.6361867050528615, "learning_rate": 2.7250608272506084e-08, "loss": 0.5515, "step": 34209 }, { "epoch": 0.9988029546582582, "grad_norm": 0.6628764636222596, "learning_rate": 2.6601784266017847e-08, "loss": 0.5861, "step": 34210 }, { "epoch": 0.9988321508861056, "grad_norm": 0.7836035233263808, "learning_rate": 2.5952960259529606e-08, "loss": 0.6729, "step": 34211 }, { "epoch": 0.9988613471139529, "grad_norm": 0.7260417319621227, "learning_rate": 2.5304136253041365e-08, "loss": 0.7158, "step": 34212 }, { "epoch": 0.9988905433418003, "grad_norm": 0.5778027538529664, "learning_rate": 2.4655312246553124e-08, "loss": 0.5143, "step": 34213 }, { "epoch": 0.9989197395696476, "grad_norm": 0.6564962596215047, "learning_rate": 2.4006488240064884e-08, "loss": 0.6467, "step": 34214 }, { "epoch": 0.998948935797495, "grad_norm": 0.6084713566774339, "learning_rate": 2.3357664233576646e-08, "loss": 0.5305, "step": 34215 }, { "epoch": 0.9989781320253424, "grad_norm": 0.7191328880157016, "learning_rate": 2.2708840227088405e-08, "loss": 0.6241, "step": 34216 }, { "epoch": 0.9990073282531897, "grad_norm": 0.6856139067768804, "learning_rate": 2.2060016220600164e-08, "loss": 0.6245, "step": 34217 }, { "epoch": 0.9990365244810371, "grad_norm": 0.6894057025957047, "learning_rate": 2.1411192214111924e-08, "loss": 0.6984, "step": 34218 }, { "epoch": 0.9990657207088844, "grad_norm": 0.6488876089070287, "learning_rate": 2.0762368207623683e-08, "loss": 0.6135, "step": 34219 }, { "epoch": 0.9990949169367318, "grad_norm": 0.7320438725703172, "learning_rate": 2.0113544201135442e-08, "loss": 0.6495, "step": 34220 }, { "epoch": 0.9991241131645792, "grad_norm": 0.6084993955421112, "learning_rate": 1.9464720194647204e-08, "loss": 0.5095, "step": 34221 }, { "epoch": 0.9991533093924265, "grad_norm": 0.6163377569290325, "learning_rate": 1.8815896188158964e-08, "loss": 0.5751, "step": 34222 }, { "epoch": 0.9991825056202739, "grad_norm": 0.6463085445373344, "learning_rate": 1.8167072181670723e-08, "loss": 0.6019, "step": 34223 }, { "epoch": 0.9992117018481212, "grad_norm": 0.6770968778078712, "learning_rate": 1.7518248175182482e-08, "loss": 0.6172, "step": 34224 }, { "epoch": 0.9992408980759686, "grad_norm": 0.6275176625391687, "learning_rate": 1.6869424168694245e-08, "loss": 0.543, "step": 34225 }, { "epoch": 0.999270094303816, "grad_norm": 0.650927340580013, "learning_rate": 1.6220600162206004e-08, "loss": 0.6194, "step": 34226 }, { "epoch": 0.9992992905316633, "grad_norm": 0.6927725940526775, "learning_rate": 1.5571776155717763e-08, "loss": 0.5761, "step": 34227 }, { "epoch": 0.9993284867595107, "grad_norm": 0.6577021257341231, "learning_rate": 1.4922952149229522e-08, "loss": 0.6099, "step": 34228 }, { "epoch": 0.999357682987358, "grad_norm": 0.6481899382288312, "learning_rate": 1.4274128142741283e-08, "loss": 0.5833, "step": 34229 }, { "epoch": 0.9993868792152054, "grad_norm": 0.6669614854987923, "learning_rate": 1.3625304136253042e-08, "loss": 0.668, "step": 34230 }, { "epoch": 0.9994160754430528, "grad_norm": 0.6177503339787833, "learning_rate": 1.2976480129764803e-08, "loss": 0.5947, "step": 34231 }, { "epoch": 0.9994452716709001, "grad_norm": 0.6126426698760432, "learning_rate": 1.2327656123276562e-08, "loss": 0.5075, "step": 34232 }, { "epoch": 0.9994744678987475, "grad_norm": 0.5905340966262931, "learning_rate": 1.1678832116788323e-08, "loss": 0.5062, "step": 34233 }, { "epoch": 0.9995036641265949, "grad_norm": 0.662529123033624, "learning_rate": 1.1030008110300082e-08, "loss": 0.628, "step": 34234 }, { "epoch": 0.9995328603544422, "grad_norm": 0.6673263938454852, "learning_rate": 1.0381184103811841e-08, "loss": 0.619, "step": 34235 }, { "epoch": 0.9995620565822896, "grad_norm": 0.6392466869178048, "learning_rate": 9.732360097323602e-09, "loss": 0.593, "step": 34236 }, { "epoch": 0.9995912528101369, "grad_norm": 0.5936266005722133, "learning_rate": 9.083536090835361e-09, "loss": 0.5107, "step": 34237 }, { "epoch": 0.9996204490379843, "grad_norm": 0.6694529790764318, "learning_rate": 8.434712084347122e-09, "loss": 0.6308, "step": 34238 }, { "epoch": 0.9996496452658317, "grad_norm": 0.6788474871325519, "learning_rate": 7.785888077858881e-09, "loss": 0.6614, "step": 34239 }, { "epoch": 0.999678841493679, "grad_norm": 0.6753537140024359, "learning_rate": 7.1370640713706415e-09, "loss": 0.5341, "step": 34240 }, { "epoch": 0.9997080377215264, "grad_norm": 0.6661648644116276, "learning_rate": 6.4882400648824015e-09, "loss": 0.5917, "step": 34241 }, { "epoch": 0.9997372339493737, "grad_norm": 0.6475748927400954, "learning_rate": 5.8394160583941615e-09, "loss": 0.5795, "step": 34242 }, { "epoch": 0.9997664301772211, "grad_norm": 0.6673960262075991, "learning_rate": 5.190592051905921e-09, "loss": 0.628, "step": 34243 }, { "epoch": 0.9997956264050685, "grad_norm": 0.6255996916096412, "learning_rate": 4.541768045417681e-09, "loss": 0.5651, "step": 34244 }, { "epoch": 0.9998248226329158, "grad_norm": 0.6427454529161486, "learning_rate": 3.892944038929441e-09, "loss": 0.6254, "step": 34245 }, { "epoch": 0.9998540188607632, "grad_norm": 0.6595739932688065, "learning_rate": 3.2441200324412007e-09, "loss": 0.6141, "step": 34246 }, { "epoch": 0.9998832150886106, "grad_norm": 0.7110783446185541, "learning_rate": 2.5952960259529603e-09, "loss": 0.6625, "step": 34247 }, { "epoch": 0.9999124113164579, "grad_norm": 0.6671288670288735, "learning_rate": 1.9464720194647204e-09, "loss": 0.5851, "step": 34248 }, { "epoch": 0.9999416075443053, "grad_norm": 0.6282851536282745, "learning_rate": 1.2976480129764802e-09, "loss": 0.5684, "step": 34249 }, { "epoch": 0.9999708037721526, "grad_norm": 0.6466697234916562, "learning_rate": 6.488240064882401e-10, "loss": 0.6241, "step": 34250 }, { "epoch": 1.0, "grad_norm": 0.6415846576003814, "learning_rate": 0.0, "loss": 0.5884, "step": 34251 }, { "epoch": 1.0, "step": 34251, "total_flos": 487866325008384.0, "train_loss": 0.6343101681589172, "train_runtime": 53335.8655, "train_samples_per_second": 10.275, "train_steps_per_second": 0.642 } ], "logging_steps": 1, "max_steps": 34251, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 487866325008384.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }