{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999254232232083, "eval_steps": 500, "global_step": 6704, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00014915355358341412, "grad_norm": 33.46785354614258, "learning_rate": 9.900990099009901e-08, "loss": 1.9175, "step": 1 }, { "epoch": 0.00029830710716682824, "grad_norm": 21.075450897216797, "learning_rate": 1.9801980198019803e-07, "loss": 1.7211, "step": 2 }, { "epoch": 0.0004474606607502424, "grad_norm": 25.594966888427734, "learning_rate": 2.9702970297029703e-07, "loss": 1.6696, "step": 3 }, { "epoch": 0.0005966142143336565, "grad_norm": 34.85166931152344, "learning_rate": 3.9603960396039606e-07, "loss": 1.7923, "step": 4 }, { "epoch": 0.0007457677679170706, "grad_norm": 26.8599853515625, "learning_rate": 4.950495049504951e-07, "loss": 1.7751, "step": 5 }, { "epoch": 0.0008949213215004848, "grad_norm": 40.40629959106445, "learning_rate": 5.940594059405941e-07, "loss": 1.8981, "step": 6 }, { "epoch": 0.0010440748750838989, "grad_norm": 27.15860939025879, "learning_rate": 6.930693069306931e-07, "loss": 1.9091, "step": 7 }, { "epoch": 0.001193228428667313, "grad_norm": 34.30740737915039, "learning_rate": 7.920792079207921e-07, "loss": 1.7004, "step": 8 }, { "epoch": 0.001342381982250727, "grad_norm": 34.65781021118164, "learning_rate": 8.910891089108911e-07, "loss": 1.6624, "step": 9 }, { "epoch": 0.0014915355358341412, "grad_norm": 21.567005157470703, "learning_rate": 9.900990099009902e-07, "loss": 1.7384, "step": 10 }, { "epoch": 0.0016406890894175555, "grad_norm": 16.879714965820312, "learning_rate": 1.0891089108910893e-06, "loss": 1.6054, "step": 11 }, { "epoch": 0.0017898426430009696, "grad_norm": 12.358672142028809, "learning_rate": 1.1881188118811881e-06, "loss": 1.4912, "step": 12 }, { "epoch": 0.0019389961965843837, "grad_norm": 10.467639923095703, "learning_rate": 1.2871287128712872e-06, "loss": 1.4201, "step": 13 }, { "epoch": 0.0020881497501677978, "grad_norm": 7.833812236785889, "learning_rate": 1.3861386138613863e-06, "loss": 1.4478, "step": 14 }, { "epoch": 0.002237303303751212, "grad_norm": 6.684574127197266, "learning_rate": 1.4851485148514852e-06, "loss": 1.4166, "step": 15 }, { "epoch": 0.002386456857334626, "grad_norm": 7.244694232940674, "learning_rate": 1.5841584158415842e-06, "loss": 1.3769, "step": 16 }, { "epoch": 0.0025356104109180403, "grad_norm": 6.1608123779296875, "learning_rate": 1.6831683168316833e-06, "loss": 1.2845, "step": 17 }, { "epoch": 0.002684763964501454, "grad_norm": 7.719333171844482, "learning_rate": 1.7821782178217822e-06, "loss": 1.5205, "step": 18 }, { "epoch": 0.0028339175180848684, "grad_norm": 6.451050281524658, "learning_rate": 1.8811881188118813e-06, "loss": 1.3789, "step": 19 }, { "epoch": 0.0029830710716682823, "grad_norm": 7.5586724281311035, "learning_rate": 1.9801980198019803e-06, "loss": 1.3646, "step": 20 }, { "epoch": 0.0031322246252516966, "grad_norm": 8.235079765319824, "learning_rate": 2.0792079207920794e-06, "loss": 1.4056, "step": 21 }, { "epoch": 0.003281378178835111, "grad_norm": 5.528628349304199, "learning_rate": 2.1782178217821785e-06, "loss": 1.2241, "step": 22 }, { "epoch": 0.003430531732418525, "grad_norm": 5.514181137084961, "learning_rate": 2.2772277227722776e-06, "loss": 1.3271, "step": 23 }, { "epoch": 0.003579685286001939, "grad_norm": 4.05088472366333, "learning_rate": 2.3762376237623762e-06, "loss": 1.2905, "step": 24 }, { "epoch": 0.003728838839585353, "grad_norm": 5.250198841094971, "learning_rate": 2.4752475247524753e-06, "loss": 1.2406, "step": 25 }, { "epoch": 0.0038779923931687673, "grad_norm": 5.075861930847168, "learning_rate": 2.5742574257425744e-06, "loss": 1.3417, "step": 26 }, { "epoch": 0.004027145946752181, "grad_norm": 5.338611602783203, "learning_rate": 2.6732673267326735e-06, "loss": 1.2527, "step": 27 }, { "epoch": 0.0041762995003355955, "grad_norm": 4.106296062469482, "learning_rate": 2.7722772277227726e-06, "loss": 1.2302, "step": 28 }, { "epoch": 0.00432545305391901, "grad_norm": 4.265397548675537, "learning_rate": 2.8712871287128712e-06, "loss": 1.241, "step": 29 }, { "epoch": 0.004474606607502424, "grad_norm": 5.2959747314453125, "learning_rate": 2.9702970297029703e-06, "loss": 1.2836, "step": 30 }, { "epoch": 0.004623760161085838, "grad_norm": 5.3309502601623535, "learning_rate": 3.0693069306930694e-06, "loss": 1.3357, "step": 31 }, { "epoch": 0.004772913714669252, "grad_norm": 10.963210105895996, "learning_rate": 3.1683168316831685e-06, "loss": 1.2055, "step": 32 }, { "epoch": 0.004922067268252666, "grad_norm": 6.684006214141846, "learning_rate": 3.2673267326732676e-06, "loss": 1.2248, "step": 33 }, { "epoch": 0.0050712208218360805, "grad_norm": 3.1862525939941406, "learning_rate": 3.3663366336633666e-06, "loss": 1.1896, "step": 34 }, { "epoch": 0.005220374375419494, "grad_norm": 4.717220783233643, "learning_rate": 3.4653465346534653e-06, "loss": 1.1742, "step": 35 }, { "epoch": 0.005369527929002908, "grad_norm": 4.764100074768066, "learning_rate": 3.5643564356435644e-06, "loss": 1.1151, "step": 36 }, { "epoch": 0.005518681482586323, "grad_norm": 3.7473268508911133, "learning_rate": 3.6633663366336635e-06, "loss": 1.2392, "step": 37 }, { "epoch": 0.005667835036169737, "grad_norm": 3.372643232345581, "learning_rate": 3.7623762376237625e-06, "loss": 1.2841, "step": 38 }, { "epoch": 0.005816988589753151, "grad_norm": 4.030853271484375, "learning_rate": 3.861386138613862e-06, "loss": 1.1544, "step": 39 }, { "epoch": 0.005966142143336565, "grad_norm": 4.517285346984863, "learning_rate": 3.960396039603961e-06, "loss": 1.1269, "step": 40 }, { "epoch": 0.006115295696919979, "grad_norm": 4.164844036102295, "learning_rate": 4.05940594059406e-06, "loss": 1.2, "step": 41 }, { "epoch": 0.006264449250503393, "grad_norm": 4.1731367111206055, "learning_rate": 4.158415841584159e-06, "loss": 1.1265, "step": 42 }, { "epoch": 0.006413602804086808, "grad_norm": 5.3257012367248535, "learning_rate": 4.2574257425742575e-06, "loss": 1.1055, "step": 43 }, { "epoch": 0.006562756357670222, "grad_norm": 3.961484432220459, "learning_rate": 4.356435643564357e-06, "loss": 1.1362, "step": 44 }, { "epoch": 0.006711909911253635, "grad_norm": 3.3058910369873047, "learning_rate": 4.455445544554456e-06, "loss": 1.1173, "step": 45 }, { "epoch": 0.00686106346483705, "grad_norm": 0.6857548356056213, "learning_rate": 4.554455445544555e-06, "loss": 0.2163, "step": 46 }, { "epoch": 0.007010217018420464, "grad_norm": 5.221093654632568, "learning_rate": 4.653465346534654e-06, "loss": 1.1517, "step": 47 }, { "epoch": 0.007159370572003878, "grad_norm": 4.251334190368652, "learning_rate": 4.7524752475247525e-06, "loss": 1.1037, "step": 48 }, { "epoch": 0.007308524125587292, "grad_norm": 4.567934989929199, "learning_rate": 4.851485148514852e-06, "loss": 1.1038, "step": 49 }, { "epoch": 0.007457677679170706, "grad_norm": 0.6759074926376343, "learning_rate": 4.950495049504951e-06, "loss": 0.2241, "step": 50 }, { "epoch": 0.00760683123275412, "grad_norm": 3.4278738498687744, "learning_rate": 5.04950495049505e-06, "loss": 1.0522, "step": 51 }, { "epoch": 0.007755984786337535, "grad_norm": 3.5582821369171143, "learning_rate": 5.148514851485149e-06, "loss": 1.0894, "step": 52 }, { "epoch": 0.007905138339920948, "grad_norm": 3.081789970397949, "learning_rate": 5.247524752475248e-06, "loss": 1.2351, "step": 53 }, { "epoch": 0.008054291893504362, "grad_norm": 3.3864681720733643, "learning_rate": 5.346534653465347e-06, "loss": 1.1088, "step": 54 }, { "epoch": 0.008203445447087777, "grad_norm": 4.235476970672607, "learning_rate": 5.4455445544554465e-06, "loss": 1.0689, "step": 55 }, { "epoch": 0.008352599000671191, "grad_norm": 3.8261160850524902, "learning_rate": 5.544554455445545e-06, "loss": 0.9897, "step": 56 }, { "epoch": 0.008501752554254605, "grad_norm": 3.144970417022705, "learning_rate": 5.643564356435644e-06, "loss": 1.0128, "step": 57 }, { "epoch": 0.00865090610783802, "grad_norm": 3.344707489013672, "learning_rate": 5.7425742574257425e-06, "loss": 1.0942, "step": 58 }, { "epoch": 0.008800059661421434, "grad_norm": 4.2349042892456055, "learning_rate": 5.841584158415842e-06, "loss": 1.0689, "step": 59 }, { "epoch": 0.008949213215004848, "grad_norm": 4.601934432983398, "learning_rate": 5.940594059405941e-06, "loss": 1.0492, "step": 60 }, { "epoch": 0.00909836676858826, "grad_norm": 3.246380090713501, "learning_rate": 6.03960396039604e-06, "loss": 1.0602, "step": 61 }, { "epoch": 0.009247520322171675, "grad_norm": 0.6813023090362549, "learning_rate": 6.138613861386139e-06, "loss": 0.2089, "step": 62 }, { "epoch": 0.00939667387575509, "grad_norm": 3.1957526206970215, "learning_rate": 6.237623762376238e-06, "loss": 1.0917, "step": 63 }, { "epoch": 0.009545827429338504, "grad_norm": 4.632509708404541, "learning_rate": 6.336633663366337e-06, "loss": 1.0175, "step": 64 }, { "epoch": 0.009694980982921918, "grad_norm": 2.7732937335968018, "learning_rate": 6.4356435643564364e-06, "loss": 1.0236, "step": 65 }, { "epoch": 0.009844134536505332, "grad_norm": 3.4336354732513428, "learning_rate": 6.534653465346535e-06, "loss": 1.0949, "step": 66 }, { "epoch": 0.009993288090088747, "grad_norm": 2.9921786785125732, "learning_rate": 6.633663366336635e-06, "loss": 1.107, "step": 67 }, { "epoch": 0.010142441643672161, "grad_norm": 3.592417001724243, "learning_rate": 6.732673267326733e-06, "loss": 1.1055, "step": 68 }, { "epoch": 0.010291595197255575, "grad_norm": 3.0891358852386475, "learning_rate": 6.831683168316833e-06, "loss": 1.0975, "step": 69 }, { "epoch": 0.010440748750838988, "grad_norm": 3.5852065086364746, "learning_rate": 6.930693069306931e-06, "loss": 0.9667, "step": 70 }, { "epoch": 0.010589902304422402, "grad_norm": 7.19373893737793, "learning_rate": 7.02970297029703e-06, "loss": 1.0295, "step": 71 }, { "epoch": 0.010739055858005817, "grad_norm": 3.2959463596343994, "learning_rate": 7.128712871287129e-06, "loss": 1.0876, "step": 72 }, { "epoch": 0.01088820941158923, "grad_norm": 0.5877397060394287, "learning_rate": 7.227722772277228e-06, "loss": 0.211, "step": 73 }, { "epoch": 0.011037362965172645, "grad_norm": 3.3861587047576904, "learning_rate": 7.326732673267327e-06, "loss": 1.0648, "step": 74 }, { "epoch": 0.01118651651875606, "grad_norm": 4.219755172729492, "learning_rate": 7.425742574257426e-06, "loss": 1.0431, "step": 75 }, { "epoch": 0.011335670072339474, "grad_norm": 4.673391819000244, "learning_rate": 7.524752475247525e-06, "loss": 1.186, "step": 76 }, { "epoch": 0.011484823625922888, "grad_norm": 0.555681049823761, "learning_rate": 7.6237623762376246e-06, "loss": 0.2174, "step": 77 }, { "epoch": 0.011633977179506302, "grad_norm": 4.128749847412109, "learning_rate": 7.722772277227724e-06, "loss": 0.9634, "step": 78 }, { "epoch": 0.011783130733089717, "grad_norm": 3.555722951889038, "learning_rate": 7.821782178217822e-06, "loss": 0.9783, "step": 79 }, { "epoch": 0.01193228428667313, "grad_norm": 3.1141819953918457, "learning_rate": 7.920792079207921e-06, "loss": 0.9391, "step": 80 }, { "epoch": 0.012081437840256544, "grad_norm": 3.1952788829803467, "learning_rate": 8.019801980198021e-06, "loss": 0.9685, "step": 81 }, { "epoch": 0.012230591393839958, "grad_norm": 4.197729110717773, "learning_rate": 8.11881188118812e-06, "loss": 1.0291, "step": 82 }, { "epoch": 0.012379744947423372, "grad_norm": 3.345567226409912, "learning_rate": 8.217821782178218e-06, "loss": 0.981, "step": 83 }, { "epoch": 0.012528898501006787, "grad_norm": 4.746242046356201, "learning_rate": 8.316831683168318e-06, "loss": 1.0188, "step": 84 }, { "epoch": 0.0126780520545902, "grad_norm": 6.1302361488342285, "learning_rate": 8.415841584158416e-06, "loss": 0.946, "step": 85 }, { "epoch": 0.012827205608173615, "grad_norm": 8.309062957763672, "learning_rate": 8.514851485148515e-06, "loss": 0.9877, "step": 86 }, { "epoch": 0.01297635916175703, "grad_norm": 4.758452892303467, "learning_rate": 8.613861386138615e-06, "loss": 1.0452, "step": 87 }, { "epoch": 0.013125512715340444, "grad_norm": 3.224172592163086, "learning_rate": 8.712871287128714e-06, "loss": 1.1155, "step": 88 }, { "epoch": 0.013274666268923856, "grad_norm": 3.895554542541504, "learning_rate": 8.811881188118812e-06, "loss": 1.0232, "step": 89 }, { "epoch": 0.01342381982250727, "grad_norm": 3.3907060623168945, "learning_rate": 8.910891089108911e-06, "loss": 1.0032, "step": 90 }, { "epoch": 0.013572973376090685, "grad_norm": 4.2322869300842285, "learning_rate": 9.009900990099011e-06, "loss": 1.0662, "step": 91 }, { "epoch": 0.0137221269296741, "grad_norm": 4.219111442565918, "learning_rate": 9.10891089108911e-06, "loss": 1.0033, "step": 92 }, { "epoch": 0.013871280483257514, "grad_norm": 6.393682956695557, "learning_rate": 9.20792079207921e-06, "loss": 1.0126, "step": 93 }, { "epoch": 0.014020434036840928, "grad_norm": 4.0708746910095215, "learning_rate": 9.306930693069308e-06, "loss": 0.9518, "step": 94 }, { "epoch": 0.014169587590424342, "grad_norm": 3.4250476360321045, "learning_rate": 9.405940594059405e-06, "loss": 0.923, "step": 95 }, { "epoch": 0.014318741144007757, "grad_norm": 3.0933268070220947, "learning_rate": 9.504950495049505e-06, "loss": 1.0046, "step": 96 }, { "epoch": 0.01446789469759117, "grad_norm": 6.180056095123291, "learning_rate": 9.603960396039604e-06, "loss": 0.9697, "step": 97 }, { "epoch": 0.014617048251174583, "grad_norm": 3.932058095932007, "learning_rate": 9.702970297029704e-06, "loss": 0.9115, "step": 98 }, { "epoch": 0.014766201804757998, "grad_norm": 3.4206488132476807, "learning_rate": 9.801980198019802e-06, "loss": 0.9961, "step": 99 }, { "epoch": 0.014915355358341412, "grad_norm": 4.655818462371826, "learning_rate": 9.900990099009901e-06, "loss": 1.1018, "step": 100 }, { "epoch": 0.015064508911924826, "grad_norm": 10.015466690063477, "learning_rate": 1e-05, "loss": 0.8661, "step": 101 }, { "epoch": 0.01521366246550824, "grad_norm": 4.2008538246154785, "learning_rate": 1.00990099009901e-05, "loss": 0.9605, "step": 102 }, { "epoch": 0.015362816019091655, "grad_norm": 2.6605498790740967, "learning_rate": 1.01980198019802e-05, "loss": 0.9795, "step": 103 }, { "epoch": 0.01551196957267507, "grad_norm": 4.094620704650879, "learning_rate": 1.0297029702970298e-05, "loss": 1.0027, "step": 104 }, { "epoch": 0.015661123126258482, "grad_norm": 3.1637461185455322, "learning_rate": 1.0396039603960397e-05, "loss": 0.9348, "step": 105 }, { "epoch": 0.015810276679841896, "grad_norm": 3.7168772220611572, "learning_rate": 1.0495049504950497e-05, "loss": 1.0071, "step": 106 }, { "epoch": 0.01595943023342531, "grad_norm": 3.924219846725464, "learning_rate": 1.0594059405940596e-05, "loss": 0.9971, "step": 107 }, { "epoch": 0.016108583787008725, "grad_norm": 3.3025028705596924, "learning_rate": 1.0693069306930694e-05, "loss": 0.9386, "step": 108 }, { "epoch": 0.01625773734059214, "grad_norm": 3.3758513927459717, "learning_rate": 1.0792079207920793e-05, "loss": 1.0037, "step": 109 }, { "epoch": 0.016406890894175553, "grad_norm": 1.956055998802185, "learning_rate": 1.0891089108910893e-05, "loss": 0.9623, "step": 110 }, { "epoch": 0.016556044447758968, "grad_norm": 3.512071371078491, "learning_rate": 1.0990099009900992e-05, "loss": 1.0286, "step": 111 }, { "epoch": 0.016705198001342382, "grad_norm": 2.565762758255005, "learning_rate": 1.108910891089109e-05, "loss": 1.0112, "step": 112 }, { "epoch": 0.016854351554925796, "grad_norm": 3.9047703742980957, "learning_rate": 1.118811881188119e-05, "loss": 0.9083, "step": 113 }, { "epoch": 0.01700350510850921, "grad_norm": 0.5394715666770935, "learning_rate": 1.1287128712871288e-05, "loss": 0.2206, "step": 114 }, { "epoch": 0.017152658662092625, "grad_norm": 2.204237699508667, "learning_rate": 1.1386138613861385e-05, "loss": 0.9638, "step": 115 }, { "epoch": 0.01730181221567604, "grad_norm": 4.165485382080078, "learning_rate": 1.1485148514851485e-05, "loss": 1.0373, "step": 116 }, { "epoch": 0.017450965769259454, "grad_norm": 3.632218360900879, "learning_rate": 1.1584158415841584e-05, "loss": 1.0206, "step": 117 }, { "epoch": 0.017600119322842868, "grad_norm": 3.803077220916748, "learning_rate": 1.1683168316831684e-05, "loss": 0.993, "step": 118 }, { "epoch": 0.017749272876426282, "grad_norm": 2.8637309074401855, "learning_rate": 1.1782178217821782e-05, "loss": 1.037, "step": 119 }, { "epoch": 0.017898426430009697, "grad_norm": 4.055881023406982, "learning_rate": 1.1881188118811881e-05, "loss": 1.011, "step": 120 }, { "epoch": 0.01804757998359311, "grad_norm": 3.128279447555542, "learning_rate": 1.198019801980198e-05, "loss": 0.8909, "step": 121 }, { "epoch": 0.01819673353717652, "grad_norm": 3.0133845806121826, "learning_rate": 1.207920792079208e-05, "loss": 0.9641, "step": 122 }, { "epoch": 0.018345887090759936, "grad_norm": 2.785449981689453, "learning_rate": 1.217821782178218e-05, "loss": 0.9504, "step": 123 }, { "epoch": 0.01849504064434335, "grad_norm": 3.988884687423706, "learning_rate": 1.2277227722772278e-05, "loss": 0.8611, "step": 124 }, { "epoch": 0.018644194197926765, "grad_norm": 2.8634345531463623, "learning_rate": 1.2376237623762377e-05, "loss": 1.0151, "step": 125 }, { "epoch": 0.01879334775151018, "grad_norm": 2.762653112411499, "learning_rate": 1.2475247524752477e-05, "loss": 1.0099, "step": 126 }, { "epoch": 0.018942501305093593, "grad_norm": 4.365610599517822, "learning_rate": 1.2574257425742576e-05, "loss": 1.0177, "step": 127 }, { "epoch": 0.019091654858677008, "grad_norm": 11.472090721130371, "learning_rate": 1.2673267326732674e-05, "loss": 0.9817, "step": 128 }, { "epoch": 0.019240808412260422, "grad_norm": 3.196831226348877, "learning_rate": 1.2772277227722773e-05, "loss": 0.991, "step": 129 }, { "epoch": 0.019389961965843836, "grad_norm": 3.3482444286346436, "learning_rate": 1.2871287128712873e-05, "loss": 0.9999, "step": 130 }, { "epoch": 0.01953911551942725, "grad_norm": 3.29642653465271, "learning_rate": 1.2970297029702972e-05, "loss": 1.0582, "step": 131 }, { "epoch": 0.019688269073010665, "grad_norm": 3.13476300239563, "learning_rate": 1.306930693069307e-05, "loss": 0.9197, "step": 132 }, { "epoch": 0.01983742262659408, "grad_norm": 4.851349830627441, "learning_rate": 1.316831683168317e-05, "loss": 0.9525, "step": 133 }, { "epoch": 0.019986576180177493, "grad_norm": 3.7482824325561523, "learning_rate": 1.326732673267327e-05, "loss": 0.9035, "step": 134 }, { "epoch": 0.020135729733760908, "grad_norm": 4.388681411743164, "learning_rate": 1.3366336633663369e-05, "loss": 1.0127, "step": 135 }, { "epoch": 0.020284883287344322, "grad_norm": 3.2583932876586914, "learning_rate": 1.3465346534653467e-05, "loss": 0.9607, "step": 136 }, { "epoch": 0.020434036840927736, "grad_norm": 4.276053428649902, "learning_rate": 1.3564356435643566e-05, "loss": 0.9289, "step": 137 }, { "epoch": 0.02058319039451115, "grad_norm": 8.798200607299805, "learning_rate": 1.3663366336633666e-05, "loss": 0.8977, "step": 138 }, { "epoch": 0.020732343948094565, "grad_norm": 2.9017324447631836, "learning_rate": 1.3762376237623762e-05, "loss": 0.9682, "step": 139 }, { "epoch": 0.020881497501677976, "grad_norm": 4.600789546966553, "learning_rate": 1.3861386138613861e-05, "loss": 0.9871, "step": 140 }, { "epoch": 0.02103065105526139, "grad_norm": 6.003566265106201, "learning_rate": 1.396039603960396e-05, "loss": 0.8657, "step": 141 }, { "epoch": 0.021179804608844804, "grad_norm": 2.2687177658081055, "learning_rate": 1.405940594059406e-05, "loss": 1.0188, "step": 142 }, { "epoch": 0.02132895816242822, "grad_norm": 4.084898948669434, "learning_rate": 1.4158415841584158e-05, "loss": 0.8557, "step": 143 }, { "epoch": 0.021478111716011633, "grad_norm": 5.167733192443848, "learning_rate": 1.4257425742574257e-05, "loss": 0.8854, "step": 144 }, { "epoch": 0.021627265269595047, "grad_norm": 3.0253615379333496, "learning_rate": 1.4356435643564357e-05, "loss": 0.9997, "step": 145 }, { "epoch": 0.02177641882317846, "grad_norm": 4.782785892486572, "learning_rate": 1.4455445544554456e-05, "loss": 0.9624, "step": 146 }, { "epoch": 0.021925572376761876, "grad_norm": 3.13102650642395, "learning_rate": 1.4554455445544556e-05, "loss": 0.9489, "step": 147 }, { "epoch": 0.02207472593034529, "grad_norm": 3.8281142711639404, "learning_rate": 1.4653465346534654e-05, "loss": 0.9241, "step": 148 }, { "epoch": 0.022223879483928705, "grad_norm": 2.5506961345672607, "learning_rate": 1.4752475247524753e-05, "loss": 0.9265, "step": 149 }, { "epoch": 0.02237303303751212, "grad_norm": 3.7426867485046387, "learning_rate": 1.4851485148514853e-05, "loss": 0.9375, "step": 150 }, { "epoch": 0.022522186591095533, "grad_norm": 4.587522506713867, "learning_rate": 1.4950495049504952e-05, "loss": 0.8776, "step": 151 }, { "epoch": 0.022671340144678948, "grad_norm": 3.6405012607574463, "learning_rate": 1.504950495049505e-05, "loss": 0.9561, "step": 152 }, { "epoch": 0.022820493698262362, "grad_norm": 3.9900383949279785, "learning_rate": 1.514851485148515e-05, "loss": 0.9231, "step": 153 }, { "epoch": 0.022969647251845776, "grad_norm": 0.5421858429908752, "learning_rate": 1.5247524752475249e-05, "loss": 0.1938, "step": 154 }, { "epoch": 0.02311880080542919, "grad_norm": 3.4960334300994873, "learning_rate": 1.534653465346535e-05, "loss": 0.8868, "step": 155 }, { "epoch": 0.023267954359012605, "grad_norm": 3.5810506343841553, "learning_rate": 1.5445544554455448e-05, "loss": 1.0705, "step": 156 }, { "epoch": 0.02341710791259602, "grad_norm": 3.004021406173706, "learning_rate": 1.5544554455445548e-05, "loss": 1.0391, "step": 157 }, { "epoch": 0.023566261466179433, "grad_norm": 2.5652596950531006, "learning_rate": 1.5643564356435644e-05, "loss": 0.9843, "step": 158 }, { "epoch": 0.023715415019762844, "grad_norm": 4.381618499755859, "learning_rate": 1.5742574257425743e-05, "loss": 0.9177, "step": 159 }, { "epoch": 0.02386456857334626, "grad_norm": 3.5513553619384766, "learning_rate": 1.5841584158415843e-05, "loss": 0.895, "step": 160 }, { "epoch": 0.024013722126929673, "grad_norm": 8.627764701843262, "learning_rate": 1.5940594059405942e-05, "loss": 0.9229, "step": 161 }, { "epoch": 0.024162875680513087, "grad_norm": 4.740233421325684, "learning_rate": 1.6039603960396042e-05, "loss": 0.8184, "step": 162 }, { "epoch": 0.0243120292340965, "grad_norm": 3.8299875259399414, "learning_rate": 1.613861386138614e-05, "loss": 0.9325, "step": 163 }, { "epoch": 0.024461182787679916, "grad_norm": 5.260658264160156, "learning_rate": 1.623762376237624e-05, "loss": 0.9144, "step": 164 }, { "epoch": 0.02461033634126333, "grad_norm": 0.595231831073761, "learning_rate": 1.6336633663366337e-05, "loss": 0.2296, "step": 165 }, { "epoch": 0.024759489894846744, "grad_norm": 4.6904191970825195, "learning_rate": 1.6435643564356436e-05, "loss": 0.9374, "step": 166 }, { "epoch": 0.02490864344843016, "grad_norm": 7.1352386474609375, "learning_rate": 1.6534653465346536e-05, "loss": 0.9909, "step": 167 }, { "epoch": 0.025057797002013573, "grad_norm": 4.476190567016602, "learning_rate": 1.6633663366336635e-05, "loss": 1.026, "step": 168 }, { "epoch": 0.025206950555596987, "grad_norm": 12.098798751831055, "learning_rate": 1.6732673267326735e-05, "loss": 0.9136, "step": 169 }, { "epoch": 0.0253561041091804, "grad_norm": 0.5218330025672913, "learning_rate": 1.683168316831683e-05, "loss": 0.2338, "step": 170 }, { "epoch": 0.025505257662763816, "grad_norm": 4.387589454650879, "learning_rate": 1.693069306930693e-05, "loss": 0.8709, "step": 171 }, { "epoch": 0.02565441121634723, "grad_norm": 3.60666561126709, "learning_rate": 1.702970297029703e-05, "loss": 1.0308, "step": 172 }, { "epoch": 0.025803564769930645, "grad_norm": 3.847170829772949, "learning_rate": 1.712871287128713e-05, "loss": 0.9764, "step": 173 }, { "epoch": 0.02595271832351406, "grad_norm": 3.056631565093994, "learning_rate": 1.722772277227723e-05, "loss": 0.9855, "step": 174 }, { "epoch": 0.026101871877097473, "grad_norm": 7.9983978271484375, "learning_rate": 1.732673267326733e-05, "loss": 0.9087, "step": 175 }, { "epoch": 0.026251025430680888, "grad_norm": 0.5140776038169861, "learning_rate": 1.7425742574257428e-05, "loss": 0.2081, "step": 176 }, { "epoch": 0.0264001789842643, "grad_norm": 3.8414664268493652, "learning_rate": 1.7524752475247528e-05, "loss": 0.9241, "step": 177 }, { "epoch": 0.026549332537847713, "grad_norm": 5.797891139984131, "learning_rate": 1.7623762376237624e-05, "loss": 0.8796, "step": 178 }, { "epoch": 0.026698486091431127, "grad_norm": 2.9314591884613037, "learning_rate": 1.7722772277227723e-05, "loss": 0.8789, "step": 179 }, { "epoch": 0.02684763964501454, "grad_norm": 4.830236911773682, "learning_rate": 1.7821782178217823e-05, "loss": 0.8566, "step": 180 }, { "epoch": 0.026996793198597956, "grad_norm": 3.3879692554473877, "learning_rate": 1.7920792079207922e-05, "loss": 0.9684, "step": 181 }, { "epoch": 0.02714594675218137, "grad_norm": 0.516357421875, "learning_rate": 1.8019801980198022e-05, "loss": 0.2044, "step": 182 }, { "epoch": 0.027295100305764784, "grad_norm": 3.4202613830566406, "learning_rate": 1.811881188118812e-05, "loss": 0.8922, "step": 183 }, { "epoch": 0.0274442538593482, "grad_norm": 26.879573822021484, "learning_rate": 1.821782178217822e-05, "loss": 0.9322, "step": 184 }, { "epoch": 0.027593407412931613, "grad_norm": 2.31836199760437, "learning_rate": 1.831683168316832e-05, "loss": 0.9881, "step": 185 }, { "epoch": 0.027742560966515027, "grad_norm": 9.370018005371094, "learning_rate": 1.841584158415842e-05, "loss": 0.9349, "step": 186 }, { "epoch": 0.02789171452009844, "grad_norm": 8.174256324768066, "learning_rate": 1.8514851485148516e-05, "loss": 0.9245, "step": 187 }, { "epoch": 0.028040868073681856, "grad_norm": 3.666325092315674, "learning_rate": 1.8613861386138615e-05, "loss": 1.0431, "step": 188 }, { "epoch": 0.02819002162726527, "grad_norm": 4.515475273132324, "learning_rate": 1.8712871287128715e-05, "loss": 0.8715, "step": 189 }, { "epoch": 0.028339175180848684, "grad_norm": 4.24520206451416, "learning_rate": 1.881188118811881e-05, "loss": 0.9479, "step": 190 }, { "epoch": 0.0284883287344321, "grad_norm": 4.411160945892334, "learning_rate": 1.891089108910891e-05, "loss": 0.8626, "step": 191 }, { "epoch": 0.028637482288015513, "grad_norm": 4.418791770935059, "learning_rate": 1.900990099009901e-05, "loss": 0.8901, "step": 192 }, { "epoch": 0.028786635841598927, "grad_norm": 5.058281898498535, "learning_rate": 1.910891089108911e-05, "loss": 0.9088, "step": 193 }, { "epoch": 0.02893578939518234, "grad_norm": 2.544296979904175, "learning_rate": 1.920792079207921e-05, "loss": 0.8862, "step": 194 }, { "epoch": 0.029084942948765756, "grad_norm": 5.744648456573486, "learning_rate": 1.930693069306931e-05, "loss": 0.8568, "step": 195 }, { "epoch": 0.029234096502349167, "grad_norm": 0.5577027797698975, "learning_rate": 1.9405940594059408e-05, "loss": 0.2115, "step": 196 }, { "epoch": 0.02938325005593258, "grad_norm": 6.259124279022217, "learning_rate": 1.9504950495049508e-05, "loss": 0.943, "step": 197 }, { "epoch": 0.029532403609515995, "grad_norm": 4.0946760177612305, "learning_rate": 1.9603960396039604e-05, "loss": 0.9372, "step": 198 }, { "epoch": 0.02968155716309941, "grad_norm": 5.665671348571777, "learning_rate": 1.9702970297029703e-05, "loss": 0.9189, "step": 199 }, { "epoch": 0.029830710716682824, "grad_norm": 3.8019306659698486, "learning_rate": 1.9801980198019803e-05, "loss": 0.9411, "step": 200 }, { "epoch": 0.02997986427026624, "grad_norm": 3.4816739559173584, "learning_rate": 1.9900990099009902e-05, "loss": 0.8273, "step": 201 }, { "epoch": 0.030129017823849653, "grad_norm": 5.180420875549316, "learning_rate": 2e-05, "loss": 0.8701, "step": 202 }, { "epoch": 0.030278171377433067, "grad_norm": 3.976012945175171, "learning_rate": 1.999999883271794e-05, "loss": 0.9198, "step": 203 }, { "epoch": 0.03042732493101648, "grad_norm": 4.323599815368652, "learning_rate": 1.9999995330872033e-05, "loss": 0.9605, "step": 204 }, { "epoch": 0.030576478484599896, "grad_norm": 3.536442518234253, "learning_rate": 1.9999989494463094e-05, "loss": 0.9948, "step": 205 }, { "epoch": 0.03072563203818331, "grad_norm": 4.1971964836120605, "learning_rate": 1.9999981323492487e-05, "loss": 0.8951, "step": 206 }, { "epoch": 0.030874785591766724, "grad_norm": 0.5948680639266968, "learning_rate": 1.9999970817962122e-05, "loss": 0.2061, "step": 207 }, { "epoch": 0.03102393914535014, "grad_norm": 3.833204507827759, "learning_rate": 1.999995797787445e-05, "loss": 0.8667, "step": 208 }, { "epoch": 0.031173092698933553, "grad_norm": 3.966623067855835, "learning_rate": 1.9999942803232467e-05, "loss": 0.8883, "step": 209 }, { "epoch": 0.031322246252516964, "grad_norm": 3.8376047611236572, "learning_rate": 1.999992529403971e-05, "loss": 0.9267, "step": 210 }, { "epoch": 0.03147139980610038, "grad_norm": 2.8021743297576904, "learning_rate": 1.9999905450300284e-05, "loss": 0.9029, "step": 211 }, { "epoch": 0.03162055335968379, "grad_norm": 2.0348191261291504, "learning_rate": 1.9999883272018805e-05, "loss": 0.9395, "step": 212 }, { "epoch": 0.03176970691326721, "grad_norm": 2.8746726512908936, "learning_rate": 1.9999858759200455e-05, "loss": 0.8763, "step": 213 }, { "epoch": 0.03191886046685062, "grad_norm": 6.1789116859436035, "learning_rate": 1.999983191185096e-05, "loss": 0.8708, "step": 214 }, { "epoch": 0.032068014020434035, "grad_norm": 1.867109775543213, "learning_rate": 1.999980272997659e-05, "loss": 0.9256, "step": 215 }, { "epoch": 0.03221716757401745, "grad_norm": 2.2050280570983887, "learning_rate": 1.9999771213584147e-05, "loss": 0.941, "step": 216 }, { "epoch": 0.032366321127600864, "grad_norm": 19.996967315673828, "learning_rate": 1.9999737362680997e-05, "loss": 0.9362, "step": 217 }, { "epoch": 0.03251547468118428, "grad_norm": 3.0183515548706055, "learning_rate": 1.9999701177275045e-05, "loss": 0.9845, "step": 218 }, { "epoch": 0.03266462823476769, "grad_norm": 4.474681377410889, "learning_rate": 1.9999662657374732e-05, "loss": 0.882, "step": 219 }, { "epoch": 0.03281378178835111, "grad_norm": 3.855984687805176, "learning_rate": 1.999962180298905e-05, "loss": 0.8111, "step": 220 }, { "epoch": 0.03296293534193452, "grad_norm": 2.4791440963745117, "learning_rate": 1.9999578614127544e-05, "loss": 1.0208, "step": 221 }, { "epoch": 0.033112088895517935, "grad_norm": 6.274467945098877, "learning_rate": 1.9999533090800293e-05, "loss": 0.8869, "step": 222 }, { "epoch": 0.03326124244910135, "grad_norm": 3.171496868133545, "learning_rate": 1.9999485233017926e-05, "loss": 0.8693, "step": 223 }, { "epoch": 0.033410396002684764, "grad_norm": 9.049464225769043, "learning_rate": 1.9999435040791612e-05, "loss": 0.7874, "step": 224 }, { "epoch": 0.03355954955626818, "grad_norm": 3.310910224914551, "learning_rate": 1.999938251413307e-05, "loss": 0.9002, "step": 225 }, { "epoch": 0.03370870310985159, "grad_norm": 3.9958336353302, "learning_rate": 1.9999327653054563e-05, "loss": 0.8213, "step": 226 }, { "epoch": 0.03385785666343501, "grad_norm": 3.387072801589966, "learning_rate": 1.9999270457568904e-05, "loss": 0.8489, "step": 227 }, { "epoch": 0.03400701021701842, "grad_norm": 0.7535048723220825, "learning_rate": 1.9999210927689438e-05, "loss": 0.2245, "step": 228 }, { "epoch": 0.034156163770601836, "grad_norm": 5.23142671585083, "learning_rate": 1.9999149063430066e-05, "loss": 0.9723, "step": 229 }, { "epoch": 0.03430531732418525, "grad_norm": 0.5330758690834045, "learning_rate": 1.999908486480523e-05, "loss": 0.2174, "step": 230 }, { "epoch": 0.034454470877768664, "grad_norm": 2.465949296951294, "learning_rate": 1.9999018331829916e-05, "loss": 0.9395, "step": 231 }, { "epoch": 0.03460362443135208, "grad_norm": 3.7715423107147217, "learning_rate": 1.999894946451966e-05, "loss": 0.9878, "step": 232 }, { "epoch": 0.03475277798493549, "grad_norm": 5.081086158752441, "learning_rate": 1.999887826289054e-05, "loss": 0.8999, "step": 233 }, { "epoch": 0.03490193153851891, "grad_norm": 4.547044277191162, "learning_rate": 1.9998804726959173e-05, "loss": 0.8437, "step": 234 }, { "epoch": 0.03505108509210232, "grad_norm": 1.9542083740234375, "learning_rate": 1.9998728856742732e-05, "loss": 0.8859, "step": 235 }, { "epoch": 0.035200238645685736, "grad_norm": 2.431312084197998, "learning_rate": 1.9998650652258926e-05, "loss": 0.9181, "step": 236 }, { "epoch": 0.03534939219926915, "grad_norm": 3.2879912853240967, "learning_rate": 1.9998570113526013e-05, "loss": 0.9251, "step": 237 }, { "epoch": 0.035498545752852564, "grad_norm": 4.533670425415039, "learning_rate": 1.9998487240562798e-05, "loss": 0.8794, "step": 238 }, { "epoch": 0.03564769930643598, "grad_norm": 2.9840948581695557, "learning_rate": 1.9998402033388626e-05, "loss": 0.8384, "step": 239 }, { "epoch": 0.03579685286001939, "grad_norm": 2.966843605041504, "learning_rate": 1.9998314492023387e-05, "loss": 0.9163, "step": 240 }, { "epoch": 0.03594600641360281, "grad_norm": 4.015908241271973, "learning_rate": 1.9998224616487523e-05, "loss": 0.8592, "step": 241 }, { "epoch": 0.03609515996718622, "grad_norm": 1.254214882850647, "learning_rate": 1.9998132406802008e-05, "loss": 0.2542, "step": 242 }, { "epoch": 0.03624431352076963, "grad_norm": 4.255161762237549, "learning_rate": 1.999803786298838e-05, "loss": 0.8945, "step": 243 }, { "epoch": 0.03639346707435304, "grad_norm": 2.3793318271636963, "learning_rate": 1.9997940985068702e-05, "loss": 1.0074, "step": 244 }, { "epoch": 0.03654262062793646, "grad_norm": 4.296963214874268, "learning_rate": 1.9997841773065594e-05, "loss": 0.915, "step": 245 }, { "epoch": 0.03669177418151987, "grad_norm": 2.729381799697876, "learning_rate": 1.9997740227002217e-05, "loss": 1.01, "step": 246 }, { "epoch": 0.036840927735103286, "grad_norm": 2.8796823024749756, "learning_rate": 1.9997636346902284e-05, "loss": 0.8513, "step": 247 }, { "epoch": 0.0369900812886867, "grad_norm": 5.825348377227783, "learning_rate": 1.9997530132790034e-05, "loss": 0.9291, "step": 248 }, { "epoch": 0.037139234842270115, "grad_norm": 3.106023073196411, "learning_rate": 1.9997421584690272e-05, "loss": 0.9291, "step": 249 }, { "epoch": 0.03728838839585353, "grad_norm": 0.6235529780387878, "learning_rate": 1.9997310702628338e-05, "loss": 0.2193, "step": 250 }, { "epoch": 0.037437541949436944, "grad_norm": 3.365039587020874, "learning_rate": 1.9997197486630116e-05, "loss": 0.8896, "step": 251 }, { "epoch": 0.03758669550302036, "grad_norm": 2.264119863510132, "learning_rate": 1.9997081936722037e-05, "loss": 0.9636, "step": 252 }, { "epoch": 0.03773584905660377, "grad_norm": 3.695404052734375, "learning_rate": 1.9996964052931082e-05, "loss": 0.9196, "step": 253 }, { "epoch": 0.037885002610187186, "grad_norm": 2.5830276012420654, "learning_rate": 1.9996843835284765e-05, "loss": 0.9496, "step": 254 }, { "epoch": 0.0380341561637706, "grad_norm": 2.479339361190796, "learning_rate": 1.9996721283811157e-05, "loss": 0.8721, "step": 255 }, { "epoch": 0.038183309717354015, "grad_norm": 2.911588430404663, "learning_rate": 1.9996596398538865e-05, "loss": 0.937, "step": 256 }, { "epoch": 0.03833246327093743, "grad_norm": 6.59764289855957, "learning_rate": 1.9996469179497045e-05, "loss": 0.8564, "step": 257 }, { "epoch": 0.038481616824520844, "grad_norm": 2.7332515716552734, "learning_rate": 1.99963396267154e-05, "loss": 0.9167, "step": 258 }, { "epoch": 0.03863077037810426, "grad_norm": 3.0391411781311035, "learning_rate": 1.999620774022417e-05, "loss": 0.9383, "step": 259 }, { "epoch": 0.03877992393168767, "grad_norm": 2.6762800216674805, "learning_rate": 1.9996073520054143e-05, "loss": 0.86, "step": 260 }, { "epoch": 0.03892907748527109, "grad_norm": 2.218050241470337, "learning_rate": 1.999593696623666e-05, "loss": 0.9473, "step": 261 }, { "epoch": 0.0390782310388545, "grad_norm": 3.365501880645752, "learning_rate": 1.99957980788036e-05, "loss": 0.8649, "step": 262 }, { "epoch": 0.039227384592437915, "grad_norm": 4.029329299926758, "learning_rate": 1.9995656857787384e-05, "loss": 0.7774, "step": 263 }, { "epoch": 0.03937653814602133, "grad_norm": 0.5729072093963623, "learning_rate": 1.999551330322098e-05, "loss": 0.2165, "step": 264 }, { "epoch": 0.039525691699604744, "grad_norm": 2.7354331016540527, "learning_rate": 1.9995367415137906e-05, "loss": 0.8348, "step": 265 }, { "epoch": 0.03967484525318816, "grad_norm": 4.120749473571777, "learning_rate": 1.9995219193572216e-05, "loss": 0.8654, "step": 266 }, { "epoch": 0.03982399880677157, "grad_norm": 3.013434410095215, "learning_rate": 1.9995068638558522e-05, "loss": 0.8211, "step": 267 }, { "epoch": 0.03997315236035499, "grad_norm": 3.6080822944641113, "learning_rate": 1.999491575013196e-05, "loss": 0.9097, "step": 268 }, { "epoch": 0.0401223059139384, "grad_norm": 0.47905904054641724, "learning_rate": 1.9994760528328226e-05, "loss": 0.2405, "step": 269 }, { "epoch": 0.040271459467521815, "grad_norm": 3.030529499053955, "learning_rate": 1.999460297318357e-05, "loss": 0.8589, "step": 270 }, { "epoch": 0.04042061302110523, "grad_norm": 0.46486780047416687, "learning_rate": 1.9994443084734754e-05, "loss": 0.2199, "step": 271 }, { "epoch": 0.040569766574688644, "grad_norm": 4.228407382965088, "learning_rate": 1.999428086301912e-05, "loss": 0.8972, "step": 272 }, { "epoch": 0.04071892012827206, "grad_norm": 3.9548470973968506, "learning_rate": 1.9994116308074532e-05, "loss": 0.8748, "step": 273 }, { "epoch": 0.04086807368185547, "grad_norm": 6.3282389640808105, "learning_rate": 1.9993949419939412e-05, "loss": 0.9052, "step": 274 }, { "epoch": 0.04101722723543889, "grad_norm": 2.489429235458374, "learning_rate": 1.9993780198652716e-05, "loss": 0.8837, "step": 275 }, { "epoch": 0.0411663807890223, "grad_norm": 2.344733953475952, "learning_rate": 1.9993608644253954e-05, "loss": 0.9419, "step": 276 }, { "epoch": 0.041315534342605716, "grad_norm": 2.8475754261016846, "learning_rate": 1.9993434756783173e-05, "loss": 0.908, "step": 277 }, { "epoch": 0.04146468789618913, "grad_norm": 4.105980396270752, "learning_rate": 1.999325853628097e-05, "loss": 0.8683, "step": 278 }, { "epoch": 0.041613841449772544, "grad_norm": 2.8906660079956055, "learning_rate": 1.9993079982788486e-05, "loss": 0.8921, "step": 279 }, { "epoch": 0.04176299500335595, "grad_norm": 2.924365520477295, "learning_rate": 1.9992899096347403e-05, "loss": 0.9259, "step": 280 }, { "epoch": 0.041912148556939366, "grad_norm": 3.056579113006592, "learning_rate": 1.9992715876999953e-05, "loss": 0.9455, "step": 281 }, { "epoch": 0.04206130211052278, "grad_norm": 3.46181321144104, "learning_rate": 1.9992530324788903e-05, "loss": 0.9097, "step": 282 }, { "epoch": 0.042210455664106195, "grad_norm": 2.9212918281555176, "learning_rate": 1.999234243975758e-05, "loss": 0.806, "step": 283 }, { "epoch": 0.04235960921768961, "grad_norm": 3.3614726066589355, "learning_rate": 1.9992152221949842e-05, "loss": 0.9651, "step": 284 }, { "epoch": 0.04250876277127302, "grad_norm": 3.913341999053955, "learning_rate": 1.99919596714101e-05, "loss": 0.8665, "step": 285 }, { "epoch": 0.04265791632485644, "grad_norm": 3.8884522914886475, "learning_rate": 1.9991764788183303e-05, "loss": 0.8745, "step": 286 }, { "epoch": 0.04280706987843985, "grad_norm": 2.8051788806915283, "learning_rate": 1.9991567572314948e-05, "loss": 0.9065, "step": 287 }, { "epoch": 0.042956223432023266, "grad_norm": 3.864551544189453, "learning_rate": 1.9991368023851078e-05, "loss": 0.8646, "step": 288 }, { "epoch": 0.04310537698560668, "grad_norm": 3.0999667644500732, "learning_rate": 1.9991166142838276e-05, "loss": 0.8315, "step": 289 }, { "epoch": 0.043254530539190095, "grad_norm": 3.064849376678467, "learning_rate": 1.9990961929323674e-05, "loss": 0.8442, "step": 290 }, { "epoch": 0.04340368409277351, "grad_norm": 3.0551412105560303, "learning_rate": 1.999075538335495e-05, "loss": 0.9207, "step": 291 }, { "epoch": 0.04355283764635692, "grad_norm": 3.1354877948760986, "learning_rate": 1.9990546504980318e-05, "loss": 0.9649, "step": 292 }, { "epoch": 0.04370199119994034, "grad_norm": 2.0862820148468018, "learning_rate": 1.9990335294248543e-05, "loss": 0.8424, "step": 293 }, { "epoch": 0.04385114475352375, "grad_norm": 2.4629812240600586, "learning_rate": 1.999012175120894e-05, "loss": 0.8671, "step": 294 }, { "epoch": 0.044000298307107166, "grad_norm": 4.872715473175049, "learning_rate": 1.9989905875911353e-05, "loss": 0.8148, "step": 295 }, { "epoch": 0.04414945186069058, "grad_norm": 9.341991424560547, "learning_rate": 1.9989687668406184e-05, "loss": 0.8938, "step": 296 }, { "epoch": 0.044298605414273995, "grad_norm": 3.3633651733398438, "learning_rate": 1.998946712874438e-05, "loss": 0.8095, "step": 297 }, { "epoch": 0.04444775896785741, "grad_norm": 2.4222466945648193, "learning_rate": 1.9989244256977415e-05, "loss": 0.8795, "step": 298 }, { "epoch": 0.044596912521440824, "grad_norm": 5.094366550445557, "learning_rate": 1.998901905315733e-05, "loss": 0.903, "step": 299 }, { "epoch": 0.04474606607502424, "grad_norm": 0.8428094983100891, "learning_rate": 1.99887915173367e-05, "loss": 0.2715, "step": 300 }, { "epoch": 0.04489521962860765, "grad_norm": 5.923583507537842, "learning_rate": 1.9988561649568636e-05, "loss": 0.9085, "step": 301 }, { "epoch": 0.045044373182191066, "grad_norm": 2.8639140129089355, "learning_rate": 1.998832944990681e-05, "loss": 0.8562, "step": 302 }, { "epoch": 0.04519352673577448, "grad_norm": 2.037905216217041, "learning_rate": 1.9988094918405427e-05, "loss": 0.88, "step": 303 }, { "epoch": 0.045342680289357895, "grad_norm": 3.6690056324005127, "learning_rate": 1.9987858055119243e-05, "loss": 0.8983, "step": 304 }, { "epoch": 0.04549183384294131, "grad_norm": 2.9254512786865234, "learning_rate": 1.9987618860103554e-05, "loss": 0.9267, "step": 305 }, { "epoch": 0.045640987396524724, "grad_norm": 2.9030721187591553, "learning_rate": 1.9987377333414203e-05, "loss": 0.9074, "step": 306 }, { "epoch": 0.04579014095010814, "grad_norm": 2.267957925796509, "learning_rate": 1.998713347510757e-05, "loss": 0.9229, "step": 307 }, { "epoch": 0.04593929450369155, "grad_norm": 2.2998104095458984, "learning_rate": 1.9986887285240592e-05, "loss": 0.92, "step": 308 }, { "epoch": 0.04608844805727497, "grad_norm": 2.6239190101623535, "learning_rate": 1.998663876387074e-05, "loss": 0.8658, "step": 309 }, { "epoch": 0.04623760161085838, "grad_norm": 2.5447704792022705, "learning_rate": 1.9986387911056034e-05, "loss": 0.972, "step": 310 }, { "epoch": 0.046386755164441795, "grad_norm": 3.62683367729187, "learning_rate": 1.9986134726855036e-05, "loss": 0.9156, "step": 311 }, { "epoch": 0.04653590871802521, "grad_norm": 4.0630388259887695, "learning_rate": 1.9985879211326857e-05, "loss": 0.8887, "step": 312 }, { "epoch": 0.046685062271608624, "grad_norm": 3.557805299758911, "learning_rate": 1.9985621364531144e-05, "loss": 0.9021, "step": 313 }, { "epoch": 0.04683421582519204, "grad_norm": 4.691293716430664, "learning_rate": 1.9985361186528097e-05, "loss": 0.8439, "step": 314 }, { "epoch": 0.04698336937877545, "grad_norm": 3.470402956008911, "learning_rate": 1.9985098677378456e-05, "loss": 0.8432, "step": 315 }, { "epoch": 0.04713252293235887, "grad_norm": 2.9130020141601562, "learning_rate": 1.99848338371435e-05, "loss": 0.8876, "step": 316 }, { "epoch": 0.047281676485942274, "grad_norm": 6.155801296234131, "learning_rate": 1.9984566665885064e-05, "loss": 0.7878, "step": 317 }, { "epoch": 0.04743083003952569, "grad_norm": 3.0485339164733887, "learning_rate": 1.9984297163665518e-05, "loss": 0.8361, "step": 318 }, { "epoch": 0.0475799835931091, "grad_norm": 2.990734100341797, "learning_rate": 1.998402533054778e-05, "loss": 0.9001, "step": 319 }, { "epoch": 0.04772913714669252, "grad_norm": 6.040631294250488, "learning_rate": 1.998375116659531e-05, "loss": 0.9831, "step": 320 }, { "epoch": 0.04787829070027593, "grad_norm": 2.3603057861328125, "learning_rate": 1.9983474671872112e-05, "loss": 0.8605, "step": 321 }, { "epoch": 0.048027444253859346, "grad_norm": 2.0388195514678955, "learning_rate": 1.998319584644274e-05, "loss": 0.9587, "step": 322 }, { "epoch": 0.04817659780744276, "grad_norm": 3.112030267715454, "learning_rate": 1.9982914690372282e-05, "loss": 0.8526, "step": 323 }, { "epoch": 0.048325751361026174, "grad_norm": 3.6645779609680176, "learning_rate": 1.9982631203726385e-05, "loss": 0.9095, "step": 324 }, { "epoch": 0.04847490491460959, "grad_norm": 3.140103340148926, "learning_rate": 1.9982345386571217e-05, "loss": 0.8515, "step": 325 }, { "epoch": 0.048624058468193, "grad_norm": 2.2842156887054443, "learning_rate": 1.9982057238973516e-05, "loss": 0.8763, "step": 326 }, { "epoch": 0.04877321202177642, "grad_norm": 4.80811882019043, "learning_rate": 1.998176676100055e-05, "loss": 0.7753, "step": 327 }, { "epoch": 0.04892236557535983, "grad_norm": 2.1726701259613037, "learning_rate": 1.9981473952720122e-05, "loss": 0.8408, "step": 328 }, { "epoch": 0.049071519128943246, "grad_norm": 3.3533215522766113, "learning_rate": 1.9981178814200603e-05, "loss": 0.8556, "step": 329 }, { "epoch": 0.04922067268252666, "grad_norm": 3.385443687438965, "learning_rate": 1.998088134551089e-05, "loss": 0.9168, "step": 330 }, { "epoch": 0.049369826236110075, "grad_norm": 2.713491439819336, "learning_rate": 1.998058154672043e-05, "loss": 0.9387, "step": 331 }, { "epoch": 0.04951897978969349, "grad_norm": 4.301455974578857, "learning_rate": 1.998027941789921e-05, "loss": 0.8677, "step": 332 }, { "epoch": 0.0496681333432769, "grad_norm": 3.143970012664795, "learning_rate": 1.997997495911777e-05, "loss": 0.9027, "step": 333 }, { "epoch": 0.04981728689686032, "grad_norm": 2.957857131958008, "learning_rate": 1.9979668170447176e-05, "loss": 0.889, "step": 334 }, { "epoch": 0.04996644045044373, "grad_norm": 2.8832571506500244, "learning_rate": 1.9979359051959063e-05, "loss": 0.892, "step": 335 }, { "epoch": 0.050115594004027146, "grad_norm": 2.8372044563293457, "learning_rate": 1.997904760372559e-05, "loss": 0.9109, "step": 336 }, { "epoch": 0.05026474755761056, "grad_norm": 2.735034465789795, "learning_rate": 1.997873382581947e-05, "loss": 0.8746, "step": 337 }, { "epoch": 0.050413901111193975, "grad_norm": 2.442314863204956, "learning_rate": 1.9978417718313953e-05, "loss": 0.9117, "step": 338 }, { "epoch": 0.05056305466477739, "grad_norm": 3.351416826248169, "learning_rate": 1.997809928128284e-05, "loss": 0.9132, "step": 339 }, { "epoch": 0.0507122082183608, "grad_norm": 2.530731678009033, "learning_rate": 1.9977778514800462e-05, "loss": 0.933, "step": 340 }, { "epoch": 0.05086136177194422, "grad_norm": 2.8022148609161377, "learning_rate": 1.997745541894172e-05, "loss": 0.9376, "step": 341 }, { "epoch": 0.05101051532552763, "grad_norm": 2.4425208568573, "learning_rate": 1.997712999378203e-05, "loss": 0.9053, "step": 342 }, { "epoch": 0.051159668879111046, "grad_norm": 2.6215970516204834, "learning_rate": 1.9976802239397373e-05, "loss": 0.9257, "step": 343 }, { "epoch": 0.05130882243269446, "grad_norm": 2.558863639831543, "learning_rate": 1.9976472155864258e-05, "loss": 0.8899, "step": 344 }, { "epoch": 0.051457975986277875, "grad_norm": 3.199979066848755, "learning_rate": 1.997613974325975e-05, "loss": 0.8144, "step": 345 }, { "epoch": 0.05160712953986129, "grad_norm": 2.407370090484619, "learning_rate": 1.997580500166145e-05, "loss": 0.86, "step": 346 }, { "epoch": 0.051756283093444704, "grad_norm": 2.5698251724243164, "learning_rate": 1.9975467931147512e-05, "loss": 0.927, "step": 347 }, { "epoch": 0.05190543664702812, "grad_norm": 2.187136173248291, "learning_rate": 1.997512853179662e-05, "loss": 0.928, "step": 348 }, { "epoch": 0.05205459020061153, "grad_norm": 3.5745227336883545, "learning_rate": 1.997478680368801e-05, "loss": 0.8945, "step": 349 }, { "epoch": 0.052203743754194946, "grad_norm": 17.175857543945312, "learning_rate": 1.9974442746901464e-05, "loss": 0.8615, "step": 350 }, { "epoch": 0.05235289730777836, "grad_norm": 3.876333475112915, "learning_rate": 1.9974096361517302e-05, "loss": 0.8491, "step": 351 }, { "epoch": 0.052502050861361775, "grad_norm": 3.0382895469665527, "learning_rate": 1.9973747647616387e-05, "loss": 0.8646, "step": 352 }, { "epoch": 0.05265120441494519, "grad_norm": 2.941591262817383, "learning_rate": 1.9973396605280135e-05, "loss": 0.8847, "step": 353 }, { "epoch": 0.0528003579685286, "grad_norm": 1.9449965953826904, "learning_rate": 1.9973043234590495e-05, "loss": 0.9046, "step": 354 }, { "epoch": 0.05294951152211201, "grad_norm": 2.4699294567108154, "learning_rate": 1.9972687535629962e-05, "loss": 0.8375, "step": 355 }, { "epoch": 0.053098665075695425, "grad_norm": 2.844377040863037, "learning_rate": 1.997232950848158e-05, "loss": 0.8727, "step": 356 }, { "epoch": 0.05324781862927884, "grad_norm": 3.400522470474243, "learning_rate": 1.9971969153228934e-05, "loss": 0.862, "step": 357 }, { "epoch": 0.053396972182862254, "grad_norm": 2.1816346645355225, "learning_rate": 1.9971606469956146e-05, "loss": 0.9418, "step": 358 }, { "epoch": 0.05354612573644567, "grad_norm": 2.020021438598633, "learning_rate": 1.997124145874789e-05, "loss": 0.813, "step": 359 }, { "epoch": 0.05369527929002908, "grad_norm": 3.660477876663208, "learning_rate": 1.997087411968938e-05, "loss": 0.7775, "step": 360 }, { "epoch": 0.0538444328436125, "grad_norm": 2.022148847579956, "learning_rate": 1.997050445286637e-05, "loss": 0.9532, "step": 361 }, { "epoch": 0.05399358639719591, "grad_norm": 3.988225221633911, "learning_rate": 1.9970132458365165e-05, "loss": 0.8134, "step": 362 }, { "epoch": 0.054142739950779326, "grad_norm": 2.957157611846924, "learning_rate": 1.9969758136272614e-05, "loss": 0.8869, "step": 363 }, { "epoch": 0.05429189350436274, "grad_norm": 2.2344300746917725, "learning_rate": 1.9969381486676092e-05, "loss": 0.7824, "step": 364 }, { "epoch": 0.054441047057946154, "grad_norm": 4.09674596786499, "learning_rate": 1.9969002509663543e-05, "loss": 0.8012, "step": 365 }, { "epoch": 0.05459020061152957, "grad_norm": 3.2760274410247803, "learning_rate": 1.9968621205323434e-05, "loss": 0.8785, "step": 366 }, { "epoch": 0.05473935416511298, "grad_norm": 2.760108232498169, "learning_rate": 1.9968237573744788e-05, "loss": 0.8799, "step": 367 }, { "epoch": 0.0548885077186964, "grad_norm": 4.057400703430176, "learning_rate": 1.9967851615017164e-05, "loss": 0.8645, "step": 368 }, { "epoch": 0.05503766127227981, "grad_norm": 2.4350802898406982, "learning_rate": 1.9967463329230665e-05, "loss": 0.822, "step": 369 }, { "epoch": 0.055186814825863226, "grad_norm": 3.2432804107666016, "learning_rate": 1.9967072716475938e-05, "loss": 0.8663, "step": 370 }, { "epoch": 0.05533596837944664, "grad_norm": 2.0559563636779785, "learning_rate": 1.996667977684418e-05, "loss": 0.8059, "step": 371 }, { "epoch": 0.055485121933030054, "grad_norm": 2.778951406478882, "learning_rate": 1.9966284510427118e-05, "loss": 0.8656, "step": 372 }, { "epoch": 0.05563427548661347, "grad_norm": 1.5353320837020874, "learning_rate": 1.9965886917317034e-05, "loss": 0.9133, "step": 373 }, { "epoch": 0.05578342904019688, "grad_norm": 0.8143346905708313, "learning_rate": 1.9965486997606747e-05, "loss": 0.2424, "step": 374 }, { "epoch": 0.0559325825937803, "grad_norm": 2.3563365936279297, "learning_rate": 1.996508475138962e-05, "loss": 0.8561, "step": 375 }, { "epoch": 0.05608173614736371, "grad_norm": 3.736351251602173, "learning_rate": 1.9964680178759565e-05, "loss": 0.8353, "step": 376 }, { "epoch": 0.056230889700947126, "grad_norm": 2.7561163902282715, "learning_rate": 1.9964273279811026e-05, "loss": 0.8509, "step": 377 }, { "epoch": 0.05638004325453054, "grad_norm": 3.9756548404693604, "learning_rate": 1.9963864054639e-05, "loss": 0.9247, "step": 378 }, { "epoch": 0.056529196808113955, "grad_norm": 0.562821090221405, "learning_rate": 1.996345250333902e-05, "loss": 0.2191, "step": 379 }, { "epoch": 0.05667835036169737, "grad_norm": 2.9426143169403076, "learning_rate": 1.996303862600717e-05, "loss": 0.8934, "step": 380 }, { "epoch": 0.05682750391528078, "grad_norm": 2.5122601985931396, "learning_rate": 1.9962622422740067e-05, "loss": 0.879, "step": 381 }, { "epoch": 0.0569766574688642, "grad_norm": 3.1558098793029785, "learning_rate": 1.996220389363488e-05, "loss": 0.9041, "step": 382 }, { "epoch": 0.05712581102244761, "grad_norm": 2.393960475921631, "learning_rate": 1.9961783038789314e-05, "loss": 0.7439, "step": 383 }, { "epoch": 0.057274964576031026, "grad_norm": 1.9793556928634644, "learning_rate": 1.9961359858301622e-05, "loss": 0.935, "step": 384 }, { "epoch": 0.05742411812961444, "grad_norm": 4.299928665161133, "learning_rate": 1.99609343522706e-05, "loss": 0.7941, "step": 385 }, { "epoch": 0.057573271683197855, "grad_norm": 2.7066121101379395, "learning_rate": 1.9960506520795585e-05, "loss": 0.9139, "step": 386 }, { "epoch": 0.05772242523678127, "grad_norm": 2.3824284076690674, "learning_rate": 1.9960076363976454e-05, "loss": 0.911, "step": 387 }, { "epoch": 0.05787157879036468, "grad_norm": 2.0276262760162354, "learning_rate": 1.995964388191363e-05, "loss": 0.8444, "step": 388 }, { "epoch": 0.0580207323439481, "grad_norm": 4.349749565124512, "learning_rate": 1.9959209074708084e-05, "loss": 0.8946, "step": 389 }, { "epoch": 0.05816988589753151, "grad_norm": 2.9580039978027344, "learning_rate": 1.995877194246132e-05, "loss": 0.7703, "step": 390 }, { "epoch": 0.058319039451114926, "grad_norm": 2.6254396438598633, "learning_rate": 1.9958332485275386e-05, "loss": 0.894, "step": 391 }, { "epoch": 0.058468193004698334, "grad_norm": 3.348177671432495, "learning_rate": 1.9957890703252882e-05, "loss": 0.7757, "step": 392 }, { "epoch": 0.05861734655828175, "grad_norm": 2.326329231262207, "learning_rate": 1.9957446596496945e-05, "loss": 0.8737, "step": 393 }, { "epoch": 0.05876650011186516, "grad_norm": 2.1053073406219482, "learning_rate": 1.995700016511125e-05, "loss": 0.8324, "step": 394 }, { "epoch": 0.05891565366544858, "grad_norm": 2.6347007751464844, "learning_rate": 1.995655140920002e-05, "loss": 0.8471, "step": 395 }, { "epoch": 0.05906480721903199, "grad_norm": 2.6413137912750244, "learning_rate": 1.995610032886803e-05, "loss": 0.8568, "step": 396 }, { "epoch": 0.059213960772615405, "grad_norm": 2.6268489360809326, "learning_rate": 1.995564692422057e-05, "loss": 0.8164, "step": 397 }, { "epoch": 0.05936311432619882, "grad_norm": 3.977534770965576, "learning_rate": 1.9955191195363505e-05, "loss": 0.8528, "step": 398 }, { "epoch": 0.059512267879782234, "grad_norm": 3.2653968334198, "learning_rate": 1.995473314240322e-05, "loss": 0.8868, "step": 399 }, { "epoch": 0.05966142143336565, "grad_norm": 2.3410398960113525, "learning_rate": 1.9954272765446656e-05, "loss": 0.8155, "step": 400 }, { "epoch": 0.05981057498694906, "grad_norm": 2.8630733489990234, "learning_rate": 1.9953810064601284e-05, "loss": 0.9056, "step": 401 }, { "epoch": 0.05995972854053248, "grad_norm": 2.2745556831359863, "learning_rate": 1.995334503997513e-05, "loss": 0.861, "step": 402 }, { "epoch": 0.06010888209411589, "grad_norm": 1.8208949565887451, "learning_rate": 1.9952877691676754e-05, "loss": 0.8973, "step": 403 }, { "epoch": 0.060258035647699305, "grad_norm": 3.457472801208496, "learning_rate": 1.9952408019815266e-05, "loss": 0.8015, "step": 404 }, { "epoch": 0.06040718920128272, "grad_norm": 3.101141929626465, "learning_rate": 1.9951936024500306e-05, "loss": 0.9242, "step": 405 }, { "epoch": 0.060556342754866134, "grad_norm": 2.087676763534546, "learning_rate": 1.9951461705842073e-05, "loss": 0.8072, "step": 406 }, { "epoch": 0.06070549630844955, "grad_norm": 3.2039647102355957, "learning_rate": 1.995098506395129e-05, "loss": 0.8376, "step": 407 }, { "epoch": 0.06085464986203296, "grad_norm": 2.6167049407958984, "learning_rate": 1.9950506098939243e-05, "loss": 0.872, "step": 408 }, { "epoch": 0.06100380341561638, "grad_norm": 2.2808899879455566, "learning_rate": 1.9950024810917745e-05, "loss": 0.8873, "step": 409 }, { "epoch": 0.06115295696919979, "grad_norm": 3.310760736465454, "learning_rate": 1.994954119999915e-05, "loss": 0.8635, "step": 410 }, { "epoch": 0.061302110522783206, "grad_norm": 3.3300766944885254, "learning_rate": 1.994905526629637e-05, "loss": 0.8273, "step": 411 }, { "epoch": 0.06145126407636662, "grad_norm": 3.114314079284668, "learning_rate": 1.9948567009922842e-05, "loss": 0.852, "step": 412 }, { "epoch": 0.061600417629950034, "grad_norm": 2.1629908084869385, "learning_rate": 1.9948076430992557e-05, "loss": 0.8916, "step": 413 }, { "epoch": 0.06174957118353345, "grad_norm": 3.239694356918335, "learning_rate": 1.9947583529620038e-05, "loss": 0.8908, "step": 414 }, { "epoch": 0.06189872473711686, "grad_norm": 2.790855646133423, "learning_rate": 1.994708830592036e-05, "loss": 0.7472, "step": 415 }, { "epoch": 0.06204787829070028, "grad_norm": 2.9816408157348633, "learning_rate": 1.9946590760009137e-05, "loss": 0.8352, "step": 416 }, { "epoch": 0.06219703184428369, "grad_norm": 1.9293317794799805, "learning_rate": 1.9946090892002524e-05, "loss": 0.8743, "step": 417 }, { "epoch": 0.062346185397867106, "grad_norm": 1.880286455154419, "learning_rate": 1.9945588702017215e-05, "loss": 0.8601, "step": 418 }, { "epoch": 0.06249533895145052, "grad_norm": 2.296487331390381, "learning_rate": 1.9945084190170456e-05, "loss": 0.8366, "step": 419 }, { "epoch": 0.06264449250503393, "grad_norm": 2.6073050498962402, "learning_rate": 1.9944577356580023e-05, "loss": 0.8207, "step": 420 }, { "epoch": 0.06279364605861734, "grad_norm": 3.4085915088653564, "learning_rate": 1.9944068201364238e-05, "loss": 0.8881, "step": 421 }, { "epoch": 0.06294279961220076, "grad_norm": 1.6858160495758057, "learning_rate": 1.9943556724641975e-05, "loss": 0.848, "step": 422 }, { "epoch": 0.06309195316578417, "grad_norm": 0.8950862884521484, "learning_rate": 1.9943042926532634e-05, "loss": 0.2374, "step": 423 }, { "epoch": 0.06324110671936758, "grad_norm": 2.893927812576294, "learning_rate": 1.9942526807156166e-05, "loss": 0.8835, "step": 424 }, { "epoch": 0.063390260272951, "grad_norm": 0.659728467464447, "learning_rate": 1.9942008366633063e-05, "loss": 0.231, "step": 425 }, { "epoch": 0.06353941382653441, "grad_norm": 3.177354097366333, "learning_rate": 1.994148760508436e-05, "loss": 0.816, "step": 426 }, { "epoch": 0.06368856738011783, "grad_norm": 2.25211501121521, "learning_rate": 1.994096452263163e-05, "loss": 0.827, "step": 427 }, { "epoch": 0.06383772093370124, "grad_norm": 2.6010918617248535, "learning_rate": 1.9940439119396985e-05, "loss": 0.817, "step": 428 }, { "epoch": 0.06398687448728466, "grad_norm": 2.397444248199463, "learning_rate": 1.9939911395503094e-05, "loss": 0.829, "step": 429 }, { "epoch": 0.06413602804086807, "grad_norm": 2.618238925933838, "learning_rate": 1.9939381351073153e-05, "loss": 0.805, "step": 430 }, { "epoch": 0.06428518159445148, "grad_norm": 2.7493844032287598, "learning_rate": 1.9938848986230904e-05, "loss": 0.8261, "step": 431 }, { "epoch": 0.0644343351480349, "grad_norm": 2.6060385704040527, "learning_rate": 1.993831430110063e-05, "loss": 0.8375, "step": 432 }, { "epoch": 0.06458348870161831, "grad_norm": 3.1383931636810303, "learning_rate": 1.9937777295807156e-05, "loss": 0.9087, "step": 433 }, { "epoch": 0.06473264225520173, "grad_norm": 3.851846694946289, "learning_rate": 1.9937237970475857e-05, "loss": 0.89, "step": 434 }, { "epoch": 0.06488179580878514, "grad_norm": 0.6978795528411865, "learning_rate": 1.993669632523263e-05, "loss": 0.23, "step": 435 }, { "epoch": 0.06503094936236856, "grad_norm": 0.7036376595497131, "learning_rate": 1.993615236020393e-05, "loss": 0.2355, "step": 436 }, { "epoch": 0.06518010291595197, "grad_norm": 2.638955593109131, "learning_rate": 1.9935606075516754e-05, "loss": 0.8873, "step": 437 }, { "epoch": 0.06532925646953539, "grad_norm": 0.5978618860244751, "learning_rate": 1.9935057471298633e-05, "loss": 0.2418, "step": 438 }, { "epoch": 0.0654784100231188, "grad_norm": 2.8107011318206787, "learning_rate": 1.993450654767764e-05, "loss": 0.8627, "step": 439 }, { "epoch": 0.06562756357670221, "grad_norm": 2.279385566711426, "learning_rate": 1.993395330478239e-05, "loss": 0.9044, "step": 440 }, { "epoch": 0.06577671713028563, "grad_norm": 1.9686379432678223, "learning_rate": 1.993339774274205e-05, "loss": 0.8798, "step": 441 }, { "epoch": 0.06592587068386904, "grad_norm": 2.2035131454467773, "learning_rate": 1.993283986168631e-05, "loss": 0.837, "step": 442 }, { "epoch": 0.06607502423745246, "grad_norm": 2.461723804473877, "learning_rate": 1.9932279661745416e-05, "loss": 0.8536, "step": 443 }, { "epoch": 0.06622417779103587, "grad_norm": 1.951593279838562, "learning_rate": 1.9931717143050147e-05, "loss": 0.7186, "step": 444 }, { "epoch": 0.06637333134461929, "grad_norm": 2.3269357681274414, "learning_rate": 1.9931152305731828e-05, "loss": 0.8602, "step": 445 }, { "epoch": 0.0665224848982027, "grad_norm": 2.0893635749816895, "learning_rate": 1.9930585149922325e-05, "loss": 0.7887, "step": 446 }, { "epoch": 0.06667163845178611, "grad_norm": 3.47110652923584, "learning_rate": 1.9930015675754047e-05, "loss": 0.8289, "step": 447 }, { "epoch": 0.06682079200536953, "grad_norm": 0.9066480994224548, "learning_rate": 1.9929443883359934e-05, "loss": 0.2534, "step": 448 }, { "epoch": 0.06696994555895294, "grad_norm": 3.1825263500213623, "learning_rate": 1.992886977287348e-05, "loss": 0.8039, "step": 449 }, { "epoch": 0.06711909911253636, "grad_norm": 3.623110771179199, "learning_rate": 1.9928293344428714e-05, "loss": 0.8061, "step": 450 }, { "epoch": 0.06726825266611977, "grad_norm": 2.7143096923828125, "learning_rate": 1.9927714598160204e-05, "loss": 0.8097, "step": 451 }, { "epoch": 0.06741740621970319, "grad_norm": 2.3457396030426025, "learning_rate": 1.9927133534203064e-05, "loss": 0.8519, "step": 452 }, { "epoch": 0.0675665597732866, "grad_norm": 2.3081634044647217, "learning_rate": 1.992655015269295e-05, "loss": 0.7816, "step": 453 }, { "epoch": 0.06771571332687001, "grad_norm": 2.1470093727111816, "learning_rate": 1.992596445376605e-05, "loss": 0.84, "step": 454 }, { "epoch": 0.06786486688045343, "grad_norm": 2.83054518699646, "learning_rate": 1.9925376437559106e-05, "loss": 0.8727, "step": 455 }, { "epoch": 0.06801402043403684, "grad_norm": 2.366001844406128, "learning_rate": 1.992478610420939e-05, "loss": 0.8283, "step": 456 }, { "epoch": 0.06816317398762026, "grad_norm": 2.811633348464966, "learning_rate": 1.992419345385472e-05, "loss": 0.8843, "step": 457 }, { "epoch": 0.06831232754120367, "grad_norm": 2.3446388244628906, "learning_rate": 1.992359848663345e-05, "loss": 0.8393, "step": 458 }, { "epoch": 0.06846148109478709, "grad_norm": 3.7336325645446777, "learning_rate": 1.992300120268449e-05, "loss": 0.7718, "step": 459 }, { "epoch": 0.0686106346483705, "grad_norm": 2.050379514694214, "learning_rate": 1.9922401602147266e-05, "loss": 0.8928, "step": 460 }, { "epoch": 0.06875978820195391, "grad_norm": 3.892504930496216, "learning_rate": 1.992179968516177e-05, "loss": 0.7855, "step": 461 }, { "epoch": 0.06890894175553733, "grad_norm": 3.3994905948638916, "learning_rate": 1.9921195451868514e-05, "loss": 0.8557, "step": 462 }, { "epoch": 0.06905809530912074, "grad_norm": 2.2113144397735596, "learning_rate": 1.9920588902408567e-05, "loss": 0.8265, "step": 463 }, { "epoch": 0.06920724886270416, "grad_norm": 2.2149484157562256, "learning_rate": 1.991998003692353e-05, "loss": 0.8648, "step": 464 }, { "epoch": 0.06935640241628757, "grad_norm": 1.9301351308822632, "learning_rate": 1.9919368855555546e-05, "loss": 0.8242, "step": 465 }, { "epoch": 0.06950555596987099, "grad_norm": 2.9316656589508057, "learning_rate": 1.9918755358447298e-05, "loss": 0.7601, "step": 466 }, { "epoch": 0.0696547095234544, "grad_norm": 2.6116204261779785, "learning_rate": 1.991813954574201e-05, "loss": 0.8436, "step": 467 }, { "epoch": 0.06980386307703781, "grad_norm": 2.7822370529174805, "learning_rate": 1.9917521417583456e-05, "loss": 0.8125, "step": 468 }, { "epoch": 0.06995301663062123, "grad_norm": 2.7795934677124023, "learning_rate": 1.9916900974115932e-05, "loss": 0.7995, "step": 469 }, { "epoch": 0.07010217018420464, "grad_norm": 2.694121837615967, "learning_rate": 1.9916278215484288e-05, "loss": 0.8733, "step": 470 }, { "epoch": 0.07025132373778806, "grad_norm": 4.156251430511475, "learning_rate": 1.991565314183391e-05, "loss": 0.8627, "step": 471 }, { "epoch": 0.07040047729137147, "grad_norm": 0.48472729325294495, "learning_rate": 1.9915025753310727e-05, "loss": 0.2271, "step": 472 }, { "epoch": 0.07054963084495489, "grad_norm": 1.936448097229004, "learning_rate": 1.9914396050061212e-05, "loss": 0.9504, "step": 473 }, { "epoch": 0.0706987843985383, "grad_norm": 2.1098639965057373, "learning_rate": 1.9913764032232362e-05, "loss": 0.838, "step": 474 }, { "epoch": 0.07084793795212171, "grad_norm": 3.1767287254333496, "learning_rate": 1.991312969997173e-05, "loss": 0.8156, "step": 475 }, { "epoch": 0.07099709150570513, "grad_norm": 7.116645336151123, "learning_rate": 1.991249305342741e-05, "loss": 0.8075, "step": 476 }, { "epoch": 0.07114624505928854, "grad_norm": 3.067148447036743, "learning_rate": 1.9911854092748023e-05, "loss": 0.8773, "step": 477 }, { "epoch": 0.07129539861287196, "grad_norm": 2.291674852371216, "learning_rate": 1.9911212818082746e-05, "loss": 0.8591, "step": 478 }, { "epoch": 0.07144455216645537, "grad_norm": 3.267972707748413, "learning_rate": 1.9910569229581288e-05, "loss": 0.8051, "step": 479 }, { "epoch": 0.07159370572003879, "grad_norm": 1.9044170379638672, "learning_rate": 1.990992332739389e-05, "loss": 0.8062, "step": 480 }, { "epoch": 0.0717428592736222, "grad_norm": 0.5122166275978088, "learning_rate": 1.9909275111671354e-05, "loss": 0.2266, "step": 481 }, { "epoch": 0.07189201282720561, "grad_norm": 0.49422982335090637, "learning_rate": 1.9908624582565002e-05, "loss": 0.2123, "step": 482 }, { "epoch": 0.07204116638078903, "grad_norm": 2.603001832962036, "learning_rate": 1.9907971740226708e-05, "loss": 0.7884, "step": 483 }, { "epoch": 0.07219031993437244, "grad_norm": 3.5079691410064697, "learning_rate": 1.990731658480888e-05, "loss": 0.8435, "step": 484 }, { "epoch": 0.07233947348795584, "grad_norm": 4.863563537597656, "learning_rate": 1.9906659116464467e-05, "loss": 0.839, "step": 485 }, { "epoch": 0.07248862704153926, "grad_norm": 1.9831753969192505, "learning_rate": 1.9905999335346967e-05, "loss": 0.8351, "step": 486 }, { "epoch": 0.07263778059512267, "grad_norm": 4.426351070404053, "learning_rate": 1.99053372416104e-05, "loss": 0.8988, "step": 487 }, { "epoch": 0.07278693414870609, "grad_norm": 2.38358473777771, "learning_rate": 1.990467283540934e-05, "loss": 0.8183, "step": 488 }, { "epoch": 0.0729360877022895, "grad_norm": 2.5194778442382812, "learning_rate": 1.9904006116898903e-05, "loss": 0.8971, "step": 489 }, { "epoch": 0.07308524125587292, "grad_norm": 1.7235687971115112, "learning_rate": 1.990333708623473e-05, "loss": 0.8786, "step": 490 }, { "epoch": 0.07323439480945633, "grad_norm": 2.1288366317749023, "learning_rate": 1.9902665743573012e-05, "loss": 0.8573, "step": 491 }, { "epoch": 0.07338354836303974, "grad_norm": 3.672182321548462, "learning_rate": 1.9901992089070483e-05, "loss": 0.8569, "step": 492 }, { "epoch": 0.07353270191662316, "grad_norm": 2.1447370052337646, "learning_rate": 1.9901316122884405e-05, "loss": 0.8793, "step": 493 }, { "epoch": 0.07368185547020657, "grad_norm": 2.867741346359253, "learning_rate": 1.9900637845172594e-05, "loss": 0.7891, "step": 494 }, { "epoch": 0.07383100902378999, "grad_norm": 3.513369083404541, "learning_rate": 1.9899957256093393e-05, "loss": 0.7773, "step": 495 }, { "epoch": 0.0739801625773734, "grad_norm": 3.55354380607605, "learning_rate": 1.989927435580569e-05, "loss": 0.8133, "step": 496 }, { "epoch": 0.07412931613095682, "grad_norm": 3.074087142944336, "learning_rate": 1.9898589144468916e-05, "loss": 0.855, "step": 497 }, { "epoch": 0.07427846968454023, "grad_norm": 3.111663818359375, "learning_rate": 1.9897901622243038e-05, "loss": 0.8633, "step": 498 }, { "epoch": 0.07442762323812364, "grad_norm": 2.8796956539154053, "learning_rate": 1.9897211789288556e-05, "loss": 0.9232, "step": 499 }, { "epoch": 0.07457677679170706, "grad_norm": 2.449767827987671, "learning_rate": 1.989651964576653e-05, "loss": 0.8059, "step": 500 }, { "epoch": 0.07472593034529047, "grad_norm": 2.803831100463867, "learning_rate": 1.9895825191838524e-05, "loss": 0.8219, "step": 501 }, { "epoch": 0.07487508389887389, "grad_norm": 3.1161773204803467, "learning_rate": 1.989512842766668e-05, "loss": 0.8733, "step": 502 }, { "epoch": 0.0750242374524573, "grad_norm": 0.5602089166641235, "learning_rate": 1.989442935341366e-05, "loss": 0.2376, "step": 503 }, { "epoch": 0.07517339100604072, "grad_norm": 2.9767019748687744, "learning_rate": 1.9893727969242657e-05, "loss": 0.7188, "step": 504 }, { "epoch": 0.07532254455962413, "grad_norm": 2.5083816051483154, "learning_rate": 1.9893024275317424e-05, "loss": 0.874, "step": 505 }, { "epoch": 0.07547169811320754, "grad_norm": 3.2790451049804688, "learning_rate": 1.989231827180224e-05, "loss": 0.8359, "step": 506 }, { "epoch": 0.07562085166679096, "grad_norm": 2.629169225692749, "learning_rate": 1.9891609958861926e-05, "loss": 0.8188, "step": 507 }, { "epoch": 0.07577000522037437, "grad_norm": 3.2210147380828857, "learning_rate": 1.989089933666184e-05, "loss": 0.8766, "step": 508 }, { "epoch": 0.07591915877395779, "grad_norm": 2.074129819869995, "learning_rate": 1.9890186405367884e-05, "loss": 0.9769, "step": 509 }, { "epoch": 0.0760683123275412, "grad_norm": 3.1232428550720215, "learning_rate": 1.9889471165146495e-05, "loss": 0.7959, "step": 510 }, { "epoch": 0.07621746588112462, "grad_norm": 2.3715033531188965, "learning_rate": 1.988875361616465e-05, "loss": 0.8847, "step": 511 }, { "epoch": 0.07636661943470803, "grad_norm": 2.8143680095672607, "learning_rate": 1.988803375858987e-05, "loss": 0.8288, "step": 512 }, { "epoch": 0.07651577298829144, "grad_norm": 2.37874698638916, "learning_rate": 1.9887311592590205e-05, "loss": 0.7761, "step": 513 }, { "epoch": 0.07666492654187486, "grad_norm": 2.136094808578491, "learning_rate": 1.9886587118334248e-05, "loss": 0.8386, "step": 514 }, { "epoch": 0.07681408009545827, "grad_norm": 2.6274161338806152, "learning_rate": 1.9885860335991136e-05, "loss": 0.7694, "step": 515 }, { "epoch": 0.07696323364904169, "grad_norm": 2.5092990398406982, "learning_rate": 1.988513124573054e-05, "loss": 0.7848, "step": 516 }, { "epoch": 0.0771123872026251, "grad_norm": 2.230915069580078, "learning_rate": 1.9884399847722676e-05, "loss": 0.8812, "step": 517 }, { "epoch": 0.07726154075620852, "grad_norm": 2.1555027961730957, "learning_rate": 1.9883666142138282e-05, "loss": 0.9199, "step": 518 }, { "epoch": 0.07741069430979193, "grad_norm": 3.3065106868743896, "learning_rate": 1.9882930129148653e-05, "loss": 0.7534, "step": 519 }, { "epoch": 0.07755984786337534, "grad_norm": 2.288494825363159, "learning_rate": 1.988219180892562e-05, "loss": 0.7864, "step": 520 }, { "epoch": 0.07770900141695876, "grad_norm": 2.4304471015930176, "learning_rate": 1.9881451181641542e-05, "loss": 0.8782, "step": 521 }, { "epoch": 0.07785815497054217, "grad_norm": 2.0412955284118652, "learning_rate": 1.9880708247469328e-05, "loss": 0.9198, "step": 522 }, { "epoch": 0.07800730852412559, "grad_norm": 2.5462071895599365, "learning_rate": 1.9879963006582413e-05, "loss": 0.8012, "step": 523 }, { "epoch": 0.078156462077709, "grad_norm": 2.380927562713623, "learning_rate": 1.9879215459154787e-05, "loss": 0.7871, "step": 524 }, { "epoch": 0.07830561563129242, "grad_norm": 3.0584709644317627, "learning_rate": 1.9878465605360963e-05, "loss": 0.8478, "step": 525 }, { "epoch": 0.07845476918487583, "grad_norm": 2.079756736755371, "learning_rate": 1.9877713445376005e-05, "loss": 0.8357, "step": 526 }, { "epoch": 0.07860392273845924, "grad_norm": 1.84335196018219, "learning_rate": 1.9876958979375507e-05, "loss": 0.8251, "step": 527 }, { "epoch": 0.07875307629204266, "grad_norm": 2.629284143447876, "learning_rate": 1.98762022075356e-05, "loss": 0.7386, "step": 528 }, { "epoch": 0.07890222984562607, "grad_norm": 5.19591760635376, "learning_rate": 1.9875443130032968e-05, "loss": 0.8174, "step": 529 }, { "epoch": 0.07905138339920949, "grad_norm": 2.8459041118621826, "learning_rate": 1.987468174704481e-05, "loss": 0.8691, "step": 530 }, { "epoch": 0.0792005369527929, "grad_norm": 2.2824249267578125, "learning_rate": 1.9873918058748886e-05, "loss": 0.8392, "step": 531 }, { "epoch": 0.07934969050637632, "grad_norm": 3.35256290435791, "learning_rate": 1.9873152065323476e-05, "loss": 0.832, "step": 532 }, { "epoch": 0.07949884405995973, "grad_norm": 2.279214859008789, "learning_rate": 1.987238376694741e-05, "loss": 0.8723, "step": 533 }, { "epoch": 0.07964799761354315, "grad_norm": 2.5804264545440674, "learning_rate": 1.987161316380005e-05, "loss": 0.8789, "step": 534 }, { "epoch": 0.07979715116712656, "grad_norm": 2.6410763263702393, "learning_rate": 1.98708402560613e-05, "loss": 0.7923, "step": 535 }, { "epoch": 0.07994630472070997, "grad_norm": 4.862730026245117, "learning_rate": 1.9870065043911603e-05, "loss": 0.8737, "step": 536 }, { "epoch": 0.08009545827429339, "grad_norm": 2.435544729232788, "learning_rate": 1.986928752753193e-05, "loss": 0.8508, "step": 537 }, { "epoch": 0.0802446118278768, "grad_norm": 2.366231679916382, "learning_rate": 1.9868507707103806e-05, "loss": 0.8121, "step": 538 }, { "epoch": 0.08039376538146022, "grad_norm": 2.577342987060547, "learning_rate": 1.9867725582809278e-05, "loss": 0.7824, "step": 539 }, { "epoch": 0.08054291893504363, "grad_norm": 2.240668535232544, "learning_rate": 1.986694115483094e-05, "loss": 0.7469, "step": 540 }, { "epoch": 0.08069207248862705, "grad_norm": 2.5276451110839844, "learning_rate": 1.9866154423351923e-05, "loss": 0.8501, "step": 541 }, { "epoch": 0.08084122604221046, "grad_norm": 2.2146432399749756, "learning_rate": 1.9865365388555896e-05, "loss": 0.8349, "step": 542 }, { "epoch": 0.08099037959579387, "grad_norm": 1.8251603841781616, "learning_rate": 1.986457405062706e-05, "loss": 0.8756, "step": 543 }, { "epoch": 0.08113953314937729, "grad_norm": 1.1328449249267578, "learning_rate": 1.986378040975016e-05, "loss": 0.2244, "step": 544 }, { "epoch": 0.0812886867029607, "grad_norm": 2.3783907890319824, "learning_rate": 1.9862984466110476e-05, "loss": 0.8845, "step": 545 }, { "epoch": 0.08143784025654412, "grad_norm": 3.6110899448394775, "learning_rate": 1.9862186219893825e-05, "loss": 0.8523, "step": 546 }, { "epoch": 0.08158699381012753, "grad_norm": 4.022459030151367, "learning_rate": 1.9861385671286565e-05, "loss": 0.8036, "step": 547 }, { "epoch": 0.08173614736371095, "grad_norm": 2.1383678913116455, "learning_rate": 1.9860582820475593e-05, "loss": 0.839, "step": 548 }, { "epoch": 0.08188530091729436, "grad_norm": 2.9709973335266113, "learning_rate": 1.9859777667648326e-05, "loss": 0.8744, "step": 549 }, { "epoch": 0.08203445447087777, "grad_norm": 3.0038466453552246, "learning_rate": 1.985897021299275e-05, "loss": 0.833, "step": 550 }, { "epoch": 0.08218360802446119, "grad_norm": 4.022024154663086, "learning_rate": 1.985816045669736e-05, "loss": 0.7564, "step": 551 }, { "epoch": 0.0823327615780446, "grad_norm": 2.115385055541992, "learning_rate": 1.98573483989512e-05, "loss": 0.9343, "step": 552 }, { "epoch": 0.08248191513162802, "grad_norm": 0.49315759539604187, "learning_rate": 1.985653403994385e-05, "loss": 0.2299, "step": 553 }, { "epoch": 0.08263106868521143, "grad_norm": 2.620171308517456, "learning_rate": 1.9855717379865424e-05, "loss": 0.8794, "step": 554 }, { "epoch": 0.08278022223879485, "grad_norm": 2.4304308891296387, "learning_rate": 1.9854898418906585e-05, "loss": 0.8527, "step": 555 }, { "epoch": 0.08292937579237826, "grad_norm": 0.47070416808128357, "learning_rate": 1.985407715725852e-05, "loss": 0.2041, "step": 556 }, { "epoch": 0.08307852934596167, "grad_norm": 0.5244216322898865, "learning_rate": 1.9853253595112955e-05, "loss": 0.2465, "step": 557 }, { "epoch": 0.08322768289954509, "grad_norm": 2.3281538486480713, "learning_rate": 1.985242773266216e-05, "loss": 0.8309, "step": 558 }, { "epoch": 0.08337683645312849, "grad_norm": 5.023544788360596, "learning_rate": 1.985159957009894e-05, "loss": 0.8093, "step": 559 }, { "epoch": 0.0835259900067119, "grad_norm": 3.0702106952667236, "learning_rate": 1.985076910761663e-05, "loss": 0.8643, "step": 560 }, { "epoch": 0.08367514356029532, "grad_norm": 3.0689008235931396, "learning_rate": 1.9849936345409105e-05, "loss": 0.8305, "step": 561 }, { "epoch": 0.08382429711387873, "grad_norm": 2.6098055839538574, "learning_rate": 1.9849101283670787e-05, "loss": 0.867, "step": 562 }, { "epoch": 0.08397345066746215, "grad_norm": 2.111180543899536, "learning_rate": 1.9848263922596617e-05, "loss": 0.841, "step": 563 }, { "epoch": 0.08412260422104556, "grad_norm": 3.1588821411132812, "learning_rate": 1.9847424262382087e-05, "loss": 0.8127, "step": 564 }, { "epoch": 0.08427175777462897, "grad_norm": 2.263758420944214, "learning_rate": 1.9846582303223224e-05, "loss": 0.9223, "step": 565 }, { "epoch": 0.08442091132821239, "grad_norm": 2.94126296043396, "learning_rate": 1.9845738045316584e-05, "loss": 0.9137, "step": 566 }, { "epoch": 0.0845700648817958, "grad_norm": 4.38880729675293, "learning_rate": 1.9844891488859267e-05, "loss": 0.8018, "step": 567 }, { "epoch": 0.08471921843537922, "grad_norm": 2.097245693206787, "learning_rate": 1.9844042634048905e-05, "loss": 0.8242, "step": 568 }, { "epoch": 0.08486837198896263, "grad_norm": 2.7120893001556396, "learning_rate": 1.984319148108367e-05, "loss": 0.8828, "step": 569 }, { "epoch": 0.08501752554254605, "grad_norm": 3.0494883060455322, "learning_rate": 1.9842338030162273e-05, "loss": 0.8552, "step": 570 }, { "epoch": 0.08516667909612946, "grad_norm": 2.438506603240967, "learning_rate": 1.9841482281483946e-05, "loss": 0.8079, "step": 571 }, { "epoch": 0.08531583264971287, "grad_norm": 2.356688976287842, "learning_rate": 1.984062423524848e-05, "loss": 0.7567, "step": 572 }, { "epoch": 0.08546498620329629, "grad_norm": 2.4131059646606445, "learning_rate": 1.9839763891656186e-05, "loss": 0.9185, "step": 573 }, { "epoch": 0.0856141397568797, "grad_norm": 3.0795633792877197, "learning_rate": 1.9838901250907924e-05, "loss": 0.897, "step": 574 }, { "epoch": 0.08576329331046312, "grad_norm": 3.4153501987457275, "learning_rate": 1.9838036313205073e-05, "loss": 0.833, "step": 575 }, { "epoch": 0.08591244686404653, "grad_norm": 2.364779472351074, "learning_rate": 1.9837169078749567e-05, "loss": 0.8466, "step": 576 }, { "epoch": 0.08606160041762995, "grad_norm": 2.3963913917541504, "learning_rate": 1.9836299547743856e-05, "loss": 0.8452, "step": 577 }, { "epoch": 0.08621075397121336, "grad_norm": 3.9724831581115723, "learning_rate": 1.983542772039095e-05, "loss": 0.8228, "step": 578 }, { "epoch": 0.08635990752479678, "grad_norm": 4.161649227142334, "learning_rate": 1.9834553596894377e-05, "loss": 0.8504, "step": 579 }, { "epoch": 0.08650906107838019, "grad_norm": 2.8053550720214844, "learning_rate": 1.9833677177458207e-05, "loss": 0.8222, "step": 580 }, { "epoch": 0.0866582146319636, "grad_norm": 3.454970359802246, "learning_rate": 1.9832798462287047e-05, "loss": 0.9424, "step": 581 }, { "epoch": 0.08680736818554702, "grad_norm": 4.10425329208374, "learning_rate": 1.9831917451586036e-05, "loss": 0.7427, "step": 582 }, { "epoch": 0.08695652173913043, "grad_norm": 5.213951587677002, "learning_rate": 1.9831034145560854e-05, "loss": 0.8322, "step": 583 }, { "epoch": 0.08710567529271385, "grad_norm": 2.2933835983276367, "learning_rate": 1.983014854441771e-05, "loss": 0.7912, "step": 584 }, { "epoch": 0.08725482884629726, "grad_norm": 3.353792428970337, "learning_rate": 1.9829260648363366e-05, "loss": 0.7549, "step": 585 }, { "epoch": 0.08740398239988068, "grad_norm": 2.1137189865112305, "learning_rate": 1.982837045760509e-05, "loss": 0.8424, "step": 586 }, { "epoch": 0.08755313595346409, "grad_norm": 3.367601156234741, "learning_rate": 1.9827477972350713e-05, "loss": 0.7713, "step": 587 }, { "epoch": 0.0877022895070475, "grad_norm": 2.4120216369628906, "learning_rate": 1.982658319280859e-05, "loss": 0.7816, "step": 588 }, { "epoch": 0.08785144306063092, "grad_norm": 2.6518282890319824, "learning_rate": 1.9825686119187613e-05, "loss": 0.7752, "step": 589 }, { "epoch": 0.08800059661421433, "grad_norm": 2.0931222438812256, "learning_rate": 1.9824786751697206e-05, "loss": 0.8048, "step": 590 }, { "epoch": 0.08814975016779775, "grad_norm": 3.3228282928466797, "learning_rate": 1.9823885090547338e-05, "loss": 0.8002, "step": 591 }, { "epoch": 0.08829890372138116, "grad_norm": 3.243133544921875, "learning_rate": 1.98229811359485e-05, "loss": 0.7373, "step": 592 }, { "epoch": 0.08844805727496458, "grad_norm": 2.242403745651245, "learning_rate": 1.9822074888111738e-05, "loss": 0.8631, "step": 593 }, { "epoch": 0.08859721082854799, "grad_norm": 2.127934217453003, "learning_rate": 1.9821166347248607e-05, "loss": 0.8023, "step": 594 }, { "epoch": 0.0887463643821314, "grad_norm": 1.6103906631469727, "learning_rate": 1.9820255513571222e-05, "loss": 0.8615, "step": 595 }, { "epoch": 0.08889551793571482, "grad_norm": 3.2743375301361084, "learning_rate": 1.981934238729222e-05, "loss": 0.75, "step": 596 }, { "epoch": 0.08904467148929823, "grad_norm": 2.4008524417877197, "learning_rate": 1.9818426968624772e-05, "loss": 0.7275, "step": 597 }, { "epoch": 0.08919382504288165, "grad_norm": 1.998936414718628, "learning_rate": 1.9817509257782595e-05, "loss": 0.8407, "step": 598 }, { "epoch": 0.08934297859646506, "grad_norm": 2.3720529079437256, "learning_rate": 1.9816589254979932e-05, "loss": 0.8693, "step": 599 }, { "epoch": 0.08949213215004848, "grad_norm": 0.5061334371566772, "learning_rate": 1.981566696043156e-05, "loss": 0.2207, "step": 600 }, { "epoch": 0.08964128570363189, "grad_norm": 1.8684086799621582, "learning_rate": 1.98147423743528e-05, "loss": 0.949, "step": 601 }, { "epoch": 0.0897904392572153, "grad_norm": 2.9182987213134766, "learning_rate": 1.98138154969595e-05, "loss": 0.9267, "step": 602 }, { "epoch": 0.08993959281079872, "grad_norm": 2.8200464248657227, "learning_rate": 1.9812886328468047e-05, "loss": 0.8888, "step": 603 }, { "epoch": 0.09008874636438213, "grad_norm": 3.0810766220092773, "learning_rate": 1.981195486909536e-05, "loss": 0.7636, "step": 604 }, { "epoch": 0.09023789991796555, "grad_norm": 2.815110445022583, "learning_rate": 1.981102111905889e-05, "loss": 0.8389, "step": 605 }, { "epoch": 0.09038705347154896, "grad_norm": 2.610570192337036, "learning_rate": 1.981008507857664e-05, "loss": 0.8064, "step": 606 }, { "epoch": 0.09053620702513238, "grad_norm": 2.760401725769043, "learning_rate": 1.9809146747867116e-05, "loss": 0.8605, "step": 607 }, { "epoch": 0.09068536057871579, "grad_norm": 2.642451047897339, "learning_rate": 1.980820612714939e-05, "loss": 0.7986, "step": 608 }, { "epoch": 0.0908345141322992, "grad_norm": 2.717817544937134, "learning_rate": 1.9807263216643053e-05, "loss": 0.8307, "step": 609 }, { "epoch": 0.09098366768588262, "grad_norm": 2.3141913414001465, "learning_rate": 1.9806318016568235e-05, "loss": 0.859, "step": 610 }, { "epoch": 0.09113282123946603, "grad_norm": 2.628553628921509, "learning_rate": 1.98053705271456e-05, "loss": 0.8829, "step": 611 }, { "epoch": 0.09128197479304945, "grad_norm": 2.9807374477386475, "learning_rate": 1.980442074859634e-05, "loss": 0.8395, "step": 612 }, { "epoch": 0.09143112834663286, "grad_norm": 2.2093253135681152, "learning_rate": 1.980346868114219e-05, "loss": 0.7744, "step": 613 }, { "epoch": 0.09158028190021628, "grad_norm": 2.5987279415130615, "learning_rate": 1.9802514325005414e-05, "loss": 0.7559, "step": 614 }, { "epoch": 0.09172943545379969, "grad_norm": 4.043369770050049, "learning_rate": 1.980155768040882e-05, "loss": 0.838, "step": 615 }, { "epoch": 0.0918785890073831, "grad_norm": 2.9829115867614746, "learning_rate": 1.9800598747575734e-05, "loss": 0.723, "step": 616 }, { "epoch": 0.09202774256096652, "grad_norm": 2.1568949222564697, "learning_rate": 1.9799637526730027e-05, "loss": 0.8257, "step": 617 }, { "epoch": 0.09217689611454993, "grad_norm": 3.3093373775482178, "learning_rate": 1.9798674018096106e-05, "loss": 0.8236, "step": 618 }, { "epoch": 0.09232604966813335, "grad_norm": 3.3171072006225586, "learning_rate": 1.9797708221898906e-05, "loss": 0.7013, "step": 619 }, { "epoch": 0.09247520322171676, "grad_norm": 3.0135498046875, "learning_rate": 1.97967401383639e-05, "loss": 0.7744, "step": 620 }, { "epoch": 0.09262435677530018, "grad_norm": 1.7315059900283813, "learning_rate": 1.9795769767717087e-05, "loss": 0.9238, "step": 621 }, { "epoch": 0.09277351032888359, "grad_norm": 3.6466875076293945, "learning_rate": 1.9794797110185015e-05, "loss": 0.7711, "step": 622 }, { "epoch": 0.092922663882467, "grad_norm": 2.2572970390319824, "learning_rate": 1.979382216599475e-05, "loss": 0.8609, "step": 623 }, { "epoch": 0.09307181743605042, "grad_norm": 2.936962366104126, "learning_rate": 1.9792844935373905e-05, "loss": 0.8224, "step": 624 }, { "epoch": 0.09322097098963383, "grad_norm": 2.4368178844451904, "learning_rate": 1.9791865418550618e-05, "loss": 0.7899, "step": 625 }, { "epoch": 0.09337012454321725, "grad_norm": 2.8427374362945557, "learning_rate": 1.979088361575356e-05, "loss": 0.8799, "step": 626 }, { "epoch": 0.09351927809680066, "grad_norm": 1.7725334167480469, "learning_rate": 1.9789899527211943e-05, "loss": 0.8933, "step": 627 }, { "epoch": 0.09366843165038408, "grad_norm": 0.5200384259223938, "learning_rate": 1.9788913153155512e-05, "loss": 0.239, "step": 628 }, { "epoch": 0.09381758520396749, "grad_norm": 2.2049055099487305, "learning_rate": 1.9787924493814533e-05, "loss": 0.764, "step": 629 }, { "epoch": 0.0939667387575509, "grad_norm": 2.6333913803100586, "learning_rate": 1.9786933549419826e-05, "loss": 0.8359, "step": 630 }, { "epoch": 0.09411589231113432, "grad_norm": 2.4399325847625732, "learning_rate": 1.9785940320202726e-05, "loss": 0.8635, "step": 631 }, { "epoch": 0.09426504586471773, "grad_norm": 0.4784151017665863, "learning_rate": 1.978494480639511e-05, "loss": 0.2209, "step": 632 }, { "epoch": 0.09441419941830115, "grad_norm": 2.390080690383911, "learning_rate": 1.9783947008229387e-05, "loss": 0.7568, "step": 633 }, { "epoch": 0.09456335297188455, "grad_norm": 2.1727802753448486, "learning_rate": 1.97829469259385e-05, "loss": 0.8448, "step": 634 }, { "epoch": 0.09471250652546796, "grad_norm": 2.274048328399658, "learning_rate": 1.9781944559755924e-05, "loss": 0.7859, "step": 635 }, { "epoch": 0.09486166007905138, "grad_norm": 2.6616296768188477, "learning_rate": 1.9780939909915666e-05, "loss": 0.9253, "step": 636 }, { "epoch": 0.09501081363263479, "grad_norm": 2.671600580215454, "learning_rate": 1.9779932976652272e-05, "loss": 0.8382, "step": 637 }, { "epoch": 0.0951599671862182, "grad_norm": 1.727188229560852, "learning_rate": 1.977892376020082e-05, "loss": 0.8871, "step": 638 }, { "epoch": 0.09530912073980162, "grad_norm": 2.1968235969543457, "learning_rate": 1.977791226079691e-05, "loss": 0.7484, "step": 639 }, { "epoch": 0.09545827429338503, "grad_norm": 5.013859748840332, "learning_rate": 1.9776898478676684e-05, "loss": 0.8441, "step": 640 }, { "epoch": 0.09560742784696845, "grad_norm": 2.9420268535614014, "learning_rate": 1.9775882414076822e-05, "loss": 0.7857, "step": 641 }, { "epoch": 0.09575658140055186, "grad_norm": 2.3294456005096436, "learning_rate": 1.9774864067234525e-05, "loss": 0.8229, "step": 642 }, { "epoch": 0.09590573495413528, "grad_norm": 2.520538091659546, "learning_rate": 1.9773843438387534e-05, "loss": 0.8123, "step": 643 }, { "epoch": 0.09605488850771869, "grad_norm": 4.033265113830566, "learning_rate": 1.9772820527774127e-05, "loss": 0.7989, "step": 644 }, { "epoch": 0.0962040420613021, "grad_norm": 3.0386438369750977, "learning_rate": 1.9771795335633098e-05, "loss": 0.7575, "step": 645 }, { "epoch": 0.09635319561488552, "grad_norm": 1.8722622394561768, "learning_rate": 1.9770767862203795e-05, "loss": 0.8461, "step": 646 }, { "epoch": 0.09650234916846893, "grad_norm": 3.2173075675964355, "learning_rate": 1.976973810772608e-05, "loss": 0.8235, "step": 647 }, { "epoch": 0.09665150272205235, "grad_norm": 2.3234479427337646, "learning_rate": 1.976870607244036e-05, "loss": 0.8425, "step": 648 }, { "epoch": 0.09680065627563576, "grad_norm": 2.8490476608276367, "learning_rate": 1.9767671756587577e-05, "loss": 0.9058, "step": 649 }, { "epoch": 0.09694980982921918, "grad_norm": 2.6494905948638916, "learning_rate": 1.9766635160409186e-05, "loss": 0.7741, "step": 650 }, { "epoch": 0.09709896338280259, "grad_norm": 2.1425421237945557, "learning_rate": 1.9765596284147192e-05, "loss": 0.7479, "step": 651 }, { "epoch": 0.097248116936386, "grad_norm": 3.4786620140075684, "learning_rate": 1.9764555128044128e-05, "loss": 0.8144, "step": 652 }, { "epoch": 0.09739727048996942, "grad_norm": 2.728360414505005, "learning_rate": 1.9763511692343062e-05, "loss": 0.8268, "step": 653 }, { "epoch": 0.09754642404355283, "grad_norm": 3.3377373218536377, "learning_rate": 1.9762465977287587e-05, "loss": 0.8655, "step": 654 }, { "epoch": 0.09769557759713625, "grad_norm": 17.418981552124023, "learning_rate": 1.976141798312183e-05, "loss": 0.862, "step": 655 }, { "epoch": 0.09784473115071966, "grad_norm": 3.1046903133392334, "learning_rate": 1.976036771009046e-05, "loss": 0.8342, "step": 656 }, { "epoch": 0.09799388470430308, "grad_norm": 2.5028178691864014, "learning_rate": 1.9759315158438658e-05, "loss": 0.8443, "step": 657 }, { "epoch": 0.09814303825788649, "grad_norm": 3.32932710647583, "learning_rate": 1.9758260328412154e-05, "loss": 0.884, "step": 658 }, { "epoch": 0.0982921918114699, "grad_norm": 2.751117467880249, "learning_rate": 1.975720322025721e-05, "loss": 0.8771, "step": 659 }, { "epoch": 0.09844134536505332, "grad_norm": 14.268705368041992, "learning_rate": 1.975614383422061e-05, "loss": 0.867, "step": 660 }, { "epoch": 0.09859049891863673, "grad_norm": 0.5297582149505615, "learning_rate": 1.9755082170549675e-05, "loss": 0.2524, "step": 661 }, { "epoch": 0.09873965247222015, "grad_norm": 8.351545333862305, "learning_rate": 1.9754018229492254e-05, "loss": 0.861, "step": 662 }, { "epoch": 0.09888880602580356, "grad_norm": 16.73049545288086, "learning_rate": 1.975295201129674e-05, "loss": 0.8846, "step": 663 }, { "epoch": 0.09903795957938698, "grad_norm": 3.2050623893737793, "learning_rate": 1.975188351621204e-05, "loss": 0.8791, "step": 664 }, { "epoch": 0.09918711313297039, "grad_norm": 3.0757529735565186, "learning_rate": 1.9750812744487605e-05, "loss": 0.8819, "step": 665 }, { "epoch": 0.0993362666865538, "grad_norm": 4.745767593383789, "learning_rate": 1.974973969637341e-05, "loss": 0.7991, "step": 666 }, { "epoch": 0.09948542024013722, "grad_norm": 3.0686159133911133, "learning_rate": 1.974866437211997e-05, "loss": 0.8773, "step": 667 }, { "epoch": 0.09963457379372063, "grad_norm": 9.068389892578125, "learning_rate": 1.974758677197832e-05, "loss": 0.8697, "step": 668 }, { "epoch": 0.09978372734730405, "grad_norm": 2.776921272277832, "learning_rate": 1.974650689620004e-05, "loss": 0.8577, "step": 669 }, { "epoch": 0.09993288090088746, "grad_norm": 3.5375795364379883, "learning_rate": 1.9745424745037226e-05, "loss": 0.7439, "step": 670 }, { "epoch": 0.10008203445447088, "grad_norm": 4.604068279266357, "learning_rate": 1.974434031874252e-05, "loss": 0.8113, "step": 671 }, { "epoch": 0.10023118800805429, "grad_norm": 3.4834671020507812, "learning_rate": 1.9743253617569085e-05, "loss": 0.7939, "step": 672 }, { "epoch": 0.1003803415616377, "grad_norm": 3.472323179244995, "learning_rate": 1.9742164641770617e-05, "loss": 0.8438, "step": 673 }, { "epoch": 0.10052949511522112, "grad_norm": 3.592740297317505, "learning_rate": 1.974107339160135e-05, "loss": 0.8173, "step": 674 }, { "epoch": 0.10067864866880454, "grad_norm": 3.9779179096221924, "learning_rate": 1.9739979867316035e-05, "loss": 0.7456, "step": 675 }, { "epoch": 0.10082780222238795, "grad_norm": 6.648735046386719, "learning_rate": 1.9738884069169972e-05, "loss": 0.8973, "step": 676 }, { "epoch": 0.10097695577597136, "grad_norm": 4.347858428955078, "learning_rate": 1.9737785997418973e-05, "loss": 0.7872, "step": 677 }, { "epoch": 0.10112610932955478, "grad_norm": 4.3417487144470215, "learning_rate": 1.9736685652319398e-05, "loss": 0.8209, "step": 678 }, { "epoch": 0.10127526288313819, "grad_norm": 8.057451248168945, "learning_rate": 1.973558303412812e-05, "loss": 0.8155, "step": 679 }, { "epoch": 0.1014244164367216, "grad_norm": 8.785006523132324, "learning_rate": 1.973447814310256e-05, "loss": 0.854, "step": 680 }, { "epoch": 0.10157356999030502, "grad_norm": 3.313790798187256, "learning_rate": 1.973337097950066e-05, "loss": 0.8593, "step": 681 }, { "epoch": 0.10172272354388844, "grad_norm": 3.2078914642333984, "learning_rate": 1.9732261543580894e-05, "loss": 0.8621, "step": 682 }, { "epoch": 0.10187187709747185, "grad_norm": 4.27331018447876, "learning_rate": 1.973114983560227e-05, "loss": 0.8265, "step": 683 }, { "epoch": 0.10202103065105526, "grad_norm": 2.083495616912842, "learning_rate": 1.9730035855824317e-05, "loss": 0.9087, "step": 684 }, { "epoch": 0.10217018420463868, "grad_norm": 3.4303486347198486, "learning_rate": 1.9728919604507105e-05, "loss": 0.8847, "step": 685 }, { "epoch": 0.10231933775822209, "grad_norm": 5.821327209472656, "learning_rate": 1.9727801081911227e-05, "loss": 0.8418, "step": 686 }, { "epoch": 0.1024684913118055, "grad_norm": 1.7876102924346924, "learning_rate": 1.9726680288297815e-05, "loss": 0.8567, "step": 687 }, { "epoch": 0.10261764486538892, "grad_norm": 2.350482225418091, "learning_rate": 1.972555722392852e-05, "loss": 0.8969, "step": 688 }, { "epoch": 0.10276679841897234, "grad_norm": 2.362520217895508, "learning_rate": 1.972443188906553e-05, "loss": 0.8013, "step": 689 }, { "epoch": 0.10291595197255575, "grad_norm": 2.4565207958221436, "learning_rate": 1.9723304283971566e-05, "loss": 0.7969, "step": 690 }, { "epoch": 0.10306510552613916, "grad_norm": 2.1277353763580322, "learning_rate": 1.9722174408909866e-05, "loss": 0.8391, "step": 691 }, { "epoch": 0.10321425907972258, "grad_norm": 2.923170804977417, "learning_rate": 1.9721042264144214e-05, "loss": 0.8683, "step": 692 }, { "epoch": 0.10336341263330599, "grad_norm": 2.162055492401123, "learning_rate": 1.971990784993891e-05, "loss": 0.798, "step": 693 }, { "epoch": 0.10351256618688941, "grad_norm": 4.709125518798828, "learning_rate": 1.9718771166558796e-05, "loss": 0.846, "step": 694 }, { "epoch": 0.10366171974047282, "grad_norm": 2.7277090549468994, "learning_rate": 1.971763221426924e-05, "loss": 0.7865, "step": 695 }, { "epoch": 0.10381087329405624, "grad_norm": 4.088252544403076, "learning_rate": 1.971649099333613e-05, "loss": 0.8477, "step": 696 }, { "epoch": 0.10396002684763965, "grad_norm": 2.9493038654327393, "learning_rate": 1.971534750402589e-05, "loss": 0.8248, "step": 697 }, { "epoch": 0.10410918040122306, "grad_norm": 2.507768392562866, "learning_rate": 1.971420174660549e-05, "loss": 0.8571, "step": 698 }, { "epoch": 0.10425833395480648, "grad_norm": 0.5354949831962585, "learning_rate": 1.97130537213424e-05, "loss": 0.2125, "step": 699 }, { "epoch": 0.10440748750838989, "grad_norm": 3.37912654876709, "learning_rate": 1.971190342850464e-05, "loss": 0.808, "step": 700 }, { "epoch": 0.10455664106197331, "grad_norm": 2.1126956939697266, "learning_rate": 1.971075086836075e-05, "loss": 0.8655, "step": 701 }, { "epoch": 0.10470579461555672, "grad_norm": 3.445605993270874, "learning_rate": 1.9709596041179802e-05, "loss": 0.8475, "step": 702 }, { "epoch": 0.10485494816914014, "grad_norm": 3.220357656478882, "learning_rate": 1.9708438947231402e-05, "loss": 0.8139, "step": 703 }, { "epoch": 0.10500410172272355, "grad_norm": 4.121255874633789, "learning_rate": 1.970727958678568e-05, "loss": 0.8273, "step": 704 }, { "epoch": 0.10515325527630696, "grad_norm": 1.8336756229400635, "learning_rate": 1.970611796011329e-05, "loss": 0.8796, "step": 705 }, { "epoch": 0.10530240882989038, "grad_norm": 2.7519049644470215, "learning_rate": 1.9704954067485432e-05, "loss": 0.8326, "step": 706 }, { "epoch": 0.1054515623834738, "grad_norm": 2.917940378189087, "learning_rate": 1.9703787909173816e-05, "loss": 0.8229, "step": 707 }, { "epoch": 0.1056007159370572, "grad_norm": 3.2092463970184326, "learning_rate": 1.970261948545069e-05, "loss": 0.8821, "step": 708 }, { "epoch": 0.10574986949064061, "grad_norm": 2.541522264480591, "learning_rate": 1.9701448796588837e-05, "loss": 0.7785, "step": 709 }, { "epoch": 0.10589902304422402, "grad_norm": 2.0192558765411377, "learning_rate": 1.970027584286155e-05, "loss": 0.8761, "step": 710 }, { "epoch": 0.10604817659780744, "grad_norm": 3.4000191688537598, "learning_rate": 1.9699100624542673e-05, "loss": 0.769, "step": 711 }, { "epoch": 0.10619733015139085, "grad_norm": 2.880735397338867, "learning_rate": 1.9697923141906563e-05, "loss": 0.7863, "step": 712 }, { "epoch": 0.10634648370497427, "grad_norm": 3.7799413204193115, "learning_rate": 1.9696743395228113e-05, "loss": 0.769, "step": 713 }, { "epoch": 0.10649563725855768, "grad_norm": 2.9498369693756104, "learning_rate": 1.9695561384782743e-05, "loss": 0.8212, "step": 714 }, { "epoch": 0.1066447908121411, "grad_norm": 5.377349853515625, "learning_rate": 1.9694377110846393e-05, "loss": 0.8092, "step": 715 }, { "epoch": 0.10679394436572451, "grad_norm": 4.170783519744873, "learning_rate": 1.969319057369555e-05, "loss": 0.7468, "step": 716 }, { "epoch": 0.10694309791930792, "grad_norm": 3.145975351333618, "learning_rate": 1.9692001773607215e-05, "loss": 0.8219, "step": 717 }, { "epoch": 0.10709225147289134, "grad_norm": 2.054783821105957, "learning_rate": 1.969081071085892e-05, "loss": 0.7738, "step": 718 }, { "epoch": 0.10724140502647475, "grad_norm": 6.688480854034424, "learning_rate": 1.9689617385728726e-05, "loss": 0.7944, "step": 719 }, { "epoch": 0.10739055858005817, "grad_norm": 1.9312061071395874, "learning_rate": 1.9688421798495225e-05, "loss": 0.7911, "step": 720 }, { "epoch": 0.10753971213364158, "grad_norm": 3.081460952758789, "learning_rate": 1.968722394943753e-05, "loss": 0.7701, "step": 721 }, { "epoch": 0.107688865687225, "grad_norm": 1.9278032779693604, "learning_rate": 1.9686023838835292e-05, "loss": 0.8842, "step": 722 }, { "epoch": 0.10783801924080841, "grad_norm": 3.834873676300049, "learning_rate": 1.968482146696868e-05, "loss": 0.8554, "step": 723 }, { "epoch": 0.10798717279439182, "grad_norm": 2.850107431411743, "learning_rate": 1.9683616834118398e-05, "loss": 0.8135, "step": 724 }, { "epoch": 0.10813632634797524, "grad_norm": 3.121727705001831, "learning_rate": 1.968240994056567e-05, "loss": 0.7393, "step": 725 }, { "epoch": 0.10828547990155865, "grad_norm": 2.9758198261260986, "learning_rate": 1.9681200786592265e-05, "loss": 0.7862, "step": 726 }, { "epoch": 0.10843463345514207, "grad_norm": 2.3511736392974854, "learning_rate": 1.9679989372480456e-05, "loss": 0.6867, "step": 727 }, { "epoch": 0.10858378700872548, "grad_norm": 2.3530468940734863, "learning_rate": 1.967877569851306e-05, "loss": 0.8456, "step": 728 }, { "epoch": 0.1087329405623089, "grad_norm": 1.944705843925476, "learning_rate": 1.9677559764973416e-05, "loss": 0.7808, "step": 729 }, { "epoch": 0.10888209411589231, "grad_norm": 2.1758229732513428, "learning_rate": 1.967634157214539e-05, "loss": 0.7786, "step": 730 }, { "epoch": 0.10903124766947572, "grad_norm": 2.0649068355560303, "learning_rate": 1.967512112031338e-05, "loss": 0.7831, "step": 731 }, { "epoch": 0.10918040122305914, "grad_norm": 2.053643226623535, "learning_rate": 1.9673898409762315e-05, "loss": 0.8152, "step": 732 }, { "epoch": 0.10932955477664255, "grad_norm": 2.2040350437164307, "learning_rate": 1.9672673440777628e-05, "loss": 0.8429, "step": 733 }, { "epoch": 0.10947870833022597, "grad_norm": 2.8127799034118652, "learning_rate": 1.9671446213645306e-05, "loss": 0.7192, "step": 734 }, { "epoch": 0.10962786188380938, "grad_norm": 3.011967182159424, "learning_rate": 1.9670216728651854e-05, "loss": 0.8694, "step": 735 }, { "epoch": 0.1097770154373928, "grad_norm": 2.266118049621582, "learning_rate": 1.96689849860843e-05, "loss": 0.7627, "step": 736 }, { "epoch": 0.10992616899097621, "grad_norm": 3.4457147121429443, "learning_rate": 1.9667750986230203e-05, "loss": 0.8016, "step": 737 }, { "epoch": 0.11007532254455962, "grad_norm": 3.0277299880981445, "learning_rate": 1.966651472937765e-05, "loss": 0.7833, "step": 738 }, { "epoch": 0.11022447609814304, "grad_norm": 2.8349709510803223, "learning_rate": 1.9665276215815247e-05, "loss": 0.7786, "step": 739 }, { "epoch": 0.11037362965172645, "grad_norm": 2.264285087585449, "learning_rate": 1.966403544583214e-05, "loss": 0.8238, "step": 740 }, { "epoch": 0.11052278320530987, "grad_norm": 2.0733835697174072, "learning_rate": 1.966279241971799e-05, "loss": 0.8521, "step": 741 }, { "epoch": 0.11067193675889328, "grad_norm": 2.30551815032959, "learning_rate": 1.9661547137762994e-05, "loss": 0.8568, "step": 742 }, { "epoch": 0.1108210903124767, "grad_norm": 2.0153725147247314, "learning_rate": 1.966029960025787e-05, "loss": 0.9023, "step": 743 }, { "epoch": 0.11097024386606011, "grad_norm": 0.569275438785553, "learning_rate": 1.965904980749386e-05, "loss": 0.2242, "step": 744 }, { "epoch": 0.11111939741964352, "grad_norm": 1.8617186546325684, "learning_rate": 1.9657797759762735e-05, "loss": 0.8164, "step": 745 }, { "epoch": 0.11126855097322694, "grad_norm": 3.2866101264953613, "learning_rate": 1.96565434573568e-05, "loss": 0.8626, "step": 746 }, { "epoch": 0.11141770452681035, "grad_norm": 1.5502421855926514, "learning_rate": 1.965528690056888e-05, "loss": 0.8825, "step": 747 }, { "epoch": 0.11156685808039377, "grad_norm": 2.0817325115203857, "learning_rate": 1.9654028089692317e-05, "loss": 0.8133, "step": 748 }, { "epoch": 0.11171601163397718, "grad_norm": 1.9226375818252563, "learning_rate": 1.9652767025020997e-05, "loss": 0.7402, "step": 749 }, { "epoch": 0.1118651651875606, "grad_norm": 3.241757392883301, "learning_rate": 1.965150370684932e-05, "loss": 0.8259, "step": 750 }, { "epoch": 0.11201431874114401, "grad_norm": 2.0300674438476562, "learning_rate": 1.965023813547222e-05, "loss": 0.8041, "step": 751 }, { "epoch": 0.11216347229472742, "grad_norm": 2.1764445304870605, "learning_rate": 1.964897031118515e-05, "loss": 0.7774, "step": 752 }, { "epoch": 0.11231262584831084, "grad_norm": 4.360199928283691, "learning_rate": 1.9647700234284087e-05, "loss": 0.7839, "step": 753 }, { "epoch": 0.11246177940189425, "grad_norm": 2.14062237739563, "learning_rate": 1.9646427905065545e-05, "loss": 0.7627, "step": 754 }, { "epoch": 0.11261093295547767, "grad_norm": 2.235795021057129, "learning_rate": 1.9645153323826558e-05, "loss": 0.7693, "step": 755 }, { "epoch": 0.11276008650906108, "grad_norm": 4.320451259613037, "learning_rate": 1.9643876490864678e-05, "loss": 0.8664, "step": 756 }, { "epoch": 0.1129092400626445, "grad_norm": 2.6023197174072266, "learning_rate": 1.9642597406478e-05, "loss": 0.864, "step": 757 }, { "epoch": 0.11305839361622791, "grad_norm": 2.091099262237549, "learning_rate": 1.9641316070965123e-05, "loss": 0.8781, "step": 758 }, { "epoch": 0.11320754716981132, "grad_norm": 0.5808754563331604, "learning_rate": 1.964003248462519e-05, "loss": 0.2266, "step": 759 }, { "epoch": 0.11335670072339474, "grad_norm": 2.0434632301330566, "learning_rate": 1.963874664775786e-05, "loss": 0.8389, "step": 760 }, { "epoch": 0.11350585427697815, "grad_norm": 1.7886003255844116, "learning_rate": 1.9637458560663323e-05, "loss": 0.8024, "step": 761 }, { "epoch": 0.11365500783056157, "grad_norm": 2.195028305053711, "learning_rate": 1.9636168223642288e-05, "loss": 0.9055, "step": 762 }, { "epoch": 0.11380416138414498, "grad_norm": 2.613978147506714, "learning_rate": 1.9634875636996e-05, "loss": 0.8286, "step": 763 }, { "epoch": 0.1139533149377284, "grad_norm": 0.48986101150512695, "learning_rate": 1.9633580801026207e-05, "loss": 0.2297, "step": 764 }, { "epoch": 0.11410246849131181, "grad_norm": 2.7488393783569336, "learning_rate": 1.963228371603521e-05, "loss": 0.8372, "step": 765 }, { "epoch": 0.11425162204489522, "grad_norm": 5.439416408538818, "learning_rate": 1.9630984382325816e-05, "loss": 0.8164, "step": 766 }, { "epoch": 0.11440077559847864, "grad_norm": 2.11270809173584, "learning_rate": 1.9629682800201363e-05, "loss": 0.7736, "step": 767 }, { "epoch": 0.11454992915206205, "grad_norm": 2.2442221641540527, "learning_rate": 1.9628378969965712e-05, "loss": 0.8269, "step": 768 }, { "epoch": 0.11469908270564547, "grad_norm": 2.1655447483062744, "learning_rate": 1.9627072891923258e-05, "loss": 0.736, "step": 769 }, { "epoch": 0.11484823625922888, "grad_norm": 4.650902271270752, "learning_rate": 1.9625764566378903e-05, "loss": 0.7592, "step": 770 }, { "epoch": 0.1149973898128123, "grad_norm": 2.7398149967193604, "learning_rate": 1.9624453993638094e-05, "loss": 0.7408, "step": 771 }, { "epoch": 0.11514654336639571, "grad_norm": 2.503850221633911, "learning_rate": 1.9623141174006785e-05, "loss": 0.8644, "step": 772 }, { "epoch": 0.11529569691997912, "grad_norm": 2.927370548248291, "learning_rate": 1.9621826107791465e-05, "loss": 0.8175, "step": 773 }, { "epoch": 0.11544485047356254, "grad_norm": 2.444594621658325, "learning_rate": 1.9620508795299148e-05, "loss": 0.8095, "step": 774 }, { "epoch": 0.11559400402714595, "grad_norm": 1.8413902521133423, "learning_rate": 1.961918923683736e-05, "loss": 0.8293, "step": 775 }, { "epoch": 0.11574315758072937, "grad_norm": 2.780571937561035, "learning_rate": 1.961786743271417e-05, "loss": 0.7721, "step": 776 }, { "epoch": 0.11589231113431278, "grad_norm": 1.992701530456543, "learning_rate": 1.9616543383238158e-05, "loss": 0.8464, "step": 777 }, { "epoch": 0.1160414646878962, "grad_norm": 0.5350305438041687, "learning_rate": 1.961521708871843e-05, "loss": 0.1963, "step": 778 }, { "epoch": 0.11619061824147961, "grad_norm": 3.6759605407714844, "learning_rate": 1.961388854946462e-05, "loss": 0.7584, "step": 779 }, { "epoch": 0.11633977179506302, "grad_norm": 14.82889175415039, "learning_rate": 1.9612557765786884e-05, "loss": 0.8654, "step": 780 }, { "epoch": 0.11648892534864644, "grad_norm": 2.515516996383667, "learning_rate": 1.96112247379959e-05, "loss": 0.8211, "step": 781 }, { "epoch": 0.11663807890222985, "grad_norm": 4.004816055297852, "learning_rate": 1.9609889466402877e-05, "loss": 0.7637, "step": 782 }, { "epoch": 0.11678723245581325, "grad_norm": 4.307981967926025, "learning_rate": 1.9608551951319535e-05, "loss": 0.8025, "step": 783 }, { "epoch": 0.11693638600939667, "grad_norm": 6.535302639007568, "learning_rate": 1.960721219305813e-05, "loss": 0.8057, "step": 784 }, { "epoch": 0.11708553956298008, "grad_norm": 2.929621934890747, "learning_rate": 1.960587019193144e-05, "loss": 0.8009, "step": 785 }, { "epoch": 0.1172346931165635, "grad_norm": 2.4500203132629395, "learning_rate": 1.9604525948252758e-05, "loss": 0.8325, "step": 786 }, { "epoch": 0.11738384667014691, "grad_norm": 12.971890449523926, "learning_rate": 1.9603179462335907e-05, "loss": 0.808, "step": 787 }, { "epoch": 0.11753300022373032, "grad_norm": 3.569223403930664, "learning_rate": 1.9601830734495236e-05, "loss": 0.846, "step": 788 }, { "epoch": 0.11768215377731374, "grad_norm": 2.770012855529785, "learning_rate": 1.9600479765045615e-05, "loss": 0.8955, "step": 789 }, { "epoch": 0.11783130733089715, "grad_norm": 3.6560609340667725, "learning_rate": 1.959912655430243e-05, "loss": 0.7799, "step": 790 }, { "epoch": 0.11798046088448057, "grad_norm": 3.0810372829437256, "learning_rate": 1.9597771102581607e-05, "loss": 0.7989, "step": 791 }, { "epoch": 0.11812961443806398, "grad_norm": 0.5375723242759705, "learning_rate": 1.9596413410199574e-05, "loss": 0.1983, "step": 792 }, { "epoch": 0.1182787679916474, "grad_norm": 6.167840480804443, "learning_rate": 1.9595053477473302e-05, "loss": 0.7611, "step": 793 }, { "epoch": 0.11842792154523081, "grad_norm": 0.48420751094818115, "learning_rate": 1.959369130472027e-05, "loss": 0.23, "step": 794 }, { "epoch": 0.11857707509881422, "grad_norm": 3.0134971141815186, "learning_rate": 1.9592326892258486e-05, "loss": 0.763, "step": 795 }, { "epoch": 0.11872622865239764, "grad_norm": 4.185347080230713, "learning_rate": 1.9590960240406483e-05, "loss": 0.8454, "step": 796 }, { "epoch": 0.11887538220598105, "grad_norm": 4.1316237449646, "learning_rate": 1.9589591349483316e-05, "loss": 0.7671, "step": 797 }, { "epoch": 0.11902453575956447, "grad_norm": 7.2242960929870605, "learning_rate": 1.9588220219808554e-05, "loss": 0.7911, "step": 798 }, { "epoch": 0.11917368931314788, "grad_norm": 2.138535499572754, "learning_rate": 1.9586846851702307e-05, "loss": 0.8191, "step": 799 }, { "epoch": 0.1193228428667313, "grad_norm": 3.153193235397339, "learning_rate": 1.9585471245485193e-05, "loss": 0.8307, "step": 800 }, { "epoch": 0.11947199642031471, "grad_norm": 3.713001012802124, "learning_rate": 1.958409340147835e-05, "loss": 0.7137, "step": 801 }, { "epoch": 0.11962114997389812, "grad_norm": 4.376029014587402, "learning_rate": 1.9582713320003454e-05, "loss": 0.7934, "step": 802 }, { "epoch": 0.11977030352748154, "grad_norm": 2.770272970199585, "learning_rate": 1.9581331001382683e-05, "loss": 0.9067, "step": 803 }, { "epoch": 0.11991945708106495, "grad_norm": 2.5854084491729736, "learning_rate": 1.9579946445938755e-05, "loss": 0.8655, "step": 804 }, { "epoch": 0.12006861063464837, "grad_norm": 4.009220600128174, "learning_rate": 1.9578559653994905e-05, "loss": 0.7649, "step": 805 }, { "epoch": 0.12021776418823178, "grad_norm": 5.168023109436035, "learning_rate": 1.9577170625874885e-05, "loss": 0.8276, "step": 806 }, { "epoch": 0.1203669177418152, "grad_norm": 3.122032642364502, "learning_rate": 1.957577936190297e-05, "loss": 0.8298, "step": 807 }, { "epoch": 0.12051607129539861, "grad_norm": 3.045417547225952, "learning_rate": 1.9574385862403965e-05, "loss": 0.8891, "step": 808 }, { "epoch": 0.12066522484898203, "grad_norm": 3.713606119155884, "learning_rate": 1.957299012770319e-05, "loss": 0.8249, "step": 809 }, { "epoch": 0.12081437840256544, "grad_norm": 2.5008583068847656, "learning_rate": 1.9571592158126488e-05, "loss": 0.7499, "step": 810 }, { "epoch": 0.12096353195614885, "grad_norm": 3.0870532989501953, "learning_rate": 1.9570191954000225e-05, "loss": 0.8457, "step": 811 }, { "epoch": 0.12111268550973227, "grad_norm": 2.8016438484191895, "learning_rate": 1.956878951565128e-05, "loss": 0.7341, "step": 812 }, { "epoch": 0.12126183906331568, "grad_norm": 3.090841770172119, "learning_rate": 1.9567384843407068e-05, "loss": 0.8966, "step": 813 }, { "epoch": 0.1214109926168991, "grad_norm": 3.0694310665130615, "learning_rate": 1.9565977937595524e-05, "loss": 0.8237, "step": 814 }, { "epoch": 0.12156014617048251, "grad_norm": 3.9311916828155518, "learning_rate": 1.9564568798545086e-05, "loss": 0.7786, "step": 815 }, { "epoch": 0.12170929972406593, "grad_norm": 4.867962837219238, "learning_rate": 1.9563157426584737e-05, "loss": 0.842, "step": 816 }, { "epoch": 0.12185845327764934, "grad_norm": 2.289533853530884, "learning_rate": 1.9561743822043968e-05, "loss": 0.8413, "step": 817 }, { "epoch": 0.12200760683123275, "grad_norm": 2.560544490814209, "learning_rate": 1.9560327985252794e-05, "loss": 0.8006, "step": 818 }, { "epoch": 0.12215676038481617, "grad_norm": 0.6468976736068726, "learning_rate": 1.9558909916541746e-05, "loss": 0.2717, "step": 819 }, { "epoch": 0.12230591393839958, "grad_norm": 2.600376844406128, "learning_rate": 1.955748961624189e-05, "loss": 0.8894, "step": 820 }, { "epoch": 0.122455067491983, "grad_norm": 2.1999895572662354, "learning_rate": 1.95560670846848e-05, "loss": 0.7713, "step": 821 }, { "epoch": 0.12260422104556641, "grad_norm": 3.70650577545166, "learning_rate": 1.9554642322202574e-05, "loss": 0.7623, "step": 822 }, { "epoch": 0.12275337459914983, "grad_norm": 2.086709976196289, "learning_rate": 1.9553215329127834e-05, "loss": 0.829, "step": 823 }, { "epoch": 0.12290252815273324, "grad_norm": 2.406160831451416, "learning_rate": 1.955178610579372e-05, "loss": 0.7694, "step": 824 }, { "epoch": 0.12305168170631665, "grad_norm": 3.3508474826812744, "learning_rate": 1.955035465253389e-05, "loss": 0.7582, "step": 825 }, { "epoch": 0.12320083525990007, "grad_norm": 2.540121078491211, "learning_rate": 1.9548920969682535e-05, "loss": 0.8169, "step": 826 }, { "epoch": 0.12334998881348348, "grad_norm": 2.766216516494751, "learning_rate": 1.954748505757435e-05, "loss": 0.8504, "step": 827 }, { "epoch": 0.1234991423670669, "grad_norm": 2.494569778442383, "learning_rate": 1.9546046916544555e-05, "loss": 0.7648, "step": 828 }, { "epoch": 0.12364829592065031, "grad_norm": 0.6137761473655701, "learning_rate": 1.95446065469289e-05, "loss": 0.2469, "step": 829 }, { "epoch": 0.12379744947423373, "grad_norm": 2.474820375442505, "learning_rate": 1.9543163949063648e-05, "loss": 0.7576, "step": 830 }, { "epoch": 0.12394660302781714, "grad_norm": 1.9412078857421875, "learning_rate": 1.954171912328558e-05, "loss": 0.8597, "step": 831 }, { "epoch": 0.12409575658140055, "grad_norm": 2.5320849418640137, "learning_rate": 1.9540272069932e-05, "loss": 0.7957, "step": 832 }, { "epoch": 0.12424491013498397, "grad_norm": 4.432990550994873, "learning_rate": 1.9538822789340734e-05, "loss": 0.7468, "step": 833 }, { "epoch": 0.12439406368856738, "grad_norm": 3.5096611976623535, "learning_rate": 1.9537371281850123e-05, "loss": 0.828, "step": 834 }, { "epoch": 0.1245432172421508, "grad_norm": 2.2274680137634277, "learning_rate": 1.9535917547799036e-05, "loss": 0.9012, "step": 835 }, { "epoch": 0.12469237079573421, "grad_norm": 1.9351985454559326, "learning_rate": 1.9534461587526847e-05, "loss": 0.8476, "step": 836 }, { "epoch": 0.12484152434931763, "grad_norm": 4.721550464630127, "learning_rate": 1.953300340137347e-05, "loss": 0.8126, "step": 837 }, { "epoch": 0.12499067790290104, "grad_norm": 3.647736072540283, "learning_rate": 1.953154298967932e-05, "loss": 0.8261, "step": 838 }, { "epoch": 0.12513983145648444, "grad_norm": 3.8090248107910156, "learning_rate": 1.9530080352785343e-05, "loss": 0.7765, "step": 839 }, { "epoch": 0.12528898501006785, "grad_norm": 2.8093042373657227, "learning_rate": 1.9528615491033e-05, "loss": 0.9115, "step": 840 }, { "epoch": 0.12543813856365127, "grad_norm": 3.721938133239746, "learning_rate": 1.9527148404764275e-05, "loss": 0.7962, "step": 841 }, { "epoch": 0.12558729211723468, "grad_norm": 1.9400070905685425, "learning_rate": 1.9525679094321667e-05, "loss": 0.7833, "step": 842 }, { "epoch": 0.1257364456708181, "grad_norm": 2.4663374423980713, "learning_rate": 1.952420756004819e-05, "loss": 0.8361, "step": 843 }, { "epoch": 0.1258855992244015, "grad_norm": 2.2369534969329834, "learning_rate": 1.9522733802287394e-05, "loss": 0.7811, "step": 844 }, { "epoch": 0.12603475277798493, "grad_norm": 2.228618621826172, "learning_rate": 1.952125782138333e-05, "loss": 0.7093, "step": 845 }, { "epoch": 0.12618390633156834, "grad_norm": 3.7146995067596436, "learning_rate": 1.9519779617680577e-05, "loss": 0.7961, "step": 846 }, { "epoch": 0.12633305988515175, "grad_norm": 2.1151487827301025, "learning_rate": 1.9518299191524232e-05, "loss": 0.8985, "step": 847 }, { "epoch": 0.12648221343873517, "grad_norm": 2.87115478515625, "learning_rate": 1.9516816543259908e-05, "loss": 0.7827, "step": 848 }, { "epoch": 0.12663136699231858, "grad_norm": 3.4442150592803955, "learning_rate": 1.951533167323374e-05, "loss": 0.7749, "step": 849 }, { "epoch": 0.126780520545902, "grad_norm": 3.2826383113861084, "learning_rate": 1.951384458179238e-05, "loss": 0.8224, "step": 850 }, { "epoch": 0.1269296740994854, "grad_norm": 1.7199790477752686, "learning_rate": 1.9512355269283e-05, "loss": 0.8259, "step": 851 }, { "epoch": 0.12707882765306883, "grad_norm": 2.1554365158081055, "learning_rate": 1.9510863736053286e-05, "loss": 0.7418, "step": 852 }, { "epoch": 0.12722798120665224, "grad_norm": 3.3711960315704346, "learning_rate": 1.950936998245145e-05, "loss": 0.8683, "step": 853 }, { "epoch": 0.12737713476023566, "grad_norm": 1.9226443767547607, "learning_rate": 1.950787400882622e-05, "loss": 0.7995, "step": 854 }, { "epoch": 0.12752628831381907, "grad_norm": 1.7994880676269531, "learning_rate": 1.9506375815526833e-05, "loss": 0.814, "step": 855 }, { "epoch": 0.12767544186740248, "grad_norm": 7.079690933227539, "learning_rate": 1.950487540290306e-05, "loss": 0.8201, "step": 856 }, { "epoch": 0.1278245954209859, "grad_norm": 2.663043975830078, "learning_rate": 1.950337277130518e-05, "loss": 0.7958, "step": 857 }, { "epoch": 0.1279737489745693, "grad_norm": 3.5950210094451904, "learning_rate": 1.950186792108399e-05, "loss": 0.7941, "step": 858 }, { "epoch": 0.12812290252815273, "grad_norm": 1.9994341135025024, "learning_rate": 1.9500360852590806e-05, "loss": 0.7675, "step": 859 }, { "epoch": 0.12827205608173614, "grad_norm": 2.0373117923736572, "learning_rate": 1.9498851566177462e-05, "loss": 0.7461, "step": 860 }, { "epoch": 0.12842120963531956, "grad_norm": 2.514549970626831, "learning_rate": 1.9497340062196318e-05, "loss": 0.8217, "step": 861 }, { "epoch": 0.12857036318890297, "grad_norm": 0.6794589757919312, "learning_rate": 1.9495826341000237e-05, "loss": 0.2656, "step": 862 }, { "epoch": 0.12871951674248638, "grad_norm": 2.3097193241119385, "learning_rate": 1.9494310402942607e-05, "loss": 0.7986, "step": 863 }, { "epoch": 0.1288686702960698, "grad_norm": 1.9166615009307861, "learning_rate": 1.9492792248377337e-05, "loss": 0.7333, "step": 864 }, { "epoch": 0.1290178238496532, "grad_norm": 4.277560234069824, "learning_rate": 1.949127187765885e-05, "loss": 0.8469, "step": 865 }, { "epoch": 0.12916697740323663, "grad_norm": 2.146709680557251, "learning_rate": 1.948974929114208e-05, "loss": 0.8593, "step": 866 }, { "epoch": 0.12931613095682004, "grad_norm": 2.4419126510620117, "learning_rate": 1.9488224489182496e-05, "loss": 0.8183, "step": 867 }, { "epoch": 0.12946528451040346, "grad_norm": 1.8890548944473267, "learning_rate": 1.9486697472136063e-05, "loss": 0.7985, "step": 868 }, { "epoch": 0.12961443806398687, "grad_norm": 2.4111168384552, "learning_rate": 1.9485168240359277e-05, "loss": 0.808, "step": 869 }, { "epoch": 0.12976359161757028, "grad_norm": 1.570863127708435, "learning_rate": 1.9483636794209143e-05, "loss": 0.7729, "step": 870 }, { "epoch": 0.1299127451711537, "grad_norm": 0.5604217648506165, "learning_rate": 1.9482103134043194e-05, "loss": 0.242, "step": 871 }, { "epoch": 0.1300618987247371, "grad_norm": 1.994592547416687, "learning_rate": 1.9480567260219466e-05, "loss": 0.8027, "step": 872 }, { "epoch": 0.13021105227832053, "grad_norm": 3.058091402053833, "learning_rate": 1.9479029173096523e-05, "loss": 0.8484, "step": 873 }, { "epoch": 0.13036020583190394, "grad_norm": 1.7561966180801392, "learning_rate": 1.9477488873033435e-05, "loss": 0.8458, "step": 874 }, { "epoch": 0.13050935938548736, "grad_norm": 2.418508291244507, "learning_rate": 1.947594636038981e-05, "loss": 0.7195, "step": 875 }, { "epoch": 0.13065851293907077, "grad_norm": 2.2082929611206055, "learning_rate": 1.9474401635525738e-05, "loss": 0.7758, "step": 876 }, { "epoch": 0.13080766649265418, "grad_norm": 3.0233242511749268, "learning_rate": 1.9472854698801855e-05, "loss": 0.7945, "step": 877 }, { "epoch": 0.1309568200462376, "grad_norm": 1.8777096271514893, "learning_rate": 1.9471305550579305e-05, "loss": 0.6969, "step": 878 }, { "epoch": 0.131105973599821, "grad_norm": 0.5042579174041748, "learning_rate": 1.9469754191219743e-05, "loss": 0.2451, "step": 879 }, { "epoch": 0.13125512715340443, "grad_norm": 3.256403684616089, "learning_rate": 1.946820062108534e-05, "loss": 0.7329, "step": 880 }, { "epoch": 0.13140428070698784, "grad_norm": 3.0074100494384766, "learning_rate": 1.94666448405388e-05, "loss": 0.7447, "step": 881 }, { "epoch": 0.13155343426057126, "grad_norm": 2.797347068786621, "learning_rate": 1.9465086849943318e-05, "loss": 0.8267, "step": 882 }, { "epoch": 0.13170258781415467, "grad_norm": 1.9921234846115112, "learning_rate": 1.9463526649662617e-05, "loss": 0.735, "step": 883 }, { "epoch": 0.13185174136773808, "grad_norm": 3.3139894008636475, "learning_rate": 1.9461964240060944e-05, "loss": 0.7083, "step": 884 }, { "epoch": 0.1320008949213215, "grad_norm": 3.5850181579589844, "learning_rate": 1.9460399621503047e-05, "loss": 0.7705, "step": 885 }, { "epoch": 0.1321500484749049, "grad_norm": 4.325164318084717, "learning_rate": 1.9458832794354198e-05, "loss": 0.8753, "step": 886 }, { "epoch": 0.13229920202848833, "grad_norm": 3.2552335262298584, "learning_rate": 1.9457263758980182e-05, "loss": 0.8241, "step": 887 }, { "epoch": 0.13244835558207174, "grad_norm": 3.0180230140686035, "learning_rate": 1.9455692515747298e-05, "loss": 0.7358, "step": 888 }, { "epoch": 0.13259750913565516, "grad_norm": 2.7907986640930176, "learning_rate": 1.945411906502237e-05, "loss": 0.8202, "step": 889 }, { "epoch": 0.13274666268923857, "grad_norm": 4.328266620635986, "learning_rate": 1.9452543407172727e-05, "loss": 0.8636, "step": 890 }, { "epoch": 0.13289581624282198, "grad_norm": 3.1492271423339844, "learning_rate": 1.9450965542566217e-05, "loss": 0.8624, "step": 891 }, { "epoch": 0.1330449697964054, "grad_norm": 2.3014025688171387, "learning_rate": 1.9449385471571197e-05, "loss": 0.8115, "step": 892 }, { "epoch": 0.1331941233499888, "grad_norm": 7.07358980178833, "learning_rate": 1.9447803194556548e-05, "loss": 0.7704, "step": 893 }, { "epoch": 0.13334327690357223, "grad_norm": 1.797253966331482, "learning_rate": 1.9446218711891666e-05, "loss": 0.8268, "step": 894 }, { "epoch": 0.13349243045715564, "grad_norm": 0.5705305933952332, "learning_rate": 1.9444632023946456e-05, "loss": 0.2709, "step": 895 }, { "epoch": 0.13364158401073906, "grad_norm": 3.693243980407715, "learning_rate": 1.9443043131091343e-05, "loss": 0.7508, "step": 896 }, { "epoch": 0.13379073756432247, "grad_norm": 2.330545663833618, "learning_rate": 1.944145203369726e-05, "loss": 0.6886, "step": 897 }, { "epoch": 0.13393989111790588, "grad_norm": 0.5060058236122131, "learning_rate": 1.9439858732135657e-05, "loss": 0.2561, "step": 898 }, { "epoch": 0.1340890446714893, "grad_norm": 1.877583384513855, "learning_rate": 1.9438263226778508e-05, "loss": 0.8603, "step": 899 }, { "epoch": 0.1342381982250727, "grad_norm": 2.84515118598938, "learning_rate": 1.943666551799829e-05, "loss": 0.8513, "step": 900 }, { "epoch": 0.13438735177865613, "grad_norm": 3.1586763858795166, "learning_rate": 1.9435065606168e-05, "loss": 0.8205, "step": 901 }, { "epoch": 0.13453650533223954, "grad_norm": 4.835768699645996, "learning_rate": 1.9433463491661143e-05, "loss": 0.8147, "step": 902 }, { "epoch": 0.13468565888582296, "grad_norm": 2.6910154819488525, "learning_rate": 1.9431859174851748e-05, "loss": 0.8828, "step": 903 }, { "epoch": 0.13483481243940637, "grad_norm": 2.9341118335723877, "learning_rate": 1.943025265611435e-05, "loss": 0.8434, "step": 904 }, { "epoch": 0.13498396599298979, "grad_norm": 1.7182648181915283, "learning_rate": 1.9428643935824006e-05, "loss": 0.7901, "step": 905 }, { "epoch": 0.1351331195465732, "grad_norm": 1.8719489574432373, "learning_rate": 1.9427033014356276e-05, "loss": 0.7913, "step": 906 }, { "epoch": 0.1352822731001566, "grad_norm": 2.1790883541107178, "learning_rate": 1.942541989208724e-05, "loss": 0.8278, "step": 907 }, { "epoch": 0.13543142665374003, "grad_norm": 2.4223039150238037, "learning_rate": 1.9423804569393497e-05, "loss": 0.9081, "step": 908 }, { "epoch": 0.13558058020732344, "grad_norm": 3.672438144683838, "learning_rate": 1.942218704665215e-05, "loss": 0.7442, "step": 909 }, { "epoch": 0.13572973376090686, "grad_norm": 2.6825735569000244, "learning_rate": 1.9420567324240822e-05, "loss": 0.7692, "step": 910 }, { "epoch": 0.13587888731449027, "grad_norm": 3.610346555709839, "learning_rate": 1.9418945402537647e-05, "loss": 0.7508, "step": 911 }, { "epoch": 0.13602804086807369, "grad_norm": 3.629784345626831, "learning_rate": 1.9417321281921275e-05, "loss": 0.8064, "step": 912 }, { "epoch": 0.1361771944216571, "grad_norm": 5.6714935302734375, "learning_rate": 1.941569496277086e-05, "loss": 0.742, "step": 913 }, { "epoch": 0.1363263479752405, "grad_norm": 3.209068775177002, "learning_rate": 1.941406644546609e-05, "loss": 0.8686, "step": 914 }, { "epoch": 0.13647550152882393, "grad_norm": 2.2382590770721436, "learning_rate": 1.941243573038714e-05, "loss": 0.9, "step": 915 }, { "epoch": 0.13662465508240734, "grad_norm": 0.5633269548416138, "learning_rate": 1.9410802817914715e-05, "loss": 0.2438, "step": 916 }, { "epoch": 0.13677380863599076, "grad_norm": 2.9723496437072754, "learning_rate": 1.9409167708430036e-05, "loss": 0.7565, "step": 917 }, { "epoch": 0.13692296218957417, "grad_norm": 2.6692121028900146, "learning_rate": 1.9407530402314818e-05, "loss": 0.6971, "step": 918 }, { "epoch": 0.13707211574315759, "grad_norm": 2.4058620929718018, "learning_rate": 1.9405890899951306e-05, "loss": 0.835, "step": 919 }, { "epoch": 0.137221269296741, "grad_norm": 1.842944860458374, "learning_rate": 1.9404249201722255e-05, "loss": 0.8631, "step": 920 }, { "epoch": 0.13737042285032441, "grad_norm": 2.3994669914245605, "learning_rate": 1.9402605308010924e-05, "loss": 0.7468, "step": 921 }, { "epoch": 0.13751957640390783, "grad_norm": 2.6801137924194336, "learning_rate": 1.9400959219201096e-05, "loss": 0.791, "step": 922 }, { "epoch": 0.13766872995749124, "grad_norm": 2.3745522499084473, "learning_rate": 1.939931093567706e-05, "loss": 0.7868, "step": 923 }, { "epoch": 0.13781788351107466, "grad_norm": 3.0590481758117676, "learning_rate": 1.9397660457823618e-05, "loss": 0.7675, "step": 924 }, { "epoch": 0.13796703706465807, "grad_norm": 2.6488473415374756, "learning_rate": 1.9396007786026085e-05, "loss": 0.8525, "step": 925 }, { "epoch": 0.13811619061824149, "grad_norm": 2.245974063873291, "learning_rate": 1.9394352920670282e-05, "loss": 0.7911, "step": 926 }, { "epoch": 0.1382653441718249, "grad_norm": 2.3334481716156006, "learning_rate": 1.9392695862142556e-05, "loss": 0.7857, "step": 927 }, { "epoch": 0.13841449772540831, "grad_norm": 2.4135003089904785, "learning_rate": 1.9391036610829753e-05, "loss": 0.8481, "step": 928 }, { "epoch": 0.13856365127899173, "grad_norm": 2.992534637451172, "learning_rate": 1.9389375167119237e-05, "loss": 0.8276, "step": 929 }, { "epoch": 0.13871280483257514, "grad_norm": 3.685260057449341, "learning_rate": 1.9387711531398883e-05, "loss": 0.8238, "step": 930 }, { "epoch": 0.13886195838615856, "grad_norm": 2.984016180038452, "learning_rate": 1.9386045704057083e-05, "loss": 0.8008, "step": 931 }, { "epoch": 0.13901111193974197, "grad_norm": 1.797868013381958, "learning_rate": 1.9384377685482725e-05, "loss": 0.8398, "step": 932 }, { "epoch": 0.13916026549332539, "grad_norm": 5.7821550369262695, "learning_rate": 1.9382707476065224e-05, "loss": 0.7848, "step": 933 }, { "epoch": 0.1393094190469088, "grad_norm": 2.712038993835449, "learning_rate": 1.9381035076194502e-05, "loss": 0.7749, "step": 934 }, { "epoch": 0.13945857260049221, "grad_norm": 3.1488966941833496, "learning_rate": 1.9379360486260988e-05, "loss": 0.8139, "step": 935 }, { "epoch": 0.13960772615407563, "grad_norm": 3.4274892807006836, "learning_rate": 1.9377683706655626e-05, "loss": 0.8285, "step": 936 }, { "epoch": 0.13975687970765904, "grad_norm": 2.99845027923584, "learning_rate": 1.9376004737769878e-05, "loss": 0.7831, "step": 937 }, { "epoch": 0.13990603326124246, "grad_norm": 2.790196180343628, "learning_rate": 1.93743235799957e-05, "loss": 0.7848, "step": 938 }, { "epoch": 0.14005518681482587, "grad_norm": 1.9706332683563232, "learning_rate": 1.9372640233725576e-05, "loss": 0.8623, "step": 939 }, { "epoch": 0.14020434036840929, "grad_norm": 2.4215502738952637, "learning_rate": 1.937095469935249e-05, "loss": 0.7615, "step": 940 }, { "epoch": 0.1403534939219927, "grad_norm": 2.752847194671631, "learning_rate": 1.9369266977269946e-05, "loss": 0.7827, "step": 941 }, { "epoch": 0.14050264747557611, "grad_norm": 0.650356113910675, "learning_rate": 1.9367577067871948e-05, "loss": 0.259, "step": 942 }, { "epoch": 0.14065180102915953, "grad_norm": 2.4644906520843506, "learning_rate": 1.9365884971553014e-05, "loss": 0.7342, "step": 943 }, { "epoch": 0.14080095458274294, "grad_norm": 2.0687167644500732, "learning_rate": 1.9364190688708184e-05, "loss": 0.8348, "step": 944 }, { "epoch": 0.14095010813632636, "grad_norm": 2.114823341369629, "learning_rate": 1.9362494219732994e-05, "loss": 0.811, "step": 945 }, { "epoch": 0.14109926168990977, "grad_norm": 1.9836843013763428, "learning_rate": 1.9360795565023494e-05, "loss": 0.793, "step": 946 }, { "epoch": 0.14124841524349319, "grad_norm": 2.4133174419403076, "learning_rate": 1.9359094724976248e-05, "loss": 0.8649, "step": 947 }, { "epoch": 0.1413975687970766, "grad_norm": 2.0306396484375, "learning_rate": 1.935739169998833e-05, "loss": 0.7263, "step": 948 }, { "epoch": 0.14154672235066001, "grad_norm": 2.3687429428100586, "learning_rate": 1.9355686490457318e-05, "loss": 0.8119, "step": 949 }, { "epoch": 0.14169587590424343, "grad_norm": 1.706358790397644, "learning_rate": 1.9353979096781304e-05, "loss": 0.8368, "step": 950 }, { "epoch": 0.14184502945782684, "grad_norm": 2.009934186935425, "learning_rate": 1.9352269519358895e-05, "loss": 0.8628, "step": 951 }, { "epoch": 0.14199418301141026, "grad_norm": 3.3531107902526855, "learning_rate": 1.9350557758589195e-05, "loss": 0.7839, "step": 952 }, { "epoch": 0.14214333656499367, "grad_norm": 2.5806326866149902, "learning_rate": 1.9348843814871836e-05, "loss": 0.8224, "step": 953 }, { "epoch": 0.1422924901185771, "grad_norm": 2.091202735900879, "learning_rate": 1.934712768860694e-05, "loss": 0.7833, "step": 954 }, { "epoch": 0.1424416436721605, "grad_norm": 2.033689022064209, "learning_rate": 1.9345409380195154e-05, "loss": 0.7098, "step": 955 }, { "epoch": 0.14259079722574391, "grad_norm": 2.3632664680480957, "learning_rate": 1.934368889003762e-05, "loss": 0.7669, "step": 956 }, { "epoch": 0.14273995077932733, "grad_norm": 1.6150574684143066, "learning_rate": 1.9341966218536007e-05, "loss": 0.7461, "step": 957 }, { "epoch": 0.14288910433291074, "grad_norm": 1.8163273334503174, "learning_rate": 1.9340241366092475e-05, "loss": 0.8607, "step": 958 }, { "epoch": 0.14303825788649416, "grad_norm": 2.942878246307373, "learning_rate": 1.933851433310971e-05, "loss": 0.7129, "step": 959 }, { "epoch": 0.14318741144007757, "grad_norm": 1.9869203567504883, "learning_rate": 1.9336785119990894e-05, "loss": 0.7652, "step": 960 }, { "epoch": 0.143336564993661, "grad_norm": 2.799118995666504, "learning_rate": 1.933505372713972e-05, "loss": 0.7057, "step": 961 }, { "epoch": 0.1434857185472444, "grad_norm": 2.2259106636047363, "learning_rate": 1.9333320154960403e-05, "loss": 0.832, "step": 962 }, { "epoch": 0.14363487210082782, "grad_norm": 2.001495599746704, "learning_rate": 1.9331584403857645e-05, "loss": 0.8829, "step": 963 }, { "epoch": 0.14378402565441123, "grad_norm": 1.9048075675964355, "learning_rate": 1.9329846474236676e-05, "loss": 0.8228, "step": 964 }, { "epoch": 0.14393317920799464, "grad_norm": 3.080866813659668, "learning_rate": 1.9328106366503227e-05, "loss": 0.7786, "step": 965 }, { "epoch": 0.14408233276157806, "grad_norm": 3.2860803604125977, "learning_rate": 1.932636408106353e-05, "loss": 0.7235, "step": 966 }, { "epoch": 0.14423148631516147, "grad_norm": 2.2638936042785645, "learning_rate": 1.9324619618324338e-05, "loss": 0.7533, "step": 967 }, { "epoch": 0.1443806398687449, "grad_norm": 2.166391611099243, "learning_rate": 1.9322872978692907e-05, "loss": 0.8582, "step": 968 }, { "epoch": 0.14452979342232827, "grad_norm": 6.940752029418945, "learning_rate": 1.9321124162577e-05, "loss": 0.7682, "step": 969 }, { "epoch": 0.1446789469759117, "grad_norm": 1.865842342376709, "learning_rate": 1.9319373170384895e-05, "loss": 0.8294, "step": 970 }, { "epoch": 0.1448281005294951, "grad_norm": 2.04042387008667, "learning_rate": 1.931762000252536e-05, "loss": 0.7575, "step": 971 }, { "epoch": 0.14497725408307852, "grad_norm": 2.640061616897583, "learning_rate": 1.9315864659407696e-05, "loss": 0.8848, "step": 972 }, { "epoch": 0.14512640763666193, "grad_norm": 2.4717369079589844, "learning_rate": 1.931410714144169e-05, "loss": 0.7429, "step": 973 }, { "epoch": 0.14527556119024534, "grad_norm": 2.0837626457214355, "learning_rate": 1.931234744903765e-05, "loss": 0.8609, "step": 974 }, { "epoch": 0.14542471474382876, "grad_norm": 2.9800631999969482, "learning_rate": 1.9310585582606385e-05, "loss": 0.7852, "step": 975 }, { "epoch": 0.14557386829741217, "grad_norm": 4.168429374694824, "learning_rate": 1.930882154255922e-05, "loss": 0.8382, "step": 976 }, { "epoch": 0.1457230218509956, "grad_norm": 3.96506404876709, "learning_rate": 1.9307055329307975e-05, "loss": 0.7794, "step": 977 }, { "epoch": 0.145872175404579, "grad_norm": 2.903531074523926, "learning_rate": 1.930528694326499e-05, "loss": 0.8678, "step": 978 }, { "epoch": 0.14602132895816242, "grad_norm": 2.2958879470825195, "learning_rate": 1.9303516384843093e-05, "loss": 0.8661, "step": 979 }, { "epoch": 0.14617048251174583, "grad_norm": 3.445472002029419, "learning_rate": 1.9301743654455652e-05, "loss": 0.9208, "step": 980 }, { "epoch": 0.14631963606532924, "grad_norm": 0.6080155372619629, "learning_rate": 1.9299968752516505e-05, "loss": 0.2267, "step": 981 }, { "epoch": 0.14646878961891266, "grad_norm": 2.1973538398742676, "learning_rate": 1.9298191679440024e-05, "loss": 0.7896, "step": 982 }, { "epoch": 0.14661794317249607, "grad_norm": 2.4499435424804688, "learning_rate": 1.9296412435641073e-05, "loss": 0.7882, "step": 983 }, { "epoch": 0.1467670967260795, "grad_norm": 2.682828187942505, "learning_rate": 1.929463102153503e-05, "loss": 0.861, "step": 984 }, { "epoch": 0.1469162502796629, "grad_norm": 3.5421383380889893, "learning_rate": 1.9292847437537784e-05, "loss": 0.7557, "step": 985 }, { "epoch": 0.14706540383324632, "grad_norm": 22.724472045898438, "learning_rate": 1.929106168406571e-05, "loss": 0.8345, "step": 986 }, { "epoch": 0.14721455738682973, "grad_norm": 2.1804823875427246, "learning_rate": 1.9289273761535713e-05, "loss": 0.7743, "step": 987 }, { "epoch": 0.14736371094041315, "grad_norm": 2.4512834548950195, "learning_rate": 1.9287483670365193e-05, "loss": 0.7628, "step": 988 }, { "epoch": 0.14751286449399656, "grad_norm": 2.742365837097168, "learning_rate": 1.928569141097206e-05, "loss": 0.8356, "step": 989 }, { "epoch": 0.14766201804757997, "grad_norm": 8.783019065856934, "learning_rate": 1.9283896983774727e-05, "loss": 0.8183, "step": 990 }, { "epoch": 0.1478111716011634, "grad_norm": 2.0950756072998047, "learning_rate": 1.9282100389192116e-05, "loss": 0.7846, "step": 991 }, { "epoch": 0.1479603251547468, "grad_norm": 1.6336328983306885, "learning_rate": 1.9280301627643647e-05, "loss": 0.7988, "step": 992 }, { "epoch": 0.14810947870833022, "grad_norm": 5.707268238067627, "learning_rate": 1.927850069954926e-05, "loss": 0.8148, "step": 993 }, { "epoch": 0.14825863226191363, "grad_norm": 2.968501091003418, "learning_rate": 1.9276697605329392e-05, "loss": 0.8213, "step": 994 }, { "epoch": 0.14840778581549705, "grad_norm": 2.3638863563537598, "learning_rate": 1.9274892345404985e-05, "loss": 0.8747, "step": 995 }, { "epoch": 0.14855693936908046, "grad_norm": 17.468721389770508, "learning_rate": 1.9273084920197488e-05, "loss": 0.8462, "step": 996 }, { "epoch": 0.14870609292266387, "grad_norm": 2.561661958694458, "learning_rate": 1.9271275330128856e-05, "loss": 0.8641, "step": 997 }, { "epoch": 0.1488552464762473, "grad_norm": 3.5113322734832764, "learning_rate": 1.9269463575621552e-05, "loss": 0.7898, "step": 998 }, { "epoch": 0.1490044000298307, "grad_norm": 4.0780558586120605, "learning_rate": 1.926764965709854e-05, "loss": 0.8015, "step": 999 }, { "epoch": 0.14915355358341412, "grad_norm": 3.148930072784424, "learning_rate": 1.926583357498329e-05, "loss": 0.8513, "step": 1000 }, { "epoch": 0.14930270713699753, "grad_norm": 5.721534252166748, "learning_rate": 1.926401532969978e-05, "loss": 0.7622, "step": 1001 }, { "epoch": 0.14945186069058095, "grad_norm": 5.928384304046631, "learning_rate": 1.926219492167249e-05, "loss": 0.7338, "step": 1002 }, { "epoch": 0.14960101424416436, "grad_norm": 3.493884563446045, "learning_rate": 1.9260372351326406e-05, "loss": 0.8245, "step": 1003 }, { "epoch": 0.14975016779774777, "grad_norm": 2.7720866203308105, "learning_rate": 1.9258547619087017e-05, "loss": 0.8673, "step": 1004 }, { "epoch": 0.1498993213513312, "grad_norm": 8.100783348083496, "learning_rate": 1.9256720725380323e-05, "loss": 0.8235, "step": 1005 }, { "epoch": 0.1500484749049146, "grad_norm": 3.520995855331421, "learning_rate": 1.9254891670632823e-05, "loss": 0.8671, "step": 1006 }, { "epoch": 0.15019762845849802, "grad_norm": 2.2896411418914795, "learning_rate": 1.9253060455271516e-05, "loss": 0.746, "step": 1007 }, { "epoch": 0.15034678201208143, "grad_norm": 1.908290982246399, "learning_rate": 1.9251227079723917e-05, "loss": 0.8048, "step": 1008 }, { "epoch": 0.15049593556566485, "grad_norm": 5.252546787261963, "learning_rate": 1.924939154441803e-05, "loss": 0.7574, "step": 1009 }, { "epoch": 0.15064508911924826, "grad_norm": 4.661903381347656, "learning_rate": 1.924755384978239e-05, "loss": 0.7626, "step": 1010 }, { "epoch": 0.15079424267283167, "grad_norm": 2.2731003761291504, "learning_rate": 1.9245713996246e-05, "loss": 0.8157, "step": 1011 }, { "epoch": 0.1509433962264151, "grad_norm": 3.330455780029297, "learning_rate": 1.92438719842384e-05, "loss": 0.7821, "step": 1012 }, { "epoch": 0.1510925497799985, "grad_norm": 3.026158571243286, "learning_rate": 1.924202781418961e-05, "loss": 0.7677, "step": 1013 }, { "epoch": 0.15124170333358192, "grad_norm": 6.698156833648682, "learning_rate": 1.9240181486530166e-05, "loss": 0.7692, "step": 1014 }, { "epoch": 0.15139085688716533, "grad_norm": 5.063442707061768, "learning_rate": 1.9238333001691107e-05, "loss": 0.8055, "step": 1015 }, { "epoch": 0.15154001044074875, "grad_norm": 3.0125651359558105, "learning_rate": 1.923648236010397e-05, "loss": 0.8014, "step": 1016 }, { "epoch": 0.15168916399433216, "grad_norm": 7.749537944793701, "learning_rate": 1.9234629562200805e-05, "loss": 0.8127, "step": 1017 }, { "epoch": 0.15183831754791557, "grad_norm": 4.407508850097656, "learning_rate": 1.9232774608414153e-05, "loss": 0.8267, "step": 1018 }, { "epoch": 0.151987471101499, "grad_norm": 0.7123037576675415, "learning_rate": 1.9230917499177067e-05, "loss": 0.2321, "step": 1019 }, { "epoch": 0.1521366246550824, "grad_norm": 4.1747026443481445, "learning_rate": 1.9229058234923104e-05, "loss": 0.7964, "step": 1020 }, { "epoch": 0.15228577820866582, "grad_norm": 3.354031562805176, "learning_rate": 1.922719681608632e-05, "loss": 0.8019, "step": 1021 }, { "epoch": 0.15243493176224923, "grad_norm": 2.988852024078369, "learning_rate": 1.9225333243101275e-05, "loss": 0.8065, "step": 1022 }, { "epoch": 0.15258408531583265, "grad_norm": 1.7364163398742676, "learning_rate": 1.9223467516403028e-05, "loss": 0.8562, "step": 1023 }, { "epoch": 0.15273323886941606, "grad_norm": 7.357603073120117, "learning_rate": 1.922159963642715e-05, "loss": 0.8332, "step": 1024 }, { "epoch": 0.15288239242299947, "grad_norm": 4.5257391929626465, "learning_rate": 1.9219729603609706e-05, "loss": 0.78, "step": 1025 }, { "epoch": 0.1530315459765829, "grad_norm": 4.236738204956055, "learning_rate": 1.921785741838727e-05, "loss": 0.809, "step": 1026 }, { "epoch": 0.1531806995301663, "grad_norm": 2.527162551879883, "learning_rate": 1.921598308119691e-05, "loss": 0.7272, "step": 1027 }, { "epoch": 0.15332985308374972, "grad_norm": 2.079807758331299, "learning_rate": 1.9214106592476215e-05, "loss": 0.797, "step": 1028 }, { "epoch": 0.15347900663733313, "grad_norm": 4.539015293121338, "learning_rate": 1.9212227952663247e-05, "loss": 0.7509, "step": 1029 }, { "epoch": 0.15362816019091655, "grad_norm": 4.8181538581848145, "learning_rate": 1.9210347162196598e-05, "loss": 0.8021, "step": 1030 }, { "epoch": 0.15377731374449996, "grad_norm": 5.22769832611084, "learning_rate": 1.9208464221515347e-05, "loss": 0.9284, "step": 1031 }, { "epoch": 0.15392646729808337, "grad_norm": 5.169134140014648, "learning_rate": 1.9206579131059076e-05, "loss": 0.7742, "step": 1032 }, { "epoch": 0.1540756208516668, "grad_norm": 2.4419124126434326, "learning_rate": 1.9204691891267875e-05, "loss": 0.844, "step": 1033 }, { "epoch": 0.1542247744052502, "grad_norm": 6.100823879241943, "learning_rate": 1.9202802502582334e-05, "loss": 0.8479, "step": 1034 }, { "epoch": 0.15437392795883362, "grad_norm": 2.994243621826172, "learning_rate": 1.9200910965443537e-05, "loss": 0.8398, "step": 1035 }, { "epoch": 0.15452308151241703, "grad_norm": 2.022226333618164, "learning_rate": 1.919901728029308e-05, "loss": 0.8736, "step": 1036 }, { "epoch": 0.15467223506600045, "grad_norm": 3.5587565898895264, "learning_rate": 1.9197121447573053e-05, "loss": 0.8364, "step": 1037 }, { "epoch": 0.15482138861958386, "grad_norm": 2.3636927604675293, "learning_rate": 1.9195223467726056e-05, "loss": 0.749, "step": 1038 }, { "epoch": 0.15497054217316727, "grad_norm": 2.879638910293579, "learning_rate": 1.9193323341195175e-05, "loss": 0.7742, "step": 1039 }, { "epoch": 0.1551196957267507, "grad_norm": 3.0723183155059814, "learning_rate": 1.9191421068424017e-05, "loss": 0.7618, "step": 1040 }, { "epoch": 0.1552688492803341, "grad_norm": 3.568120241165161, "learning_rate": 1.918951664985667e-05, "loss": 0.8347, "step": 1041 }, { "epoch": 0.15541800283391752, "grad_norm": 4.552079677581787, "learning_rate": 1.918761008593774e-05, "loss": 0.7527, "step": 1042 }, { "epoch": 0.15556715638750093, "grad_norm": 2.24509859085083, "learning_rate": 1.9185701377112324e-05, "loss": 0.7125, "step": 1043 }, { "epoch": 0.15571630994108435, "grad_norm": 3.059147834777832, "learning_rate": 1.9183790523826022e-05, "loss": 0.8412, "step": 1044 }, { "epoch": 0.15586546349466776, "grad_norm": 0.6894723773002625, "learning_rate": 1.9181877526524937e-05, "loss": 0.2646, "step": 1045 }, { "epoch": 0.15601461704825118, "grad_norm": 27.46662139892578, "learning_rate": 1.9179962385655665e-05, "loss": 0.7984, "step": 1046 }, { "epoch": 0.1561637706018346, "grad_norm": 3.183537483215332, "learning_rate": 1.9178045101665317e-05, "loss": 0.7511, "step": 1047 }, { "epoch": 0.156312924155418, "grad_norm": 3.0791525840759277, "learning_rate": 1.9176125675001487e-05, "loss": 0.8466, "step": 1048 }, { "epoch": 0.15646207770900142, "grad_norm": 3.6660542488098145, "learning_rate": 1.917420410611228e-05, "loss": 0.7576, "step": 1049 }, { "epoch": 0.15661123126258483, "grad_norm": 3.8360917568206787, "learning_rate": 1.91722803954463e-05, "loss": 0.805, "step": 1050 }, { "epoch": 0.15676038481616825, "grad_norm": 4.691122055053711, "learning_rate": 1.917035454345265e-05, "loss": 0.8265, "step": 1051 }, { "epoch": 0.15690953836975166, "grad_norm": 3.1417007446289062, "learning_rate": 1.916842655058093e-05, "loss": 0.7429, "step": 1052 }, { "epoch": 0.15705869192333508, "grad_norm": 2.6371724605560303, "learning_rate": 1.9166496417281243e-05, "loss": 0.8064, "step": 1053 }, { "epoch": 0.1572078454769185, "grad_norm": 4.9562249183654785, "learning_rate": 1.916456414400419e-05, "loss": 0.8221, "step": 1054 }, { "epoch": 0.1573569990305019, "grad_norm": 10.607832908630371, "learning_rate": 1.9162629731200878e-05, "loss": 0.8525, "step": 1055 }, { "epoch": 0.15750615258408532, "grad_norm": 4.105825901031494, "learning_rate": 1.91606931793229e-05, "loss": 0.8361, "step": 1056 }, { "epoch": 0.15765530613766873, "grad_norm": 3.550576686859131, "learning_rate": 1.9158754488822366e-05, "loss": 0.8558, "step": 1057 }, { "epoch": 0.15780445969125215, "grad_norm": 6.532835483551025, "learning_rate": 1.9156813660151866e-05, "loss": 0.7716, "step": 1058 }, { "epoch": 0.15795361324483556, "grad_norm": 2.247488498687744, "learning_rate": 1.9154870693764504e-05, "loss": 0.7654, "step": 1059 }, { "epoch": 0.15810276679841898, "grad_norm": 1.8032113313674927, "learning_rate": 1.9152925590113878e-05, "loss": 0.8727, "step": 1060 }, { "epoch": 0.1582519203520024, "grad_norm": 2.1626408100128174, "learning_rate": 1.9150978349654082e-05, "loss": 0.7927, "step": 1061 }, { "epoch": 0.1584010739055858, "grad_norm": 2.174253463745117, "learning_rate": 1.9149028972839718e-05, "loss": 0.7909, "step": 1062 }, { "epoch": 0.15855022745916922, "grad_norm": 2.3437740802764893, "learning_rate": 1.9147077460125873e-05, "loss": 0.8444, "step": 1063 }, { "epoch": 0.15869938101275263, "grad_norm": 3.5826926231384277, "learning_rate": 1.9145123811968145e-05, "loss": 0.8405, "step": 1064 }, { "epoch": 0.15884853456633605, "grad_norm": 2.3546931743621826, "learning_rate": 1.9143168028822623e-05, "loss": 0.7242, "step": 1065 }, { "epoch": 0.15899768811991946, "grad_norm": 2.3243062496185303, "learning_rate": 1.9141210111145898e-05, "loss": 0.7804, "step": 1066 }, { "epoch": 0.15914684167350288, "grad_norm": 1.7282131910324097, "learning_rate": 1.913925005939506e-05, "loss": 0.8192, "step": 1067 }, { "epoch": 0.1592959952270863, "grad_norm": 4.227153778076172, "learning_rate": 1.9137287874027696e-05, "loss": 0.7683, "step": 1068 }, { "epoch": 0.1594451487806697, "grad_norm": 1.97390878200531, "learning_rate": 1.9135323555501885e-05, "loss": 0.8481, "step": 1069 }, { "epoch": 0.15959430233425312, "grad_norm": 1.9597947597503662, "learning_rate": 1.9133357104276218e-05, "loss": 0.8385, "step": 1070 }, { "epoch": 0.15974345588783653, "grad_norm": 4.2316575050354, "learning_rate": 1.913138852080977e-05, "loss": 0.8903, "step": 1071 }, { "epoch": 0.15989260944141995, "grad_norm": 2.4509313106536865, "learning_rate": 1.912941780556212e-05, "loss": 0.795, "step": 1072 }, { "epoch": 0.16004176299500336, "grad_norm": 3.3481173515319824, "learning_rate": 1.9127444958993345e-05, "loss": 0.793, "step": 1073 }, { "epoch": 0.16019091654858678, "grad_norm": 2.1127548217773438, "learning_rate": 1.912546998156402e-05, "loss": 0.6882, "step": 1074 }, { "epoch": 0.1603400701021702, "grad_norm": 2.4034221172332764, "learning_rate": 1.9123492873735216e-05, "loss": 0.8083, "step": 1075 }, { "epoch": 0.1604892236557536, "grad_norm": 3.4193923473358154, "learning_rate": 1.9121513635968497e-05, "loss": 0.729, "step": 1076 }, { "epoch": 0.16063837720933702, "grad_norm": 2.084831714630127, "learning_rate": 1.9119532268725935e-05, "loss": 0.8244, "step": 1077 }, { "epoch": 0.16078753076292043, "grad_norm": 4.374318599700928, "learning_rate": 1.9117548772470093e-05, "loss": 0.7173, "step": 1078 }, { "epoch": 0.16093668431650385, "grad_norm": 1.520500659942627, "learning_rate": 1.9115563147664022e-05, "loss": 0.8366, "step": 1079 }, { "epoch": 0.16108583787008726, "grad_norm": 4.5796589851379395, "learning_rate": 1.9113575394771287e-05, "loss": 0.827, "step": 1080 }, { "epoch": 0.16123499142367068, "grad_norm": 2.1846067905426025, "learning_rate": 1.9111585514255943e-05, "loss": 0.8094, "step": 1081 }, { "epoch": 0.1613841449772541, "grad_norm": 3.9214909076690674, "learning_rate": 1.910959350658253e-05, "loss": 0.817, "step": 1082 }, { "epoch": 0.1615332985308375, "grad_norm": 1.7585581541061401, "learning_rate": 1.9107599372216107e-05, "loss": 0.8112, "step": 1083 }, { "epoch": 0.16168245208442092, "grad_norm": 1.583350658416748, "learning_rate": 1.9105603111622212e-05, "loss": 0.8597, "step": 1084 }, { "epoch": 0.16183160563800433, "grad_norm": 4.46002197265625, "learning_rate": 1.910360472526688e-05, "loss": 0.7714, "step": 1085 }, { "epoch": 0.16198075919158775, "grad_norm": 2.598829746246338, "learning_rate": 1.910160421361666e-05, "loss": 0.8461, "step": 1086 }, { "epoch": 0.16212991274517116, "grad_norm": 2.282369613647461, "learning_rate": 1.9099601577138572e-05, "loss": 0.7708, "step": 1087 }, { "epoch": 0.16227906629875458, "grad_norm": 3.445528030395508, "learning_rate": 1.909759681630015e-05, "loss": 0.828, "step": 1088 }, { "epoch": 0.162428219852338, "grad_norm": 3.276546001434326, "learning_rate": 1.9095589931569418e-05, "loss": 0.7193, "step": 1089 }, { "epoch": 0.1625773734059214, "grad_norm": 6.100493431091309, "learning_rate": 1.909358092341489e-05, "loss": 0.8953, "step": 1090 }, { "epoch": 0.16272652695950482, "grad_norm": 2.590681552886963, "learning_rate": 1.9091569792305593e-05, "loss": 0.8412, "step": 1091 }, { "epoch": 0.16287568051308823, "grad_norm": 2.541531801223755, "learning_rate": 1.908955653871103e-05, "loss": 0.8546, "step": 1092 }, { "epoch": 0.16302483406667165, "grad_norm": 2.033155918121338, "learning_rate": 1.908754116310121e-05, "loss": 0.7481, "step": 1093 }, { "epoch": 0.16317398762025506, "grad_norm": 3.1823947429656982, "learning_rate": 1.908552366594664e-05, "loss": 0.8176, "step": 1094 }, { "epoch": 0.16332314117383848, "grad_norm": 2.078035831451416, "learning_rate": 1.9083504047718308e-05, "loss": 0.8044, "step": 1095 }, { "epoch": 0.1634722947274219, "grad_norm": 2.1293625831604004, "learning_rate": 1.9081482308887716e-05, "loss": 0.8276, "step": 1096 }, { "epoch": 0.1636214482810053, "grad_norm": 2.306452751159668, "learning_rate": 1.9079458449926847e-05, "loss": 0.7885, "step": 1097 }, { "epoch": 0.16377060183458872, "grad_norm": 2.8253417015075684, "learning_rate": 1.9077432471308182e-05, "loss": 0.8405, "step": 1098 }, { "epoch": 0.16391975538817213, "grad_norm": 2.881188154220581, "learning_rate": 1.9075404373504705e-05, "loss": 0.8382, "step": 1099 }, { "epoch": 0.16406890894175555, "grad_norm": 1.5402213335037231, "learning_rate": 1.9073374156989888e-05, "loss": 0.7556, "step": 1100 }, { "epoch": 0.16421806249533896, "grad_norm": 1.994405746459961, "learning_rate": 1.907134182223769e-05, "loss": 0.7484, "step": 1101 }, { "epoch": 0.16436721604892238, "grad_norm": 3.166215181350708, "learning_rate": 1.906930736972258e-05, "loss": 0.7764, "step": 1102 }, { "epoch": 0.1645163696025058, "grad_norm": 1.7725739479064941, "learning_rate": 1.9067270799919512e-05, "loss": 0.747, "step": 1103 }, { "epoch": 0.1646655231560892, "grad_norm": 3.48893141746521, "learning_rate": 1.9065232113303934e-05, "loss": 0.8142, "step": 1104 }, { "epoch": 0.16481467670967262, "grad_norm": 1.8566703796386719, "learning_rate": 1.9063191310351797e-05, "loss": 0.8415, "step": 1105 }, { "epoch": 0.16496383026325603, "grad_norm": 2.285134792327881, "learning_rate": 1.9061148391539534e-05, "loss": 0.8561, "step": 1106 }, { "epoch": 0.16511298381683945, "grad_norm": 0.6317870616912842, "learning_rate": 1.9059103357344075e-05, "loss": 0.2284, "step": 1107 }, { "epoch": 0.16526213737042286, "grad_norm": 2.7076690196990967, "learning_rate": 1.905705620824285e-05, "loss": 0.8236, "step": 1108 }, { "epoch": 0.16541129092400628, "grad_norm": 2.173612117767334, "learning_rate": 1.9055006944713782e-05, "loss": 0.7962, "step": 1109 }, { "epoch": 0.1655604444775897, "grad_norm": 2.6929738521575928, "learning_rate": 1.905295556723528e-05, "loss": 0.7393, "step": 1110 }, { "epoch": 0.1657095980311731, "grad_norm": 3.8751492500305176, "learning_rate": 1.9050902076286253e-05, "loss": 0.8248, "step": 1111 }, { "epoch": 0.16585875158475652, "grad_norm": 1.9241743087768555, "learning_rate": 1.9048846472346102e-05, "loss": 0.758, "step": 1112 }, { "epoch": 0.16600790513833993, "grad_norm": 12.4519681930542, "learning_rate": 1.9046788755894722e-05, "loss": 0.8292, "step": 1113 }, { "epoch": 0.16615705869192335, "grad_norm": 2.190614938735962, "learning_rate": 1.9044728927412495e-05, "loss": 0.7855, "step": 1114 }, { "epoch": 0.16630621224550676, "grad_norm": 2.10723614692688, "learning_rate": 1.904266698738031e-05, "loss": 0.7921, "step": 1115 }, { "epoch": 0.16645536579909018, "grad_norm": 2.0046260356903076, "learning_rate": 1.904060293627953e-05, "loss": 0.8474, "step": 1116 }, { "epoch": 0.1666045193526736, "grad_norm": 1.747193694114685, "learning_rate": 1.903853677459203e-05, "loss": 0.8071, "step": 1117 }, { "epoch": 0.16675367290625698, "grad_norm": 1.7617402076721191, "learning_rate": 1.903646850280016e-05, "loss": 0.8117, "step": 1118 }, { "epoch": 0.1669028264598404, "grad_norm": 3.06351375579834, "learning_rate": 1.903439812138678e-05, "loss": 0.7965, "step": 1119 }, { "epoch": 0.1670519800134238, "grad_norm": 2.6982295513153076, "learning_rate": 1.9032325630835227e-05, "loss": 0.7904, "step": 1120 }, { "epoch": 0.16720113356700722, "grad_norm": 3.7339324951171875, "learning_rate": 1.9030251031629338e-05, "loss": 0.7861, "step": 1121 }, { "epoch": 0.16735028712059064, "grad_norm": 2.0988054275512695, "learning_rate": 1.902817432425345e-05, "loss": 0.8196, "step": 1122 }, { "epoch": 0.16749944067417405, "grad_norm": 2.116464138031006, "learning_rate": 1.902609550919237e-05, "loss": 0.7439, "step": 1123 }, { "epoch": 0.16764859422775746, "grad_norm": 2.377239465713501, "learning_rate": 1.902401458693142e-05, "loss": 0.7631, "step": 1124 }, { "epoch": 0.16779774778134088, "grad_norm": 2.4058687686920166, "learning_rate": 1.9021931557956404e-05, "loss": 0.8188, "step": 1125 }, { "epoch": 0.1679469013349243, "grad_norm": 1.833255648612976, "learning_rate": 1.9019846422753615e-05, "loss": 0.8526, "step": 1126 }, { "epoch": 0.1680960548885077, "grad_norm": 2.830385208129883, "learning_rate": 1.9017759181809846e-05, "loss": 0.827, "step": 1127 }, { "epoch": 0.16824520844209112, "grad_norm": 3.1613247394561768, "learning_rate": 1.9015669835612375e-05, "loss": 0.7839, "step": 1128 }, { "epoch": 0.16839436199567454, "grad_norm": 0.6208321452140808, "learning_rate": 1.9013578384648968e-05, "loss": 0.2393, "step": 1129 }, { "epoch": 0.16854351554925795, "grad_norm": 3.480161428451538, "learning_rate": 1.901148482940789e-05, "loss": 0.8687, "step": 1130 }, { "epoch": 0.16869266910284136, "grad_norm": 2.1760165691375732, "learning_rate": 1.90093891703779e-05, "loss": 0.8647, "step": 1131 }, { "epoch": 0.16884182265642478, "grad_norm": 1.9063862562179565, "learning_rate": 1.9007291408048238e-05, "loss": 0.7809, "step": 1132 }, { "epoch": 0.1689909762100082, "grad_norm": 4.233536720275879, "learning_rate": 1.900519154290864e-05, "loss": 0.718, "step": 1133 }, { "epoch": 0.1691401297635916, "grad_norm": 2.2327721118927, "learning_rate": 1.900308957544934e-05, "loss": 0.77, "step": 1134 }, { "epoch": 0.16928928331717502, "grad_norm": 2.6692349910736084, "learning_rate": 1.9000985506161047e-05, "loss": 0.7686, "step": 1135 }, { "epoch": 0.16943843687075844, "grad_norm": 1.9706507921218872, "learning_rate": 1.8998879335534973e-05, "loss": 0.7821, "step": 1136 }, { "epoch": 0.16958759042434185, "grad_norm": 1.9263511896133423, "learning_rate": 1.899677106406282e-05, "loss": 0.8097, "step": 1137 }, { "epoch": 0.16973674397792526, "grad_norm": 2.8062970638275146, "learning_rate": 1.899466069223677e-05, "loss": 0.8592, "step": 1138 }, { "epoch": 0.16988589753150868, "grad_norm": 2.399930715560913, "learning_rate": 1.899254822054951e-05, "loss": 0.741, "step": 1139 }, { "epoch": 0.1700350510850921, "grad_norm": 3.5449821949005127, "learning_rate": 1.899043364949421e-05, "loss": 0.8098, "step": 1140 }, { "epoch": 0.1701842046386755, "grad_norm": 2.970799446105957, "learning_rate": 1.8988316979564523e-05, "loss": 0.7454, "step": 1141 }, { "epoch": 0.17033335819225892, "grad_norm": 1.855944037437439, "learning_rate": 1.8986198211254604e-05, "loss": 0.8136, "step": 1142 }, { "epoch": 0.17048251174584234, "grad_norm": 1.8846838474273682, "learning_rate": 1.8984077345059092e-05, "loss": 0.9, "step": 1143 }, { "epoch": 0.17063166529942575, "grad_norm": 2.319122314453125, "learning_rate": 1.8981954381473122e-05, "loss": 0.7897, "step": 1144 }, { "epoch": 0.17078081885300916, "grad_norm": 2.428562641143799, "learning_rate": 1.8979829320992307e-05, "loss": 0.6446, "step": 1145 }, { "epoch": 0.17092997240659258, "grad_norm": 2.0263640880584717, "learning_rate": 1.8977702164112757e-05, "loss": 0.8213, "step": 1146 }, { "epoch": 0.171079125960176, "grad_norm": 1.7895526885986328, "learning_rate": 1.897557291133107e-05, "loss": 0.7473, "step": 1147 }, { "epoch": 0.1712282795137594, "grad_norm": 2.8796231746673584, "learning_rate": 1.8973441563144338e-05, "loss": 0.717, "step": 1148 }, { "epoch": 0.17137743306734282, "grad_norm": 3.7610247135162354, "learning_rate": 1.8971308120050133e-05, "loss": 0.7324, "step": 1149 }, { "epoch": 0.17152658662092624, "grad_norm": 2.057750701904297, "learning_rate": 1.8969172582546528e-05, "loss": 0.7574, "step": 1150 }, { "epoch": 0.17167574017450965, "grad_norm": 0.7156519889831543, "learning_rate": 1.8967034951132066e-05, "loss": 0.2965, "step": 1151 }, { "epoch": 0.17182489372809306, "grad_norm": 3.2254464626312256, "learning_rate": 1.8964895226305802e-05, "loss": 0.7571, "step": 1152 }, { "epoch": 0.17197404728167648, "grad_norm": 1.63334321975708, "learning_rate": 1.8962753408567263e-05, "loss": 0.8407, "step": 1153 }, { "epoch": 0.1721232008352599, "grad_norm": 2.3304049968719482, "learning_rate": 1.896060949841647e-05, "loss": 0.7518, "step": 1154 }, { "epoch": 0.1722723543888433, "grad_norm": 2.9414353370666504, "learning_rate": 1.8958463496353935e-05, "loss": 0.7657, "step": 1155 }, { "epoch": 0.17242150794242672, "grad_norm": 1.6082231998443604, "learning_rate": 1.8956315402880655e-05, "loss": 0.7827, "step": 1156 }, { "epoch": 0.17257066149601014, "grad_norm": 1.922834038734436, "learning_rate": 1.8954165218498115e-05, "loss": 0.7145, "step": 1157 }, { "epoch": 0.17271981504959355, "grad_norm": 2.7060546875, "learning_rate": 1.895201294370829e-05, "loss": 0.8524, "step": 1158 }, { "epoch": 0.17286896860317696, "grad_norm": 2.8209950923919678, "learning_rate": 1.8949858579013645e-05, "loss": 0.7997, "step": 1159 }, { "epoch": 0.17301812215676038, "grad_norm": 4.338016986846924, "learning_rate": 1.8947702124917126e-05, "loss": 0.7687, "step": 1160 }, { "epoch": 0.1731672757103438, "grad_norm": 2.2888758182525635, "learning_rate": 1.8945543581922173e-05, "loss": 0.8788, "step": 1161 }, { "epoch": 0.1733164292639272, "grad_norm": 1.8160141706466675, "learning_rate": 1.8943382950532713e-05, "loss": 0.779, "step": 1162 }, { "epoch": 0.17346558281751062, "grad_norm": 1.801240086555481, "learning_rate": 1.894122023125316e-05, "loss": 0.7768, "step": 1163 }, { "epoch": 0.17361473637109404, "grad_norm": 4.694801330566406, "learning_rate": 1.8939055424588407e-05, "loss": 0.769, "step": 1164 }, { "epoch": 0.17376388992467745, "grad_norm": 1.906282901763916, "learning_rate": 1.8936888531043853e-05, "loss": 0.7983, "step": 1165 }, { "epoch": 0.17391304347826086, "grad_norm": 2.1289737224578857, "learning_rate": 1.8934719551125364e-05, "loss": 0.7526, "step": 1166 }, { "epoch": 0.17406219703184428, "grad_norm": 1.5408941507339478, "learning_rate": 1.8932548485339304e-05, "loss": 0.7156, "step": 1167 }, { "epoch": 0.1742113505854277, "grad_norm": 2.1503355503082275, "learning_rate": 1.893037533419253e-05, "loss": 0.806, "step": 1168 }, { "epoch": 0.1743605041390111, "grad_norm": 2.374664068222046, "learning_rate": 1.8928200098192372e-05, "loss": 0.8002, "step": 1169 }, { "epoch": 0.17450965769259452, "grad_norm": 1.827865481376648, "learning_rate": 1.8926022777846647e-05, "loss": 0.7659, "step": 1170 }, { "epoch": 0.17465881124617794, "grad_norm": 2.8104758262634277, "learning_rate": 1.8923843373663676e-05, "loss": 0.8219, "step": 1171 }, { "epoch": 0.17480796479976135, "grad_norm": 3.241145133972168, "learning_rate": 1.8921661886152248e-05, "loss": 0.8177, "step": 1172 }, { "epoch": 0.17495711835334476, "grad_norm": 3.464935302734375, "learning_rate": 1.8919478315821646e-05, "loss": 0.7382, "step": 1173 }, { "epoch": 0.17510627190692818, "grad_norm": 2.038585901260376, "learning_rate": 1.8917292663181638e-05, "loss": 0.8354, "step": 1174 }, { "epoch": 0.1752554254605116, "grad_norm": 2.371997117996216, "learning_rate": 1.8915104928742484e-05, "loss": 0.7366, "step": 1175 }, { "epoch": 0.175404579014095, "grad_norm": 2.947603940963745, "learning_rate": 1.8912915113014918e-05, "loss": 0.8854, "step": 1176 }, { "epoch": 0.17555373256767842, "grad_norm": 3.317857027053833, "learning_rate": 1.8910723216510168e-05, "loss": 0.8054, "step": 1177 }, { "epoch": 0.17570288612126184, "grad_norm": 2.002261161804199, "learning_rate": 1.8908529239739946e-05, "loss": 0.7681, "step": 1178 }, { "epoch": 0.17585203967484525, "grad_norm": 5.707380771636963, "learning_rate": 1.8906333183216455e-05, "loss": 0.7265, "step": 1179 }, { "epoch": 0.17600119322842867, "grad_norm": 4.471221446990967, "learning_rate": 1.890413504745237e-05, "loss": 0.7746, "step": 1180 }, { "epoch": 0.17615034678201208, "grad_norm": 2.1977787017822266, "learning_rate": 1.890193483296087e-05, "loss": 0.8302, "step": 1181 }, { "epoch": 0.1762995003355955, "grad_norm": 7.853874206542969, "learning_rate": 1.88997325402556e-05, "loss": 0.7648, "step": 1182 }, { "epoch": 0.1764486538891789, "grad_norm": 5.380675792694092, "learning_rate": 1.8897528169850706e-05, "loss": 0.6531, "step": 1183 }, { "epoch": 0.17659780744276232, "grad_norm": 2.5428812503814697, "learning_rate": 1.8895321722260806e-05, "loss": 0.813, "step": 1184 }, { "epoch": 0.17674696099634574, "grad_norm": 1.6823145151138306, "learning_rate": 1.8893113198001015e-05, "loss": 0.7983, "step": 1185 }, { "epoch": 0.17689611454992915, "grad_norm": 2.1610448360443115, "learning_rate": 1.8890902597586926e-05, "loss": 0.8364, "step": 1186 }, { "epoch": 0.17704526810351257, "grad_norm": 2.695946455001831, "learning_rate": 1.8888689921534612e-05, "loss": 0.7994, "step": 1187 }, { "epoch": 0.17719442165709598, "grad_norm": 2.5369434356689453, "learning_rate": 1.8886475170360644e-05, "loss": 0.7957, "step": 1188 }, { "epoch": 0.1773435752106794, "grad_norm": 2.7843668460845947, "learning_rate": 1.888425834458207e-05, "loss": 0.8185, "step": 1189 }, { "epoch": 0.1774927287642628, "grad_norm": 2.2332026958465576, "learning_rate": 1.8882039444716417e-05, "loss": 0.808, "step": 1190 }, { "epoch": 0.17764188231784622, "grad_norm": 1.5778794288635254, "learning_rate": 1.8879818471281703e-05, "loss": 0.7786, "step": 1191 }, { "epoch": 0.17779103587142964, "grad_norm": 3.517587184906006, "learning_rate": 1.8877595424796425e-05, "loss": 0.7376, "step": 1192 }, { "epoch": 0.17794018942501305, "grad_norm": 2.4248600006103516, "learning_rate": 1.887537030577958e-05, "loss": 0.737, "step": 1193 }, { "epoch": 0.17808934297859647, "grad_norm": 2.155339002609253, "learning_rate": 1.887314311475062e-05, "loss": 0.8265, "step": 1194 }, { "epoch": 0.17823849653217988, "grad_norm": 2.565547466278076, "learning_rate": 1.8870913852229513e-05, "loss": 0.6741, "step": 1195 }, { "epoch": 0.1783876500857633, "grad_norm": 1.7778321504592896, "learning_rate": 1.886868251873668e-05, "loss": 0.8402, "step": 1196 }, { "epoch": 0.1785368036393467, "grad_norm": 2.4160661697387695, "learning_rate": 1.886644911479305e-05, "loss": 0.8832, "step": 1197 }, { "epoch": 0.17868595719293012, "grad_norm": 2.0655124187469482, "learning_rate": 1.8864213640920023e-05, "loss": 0.7128, "step": 1198 }, { "epoch": 0.17883511074651354, "grad_norm": 0.7309791445732117, "learning_rate": 1.886197609763948e-05, "loss": 0.2495, "step": 1199 }, { "epoch": 0.17898426430009695, "grad_norm": 2.728119373321533, "learning_rate": 1.88597364854738e-05, "loss": 0.7615, "step": 1200 }, { "epoch": 0.17913341785368037, "grad_norm": 1.8141719102859497, "learning_rate": 1.8857494804945822e-05, "loss": 0.7865, "step": 1201 }, { "epoch": 0.17928257140726378, "grad_norm": 1.811522364616394, "learning_rate": 1.885525105657889e-05, "loss": 0.8454, "step": 1202 }, { "epoch": 0.1794317249608472, "grad_norm": 3.414031744003296, "learning_rate": 1.8853005240896818e-05, "loss": 0.8207, "step": 1203 }, { "epoch": 0.1795808785144306, "grad_norm": 7.862659454345703, "learning_rate": 1.8850757358423907e-05, "loss": 0.7701, "step": 1204 }, { "epoch": 0.17973003206801402, "grad_norm": 0.5278874635696411, "learning_rate": 1.884850740968494e-05, "loss": 0.2144, "step": 1205 }, { "epoch": 0.17987918562159744, "grad_norm": 1.8882378339767456, "learning_rate": 1.884625539520518e-05, "loss": 0.761, "step": 1206 }, { "epoch": 0.18002833917518085, "grad_norm": 3.3967347145080566, "learning_rate": 1.8844001315510375e-05, "loss": 0.8049, "step": 1207 }, { "epoch": 0.18017749272876427, "grad_norm": 2.208657741546631, "learning_rate": 1.8841745171126757e-05, "loss": 0.796, "step": 1208 }, { "epoch": 0.18032664628234768, "grad_norm": 8.503625869750977, "learning_rate": 1.8839486962581035e-05, "loss": 0.7687, "step": 1209 }, { "epoch": 0.1804757998359311, "grad_norm": 3.105268716812134, "learning_rate": 1.88372266904004e-05, "loss": 0.8212, "step": 1210 }, { "epoch": 0.1806249533895145, "grad_norm": 0.599524736404419, "learning_rate": 1.8834964355112532e-05, "loss": 0.2272, "step": 1211 }, { "epoch": 0.18077410694309792, "grad_norm": 4.37261438369751, "learning_rate": 1.8832699957245585e-05, "loss": 0.7236, "step": 1212 }, { "epoch": 0.18092326049668134, "grad_norm": 2.692173719406128, "learning_rate": 1.8830433497328194e-05, "loss": 0.7936, "step": 1213 }, { "epoch": 0.18107241405026475, "grad_norm": 2.3181796073913574, "learning_rate": 1.8828164975889486e-05, "loss": 0.7952, "step": 1214 }, { "epoch": 0.18122156760384817, "grad_norm": 3.2251906394958496, "learning_rate": 1.8825894393459058e-05, "loss": 0.7619, "step": 1215 }, { "epoch": 0.18137072115743158, "grad_norm": 2.34690523147583, "learning_rate": 1.882362175056699e-05, "loss": 0.7384, "step": 1216 }, { "epoch": 0.181519874711015, "grad_norm": 2.9654488563537598, "learning_rate": 1.8821347047743846e-05, "loss": 0.7317, "step": 1217 }, { "epoch": 0.1816690282645984, "grad_norm": 2.5896427631378174, "learning_rate": 1.8819070285520673e-05, "loss": 0.7773, "step": 1218 }, { "epoch": 0.18181818181818182, "grad_norm": 0.570091962814331, "learning_rate": 1.8816791464428993e-05, "loss": 0.2256, "step": 1219 }, { "epoch": 0.18196733537176524, "grad_norm": 2.513148307800293, "learning_rate": 1.8814510585000813e-05, "loss": 0.81, "step": 1220 }, { "epoch": 0.18211648892534865, "grad_norm": 2.3097705841064453, "learning_rate": 1.8812227647768616e-05, "loss": 0.7903, "step": 1221 }, { "epoch": 0.18226564247893207, "grad_norm": 2.3227546215057373, "learning_rate": 1.880994265326537e-05, "loss": 0.7634, "step": 1222 }, { "epoch": 0.18241479603251548, "grad_norm": 2.6859142780303955, "learning_rate": 1.8807655602024523e-05, "loss": 0.8484, "step": 1223 }, { "epoch": 0.1825639495860989, "grad_norm": 2.1044113636016846, "learning_rate": 1.8805366494580002e-05, "loss": 0.7622, "step": 1224 }, { "epoch": 0.1827131031396823, "grad_norm": 4.348609924316406, "learning_rate": 1.880307533146621e-05, "loss": 0.7731, "step": 1225 }, { "epoch": 0.18286225669326572, "grad_norm": 3.1236069202423096, "learning_rate": 1.8800782113218038e-05, "loss": 0.7682, "step": 1226 }, { "epoch": 0.18301141024684914, "grad_norm": 2.6928980350494385, "learning_rate": 1.879848684037085e-05, "loss": 0.8024, "step": 1227 }, { "epoch": 0.18316056380043255, "grad_norm": 2.941531181335449, "learning_rate": 1.8796189513460495e-05, "loss": 0.787, "step": 1228 }, { "epoch": 0.18330971735401597, "grad_norm": 2.1869256496429443, "learning_rate": 1.8793890133023295e-05, "loss": 0.7991, "step": 1229 }, { "epoch": 0.18345887090759938, "grad_norm": 3.6685128211975098, "learning_rate": 1.8791588699596057e-05, "loss": 0.7269, "step": 1230 }, { "epoch": 0.1836080244611828, "grad_norm": 3.852993965148926, "learning_rate": 1.878928521371606e-05, "loss": 0.8374, "step": 1231 }, { "epoch": 0.1837571780147662, "grad_norm": 1.4845640659332275, "learning_rate": 1.878697967592108e-05, "loss": 0.7266, "step": 1232 }, { "epoch": 0.18390633156834962, "grad_norm": 3.3396975994110107, "learning_rate": 1.878467208674935e-05, "loss": 0.7285, "step": 1233 }, { "epoch": 0.18405548512193304, "grad_norm": 1.4489290714263916, "learning_rate": 1.8782362446739594e-05, "loss": 0.9091, "step": 1234 }, { "epoch": 0.18420463867551645, "grad_norm": 2.7267160415649414, "learning_rate": 1.878005075643101e-05, "loss": 0.7409, "step": 1235 }, { "epoch": 0.18435379222909987, "grad_norm": 0.5035221576690674, "learning_rate": 1.877773701636328e-05, "loss": 0.2425, "step": 1236 }, { "epoch": 0.18450294578268328, "grad_norm": 2.164912462234497, "learning_rate": 1.877542122707656e-05, "loss": 0.8142, "step": 1237 }, { "epoch": 0.1846520993362667, "grad_norm": 1.9552243947982788, "learning_rate": 1.8773103389111486e-05, "loss": 0.7411, "step": 1238 }, { "epoch": 0.1848012528898501, "grad_norm": 1.9382476806640625, "learning_rate": 1.8770783503009174e-05, "loss": 0.7196, "step": 1239 }, { "epoch": 0.18495040644343352, "grad_norm": 1.7066532373428345, "learning_rate": 1.8768461569311215e-05, "loss": 0.8362, "step": 1240 }, { "epoch": 0.18509955999701694, "grad_norm": 9.926862716674805, "learning_rate": 1.8766137588559676e-05, "loss": 0.8123, "step": 1241 }, { "epoch": 0.18524871355060035, "grad_norm": 3.0318565368652344, "learning_rate": 1.876381156129711e-05, "loss": 0.7444, "step": 1242 }, { "epoch": 0.18539786710418377, "grad_norm": 2.12044095993042, "learning_rate": 1.876148348806654e-05, "loss": 0.7604, "step": 1243 }, { "epoch": 0.18554702065776718, "grad_norm": 2.305191993713379, "learning_rate": 1.875915336941147e-05, "loss": 0.6893, "step": 1244 }, { "epoch": 0.1856961742113506, "grad_norm": 4.796412467956543, "learning_rate": 1.8756821205875885e-05, "loss": 0.8023, "step": 1245 }, { "epoch": 0.185845327764934, "grad_norm": 2.139021158218384, "learning_rate": 1.875448699800424e-05, "loss": 0.8206, "step": 1246 }, { "epoch": 0.18599448131851742, "grad_norm": 2.455610513687134, "learning_rate": 1.8752150746341468e-05, "loss": 0.78, "step": 1247 }, { "epoch": 0.18614363487210084, "grad_norm": 3.0549702644348145, "learning_rate": 1.874981245143299e-05, "loss": 0.768, "step": 1248 }, { "epoch": 0.18629278842568425, "grad_norm": 2.4199838638305664, "learning_rate": 1.8747472113824687e-05, "loss": 0.7436, "step": 1249 }, { "epoch": 0.18644194197926767, "grad_norm": 2.3902783393859863, "learning_rate": 1.8745129734062934e-05, "loss": 0.9138, "step": 1250 }, { "epoch": 0.18659109553285108, "grad_norm": 2.4581425189971924, "learning_rate": 1.8742785312694564e-05, "loss": 0.7011, "step": 1251 }, { "epoch": 0.1867402490864345, "grad_norm": 1.6702909469604492, "learning_rate": 1.8740438850266907e-05, "loss": 0.8024, "step": 1252 }, { "epoch": 0.1868894026400179, "grad_norm": 2.438748359680176, "learning_rate": 1.873809034732776e-05, "loss": 0.6593, "step": 1253 }, { "epoch": 0.18703855619360132, "grad_norm": 2.20147967338562, "learning_rate": 1.873573980442539e-05, "loss": 0.8389, "step": 1254 }, { "epoch": 0.18718770974718474, "grad_norm": 2.148045063018799, "learning_rate": 1.8733387222108546e-05, "loss": 0.7567, "step": 1255 }, { "epoch": 0.18733686330076815, "grad_norm": 2.4178364276885986, "learning_rate": 1.873103260092646e-05, "loss": 0.835, "step": 1256 }, { "epoch": 0.18748601685435157, "grad_norm": 3.007380723953247, "learning_rate": 1.8728675941428827e-05, "loss": 0.822, "step": 1257 }, { "epoch": 0.18763517040793498, "grad_norm": 2.682657480239868, "learning_rate": 1.872631724416583e-05, "loss": 0.7535, "step": 1258 }, { "epoch": 0.1877843239615184, "grad_norm": 2.1691133975982666, "learning_rate": 1.8723956509688115e-05, "loss": 0.8049, "step": 1259 }, { "epoch": 0.1879334775151018, "grad_norm": 3.3836352825164795, "learning_rate": 1.8721593738546815e-05, "loss": 0.7688, "step": 1260 }, { "epoch": 0.18808263106868522, "grad_norm": 3.2430636882781982, "learning_rate": 1.8719228931293537e-05, "loss": 0.7777, "step": 1261 }, { "epoch": 0.18823178462226864, "grad_norm": 2.1415646076202393, "learning_rate": 1.8716862088480353e-05, "loss": 0.8927, "step": 1262 }, { "epoch": 0.18838093817585205, "grad_norm": 2.5579283237457275, "learning_rate": 1.8714493210659824e-05, "loss": 0.7896, "step": 1263 }, { "epoch": 0.18853009172943547, "grad_norm": 2.933960199356079, "learning_rate": 1.8712122298384977e-05, "loss": 0.8418, "step": 1264 }, { "epoch": 0.18867924528301888, "grad_norm": 2.817183017730713, "learning_rate": 1.8709749352209315e-05, "loss": 0.8282, "step": 1265 }, { "epoch": 0.1888283988366023, "grad_norm": 2.5209524631500244, "learning_rate": 1.8707374372686825e-05, "loss": 0.8717, "step": 1266 }, { "epoch": 0.18897755239018568, "grad_norm": 0.5501419305801392, "learning_rate": 1.870499736037195e-05, "loss": 0.2653, "step": 1267 }, { "epoch": 0.1891267059437691, "grad_norm": 2.34916615486145, "learning_rate": 1.8702618315819628e-05, "loss": 0.8354, "step": 1268 }, { "epoch": 0.1892758594973525, "grad_norm": 3.112663745880127, "learning_rate": 1.8700237239585253e-05, "loss": 0.7054, "step": 1269 }, { "epoch": 0.18942501305093593, "grad_norm": 3.1140267848968506, "learning_rate": 1.8697854132224713e-05, "loss": 0.7941, "step": 1270 }, { "epoch": 0.18957416660451934, "grad_norm": 2.194089651107788, "learning_rate": 1.8695468994294355e-05, "loss": 0.8205, "step": 1271 }, { "epoch": 0.18972332015810275, "grad_norm": 4.2866411209106445, "learning_rate": 1.8693081826351002e-05, "loss": 0.7252, "step": 1272 }, { "epoch": 0.18987247371168617, "grad_norm": 6.028648376464844, "learning_rate": 1.869069262895196e-05, "loss": 0.7297, "step": 1273 }, { "epoch": 0.19002162726526958, "grad_norm": 2.220998525619507, "learning_rate": 1.8688301402654995e-05, "loss": 0.6813, "step": 1274 }, { "epoch": 0.190170780818853, "grad_norm": 2.127930164337158, "learning_rate": 1.8685908148018362e-05, "loss": 0.6978, "step": 1275 }, { "epoch": 0.1903199343724364, "grad_norm": 2.7089428901672363, "learning_rate": 1.868351286560077e-05, "loss": 0.8707, "step": 1276 }, { "epoch": 0.19046908792601983, "grad_norm": 3.2410149574279785, "learning_rate": 1.868111555596143e-05, "loss": 0.7721, "step": 1277 }, { "epoch": 0.19061824147960324, "grad_norm": 3.050668478012085, "learning_rate": 1.8678716219659992e-05, "loss": 0.8167, "step": 1278 }, { "epoch": 0.19076739503318665, "grad_norm": 2.292487621307373, "learning_rate": 1.867631485725661e-05, "loss": 0.8022, "step": 1279 }, { "epoch": 0.19091654858677007, "grad_norm": 2.8475003242492676, "learning_rate": 1.867391146931189e-05, "loss": 0.7463, "step": 1280 }, { "epoch": 0.19106570214035348, "grad_norm": 2.226086378097534, "learning_rate": 1.8671506056386918e-05, "loss": 0.7944, "step": 1281 }, { "epoch": 0.1912148556939369, "grad_norm": 2.4573614597320557, "learning_rate": 1.866909861904326e-05, "loss": 0.877, "step": 1282 }, { "epoch": 0.1913640092475203, "grad_norm": 0.48684942722320557, "learning_rate": 1.8666689157842935e-05, "loss": 0.2332, "step": 1283 }, { "epoch": 0.19151316280110373, "grad_norm": 2.287616491317749, "learning_rate": 1.8664277673348463e-05, "loss": 0.8097, "step": 1284 }, { "epoch": 0.19166231635468714, "grad_norm": 2.1037673950195312, "learning_rate": 1.866186416612281e-05, "loss": 0.7745, "step": 1285 }, { "epoch": 0.19181146990827055, "grad_norm": 2.1988470554351807, "learning_rate": 1.8659448636729426e-05, "loss": 0.7175, "step": 1286 }, { "epoch": 0.19196062346185397, "grad_norm": 3.854637384414673, "learning_rate": 1.865703108573223e-05, "loss": 0.7027, "step": 1287 }, { "epoch": 0.19210977701543738, "grad_norm": 2.173640012741089, "learning_rate": 1.8654611513695622e-05, "loss": 0.836, "step": 1288 }, { "epoch": 0.1922589305690208, "grad_norm": 12.347512245178223, "learning_rate": 1.8652189921184462e-05, "loss": 0.8295, "step": 1289 }, { "epoch": 0.1924080841226042, "grad_norm": 2.5018861293792725, "learning_rate": 1.8649766308764085e-05, "loss": 0.815, "step": 1290 }, { "epoch": 0.19255723767618763, "grad_norm": 1.8205819129943848, "learning_rate": 1.8647340677000302e-05, "loss": 0.6917, "step": 1291 }, { "epoch": 0.19270639122977104, "grad_norm": 0.5233615040779114, "learning_rate": 1.864491302645939e-05, "loss": 0.2284, "step": 1292 }, { "epoch": 0.19285554478335445, "grad_norm": 2.7155182361602783, "learning_rate": 1.8642483357708102e-05, "loss": 0.7597, "step": 1293 }, { "epoch": 0.19300469833693787, "grad_norm": 1.6856476068496704, "learning_rate": 1.8640051671313656e-05, "loss": 0.8479, "step": 1294 }, { "epoch": 0.19315385189052128, "grad_norm": 2.2685580253601074, "learning_rate": 1.8637617967843748e-05, "loss": 0.8567, "step": 1295 }, { "epoch": 0.1933030054441047, "grad_norm": 3.2304561138153076, "learning_rate": 1.8635182247866545e-05, "loss": 0.801, "step": 1296 }, { "epoch": 0.1934521589976881, "grad_norm": 2.8282058238983154, "learning_rate": 1.863274451195067e-05, "loss": 0.7208, "step": 1297 }, { "epoch": 0.19360131255127153, "grad_norm": 6.359404563903809, "learning_rate": 1.8630304760665237e-05, "loss": 0.7715, "step": 1298 }, { "epoch": 0.19375046610485494, "grad_norm": 2.244926691055298, "learning_rate": 1.8627862994579823e-05, "loss": 0.7488, "step": 1299 }, { "epoch": 0.19389961965843835, "grad_norm": 0.521591305732727, "learning_rate": 1.862541921426447e-05, "loss": 0.2637, "step": 1300 }, { "epoch": 0.19404877321202177, "grad_norm": 2.4184963703155518, "learning_rate": 1.8622973420289692e-05, "loss": 0.785, "step": 1301 }, { "epoch": 0.19419792676560518, "grad_norm": 1.9293805360794067, "learning_rate": 1.862052561322648e-05, "loss": 0.8106, "step": 1302 }, { "epoch": 0.1943470803191886, "grad_norm": 4.507973670959473, "learning_rate": 1.8618075793646292e-05, "loss": 0.7638, "step": 1303 }, { "epoch": 0.194496233872772, "grad_norm": 16.102636337280273, "learning_rate": 1.8615623962121043e-05, "loss": 0.8023, "step": 1304 }, { "epoch": 0.19464538742635543, "grad_norm": 2.697291612625122, "learning_rate": 1.861317011922314e-05, "loss": 0.736, "step": 1305 }, { "epoch": 0.19479454097993884, "grad_norm": 2.0359857082366943, "learning_rate": 1.861071426552545e-05, "loss": 0.8137, "step": 1306 }, { "epoch": 0.19494369453352225, "grad_norm": 2.280965805053711, "learning_rate": 1.8608256401601294e-05, "loss": 0.7791, "step": 1307 }, { "epoch": 0.19509284808710567, "grad_norm": 2.183767795562744, "learning_rate": 1.860579652802449e-05, "loss": 0.8596, "step": 1308 }, { "epoch": 0.19524200164068908, "grad_norm": 5.11929988861084, "learning_rate": 1.8603334645369302e-05, "loss": 0.8112, "step": 1309 }, { "epoch": 0.1953911551942725, "grad_norm": 2.681131601333618, "learning_rate": 1.8600870754210477e-05, "loss": 0.7612, "step": 1310 }, { "epoch": 0.1955403087478559, "grad_norm": 2.646711826324463, "learning_rate": 1.859840485512323e-05, "loss": 0.7511, "step": 1311 }, { "epoch": 0.19568946230143933, "grad_norm": 2.0325989723205566, "learning_rate": 1.8595936948683234e-05, "loss": 0.7828, "step": 1312 }, { "epoch": 0.19583861585502274, "grad_norm": 3.478323221206665, "learning_rate": 1.8593467035466635e-05, "loss": 0.82, "step": 1313 }, { "epoch": 0.19598776940860616, "grad_norm": 8.19321346282959, "learning_rate": 1.859099511605006e-05, "loss": 0.7758, "step": 1314 }, { "epoch": 0.19613692296218957, "grad_norm": 3.712094306945801, "learning_rate": 1.8588521191010586e-05, "loss": 0.8148, "step": 1315 }, { "epoch": 0.19628607651577298, "grad_norm": 2.775988817214966, "learning_rate": 1.8586045260925773e-05, "loss": 0.7632, "step": 1316 }, { "epoch": 0.1964352300693564, "grad_norm": 2.5210142135620117, "learning_rate": 1.858356732637364e-05, "loss": 0.7542, "step": 1317 }, { "epoch": 0.1965843836229398, "grad_norm": 2.2147061824798584, "learning_rate": 1.8581087387932676e-05, "loss": 0.729, "step": 1318 }, { "epoch": 0.19673353717652323, "grad_norm": 2.8512001037597656, "learning_rate": 1.8578605446181838e-05, "loss": 0.7111, "step": 1319 }, { "epoch": 0.19688269073010664, "grad_norm": 3.185384750366211, "learning_rate": 1.8576121501700553e-05, "loss": 0.7066, "step": 1320 }, { "epoch": 0.19703184428369006, "grad_norm": 1.902845025062561, "learning_rate": 1.857363555506871e-05, "loss": 0.7185, "step": 1321 }, { "epoch": 0.19718099783727347, "grad_norm": 1.7494860887527466, "learning_rate": 1.8571147606866677e-05, "loss": 0.8795, "step": 1322 }, { "epoch": 0.19733015139085688, "grad_norm": 2.641038656234741, "learning_rate": 1.8568657657675272e-05, "loss": 0.7953, "step": 1323 }, { "epoch": 0.1974793049444403, "grad_norm": 2.9348294734954834, "learning_rate": 1.85661657080758e-05, "loss": 0.839, "step": 1324 }, { "epoch": 0.1976284584980237, "grad_norm": 1.783623456954956, "learning_rate": 1.8563671758650013e-05, "loss": 0.7792, "step": 1325 }, { "epoch": 0.19777761205160713, "grad_norm": 1.9879553318023682, "learning_rate": 1.8561175809980144e-05, "loss": 0.7954, "step": 1326 }, { "epoch": 0.19792676560519054, "grad_norm": 0.5618633031845093, "learning_rate": 1.8558677862648887e-05, "loss": 0.237, "step": 1327 }, { "epoch": 0.19807591915877396, "grad_norm": 3.8013322353363037, "learning_rate": 1.8556177917239406e-05, "loss": 0.7906, "step": 1328 }, { "epoch": 0.19822507271235737, "grad_norm": 3.23519229888916, "learning_rate": 1.8553675974335328e-05, "loss": 0.7809, "step": 1329 }, { "epoch": 0.19837422626594078, "grad_norm": 2.1840686798095703, "learning_rate": 1.8551172034520746e-05, "loss": 0.7911, "step": 1330 }, { "epoch": 0.1985233798195242, "grad_norm": 2.505990982055664, "learning_rate": 1.854866609838022e-05, "loss": 0.7766, "step": 1331 }, { "epoch": 0.1986725333731076, "grad_norm": 0.5355576872825623, "learning_rate": 1.8546158166498783e-05, "loss": 0.2562, "step": 1332 }, { "epoch": 0.19882168692669103, "grad_norm": 1.945932388305664, "learning_rate": 1.854364823946192e-05, "loss": 0.8112, "step": 1333 }, { "epoch": 0.19897084048027444, "grad_norm": 3.0201523303985596, "learning_rate": 1.8541136317855598e-05, "loss": 0.8083, "step": 1334 }, { "epoch": 0.19911999403385786, "grad_norm": 1.9987674951553345, "learning_rate": 1.8538622402266232e-05, "loss": 0.7935, "step": 1335 }, { "epoch": 0.19926914758744127, "grad_norm": 2.1595799922943115, "learning_rate": 1.853610649328072e-05, "loss": 0.7835, "step": 1336 }, { "epoch": 0.19941830114102468, "grad_norm": 2.181894063949585, "learning_rate": 1.853358859148641e-05, "loss": 0.8556, "step": 1337 }, { "epoch": 0.1995674546946081, "grad_norm": 6.425625801086426, "learning_rate": 1.8531068697471125e-05, "loss": 0.7034, "step": 1338 }, { "epoch": 0.1997166082481915, "grad_norm": 2.3842906951904297, "learning_rate": 1.8528546811823156e-05, "loss": 0.6981, "step": 1339 }, { "epoch": 0.19986576180177493, "grad_norm": 1.5852659940719604, "learning_rate": 1.8526022935131244e-05, "loss": 0.8455, "step": 1340 }, { "epoch": 0.20001491535535834, "grad_norm": 2.7132961750030518, "learning_rate": 1.852349706798461e-05, "loss": 0.7767, "step": 1341 }, { "epoch": 0.20016406890894176, "grad_norm": 1.8882261514663696, "learning_rate": 1.8520969210972932e-05, "loss": 0.746, "step": 1342 }, { "epoch": 0.20031322246252517, "grad_norm": 2.537346601486206, "learning_rate": 1.8518439364686358e-05, "loss": 0.8457, "step": 1343 }, { "epoch": 0.20046237601610858, "grad_norm": 1.7646015882492065, "learning_rate": 1.8515907529715492e-05, "loss": 0.7625, "step": 1344 }, { "epoch": 0.200611529569692, "grad_norm": 16.404340744018555, "learning_rate": 1.8513373706651406e-05, "loss": 0.8261, "step": 1345 }, { "epoch": 0.2007606831232754, "grad_norm": 2.017876625061035, "learning_rate": 1.8510837896085642e-05, "loss": 0.7984, "step": 1346 }, { "epoch": 0.20090983667685883, "grad_norm": 2.133983850479126, "learning_rate": 1.85083000986102e-05, "loss": 0.8366, "step": 1347 }, { "epoch": 0.20105899023044224, "grad_norm": 2.6359543800354004, "learning_rate": 1.8505760314817544e-05, "loss": 0.8578, "step": 1348 }, { "epoch": 0.20120814378402566, "grad_norm": 2.4234983921051025, "learning_rate": 1.8503218545300603e-05, "loss": 0.7543, "step": 1349 }, { "epoch": 0.20135729733760907, "grad_norm": 3.0893945693969727, "learning_rate": 1.850067479065277e-05, "loss": 0.8363, "step": 1350 }, { "epoch": 0.20150645089119248, "grad_norm": 2.6012465953826904, "learning_rate": 1.84981290514679e-05, "loss": 0.8197, "step": 1351 }, { "epoch": 0.2016556044447759, "grad_norm": 3.3458969593048096, "learning_rate": 1.8495581328340315e-05, "loss": 0.7768, "step": 1352 }, { "epoch": 0.2018047579983593, "grad_norm": 2.0643694400787354, "learning_rate": 1.8493031621864792e-05, "loss": 0.7795, "step": 1353 }, { "epoch": 0.20195391155194273, "grad_norm": 2.143440008163452, "learning_rate": 1.849047993263658e-05, "loss": 0.8318, "step": 1354 }, { "epoch": 0.20210306510552614, "grad_norm": 1.7199432849884033, "learning_rate": 1.8487926261251386e-05, "loss": 0.8186, "step": 1355 }, { "epoch": 0.20225221865910956, "grad_norm": 4.881162166595459, "learning_rate": 1.8485370608305384e-05, "loss": 0.7182, "step": 1356 }, { "epoch": 0.20240137221269297, "grad_norm": 2.345773696899414, "learning_rate": 1.8482812974395205e-05, "loss": 0.7836, "step": 1357 }, { "epoch": 0.20255052576627638, "grad_norm": 5.080211162567139, "learning_rate": 1.848025336011794e-05, "loss": 0.8221, "step": 1358 }, { "epoch": 0.2026996793198598, "grad_norm": 3.410480499267578, "learning_rate": 1.8477691766071156e-05, "loss": 0.7572, "step": 1359 }, { "epoch": 0.2028488328734432, "grad_norm": 2.500882863998413, "learning_rate": 1.847512819285287e-05, "loss": 0.7762, "step": 1360 }, { "epoch": 0.20299798642702663, "grad_norm": 4.3518967628479, "learning_rate": 1.8472562641061564e-05, "loss": 0.7151, "step": 1361 }, { "epoch": 0.20314713998061004, "grad_norm": 2.70234751701355, "learning_rate": 1.8469995111296183e-05, "loss": 0.7595, "step": 1362 }, { "epoch": 0.20329629353419346, "grad_norm": 1.6629480123519897, "learning_rate": 1.8467425604156133e-05, "loss": 0.8509, "step": 1363 }, { "epoch": 0.20344544708777687, "grad_norm": 2.7182233333587646, "learning_rate": 1.846485412024128e-05, "loss": 0.7534, "step": 1364 }, { "epoch": 0.20359460064136028, "grad_norm": 3.1632251739501953, "learning_rate": 1.8462280660151963e-05, "loss": 0.7506, "step": 1365 }, { "epoch": 0.2037437541949437, "grad_norm": 1.702002763748169, "learning_rate": 1.8459705224488958e-05, "loss": 0.8202, "step": 1366 }, { "epoch": 0.2038929077485271, "grad_norm": 4.204100608825684, "learning_rate": 1.845712781385353e-05, "loss": 0.6734, "step": 1367 }, { "epoch": 0.20404206130211053, "grad_norm": 1.893211841583252, "learning_rate": 1.8454548428847383e-05, "loss": 0.7909, "step": 1368 }, { "epoch": 0.20419121485569394, "grad_norm": 2.822861671447754, "learning_rate": 1.8451967070072693e-05, "loss": 0.7559, "step": 1369 }, { "epoch": 0.20434036840927736, "grad_norm": 2.8182594776153564, "learning_rate": 1.84493837381321e-05, "loss": 0.6888, "step": 1370 }, { "epoch": 0.20448952196286077, "grad_norm": 2.238226890563965, "learning_rate": 1.844679843362869e-05, "loss": 0.6914, "step": 1371 }, { "epoch": 0.20463867551644419, "grad_norm": 4.121176242828369, "learning_rate": 1.844421115716603e-05, "loss": 0.751, "step": 1372 }, { "epoch": 0.2047878290700276, "grad_norm": 1.5484964847564697, "learning_rate": 1.8441621909348132e-05, "loss": 0.8994, "step": 1373 }, { "epoch": 0.204936982623611, "grad_norm": 1.9902374744415283, "learning_rate": 1.8439030690779468e-05, "loss": 0.7894, "step": 1374 }, { "epoch": 0.20508613617719443, "grad_norm": 5.9574360847473145, "learning_rate": 1.8436437502064976e-05, "loss": 0.8141, "step": 1375 }, { "epoch": 0.20523528973077784, "grad_norm": 2.722743034362793, "learning_rate": 1.8433842343810058e-05, "loss": 0.7837, "step": 1376 }, { "epoch": 0.20538444328436126, "grad_norm": 2.735565185546875, "learning_rate": 1.8431245216620562e-05, "loss": 0.8134, "step": 1377 }, { "epoch": 0.20553359683794467, "grad_norm": 2.2164382934570312, "learning_rate": 1.8428646121102815e-05, "loss": 0.7035, "step": 1378 }, { "epoch": 0.20568275039152809, "grad_norm": 2.5971860885620117, "learning_rate": 1.8426045057863585e-05, "loss": 0.7389, "step": 1379 }, { "epoch": 0.2058319039451115, "grad_norm": 2.0451736450195312, "learning_rate": 1.8423442027510104e-05, "loss": 0.7927, "step": 1380 }, { "epoch": 0.2059810574986949, "grad_norm": 1.9204673767089844, "learning_rate": 1.8420837030650073e-05, "loss": 0.7585, "step": 1381 }, { "epoch": 0.20613021105227833, "grad_norm": 2.1308858394622803, "learning_rate": 1.8418230067891644e-05, "loss": 0.7355, "step": 1382 }, { "epoch": 0.20627936460586174, "grad_norm": 2.4867827892303467, "learning_rate": 1.8415621139843426e-05, "loss": 0.7603, "step": 1383 }, { "epoch": 0.20642851815944516, "grad_norm": 3.330124616622925, "learning_rate": 1.8413010247114492e-05, "loss": 0.7983, "step": 1384 }, { "epoch": 0.20657767171302857, "grad_norm": 1.9516277313232422, "learning_rate": 1.841039739031437e-05, "loss": 0.7328, "step": 1385 }, { "epoch": 0.20672682526661199, "grad_norm": 2.6689445972442627, "learning_rate": 1.840778257005305e-05, "loss": 0.7977, "step": 1386 }, { "epoch": 0.2068759788201954, "grad_norm": 2.161238431930542, "learning_rate": 1.8405165786940976e-05, "loss": 0.8476, "step": 1387 }, { "epoch": 0.20702513237377881, "grad_norm": 2.210757255554199, "learning_rate": 1.8402547041589057e-05, "loss": 0.8228, "step": 1388 }, { "epoch": 0.20717428592736223, "grad_norm": 10.995983123779297, "learning_rate": 1.8399926334608654e-05, "loss": 0.7358, "step": 1389 }, { "epoch": 0.20732343948094564, "grad_norm": 3.8095321655273438, "learning_rate": 1.8397303666611588e-05, "loss": 0.7872, "step": 1390 }, { "epoch": 0.20747259303452906, "grad_norm": 2.1636688709259033, "learning_rate": 1.839467903821014e-05, "loss": 0.7394, "step": 1391 }, { "epoch": 0.20762174658811247, "grad_norm": 2.9872779846191406, "learning_rate": 1.8392052450017036e-05, "loss": 0.8185, "step": 1392 }, { "epoch": 0.20777090014169589, "grad_norm": 4.907393932342529, "learning_rate": 1.838942390264548e-05, "loss": 0.7248, "step": 1393 }, { "epoch": 0.2079200536952793, "grad_norm": 1.8688935041427612, "learning_rate": 1.8386793396709123e-05, "loss": 0.8982, "step": 1394 }, { "epoch": 0.20806920724886271, "grad_norm": 4.0188140869140625, "learning_rate": 1.838416093282207e-05, "loss": 0.772, "step": 1395 }, { "epoch": 0.20821836080244613, "grad_norm": 4.6775641441345215, "learning_rate": 1.838152651159889e-05, "loss": 0.7356, "step": 1396 }, { "epoch": 0.20836751435602954, "grad_norm": 1.8780794143676758, "learning_rate": 1.83788901336546e-05, "loss": 0.7624, "step": 1397 }, { "epoch": 0.20851666790961296, "grad_norm": 3.2569432258605957, "learning_rate": 1.8376251799604684e-05, "loss": 0.8782, "step": 1398 }, { "epoch": 0.20866582146319637, "grad_norm": 1.7174274921417236, "learning_rate": 1.8373611510065077e-05, "loss": 0.7477, "step": 1399 }, { "epoch": 0.20881497501677979, "grad_norm": 3.1291582584381104, "learning_rate": 1.837096926565217e-05, "loss": 0.6787, "step": 1400 }, { "epoch": 0.2089641285703632, "grad_norm": 3.0005455017089844, "learning_rate": 1.8368325066982817e-05, "loss": 0.9002, "step": 1401 }, { "epoch": 0.20911328212394661, "grad_norm": 2.0357885360717773, "learning_rate": 1.836567891467431e-05, "loss": 0.7988, "step": 1402 }, { "epoch": 0.20926243567753003, "grad_norm": 4.610387325286865, "learning_rate": 1.8363030809344425e-05, "loss": 0.6914, "step": 1403 }, { "epoch": 0.20941158923111344, "grad_norm": 2.5922117233276367, "learning_rate": 1.8360380751611375e-05, "loss": 0.8071, "step": 1404 }, { "epoch": 0.20956074278469686, "grad_norm": 3.0457558631896973, "learning_rate": 1.8357728742093827e-05, "loss": 0.7346, "step": 1405 }, { "epoch": 0.20970989633828027, "grad_norm": 2.357156753540039, "learning_rate": 1.8355074781410918e-05, "loss": 0.7946, "step": 1406 }, { "epoch": 0.20985904989186369, "grad_norm": 0.6267806887626648, "learning_rate": 1.835241887018223e-05, "loss": 0.2725, "step": 1407 }, { "epoch": 0.2100082034454471, "grad_norm": 3.0003345012664795, "learning_rate": 1.8349761009027794e-05, "loss": 0.7781, "step": 1408 }, { "epoch": 0.21015735699903051, "grad_norm": 2.447859287261963, "learning_rate": 1.8347101198568116e-05, "loss": 0.7532, "step": 1409 }, { "epoch": 0.21030651055261393, "grad_norm": 4.678758144378662, "learning_rate": 1.8344439439424142e-05, "loss": 0.8081, "step": 1410 }, { "epoch": 0.21045566410619734, "grad_norm": 3.128300905227661, "learning_rate": 1.8341775732217275e-05, "loss": 0.7632, "step": 1411 }, { "epoch": 0.21060481765978076, "grad_norm": 11.577482223510742, "learning_rate": 1.8339110077569376e-05, "loss": 0.7935, "step": 1412 }, { "epoch": 0.21075397121336417, "grad_norm": 4.465867519378662, "learning_rate": 1.8336442476102757e-05, "loss": 0.791, "step": 1413 }, { "epoch": 0.2109031247669476, "grad_norm": 0.5044124126434326, "learning_rate": 1.8333772928440187e-05, "loss": 0.2362, "step": 1414 }, { "epoch": 0.211052278320531, "grad_norm": 2.4046833515167236, "learning_rate": 1.8331101435204896e-05, "loss": 0.7995, "step": 1415 }, { "epoch": 0.2112014318741144, "grad_norm": 2.3938441276550293, "learning_rate": 1.832842799702055e-05, "loss": 0.8445, "step": 1416 }, { "epoch": 0.2113505854276978, "grad_norm": 2.4612057209014893, "learning_rate": 1.832575261451129e-05, "loss": 0.8283, "step": 1417 }, { "epoch": 0.21149973898128122, "grad_norm": 3.4735898971557617, "learning_rate": 1.8323075288301693e-05, "loss": 0.7519, "step": 1418 }, { "epoch": 0.21164889253486463, "grad_norm": 3.0097858905792236, "learning_rate": 1.8320396019016805e-05, "loss": 0.7951, "step": 1419 }, { "epoch": 0.21179804608844804, "grad_norm": 2.496720552444458, "learning_rate": 1.8317714807282115e-05, "loss": 0.7595, "step": 1420 }, { "epoch": 0.21194719964203146, "grad_norm": 2.7538328170776367, "learning_rate": 1.831503165372357e-05, "loss": 0.7966, "step": 1421 }, { "epoch": 0.21209635319561487, "grad_norm": 6.351521015167236, "learning_rate": 1.831234655896757e-05, "loss": 0.7763, "step": 1422 }, { "epoch": 0.2122455067491983, "grad_norm": 2.8148488998413086, "learning_rate": 1.8309659523640965e-05, "loss": 0.8022, "step": 1423 }, { "epoch": 0.2123946603027817, "grad_norm": 2.4628257751464844, "learning_rate": 1.8306970548371062e-05, "loss": 0.8294, "step": 1424 }, { "epoch": 0.21254381385636512, "grad_norm": 2.4455623626708984, "learning_rate": 1.830427963378562e-05, "loss": 0.7211, "step": 1425 }, { "epoch": 0.21269296740994853, "grad_norm": 1.8000510931015015, "learning_rate": 1.830158678051285e-05, "loss": 0.818, "step": 1426 }, { "epoch": 0.21284212096353194, "grad_norm": 4.731330871582031, "learning_rate": 1.8298891989181417e-05, "loss": 0.8151, "step": 1427 }, { "epoch": 0.21299127451711536, "grad_norm": 3.428346872329712, "learning_rate": 1.8296195260420438e-05, "loss": 0.7264, "step": 1428 }, { "epoch": 0.21314042807069877, "grad_norm": 2.2754197120666504, "learning_rate": 1.8293496594859478e-05, "loss": 0.901, "step": 1429 }, { "epoch": 0.2132895816242822, "grad_norm": 2.287724018096924, "learning_rate": 1.829079599312856e-05, "loss": 0.7253, "step": 1430 }, { "epoch": 0.2134387351778656, "grad_norm": 3.252840757369995, "learning_rate": 1.828809345585816e-05, "loss": 0.726, "step": 1431 }, { "epoch": 0.21358788873144902, "grad_norm": 1.929551601409912, "learning_rate": 1.8285388983679192e-05, "loss": 0.8072, "step": 1432 }, { "epoch": 0.21373704228503243, "grad_norm": 1.8523309230804443, "learning_rate": 1.8282682577223044e-05, "loss": 0.8162, "step": 1433 }, { "epoch": 0.21388619583861584, "grad_norm": 0.5516501069068909, "learning_rate": 1.8279974237121537e-05, "loss": 0.2408, "step": 1434 }, { "epoch": 0.21403534939219926, "grad_norm": 2.1576297283172607, "learning_rate": 1.8277263964006958e-05, "loss": 0.894, "step": 1435 }, { "epoch": 0.21418450294578267, "grad_norm": 2.662875175476074, "learning_rate": 1.8274551758512026e-05, "loss": 0.8362, "step": 1436 }, { "epoch": 0.2143336564993661, "grad_norm": 1.57633638381958, "learning_rate": 1.8271837621269933e-05, "loss": 0.8124, "step": 1437 }, { "epoch": 0.2144828100529495, "grad_norm": 3.3258633613586426, "learning_rate": 1.8269121552914307e-05, "loss": 0.6554, "step": 1438 }, { "epoch": 0.21463196360653292, "grad_norm": 2.036513566970825, "learning_rate": 1.8266403554079237e-05, "loss": 0.7579, "step": 1439 }, { "epoch": 0.21478111716011633, "grad_norm": 0.5587921142578125, "learning_rate": 1.8263683625399244e-05, "loss": 0.2693, "step": 1440 }, { "epoch": 0.21493027071369974, "grad_norm": 1.6277704238891602, "learning_rate": 1.826096176750933e-05, "loss": 0.8279, "step": 1441 }, { "epoch": 0.21507942426728316, "grad_norm": 1.871964931488037, "learning_rate": 1.8258237981044915e-05, "loss": 0.752, "step": 1442 }, { "epoch": 0.21522857782086657, "grad_norm": 4.57315731048584, "learning_rate": 1.8255512266641894e-05, "loss": 0.7261, "step": 1443 }, { "epoch": 0.21537773137445, "grad_norm": 2.403218984603882, "learning_rate": 1.82527846249366e-05, "loss": 0.7638, "step": 1444 }, { "epoch": 0.2155268849280334, "grad_norm": 2.316667079925537, "learning_rate": 1.825005505656582e-05, "loss": 0.8079, "step": 1445 }, { "epoch": 0.21567603848161682, "grad_norm": 2.337874174118042, "learning_rate": 1.8247323562166785e-05, "loss": 0.8852, "step": 1446 }, { "epoch": 0.21582519203520023, "grad_norm": 2.416189670562744, "learning_rate": 1.8244590142377183e-05, "loss": 0.7126, "step": 1447 }, { "epoch": 0.21597434558878364, "grad_norm": 1.936765193939209, "learning_rate": 1.824185479783515e-05, "loss": 0.7866, "step": 1448 }, { "epoch": 0.21612349914236706, "grad_norm": 2.3109703063964844, "learning_rate": 1.8239117529179263e-05, "loss": 0.7908, "step": 1449 }, { "epoch": 0.21627265269595047, "grad_norm": 1.9724420309066772, "learning_rate": 1.8236378337048562e-05, "loss": 0.7644, "step": 1450 }, { "epoch": 0.2164218062495339, "grad_norm": 2.7447757720947266, "learning_rate": 1.8233637222082524e-05, "loss": 0.7919, "step": 1451 }, { "epoch": 0.2165709598031173, "grad_norm": 2.609410285949707, "learning_rate": 1.823089418492108e-05, "loss": 0.7153, "step": 1452 }, { "epoch": 0.21672011335670072, "grad_norm": 0.6001811623573303, "learning_rate": 1.8228149226204617e-05, "loss": 0.253, "step": 1453 }, { "epoch": 0.21686926691028413, "grad_norm": 2.205852508544922, "learning_rate": 1.8225402346573958e-05, "loss": 0.7377, "step": 1454 }, { "epoch": 0.21701842046386755, "grad_norm": 3.266230583190918, "learning_rate": 1.8222653546670377e-05, "loss": 0.8433, "step": 1455 }, { "epoch": 0.21716757401745096, "grad_norm": 0.49636736512184143, "learning_rate": 1.82199028271356e-05, "loss": 0.2432, "step": 1456 }, { "epoch": 0.21731672757103437, "grad_norm": 2.3896613121032715, "learning_rate": 1.8217150188611807e-05, "loss": 0.8024, "step": 1457 }, { "epoch": 0.2174658811246178, "grad_norm": 2.368680953979492, "learning_rate": 1.821439563174161e-05, "loss": 0.6532, "step": 1458 }, { "epoch": 0.2176150346782012, "grad_norm": 5.542156219482422, "learning_rate": 1.8211639157168082e-05, "loss": 0.7625, "step": 1459 }, { "epoch": 0.21776418823178462, "grad_norm": 1.7552763223648071, "learning_rate": 1.820888076553474e-05, "loss": 0.7869, "step": 1460 }, { "epoch": 0.21791334178536803, "grad_norm": 4.58807897567749, "learning_rate": 1.820612045748555e-05, "loss": 0.8531, "step": 1461 }, { "epoch": 0.21806249533895145, "grad_norm": 2.1785242557525635, "learning_rate": 1.8203358233664915e-05, "loss": 0.7851, "step": 1462 }, { "epoch": 0.21821164889253486, "grad_norm": 4.139426231384277, "learning_rate": 1.8200594094717708e-05, "loss": 0.8024, "step": 1463 }, { "epoch": 0.21836080244611827, "grad_norm": 1.7770295143127441, "learning_rate": 1.819782804128922e-05, "loss": 0.7862, "step": 1464 }, { "epoch": 0.2185099559997017, "grad_norm": 1.829182744026184, "learning_rate": 1.8195060074025216e-05, "loss": 0.7605, "step": 1465 }, { "epoch": 0.2186591095532851, "grad_norm": 2.9206480979919434, "learning_rate": 1.819229019357189e-05, "loss": 0.8224, "step": 1466 }, { "epoch": 0.21880826310686852, "grad_norm": 3.1756017208099365, "learning_rate": 1.8189518400575886e-05, "loss": 0.7099, "step": 1467 }, { "epoch": 0.21895741666045193, "grad_norm": 1.6667484045028687, "learning_rate": 1.81867446956843e-05, "loss": 0.75, "step": 1468 }, { "epoch": 0.21910657021403535, "grad_norm": 2.0332884788513184, "learning_rate": 1.8183969079544677e-05, "loss": 0.7668, "step": 1469 }, { "epoch": 0.21925572376761876, "grad_norm": 2.7457220554351807, "learning_rate": 1.818119155280499e-05, "loss": 0.7955, "step": 1470 }, { "epoch": 0.21940487732120217, "grad_norm": 1.5231565237045288, "learning_rate": 1.817841211611368e-05, "loss": 0.6172, "step": 1471 }, { "epoch": 0.2195540308747856, "grad_norm": 2.2425179481506348, "learning_rate": 1.817563077011962e-05, "loss": 0.7093, "step": 1472 }, { "epoch": 0.219703184428369, "grad_norm": 1.4791128635406494, "learning_rate": 1.8172847515472134e-05, "loss": 0.6807, "step": 1473 }, { "epoch": 0.21985233798195242, "grad_norm": 1.941364049911499, "learning_rate": 1.8170062352820993e-05, "loss": 0.7229, "step": 1474 }, { "epoch": 0.22000149153553583, "grad_norm": 2.7048277854919434, "learning_rate": 1.8167275282816406e-05, "loss": 0.7154, "step": 1475 }, { "epoch": 0.22015064508911925, "grad_norm": 1.8622678518295288, "learning_rate": 1.816448630610904e-05, "loss": 0.7668, "step": 1476 }, { "epoch": 0.22029979864270266, "grad_norm": 1.4647443294525146, "learning_rate": 1.816169542334999e-05, "loss": 0.8215, "step": 1477 }, { "epoch": 0.22044895219628607, "grad_norm": 2.9409217834472656, "learning_rate": 1.8158902635190812e-05, "loss": 0.7111, "step": 1478 }, { "epoch": 0.2205981057498695, "grad_norm": 2.293505907058716, "learning_rate": 1.81561079422835e-05, "loss": 0.8233, "step": 1479 }, { "epoch": 0.2207472593034529, "grad_norm": 0.6202112436294556, "learning_rate": 1.815331134528049e-05, "loss": 0.2452, "step": 1480 }, { "epoch": 0.22089641285703632, "grad_norm": 0.6248717904090881, "learning_rate": 1.8150512844834668e-05, "loss": 0.2513, "step": 1481 }, { "epoch": 0.22104556641061973, "grad_norm": 3.3827691078186035, "learning_rate": 1.814771244159936e-05, "loss": 0.6703, "step": 1482 }, { "epoch": 0.22119471996420315, "grad_norm": 2.719217300415039, "learning_rate": 1.814491013622834e-05, "loss": 0.8392, "step": 1483 }, { "epoch": 0.22134387351778656, "grad_norm": 2.9693589210510254, "learning_rate": 1.8142105929375823e-05, "loss": 0.7878, "step": 1484 }, { "epoch": 0.22149302707136997, "grad_norm": 2.5309550762176514, "learning_rate": 1.813929982169647e-05, "loss": 0.8329, "step": 1485 }, { "epoch": 0.2216421806249534, "grad_norm": 0.5213886499404907, "learning_rate": 1.813649181384538e-05, "loss": 0.2484, "step": 1486 }, { "epoch": 0.2217913341785368, "grad_norm": 2.5900449752807617, "learning_rate": 1.813368190647811e-05, "loss": 0.7687, "step": 1487 }, { "epoch": 0.22194048773212022, "grad_norm": 1.494953989982605, "learning_rate": 1.8130870100250643e-05, "loss": 0.7536, "step": 1488 }, { "epoch": 0.22208964128570363, "grad_norm": 3.5888192653656006, "learning_rate": 1.8128056395819414e-05, "loss": 0.7427, "step": 1489 }, { "epoch": 0.22223879483928705, "grad_norm": 1.6343798637390137, "learning_rate": 1.8125240793841304e-05, "loss": 0.7418, "step": 1490 }, { "epoch": 0.22238794839287046, "grad_norm": 2.9518516063690186, "learning_rate": 1.812242329497363e-05, "loss": 0.7967, "step": 1491 }, { "epoch": 0.22253710194645387, "grad_norm": 3.150777816772461, "learning_rate": 1.8119603899874163e-05, "loss": 0.7695, "step": 1492 }, { "epoch": 0.2226862555000373, "grad_norm": 1.587815523147583, "learning_rate": 1.8116782609201095e-05, "loss": 0.7903, "step": 1493 }, { "epoch": 0.2228354090536207, "grad_norm": 3.2235965728759766, "learning_rate": 1.8113959423613084e-05, "loss": 0.7916, "step": 1494 }, { "epoch": 0.22298456260720412, "grad_norm": 2.400172233581543, "learning_rate": 1.811113434376922e-05, "loss": 0.7003, "step": 1495 }, { "epoch": 0.22313371616078753, "grad_norm": 2.559138536453247, "learning_rate": 1.8108307370329032e-05, "loss": 0.6818, "step": 1496 }, { "epoch": 0.22328286971437095, "grad_norm": 3.1790974140167236, "learning_rate": 1.81054785039525e-05, "loss": 0.7814, "step": 1497 }, { "epoch": 0.22343202326795436, "grad_norm": 3.5307157039642334, "learning_rate": 1.810264774530004e-05, "loss": 0.7275, "step": 1498 }, { "epoch": 0.22358117682153777, "grad_norm": 1.9103853702545166, "learning_rate": 1.8099815095032502e-05, "loss": 0.6333, "step": 1499 }, { "epoch": 0.2237303303751212, "grad_norm": 2.4944586753845215, "learning_rate": 1.80969805538112e-05, "loss": 0.7624, "step": 1500 }, { "epoch": 0.2238794839287046, "grad_norm": 1.8628125190734863, "learning_rate": 1.8094144122297867e-05, "loss": 0.8984, "step": 1501 }, { "epoch": 0.22402863748228802, "grad_norm": 0.6013622879981995, "learning_rate": 1.809130580115469e-05, "loss": 0.2571, "step": 1502 }, { "epoch": 0.22417779103587143, "grad_norm": 1.7349120378494263, "learning_rate": 1.8088465591044292e-05, "loss": 0.7812, "step": 1503 }, { "epoch": 0.22432694458945485, "grad_norm": 2.281501054763794, "learning_rate": 1.808562349262974e-05, "loss": 0.7511, "step": 1504 }, { "epoch": 0.22447609814303826, "grad_norm": 2.009308338165283, "learning_rate": 1.8082779506574534e-05, "loss": 0.7545, "step": 1505 }, { "epoch": 0.22462525169662168, "grad_norm": 2.3461992740631104, "learning_rate": 1.807993363354263e-05, "loss": 0.8198, "step": 1506 }, { "epoch": 0.2247744052502051, "grad_norm": 2.9121954441070557, "learning_rate": 1.8077085874198404e-05, "loss": 0.7408, "step": 1507 }, { "epoch": 0.2249235588037885, "grad_norm": 3.817898988723755, "learning_rate": 1.8074236229206694e-05, "loss": 0.8195, "step": 1508 }, { "epoch": 0.22507271235737192, "grad_norm": 2.8346641063690186, "learning_rate": 1.8071384699232766e-05, "loss": 0.7877, "step": 1509 }, { "epoch": 0.22522186591095533, "grad_norm": 2.4342782497406006, "learning_rate": 1.8068531284942324e-05, "loss": 0.7849, "step": 1510 }, { "epoch": 0.22537101946453875, "grad_norm": 1.808713674545288, "learning_rate": 1.8065675987001517e-05, "loss": 0.7981, "step": 1511 }, { "epoch": 0.22552017301812216, "grad_norm": 1.8652938604354858, "learning_rate": 1.8062818806076934e-05, "loss": 0.8471, "step": 1512 }, { "epoch": 0.22566932657170558, "grad_norm": 1.8711155652999878, "learning_rate": 1.8059959742835604e-05, "loss": 0.7401, "step": 1513 }, { "epoch": 0.225818480125289, "grad_norm": 1.8259947299957275, "learning_rate": 1.8057098797944987e-05, "loss": 0.7977, "step": 1514 }, { "epoch": 0.2259676336788724, "grad_norm": 1.719848871231079, "learning_rate": 1.8054235972072994e-05, "loss": 0.8135, "step": 1515 }, { "epoch": 0.22611678723245582, "grad_norm": 3.401259183883667, "learning_rate": 1.805137126588797e-05, "loss": 0.7997, "step": 1516 }, { "epoch": 0.22626594078603923, "grad_norm": 2.0929317474365234, "learning_rate": 1.8048504680058704e-05, "loss": 0.852, "step": 1517 }, { "epoch": 0.22641509433962265, "grad_norm": 3.393115282058716, "learning_rate": 1.8045636215254407e-05, "loss": 0.7358, "step": 1518 }, { "epoch": 0.22656424789320606, "grad_norm": 2.8949086666107178, "learning_rate": 1.8042765872144747e-05, "loss": 0.7807, "step": 1519 }, { "epoch": 0.22671340144678948, "grad_norm": 2.8511569499969482, "learning_rate": 1.8039893651399823e-05, "loss": 0.7503, "step": 1520 }, { "epoch": 0.2268625550003729, "grad_norm": 1.5912160873413086, "learning_rate": 1.8037019553690176e-05, "loss": 0.8026, "step": 1521 }, { "epoch": 0.2270117085539563, "grad_norm": 1.988398551940918, "learning_rate": 1.803414357968678e-05, "loss": 0.814, "step": 1522 }, { "epoch": 0.22716086210753972, "grad_norm": 2.3347272872924805, "learning_rate": 1.803126573006105e-05, "loss": 0.7634, "step": 1523 }, { "epoch": 0.22731001566112313, "grad_norm": 1.8967167139053345, "learning_rate": 1.8028386005484837e-05, "loss": 0.8637, "step": 1524 }, { "epoch": 0.22745916921470655, "grad_norm": 2.066542863845825, "learning_rate": 1.8025504406630434e-05, "loss": 0.7821, "step": 1525 }, { "epoch": 0.22760832276828996, "grad_norm": 4.07410192489624, "learning_rate": 1.8022620934170568e-05, "loss": 0.7489, "step": 1526 }, { "epoch": 0.22775747632187338, "grad_norm": 2.0620856285095215, "learning_rate": 1.8019735588778404e-05, "loss": 0.7898, "step": 1527 }, { "epoch": 0.2279066298754568, "grad_norm": 1.8376519680023193, "learning_rate": 1.801684837112754e-05, "loss": 0.7866, "step": 1528 }, { "epoch": 0.2280557834290402, "grad_norm": 2.365175247192383, "learning_rate": 1.8013959281892025e-05, "loss": 0.7477, "step": 1529 }, { "epoch": 0.22820493698262362, "grad_norm": 2.710296630859375, "learning_rate": 1.801106832174633e-05, "loss": 0.78, "step": 1530 }, { "epoch": 0.22835409053620703, "grad_norm": 1.7660894393920898, "learning_rate": 1.8008175491365364e-05, "loss": 0.8103, "step": 1531 }, { "epoch": 0.22850324408979045, "grad_norm": 3.015047788619995, "learning_rate": 1.8005280791424483e-05, "loss": 0.7441, "step": 1532 }, { "epoch": 0.22865239764337386, "grad_norm": 1.7819002866744995, "learning_rate": 1.800238422259947e-05, "loss": 0.7998, "step": 1533 }, { "epoch": 0.22880155119695728, "grad_norm": 3.971029043197632, "learning_rate": 1.799948578556655e-05, "loss": 0.7835, "step": 1534 }, { "epoch": 0.2289507047505407, "grad_norm": 2.4040210247039795, "learning_rate": 1.799658548100238e-05, "loss": 0.8629, "step": 1535 }, { "epoch": 0.2290998583041241, "grad_norm": 1.9980107545852661, "learning_rate": 1.799368330958405e-05, "loss": 0.7756, "step": 1536 }, { "epoch": 0.22924901185770752, "grad_norm": 1.9595519304275513, "learning_rate": 1.7990779271989103e-05, "loss": 0.7811, "step": 1537 }, { "epoch": 0.22939816541129093, "grad_norm": 1.825899600982666, "learning_rate": 1.7987873368895494e-05, "loss": 0.8131, "step": 1538 }, { "epoch": 0.22954731896487435, "grad_norm": 1.435301661491394, "learning_rate": 1.798496560098163e-05, "loss": 0.7966, "step": 1539 }, { "epoch": 0.22969647251845776, "grad_norm": 1.6413638591766357, "learning_rate": 1.7982055968926344e-05, "loss": 0.7191, "step": 1540 }, { "epoch": 0.22984562607204118, "grad_norm": 2.927438735961914, "learning_rate": 1.7979144473408912e-05, "loss": 0.7807, "step": 1541 }, { "epoch": 0.2299947796256246, "grad_norm": 2.2583799362182617, "learning_rate": 1.797623111510904e-05, "loss": 0.7488, "step": 1542 }, { "epoch": 0.230143933179208, "grad_norm": 3.0982606410980225, "learning_rate": 1.7973315894706872e-05, "loss": 0.7345, "step": 1543 }, { "epoch": 0.23029308673279142, "grad_norm": 3.444295644760132, "learning_rate": 1.7970398812882982e-05, "loss": 0.6751, "step": 1544 }, { "epoch": 0.23044224028637483, "grad_norm": 2.7344911098480225, "learning_rate": 1.7967479870318384e-05, "loss": 0.7394, "step": 1545 }, { "epoch": 0.23059139383995825, "grad_norm": 1.7980653047561646, "learning_rate": 1.796455906769452e-05, "loss": 0.7295, "step": 1546 }, { "epoch": 0.23074054739354166, "grad_norm": 3.0867881774902344, "learning_rate": 1.7961636405693274e-05, "loss": 0.7116, "step": 1547 }, { "epoch": 0.23088970094712508, "grad_norm": 1.9740636348724365, "learning_rate": 1.795871188499696e-05, "loss": 0.7781, "step": 1548 }, { "epoch": 0.2310388545007085, "grad_norm": 1.7873395681381226, "learning_rate": 1.7955785506288324e-05, "loss": 0.8582, "step": 1549 }, { "epoch": 0.2311880080542919, "grad_norm": 2.060068368911743, "learning_rate": 1.795285727025055e-05, "loss": 0.7544, "step": 1550 }, { "epoch": 0.23133716160787532, "grad_norm": 2.849419116973877, "learning_rate": 1.794992717756725e-05, "loss": 0.7483, "step": 1551 }, { "epoch": 0.23148631516145873, "grad_norm": 1.7809734344482422, "learning_rate": 1.7946995228922474e-05, "loss": 0.7681, "step": 1552 }, { "epoch": 0.23163546871504215, "grad_norm": 1.9457184076309204, "learning_rate": 1.794406142500071e-05, "loss": 0.8077, "step": 1553 }, { "epoch": 0.23178462226862556, "grad_norm": 0.6736181378364563, "learning_rate": 1.7941125766486865e-05, "loss": 0.2683, "step": 1554 }, { "epoch": 0.23193377582220898, "grad_norm": 1.6603072881698608, "learning_rate": 1.7938188254066293e-05, "loss": 0.7693, "step": 1555 }, { "epoch": 0.2320829293757924, "grad_norm": 1.9768664836883545, "learning_rate": 1.793524888842477e-05, "loss": 0.7895, "step": 1556 }, { "epoch": 0.2322320829293758, "grad_norm": 2.0253982543945312, "learning_rate": 1.7932307670248518e-05, "loss": 0.8204, "step": 1557 }, { "epoch": 0.23238123648295922, "grad_norm": 2.4444234371185303, "learning_rate": 1.792936460022417e-05, "loss": 0.7747, "step": 1558 }, { "epoch": 0.23253039003654263, "grad_norm": 2.079991102218628, "learning_rate": 1.7926419679038823e-05, "loss": 0.8212, "step": 1559 }, { "epoch": 0.23267954359012605, "grad_norm": 1.8875548839569092, "learning_rate": 1.7923472907379968e-05, "loss": 0.7695, "step": 1560 }, { "epoch": 0.23282869714370946, "grad_norm": 8.82259750366211, "learning_rate": 1.7920524285935563e-05, "loss": 0.8701, "step": 1561 }, { "epoch": 0.23297785069729288, "grad_norm": 2.9474716186523438, "learning_rate": 1.7917573815393975e-05, "loss": 0.7986, "step": 1562 }, { "epoch": 0.2331270042508763, "grad_norm": 2.08091402053833, "learning_rate": 1.7914621496444015e-05, "loss": 0.819, "step": 1563 }, { "epoch": 0.2332761578044597, "grad_norm": 3.336019277572632, "learning_rate": 1.7911667329774914e-05, "loss": 0.7231, "step": 1564 }, { "epoch": 0.2334253113580431, "grad_norm": 0.5920599699020386, "learning_rate": 1.7908711316076345e-05, "loss": 0.2715, "step": 1565 }, { "epoch": 0.2335744649116265, "grad_norm": 3.1684951782226562, "learning_rate": 1.790575345603841e-05, "loss": 0.7577, "step": 1566 }, { "epoch": 0.23372361846520992, "grad_norm": 2.0586228370666504, "learning_rate": 1.790279375035164e-05, "loss": 0.8927, "step": 1567 }, { "epoch": 0.23387277201879333, "grad_norm": 6.539126396179199, "learning_rate": 1.7899832199706993e-05, "loss": 0.8167, "step": 1568 }, { "epoch": 0.23402192557237675, "grad_norm": 2.291084051132202, "learning_rate": 1.7896868804795863e-05, "loss": 0.8288, "step": 1569 }, { "epoch": 0.23417107912596016, "grad_norm": 3.12186336517334, "learning_rate": 1.789390356631008e-05, "loss": 0.7463, "step": 1570 }, { "epoch": 0.23432023267954358, "grad_norm": 4.511935234069824, "learning_rate": 1.7890936484941894e-05, "loss": 0.7587, "step": 1571 }, { "epoch": 0.234469386233127, "grad_norm": 2.5371053218841553, "learning_rate": 1.7887967561383986e-05, "loss": 0.7101, "step": 1572 }, { "epoch": 0.2346185397867104, "grad_norm": 2.9502644538879395, "learning_rate": 1.7884996796329472e-05, "loss": 0.7172, "step": 1573 }, { "epoch": 0.23476769334029382, "grad_norm": 3.148843765258789, "learning_rate": 1.78820241904719e-05, "loss": 0.7685, "step": 1574 }, { "epoch": 0.23491684689387723, "grad_norm": 1.8030400276184082, "learning_rate": 1.787904974450524e-05, "loss": 0.7571, "step": 1575 }, { "epoch": 0.23506600044746065, "grad_norm": 2.3032987117767334, "learning_rate": 1.7876073459123895e-05, "loss": 0.7667, "step": 1576 }, { "epoch": 0.23521515400104406, "grad_norm": 2.6898951530456543, "learning_rate": 1.78730953350227e-05, "loss": 0.7161, "step": 1577 }, { "epoch": 0.23536430755462748, "grad_norm": 3.7514140605926514, "learning_rate": 1.7870115372896915e-05, "loss": 0.8023, "step": 1578 }, { "epoch": 0.2355134611082109, "grad_norm": 1.95379638671875, "learning_rate": 1.7867133573442234e-05, "loss": 0.8435, "step": 1579 }, { "epoch": 0.2356626146617943, "grad_norm": 2.0307111740112305, "learning_rate": 1.786414993735478e-05, "loss": 0.7907, "step": 1580 }, { "epoch": 0.23581176821537772, "grad_norm": 1.950598120689392, "learning_rate": 1.786116446533109e-05, "loss": 0.7946, "step": 1581 }, { "epoch": 0.23596092176896113, "grad_norm": 1.7453429698944092, "learning_rate": 1.7858177158068154e-05, "loss": 0.7462, "step": 1582 }, { "epoch": 0.23611007532254455, "grad_norm": 2.1548423767089844, "learning_rate": 1.7855188016263377e-05, "loss": 0.7545, "step": 1583 }, { "epoch": 0.23625922887612796, "grad_norm": 1.8120620250701904, "learning_rate": 1.7852197040614583e-05, "loss": 0.788, "step": 1584 }, { "epoch": 0.23640838242971138, "grad_norm": 2.183702230453491, "learning_rate": 1.7849204231820042e-05, "loss": 0.7854, "step": 1585 }, { "epoch": 0.2365575359832948, "grad_norm": 2.224205255508423, "learning_rate": 1.784620959057845e-05, "loss": 0.7976, "step": 1586 }, { "epoch": 0.2367066895368782, "grad_norm": 0.5646920800209045, "learning_rate": 1.7843213117588913e-05, "loss": 0.2444, "step": 1587 }, { "epoch": 0.23685584309046162, "grad_norm": 2.059551954269409, "learning_rate": 1.7840214813550986e-05, "loss": 0.7789, "step": 1588 }, { "epoch": 0.23700499664404504, "grad_norm": 1.4762533903121948, "learning_rate": 1.7837214679164635e-05, "loss": 0.8698, "step": 1589 }, { "epoch": 0.23715415019762845, "grad_norm": 2.5852599143981934, "learning_rate": 1.783421271513027e-05, "loss": 0.7976, "step": 1590 }, { "epoch": 0.23730330375121186, "grad_norm": 3.1803228855133057, "learning_rate": 1.7831208922148708e-05, "loss": 0.7996, "step": 1591 }, { "epoch": 0.23745245730479528, "grad_norm": 3.23883318901062, "learning_rate": 1.7828203300921216e-05, "loss": 0.8154, "step": 1592 }, { "epoch": 0.2376016108583787, "grad_norm": 1.7009986639022827, "learning_rate": 1.7825195852149463e-05, "loss": 0.8056, "step": 1593 }, { "epoch": 0.2377507644119621, "grad_norm": 2.20283842086792, "learning_rate": 1.7822186576535566e-05, "loss": 0.8071, "step": 1594 }, { "epoch": 0.23789991796554552, "grad_norm": 2.026171922683716, "learning_rate": 1.781917547478205e-05, "loss": 0.7703, "step": 1595 }, { "epoch": 0.23804907151912894, "grad_norm": 2.4565377235412598, "learning_rate": 1.781616254759189e-05, "loss": 0.8627, "step": 1596 }, { "epoch": 0.23819822507271235, "grad_norm": 2.8021018505096436, "learning_rate": 1.7813147795668465e-05, "loss": 0.7102, "step": 1597 }, { "epoch": 0.23834737862629576, "grad_norm": 2.470834493637085, "learning_rate": 1.7810131219715585e-05, "loss": 0.8291, "step": 1598 }, { "epoch": 0.23849653217987918, "grad_norm": 2.543593168258667, "learning_rate": 1.7807112820437496e-05, "loss": 0.6937, "step": 1599 }, { "epoch": 0.2386456857334626, "grad_norm": 2.6252567768096924, "learning_rate": 1.7804092598538857e-05, "loss": 0.7828, "step": 1600 }, { "epoch": 0.238794839287046, "grad_norm": 2.038630485534668, "learning_rate": 1.7801070554724763e-05, "loss": 0.7912, "step": 1601 }, { "epoch": 0.23894399284062942, "grad_norm": 1.6230192184448242, "learning_rate": 1.7798046689700728e-05, "loss": 0.8248, "step": 1602 }, { "epoch": 0.23909314639421284, "grad_norm": 1.695982575416565, "learning_rate": 1.779502100417269e-05, "loss": 0.7914, "step": 1603 }, { "epoch": 0.23924229994779625, "grad_norm": 2.3373208045959473, "learning_rate": 1.7791993498847016e-05, "loss": 0.7432, "step": 1604 }, { "epoch": 0.23939145350137966, "grad_norm": 1.712449550628662, "learning_rate": 1.77889641744305e-05, "loss": 0.8509, "step": 1605 }, { "epoch": 0.23954060705496308, "grad_norm": 1.8426076173782349, "learning_rate": 1.778593303163035e-05, "loss": 0.8473, "step": 1606 }, { "epoch": 0.2396897606085465, "grad_norm": 1.8452553749084473, "learning_rate": 1.7782900071154215e-05, "loss": 0.8103, "step": 1607 }, { "epoch": 0.2398389141621299, "grad_norm": 2.1148324012756348, "learning_rate": 1.777986529371015e-05, "loss": 0.7824, "step": 1608 }, { "epoch": 0.23998806771571332, "grad_norm": 3.145231246948242, "learning_rate": 1.777682870000665e-05, "loss": 0.7618, "step": 1609 }, { "epoch": 0.24013722126929674, "grad_norm": 2.4364542961120605, "learning_rate": 1.7773790290752626e-05, "loss": 0.8123, "step": 1610 }, { "epoch": 0.24028637482288015, "grad_norm": 4.1387224197387695, "learning_rate": 1.777075006665741e-05, "loss": 0.7421, "step": 1611 }, { "epoch": 0.24043552837646356, "grad_norm": 2.542436361312866, "learning_rate": 1.7767708028430767e-05, "loss": 0.7515, "step": 1612 }, { "epoch": 0.24058468193004698, "grad_norm": 2.296804904937744, "learning_rate": 1.7764664176782872e-05, "loss": 0.7454, "step": 1613 }, { "epoch": 0.2407338354836304, "grad_norm": 2.1210334300994873, "learning_rate": 1.7761618512424347e-05, "loss": 0.6355, "step": 1614 }, { "epoch": 0.2408829890372138, "grad_norm": 4.194976806640625, "learning_rate": 1.7758571036066206e-05, "loss": 0.776, "step": 1615 }, { "epoch": 0.24103214259079722, "grad_norm": 1.5744552612304688, "learning_rate": 1.7755521748419912e-05, "loss": 0.7471, "step": 1616 }, { "epoch": 0.24118129614438064, "grad_norm": 2.116877555847168, "learning_rate": 1.775247065019733e-05, "loss": 0.6886, "step": 1617 }, { "epoch": 0.24133044969796405, "grad_norm": 5.068346977233887, "learning_rate": 1.7749417742110772e-05, "loss": 0.7988, "step": 1618 }, { "epoch": 0.24147960325154746, "grad_norm": 1.9486098289489746, "learning_rate": 1.774636302487295e-05, "loss": 0.7301, "step": 1619 }, { "epoch": 0.24162875680513088, "grad_norm": 4.763795852661133, "learning_rate": 1.7743306499197014e-05, "loss": 0.7094, "step": 1620 }, { "epoch": 0.2417779103587143, "grad_norm": 4.208978652954102, "learning_rate": 1.774024816579652e-05, "loss": 0.7667, "step": 1621 }, { "epoch": 0.2419270639122977, "grad_norm": 2.8769335746765137, "learning_rate": 1.7737188025385466e-05, "loss": 0.7665, "step": 1622 }, { "epoch": 0.24207621746588112, "grad_norm": 2.202235698699951, "learning_rate": 1.7734126078678252e-05, "loss": 0.774, "step": 1623 }, { "epoch": 0.24222537101946454, "grad_norm": 2.31099796295166, "learning_rate": 1.7731062326389716e-05, "loss": 0.7828, "step": 1624 }, { "epoch": 0.24237452457304795, "grad_norm": 0.6338681578636169, "learning_rate": 1.772799676923511e-05, "loss": 0.2991, "step": 1625 }, { "epoch": 0.24252367812663136, "grad_norm": 3.5031304359436035, "learning_rate": 1.77249294079301e-05, "loss": 0.815, "step": 1626 }, { "epoch": 0.24267283168021478, "grad_norm": 2.8648624420166016, "learning_rate": 1.772186024319079e-05, "loss": 0.7134, "step": 1627 }, { "epoch": 0.2428219852337982, "grad_norm": 2.4356110095977783, "learning_rate": 1.7718789275733694e-05, "loss": 0.7527, "step": 1628 }, { "epoch": 0.2429711387873816, "grad_norm": 2.7814464569091797, "learning_rate": 1.7715716506275747e-05, "loss": 0.7133, "step": 1629 }, { "epoch": 0.24312029234096502, "grad_norm": 2.9686379432678223, "learning_rate": 1.771264193553431e-05, "loss": 0.7396, "step": 1630 }, { "epoch": 0.24326944589454844, "grad_norm": 2.518496513366699, "learning_rate": 1.770956556422716e-05, "loss": 0.7834, "step": 1631 }, { "epoch": 0.24341859944813185, "grad_norm": 3.4250900745391846, "learning_rate": 1.7706487393072492e-05, "loss": 0.6561, "step": 1632 }, { "epoch": 0.24356775300171526, "grad_norm": 2.253861665725708, "learning_rate": 1.7703407422788933e-05, "loss": 0.8002, "step": 1633 }, { "epoch": 0.24371690655529868, "grad_norm": 3.775139093399048, "learning_rate": 1.770032565409551e-05, "loss": 0.7637, "step": 1634 }, { "epoch": 0.2438660601088821, "grad_norm": 1.7949278354644775, "learning_rate": 1.769724208771169e-05, "loss": 0.802, "step": 1635 }, { "epoch": 0.2440152136624655, "grad_norm": 2.677255392074585, "learning_rate": 1.7694156724357352e-05, "loss": 0.7356, "step": 1636 }, { "epoch": 0.24416436721604892, "grad_norm": 4.365253448486328, "learning_rate": 1.7691069564752793e-05, "loss": 0.7866, "step": 1637 }, { "epoch": 0.24431352076963234, "grad_norm": 1.8698647022247314, "learning_rate": 1.7687980609618726e-05, "loss": 0.828, "step": 1638 }, { "epoch": 0.24446267432321575, "grad_norm": 1.795937418937683, "learning_rate": 1.768488985967629e-05, "loss": 0.7938, "step": 1639 }, { "epoch": 0.24461182787679916, "grad_norm": 2.3935959339141846, "learning_rate": 1.768179731564704e-05, "loss": 0.7369, "step": 1640 }, { "epoch": 0.24476098143038258, "grad_norm": 2.2403643131256104, "learning_rate": 1.767870297825295e-05, "loss": 0.7836, "step": 1641 }, { "epoch": 0.244910134983966, "grad_norm": 3.9523849487304688, "learning_rate": 1.767560684821642e-05, "loss": 0.7797, "step": 1642 }, { "epoch": 0.2450592885375494, "grad_norm": 1.8309128284454346, "learning_rate": 1.7672508926260244e-05, "loss": 0.7788, "step": 1643 }, { "epoch": 0.24520844209113282, "grad_norm": 1.6446045637130737, "learning_rate": 1.7669409213107674e-05, "loss": 0.8558, "step": 1644 }, { "epoch": 0.24535759564471624, "grad_norm": 2.182175397872925, "learning_rate": 1.766630770948234e-05, "loss": 0.7582, "step": 1645 }, { "epoch": 0.24550674919829965, "grad_norm": 2.347594738006592, "learning_rate": 1.7663204416108315e-05, "loss": 0.8495, "step": 1646 }, { "epoch": 0.24565590275188307, "grad_norm": 2.3581418991088867, "learning_rate": 1.7660099333710084e-05, "loss": 0.7117, "step": 1647 }, { "epoch": 0.24580505630546648, "grad_norm": 2.0282037258148193, "learning_rate": 1.7656992463012548e-05, "loss": 0.7634, "step": 1648 }, { "epoch": 0.2459542098590499, "grad_norm": 2.8264238834381104, "learning_rate": 1.765388380474102e-05, "loss": 0.7704, "step": 1649 }, { "epoch": 0.2461033634126333, "grad_norm": 1.8581401109695435, "learning_rate": 1.765077335962124e-05, "loss": 0.6853, "step": 1650 }, { "epoch": 0.24625251696621672, "grad_norm": 3.132004976272583, "learning_rate": 1.7647661128379373e-05, "loss": 0.7259, "step": 1651 }, { "epoch": 0.24640167051980014, "grad_norm": 2.7868528366088867, "learning_rate": 1.7644547111741968e-05, "loss": 0.6833, "step": 1652 }, { "epoch": 0.24655082407338355, "grad_norm": 4.800034523010254, "learning_rate": 1.7641431310436025e-05, "loss": 0.7085, "step": 1653 }, { "epoch": 0.24669997762696697, "grad_norm": 1.366904377937317, "learning_rate": 1.7638313725188948e-05, "loss": 0.8335, "step": 1654 }, { "epoch": 0.24684913118055038, "grad_norm": 2.1288113594055176, "learning_rate": 1.7635194356728553e-05, "loss": 0.884, "step": 1655 }, { "epoch": 0.2469982847341338, "grad_norm": 13.939248085021973, "learning_rate": 1.7632073205783076e-05, "loss": 0.7744, "step": 1656 }, { "epoch": 0.2471474382877172, "grad_norm": 0.6238555312156677, "learning_rate": 1.7628950273081176e-05, "loss": 0.2276, "step": 1657 }, { "epoch": 0.24729659184130062, "grad_norm": 1.839195966720581, "learning_rate": 1.7625825559351917e-05, "loss": 0.8176, "step": 1658 }, { "epoch": 0.24744574539488404, "grad_norm": 2.3725109100341797, "learning_rate": 1.762269906532478e-05, "loss": 0.8144, "step": 1659 }, { "epoch": 0.24759489894846745, "grad_norm": 1.9466501474380493, "learning_rate": 1.7619570791729676e-05, "loss": 0.7563, "step": 1660 }, { "epoch": 0.24774405250205087, "grad_norm": 5.407500743865967, "learning_rate": 1.7616440739296908e-05, "loss": 0.6614, "step": 1661 }, { "epoch": 0.24789320605563428, "grad_norm": 1.857494592666626, "learning_rate": 1.7613308908757215e-05, "loss": 0.7726, "step": 1662 }, { "epoch": 0.2480423596092177, "grad_norm": 2.079479694366455, "learning_rate": 1.761017530084174e-05, "loss": 0.7905, "step": 1663 }, { "epoch": 0.2481915131628011, "grad_norm": 2.2664334774017334, "learning_rate": 1.7607039916282044e-05, "loss": 0.7365, "step": 1664 }, { "epoch": 0.24834066671638452, "grad_norm": 2.223867893218994, "learning_rate": 1.7603902755810102e-05, "loss": 0.7458, "step": 1665 }, { "epoch": 0.24848982026996794, "grad_norm": 2.9154186248779297, "learning_rate": 1.7600763820158308e-05, "loss": 0.6635, "step": 1666 }, { "epoch": 0.24863897382355135, "grad_norm": 2.567328691482544, "learning_rate": 1.7597623110059462e-05, "loss": 0.7004, "step": 1667 }, { "epoch": 0.24878812737713477, "grad_norm": 2.1876261234283447, "learning_rate": 1.7594480626246784e-05, "loss": 0.7739, "step": 1668 }, { "epoch": 0.24893728093071818, "grad_norm": 0.6046738624572754, "learning_rate": 1.759133636945391e-05, "loss": 0.2698, "step": 1669 }, { "epoch": 0.2490864344843016, "grad_norm": 2.235708475112915, "learning_rate": 1.7588190340414882e-05, "loss": 0.7779, "step": 1670 }, { "epoch": 0.249235588037885, "grad_norm": 2.0190610885620117, "learning_rate": 1.7585042539864164e-05, "loss": 0.7848, "step": 1671 }, { "epoch": 0.24938474159146842, "grad_norm": 2.0433461666107178, "learning_rate": 1.758189296853663e-05, "loss": 0.8387, "step": 1672 }, { "epoch": 0.24953389514505184, "grad_norm": 0.5278853178024292, "learning_rate": 1.757874162716757e-05, "loss": 0.2265, "step": 1673 }, { "epoch": 0.24968304869863525, "grad_norm": 4.099000930786133, "learning_rate": 1.7575588516492677e-05, "loss": 0.7712, "step": 1674 }, { "epoch": 0.24983220225221867, "grad_norm": 2.887049436569214, "learning_rate": 1.757243363724807e-05, "loss": 0.79, "step": 1675 }, { "epoch": 0.24998135580580208, "grad_norm": 1.9194786548614502, "learning_rate": 1.7569276990170276e-05, "loss": 0.842, "step": 1676 }, { "epoch": 0.25013050935938547, "grad_norm": 0.5682175159454346, "learning_rate": 1.7566118575996238e-05, "loss": 0.2321, "step": 1677 }, { "epoch": 0.2502796629129689, "grad_norm": 2.7948997020721436, "learning_rate": 1.75629583954633e-05, "loss": 0.737, "step": 1678 }, { "epoch": 0.2504288164665523, "grad_norm": 1.7758601903915405, "learning_rate": 1.7559796449309233e-05, "loss": 0.7353, "step": 1679 }, { "epoch": 0.2505779700201357, "grad_norm": 4.338418960571289, "learning_rate": 1.755663273827221e-05, "loss": 0.7872, "step": 1680 }, { "epoch": 0.2507271235737191, "grad_norm": 1.902687430381775, "learning_rate": 1.7553467263090822e-05, "loss": 0.741, "step": 1681 }, { "epoch": 0.25087627712730254, "grad_norm": 2.488394021987915, "learning_rate": 1.7550300024504067e-05, "loss": 0.82, "step": 1682 }, { "epoch": 0.25102543068088595, "grad_norm": 2.4534804821014404, "learning_rate": 1.754713102325136e-05, "loss": 0.7621, "step": 1683 }, { "epoch": 0.25117458423446937, "grad_norm": 2.316082000732422, "learning_rate": 1.7543960260072522e-05, "loss": 0.8589, "step": 1684 }, { "epoch": 0.2513237377880528, "grad_norm": 2.4853525161743164, "learning_rate": 1.754078773570779e-05, "loss": 0.7468, "step": 1685 }, { "epoch": 0.2514728913416362, "grad_norm": 2.0736215114593506, "learning_rate": 1.753761345089781e-05, "loss": 0.6589, "step": 1686 }, { "epoch": 0.2516220448952196, "grad_norm": 0.5948911309242249, "learning_rate": 1.7534437406383637e-05, "loss": 0.2576, "step": 1687 }, { "epoch": 0.251771198448803, "grad_norm": 1.5657265186309814, "learning_rate": 1.753125960290674e-05, "loss": 0.7926, "step": 1688 }, { "epoch": 0.25192035200238644, "grad_norm": 2.318450927734375, "learning_rate": 1.7528080041209e-05, "loss": 0.7808, "step": 1689 }, { "epoch": 0.25206950555596985, "grad_norm": 2.2615532875061035, "learning_rate": 1.7524898722032704e-05, "loss": 0.8219, "step": 1690 }, { "epoch": 0.25221865910955327, "grad_norm": 2.7184062004089355, "learning_rate": 1.7521715646120547e-05, "loss": 0.796, "step": 1691 }, { "epoch": 0.2523678126631367, "grad_norm": 2.673820734024048, "learning_rate": 1.751853081421565e-05, "loss": 0.7655, "step": 1692 }, { "epoch": 0.2525169662167201, "grad_norm": 0.5517602562904358, "learning_rate": 1.751534422706152e-05, "loss": 0.2442, "step": 1693 }, { "epoch": 0.2526661197703035, "grad_norm": 5.168885707855225, "learning_rate": 1.7512155885402095e-05, "loss": 0.7153, "step": 1694 }, { "epoch": 0.2528152733238869, "grad_norm": 1.4675525426864624, "learning_rate": 1.7508965789981706e-05, "loss": 0.7931, "step": 1695 }, { "epoch": 0.25296442687747034, "grad_norm": 2.3724138736724854, "learning_rate": 1.7505773941545108e-05, "loss": 0.7347, "step": 1696 }, { "epoch": 0.25311358043105375, "grad_norm": 2.4605307579040527, "learning_rate": 1.7502580340837455e-05, "loss": 0.7487, "step": 1697 }, { "epoch": 0.25326273398463717, "grad_norm": 1.7040411233901978, "learning_rate": 1.7499384988604316e-05, "loss": 0.7996, "step": 1698 }, { "epoch": 0.2534118875382206, "grad_norm": 2.93603253364563, "learning_rate": 1.7496187885591664e-05, "loss": 0.7925, "step": 1699 }, { "epoch": 0.253561041091804, "grad_norm": 1.983357310295105, "learning_rate": 1.7492989032545886e-05, "loss": 0.8168, "step": 1700 }, { "epoch": 0.2537101946453874, "grad_norm": 3.4209299087524414, "learning_rate": 1.7489788430213774e-05, "loss": 0.6878, "step": 1701 }, { "epoch": 0.2538593481989708, "grad_norm": 2.1879260540008545, "learning_rate": 1.7486586079342523e-05, "loss": 0.7724, "step": 1702 }, { "epoch": 0.25400850175255424, "grad_norm": 2.0597612857818604, "learning_rate": 1.748338198067975e-05, "loss": 0.6833, "step": 1703 }, { "epoch": 0.25415765530613765, "grad_norm": 2.1641485691070557, "learning_rate": 1.7480176134973474e-05, "loss": 0.746, "step": 1704 }, { "epoch": 0.25430680885972107, "grad_norm": 4.218671798706055, "learning_rate": 1.7476968542972112e-05, "loss": 0.7431, "step": 1705 }, { "epoch": 0.2544559624133045, "grad_norm": 1.9187580347061157, "learning_rate": 1.74737592054245e-05, "loss": 0.7581, "step": 1706 }, { "epoch": 0.2546051159668879, "grad_norm": 3.105710029602051, "learning_rate": 1.7470548123079884e-05, "loss": 0.6468, "step": 1707 }, { "epoch": 0.2547542695204713, "grad_norm": 1.5544617176055908, "learning_rate": 1.7467335296687903e-05, "loss": 0.7796, "step": 1708 }, { "epoch": 0.2549034230740547, "grad_norm": 1.7798353433609009, "learning_rate": 1.7464120726998616e-05, "loss": 0.8274, "step": 1709 }, { "epoch": 0.25505257662763814, "grad_norm": 1.5890856981277466, "learning_rate": 1.7460904414762488e-05, "loss": 0.8397, "step": 1710 }, { "epoch": 0.25520173018122155, "grad_norm": 2.696477174758911, "learning_rate": 1.7457686360730382e-05, "loss": 0.6912, "step": 1711 }, { "epoch": 0.25535088373480497, "grad_norm": 1.4761089086532593, "learning_rate": 1.745446656565358e-05, "loss": 0.8248, "step": 1712 }, { "epoch": 0.2555000372883884, "grad_norm": 2.1109461784362793, "learning_rate": 1.7451245030283755e-05, "loss": 0.8312, "step": 1713 }, { "epoch": 0.2556491908419718, "grad_norm": 1.7588642835617065, "learning_rate": 1.7448021755373005e-05, "loss": 0.7188, "step": 1714 }, { "epoch": 0.2557983443955552, "grad_norm": 4.67503547668457, "learning_rate": 1.7444796741673814e-05, "loss": 0.8023, "step": 1715 }, { "epoch": 0.2559474979491386, "grad_norm": 0.6421114802360535, "learning_rate": 1.7441569989939092e-05, "loss": 0.2553, "step": 1716 }, { "epoch": 0.25609665150272204, "grad_norm": 2.9050676822662354, "learning_rate": 1.7438341500922137e-05, "loss": 0.7744, "step": 1717 }, { "epoch": 0.25624580505630545, "grad_norm": 3.499586582183838, "learning_rate": 1.7435111275376668e-05, "loss": 0.7444, "step": 1718 }, { "epoch": 0.25639495860988887, "grad_norm": 1.9730546474456787, "learning_rate": 1.7431879314056792e-05, "loss": 0.7317, "step": 1719 }, { "epoch": 0.2565441121634723, "grad_norm": 2.0030641555786133, "learning_rate": 1.742864561771704e-05, "loss": 0.7916, "step": 1720 }, { "epoch": 0.2566932657170557, "grad_norm": 1.8007392883300781, "learning_rate": 1.7425410187112334e-05, "loss": 0.7794, "step": 1721 }, { "epoch": 0.2568424192706391, "grad_norm": 2.54343843460083, "learning_rate": 1.742217302299801e-05, "loss": 0.8474, "step": 1722 }, { "epoch": 0.2569915728242225, "grad_norm": 2.2085964679718018, "learning_rate": 1.74189341261298e-05, "loss": 0.7914, "step": 1723 }, { "epoch": 0.25714072637780594, "grad_norm": 3.4898383617401123, "learning_rate": 1.741569349726385e-05, "loss": 0.7367, "step": 1724 }, { "epoch": 0.25728987993138935, "grad_norm": 2.1834287643432617, "learning_rate": 1.74124511371567e-05, "loss": 0.7288, "step": 1725 }, { "epoch": 0.25743903348497277, "grad_norm": 3.856422185897827, "learning_rate": 1.7409207046565306e-05, "loss": 0.7775, "step": 1726 }, { "epoch": 0.2575881870385562, "grad_norm": 1.7609199285507202, "learning_rate": 1.7405961226247022e-05, "loss": 0.715, "step": 1727 }, { "epoch": 0.2577373405921396, "grad_norm": 1.5041253566741943, "learning_rate": 1.7402713676959598e-05, "loss": 0.7114, "step": 1728 }, { "epoch": 0.257886494145723, "grad_norm": 1.6901838779449463, "learning_rate": 1.73994643994612e-05, "loss": 0.7025, "step": 1729 }, { "epoch": 0.2580356476993064, "grad_norm": 5.427707195281982, "learning_rate": 1.7396213394510393e-05, "loss": 0.8175, "step": 1730 }, { "epoch": 0.25818480125288984, "grad_norm": 3.4788715839385986, "learning_rate": 1.7392960662866143e-05, "loss": 0.7832, "step": 1731 }, { "epoch": 0.25833395480647325, "grad_norm": 0.6845600008964539, "learning_rate": 1.7389706205287824e-05, "loss": 0.2743, "step": 1732 }, { "epoch": 0.25848310836005667, "grad_norm": 4.902288913726807, "learning_rate": 1.7386450022535207e-05, "loss": 0.6981, "step": 1733 }, { "epoch": 0.2586322619136401, "grad_norm": 1.8391146659851074, "learning_rate": 1.738319211536847e-05, "loss": 0.7809, "step": 1734 }, { "epoch": 0.2587814154672235, "grad_norm": 2.2746105194091797, "learning_rate": 1.7379932484548193e-05, "loss": 0.7536, "step": 1735 }, { "epoch": 0.2589305690208069, "grad_norm": 1.368125081062317, "learning_rate": 1.7376671130835362e-05, "loss": 0.7844, "step": 1736 }, { "epoch": 0.2590797225743903, "grad_norm": 1.9754377603530884, "learning_rate": 1.7373408054991348e-05, "loss": 0.7092, "step": 1737 }, { "epoch": 0.25922887612797374, "grad_norm": 2.225799322128296, "learning_rate": 1.737014325777795e-05, "loss": 0.7513, "step": 1738 }, { "epoch": 0.25937802968155715, "grad_norm": 2.669060230255127, "learning_rate": 1.7366876739957346e-05, "loss": 0.8191, "step": 1739 }, { "epoch": 0.25952718323514057, "grad_norm": 2.8020339012145996, "learning_rate": 1.7363608502292136e-05, "loss": 0.709, "step": 1740 }, { "epoch": 0.259676336788724, "grad_norm": 1.3752715587615967, "learning_rate": 1.7360338545545303e-05, "loss": 0.7641, "step": 1741 }, { "epoch": 0.2598254903423074, "grad_norm": 1.5724873542785645, "learning_rate": 1.735706687048024e-05, "loss": 0.7549, "step": 1742 }, { "epoch": 0.2599746438958908, "grad_norm": 1.7470000982284546, "learning_rate": 1.7353793477860746e-05, "loss": 0.8742, "step": 1743 }, { "epoch": 0.2601237974494742, "grad_norm": 0.693554699420929, "learning_rate": 1.735051836845101e-05, "loss": 0.232, "step": 1744 }, { "epoch": 0.26027295100305764, "grad_norm": 1.6864017248153687, "learning_rate": 1.734724154301563e-05, "loss": 0.8342, "step": 1745 }, { "epoch": 0.26042210455664105, "grad_norm": 1.5526691675186157, "learning_rate": 1.7343963002319597e-05, "loss": 0.7573, "step": 1746 }, { "epoch": 0.26057125811022447, "grad_norm": 1.7017691135406494, "learning_rate": 1.7340682747128314e-05, "loss": 0.7704, "step": 1747 }, { "epoch": 0.2607204116638079, "grad_norm": 2.2863268852233887, "learning_rate": 1.7337400778207578e-05, "loss": 0.7328, "step": 1748 }, { "epoch": 0.2608695652173913, "grad_norm": 1.758424997329712, "learning_rate": 1.7334117096323578e-05, "loss": 0.7855, "step": 1749 }, { "epoch": 0.2610187187709747, "grad_norm": 1.6003097295761108, "learning_rate": 1.733083170224292e-05, "loss": 0.8006, "step": 1750 }, { "epoch": 0.2611678723245581, "grad_norm": 3.273928642272949, "learning_rate": 1.732754459673259e-05, "loss": 0.8456, "step": 1751 }, { "epoch": 0.26131702587814154, "grad_norm": 2.2727108001708984, "learning_rate": 1.7324255780559993e-05, "loss": 0.8081, "step": 1752 }, { "epoch": 0.26146617943172495, "grad_norm": 1.9959208965301514, "learning_rate": 1.732096525449292e-05, "loss": 0.8872, "step": 1753 }, { "epoch": 0.26161533298530837, "grad_norm": 2.5016727447509766, "learning_rate": 1.7317673019299566e-05, "loss": 0.7858, "step": 1754 }, { "epoch": 0.2617644865388918, "grad_norm": 2.4180641174316406, "learning_rate": 1.7314379075748524e-05, "loss": 0.7505, "step": 1755 }, { "epoch": 0.2619136400924752, "grad_norm": 1.7469532489776611, "learning_rate": 1.7311083424608785e-05, "loss": 0.6892, "step": 1756 }, { "epoch": 0.2620627936460586, "grad_norm": 2.0607473850250244, "learning_rate": 1.7307786066649742e-05, "loss": 0.7293, "step": 1757 }, { "epoch": 0.262211947199642, "grad_norm": 1.4527727365493774, "learning_rate": 1.730448700264119e-05, "loss": 0.7306, "step": 1758 }, { "epoch": 0.26236110075322544, "grad_norm": 1.640815019607544, "learning_rate": 1.7301186233353303e-05, "loss": 0.7316, "step": 1759 }, { "epoch": 0.26251025430680885, "grad_norm": 2.0887527465820312, "learning_rate": 1.7297883759556676e-05, "loss": 0.737, "step": 1760 }, { "epoch": 0.26265940786039227, "grad_norm": 1.6747246980667114, "learning_rate": 1.7294579582022296e-05, "loss": 0.8267, "step": 1761 }, { "epoch": 0.2628085614139757, "grad_norm": 2.8678321838378906, "learning_rate": 1.7291273701521534e-05, "loss": 0.7161, "step": 1762 }, { "epoch": 0.2629577149675591, "grad_norm": 1.9013103246688843, "learning_rate": 1.7287966118826174e-05, "loss": 0.8571, "step": 1763 }, { "epoch": 0.2631068685211425, "grad_norm": 3.8178930282592773, "learning_rate": 1.72846568347084e-05, "loss": 0.7481, "step": 1764 }, { "epoch": 0.2632560220747259, "grad_norm": 2.3151793479919434, "learning_rate": 1.728134584994077e-05, "loss": 0.7139, "step": 1765 }, { "epoch": 0.26340517562830934, "grad_norm": 1.840468406677246, "learning_rate": 1.7278033165296267e-05, "loss": 0.7494, "step": 1766 }, { "epoch": 0.26355432918189275, "grad_norm": 1.8728991746902466, "learning_rate": 1.7274718781548256e-05, "loss": 0.7858, "step": 1767 }, { "epoch": 0.26370348273547617, "grad_norm": 2.474663257598877, "learning_rate": 1.7271402699470498e-05, "loss": 0.7587, "step": 1768 }, { "epoch": 0.2638526362890596, "grad_norm": 1.9923291206359863, "learning_rate": 1.7268084919837155e-05, "loss": 0.7165, "step": 1769 }, { "epoch": 0.264001789842643, "grad_norm": 2.306786060333252, "learning_rate": 1.7264765443422783e-05, "loss": 0.904, "step": 1770 }, { "epoch": 0.2641509433962264, "grad_norm": 1.4068653583526611, "learning_rate": 1.726144427100234e-05, "loss": 0.761, "step": 1771 }, { "epoch": 0.2643000969498098, "grad_norm": 0.6313273310661316, "learning_rate": 1.7258121403351168e-05, "loss": 0.2773, "step": 1772 }, { "epoch": 0.26444925050339324, "grad_norm": 2.5454883575439453, "learning_rate": 1.7254796841245017e-05, "loss": 0.7066, "step": 1773 }, { "epoch": 0.26459840405697665, "grad_norm": 7.0059895515441895, "learning_rate": 1.7251470585460026e-05, "loss": 0.7451, "step": 1774 }, { "epoch": 0.26474755761056007, "grad_norm": 0.5529683828353882, "learning_rate": 1.724814263677273e-05, "loss": 0.2449, "step": 1775 }, { "epoch": 0.2648967111641435, "grad_norm": 2.4088714122772217, "learning_rate": 1.7244812995960056e-05, "loss": 0.7976, "step": 1776 }, { "epoch": 0.2650458647177269, "grad_norm": 3.937040090560913, "learning_rate": 1.7241481663799337e-05, "loss": 0.7791, "step": 1777 }, { "epoch": 0.2651950182713103, "grad_norm": 2.2575619220733643, "learning_rate": 1.7238148641068292e-05, "loss": 0.7691, "step": 1778 }, { "epoch": 0.2653441718248937, "grad_norm": 1.622682809829712, "learning_rate": 1.7234813928545034e-05, "loss": 0.8766, "step": 1779 }, { "epoch": 0.26549332537847714, "grad_norm": 2.834636688232422, "learning_rate": 1.7231477527008074e-05, "loss": 0.7956, "step": 1780 }, { "epoch": 0.26564247893206056, "grad_norm": 3.247171401977539, "learning_rate": 1.722813943723632e-05, "loss": 0.7807, "step": 1781 }, { "epoch": 0.26579163248564397, "grad_norm": 2.7184348106384277, "learning_rate": 1.7224799660009064e-05, "loss": 0.8807, "step": 1782 }, { "epoch": 0.2659407860392274, "grad_norm": 2.3716111183166504, "learning_rate": 1.7221458196106003e-05, "loss": 0.7215, "step": 1783 }, { "epoch": 0.2660899395928108, "grad_norm": 2.3887557983398438, "learning_rate": 1.721811504630722e-05, "loss": 0.7938, "step": 1784 }, { "epoch": 0.2662390931463942, "grad_norm": 2.312490701675415, "learning_rate": 1.72147702113932e-05, "loss": 0.7303, "step": 1785 }, { "epoch": 0.2663882466999776, "grad_norm": 1.4858182668685913, "learning_rate": 1.721142369214481e-05, "loss": 0.7879, "step": 1786 }, { "epoch": 0.26653740025356104, "grad_norm": 1.6814863681793213, "learning_rate": 1.7208075489343318e-05, "loss": 0.8235, "step": 1787 }, { "epoch": 0.26668655380714446, "grad_norm": 1.6888840198516846, "learning_rate": 1.7204725603770387e-05, "loss": 0.8283, "step": 1788 }, { "epoch": 0.26683570736072787, "grad_norm": 5.2005791664123535, "learning_rate": 1.7201374036208066e-05, "loss": 0.7791, "step": 1789 }, { "epoch": 0.2669848609143113, "grad_norm": 2.0711514949798584, "learning_rate": 1.71980207874388e-05, "loss": 0.7282, "step": 1790 }, { "epoch": 0.2671340144678947, "grad_norm": 1.587503433227539, "learning_rate": 1.7194665858245428e-05, "loss": 0.7537, "step": 1791 }, { "epoch": 0.2672831680214781, "grad_norm": 1.7209060192108154, "learning_rate": 1.719130924941118e-05, "loss": 0.7435, "step": 1792 }, { "epoch": 0.2674323215750615, "grad_norm": 1.7213977575302124, "learning_rate": 1.718795096171968e-05, "loss": 0.7302, "step": 1793 }, { "epoch": 0.26758147512864494, "grad_norm": 2.3541433811187744, "learning_rate": 1.718459099595493e-05, "loss": 0.7173, "step": 1794 }, { "epoch": 0.26773062868222836, "grad_norm": 2.494248867034912, "learning_rate": 1.718122935290135e-05, "loss": 0.761, "step": 1795 }, { "epoch": 0.26787978223581177, "grad_norm": 2.8768975734710693, "learning_rate": 1.717786603334373e-05, "loss": 0.8035, "step": 1796 }, { "epoch": 0.2680289357893952, "grad_norm": 2.934316873550415, "learning_rate": 1.717450103806726e-05, "loss": 0.8289, "step": 1797 }, { "epoch": 0.2681780893429786, "grad_norm": 3.6028554439544678, "learning_rate": 1.717113436785752e-05, "loss": 0.6972, "step": 1798 }, { "epoch": 0.268327242896562, "grad_norm": 1.9719009399414062, "learning_rate": 1.716776602350048e-05, "loss": 0.8323, "step": 1799 }, { "epoch": 0.2684763964501454, "grad_norm": 1.9953566789627075, "learning_rate": 1.71643960057825e-05, "loss": 0.7264, "step": 1800 }, { "epoch": 0.26862555000372884, "grad_norm": 1.5900700092315674, "learning_rate": 1.7161024315490336e-05, "loss": 0.8543, "step": 1801 }, { "epoch": 0.26877470355731226, "grad_norm": 6.42753267288208, "learning_rate": 1.715765095341113e-05, "loss": 0.7166, "step": 1802 }, { "epoch": 0.26892385711089567, "grad_norm": 2.3267006874084473, "learning_rate": 1.715427592033241e-05, "loss": 0.7395, "step": 1803 }, { "epoch": 0.2690730106644791, "grad_norm": 2.7834935188293457, "learning_rate": 1.715089921704211e-05, "loss": 0.7728, "step": 1804 }, { "epoch": 0.2692221642180625, "grad_norm": 2.1304633617401123, "learning_rate": 1.7147520844328526e-05, "loss": 0.8296, "step": 1805 }, { "epoch": 0.2693713177716459, "grad_norm": 2.355076789855957, "learning_rate": 1.7144140802980377e-05, "loss": 0.82, "step": 1806 }, { "epoch": 0.2695204713252293, "grad_norm": 1.9710816144943237, "learning_rate": 1.714075909378675e-05, "loss": 0.7401, "step": 1807 }, { "epoch": 0.26966962487881274, "grad_norm": 2.614776134490967, "learning_rate": 1.7137375717537122e-05, "loss": 0.8308, "step": 1808 }, { "epoch": 0.26981877843239616, "grad_norm": 2.291337490081787, "learning_rate": 1.7133990675021367e-05, "loss": 0.7798, "step": 1809 }, { "epoch": 0.26996793198597957, "grad_norm": 2.127798318862915, "learning_rate": 1.713060396702975e-05, "loss": 0.7701, "step": 1810 }, { "epoch": 0.270117085539563, "grad_norm": 2.4694371223449707, "learning_rate": 1.7127215594352914e-05, "loss": 0.7293, "step": 1811 }, { "epoch": 0.2702662390931464, "grad_norm": 1.8718599081039429, "learning_rate": 1.7123825557781894e-05, "loss": 0.8412, "step": 1812 }, { "epoch": 0.2704153926467298, "grad_norm": 5.753317832946777, "learning_rate": 1.7120433858108123e-05, "loss": 0.7751, "step": 1813 }, { "epoch": 0.2705645462003132, "grad_norm": 2.5626380443573, "learning_rate": 1.7117040496123408e-05, "loss": 0.6797, "step": 1814 }, { "epoch": 0.27071369975389664, "grad_norm": 2.268822193145752, "learning_rate": 1.711364547261996e-05, "loss": 0.7422, "step": 1815 }, { "epoch": 0.27086285330748006, "grad_norm": 1.7798115015029907, "learning_rate": 1.7110248788390358e-05, "loss": 0.8024, "step": 1816 }, { "epoch": 0.27101200686106347, "grad_norm": 2.1637468338012695, "learning_rate": 1.7106850444227588e-05, "loss": 0.7875, "step": 1817 }, { "epoch": 0.2711611604146469, "grad_norm": 2.119229793548584, "learning_rate": 1.7103450440925013e-05, "loss": 0.7635, "step": 1818 }, { "epoch": 0.2713103139682303, "grad_norm": 1.4857207536697388, "learning_rate": 1.710004877927638e-05, "loss": 0.8294, "step": 1819 }, { "epoch": 0.2714594675218137, "grad_norm": 1.7243037223815918, "learning_rate": 1.7096645460075837e-05, "loss": 0.7948, "step": 1820 }, { "epoch": 0.2716086210753971, "grad_norm": 1.5757683515548706, "learning_rate": 1.7093240484117907e-05, "loss": 0.8518, "step": 1821 }, { "epoch": 0.27175777462898054, "grad_norm": 2.811607837677002, "learning_rate": 1.7089833852197508e-05, "loss": 0.7756, "step": 1822 }, { "epoch": 0.27190692818256396, "grad_norm": 2.397941827774048, "learning_rate": 1.708642556510993e-05, "loss": 0.8365, "step": 1823 }, { "epoch": 0.27205608173614737, "grad_norm": 1.7791779041290283, "learning_rate": 1.7083015623650867e-05, "loss": 0.6962, "step": 1824 }, { "epoch": 0.2722052352897308, "grad_norm": 2.0461957454681396, "learning_rate": 1.707960402861639e-05, "loss": 0.7766, "step": 1825 }, { "epoch": 0.2723543888433142, "grad_norm": 1.4346766471862793, "learning_rate": 1.707619078080296e-05, "loss": 0.8778, "step": 1826 }, { "epoch": 0.2725035423968976, "grad_norm": 3.3446741104125977, "learning_rate": 1.707277588100742e-05, "loss": 0.8362, "step": 1827 }, { "epoch": 0.272652695950481, "grad_norm": 1.6209275722503662, "learning_rate": 1.7069359330027e-05, "loss": 0.7418, "step": 1828 }, { "epoch": 0.27280184950406444, "grad_norm": 2.399555206298828, "learning_rate": 1.706594112865931e-05, "loss": 0.7247, "step": 1829 }, { "epoch": 0.27295100305764786, "grad_norm": 1.9326001405715942, "learning_rate": 1.706252127770236e-05, "loss": 0.7482, "step": 1830 }, { "epoch": 0.27310015661123127, "grad_norm": 1.6257871389389038, "learning_rate": 1.7059099777954532e-05, "loss": 0.7402, "step": 1831 }, { "epoch": 0.2732493101648147, "grad_norm": 2.7106223106384277, "learning_rate": 1.7055676630214598e-05, "loss": 0.7394, "step": 1832 }, { "epoch": 0.2733984637183981, "grad_norm": 2.0858213901519775, "learning_rate": 1.7052251835281716e-05, "loss": 0.772, "step": 1833 }, { "epoch": 0.2735476172719815, "grad_norm": 2.0855822563171387, "learning_rate": 1.704882539395542e-05, "loss": 0.8095, "step": 1834 }, { "epoch": 0.27369677082556493, "grad_norm": 1.9212236404418945, "learning_rate": 1.704539730703564e-05, "loss": 0.822, "step": 1835 }, { "epoch": 0.27384592437914834, "grad_norm": 2.5932962894439697, "learning_rate": 1.704196757532268e-05, "loss": 0.7887, "step": 1836 }, { "epoch": 0.27399507793273176, "grad_norm": 2.8085134029388428, "learning_rate": 1.703853619961724e-05, "loss": 0.6658, "step": 1837 }, { "epoch": 0.27414423148631517, "grad_norm": 2.421466112136841, "learning_rate": 1.7035103180720392e-05, "loss": 0.8115, "step": 1838 }, { "epoch": 0.2742933850398986, "grad_norm": 2.715799570083618, "learning_rate": 1.70316685194336e-05, "loss": 0.7704, "step": 1839 }, { "epoch": 0.274442538593482, "grad_norm": 1.5890417098999023, "learning_rate": 1.70282322165587e-05, "loss": 0.755, "step": 1840 }, { "epoch": 0.2745916921470654, "grad_norm": 4.705983638763428, "learning_rate": 1.7024794272897926e-05, "loss": 0.7906, "step": 1841 }, { "epoch": 0.27474084570064883, "grad_norm": 1.8465627431869507, "learning_rate": 1.7021354689253888e-05, "loss": 0.7739, "step": 1842 }, { "epoch": 0.27488999925423224, "grad_norm": 2.071155548095703, "learning_rate": 1.7017913466429572e-05, "loss": 0.7156, "step": 1843 }, { "epoch": 0.27503915280781566, "grad_norm": 2.7608466148376465, "learning_rate": 1.701447060522836e-05, "loss": 0.7816, "step": 1844 }, { "epoch": 0.27518830636139907, "grad_norm": 2.386821746826172, "learning_rate": 1.7011026106454008e-05, "loss": 0.885, "step": 1845 }, { "epoch": 0.2753374599149825, "grad_norm": 1.5718663930892944, "learning_rate": 1.7007579970910657e-05, "loss": 0.6791, "step": 1846 }, { "epoch": 0.2754866134685659, "grad_norm": 5.154609203338623, "learning_rate": 1.700413219940283e-05, "loss": 0.785, "step": 1847 }, { "epoch": 0.2756357670221493, "grad_norm": 3.076584577560425, "learning_rate": 1.7000682792735427e-05, "loss": 0.8435, "step": 1848 }, { "epoch": 0.27578492057573273, "grad_norm": 2.1489717960357666, "learning_rate": 1.699723175171374e-05, "loss": 0.7584, "step": 1849 }, { "epoch": 0.27593407412931614, "grad_norm": 4.1603169441223145, "learning_rate": 1.6993779077143437e-05, "loss": 0.6259, "step": 1850 }, { "epoch": 0.27608322768289956, "grad_norm": 2.717961072921753, "learning_rate": 1.6990324769830557e-05, "loss": 0.786, "step": 1851 }, { "epoch": 0.27623238123648297, "grad_norm": 2.736154556274414, "learning_rate": 1.6986868830581542e-05, "loss": 0.6866, "step": 1852 }, { "epoch": 0.2763815347900664, "grad_norm": 0.7165511846542358, "learning_rate": 1.6983411260203196e-05, "loss": 0.2604, "step": 1853 }, { "epoch": 0.2765306883436498, "grad_norm": 2.1246097087860107, "learning_rate": 1.6979952059502715e-05, "loss": 0.6876, "step": 1854 }, { "epoch": 0.2766798418972332, "grad_norm": 2.7437496185302734, "learning_rate": 1.697649122928767e-05, "loss": 0.7462, "step": 1855 }, { "epoch": 0.27682899545081663, "grad_norm": 2.607227325439453, "learning_rate": 1.6973028770366015e-05, "loss": 0.7606, "step": 1856 }, { "epoch": 0.27697814900440004, "grad_norm": 2.5127928256988525, "learning_rate": 1.6969564683546077e-05, "loss": 0.8073, "step": 1857 }, { "epoch": 0.27712730255798346, "grad_norm": 2.067948818206787, "learning_rate": 1.6966098969636583e-05, "loss": 0.6635, "step": 1858 }, { "epoch": 0.27727645611156687, "grad_norm": 1.9408345222473145, "learning_rate": 1.696263162944661e-05, "loss": 0.8566, "step": 1859 }, { "epoch": 0.2774256096651503, "grad_norm": 1.6203283071517944, "learning_rate": 1.695916266378564e-05, "loss": 0.7669, "step": 1860 }, { "epoch": 0.2775747632187337, "grad_norm": 2.3529765605926514, "learning_rate": 1.695569207346353e-05, "loss": 0.7778, "step": 1861 }, { "epoch": 0.2777239167723171, "grad_norm": 2.373368501663208, "learning_rate": 1.69522198592905e-05, "loss": 0.7744, "step": 1862 }, { "epoch": 0.27787307032590053, "grad_norm": 2.497913360595703, "learning_rate": 1.6948746022077167e-05, "loss": 0.6915, "step": 1863 }, { "epoch": 0.27802222387948394, "grad_norm": 4.733153343200684, "learning_rate": 1.694527056263452e-05, "loss": 0.717, "step": 1864 }, { "epoch": 0.27817137743306736, "grad_norm": 0.7245834469795227, "learning_rate": 1.6941793481773924e-05, "loss": 0.2672, "step": 1865 }, { "epoch": 0.27832053098665077, "grad_norm": 1.8044342994689941, "learning_rate": 1.693831478030713e-05, "loss": 0.8438, "step": 1866 }, { "epoch": 0.2784696845402342, "grad_norm": 4.503196716308594, "learning_rate": 1.6934834459046262e-05, "loss": 0.7669, "step": 1867 }, { "epoch": 0.2786188380938176, "grad_norm": 2.1882476806640625, "learning_rate": 1.6931352518803825e-05, "loss": 0.8108, "step": 1868 }, { "epoch": 0.278767991647401, "grad_norm": 2.7858285903930664, "learning_rate": 1.6927868960392698e-05, "loss": 0.8422, "step": 1869 }, { "epoch": 0.27891714520098443, "grad_norm": 2.343038320541382, "learning_rate": 1.692438378462614e-05, "loss": 0.7935, "step": 1870 }, { "epoch": 0.27906629875456784, "grad_norm": 1.9709877967834473, "learning_rate": 1.6920896992317785e-05, "loss": 0.734, "step": 1871 }, { "epoch": 0.27921545230815126, "grad_norm": 2.6940016746520996, "learning_rate": 1.6917408584281654e-05, "loss": 0.7872, "step": 1872 }, { "epoch": 0.27936460586173467, "grad_norm": 2.228667974472046, "learning_rate": 1.6913918561332132e-05, "loss": 0.756, "step": 1873 }, { "epoch": 0.2795137594153181, "grad_norm": 3.0465047359466553, "learning_rate": 1.6910426924283993e-05, "loss": 0.7612, "step": 1874 }, { "epoch": 0.2796629129689015, "grad_norm": 6.576287746429443, "learning_rate": 1.6906933673952375e-05, "loss": 0.7322, "step": 1875 }, { "epoch": 0.2798120665224849, "grad_norm": 2.3167762756347656, "learning_rate": 1.6903438811152803e-05, "loss": 0.7134, "step": 1876 }, { "epoch": 0.27996122007606833, "grad_norm": 1.734398603439331, "learning_rate": 1.6899942336701176e-05, "loss": 0.7659, "step": 1877 }, { "epoch": 0.28011037362965174, "grad_norm": 2.1053593158721924, "learning_rate": 1.6896444251413768e-05, "loss": 0.7437, "step": 1878 }, { "epoch": 0.28025952718323516, "grad_norm": 2.4168221950531006, "learning_rate": 1.6892944556107233e-05, "loss": 0.7324, "step": 1879 }, { "epoch": 0.28040868073681857, "grad_norm": 2.058641195297241, "learning_rate": 1.688944325159859e-05, "loss": 0.8128, "step": 1880 }, { "epoch": 0.280557834290402, "grad_norm": 2.76786208152771, "learning_rate": 1.6885940338705243e-05, "loss": 0.6892, "step": 1881 }, { "epoch": 0.2807069878439854, "grad_norm": 2.217219829559326, "learning_rate": 1.6882435818244976e-05, "loss": 0.6659, "step": 1882 }, { "epoch": 0.2808561413975688, "grad_norm": 0.7524304986000061, "learning_rate": 1.687892969103593e-05, "loss": 0.2637, "step": 1883 }, { "epoch": 0.28100529495115223, "grad_norm": 1.5415362119674683, "learning_rate": 1.6875421957896646e-05, "loss": 0.7656, "step": 1884 }, { "epoch": 0.28115444850473564, "grad_norm": 3.5320582389831543, "learning_rate": 1.6871912619646017e-05, "loss": 0.7657, "step": 1885 }, { "epoch": 0.28130360205831906, "grad_norm": 2.629786252975464, "learning_rate": 1.6868401677103324e-05, "loss": 0.7906, "step": 1886 }, { "epoch": 0.28145275561190247, "grad_norm": 2.953551769256592, "learning_rate": 1.6864889131088223e-05, "loss": 0.7638, "step": 1887 }, { "epoch": 0.2816019091654859, "grad_norm": 2.4514355659484863, "learning_rate": 1.686137498242073e-05, "loss": 0.7666, "step": 1888 }, { "epoch": 0.2817510627190693, "grad_norm": 2.6964781284332275, "learning_rate": 1.6857859231921258e-05, "loss": 0.749, "step": 1889 }, { "epoch": 0.2819002162726527, "grad_norm": 1.9181281328201294, "learning_rate": 1.6854341880410573e-05, "loss": 0.8119, "step": 1890 }, { "epoch": 0.28204936982623613, "grad_norm": 1.737630009651184, "learning_rate": 1.6850822928709825e-05, "loss": 0.7571, "step": 1891 }, { "epoch": 0.28219852337981954, "grad_norm": 2.1718785762786865, "learning_rate": 1.6847302377640538e-05, "loss": 0.8054, "step": 1892 }, { "epoch": 0.28234767693340296, "grad_norm": 1.7824453115463257, "learning_rate": 1.6843780228024605e-05, "loss": 0.8064, "step": 1893 }, { "epoch": 0.28249683048698637, "grad_norm": 1.9210150241851807, "learning_rate": 1.6840256480684294e-05, "loss": 0.7492, "step": 1894 }, { "epoch": 0.2826459840405698, "grad_norm": 2.4759302139282227, "learning_rate": 1.683673113644225e-05, "loss": 0.7229, "step": 1895 }, { "epoch": 0.2827951375941532, "grad_norm": 2.6877281665802, "learning_rate": 1.683320419612148e-05, "loss": 0.7415, "step": 1896 }, { "epoch": 0.2829442911477366, "grad_norm": 2.5276038646698, "learning_rate": 1.682967566054538e-05, "loss": 0.709, "step": 1897 }, { "epoch": 0.28309344470132003, "grad_norm": 2.1186139583587646, "learning_rate": 1.6826145530537705e-05, "loss": 0.6917, "step": 1898 }, { "epoch": 0.28324259825490344, "grad_norm": 1.9405908584594727, "learning_rate": 1.682261380692259e-05, "loss": 0.7945, "step": 1899 }, { "epoch": 0.28339175180848686, "grad_norm": 1.5279576778411865, "learning_rate": 1.6819080490524527e-05, "loss": 0.7817, "step": 1900 }, { "epoch": 0.2835409053620703, "grad_norm": 1.4628338813781738, "learning_rate": 1.6815545582168403e-05, "loss": 0.8176, "step": 1901 }, { "epoch": 0.2836900589156537, "grad_norm": 2.401406764984131, "learning_rate": 1.681200908267946e-05, "loss": 0.7347, "step": 1902 }, { "epoch": 0.2838392124692371, "grad_norm": 2.064256429672241, "learning_rate": 1.680847099288332e-05, "loss": 0.7596, "step": 1903 }, { "epoch": 0.2839883660228205, "grad_norm": 1.9243853092193604, "learning_rate": 1.680493131360597e-05, "loss": 0.7161, "step": 1904 }, { "epoch": 0.28413751957640393, "grad_norm": 2.240424871444702, "learning_rate": 1.680139004567377e-05, "loss": 0.8078, "step": 1905 }, { "epoch": 0.28428667312998734, "grad_norm": 3.0162270069122314, "learning_rate": 1.6797847189913456e-05, "loss": 0.7961, "step": 1906 }, { "epoch": 0.28443582668357076, "grad_norm": 2.1161348819732666, "learning_rate": 1.6794302747152125e-05, "loss": 0.7441, "step": 1907 }, { "epoch": 0.2845849802371542, "grad_norm": 3.2488973140716553, "learning_rate": 1.6790756718217252e-05, "loss": 0.7078, "step": 1908 }, { "epoch": 0.2847341337907376, "grad_norm": 1.7492684125900269, "learning_rate": 1.6787209103936677e-05, "loss": 0.7076, "step": 1909 }, { "epoch": 0.284883287344321, "grad_norm": 2.09171199798584, "learning_rate": 1.6783659905138626e-05, "loss": 0.7701, "step": 1910 }, { "epoch": 0.2850324408979044, "grad_norm": 4.586521625518799, "learning_rate": 1.6780109122651665e-05, "loss": 0.7251, "step": 1911 }, { "epoch": 0.28518159445148783, "grad_norm": 1.4203078746795654, "learning_rate": 1.677655675730476e-05, "loss": 0.7662, "step": 1912 }, { "epoch": 0.28533074800507124, "grad_norm": 2.710071086883545, "learning_rate": 1.6773002809927228e-05, "loss": 0.748, "step": 1913 }, { "epoch": 0.28547990155865466, "grad_norm": 1.9315780401229858, "learning_rate": 1.6769447281348757e-05, "loss": 0.7952, "step": 1914 }, { "epoch": 0.2856290551122381, "grad_norm": 2.329756021499634, "learning_rate": 1.676589017239942e-05, "loss": 0.6398, "step": 1915 }, { "epoch": 0.2857782086658215, "grad_norm": 1.8476775884628296, "learning_rate": 1.676233148390963e-05, "loss": 0.7848, "step": 1916 }, { "epoch": 0.2859273622194049, "grad_norm": 2.0007965564727783, "learning_rate": 1.6758771216710205e-05, "loss": 0.7623, "step": 1917 }, { "epoch": 0.2860765157729883, "grad_norm": 2.224273920059204, "learning_rate": 1.675520937163229e-05, "loss": 0.7573, "step": 1918 }, { "epoch": 0.28622566932657173, "grad_norm": 2.6983041763305664, "learning_rate": 1.675164594950744e-05, "loss": 0.6848, "step": 1919 }, { "epoch": 0.28637482288015514, "grad_norm": 1.2616801261901855, "learning_rate": 1.6748080951167552e-05, "loss": 0.7827, "step": 1920 }, { "epoch": 0.28652397643373856, "grad_norm": 2.4162988662719727, "learning_rate": 1.6744514377444895e-05, "loss": 0.7937, "step": 1921 }, { "epoch": 0.286673129987322, "grad_norm": 2.302753210067749, "learning_rate": 1.674094622917211e-05, "loss": 0.784, "step": 1922 }, { "epoch": 0.2868222835409054, "grad_norm": 1.9920685291290283, "learning_rate": 1.6737376507182205e-05, "loss": 0.7744, "step": 1923 }, { "epoch": 0.2869714370944888, "grad_norm": 2.7309961318969727, "learning_rate": 1.6733805212308553e-05, "loss": 0.8366, "step": 1924 }, { "epoch": 0.2871205906480722, "grad_norm": 1.6220357418060303, "learning_rate": 1.67302323453849e-05, "loss": 0.7217, "step": 1925 }, { "epoch": 0.28726974420165563, "grad_norm": 2.699460029602051, "learning_rate": 1.6726657907245348e-05, "loss": 0.7474, "step": 1926 }, { "epoch": 0.28741889775523904, "grad_norm": 1.8887845277786255, "learning_rate": 1.6723081898724377e-05, "loss": 0.7526, "step": 1927 }, { "epoch": 0.28756805130882246, "grad_norm": 1.5567444562911987, "learning_rate": 1.6719504320656827e-05, "loss": 0.7753, "step": 1928 }, { "epoch": 0.2877172048624059, "grad_norm": 1.8846838474273682, "learning_rate": 1.671592517387791e-05, "loss": 0.8177, "step": 1929 }, { "epoch": 0.2878663584159893, "grad_norm": 2.214735746383667, "learning_rate": 1.6712344459223198e-05, "loss": 0.7729, "step": 1930 }, { "epoch": 0.2880155119695727, "grad_norm": 1.580751657485962, "learning_rate": 1.6708762177528634e-05, "loss": 0.7529, "step": 1931 }, { "epoch": 0.2881646655231561, "grad_norm": 1.4202580451965332, "learning_rate": 1.670517832963052e-05, "loss": 0.7683, "step": 1932 }, { "epoch": 0.28831381907673953, "grad_norm": 1.6489832401275635, "learning_rate": 1.670159291636553e-05, "loss": 0.7414, "step": 1933 }, { "epoch": 0.28846297263032294, "grad_norm": 2.291013240814209, "learning_rate": 1.6698005938570702e-05, "loss": 0.7551, "step": 1934 }, { "epoch": 0.28861212618390636, "grad_norm": 2.0948572158813477, "learning_rate": 1.6694417397083446e-05, "loss": 0.7364, "step": 1935 }, { "epoch": 0.2887612797374898, "grad_norm": 2.351504325866699, "learning_rate": 1.669082729274152e-05, "loss": 0.7461, "step": 1936 }, { "epoch": 0.28891043329107313, "grad_norm": 4.12407922744751, "learning_rate": 1.6687235626383057e-05, "loss": 0.6307, "step": 1937 }, { "epoch": 0.28905958684465655, "grad_norm": 1.6647415161132812, "learning_rate": 1.6683642398846563e-05, "loss": 0.8166, "step": 1938 }, { "epoch": 0.28920874039823996, "grad_norm": 2.185990333557129, "learning_rate": 1.6680047610970894e-05, "loss": 0.7739, "step": 1939 }, { "epoch": 0.2893578939518234, "grad_norm": 2.1002774238586426, "learning_rate": 1.6676451263595276e-05, "loss": 0.7398, "step": 1940 }, { "epoch": 0.2895070475054068, "grad_norm": 1.562994122505188, "learning_rate": 1.6672853357559304e-05, "loss": 0.7021, "step": 1941 }, { "epoch": 0.2896562010589902, "grad_norm": 1.8241381645202637, "learning_rate": 1.666925389370293e-05, "loss": 0.7791, "step": 1942 }, { "epoch": 0.2898053546125736, "grad_norm": 2.268526077270508, "learning_rate": 1.666565287286647e-05, "loss": 0.7917, "step": 1943 }, { "epoch": 0.28995450816615703, "grad_norm": 7.714114665985107, "learning_rate": 1.6662050295890605e-05, "loss": 0.8143, "step": 1944 }, { "epoch": 0.29010366171974045, "grad_norm": 2.0038204193115234, "learning_rate": 1.6658446163616376e-05, "loss": 0.7167, "step": 1945 }, { "epoch": 0.29025281527332386, "grad_norm": 3.197638988494873, "learning_rate": 1.6654840476885205e-05, "loss": 0.7611, "step": 1946 }, { "epoch": 0.2904019688269073, "grad_norm": 0.7130116820335388, "learning_rate": 1.665123323653885e-05, "loss": 0.2667, "step": 1947 }, { "epoch": 0.2905511223804907, "grad_norm": 1.7078827619552612, "learning_rate": 1.6647624443419446e-05, "loss": 0.7739, "step": 1948 }, { "epoch": 0.2907002759340741, "grad_norm": 2.794883966445923, "learning_rate": 1.664401409836949e-05, "loss": 0.7976, "step": 1949 }, { "epoch": 0.2908494294876575, "grad_norm": 1.754306435585022, "learning_rate": 1.6640402202231847e-05, "loss": 0.7885, "step": 1950 }, { "epoch": 0.29099858304124093, "grad_norm": 1.3997567892074585, "learning_rate": 1.6636788755849725e-05, "loss": 0.7865, "step": 1951 }, { "epoch": 0.29114773659482435, "grad_norm": 1.71700119972229, "learning_rate": 1.6633173760066717e-05, "loss": 0.7566, "step": 1952 }, { "epoch": 0.29129689014840776, "grad_norm": 1.9928358793258667, "learning_rate": 1.6629557215726762e-05, "loss": 0.8032, "step": 1953 }, { "epoch": 0.2914460437019912, "grad_norm": 2.134006977081299, "learning_rate": 1.6625939123674165e-05, "loss": 0.8133, "step": 1954 }, { "epoch": 0.2915951972555746, "grad_norm": 1.5680311918258667, "learning_rate": 1.6622319484753595e-05, "loss": 0.8105, "step": 1955 }, { "epoch": 0.291744350809158, "grad_norm": 1.8153663873672485, "learning_rate": 1.6618698299810078e-05, "loss": 0.7462, "step": 1956 }, { "epoch": 0.2918935043627414, "grad_norm": 3.73686146736145, "learning_rate": 1.6615075569689005e-05, "loss": 0.7871, "step": 1957 }, { "epoch": 0.29204265791632483, "grad_norm": 1.571130633354187, "learning_rate": 1.661145129523612e-05, "loss": 0.7555, "step": 1958 }, { "epoch": 0.29219181146990825, "grad_norm": 1.5460548400878906, "learning_rate": 1.660782547729754e-05, "loss": 0.8272, "step": 1959 }, { "epoch": 0.29234096502349166, "grad_norm": 0.6229889988899231, "learning_rate": 1.6604198116719735e-05, "loss": 0.2678, "step": 1960 }, { "epoch": 0.2924901185770751, "grad_norm": 1.9426469802856445, "learning_rate": 1.6600569214349528e-05, "loss": 0.7777, "step": 1961 }, { "epoch": 0.2926392721306585, "grad_norm": 2.6193289756774902, "learning_rate": 1.6596938771034116e-05, "loss": 0.8566, "step": 1962 }, { "epoch": 0.2927884256842419, "grad_norm": 2.1342363357543945, "learning_rate": 1.6593306787621052e-05, "loss": 0.7175, "step": 1963 }, { "epoch": 0.2929375792378253, "grad_norm": 2.097839593887329, "learning_rate": 1.658967326495824e-05, "loss": 0.7884, "step": 1964 }, { "epoch": 0.29308673279140873, "grad_norm": 2.8692030906677246, "learning_rate": 1.658603820389395e-05, "loss": 0.7592, "step": 1965 }, { "epoch": 0.29323588634499215, "grad_norm": 3.4846951961517334, "learning_rate": 1.6582401605276813e-05, "loss": 0.7462, "step": 1966 }, { "epoch": 0.29338503989857556, "grad_norm": 1.7115036249160767, "learning_rate": 1.657876346995581e-05, "loss": 0.788, "step": 1967 }, { "epoch": 0.293534193452159, "grad_norm": 1.8706284761428833, "learning_rate": 1.65751237987803e-05, "loss": 0.7564, "step": 1968 }, { "epoch": 0.2936833470057424, "grad_norm": 2.0572919845581055, "learning_rate": 1.6571482592599974e-05, "loss": 0.8336, "step": 1969 }, { "epoch": 0.2938325005593258, "grad_norm": 4.221684455871582, "learning_rate": 1.6567839852264898e-05, "loss": 0.7112, "step": 1970 }, { "epoch": 0.2939816541129092, "grad_norm": 2.555386543273926, "learning_rate": 1.65641955786255e-05, "loss": 0.7699, "step": 1971 }, { "epoch": 0.29413080766649263, "grad_norm": 2.019097328186035, "learning_rate": 1.656054977253255e-05, "loss": 0.8253, "step": 1972 }, { "epoch": 0.29427996122007605, "grad_norm": 2.6139416694641113, "learning_rate": 1.655690243483719e-05, "loss": 0.7087, "step": 1973 }, { "epoch": 0.29442911477365946, "grad_norm": 2.2765209674835205, "learning_rate": 1.6553253566390916e-05, "loss": 0.7565, "step": 1974 }, { "epoch": 0.2945782683272429, "grad_norm": 2.5440526008605957, "learning_rate": 1.6549603168045577e-05, "loss": 0.7546, "step": 1975 }, { "epoch": 0.2947274218808263, "grad_norm": 2.3412013053894043, "learning_rate": 1.6545951240653383e-05, "loss": 0.8335, "step": 1976 }, { "epoch": 0.2948765754344097, "grad_norm": 2.03371262550354, "learning_rate": 1.6542297785066898e-05, "loss": 0.7478, "step": 1977 }, { "epoch": 0.2950257289879931, "grad_norm": 3.6851155757904053, "learning_rate": 1.6538642802139042e-05, "loss": 0.7555, "step": 1978 }, { "epoch": 0.29517488254157653, "grad_norm": 1.8937995433807373, "learning_rate": 1.65349862927231e-05, "loss": 0.7815, "step": 1979 }, { "epoch": 0.29532403609515995, "grad_norm": 1.9959765672683716, "learning_rate": 1.6531328257672707e-05, "loss": 0.7231, "step": 1980 }, { "epoch": 0.29547318964874336, "grad_norm": 2.9791293144226074, "learning_rate": 1.6527668697841853e-05, "loss": 0.7523, "step": 1981 }, { "epoch": 0.2956223432023268, "grad_norm": 2.240304946899414, "learning_rate": 1.6524007614084886e-05, "loss": 0.694, "step": 1982 }, { "epoch": 0.2957714967559102, "grad_norm": 2.183492422103882, "learning_rate": 1.652034500725651e-05, "loss": 0.7606, "step": 1983 }, { "epoch": 0.2959206503094936, "grad_norm": 2.549391508102417, "learning_rate": 1.651668087821178e-05, "loss": 0.7235, "step": 1984 }, { "epoch": 0.296069803863077, "grad_norm": 2.5811846256256104, "learning_rate": 1.6513015227806117e-05, "loss": 0.7516, "step": 1985 }, { "epoch": 0.29621895741666043, "grad_norm": 2.0495071411132812, "learning_rate": 1.6509348056895284e-05, "loss": 0.7194, "step": 1986 }, { "epoch": 0.29636811097024385, "grad_norm": 2.4313061237335205, "learning_rate": 1.650567936633541e-05, "loss": 0.7094, "step": 1987 }, { "epoch": 0.29651726452382726, "grad_norm": 5.817094326019287, "learning_rate": 1.6502009156982974e-05, "loss": 0.6849, "step": 1988 }, { "epoch": 0.2966664180774107, "grad_norm": 2.5179474353790283, "learning_rate": 1.649833742969481e-05, "loss": 0.8797, "step": 1989 }, { "epoch": 0.2968155716309941, "grad_norm": 2.662015199661255, "learning_rate": 1.6494664185328103e-05, "loss": 0.7429, "step": 1990 }, { "epoch": 0.2969647251845775, "grad_norm": 2.3706367015838623, "learning_rate": 1.64909894247404e-05, "loss": 0.757, "step": 1991 }, { "epoch": 0.2971138787381609, "grad_norm": 1.8093204498291016, "learning_rate": 1.6487313148789597e-05, "loss": 0.7897, "step": 1992 }, { "epoch": 0.29726303229174433, "grad_norm": 1.6703834533691406, "learning_rate": 1.648363535833394e-05, "loss": 0.8663, "step": 1993 }, { "epoch": 0.29741218584532775, "grad_norm": 2.3566696643829346, "learning_rate": 1.6479956054232034e-05, "loss": 0.7145, "step": 1994 }, { "epoch": 0.29756133939891116, "grad_norm": 2.2881479263305664, "learning_rate": 1.647627523734284e-05, "loss": 0.6523, "step": 1995 }, { "epoch": 0.2977104929524946, "grad_norm": 2.5794477462768555, "learning_rate": 1.6472592908525666e-05, "loss": 0.7741, "step": 1996 }, { "epoch": 0.297859646506078, "grad_norm": 3.10917592048645, "learning_rate": 1.6468909068640174e-05, "loss": 0.8217, "step": 1997 }, { "epoch": 0.2980088000596614, "grad_norm": 2.166274070739746, "learning_rate": 1.6465223718546383e-05, "loss": 0.7258, "step": 1998 }, { "epoch": 0.2981579536132448, "grad_norm": 1.5381088256835938, "learning_rate": 1.6461536859104658e-05, "loss": 0.7725, "step": 1999 }, { "epoch": 0.29830710716682823, "grad_norm": 1.877776026725769, "learning_rate": 1.645784849117572e-05, "loss": 0.8405, "step": 2000 }, { "epoch": 0.29845626072041165, "grad_norm": 2.5232090950012207, "learning_rate": 1.6454158615620643e-05, "loss": 0.7494, "step": 2001 }, { "epoch": 0.29860541427399506, "grad_norm": 1.8046237230300903, "learning_rate": 1.6450467233300854e-05, "loss": 0.7115, "step": 2002 }, { "epoch": 0.2987545678275785, "grad_norm": 1.4850696325302124, "learning_rate": 1.644677434507813e-05, "loss": 0.7767, "step": 2003 }, { "epoch": 0.2989037213811619, "grad_norm": 3.5374207496643066, "learning_rate": 1.64430799518146e-05, "loss": 0.7733, "step": 2004 }, { "epoch": 0.2990528749347453, "grad_norm": 1.8989436626434326, "learning_rate": 1.643938405437274e-05, "loss": 0.8235, "step": 2005 }, { "epoch": 0.2992020284883287, "grad_norm": 1.8182789087295532, "learning_rate": 1.643568665361538e-05, "loss": 0.7179, "step": 2006 }, { "epoch": 0.29935118204191213, "grad_norm": 1.7373992204666138, "learning_rate": 1.6431987750405708e-05, "loss": 0.7804, "step": 2007 }, { "epoch": 0.29950033559549555, "grad_norm": 1.816549301147461, "learning_rate": 1.6428287345607255e-05, "loss": 0.7392, "step": 2008 }, { "epoch": 0.29964948914907896, "grad_norm": 3.322305202484131, "learning_rate": 1.64245854400839e-05, "loss": 0.756, "step": 2009 }, { "epoch": 0.2997986427026624, "grad_norm": 1.7126874923706055, "learning_rate": 1.6420882034699882e-05, "loss": 0.7447, "step": 2010 }, { "epoch": 0.2999477962562458, "grad_norm": 3.219386577606201, "learning_rate": 1.641717713031978e-05, "loss": 0.7557, "step": 2011 }, { "epoch": 0.3000969498098292, "grad_norm": 0.5809448957443237, "learning_rate": 1.6413470727808533e-05, "loss": 0.2573, "step": 2012 }, { "epoch": 0.3002461033634126, "grad_norm": 3.6766793727874756, "learning_rate": 1.6409762828031416e-05, "loss": 0.6322, "step": 2013 }, { "epoch": 0.30039525691699603, "grad_norm": 1.8009755611419678, "learning_rate": 1.6406053431854066e-05, "loss": 0.7674, "step": 2014 }, { "epoch": 0.30054441047057945, "grad_norm": 2.131849765777588, "learning_rate": 1.6402342540142474e-05, "loss": 0.8043, "step": 2015 }, { "epoch": 0.30069356402416286, "grad_norm": 2.9931235313415527, "learning_rate": 1.639863015376296e-05, "loss": 0.7547, "step": 2016 }, { "epoch": 0.3008427175777463, "grad_norm": 2.309089422225952, "learning_rate": 1.6394916273582208e-05, "loss": 0.7529, "step": 2017 }, { "epoch": 0.3009918711313297, "grad_norm": 3.070016384124756, "learning_rate": 1.6391200900467245e-05, "loss": 0.6459, "step": 2018 }, { "epoch": 0.3011410246849131, "grad_norm": 1.5799354314804077, "learning_rate": 1.6387484035285456e-05, "loss": 0.7673, "step": 2019 }, { "epoch": 0.3012901782384965, "grad_norm": 1.9424976110458374, "learning_rate": 1.6383765678904563e-05, "loss": 0.7223, "step": 2020 }, { "epoch": 0.30143933179207993, "grad_norm": 6.199465274810791, "learning_rate": 1.6380045832192634e-05, "loss": 0.8186, "step": 2021 }, { "epoch": 0.30158848534566335, "grad_norm": 2.875786066055298, "learning_rate": 1.6376324496018096e-05, "loss": 0.8042, "step": 2022 }, { "epoch": 0.30173763889924676, "grad_norm": 2.187584161758423, "learning_rate": 1.6372601671249724e-05, "loss": 0.72, "step": 2023 }, { "epoch": 0.3018867924528302, "grad_norm": 2.3417372703552246, "learning_rate": 1.636887735875663e-05, "loss": 0.762, "step": 2024 }, { "epoch": 0.3020359460064136, "grad_norm": 0.6143196225166321, "learning_rate": 1.6365151559408276e-05, "loss": 0.2684, "step": 2025 }, { "epoch": 0.302185099559997, "grad_norm": 1.6672996282577515, "learning_rate": 1.636142427407448e-05, "loss": 0.7722, "step": 2026 }, { "epoch": 0.3023342531135804, "grad_norm": 3.4475886821746826, "learning_rate": 1.6357695503625394e-05, "loss": 0.674, "step": 2027 }, { "epoch": 0.30248340666716383, "grad_norm": 2.265866279602051, "learning_rate": 1.635396524893153e-05, "loss": 0.7549, "step": 2028 }, { "epoch": 0.30263256022074725, "grad_norm": 1.7987216711044312, "learning_rate": 1.6350233510863736e-05, "loss": 0.7816, "step": 2029 }, { "epoch": 0.30278171377433066, "grad_norm": 1.884336233139038, "learning_rate": 1.634650029029321e-05, "loss": 0.8057, "step": 2030 }, { "epoch": 0.3029308673279141, "grad_norm": 2.2799160480499268, "learning_rate": 1.63427655880915e-05, "loss": 0.7872, "step": 2031 }, { "epoch": 0.3030800208814975, "grad_norm": 0.5139330625534058, "learning_rate": 1.633902940513049e-05, "loss": 0.2439, "step": 2032 }, { "epoch": 0.3032291744350809, "grad_norm": 2.000887632369995, "learning_rate": 1.633529174228242e-05, "loss": 0.7603, "step": 2033 }, { "epoch": 0.3033783279886643, "grad_norm": 3.017493963241577, "learning_rate": 1.633155260041987e-05, "loss": 0.7657, "step": 2034 }, { "epoch": 0.30352748154224773, "grad_norm": 1.7037999629974365, "learning_rate": 1.632781198041577e-05, "loss": 0.7622, "step": 2035 }, { "epoch": 0.30367663509583115, "grad_norm": 2.8720977306365967, "learning_rate": 1.632406988314339e-05, "loss": 0.7524, "step": 2036 }, { "epoch": 0.30382578864941456, "grad_norm": 2.2527568340301514, "learning_rate": 1.632032630947634e-05, "loss": 0.7671, "step": 2037 }, { "epoch": 0.303974942202998, "grad_norm": 2.2257630825042725, "learning_rate": 1.631658126028859e-05, "loss": 0.7622, "step": 2038 }, { "epoch": 0.3041240957565814, "grad_norm": 2.074143648147583, "learning_rate": 1.6312834736454446e-05, "loss": 0.7334, "step": 2039 }, { "epoch": 0.3042732493101648, "grad_norm": 1.8928534984588623, "learning_rate": 1.630908673884855e-05, "loss": 0.7528, "step": 2040 }, { "epoch": 0.3044224028637482, "grad_norm": 0.5025878548622131, "learning_rate": 1.63053372683459e-05, "loss": 0.2913, "step": 2041 }, { "epoch": 0.30457155641733163, "grad_norm": 2.1952531337738037, "learning_rate": 1.630158632582184e-05, "loss": 0.7658, "step": 2042 }, { "epoch": 0.30472070997091505, "grad_norm": 2.6720755100250244, "learning_rate": 1.6297833912152043e-05, "loss": 0.7403, "step": 2043 }, { "epoch": 0.30486986352449846, "grad_norm": 0.5182031989097595, "learning_rate": 1.6294080028212532e-05, "loss": 0.2624, "step": 2044 }, { "epoch": 0.3050190170780819, "grad_norm": 1.8013389110565186, "learning_rate": 1.629032467487969e-05, "loss": 0.7946, "step": 2045 }, { "epoch": 0.3051681706316653, "grad_norm": 4.261845588684082, "learning_rate": 1.6286567853030212e-05, "loss": 0.728, "step": 2046 }, { "epoch": 0.3053173241852487, "grad_norm": 2.4782984256744385, "learning_rate": 1.628280956354116e-05, "loss": 0.7413, "step": 2047 }, { "epoch": 0.3054664777388321, "grad_norm": 1.6434892416000366, "learning_rate": 1.6279049807289936e-05, "loss": 0.7456, "step": 2048 }, { "epoch": 0.30561563129241553, "grad_norm": 0.5277445912361145, "learning_rate": 1.6275288585154267e-05, "loss": 0.2519, "step": 2049 }, { "epoch": 0.30576478484599895, "grad_norm": 2.0453076362609863, "learning_rate": 1.6271525898012242e-05, "loss": 0.7406, "step": 2050 }, { "epoch": 0.30591393839958236, "grad_norm": 1.9954121112823486, "learning_rate": 1.626776174674228e-05, "loss": 0.8223, "step": 2051 }, { "epoch": 0.3060630919531658, "grad_norm": 2.259361505508423, "learning_rate": 1.6263996132223155e-05, "loss": 0.7107, "step": 2052 }, { "epoch": 0.3062122455067492, "grad_norm": 3.4693310260772705, "learning_rate": 1.6260229055333962e-05, "loss": 0.715, "step": 2053 }, { "epoch": 0.3063613990603326, "grad_norm": 2.0497066974639893, "learning_rate": 1.625646051695416e-05, "loss": 0.6638, "step": 2054 }, { "epoch": 0.306510552613916, "grad_norm": 1.5708417892456055, "learning_rate": 1.625269051796353e-05, "loss": 0.799, "step": 2055 }, { "epoch": 0.30665970616749944, "grad_norm": 2.5258677005767822, "learning_rate": 1.624891905924221e-05, "loss": 0.8143, "step": 2056 }, { "epoch": 0.30680885972108285, "grad_norm": 2.09870982170105, "learning_rate": 1.6245146141670662e-05, "loss": 0.8517, "step": 2057 }, { "epoch": 0.30695801327466626, "grad_norm": 2.4784419536590576, "learning_rate": 1.6241371766129707e-05, "loss": 0.6932, "step": 2058 }, { "epoch": 0.3071071668282497, "grad_norm": 2.0760498046875, "learning_rate": 1.6237595933500495e-05, "loss": 0.7561, "step": 2059 }, { "epoch": 0.3072563203818331, "grad_norm": 8.538180351257324, "learning_rate": 1.6233818644664514e-05, "loss": 0.7524, "step": 2060 }, { "epoch": 0.3074054739354165, "grad_norm": 2.4673640727996826, "learning_rate": 1.6230039900503598e-05, "loss": 0.7084, "step": 2061 }, { "epoch": 0.3075546274889999, "grad_norm": 2.074475049972534, "learning_rate": 1.6226259701899922e-05, "loss": 0.7488, "step": 2062 }, { "epoch": 0.30770378104258334, "grad_norm": 1.9732462167739868, "learning_rate": 1.622247804973599e-05, "loss": 0.7282, "step": 2063 }, { "epoch": 0.30785293459616675, "grad_norm": 1.6485540866851807, "learning_rate": 1.6218694944894666e-05, "loss": 0.7122, "step": 2064 }, { "epoch": 0.30800208814975016, "grad_norm": 2.6290998458862305, "learning_rate": 1.621491038825913e-05, "loss": 0.7621, "step": 2065 }, { "epoch": 0.3081512417033336, "grad_norm": 1.6595065593719482, "learning_rate": 1.6211124380712914e-05, "loss": 0.7072, "step": 2066 }, { "epoch": 0.308300395256917, "grad_norm": 16.908769607543945, "learning_rate": 1.6207336923139886e-05, "loss": 0.6985, "step": 2067 }, { "epoch": 0.3084495488105004, "grad_norm": 2.487393379211426, "learning_rate": 1.620354801642425e-05, "loss": 0.6918, "step": 2068 }, { "epoch": 0.3085987023640838, "grad_norm": 4.400454044342041, "learning_rate": 1.6199757661450552e-05, "loss": 0.7142, "step": 2069 }, { "epoch": 0.30874785591766724, "grad_norm": 3.306971311569214, "learning_rate": 1.6195965859103675e-05, "loss": 0.7306, "step": 2070 }, { "epoch": 0.30889700947125065, "grad_norm": 2.851656913757324, "learning_rate": 1.6192172610268838e-05, "loss": 0.8482, "step": 2071 }, { "epoch": 0.30904616302483406, "grad_norm": 3.6947319507598877, "learning_rate": 1.6188377915831605e-05, "loss": 0.7391, "step": 2072 }, { "epoch": 0.3091953165784175, "grad_norm": 2.8521876335144043, "learning_rate": 1.6184581776677864e-05, "loss": 0.8656, "step": 2073 }, { "epoch": 0.3093444701320009, "grad_norm": 3.3872950077056885, "learning_rate": 1.6180784193693852e-05, "loss": 0.7092, "step": 2074 }, { "epoch": 0.3094936236855843, "grad_norm": 2.222458839416504, "learning_rate": 1.617698516776614e-05, "loss": 0.7787, "step": 2075 }, { "epoch": 0.3096427772391677, "grad_norm": 2.135449171066284, "learning_rate": 1.6173184699781632e-05, "loss": 0.8388, "step": 2076 }, { "epoch": 0.30979193079275114, "grad_norm": 3.5784294605255127, "learning_rate": 1.6169382790627575e-05, "loss": 0.7625, "step": 2077 }, { "epoch": 0.30994108434633455, "grad_norm": 2.3737077713012695, "learning_rate": 1.6165579441191546e-05, "loss": 0.6986, "step": 2078 }, { "epoch": 0.31009023789991796, "grad_norm": 2.8244330883026123, "learning_rate": 1.6161774652361463e-05, "loss": 0.7826, "step": 2079 }, { "epoch": 0.3102393914535014, "grad_norm": 2.093940258026123, "learning_rate": 1.6157968425025577e-05, "loss": 0.7957, "step": 2080 }, { "epoch": 0.3103885450070848, "grad_norm": 1.698086142539978, "learning_rate": 1.6154160760072478e-05, "loss": 0.6876, "step": 2081 }, { "epoch": 0.3105376985606682, "grad_norm": 4.081239700317383, "learning_rate": 1.6150351658391086e-05, "loss": 0.7903, "step": 2082 }, { "epoch": 0.3106868521142516, "grad_norm": 4.208715438842773, "learning_rate": 1.6146541120870667e-05, "loss": 0.6775, "step": 2083 }, { "epoch": 0.31083600566783504, "grad_norm": 2.601114273071289, "learning_rate": 1.614272914840081e-05, "loss": 0.7091, "step": 2084 }, { "epoch": 0.31098515922141845, "grad_norm": 3.180849313735962, "learning_rate": 1.6138915741871445e-05, "loss": 0.7907, "step": 2085 }, { "epoch": 0.31113431277500186, "grad_norm": 2.289376974105835, "learning_rate": 1.6135100902172838e-05, "loss": 0.7902, "step": 2086 }, { "epoch": 0.3112834663285853, "grad_norm": 2.6732163429260254, "learning_rate": 1.6131284630195588e-05, "loss": 0.6871, "step": 2087 }, { "epoch": 0.3114326198821687, "grad_norm": 2.981137990951538, "learning_rate": 1.6127466926830625e-05, "loss": 0.8002, "step": 2088 }, { "epoch": 0.3115817734357521, "grad_norm": 3.8456485271453857, "learning_rate": 1.6123647792969217e-05, "loss": 0.6594, "step": 2089 }, { "epoch": 0.3117309269893355, "grad_norm": 4.720701217651367, "learning_rate": 1.6119827229502972e-05, "loss": 0.7763, "step": 2090 }, { "epoch": 0.31188008054291894, "grad_norm": 4.474816799163818, "learning_rate": 1.611600523732382e-05, "loss": 0.8037, "step": 2091 }, { "epoch": 0.31202923409650235, "grad_norm": 1.875588059425354, "learning_rate": 1.611218181732402e-05, "loss": 0.8329, "step": 2092 }, { "epoch": 0.31217838765008576, "grad_norm": 1.7372863292694092, "learning_rate": 1.6108356970396187e-05, "loss": 0.7475, "step": 2093 }, { "epoch": 0.3123275412036692, "grad_norm": 2.707559823989868, "learning_rate": 1.6104530697433258e-05, "loss": 0.7336, "step": 2094 }, { "epoch": 0.3124766947572526, "grad_norm": 2.813842535018921, "learning_rate": 1.6100702999328494e-05, "loss": 0.7276, "step": 2095 }, { "epoch": 0.312625848310836, "grad_norm": 2.232459783554077, "learning_rate": 1.6096873876975492e-05, "loss": 0.7567, "step": 2096 }, { "epoch": 0.3127750018644194, "grad_norm": 2.2365219593048096, "learning_rate": 1.6093043331268193e-05, "loss": 0.7455, "step": 2097 }, { "epoch": 0.31292415541800284, "grad_norm": 2.975494861602783, "learning_rate": 1.6089211363100858e-05, "loss": 0.7864, "step": 2098 }, { "epoch": 0.31307330897158625, "grad_norm": 2.166168451309204, "learning_rate": 1.6085377973368088e-05, "loss": 0.736, "step": 2099 }, { "epoch": 0.31322246252516966, "grad_norm": 3.0563607215881348, "learning_rate": 1.608154316296481e-05, "loss": 0.7613, "step": 2100 }, { "epoch": 0.3133716160787531, "grad_norm": 1.5482892990112305, "learning_rate": 1.6077706932786285e-05, "loss": 0.7386, "step": 2101 }, { "epoch": 0.3135207696323365, "grad_norm": 5.283257961273193, "learning_rate": 1.6073869283728103e-05, "loss": 0.7637, "step": 2102 }, { "epoch": 0.3136699231859199, "grad_norm": 2.0128326416015625, "learning_rate": 1.6070030216686196e-05, "loss": 0.7577, "step": 2103 }, { "epoch": 0.3138190767395033, "grad_norm": 2.498612880706787, "learning_rate": 1.6066189732556812e-05, "loss": 0.7151, "step": 2104 }, { "epoch": 0.31396823029308674, "grad_norm": 2.045896291732788, "learning_rate": 1.6062347832236538e-05, "loss": 0.8056, "step": 2105 }, { "epoch": 0.31411738384667015, "grad_norm": 2.24170184135437, "learning_rate": 1.6058504516622288e-05, "loss": 0.6993, "step": 2106 }, { "epoch": 0.31426653740025356, "grad_norm": 2.49250864982605, "learning_rate": 1.6054659786611314e-05, "loss": 0.7621, "step": 2107 }, { "epoch": 0.314415690953837, "grad_norm": 3.7259132862091064, "learning_rate": 1.6050813643101194e-05, "loss": 0.746, "step": 2108 }, { "epoch": 0.3145648445074204, "grad_norm": 2.501347064971924, "learning_rate": 1.6046966086989827e-05, "loss": 0.6809, "step": 2109 }, { "epoch": 0.3147139980610038, "grad_norm": 3.098494529724121, "learning_rate": 1.604311711917545e-05, "loss": 0.6847, "step": 2110 }, { "epoch": 0.3148631516145872, "grad_norm": 2.4537858963012695, "learning_rate": 1.6039266740556638e-05, "loss": 0.6311, "step": 2111 }, { "epoch": 0.31501230516817064, "grad_norm": 5.871305465698242, "learning_rate": 1.6035414952032277e-05, "loss": 0.7854, "step": 2112 }, { "epoch": 0.31516145872175405, "grad_norm": 3.6674439907073975, "learning_rate": 1.6031561754501602e-05, "loss": 0.7904, "step": 2113 }, { "epoch": 0.31531061227533747, "grad_norm": 2.0877861976623535, "learning_rate": 1.6027707148864155e-05, "loss": 0.7177, "step": 2114 }, { "epoch": 0.3154597658289209, "grad_norm": 3.099853992462158, "learning_rate": 1.6023851136019827e-05, "loss": 0.7296, "step": 2115 }, { "epoch": 0.3156089193825043, "grad_norm": 1.640985369682312, "learning_rate": 1.601999371686883e-05, "loss": 0.7962, "step": 2116 }, { "epoch": 0.3157580729360877, "grad_norm": 2.7813355922698975, "learning_rate": 1.6016134892311694e-05, "loss": 0.7472, "step": 2117 }, { "epoch": 0.3159072264896711, "grad_norm": 2.823668956756592, "learning_rate": 1.6012274663249293e-05, "loss": 0.7598, "step": 2118 }, { "epoch": 0.31605638004325454, "grad_norm": 5.0116777420043945, "learning_rate": 1.600841303058282e-05, "loss": 0.8266, "step": 2119 }, { "epoch": 0.31620553359683795, "grad_norm": 2.374073028564453, "learning_rate": 1.60045499952138e-05, "loss": 0.7265, "step": 2120 }, { "epoch": 0.31635468715042137, "grad_norm": 2.4792988300323486, "learning_rate": 1.6000685558044082e-05, "loss": 0.7119, "step": 2121 }, { "epoch": 0.3165038407040048, "grad_norm": 0.5727195739746094, "learning_rate": 1.599681971997584e-05, "loss": 0.2491, "step": 2122 }, { "epoch": 0.3166529942575882, "grad_norm": 2.705540418624878, "learning_rate": 1.599295248191159e-05, "loss": 0.6661, "step": 2123 }, { "epoch": 0.3168021478111716, "grad_norm": 2.2193307876586914, "learning_rate": 1.5989083844754153e-05, "loss": 0.7277, "step": 2124 }, { "epoch": 0.316951301364755, "grad_norm": 2.0839126110076904, "learning_rate": 1.5985213809406686e-05, "loss": 0.6968, "step": 2125 }, { "epoch": 0.31710045491833844, "grad_norm": 2.0614633560180664, "learning_rate": 1.5981342376772687e-05, "loss": 0.7555, "step": 2126 }, { "epoch": 0.31724960847192185, "grad_norm": 2.171841621398926, "learning_rate": 1.597746954775595e-05, "loss": 0.7824, "step": 2127 }, { "epoch": 0.31739876202550527, "grad_norm": 2.29591703414917, "learning_rate": 1.597359532326062e-05, "loss": 0.7475, "step": 2128 }, { "epoch": 0.3175479155790887, "grad_norm": 1.6296980381011963, "learning_rate": 1.5969719704191164e-05, "loss": 0.7816, "step": 2129 }, { "epoch": 0.3176970691326721, "grad_norm": 2.471890449523926, "learning_rate": 1.596584269145236e-05, "loss": 0.7385, "step": 2130 }, { "epoch": 0.3178462226862555, "grad_norm": 0.5531636476516724, "learning_rate": 1.5961964285949326e-05, "loss": 0.2433, "step": 2131 }, { "epoch": 0.3179953762398389, "grad_norm": 6.751362323760986, "learning_rate": 1.59580844885875e-05, "loss": 0.8099, "step": 2132 }, { "epoch": 0.31814452979342234, "grad_norm": 2.299621820449829, "learning_rate": 1.5954203300272653e-05, "loss": 0.6808, "step": 2133 }, { "epoch": 0.31829368334700575, "grad_norm": 3.3169872760772705, "learning_rate": 1.5950320721910863e-05, "loss": 0.7801, "step": 2134 }, { "epoch": 0.31844283690058917, "grad_norm": 2.6599957942962646, "learning_rate": 1.5946436754408548e-05, "loss": 0.7174, "step": 2135 }, { "epoch": 0.3185919904541726, "grad_norm": 1.3233839273452759, "learning_rate": 1.5942551398672443e-05, "loss": 0.7735, "step": 2136 }, { "epoch": 0.318741144007756, "grad_norm": 1.7466455698013306, "learning_rate": 1.5938664655609612e-05, "loss": 0.666, "step": 2137 }, { "epoch": 0.3188902975613394, "grad_norm": 2.7951416969299316, "learning_rate": 1.5934776526127437e-05, "loss": 0.6537, "step": 2138 }, { "epoch": 0.3190394511149228, "grad_norm": 1.8140957355499268, "learning_rate": 1.5930887011133626e-05, "loss": 0.7897, "step": 2139 }, { "epoch": 0.31918860466850624, "grad_norm": 1.7350376844406128, "learning_rate": 1.5926996111536212e-05, "loss": 0.8315, "step": 2140 }, { "epoch": 0.31933775822208965, "grad_norm": 1.5117350816726685, "learning_rate": 1.592310382824356e-05, "loss": 0.7388, "step": 2141 }, { "epoch": 0.31948691177567307, "grad_norm": 1.789218783378601, "learning_rate": 1.591921016216433e-05, "loss": 0.81, "step": 2142 }, { "epoch": 0.3196360653292565, "grad_norm": 2.3065364360809326, "learning_rate": 1.591531511420754e-05, "loss": 0.7574, "step": 2143 }, { "epoch": 0.3197852188828399, "grad_norm": 2.3896658420562744, "learning_rate": 1.5911418685282506e-05, "loss": 0.8103, "step": 2144 }, { "epoch": 0.3199343724364233, "grad_norm": 2.322690010070801, "learning_rate": 1.5907520876298872e-05, "loss": 0.803, "step": 2145 }, { "epoch": 0.3200835259900067, "grad_norm": 1.6265400648117065, "learning_rate": 1.5903621688166614e-05, "loss": 0.7501, "step": 2146 }, { "epoch": 0.32023267954359014, "grad_norm": 2.275627374649048, "learning_rate": 1.589972112179602e-05, "loss": 0.8124, "step": 2147 }, { "epoch": 0.32038183309717355, "grad_norm": 0.560858964920044, "learning_rate": 1.58958191780977e-05, "loss": 0.2671, "step": 2148 }, { "epoch": 0.32053098665075697, "grad_norm": 2.4070169925689697, "learning_rate": 1.5891915857982583e-05, "loss": 0.7497, "step": 2149 }, { "epoch": 0.3206801402043404, "grad_norm": 1.3076696395874023, "learning_rate": 1.588801116236194e-05, "loss": 0.7898, "step": 2150 }, { "epoch": 0.3208292937579238, "grad_norm": 1.4519752264022827, "learning_rate": 1.5884105092147328e-05, "loss": 0.8313, "step": 2151 }, { "epoch": 0.3209784473115072, "grad_norm": 1.6632742881774902, "learning_rate": 1.5880197648250658e-05, "loss": 0.7829, "step": 2152 }, { "epoch": 0.3211276008650906, "grad_norm": 4.703943729400635, "learning_rate": 1.587628883158414e-05, "loss": 0.8034, "step": 2153 }, { "epoch": 0.32127675441867404, "grad_norm": 0.5779266357421875, "learning_rate": 1.587237864306032e-05, "loss": 0.2755, "step": 2154 }, { "epoch": 0.32142590797225745, "grad_norm": 2.8061914443969727, "learning_rate": 1.5868467083592044e-05, "loss": 0.7788, "step": 2155 }, { "epoch": 0.32157506152584087, "grad_norm": 2.4458954334259033, "learning_rate": 1.5864554154092503e-05, "loss": 0.7084, "step": 2156 }, { "epoch": 0.3217242150794243, "grad_norm": 2.924536943435669, "learning_rate": 1.5860639855475194e-05, "loss": 0.7563, "step": 2157 }, { "epoch": 0.3218733686330077, "grad_norm": 2.8919804096221924, "learning_rate": 1.5856724188653928e-05, "loss": 0.8509, "step": 2158 }, { "epoch": 0.3220225221865911, "grad_norm": 2.0244081020355225, "learning_rate": 1.585280715454285e-05, "loss": 0.7799, "step": 2159 }, { "epoch": 0.3221716757401745, "grad_norm": 2.7116410732269287, "learning_rate": 1.5848888754056408e-05, "loss": 0.7313, "step": 2160 }, { "epoch": 0.32232082929375794, "grad_norm": 2.8164000511169434, "learning_rate": 1.584496898810939e-05, "loss": 0.737, "step": 2161 }, { "epoch": 0.32246998284734135, "grad_norm": 1.6798874139785767, "learning_rate": 1.5841047857616876e-05, "loss": 0.7087, "step": 2162 }, { "epoch": 0.32261913640092477, "grad_norm": 3.5341317653656006, "learning_rate": 1.583712536349429e-05, "loss": 0.8112, "step": 2163 }, { "epoch": 0.3227682899545082, "grad_norm": 2.6780123710632324, "learning_rate": 1.583320150665736e-05, "loss": 0.7011, "step": 2164 }, { "epoch": 0.3229174435080916, "grad_norm": 3.392171859741211, "learning_rate": 1.5829276288022138e-05, "loss": 0.7338, "step": 2165 }, { "epoch": 0.323066597061675, "grad_norm": 2.452104330062866, "learning_rate": 1.5825349708504988e-05, "loss": 0.7508, "step": 2166 }, { "epoch": 0.3232157506152584, "grad_norm": 1.871716022491455, "learning_rate": 1.5821421769022593e-05, "loss": 0.7654, "step": 2167 }, { "epoch": 0.32336490416884184, "grad_norm": 2.3706185817718506, "learning_rate": 1.5817492470491962e-05, "loss": 0.7597, "step": 2168 }, { "epoch": 0.32351405772242525, "grad_norm": 1.7698057889938354, "learning_rate": 1.581356181383041e-05, "loss": 0.7533, "step": 2169 }, { "epoch": 0.32366321127600867, "grad_norm": 2.5304746627807617, "learning_rate": 1.5809629799955576e-05, "loss": 0.7716, "step": 2170 }, { "epoch": 0.3238123648295921, "grad_norm": 3.6602225303649902, "learning_rate": 1.5805696429785414e-05, "loss": 0.6843, "step": 2171 }, { "epoch": 0.3239615183831755, "grad_norm": 2.322796106338501, "learning_rate": 1.5801761704238197e-05, "loss": 0.691, "step": 2172 }, { "epoch": 0.3241106719367589, "grad_norm": 2.458456516265869, "learning_rate": 1.5797825624232506e-05, "loss": 0.7184, "step": 2173 }, { "epoch": 0.3242598254903423, "grad_norm": 2.0248191356658936, "learning_rate": 1.5793888190687247e-05, "loss": 0.8003, "step": 2174 }, { "epoch": 0.32440897904392574, "grad_norm": 1.5535224676132202, "learning_rate": 1.578994940452164e-05, "loss": 0.8472, "step": 2175 }, { "epoch": 0.32455813259750915, "grad_norm": 2.0607943534851074, "learning_rate": 1.578600926665522e-05, "loss": 0.7556, "step": 2176 }, { "epoch": 0.32470728615109257, "grad_norm": 1.8881525993347168, "learning_rate": 1.5782067778007835e-05, "loss": 0.7248, "step": 2177 }, { "epoch": 0.324856439704676, "grad_norm": 0.5976712107658386, "learning_rate": 1.5778124939499654e-05, "loss": 0.2444, "step": 2178 }, { "epoch": 0.3250055932582594, "grad_norm": 2.432565689086914, "learning_rate": 1.5774180752051152e-05, "loss": 0.7335, "step": 2179 }, { "epoch": 0.3251547468118428, "grad_norm": 1.7368693351745605, "learning_rate": 1.5770235216583136e-05, "loss": 0.7998, "step": 2180 }, { "epoch": 0.3253039003654262, "grad_norm": 2.036921739578247, "learning_rate": 1.5766288334016705e-05, "loss": 0.8103, "step": 2181 }, { "epoch": 0.32545305391900964, "grad_norm": 1.453689455986023, "learning_rate": 1.576234010527329e-05, "loss": 0.7419, "step": 2182 }, { "epoch": 0.32560220747259305, "grad_norm": 1.8044499158859253, "learning_rate": 1.575839053127463e-05, "loss": 0.7919, "step": 2183 }, { "epoch": 0.32575136102617647, "grad_norm": 2.311664581298828, "learning_rate": 1.5754439612942774e-05, "loss": 0.685, "step": 2184 }, { "epoch": 0.3259005145797599, "grad_norm": 2.7493414878845215, "learning_rate": 1.5750487351200096e-05, "loss": 0.733, "step": 2185 }, { "epoch": 0.3260496681333433, "grad_norm": 0.58433598279953, "learning_rate": 1.5746533746969275e-05, "loss": 0.237, "step": 2186 }, { "epoch": 0.3261988216869267, "grad_norm": 2.0137643814086914, "learning_rate": 1.57425788011733e-05, "loss": 0.7353, "step": 2187 }, { "epoch": 0.3263479752405101, "grad_norm": 1.691305160522461, "learning_rate": 1.5738622514735483e-05, "loss": 0.7347, "step": 2188 }, { "epoch": 0.32649712879409354, "grad_norm": 2.650662660598755, "learning_rate": 1.5734664888579448e-05, "loss": 0.738, "step": 2189 }, { "epoch": 0.32664628234767695, "grad_norm": 4.838780403137207, "learning_rate": 1.5730705923629116e-05, "loss": 0.7854, "step": 2190 }, { "epoch": 0.32679543590126037, "grad_norm": 1.6159945726394653, "learning_rate": 1.572674562080875e-05, "loss": 0.7304, "step": 2191 }, { "epoch": 0.3269445894548438, "grad_norm": 1.989288091659546, "learning_rate": 1.5722783981042892e-05, "loss": 0.7335, "step": 2192 }, { "epoch": 0.3270937430084272, "grad_norm": 1.6032410860061646, "learning_rate": 1.571882100525642e-05, "loss": 0.7686, "step": 2193 }, { "epoch": 0.3272428965620106, "grad_norm": 2.9249515533447266, "learning_rate": 1.5714856694374514e-05, "loss": 0.6779, "step": 2194 }, { "epoch": 0.327392050115594, "grad_norm": 2.5000710487365723, "learning_rate": 1.5710891049322672e-05, "loss": 0.7789, "step": 2195 }, { "epoch": 0.32754120366917744, "grad_norm": 1.3996561765670776, "learning_rate": 1.5706924071026693e-05, "loss": 0.8223, "step": 2196 }, { "epoch": 0.32769035722276085, "grad_norm": 1.7953572273254395, "learning_rate": 1.57029557604127e-05, "loss": 0.7632, "step": 2197 }, { "epoch": 0.32783951077634427, "grad_norm": 2.7106573581695557, "learning_rate": 1.5698986118407113e-05, "loss": 0.8172, "step": 2198 }, { "epoch": 0.3279886643299277, "grad_norm": 2.4647769927978516, "learning_rate": 1.569501514593668e-05, "loss": 0.7944, "step": 2199 }, { "epoch": 0.3281378178835111, "grad_norm": 1.3621187210083008, "learning_rate": 1.569104284392844e-05, "loss": 0.7698, "step": 2200 }, { "epoch": 0.3282869714370945, "grad_norm": 3.0774590969085693, "learning_rate": 1.568706921330976e-05, "loss": 0.7791, "step": 2201 }, { "epoch": 0.3284361249906779, "grad_norm": 1.7103877067565918, "learning_rate": 1.5683094255008304e-05, "loss": 0.6926, "step": 2202 }, { "epoch": 0.32858527854426134, "grad_norm": 2.6120071411132812, "learning_rate": 1.5679117969952055e-05, "loss": 0.7172, "step": 2203 }, { "epoch": 0.32873443209784475, "grad_norm": 3.3164546489715576, "learning_rate": 1.5675140359069302e-05, "loss": 0.6253, "step": 2204 }, { "epoch": 0.32888358565142817, "grad_norm": 1.982088565826416, "learning_rate": 1.5671161423288642e-05, "loss": 0.8094, "step": 2205 }, { "epoch": 0.3290327392050116, "grad_norm": 2.0183279514312744, "learning_rate": 1.566718116353898e-05, "loss": 0.724, "step": 2206 }, { "epoch": 0.329181892758595, "grad_norm": 2.880115509033203, "learning_rate": 1.5663199580749543e-05, "loss": 0.7319, "step": 2207 }, { "epoch": 0.3293310463121784, "grad_norm": 1.7967694997787476, "learning_rate": 1.565921667584985e-05, "loss": 0.7172, "step": 2208 }, { "epoch": 0.3294801998657618, "grad_norm": 1.7933380603790283, "learning_rate": 1.5655232449769738e-05, "loss": 0.7409, "step": 2209 }, { "epoch": 0.32962935341934524, "grad_norm": 1.771175742149353, "learning_rate": 1.5651246903439344e-05, "loss": 0.7908, "step": 2210 }, { "epoch": 0.32977850697292865, "grad_norm": 2.320526361465454, "learning_rate": 1.564726003778913e-05, "loss": 0.718, "step": 2211 }, { "epoch": 0.32992766052651207, "grad_norm": 6.01593542098999, "learning_rate": 1.5643271853749848e-05, "loss": 0.7582, "step": 2212 }, { "epoch": 0.3300768140800955, "grad_norm": 1.910101294517517, "learning_rate": 1.5639282352252568e-05, "loss": 0.74, "step": 2213 }, { "epoch": 0.3302259676336789, "grad_norm": 2.758133888244629, "learning_rate": 1.563529153422866e-05, "loss": 0.7804, "step": 2214 }, { "epoch": 0.3303751211872623, "grad_norm": 2.695472002029419, "learning_rate": 1.563129940060981e-05, "loss": 0.8229, "step": 2215 }, { "epoch": 0.3305242747408457, "grad_norm": 2.0598247051239014, "learning_rate": 1.562730595232801e-05, "loss": 0.6748, "step": 2216 }, { "epoch": 0.33067342829442914, "grad_norm": 2.2162399291992188, "learning_rate": 1.5623311190315554e-05, "loss": 0.6112, "step": 2217 }, { "epoch": 0.33082258184801255, "grad_norm": 2.1607887744903564, "learning_rate": 1.5619315115505037e-05, "loss": 0.8024, "step": 2218 }, { "epoch": 0.33097173540159597, "grad_norm": 2.129119873046875, "learning_rate": 1.5615317728829383e-05, "loss": 0.8448, "step": 2219 }, { "epoch": 0.3311208889551794, "grad_norm": 3.6469295024871826, "learning_rate": 1.5611319031221793e-05, "loss": 0.7285, "step": 2220 }, { "epoch": 0.3312700425087628, "grad_norm": 2.4837863445281982, "learning_rate": 1.5607319023615798e-05, "loss": 0.6756, "step": 2221 }, { "epoch": 0.3314191960623462, "grad_norm": 2.2039785385131836, "learning_rate": 1.5603317706945224e-05, "loss": 0.7834, "step": 2222 }, { "epoch": 0.3315683496159296, "grad_norm": 1.8907290697097778, "learning_rate": 1.55993150821442e-05, "loss": 0.8222, "step": 2223 }, { "epoch": 0.33171750316951304, "grad_norm": 2.1256344318389893, "learning_rate": 1.5595311150147167e-05, "loss": 0.8379, "step": 2224 }, { "epoch": 0.33186665672309645, "grad_norm": 2.0103204250335693, "learning_rate": 1.5591305911888876e-05, "loss": 0.7375, "step": 2225 }, { "epoch": 0.33201581027667987, "grad_norm": 3.3063673973083496, "learning_rate": 1.5587299368304362e-05, "loss": 0.746, "step": 2226 }, { "epoch": 0.3321649638302633, "grad_norm": 2.426652669906616, "learning_rate": 1.558329152032898e-05, "loss": 0.8327, "step": 2227 }, { "epoch": 0.3323141173838467, "grad_norm": 2.736750364303589, "learning_rate": 1.55792823688984e-05, "loss": 0.8226, "step": 2228 }, { "epoch": 0.3324632709374301, "grad_norm": 1.5851069688796997, "learning_rate": 1.5575271914948575e-05, "loss": 0.7949, "step": 2229 }, { "epoch": 0.3326124244910135, "grad_norm": 2.8312878608703613, "learning_rate": 1.557126015941577e-05, "loss": 0.7084, "step": 2230 }, { "epoch": 0.33276157804459694, "grad_norm": 2.100074529647827, "learning_rate": 1.5567247103236556e-05, "loss": 0.7299, "step": 2231 }, { "epoch": 0.33291073159818035, "grad_norm": 2.5682477951049805, "learning_rate": 1.5563232747347813e-05, "loss": 0.7471, "step": 2232 }, { "epoch": 0.33305988515176377, "grad_norm": 2.053731918334961, "learning_rate": 1.555921709268671e-05, "loss": 0.8465, "step": 2233 }, { "epoch": 0.3332090387053472, "grad_norm": 2.621652364730835, "learning_rate": 1.5555200140190732e-05, "loss": 0.6856, "step": 2234 }, { "epoch": 0.33335819225893054, "grad_norm": 1.938718557357788, "learning_rate": 1.555118189079766e-05, "loss": 0.659, "step": 2235 }, { "epoch": 0.33350734581251396, "grad_norm": 2.325775623321533, "learning_rate": 1.5547162345445584e-05, "loss": 0.6995, "step": 2236 }, { "epoch": 0.33365649936609737, "grad_norm": 1.8794758319854736, "learning_rate": 1.5543141505072888e-05, "loss": 0.7069, "step": 2237 }, { "epoch": 0.3338056529196808, "grad_norm": 2.660881280899048, "learning_rate": 1.5539119370618267e-05, "loss": 0.7461, "step": 2238 }, { "epoch": 0.3339548064732642, "grad_norm": 3.97544527053833, "learning_rate": 1.553509594302071e-05, "loss": 0.8077, "step": 2239 }, { "epoch": 0.3341039600268476, "grad_norm": 2.5230178833007812, "learning_rate": 1.5531071223219513e-05, "loss": 0.8048, "step": 2240 }, { "epoch": 0.334253113580431, "grad_norm": 2.82558274269104, "learning_rate": 1.5527045212154274e-05, "loss": 0.6989, "step": 2241 }, { "epoch": 0.33440226713401444, "grad_norm": 4.016232013702393, "learning_rate": 1.5523017910764892e-05, "loss": 0.7491, "step": 2242 }, { "epoch": 0.33455142068759786, "grad_norm": 1.6647282838821411, "learning_rate": 1.5518989319991563e-05, "loss": 0.8535, "step": 2243 }, { "epoch": 0.33470057424118127, "grad_norm": 1.6929668188095093, "learning_rate": 1.551495944077479e-05, "loss": 0.7149, "step": 2244 }, { "epoch": 0.3348497277947647, "grad_norm": 2.329629898071289, "learning_rate": 1.5510928274055373e-05, "loss": 0.7244, "step": 2245 }, { "epoch": 0.3349988813483481, "grad_norm": 1.9380446672439575, "learning_rate": 1.5506895820774416e-05, "loss": 0.7609, "step": 2246 }, { "epoch": 0.3351480349019315, "grad_norm": 2.7092583179473877, "learning_rate": 1.550286208187332e-05, "loss": 0.7847, "step": 2247 }, { "epoch": 0.3352971884555149, "grad_norm": 2.2697975635528564, "learning_rate": 1.5498827058293785e-05, "loss": 0.8027, "step": 2248 }, { "epoch": 0.33544634200909834, "grad_norm": 3.211369276046753, "learning_rate": 1.5494790750977814e-05, "loss": 0.6808, "step": 2249 }, { "epoch": 0.33559549556268176, "grad_norm": 1.694843053817749, "learning_rate": 1.549075316086771e-05, "loss": 0.7844, "step": 2250 }, { "epoch": 0.33574464911626517, "grad_norm": 5.167372703552246, "learning_rate": 1.5486714288906072e-05, "loss": 0.7092, "step": 2251 }, { "epoch": 0.3358938026698486, "grad_norm": 2.17952823638916, "learning_rate": 1.5482674136035804e-05, "loss": 0.7184, "step": 2252 }, { "epoch": 0.336042956223432, "grad_norm": 10.256339073181152, "learning_rate": 1.5478632703200104e-05, "loss": 0.7587, "step": 2253 }, { "epoch": 0.3361921097770154, "grad_norm": 1.834307312965393, "learning_rate": 1.5474589991342468e-05, "loss": 0.7461, "step": 2254 }, { "epoch": 0.3363412633305988, "grad_norm": 2.749706506729126, "learning_rate": 1.5470546001406698e-05, "loss": 0.7672, "step": 2255 }, { "epoch": 0.33649041688418224, "grad_norm": 6.905367374420166, "learning_rate": 1.5466500734336886e-05, "loss": 0.7486, "step": 2256 }, { "epoch": 0.33663957043776566, "grad_norm": 2.59146785736084, "learning_rate": 1.5462454191077427e-05, "loss": 0.8359, "step": 2257 }, { "epoch": 0.33678872399134907, "grad_norm": 3.92877197265625, "learning_rate": 1.5458406372573006e-05, "loss": 0.7396, "step": 2258 }, { "epoch": 0.3369378775449325, "grad_norm": 1.7420192956924438, "learning_rate": 1.5454357279768624e-05, "loss": 0.777, "step": 2259 }, { "epoch": 0.3370870310985159, "grad_norm": 4.335119724273682, "learning_rate": 1.5450306913609557e-05, "loss": 0.7334, "step": 2260 }, { "epoch": 0.3372361846520993, "grad_norm": 4.770608901977539, "learning_rate": 1.5446255275041398e-05, "loss": 0.7139, "step": 2261 }, { "epoch": 0.3373853382056827, "grad_norm": 2.106487989425659, "learning_rate": 1.5442202365010022e-05, "loss": 0.7276, "step": 2262 }, { "epoch": 0.33753449175926614, "grad_norm": 1.896460771560669, "learning_rate": 1.5438148184461606e-05, "loss": 0.8221, "step": 2263 }, { "epoch": 0.33768364531284956, "grad_norm": 2.622276544570923, "learning_rate": 1.543409273434263e-05, "loss": 0.6971, "step": 2264 }, { "epoch": 0.33783279886643297, "grad_norm": 2.1400763988494873, "learning_rate": 1.543003601559986e-05, "loss": 0.8255, "step": 2265 }, { "epoch": 0.3379819524200164, "grad_norm": 1.960747241973877, "learning_rate": 1.5425978029180367e-05, "loss": 0.8589, "step": 2266 }, { "epoch": 0.3381311059735998, "grad_norm": 4.3027825355529785, "learning_rate": 1.5421918776031506e-05, "loss": 0.6598, "step": 2267 }, { "epoch": 0.3382802595271832, "grad_norm": 2.018247365951538, "learning_rate": 1.5417858257100946e-05, "loss": 0.844, "step": 2268 }, { "epoch": 0.33842941308076663, "grad_norm": 2.0458011627197266, "learning_rate": 1.5413796473336635e-05, "loss": 0.6583, "step": 2269 }, { "epoch": 0.33857856663435004, "grad_norm": 5.006572246551514, "learning_rate": 1.5409733425686822e-05, "loss": 0.7793, "step": 2270 }, { "epoch": 0.33872772018793346, "grad_norm": 2.374748945236206, "learning_rate": 1.5405669115100057e-05, "loss": 0.7277, "step": 2271 }, { "epoch": 0.33887687374151687, "grad_norm": 2.1882433891296387, "learning_rate": 1.5401603542525172e-05, "loss": 0.7175, "step": 2272 }, { "epoch": 0.3390260272951003, "grad_norm": 1.3456417322158813, "learning_rate": 1.5397536708911308e-05, "loss": 0.8817, "step": 2273 }, { "epoch": 0.3391751808486837, "grad_norm": 3.985710382461548, "learning_rate": 1.5393468615207887e-05, "loss": 0.796, "step": 2274 }, { "epoch": 0.3393243344022671, "grad_norm": 2.044191598892212, "learning_rate": 1.5389399262364636e-05, "loss": 0.7741, "step": 2275 }, { "epoch": 0.33947348795585053, "grad_norm": 1.382901668548584, "learning_rate": 1.538532865133157e-05, "loss": 0.8609, "step": 2276 }, { "epoch": 0.33962264150943394, "grad_norm": 2.6210880279541016, "learning_rate": 1.5381256783059e-05, "loss": 0.7828, "step": 2277 }, { "epoch": 0.33977179506301736, "grad_norm": 2.4585652351379395, "learning_rate": 1.537718365849753e-05, "loss": 0.8392, "step": 2278 }, { "epoch": 0.33992094861660077, "grad_norm": 1.8846186399459839, "learning_rate": 1.5373109278598055e-05, "loss": 0.8033, "step": 2279 }, { "epoch": 0.3400701021701842, "grad_norm": 3.4000303745269775, "learning_rate": 1.5369033644311768e-05, "loss": 0.7255, "step": 2280 }, { "epoch": 0.3402192557237676, "grad_norm": 4.300263404846191, "learning_rate": 1.536495675659015e-05, "loss": 0.6336, "step": 2281 }, { "epoch": 0.340368409277351, "grad_norm": 2.2964179515838623, "learning_rate": 1.5360878616384975e-05, "loss": 0.79, "step": 2282 }, { "epoch": 0.34051756283093443, "grad_norm": 7.077869892120361, "learning_rate": 1.5356799224648312e-05, "loss": 0.7574, "step": 2283 }, { "epoch": 0.34066671638451784, "grad_norm": 1.73263680934906, "learning_rate": 1.5352718582332524e-05, "loss": 0.7761, "step": 2284 }, { "epoch": 0.34081586993810126, "grad_norm": 1.9595192670822144, "learning_rate": 1.534863669039026e-05, "loss": 0.7622, "step": 2285 }, { "epoch": 0.34096502349168467, "grad_norm": 2.04701828956604, "learning_rate": 1.5344553549774466e-05, "loss": 0.73, "step": 2286 }, { "epoch": 0.3411141770452681, "grad_norm": 2.2386221885681152, "learning_rate": 1.534046916143838e-05, "loss": 0.8068, "step": 2287 }, { "epoch": 0.3412633305988515, "grad_norm": 1.393654704093933, "learning_rate": 1.5336383526335517e-05, "loss": 0.6949, "step": 2288 }, { "epoch": 0.3414124841524349, "grad_norm": 2.396799087524414, "learning_rate": 1.5332296645419707e-05, "loss": 0.7485, "step": 2289 }, { "epoch": 0.34156163770601833, "grad_norm": 3.396411418914795, "learning_rate": 1.5328208519645052e-05, "loss": 0.7583, "step": 2290 }, { "epoch": 0.34171079125960174, "grad_norm": 3.1051692962646484, "learning_rate": 1.5324119149965957e-05, "loss": 0.7776, "step": 2291 }, { "epoch": 0.34185994481318516, "grad_norm": 1.8582717180252075, "learning_rate": 1.5320028537337108e-05, "loss": 0.7936, "step": 2292 }, { "epoch": 0.34200909836676857, "grad_norm": 2.694035291671753, "learning_rate": 1.531593668271348e-05, "loss": 0.7162, "step": 2293 }, { "epoch": 0.342158251920352, "grad_norm": 2.747638463973999, "learning_rate": 1.5311843587050352e-05, "loss": 0.7802, "step": 2294 }, { "epoch": 0.3423074054739354, "grad_norm": 2.1132118701934814, "learning_rate": 1.5307749251303278e-05, "loss": 0.7696, "step": 2295 }, { "epoch": 0.3424565590275188, "grad_norm": 2.2218668460845947, "learning_rate": 1.5303653676428106e-05, "loss": 0.6955, "step": 2296 }, { "epoch": 0.34260571258110223, "grad_norm": 2.523759365081787, "learning_rate": 1.529955686338098e-05, "loss": 0.7711, "step": 2297 }, { "epoch": 0.34275486613468564, "grad_norm": 2.475815534591675, "learning_rate": 1.529545881311832e-05, "loss": 0.6746, "step": 2298 }, { "epoch": 0.34290401968826906, "grad_norm": 3.0573763847351074, "learning_rate": 1.529135952659684e-05, "loss": 0.7317, "step": 2299 }, { "epoch": 0.34305317324185247, "grad_norm": 3.0882158279418945, "learning_rate": 1.528725900477356e-05, "loss": 0.6741, "step": 2300 }, { "epoch": 0.3432023267954359, "grad_norm": 1.8062540292739868, "learning_rate": 1.5283157248605758e-05, "loss": 0.7843, "step": 2301 }, { "epoch": 0.3433514803490193, "grad_norm": 2.1302649974823, "learning_rate": 1.5279054259051022e-05, "loss": 0.7492, "step": 2302 }, { "epoch": 0.3435006339026027, "grad_norm": 2.4879071712493896, "learning_rate": 1.527495003706722e-05, "loss": 0.7109, "step": 2303 }, { "epoch": 0.34364978745618613, "grad_norm": 2.7211127281188965, "learning_rate": 1.5270844583612507e-05, "loss": 0.743, "step": 2304 }, { "epoch": 0.34379894100976954, "grad_norm": 3.9498789310455322, "learning_rate": 1.5266737899645333e-05, "loss": 0.813, "step": 2305 }, { "epoch": 0.34394809456335296, "grad_norm": 2.7998061180114746, "learning_rate": 1.5262629986124422e-05, "loss": 0.6515, "step": 2306 }, { "epoch": 0.34409724811693637, "grad_norm": 1.9589927196502686, "learning_rate": 1.5258520844008797e-05, "loss": 0.6831, "step": 2307 }, { "epoch": 0.3442464016705198, "grad_norm": 3.607806444168091, "learning_rate": 1.5254410474257765e-05, "loss": 0.8083, "step": 2308 }, { "epoch": 0.3443955552241032, "grad_norm": 1.89371919631958, "learning_rate": 1.5250298877830916e-05, "loss": 0.7303, "step": 2309 }, { "epoch": 0.3445447087776866, "grad_norm": 2.170182704925537, "learning_rate": 1.5246186055688128e-05, "loss": 0.6829, "step": 2310 }, { "epoch": 0.34469386233127003, "grad_norm": 0.5653343796730042, "learning_rate": 1.5242072008789564e-05, "loss": 0.269, "step": 2311 }, { "epoch": 0.34484301588485344, "grad_norm": 1.7307647466659546, "learning_rate": 1.5237956738095681e-05, "loss": 0.8424, "step": 2312 }, { "epoch": 0.34499216943843686, "grad_norm": 1.521238923072815, "learning_rate": 1.5233840244567208e-05, "loss": 0.7469, "step": 2313 }, { "epoch": 0.34514132299202027, "grad_norm": 3.668344736099243, "learning_rate": 1.5229722529165175e-05, "loss": 0.7298, "step": 2314 }, { "epoch": 0.3452904765456037, "grad_norm": 2.0980496406555176, "learning_rate": 1.5225603592850881e-05, "loss": 0.7558, "step": 2315 }, { "epoch": 0.3454396300991871, "grad_norm": 1.8522306680679321, "learning_rate": 1.5221483436585923e-05, "loss": 0.8333, "step": 2316 }, { "epoch": 0.3455887836527705, "grad_norm": 2.93910551071167, "learning_rate": 1.5217362061332176e-05, "loss": 0.7604, "step": 2317 }, { "epoch": 0.34573793720635393, "grad_norm": 2.0968918800354004, "learning_rate": 1.5213239468051801e-05, "loss": 0.7767, "step": 2318 }, { "epoch": 0.34588709075993734, "grad_norm": 1.8606343269348145, "learning_rate": 1.5209115657707247e-05, "loss": 0.7776, "step": 2319 }, { "epoch": 0.34603624431352076, "grad_norm": 2.2233633995056152, "learning_rate": 1.520499063126124e-05, "loss": 0.6661, "step": 2320 }, { "epoch": 0.34618539786710417, "grad_norm": 2.1291275024414062, "learning_rate": 1.5200864389676793e-05, "loss": 0.7674, "step": 2321 }, { "epoch": 0.3463345514206876, "grad_norm": 1.7676811218261719, "learning_rate": 1.5196736933917211e-05, "loss": 0.7876, "step": 2322 }, { "epoch": 0.346483704974271, "grad_norm": 1.7408549785614014, "learning_rate": 1.519260826494607e-05, "loss": 0.7852, "step": 2323 }, { "epoch": 0.3466328585278544, "grad_norm": 2.0637264251708984, "learning_rate": 1.518847838372723e-05, "loss": 0.8123, "step": 2324 }, { "epoch": 0.34678201208143783, "grad_norm": 1.880610704421997, "learning_rate": 1.5184347291224843e-05, "loss": 0.793, "step": 2325 }, { "epoch": 0.34693116563502124, "grad_norm": 2.140885829925537, "learning_rate": 1.5180214988403343e-05, "loss": 0.7998, "step": 2326 }, { "epoch": 0.34708031918860466, "grad_norm": 2.1143789291381836, "learning_rate": 1.5176081476227436e-05, "loss": 0.6916, "step": 2327 }, { "epoch": 0.34722947274218807, "grad_norm": 3.537541627883911, "learning_rate": 1.5171946755662116e-05, "loss": 0.7669, "step": 2328 }, { "epoch": 0.3473786262957715, "grad_norm": 2.1448793411254883, "learning_rate": 1.5167810827672669e-05, "loss": 0.6439, "step": 2329 }, { "epoch": 0.3475277798493549, "grad_norm": 1.7791517972946167, "learning_rate": 1.5163673693224644e-05, "loss": 0.7012, "step": 2330 }, { "epoch": 0.3476769334029383, "grad_norm": 2.6066300868988037, "learning_rate": 1.5159535353283887e-05, "loss": 0.659, "step": 2331 }, { "epoch": 0.34782608695652173, "grad_norm": 1.5437183380126953, "learning_rate": 1.5155395808816518e-05, "loss": 0.7538, "step": 2332 }, { "epoch": 0.34797524051010514, "grad_norm": 1.8801668882369995, "learning_rate": 1.5151255060788941e-05, "loss": 0.8056, "step": 2333 }, { "epoch": 0.34812439406368856, "grad_norm": 1.4753236770629883, "learning_rate": 1.5147113110167841e-05, "loss": 0.7855, "step": 2334 }, { "epoch": 0.348273547617272, "grad_norm": 2.3804097175598145, "learning_rate": 1.5142969957920181e-05, "loss": 0.7117, "step": 2335 }, { "epoch": 0.3484227011708554, "grad_norm": 1.908578634262085, "learning_rate": 1.5138825605013208e-05, "loss": 0.7551, "step": 2336 }, { "epoch": 0.3485718547244388, "grad_norm": 1.9128226041793823, "learning_rate": 1.5134680052414446e-05, "loss": 0.6452, "step": 2337 }, { "epoch": 0.3487210082780222, "grad_norm": 1.533692717552185, "learning_rate": 1.51305333010917e-05, "loss": 0.7735, "step": 2338 }, { "epoch": 0.34887016183160563, "grad_norm": 2.022063970565796, "learning_rate": 1.5126385352013065e-05, "loss": 0.8039, "step": 2339 }, { "epoch": 0.34901931538518904, "grad_norm": 5.273938179016113, "learning_rate": 1.5122236206146892e-05, "loss": 0.7816, "step": 2340 }, { "epoch": 0.34916846893877246, "grad_norm": 1.9115153551101685, "learning_rate": 1.5118085864461835e-05, "loss": 0.7426, "step": 2341 }, { "epoch": 0.3493176224923559, "grad_norm": 2.796855926513672, "learning_rate": 1.5113934327926817e-05, "loss": 0.6971, "step": 2342 }, { "epoch": 0.3494667760459393, "grad_norm": 1.702865481376648, "learning_rate": 1.5109781597511038e-05, "loss": 0.7363, "step": 2343 }, { "epoch": 0.3496159295995227, "grad_norm": 2.8747506141662598, "learning_rate": 1.510562767418398e-05, "loss": 0.7269, "step": 2344 }, { "epoch": 0.3497650831531061, "grad_norm": 6.422840595245361, "learning_rate": 1.5101472558915408e-05, "loss": 0.7908, "step": 2345 }, { "epoch": 0.34991423670668953, "grad_norm": 1.696334719657898, "learning_rate": 1.5097316252675352e-05, "loss": 0.805, "step": 2346 }, { "epoch": 0.35006339026027294, "grad_norm": 2.059950590133667, "learning_rate": 1.5093158756434134e-05, "loss": 0.6903, "step": 2347 }, { "epoch": 0.35021254381385636, "grad_norm": 1.331876516342163, "learning_rate": 1.5089000071162347e-05, "loss": 0.8157, "step": 2348 }, { "epoch": 0.3503616973674398, "grad_norm": 2.6723155975341797, "learning_rate": 1.5084840197830861e-05, "loss": 0.7588, "step": 2349 }, { "epoch": 0.3505108509210232, "grad_norm": 2.0274693965911865, "learning_rate": 1.508067913741083e-05, "loss": 0.7204, "step": 2350 }, { "epoch": 0.3506600044746066, "grad_norm": 2.9137420654296875, "learning_rate": 1.5076516890873674e-05, "loss": 0.761, "step": 2351 }, { "epoch": 0.35080915802819, "grad_norm": 1.8956698179244995, "learning_rate": 1.50723534591911e-05, "loss": 0.6825, "step": 2352 }, { "epoch": 0.35095831158177343, "grad_norm": 1.4963164329528809, "learning_rate": 1.5068188843335087e-05, "loss": 0.6791, "step": 2353 }, { "epoch": 0.35110746513535684, "grad_norm": 1.528728723526001, "learning_rate": 1.5064023044277891e-05, "loss": 0.7557, "step": 2354 }, { "epoch": 0.35125661868894026, "grad_norm": 3.175849676132202, "learning_rate": 1.5059856062992042e-05, "loss": 0.7409, "step": 2355 }, { "epoch": 0.3514057722425237, "grad_norm": 1.523574709892273, "learning_rate": 1.5055687900450355e-05, "loss": 0.6123, "step": 2356 }, { "epoch": 0.3515549257961071, "grad_norm": 1.773869514465332, "learning_rate": 1.5051518557625908e-05, "loss": 0.7721, "step": 2357 }, { "epoch": 0.3517040793496905, "grad_norm": 1.8847607374191284, "learning_rate": 1.5047348035492067e-05, "loss": 0.7353, "step": 2358 }, { "epoch": 0.3518532329032739, "grad_norm": 2.0544021129608154, "learning_rate": 1.504317633502246e-05, "loss": 0.6753, "step": 2359 }, { "epoch": 0.35200238645685733, "grad_norm": 2.2533793449401855, "learning_rate": 1.5039003457191e-05, "loss": 0.748, "step": 2360 }, { "epoch": 0.35215154001044074, "grad_norm": 1.6982465982437134, "learning_rate": 1.5034829402971874e-05, "loss": 0.6836, "step": 2361 }, { "epoch": 0.35230069356402416, "grad_norm": 1.6561529636383057, "learning_rate": 1.503065417333954e-05, "loss": 0.773, "step": 2362 }, { "epoch": 0.3524498471176076, "grad_norm": 2.7837798595428467, "learning_rate": 1.5026477769268732e-05, "loss": 0.7911, "step": 2363 }, { "epoch": 0.352599000671191, "grad_norm": 2.243056535720825, "learning_rate": 1.502230019173446e-05, "loss": 0.7463, "step": 2364 }, { "epoch": 0.3527481542247744, "grad_norm": 3.1393747329711914, "learning_rate": 1.5018121441712005e-05, "loss": 0.7982, "step": 2365 }, { "epoch": 0.3528973077783578, "grad_norm": 2.069178342819214, "learning_rate": 1.5013941520176922e-05, "loss": 0.7354, "step": 2366 }, { "epoch": 0.35304646133194123, "grad_norm": 2.270618438720703, "learning_rate": 1.5009760428105045e-05, "loss": 0.6971, "step": 2367 }, { "epoch": 0.35319561488552464, "grad_norm": 3.0333316326141357, "learning_rate": 1.500557816647247e-05, "loss": 0.667, "step": 2368 }, { "epoch": 0.35334476843910806, "grad_norm": 1.8011709451675415, "learning_rate": 1.5001394736255575e-05, "loss": 0.7576, "step": 2369 }, { "epoch": 0.3534939219926915, "grad_norm": 1.4819210767745972, "learning_rate": 1.4997210138431011e-05, "loss": 0.7881, "step": 2370 }, { "epoch": 0.3536430755462749, "grad_norm": 1.3002806901931763, "learning_rate": 1.4993024373975698e-05, "loss": 0.7936, "step": 2371 }, { "epoch": 0.3537922290998583, "grad_norm": 2.678269147872925, "learning_rate": 1.4988837443866829e-05, "loss": 0.7762, "step": 2372 }, { "epoch": 0.3539413826534417, "grad_norm": 3.4634151458740234, "learning_rate": 1.4984649349081872e-05, "loss": 0.7105, "step": 2373 }, { "epoch": 0.35409053620702513, "grad_norm": 2.5345606803894043, "learning_rate": 1.4980460090598562e-05, "loss": 0.8196, "step": 2374 }, { "epoch": 0.35423968976060854, "grad_norm": 1.499001383781433, "learning_rate": 1.4976269669394908e-05, "loss": 0.7636, "step": 2375 }, { "epoch": 0.35438884331419196, "grad_norm": 1.385263204574585, "learning_rate": 1.497207808644919e-05, "loss": 0.8238, "step": 2376 }, { "epoch": 0.3545379968677754, "grad_norm": 0.5815172791481018, "learning_rate": 1.4967885342739963e-05, "loss": 0.2558, "step": 2377 }, { "epoch": 0.3546871504213588, "grad_norm": 1.730026125907898, "learning_rate": 1.496369143924605e-05, "loss": 0.7562, "step": 2378 }, { "epoch": 0.3548363039749422, "grad_norm": 5.932276725769043, "learning_rate": 1.495949637694654e-05, "loss": 0.7534, "step": 2379 }, { "epoch": 0.3549854575285256, "grad_norm": 2.2467567920684814, "learning_rate": 1.4955300156820805e-05, "loss": 0.7287, "step": 2380 }, { "epoch": 0.35513461108210903, "grad_norm": 0.5036952495574951, "learning_rate": 1.4951102779848473e-05, "loss": 0.2648, "step": 2381 }, { "epoch": 0.35528376463569245, "grad_norm": 2.4547550678253174, "learning_rate": 1.4946904247009446e-05, "loss": 0.7338, "step": 2382 }, { "epoch": 0.35543291818927586, "grad_norm": 2.0017035007476807, "learning_rate": 1.494270455928391e-05, "loss": 0.6369, "step": 2383 }, { "epoch": 0.3555820717428593, "grad_norm": 1.8305041790008545, "learning_rate": 1.49385037176523e-05, "loss": 0.8386, "step": 2384 }, { "epoch": 0.3557312252964427, "grad_norm": 2.4196698665618896, "learning_rate": 1.4934301723095325e-05, "loss": 0.7057, "step": 2385 }, { "epoch": 0.3558803788500261, "grad_norm": 2.190138578414917, "learning_rate": 1.4930098576593978e-05, "loss": 0.7857, "step": 2386 }, { "epoch": 0.3560295324036095, "grad_norm": 0.5876539349555969, "learning_rate": 1.4925894279129509e-05, "loss": 0.2602, "step": 2387 }, { "epoch": 0.35617868595719293, "grad_norm": 4.374207019805908, "learning_rate": 1.4921688831683433e-05, "loss": 0.5986, "step": 2388 }, { "epoch": 0.35632783951077635, "grad_norm": 2.0141375064849854, "learning_rate": 1.4917482235237541e-05, "loss": 0.7494, "step": 2389 }, { "epoch": 0.35647699306435976, "grad_norm": 3.0310652256011963, "learning_rate": 1.491327449077389e-05, "loss": 0.775, "step": 2390 }, { "epoch": 0.3566261466179432, "grad_norm": 1.6764107942581177, "learning_rate": 1.4909065599274806e-05, "loss": 0.7248, "step": 2391 }, { "epoch": 0.3567753001715266, "grad_norm": 3.0455520153045654, "learning_rate": 1.4904855561722881e-05, "loss": 0.6984, "step": 2392 }, { "epoch": 0.35692445372511, "grad_norm": 1.8272221088409424, "learning_rate": 1.4900644379100974e-05, "loss": 0.6512, "step": 2393 }, { "epoch": 0.3570736072786934, "grad_norm": 3.113023281097412, "learning_rate": 1.4896432052392213e-05, "loss": 0.8087, "step": 2394 }, { "epoch": 0.35722276083227683, "grad_norm": 1.921306848526001, "learning_rate": 1.4892218582579997e-05, "loss": 0.7526, "step": 2395 }, { "epoch": 0.35737191438586025, "grad_norm": 2.0603976249694824, "learning_rate": 1.4888003970647979e-05, "loss": 0.6964, "step": 2396 }, { "epoch": 0.35752106793944366, "grad_norm": 2.3995261192321777, "learning_rate": 1.4883788217580093e-05, "loss": 0.7422, "step": 2397 }, { "epoch": 0.3576702214930271, "grad_norm": 2.3653147220611572, "learning_rate": 1.4879571324360533e-05, "loss": 0.6641, "step": 2398 }, { "epoch": 0.3578193750466105, "grad_norm": 2.4195590019226074, "learning_rate": 1.487535329197376e-05, "loss": 0.7614, "step": 2399 }, { "epoch": 0.3579685286001939, "grad_norm": 2.307157039642334, "learning_rate": 1.4871134121404503e-05, "loss": 0.8302, "step": 2400 }, { "epoch": 0.3581176821537773, "grad_norm": 2.671609878540039, "learning_rate": 1.4866913813637749e-05, "loss": 0.7472, "step": 2401 }, { "epoch": 0.35826683570736073, "grad_norm": 2.6216907501220703, "learning_rate": 1.4862692369658755e-05, "loss": 0.8133, "step": 2402 }, { "epoch": 0.35841598926094415, "grad_norm": 2.4834113121032715, "learning_rate": 1.4858469790453049e-05, "loss": 0.793, "step": 2403 }, { "epoch": 0.35856514281452756, "grad_norm": 0.6543006300926208, "learning_rate": 1.485424607700642e-05, "loss": 0.2789, "step": 2404 }, { "epoch": 0.358714296368111, "grad_norm": 2.7701025009155273, "learning_rate": 1.4850021230304919e-05, "loss": 0.7391, "step": 2405 }, { "epoch": 0.3588634499216944, "grad_norm": 3.0614445209503174, "learning_rate": 1.4845795251334863e-05, "loss": 0.7708, "step": 2406 }, { "epoch": 0.3590126034752778, "grad_norm": 1.73982834815979, "learning_rate": 1.4841568141082832e-05, "loss": 0.738, "step": 2407 }, { "epoch": 0.3591617570288612, "grad_norm": 1.3200575113296509, "learning_rate": 1.4837339900535674e-05, "loss": 0.7507, "step": 2408 }, { "epoch": 0.35931091058244463, "grad_norm": 2.2536168098449707, "learning_rate": 1.4833110530680501e-05, "loss": 0.6139, "step": 2409 }, { "epoch": 0.35946006413602805, "grad_norm": 3.092571496963501, "learning_rate": 1.4828880032504684e-05, "loss": 0.7861, "step": 2410 }, { "epoch": 0.35960921768961146, "grad_norm": 2.587085485458374, "learning_rate": 1.4824648406995858e-05, "loss": 0.7632, "step": 2411 }, { "epoch": 0.3597583712431949, "grad_norm": 1.641861081123352, "learning_rate": 1.4820415655141932e-05, "loss": 0.7741, "step": 2412 }, { "epoch": 0.3599075247967783, "grad_norm": 2.6294336318969727, "learning_rate": 1.4816181777931056e-05, "loss": 0.7285, "step": 2413 }, { "epoch": 0.3600566783503617, "grad_norm": 2.2238800525665283, "learning_rate": 1.4811946776351667e-05, "loss": 0.8429, "step": 2414 }, { "epoch": 0.3602058319039451, "grad_norm": 2.4829983711242676, "learning_rate": 1.4807710651392446e-05, "loss": 0.8293, "step": 2415 }, { "epoch": 0.36035498545752853, "grad_norm": 1.9121978282928467, "learning_rate": 1.4803473404042345e-05, "loss": 0.7018, "step": 2416 }, { "epoch": 0.36050413901111195, "grad_norm": 2.362323760986328, "learning_rate": 1.479923503529058e-05, "loss": 0.8152, "step": 2417 }, { "epoch": 0.36065329256469536, "grad_norm": 1.8900823593139648, "learning_rate": 1.4794995546126625e-05, "loss": 0.7496, "step": 2418 }, { "epoch": 0.3608024461182788, "grad_norm": 0.5962539315223694, "learning_rate": 1.479075493754021e-05, "loss": 0.2855, "step": 2419 }, { "epoch": 0.3609515996718622, "grad_norm": 1.9431339502334595, "learning_rate": 1.4786513210521339e-05, "loss": 0.7474, "step": 2420 }, { "epoch": 0.3611007532254456, "grad_norm": 2.3583734035491943, "learning_rate": 1.4782270366060266e-05, "loss": 0.754, "step": 2421 }, { "epoch": 0.361249906779029, "grad_norm": 3.4956161975860596, "learning_rate": 1.4778026405147515e-05, "loss": 0.7522, "step": 2422 }, { "epoch": 0.36139906033261243, "grad_norm": 2.4022438526153564, "learning_rate": 1.477378132877386e-05, "loss": 0.7712, "step": 2423 }, { "epoch": 0.36154821388619585, "grad_norm": 2.001124382019043, "learning_rate": 1.4769535137930343e-05, "loss": 0.7251, "step": 2424 }, { "epoch": 0.36169736743977926, "grad_norm": 1.5872578620910645, "learning_rate": 1.4765287833608268e-05, "loss": 0.7549, "step": 2425 }, { "epoch": 0.3618465209933627, "grad_norm": 1.605965495109558, "learning_rate": 1.4761039416799192e-05, "loss": 0.7702, "step": 2426 }, { "epoch": 0.3619956745469461, "grad_norm": 3.661597490310669, "learning_rate": 1.4756789888494938e-05, "loss": 0.709, "step": 2427 }, { "epoch": 0.3621448281005295, "grad_norm": 1.5126835107803345, "learning_rate": 1.4752539249687583e-05, "loss": 0.7562, "step": 2428 }, { "epoch": 0.3622939816541129, "grad_norm": 2.656480550765991, "learning_rate": 1.4748287501369464e-05, "loss": 0.6918, "step": 2429 }, { "epoch": 0.36244313520769633, "grad_norm": 2.694622755050659, "learning_rate": 1.4744034644533185e-05, "loss": 0.803, "step": 2430 }, { "epoch": 0.36259228876127975, "grad_norm": 1.6953572034835815, "learning_rate": 1.4739780680171598e-05, "loss": 0.819, "step": 2431 }, { "epoch": 0.36274144231486316, "grad_norm": 2.7033393383026123, "learning_rate": 1.4735525609277819e-05, "loss": 0.674, "step": 2432 }, { "epoch": 0.3628905958684466, "grad_norm": 2.8050200939178467, "learning_rate": 1.4731269432845223e-05, "loss": 0.7221, "step": 2433 }, { "epoch": 0.36303974942203, "grad_norm": 1.8669803142547607, "learning_rate": 1.4727012151867442e-05, "loss": 0.834, "step": 2434 }, { "epoch": 0.3631889029756134, "grad_norm": 1.8742811679840088, "learning_rate": 1.472275376733836e-05, "loss": 0.7656, "step": 2435 }, { "epoch": 0.3633380565291968, "grad_norm": 1.8852061033248901, "learning_rate": 1.4718494280252133e-05, "loss": 0.7545, "step": 2436 }, { "epoch": 0.36348721008278023, "grad_norm": 1.6473538875579834, "learning_rate": 1.4714233691603161e-05, "loss": 0.7507, "step": 2437 }, { "epoch": 0.36363636363636365, "grad_norm": 1.5945158004760742, "learning_rate": 1.4709972002386104e-05, "loss": 0.8045, "step": 2438 }, { "epoch": 0.36378551718994706, "grad_norm": 2.080836534500122, "learning_rate": 1.4705709213595882e-05, "loss": 0.6433, "step": 2439 }, { "epoch": 0.3639346707435305, "grad_norm": 2.292114734649658, "learning_rate": 1.4701445326227675e-05, "loss": 0.7724, "step": 2440 }, { "epoch": 0.3640838242971139, "grad_norm": 2.173987627029419, "learning_rate": 1.4697180341276907e-05, "loss": 0.7553, "step": 2441 }, { "epoch": 0.3642329778506973, "grad_norm": 1.5108236074447632, "learning_rate": 1.4692914259739268e-05, "loss": 0.7897, "step": 2442 }, { "epoch": 0.3643821314042807, "grad_norm": 1.4673954248428345, "learning_rate": 1.4688647082610707e-05, "loss": 0.8103, "step": 2443 }, { "epoch": 0.36453128495786413, "grad_norm": 2.0701074600219727, "learning_rate": 1.4684378810887422e-05, "loss": 0.6668, "step": 2444 }, { "epoch": 0.36468043851144755, "grad_norm": 1.7718344926834106, "learning_rate": 1.4680109445565864e-05, "loss": 0.796, "step": 2445 }, { "epoch": 0.36482959206503096, "grad_norm": 2.2366504669189453, "learning_rate": 1.467583898764275e-05, "loss": 0.7495, "step": 2446 }, { "epoch": 0.3649787456186144, "grad_norm": 2.222013473510742, "learning_rate": 1.4671567438115039e-05, "loss": 0.7549, "step": 2447 }, { "epoch": 0.3651278991721978, "grad_norm": 1.8788864612579346, "learning_rate": 1.4667294797979958e-05, "loss": 0.779, "step": 2448 }, { "epoch": 0.3652770527257812, "grad_norm": 1.6783664226531982, "learning_rate": 1.466302106823498e-05, "loss": 0.7401, "step": 2449 }, { "epoch": 0.3654262062793646, "grad_norm": 1.571146011352539, "learning_rate": 1.4658746249877833e-05, "loss": 0.7776, "step": 2450 }, { "epoch": 0.36557535983294803, "grad_norm": 1.6026166677474976, "learning_rate": 1.4654470343906501e-05, "loss": 0.6921, "step": 2451 }, { "epoch": 0.36572451338653145, "grad_norm": 1.9640979766845703, "learning_rate": 1.4650193351319224e-05, "loss": 0.8214, "step": 2452 }, { "epoch": 0.36587366694011486, "grad_norm": 2.408332347869873, "learning_rate": 1.4645915273114492e-05, "loss": 0.7296, "step": 2453 }, { "epoch": 0.3660228204936983, "grad_norm": 2.0226380825042725, "learning_rate": 1.4641636110291051e-05, "loss": 0.7032, "step": 2454 }, { "epoch": 0.3661719740472817, "grad_norm": 2.2240333557128906, "learning_rate": 1.4637355863847893e-05, "loss": 0.7723, "step": 2455 }, { "epoch": 0.3663211276008651, "grad_norm": 4.11668062210083, "learning_rate": 1.4633074534784278e-05, "loss": 0.7202, "step": 2456 }, { "epoch": 0.3664702811544485, "grad_norm": 6.40680456161499, "learning_rate": 1.4628792124099704e-05, "loss": 0.6893, "step": 2457 }, { "epoch": 0.36661943470803193, "grad_norm": 1.6377924680709839, "learning_rate": 1.4624508632793928e-05, "loss": 0.7783, "step": 2458 }, { "epoch": 0.36676858826161535, "grad_norm": 3.3830230236053467, "learning_rate": 1.462022406186696e-05, "loss": 0.7818, "step": 2459 }, { "epoch": 0.36691774181519876, "grad_norm": 1.682913899421692, "learning_rate": 1.461593841231906e-05, "loss": 0.7744, "step": 2460 }, { "epoch": 0.3670668953687822, "grad_norm": 2.342630624771118, "learning_rate": 1.4611651685150738e-05, "loss": 0.7039, "step": 2461 }, { "epoch": 0.3672160489223656, "grad_norm": 1.977708339691162, "learning_rate": 1.4607363881362765e-05, "loss": 0.7898, "step": 2462 }, { "epoch": 0.367365202475949, "grad_norm": 1.550687551498413, "learning_rate": 1.4603075001956145e-05, "loss": 0.7392, "step": 2463 }, { "epoch": 0.3675143560295324, "grad_norm": 2.00559139251709, "learning_rate": 1.4598785047932153e-05, "loss": 0.7228, "step": 2464 }, { "epoch": 0.36766350958311583, "grad_norm": 1.911745548248291, "learning_rate": 1.4594494020292307e-05, "loss": 0.8047, "step": 2465 }, { "epoch": 0.36781266313669925, "grad_norm": 2.2448368072509766, "learning_rate": 1.4590201920038367e-05, "loss": 0.7456, "step": 2466 }, { "epoch": 0.36796181669028266, "grad_norm": 2.057196617126465, "learning_rate": 1.4585908748172361e-05, "loss": 0.7735, "step": 2467 }, { "epoch": 0.3681109702438661, "grad_norm": 1.745810627937317, "learning_rate": 1.4581614505696551e-05, "loss": 0.7981, "step": 2468 }, { "epoch": 0.3682601237974495, "grad_norm": 2.8653104305267334, "learning_rate": 1.4577319193613455e-05, "loss": 0.7256, "step": 2469 }, { "epoch": 0.3684092773510329, "grad_norm": 1.8399327993392944, "learning_rate": 1.4573022812925845e-05, "loss": 0.7507, "step": 2470 }, { "epoch": 0.3685584309046163, "grad_norm": 0.5944186449050903, "learning_rate": 1.4568725364636738e-05, "loss": 0.2764, "step": 2471 }, { "epoch": 0.36870758445819973, "grad_norm": 1.6008063554763794, "learning_rate": 1.45644268497494e-05, "loss": 0.7755, "step": 2472 }, { "epoch": 0.36885673801178315, "grad_norm": 2.9890358448028564, "learning_rate": 1.4560127269267344e-05, "loss": 0.7205, "step": 2473 }, { "epoch": 0.36900589156536656, "grad_norm": 1.65679931640625, "learning_rate": 1.4555826624194339e-05, "loss": 0.7579, "step": 2474 }, { "epoch": 0.36915504511895, "grad_norm": 2.1162049770355225, "learning_rate": 1.4551524915534396e-05, "loss": 0.7483, "step": 2475 }, { "epoch": 0.3693041986725334, "grad_norm": 1.5197691917419434, "learning_rate": 1.4547222144291777e-05, "loss": 0.7431, "step": 2476 }, { "epoch": 0.3694533522261168, "grad_norm": 1.5124804973602295, "learning_rate": 1.4542918311470988e-05, "loss": 0.8094, "step": 2477 }, { "epoch": 0.3696025057797002, "grad_norm": 2.338850975036621, "learning_rate": 1.4538613418076795e-05, "loss": 0.7107, "step": 2478 }, { "epoch": 0.36975165933328363, "grad_norm": 1.7917029857635498, "learning_rate": 1.4534307465114199e-05, "loss": 0.6372, "step": 2479 }, { "epoch": 0.36990081288686705, "grad_norm": 1.8362667560577393, "learning_rate": 1.4530000453588447e-05, "loss": 0.7602, "step": 2480 }, { "epoch": 0.37004996644045046, "grad_norm": 3.1983797550201416, "learning_rate": 1.4525692384505043e-05, "loss": 0.6704, "step": 2481 }, { "epoch": 0.3701991199940339, "grad_norm": 1.8255949020385742, "learning_rate": 1.4521383258869735e-05, "loss": 0.6738, "step": 2482 }, { "epoch": 0.3703482735476173, "grad_norm": 1.377828598022461, "learning_rate": 1.4517073077688513e-05, "loss": 0.8022, "step": 2483 }, { "epoch": 0.3704974271012007, "grad_norm": 4.337457180023193, "learning_rate": 1.4512761841967615e-05, "loss": 0.7389, "step": 2484 }, { "epoch": 0.3706465806547841, "grad_norm": 1.8591278791427612, "learning_rate": 1.4508449552713532e-05, "loss": 0.7122, "step": 2485 }, { "epoch": 0.37079573420836753, "grad_norm": 1.7108116149902344, "learning_rate": 1.450413621093299e-05, "loss": 0.6824, "step": 2486 }, { "epoch": 0.37094488776195095, "grad_norm": 2.0209710597991943, "learning_rate": 1.4499821817632973e-05, "loss": 0.6723, "step": 2487 }, { "epoch": 0.37109404131553436, "grad_norm": 2.0143556594848633, "learning_rate": 1.4495506373820695e-05, "loss": 0.757, "step": 2488 }, { "epoch": 0.3712431948691178, "grad_norm": 2.006776809692383, "learning_rate": 1.4491189880503633e-05, "loss": 0.7518, "step": 2489 }, { "epoch": 0.3713923484227012, "grad_norm": 1.6546885967254639, "learning_rate": 1.4486872338689492e-05, "loss": 0.7323, "step": 2490 }, { "epoch": 0.3715415019762846, "grad_norm": 1.9919873476028442, "learning_rate": 1.4482553749386234e-05, "loss": 0.7254, "step": 2491 }, { "epoch": 0.371690655529868, "grad_norm": 1.9807685613632202, "learning_rate": 1.4478234113602063e-05, "loss": 0.6826, "step": 2492 }, { "epoch": 0.37183980908345143, "grad_norm": 2.739018440246582, "learning_rate": 1.4473913432345426e-05, "loss": 0.6615, "step": 2493 }, { "epoch": 0.37198896263703485, "grad_norm": 1.7259342670440674, "learning_rate": 1.4469591706625003e-05, "loss": 0.6904, "step": 2494 }, { "epoch": 0.37213811619061826, "grad_norm": 1.897774577140808, "learning_rate": 1.4465268937449743e-05, "loss": 0.7874, "step": 2495 }, { "epoch": 0.3722872697442017, "grad_norm": 2.0673563480377197, "learning_rate": 1.446094512582882e-05, "loss": 0.6733, "step": 2496 }, { "epoch": 0.3724364232977851, "grad_norm": 1.9429478645324707, "learning_rate": 1.445662027277165e-05, "loss": 0.7173, "step": 2497 }, { "epoch": 0.3725855768513685, "grad_norm": 2.120622396469116, "learning_rate": 1.44522943792879e-05, "loss": 0.7623, "step": 2498 }, { "epoch": 0.3727347304049519, "grad_norm": 1.4273180961608887, "learning_rate": 1.4447967446387482e-05, "loss": 0.7487, "step": 2499 }, { "epoch": 0.37288388395853533, "grad_norm": 1.6639055013656616, "learning_rate": 1.444363947508054e-05, "loss": 0.7256, "step": 2500 }, { "epoch": 0.37303303751211875, "grad_norm": 4.161790370941162, "learning_rate": 1.4439310466377474e-05, "loss": 0.7249, "step": 2501 }, { "epoch": 0.37318219106570216, "grad_norm": 1.8307517766952515, "learning_rate": 1.4434980421288911e-05, "loss": 0.7993, "step": 2502 }, { "epoch": 0.3733313446192856, "grad_norm": 5.441394805908203, "learning_rate": 1.443064934082573e-05, "loss": 0.7155, "step": 2503 }, { "epoch": 0.373480498172869, "grad_norm": 1.4448796510696411, "learning_rate": 1.4426317225999055e-05, "loss": 0.8869, "step": 2504 }, { "epoch": 0.3736296517264524, "grad_norm": 3.9495160579681396, "learning_rate": 1.4421984077820242e-05, "loss": 0.795, "step": 2505 }, { "epoch": 0.3737788052800358, "grad_norm": 1.2230676412582397, "learning_rate": 1.4417649897300891e-05, "loss": 0.8577, "step": 2506 }, { "epoch": 0.37392795883361923, "grad_norm": 1.6637754440307617, "learning_rate": 1.4413314685452844e-05, "loss": 0.7409, "step": 2507 }, { "epoch": 0.37407711238720265, "grad_norm": 2.507099151611328, "learning_rate": 1.4408978443288186e-05, "loss": 0.7785, "step": 2508 }, { "epoch": 0.37422626594078606, "grad_norm": 1.562953233718872, "learning_rate": 1.440464117181924e-05, "loss": 0.8225, "step": 2509 }, { "epoch": 0.3743754194943695, "grad_norm": 3.965991258621216, "learning_rate": 1.4400302872058568e-05, "loss": 0.8398, "step": 2510 }, { "epoch": 0.3745245730479529, "grad_norm": 2.1535351276397705, "learning_rate": 1.439596354501898e-05, "loss": 0.7497, "step": 2511 }, { "epoch": 0.3746737266015363, "grad_norm": 1.6492199897766113, "learning_rate": 1.4391623191713513e-05, "loss": 0.8159, "step": 2512 }, { "epoch": 0.3748228801551197, "grad_norm": 2.7775397300720215, "learning_rate": 1.4387281813155451e-05, "loss": 0.7453, "step": 2513 }, { "epoch": 0.37497203370870313, "grad_norm": 2.6052751541137695, "learning_rate": 1.438293941035832e-05, "loss": 0.8121, "step": 2514 }, { "epoch": 0.37512118726228655, "grad_norm": 1.7551010847091675, "learning_rate": 1.4378595984335881e-05, "loss": 0.7867, "step": 2515 }, { "epoch": 0.37527034081586996, "grad_norm": 1.8810803890228271, "learning_rate": 1.4374251536102131e-05, "loss": 0.7315, "step": 2516 }, { "epoch": 0.3754194943694534, "grad_norm": 1.9948830604553223, "learning_rate": 1.4369906066671313e-05, "loss": 0.7097, "step": 2517 }, { "epoch": 0.3755686479230368, "grad_norm": 2.1941494941711426, "learning_rate": 1.4365559577057905e-05, "loss": 0.7519, "step": 2518 }, { "epoch": 0.3757178014766202, "grad_norm": 1.7295688390731812, "learning_rate": 1.4361212068276622e-05, "loss": 0.8015, "step": 2519 }, { "epoch": 0.3758669550302036, "grad_norm": 1.533087134361267, "learning_rate": 1.4356863541342416e-05, "loss": 0.6886, "step": 2520 }, { "epoch": 0.37601610858378703, "grad_norm": 1.594626784324646, "learning_rate": 1.435251399727048e-05, "loss": 0.803, "step": 2521 }, { "epoch": 0.37616526213737045, "grad_norm": 2.196753978729248, "learning_rate": 1.4348163437076243e-05, "loss": 0.7086, "step": 2522 }, { "epoch": 0.37631441569095386, "grad_norm": 0.5399982929229736, "learning_rate": 1.4343811861775373e-05, "loss": 0.2515, "step": 2523 }, { "epoch": 0.3764635692445373, "grad_norm": 2.0820841789245605, "learning_rate": 1.4339459272383766e-05, "loss": 0.6616, "step": 2524 }, { "epoch": 0.3766127227981207, "grad_norm": 1.6206889152526855, "learning_rate": 1.433510566991757e-05, "loss": 0.7411, "step": 2525 }, { "epoch": 0.3767618763517041, "grad_norm": 2.3448123931884766, "learning_rate": 1.4330751055393162e-05, "loss": 0.6946, "step": 2526 }, { "epoch": 0.3769110299052875, "grad_norm": 1.4949756860733032, "learning_rate": 1.4326395429827147e-05, "loss": 0.7107, "step": 2527 }, { "epoch": 0.37706018345887093, "grad_norm": 3.9631905555725098, "learning_rate": 1.4322038794236379e-05, "loss": 0.7934, "step": 2528 }, { "epoch": 0.37720933701245435, "grad_norm": 1.8587749004364014, "learning_rate": 1.4317681149637941e-05, "loss": 0.6472, "step": 2529 }, { "epoch": 0.37735849056603776, "grad_norm": 1.971408486366272, "learning_rate": 1.4313322497049153e-05, "loss": 0.7035, "step": 2530 }, { "epoch": 0.3775076441196212, "grad_norm": 2.395641326904297, "learning_rate": 1.4308962837487573e-05, "loss": 0.6571, "step": 2531 }, { "epoch": 0.3776567976732046, "grad_norm": 2.343587875366211, "learning_rate": 1.430460217197099e-05, "loss": 0.7472, "step": 2532 }, { "epoch": 0.37780595122678795, "grad_norm": 1.7088156938552856, "learning_rate": 1.4300240501517424e-05, "loss": 0.815, "step": 2533 }, { "epoch": 0.37795510478037136, "grad_norm": 2.129579782485962, "learning_rate": 1.4295877827145144e-05, "loss": 0.7427, "step": 2534 }, { "epoch": 0.3781042583339548, "grad_norm": 3.6093943119049072, "learning_rate": 1.4291514149872638e-05, "loss": 0.7228, "step": 2535 }, { "epoch": 0.3782534118875382, "grad_norm": 1.982795000076294, "learning_rate": 1.4287149470718635e-05, "loss": 0.7153, "step": 2536 }, { "epoch": 0.3784025654411216, "grad_norm": 2.553468942642212, "learning_rate": 1.4282783790702102e-05, "loss": 0.7939, "step": 2537 }, { "epoch": 0.378551718994705, "grad_norm": 2.2331721782684326, "learning_rate": 1.427841711084223e-05, "loss": 0.7307, "step": 2538 }, { "epoch": 0.37870087254828844, "grad_norm": 3.7194888591766357, "learning_rate": 1.427404943215845e-05, "loss": 0.7284, "step": 2539 }, { "epoch": 0.37885002610187185, "grad_norm": 0.6294202208518982, "learning_rate": 1.4269680755670425e-05, "loss": 0.2481, "step": 2540 }, { "epoch": 0.37899917965545526, "grad_norm": 4.2160964012146, "learning_rate": 1.426531108239805e-05, "loss": 0.6788, "step": 2541 }, { "epoch": 0.3791483332090387, "grad_norm": 3.213931083679199, "learning_rate": 1.4260940413361452e-05, "loss": 0.7277, "step": 2542 }, { "epoch": 0.3792974867626221, "grad_norm": 2.94417405128479, "learning_rate": 1.4256568749580996e-05, "loss": 0.7358, "step": 2543 }, { "epoch": 0.3794466403162055, "grad_norm": 1.9742141962051392, "learning_rate": 1.425219609207727e-05, "loss": 0.8341, "step": 2544 }, { "epoch": 0.3795957938697889, "grad_norm": 2.303034543991089, "learning_rate": 1.4247822441871105e-05, "loss": 0.7517, "step": 2545 }, { "epoch": 0.37974494742337234, "grad_norm": 1.6106630563735962, "learning_rate": 1.424344779998355e-05, "loss": 0.6925, "step": 2546 }, { "epoch": 0.37989410097695575, "grad_norm": 1.9367923736572266, "learning_rate": 1.4239072167435897e-05, "loss": 0.6577, "step": 2547 }, { "epoch": 0.38004325453053917, "grad_norm": 2.6783711910247803, "learning_rate": 1.4234695545249666e-05, "loss": 0.7186, "step": 2548 }, { "epoch": 0.3801924080841226, "grad_norm": 2.160407304763794, "learning_rate": 1.4230317934446607e-05, "loss": 0.6947, "step": 2549 }, { "epoch": 0.380341561637706, "grad_norm": 1.9060426950454712, "learning_rate": 1.4225939336048703e-05, "loss": 0.7672, "step": 2550 }, { "epoch": 0.3804907151912894, "grad_norm": 2.002310037612915, "learning_rate": 1.422155975107816e-05, "loss": 0.7193, "step": 2551 }, { "epoch": 0.3806398687448728, "grad_norm": 2.276428461074829, "learning_rate": 1.4217179180557428e-05, "loss": 0.7587, "step": 2552 }, { "epoch": 0.38078902229845624, "grad_norm": 2.51381516456604, "learning_rate": 1.4212797625509173e-05, "loss": 0.6153, "step": 2553 }, { "epoch": 0.38093817585203965, "grad_norm": 2.225541114807129, "learning_rate": 1.4208415086956305e-05, "loss": 0.6933, "step": 2554 }, { "epoch": 0.38108732940562307, "grad_norm": 1.887338638305664, "learning_rate": 1.4204031565921944e-05, "loss": 0.7144, "step": 2555 }, { "epoch": 0.3812364829592065, "grad_norm": 1.4863946437835693, "learning_rate": 1.419964706342946e-05, "loss": 0.7776, "step": 2556 }, { "epoch": 0.3813856365127899, "grad_norm": 2.0869219303131104, "learning_rate": 1.4195261580502442e-05, "loss": 0.772, "step": 2557 }, { "epoch": 0.3815347900663733, "grad_norm": 3.370859146118164, "learning_rate": 1.4190875118164706e-05, "loss": 0.8524, "step": 2558 }, { "epoch": 0.3816839436199567, "grad_norm": 1.4855656623840332, "learning_rate": 1.4186487677440304e-05, "loss": 0.8291, "step": 2559 }, { "epoch": 0.38183309717354014, "grad_norm": 1.997308611869812, "learning_rate": 1.4182099259353508e-05, "loss": 0.744, "step": 2560 }, { "epoch": 0.38198225072712355, "grad_norm": 2.4246273040771484, "learning_rate": 1.4177709864928822e-05, "loss": 0.7965, "step": 2561 }, { "epoch": 0.38213140428070697, "grad_norm": 2.6164183616638184, "learning_rate": 1.4173319495190984e-05, "loss": 0.7562, "step": 2562 }, { "epoch": 0.3822805578342904, "grad_norm": 2.2089903354644775, "learning_rate": 1.416892815116495e-05, "loss": 0.6848, "step": 2563 }, { "epoch": 0.3824297113878738, "grad_norm": 1.9455541372299194, "learning_rate": 1.4164535833875905e-05, "loss": 0.7055, "step": 2564 }, { "epoch": 0.3825788649414572, "grad_norm": 2.537647008895874, "learning_rate": 1.416014254434927e-05, "loss": 0.6463, "step": 2565 }, { "epoch": 0.3827280184950406, "grad_norm": 2.016191005706787, "learning_rate": 1.415574828361068e-05, "loss": 0.7949, "step": 2566 }, { "epoch": 0.38287717204862404, "grad_norm": 1.7349919080734253, "learning_rate": 1.4151353052686008e-05, "loss": 0.7975, "step": 2567 }, { "epoch": 0.38302632560220745, "grad_norm": 1.6704413890838623, "learning_rate": 1.4146956852601349e-05, "loss": 0.799, "step": 2568 }, { "epoch": 0.38317547915579087, "grad_norm": 1.9412486553192139, "learning_rate": 1.4142559684383018e-05, "loss": 0.7666, "step": 2569 }, { "epoch": 0.3833246327093743, "grad_norm": 4.524580955505371, "learning_rate": 1.413816154905757e-05, "loss": 0.696, "step": 2570 }, { "epoch": 0.3834737862629577, "grad_norm": 11.40431022644043, "learning_rate": 1.4133762447651774e-05, "loss": 0.6903, "step": 2571 }, { "epoch": 0.3836229398165411, "grad_norm": 3.7344532012939453, "learning_rate": 1.4129362381192626e-05, "loss": 0.6463, "step": 2572 }, { "epoch": 0.3837720933701245, "grad_norm": 2.9489004611968994, "learning_rate": 1.4124961350707354e-05, "loss": 0.6886, "step": 2573 }, { "epoch": 0.38392124692370794, "grad_norm": 3.457700729370117, "learning_rate": 1.4120559357223407e-05, "loss": 0.7079, "step": 2574 }, { "epoch": 0.38407040047729135, "grad_norm": 3.8770251274108887, "learning_rate": 1.4116156401768452e-05, "loss": 0.7414, "step": 2575 }, { "epoch": 0.38421955403087477, "grad_norm": 5.569248676300049, "learning_rate": 1.4111752485370399e-05, "loss": 0.6676, "step": 2576 }, { "epoch": 0.3843687075844582, "grad_norm": 2.523664951324463, "learning_rate": 1.4107347609057358e-05, "loss": 0.7636, "step": 2577 }, { "epoch": 0.3845178611380416, "grad_norm": 1.7438769340515137, "learning_rate": 1.4102941773857683e-05, "loss": 0.7111, "step": 2578 }, { "epoch": 0.384667014691625, "grad_norm": 3.226609230041504, "learning_rate": 1.4098534980799943e-05, "loss": 0.7157, "step": 2579 }, { "epoch": 0.3848161682452084, "grad_norm": 0.682859480381012, "learning_rate": 1.4094127230912931e-05, "loss": 0.2774, "step": 2580 }, { "epoch": 0.38496532179879184, "grad_norm": 1.7236559391021729, "learning_rate": 1.4089718525225667e-05, "loss": 0.7388, "step": 2581 }, { "epoch": 0.38511447535237525, "grad_norm": 2.7791123390197754, "learning_rate": 1.4085308864767389e-05, "loss": 0.6945, "step": 2582 }, { "epoch": 0.38526362890595867, "grad_norm": 4.317877292633057, "learning_rate": 1.4080898250567559e-05, "loss": 0.7169, "step": 2583 }, { "epoch": 0.3854127824595421, "grad_norm": 2.50342059135437, "learning_rate": 1.407648668365587e-05, "loss": 0.7727, "step": 2584 }, { "epoch": 0.3855619360131255, "grad_norm": 3.4429311752319336, "learning_rate": 1.4072074165062224e-05, "loss": 0.7518, "step": 2585 }, { "epoch": 0.3857110895667089, "grad_norm": 6.646127223968506, "learning_rate": 1.4067660695816751e-05, "loss": 0.745, "step": 2586 }, { "epoch": 0.3858602431202923, "grad_norm": 2.782975435256958, "learning_rate": 1.4063246276949811e-05, "loss": 0.7402, "step": 2587 }, { "epoch": 0.38600939667387574, "grad_norm": 2.8533074855804443, "learning_rate": 1.4058830909491971e-05, "loss": 0.678, "step": 2588 }, { "epoch": 0.38615855022745915, "grad_norm": 1.7028882503509521, "learning_rate": 1.4054414594474033e-05, "loss": 0.7866, "step": 2589 }, { "epoch": 0.38630770378104257, "grad_norm": 1.4914573431015015, "learning_rate": 1.4049997332927007e-05, "loss": 0.7696, "step": 2590 }, { "epoch": 0.386456857334626, "grad_norm": 1.5297391414642334, "learning_rate": 1.4045579125882136e-05, "loss": 0.7578, "step": 2591 }, { "epoch": 0.3866060108882094, "grad_norm": 1.5600651502609253, "learning_rate": 1.4041159974370881e-05, "loss": 0.7116, "step": 2592 }, { "epoch": 0.3867551644417928, "grad_norm": 1.8758840560913086, "learning_rate": 1.4036739879424916e-05, "loss": 0.784, "step": 2593 }, { "epoch": 0.3869043179953762, "grad_norm": 0.6825559735298157, "learning_rate": 1.403231884207614e-05, "loss": 0.2565, "step": 2594 }, { "epoch": 0.38705347154895964, "grad_norm": 1.4180793762207031, "learning_rate": 1.4027896863356679e-05, "loss": 0.7964, "step": 2595 }, { "epoch": 0.38720262510254305, "grad_norm": 1.981702208518982, "learning_rate": 1.4023473944298864e-05, "loss": 0.7334, "step": 2596 }, { "epoch": 0.38735177865612647, "grad_norm": 2.681269884109497, "learning_rate": 1.401905008593526e-05, "loss": 0.7542, "step": 2597 }, { "epoch": 0.3875009322097099, "grad_norm": 0.5824897289276123, "learning_rate": 1.4014625289298645e-05, "loss": 0.2633, "step": 2598 }, { "epoch": 0.3876500857632933, "grad_norm": 3.0331876277923584, "learning_rate": 1.401019955542201e-05, "loss": 0.7173, "step": 2599 }, { "epoch": 0.3877992393168767, "grad_norm": 0.5782367587089539, "learning_rate": 1.4005772885338578e-05, "loss": 0.2457, "step": 2600 }, { "epoch": 0.3879483928704601, "grad_norm": 2.5917909145355225, "learning_rate": 1.4001345280081782e-05, "loss": 0.7906, "step": 2601 }, { "epoch": 0.38809754642404354, "grad_norm": 2.017709970474243, "learning_rate": 1.399691674068527e-05, "loss": 0.8003, "step": 2602 }, { "epoch": 0.38824669997762695, "grad_norm": 4.222305774688721, "learning_rate": 1.399248726818292e-05, "loss": 0.6824, "step": 2603 }, { "epoch": 0.38839585353121037, "grad_norm": 2.4793152809143066, "learning_rate": 1.3988056863608815e-05, "loss": 0.8103, "step": 2604 }, { "epoch": 0.3885450070847938, "grad_norm": 2.8998212814331055, "learning_rate": 1.3983625527997264e-05, "loss": 0.7895, "step": 2605 }, { "epoch": 0.3886941606383772, "grad_norm": 1.7099344730377197, "learning_rate": 1.3979193262382791e-05, "loss": 0.7082, "step": 2606 }, { "epoch": 0.3888433141919606, "grad_norm": 1.6599347591400146, "learning_rate": 1.3974760067800137e-05, "loss": 0.7526, "step": 2607 }, { "epoch": 0.388992467745544, "grad_norm": 1.8054958581924438, "learning_rate": 1.3970325945284255e-05, "loss": 0.7345, "step": 2608 }, { "epoch": 0.38914162129912744, "grad_norm": 1.3928757905960083, "learning_rate": 1.3965890895870328e-05, "loss": 0.798, "step": 2609 }, { "epoch": 0.38929077485271085, "grad_norm": 1.9680131673812866, "learning_rate": 1.3961454920593743e-05, "loss": 0.8136, "step": 2610 }, { "epoch": 0.38943992840629427, "grad_norm": 1.884954571723938, "learning_rate": 1.3957018020490101e-05, "loss": 0.8125, "step": 2611 }, { "epoch": 0.3895890819598777, "grad_norm": 1.6717991828918457, "learning_rate": 1.3952580196595232e-05, "loss": 0.8157, "step": 2612 }, { "epoch": 0.3897382355134611, "grad_norm": 3.125805377960205, "learning_rate": 1.3948141449945172e-05, "loss": 0.7443, "step": 2613 }, { "epoch": 0.3898873890670445, "grad_norm": 3.4485068321228027, "learning_rate": 1.3943701781576172e-05, "loss": 0.6284, "step": 2614 }, { "epoch": 0.3900365426206279, "grad_norm": 2.2774605751037598, "learning_rate": 1.3939261192524708e-05, "loss": 0.8308, "step": 2615 }, { "epoch": 0.39018569617421134, "grad_norm": 8.444136619567871, "learning_rate": 1.3934819683827457e-05, "loss": 0.729, "step": 2616 }, { "epoch": 0.39033484972779475, "grad_norm": 2.8838016986846924, "learning_rate": 1.393037725652132e-05, "loss": 0.7925, "step": 2617 }, { "epoch": 0.39048400328137817, "grad_norm": 1.4260010719299316, "learning_rate": 1.3925933911643415e-05, "loss": 0.8024, "step": 2618 }, { "epoch": 0.3906331568349616, "grad_norm": 1.616339087486267, "learning_rate": 1.3921489650231061e-05, "loss": 0.7233, "step": 2619 }, { "epoch": 0.390782310388545, "grad_norm": 1.871172547340393, "learning_rate": 1.3917044473321805e-05, "loss": 0.7582, "step": 2620 }, { "epoch": 0.3909314639421284, "grad_norm": 1.8937416076660156, "learning_rate": 1.39125983819534e-05, "loss": 0.7859, "step": 2621 }, { "epoch": 0.3910806174957118, "grad_norm": 2.571308135986328, "learning_rate": 1.3908151377163815e-05, "loss": 0.7205, "step": 2622 }, { "epoch": 0.39122977104929524, "grad_norm": 3.091423273086548, "learning_rate": 1.3903703459991234e-05, "loss": 0.6538, "step": 2623 }, { "epoch": 0.39137892460287865, "grad_norm": 1.915156602859497, "learning_rate": 1.3899254631474048e-05, "loss": 0.7415, "step": 2624 }, { "epoch": 0.39152807815646207, "grad_norm": 1.8408817052841187, "learning_rate": 1.3894804892650864e-05, "loss": 0.7834, "step": 2625 }, { "epoch": 0.3916772317100455, "grad_norm": 1.9853514432907104, "learning_rate": 1.3890354244560507e-05, "loss": 0.754, "step": 2626 }, { "epoch": 0.3918263852636289, "grad_norm": 1.7915997505187988, "learning_rate": 1.3885902688242006e-05, "loss": 0.8026, "step": 2627 }, { "epoch": 0.3919755388172123, "grad_norm": 2.373114585876465, "learning_rate": 1.3881450224734604e-05, "loss": 0.6605, "step": 2628 }, { "epoch": 0.3921246923707957, "grad_norm": 2.802994966506958, "learning_rate": 1.3876996855077763e-05, "loss": 0.8199, "step": 2629 }, { "epoch": 0.39227384592437914, "grad_norm": 2.1590025424957275, "learning_rate": 1.3872542580311144e-05, "loss": 0.7213, "step": 2630 }, { "epoch": 0.39242299947796255, "grad_norm": 2.6983954906463623, "learning_rate": 1.3868087401474628e-05, "loss": 0.7402, "step": 2631 }, { "epoch": 0.39257215303154597, "grad_norm": 0.5820170044898987, "learning_rate": 1.3863631319608306e-05, "loss": 0.2501, "step": 2632 }, { "epoch": 0.3927213065851294, "grad_norm": 2.2828240394592285, "learning_rate": 1.385917433575248e-05, "loss": 0.7386, "step": 2633 }, { "epoch": 0.3928704601387128, "grad_norm": 2.2530736923217773, "learning_rate": 1.3854716450947658e-05, "loss": 0.6676, "step": 2634 }, { "epoch": 0.3930196136922962, "grad_norm": 2.4148614406585693, "learning_rate": 1.3850257666234569e-05, "loss": 0.8054, "step": 2635 }, { "epoch": 0.3931687672458796, "grad_norm": 3.1997182369232178, "learning_rate": 1.3845797982654134e-05, "loss": 0.7625, "step": 2636 }, { "epoch": 0.39331792079946304, "grad_norm": 2.390111207962036, "learning_rate": 1.3841337401247503e-05, "loss": 0.7071, "step": 2637 }, { "epoch": 0.39346707435304645, "grad_norm": 3.628296375274658, "learning_rate": 1.3836875923056026e-05, "loss": 0.7125, "step": 2638 }, { "epoch": 0.39361622790662987, "grad_norm": 8.064422607421875, "learning_rate": 1.383241354912126e-05, "loss": 0.7108, "step": 2639 }, { "epoch": 0.3937653814602133, "grad_norm": 0.6069369912147522, "learning_rate": 1.3827950280484981e-05, "loss": 0.2853, "step": 2640 }, { "epoch": 0.3939145350137967, "grad_norm": 2.9424004554748535, "learning_rate": 1.382348611818916e-05, "loss": 0.7475, "step": 2641 }, { "epoch": 0.3940636885673801, "grad_norm": 1.9877285957336426, "learning_rate": 1.381902106327599e-05, "loss": 0.7303, "step": 2642 }, { "epoch": 0.3942128421209635, "grad_norm": 1.9289114475250244, "learning_rate": 1.3814555116787864e-05, "loss": 0.7257, "step": 2643 }, { "epoch": 0.39436199567454694, "grad_norm": 2.6988449096679688, "learning_rate": 1.3810088279767389e-05, "loss": 0.7477, "step": 2644 }, { "epoch": 0.39451114922813035, "grad_norm": 1.9688607454299927, "learning_rate": 1.3805620553257374e-05, "loss": 0.7501, "step": 2645 }, { "epoch": 0.39466030278171377, "grad_norm": 1.623916745185852, "learning_rate": 1.380115193830084e-05, "loss": 0.6715, "step": 2646 }, { "epoch": 0.3948094563352972, "grad_norm": 1.3563040494918823, "learning_rate": 1.379668243594101e-05, "loss": 0.752, "step": 2647 }, { "epoch": 0.3949586098888806, "grad_norm": 1.7570879459381104, "learning_rate": 1.3792212047221326e-05, "loss": 0.6721, "step": 2648 }, { "epoch": 0.395107763442464, "grad_norm": 2.129155158996582, "learning_rate": 1.3787740773185418e-05, "loss": 0.7531, "step": 2649 }, { "epoch": 0.3952569169960474, "grad_norm": 2.3552567958831787, "learning_rate": 1.3783268614877144e-05, "loss": 0.8096, "step": 2650 }, { "epoch": 0.39540607054963084, "grad_norm": 2.428593158721924, "learning_rate": 1.3778795573340551e-05, "loss": 0.7075, "step": 2651 }, { "epoch": 0.39555522410321425, "grad_norm": 2.161959648132324, "learning_rate": 1.3774321649619902e-05, "loss": 0.762, "step": 2652 }, { "epoch": 0.39570437765679767, "grad_norm": 2.4051272869110107, "learning_rate": 1.376984684475966e-05, "loss": 0.7674, "step": 2653 }, { "epoch": 0.3958535312103811, "grad_norm": 3.199249267578125, "learning_rate": 1.3765371159804503e-05, "loss": 0.713, "step": 2654 }, { "epoch": 0.3960026847639645, "grad_norm": 3.7612223625183105, "learning_rate": 1.3760894595799305e-05, "loss": 0.8059, "step": 2655 }, { "epoch": 0.3961518383175479, "grad_norm": 2.4633214473724365, "learning_rate": 1.3756417153789148e-05, "loss": 0.8238, "step": 2656 }, { "epoch": 0.3963009918711313, "grad_norm": 2.008761405944824, "learning_rate": 1.375193883481932e-05, "loss": 0.7508, "step": 2657 }, { "epoch": 0.39645014542471474, "grad_norm": 3.563364267349243, "learning_rate": 1.3747459639935312e-05, "loss": 0.7903, "step": 2658 }, { "epoch": 0.39659929897829815, "grad_norm": 2.289320707321167, "learning_rate": 1.3742979570182827e-05, "loss": 0.6859, "step": 2659 }, { "epoch": 0.39674845253188157, "grad_norm": 4.490297794342041, "learning_rate": 1.3738498626607758e-05, "loss": 0.6422, "step": 2660 }, { "epoch": 0.396897606085465, "grad_norm": 2.721245527267456, "learning_rate": 1.3734016810256213e-05, "loss": 0.7245, "step": 2661 }, { "epoch": 0.3970467596390484, "grad_norm": 1.962740421295166, "learning_rate": 1.37295341221745e-05, "loss": 0.7256, "step": 2662 }, { "epoch": 0.3971959131926318, "grad_norm": 3.704921007156372, "learning_rate": 1.3725050563409135e-05, "loss": 0.7825, "step": 2663 }, { "epoch": 0.3973450667462152, "grad_norm": 2.185703754425049, "learning_rate": 1.372056613500683e-05, "loss": 0.7974, "step": 2664 }, { "epoch": 0.39749422029979864, "grad_norm": 5.288949012756348, "learning_rate": 1.37160808380145e-05, "loss": 0.6857, "step": 2665 }, { "epoch": 0.39764337385338205, "grad_norm": 3.3993144035339355, "learning_rate": 1.3711594673479279e-05, "loss": 0.6761, "step": 2666 }, { "epoch": 0.39779252740696547, "grad_norm": 2.3195669651031494, "learning_rate": 1.3707107642448477e-05, "loss": 0.7602, "step": 2667 }, { "epoch": 0.3979416809605489, "grad_norm": 2.285890817642212, "learning_rate": 1.3702619745969628e-05, "loss": 0.7507, "step": 2668 }, { "epoch": 0.3980908345141323, "grad_norm": 2.2374112606048584, "learning_rate": 1.3698130985090455e-05, "loss": 0.7799, "step": 2669 }, { "epoch": 0.3982399880677157, "grad_norm": 3.7779548168182373, "learning_rate": 1.3693641360858891e-05, "loss": 0.7507, "step": 2670 }, { "epoch": 0.3983891416212991, "grad_norm": 3.299612283706665, "learning_rate": 1.3689150874323072e-05, "loss": 0.6835, "step": 2671 }, { "epoch": 0.39853829517488254, "grad_norm": 3.6012587547302246, "learning_rate": 1.368465952653132e-05, "loss": 0.7442, "step": 2672 }, { "epoch": 0.39868744872846595, "grad_norm": 2.495443820953369, "learning_rate": 1.3680167318532182e-05, "loss": 0.7568, "step": 2673 }, { "epoch": 0.39883660228204937, "grad_norm": 2.573845148086548, "learning_rate": 1.3675674251374382e-05, "loss": 0.8272, "step": 2674 }, { "epoch": 0.3989857558356328, "grad_norm": 65.7842025756836, "learning_rate": 1.367118032610686e-05, "loss": 0.7453, "step": 2675 }, { "epoch": 0.3991349093892162, "grad_norm": 0.5566263794898987, "learning_rate": 1.3666685543778755e-05, "loss": 0.2764, "step": 2676 }, { "epoch": 0.3992840629427996, "grad_norm": 2.9559686183929443, "learning_rate": 1.3662189905439394e-05, "loss": 0.6555, "step": 2677 }, { "epoch": 0.399433216496383, "grad_norm": 4.235053062438965, "learning_rate": 1.3657693412138318e-05, "loss": 0.7687, "step": 2678 }, { "epoch": 0.39958237004996644, "grad_norm": 3.0798466205596924, "learning_rate": 1.3653196064925264e-05, "loss": 0.79, "step": 2679 }, { "epoch": 0.39973152360354985, "grad_norm": 6.550405979156494, "learning_rate": 1.3648697864850162e-05, "loss": 0.7213, "step": 2680 }, { "epoch": 0.39988067715713327, "grad_norm": 6.055425643920898, "learning_rate": 1.364419881296315e-05, "loss": 0.8075, "step": 2681 }, { "epoch": 0.4000298307107167, "grad_norm": 13.95593547821045, "learning_rate": 1.3639698910314556e-05, "loss": 0.6725, "step": 2682 }, { "epoch": 0.4001789842643001, "grad_norm": 8.7855863571167, "learning_rate": 1.3635198157954915e-05, "loss": 0.7363, "step": 2683 }, { "epoch": 0.4003281378178835, "grad_norm": 2.8556694984436035, "learning_rate": 1.3630696556934955e-05, "loss": 0.7207, "step": 2684 }, { "epoch": 0.4004772913714669, "grad_norm": 2.8693323135375977, "learning_rate": 1.3626194108305606e-05, "loss": 0.7046, "step": 2685 }, { "epoch": 0.40062644492505034, "grad_norm": 3.688178062438965, "learning_rate": 1.3621690813117987e-05, "loss": 0.7393, "step": 2686 }, { "epoch": 0.40077559847863375, "grad_norm": 4.183507442474365, "learning_rate": 1.3617186672423426e-05, "loss": 0.8089, "step": 2687 }, { "epoch": 0.40092475203221717, "grad_norm": 4.277623176574707, "learning_rate": 1.3612681687273445e-05, "loss": 0.7259, "step": 2688 }, { "epoch": 0.4010739055858006, "grad_norm": 6.250547409057617, "learning_rate": 1.3608175858719757e-05, "loss": 0.8131, "step": 2689 }, { "epoch": 0.401223059139384, "grad_norm": 3.2632229328155518, "learning_rate": 1.360366918781428e-05, "loss": 0.7644, "step": 2690 }, { "epoch": 0.4013722126929674, "grad_norm": 4.662309646606445, "learning_rate": 1.3599161675609125e-05, "loss": 0.6168, "step": 2691 }, { "epoch": 0.4015213662465508, "grad_norm": 2.905609607696533, "learning_rate": 1.3594653323156597e-05, "loss": 0.8439, "step": 2692 }, { "epoch": 0.40167051980013424, "grad_norm": 7.8471903800964355, "learning_rate": 1.3590144131509205e-05, "loss": 0.6796, "step": 2693 }, { "epoch": 0.40181967335371765, "grad_norm": 7.429738521575928, "learning_rate": 1.3585634101719642e-05, "loss": 0.6845, "step": 2694 }, { "epoch": 0.40196882690730107, "grad_norm": 2.278486967086792, "learning_rate": 1.3581123234840807e-05, "loss": 0.7772, "step": 2695 }, { "epoch": 0.4021179804608845, "grad_norm": 3.7000598907470703, "learning_rate": 1.3576611531925791e-05, "loss": 0.7511, "step": 2696 }, { "epoch": 0.4022671340144679, "grad_norm": 0.5573254227638245, "learning_rate": 1.357209899402788e-05, "loss": 0.2563, "step": 2697 }, { "epoch": 0.4024162875680513, "grad_norm": 4.154815196990967, "learning_rate": 1.3567585622200556e-05, "loss": 0.819, "step": 2698 }, { "epoch": 0.4025654411216347, "grad_norm": 2.877368211746216, "learning_rate": 1.3563071417497493e-05, "loss": 0.7791, "step": 2699 }, { "epoch": 0.40271459467521814, "grad_norm": 2.6917569637298584, "learning_rate": 1.3558556380972555e-05, "loss": 0.7184, "step": 2700 }, { "epoch": 0.40286374822880155, "grad_norm": 3.2527105808258057, "learning_rate": 1.3554040513679821e-05, "loss": 0.7256, "step": 2701 }, { "epoch": 0.40301290178238497, "grad_norm": 6.636259078979492, "learning_rate": 1.3549523816673536e-05, "loss": 0.7284, "step": 2702 }, { "epoch": 0.4031620553359684, "grad_norm": 11.438105583190918, "learning_rate": 1.3545006291008155e-05, "loss": 0.6693, "step": 2703 }, { "epoch": 0.4033112088895518, "grad_norm": 2.727491855621338, "learning_rate": 1.3540487937738327e-05, "loss": 0.8019, "step": 2704 }, { "epoch": 0.4034603624431352, "grad_norm": 4.258185863494873, "learning_rate": 1.3535968757918887e-05, "loss": 0.796, "step": 2705 }, { "epoch": 0.4036095159967186, "grad_norm": 0.5186730027198792, "learning_rate": 1.3531448752604867e-05, "loss": 0.2415, "step": 2706 }, { "epoch": 0.40375866955030204, "grad_norm": 12.436433792114258, "learning_rate": 1.3526927922851495e-05, "loss": 0.7802, "step": 2707 }, { "epoch": 0.40390782310388545, "grad_norm": 2.9618968963623047, "learning_rate": 1.3522406269714182e-05, "loss": 0.6992, "step": 2708 }, { "epoch": 0.40405697665746887, "grad_norm": 1.6451845169067383, "learning_rate": 1.3517883794248539e-05, "loss": 0.7738, "step": 2709 }, { "epoch": 0.4042061302110523, "grad_norm": 5.486622333526611, "learning_rate": 1.351336049751037e-05, "loss": 0.7341, "step": 2710 }, { "epoch": 0.4043552837646357, "grad_norm": 2.9393246173858643, "learning_rate": 1.3508836380555662e-05, "loss": 0.712, "step": 2711 }, { "epoch": 0.4045044373182191, "grad_norm": 3.0377652645111084, "learning_rate": 1.3504311444440605e-05, "loss": 0.8282, "step": 2712 }, { "epoch": 0.4046535908718025, "grad_norm": 3.396083354949951, "learning_rate": 1.3499785690221571e-05, "loss": 0.7658, "step": 2713 }, { "epoch": 0.40480274442538594, "grad_norm": 4.714700222015381, "learning_rate": 1.3495259118955124e-05, "loss": 0.6902, "step": 2714 }, { "epoch": 0.40495189797896936, "grad_norm": 3.954125165939331, "learning_rate": 1.3490731731698028e-05, "loss": 0.7217, "step": 2715 }, { "epoch": 0.40510105153255277, "grad_norm": 2.770977735519409, "learning_rate": 1.3486203529507225e-05, "loss": 0.7506, "step": 2716 }, { "epoch": 0.4052502050861362, "grad_norm": 4.35026741027832, "learning_rate": 1.3481674513439853e-05, "loss": 0.7596, "step": 2717 }, { "epoch": 0.4053993586397196, "grad_norm": 2.815690279006958, "learning_rate": 1.3477144684553243e-05, "loss": 0.7971, "step": 2718 }, { "epoch": 0.405548512193303, "grad_norm": 2.187633514404297, "learning_rate": 1.347261404390491e-05, "loss": 0.8133, "step": 2719 }, { "epoch": 0.4056976657468864, "grad_norm": 2.5404272079467773, "learning_rate": 1.3468082592552562e-05, "loss": 0.6986, "step": 2720 }, { "epoch": 0.40584681930046984, "grad_norm": 2.1610147953033447, "learning_rate": 1.3463550331554096e-05, "loss": 0.7193, "step": 2721 }, { "epoch": 0.40599597285405326, "grad_norm": 3.3893842697143555, "learning_rate": 1.3459017261967593e-05, "loss": 0.7907, "step": 2722 }, { "epoch": 0.40614512640763667, "grad_norm": 2.633361339569092, "learning_rate": 1.3454483384851335e-05, "loss": 0.6608, "step": 2723 }, { "epoch": 0.4062942799612201, "grad_norm": 2.6031715869903564, "learning_rate": 1.3449948701263782e-05, "loss": 0.7216, "step": 2724 }, { "epoch": 0.4064434335148035, "grad_norm": 3.3762331008911133, "learning_rate": 1.344541321226358e-05, "loss": 0.7585, "step": 2725 }, { "epoch": 0.4065925870683869, "grad_norm": 2.432025194168091, "learning_rate": 1.3440876918909571e-05, "loss": 0.7208, "step": 2726 }, { "epoch": 0.4067417406219703, "grad_norm": 2.5090818405151367, "learning_rate": 1.3436339822260785e-05, "loss": 0.7532, "step": 2727 }, { "epoch": 0.40689089417555374, "grad_norm": 1.9768298864364624, "learning_rate": 1.343180192337643e-05, "loss": 0.7679, "step": 2728 }, { "epoch": 0.40704004772913716, "grad_norm": 4.596574306488037, "learning_rate": 1.3427263223315916e-05, "loss": 0.7173, "step": 2729 }, { "epoch": 0.40718920128272057, "grad_norm": 3.382962465286255, "learning_rate": 1.3422723723138824e-05, "loss": 0.7311, "step": 2730 }, { "epoch": 0.407338354836304, "grad_norm": 2.9841432571411133, "learning_rate": 1.3418183423904931e-05, "loss": 0.6998, "step": 2731 }, { "epoch": 0.4074875083898874, "grad_norm": 2.337437152862549, "learning_rate": 1.34136423266742e-05, "loss": 0.698, "step": 2732 }, { "epoch": 0.4076366619434708, "grad_norm": 0.5931606292724609, "learning_rate": 1.3409100432506783e-05, "loss": 0.2914, "step": 2733 }, { "epoch": 0.4077858154970542, "grad_norm": 5.477479934692383, "learning_rate": 1.3404557742463009e-05, "loss": 0.7379, "step": 2734 }, { "epoch": 0.40793496905063764, "grad_norm": 3.6394851207733154, "learning_rate": 1.3400014257603399e-05, "loss": 0.7118, "step": 2735 }, { "epoch": 0.40808412260422106, "grad_norm": 5.091658592224121, "learning_rate": 1.339546997898866e-05, "loss": 0.6066, "step": 2736 }, { "epoch": 0.40823327615780447, "grad_norm": 3.936638116836548, "learning_rate": 1.3390924907679683e-05, "loss": 0.6784, "step": 2737 }, { "epoch": 0.4083824297113879, "grad_norm": 3.10974383354187, "learning_rate": 1.3386379044737545e-05, "loss": 0.7496, "step": 2738 }, { "epoch": 0.4085315832649713, "grad_norm": 5.399938583374023, "learning_rate": 1.3381832391223499e-05, "loss": 0.7536, "step": 2739 }, { "epoch": 0.4086807368185547, "grad_norm": 4.910519599914551, "learning_rate": 1.3377284948199006e-05, "loss": 0.7635, "step": 2740 }, { "epoch": 0.4088298903721381, "grad_norm": 2.413546085357666, "learning_rate": 1.337273671672568e-05, "loss": 0.7839, "step": 2741 }, { "epoch": 0.40897904392572154, "grad_norm": 1.9568369388580322, "learning_rate": 1.3368187697865342e-05, "loss": 0.6981, "step": 2742 }, { "epoch": 0.40912819747930496, "grad_norm": 2.272740602493286, "learning_rate": 1.336363789267999e-05, "loss": 0.7495, "step": 2743 }, { "epoch": 0.40927735103288837, "grad_norm": 3.3063151836395264, "learning_rate": 1.3359087302231806e-05, "loss": 0.7542, "step": 2744 }, { "epoch": 0.4094265045864718, "grad_norm": 2.8545961380004883, "learning_rate": 1.3354535927583153e-05, "loss": 0.7529, "step": 2745 }, { "epoch": 0.4095756581400552, "grad_norm": 2.30371356010437, "learning_rate": 1.3349983769796574e-05, "loss": 0.7161, "step": 2746 }, { "epoch": 0.4097248116936386, "grad_norm": 2.346182346343994, "learning_rate": 1.3345430829934806e-05, "loss": 0.7584, "step": 2747 }, { "epoch": 0.409873965247222, "grad_norm": 2.751300096511841, "learning_rate": 1.3340877109060762e-05, "loss": 0.6806, "step": 2748 }, { "epoch": 0.41002311880080544, "grad_norm": 5.300474643707275, "learning_rate": 1.3336322608237534e-05, "loss": 0.7583, "step": 2749 }, { "epoch": 0.41017227235438886, "grad_norm": 2.582216262817383, "learning_rate": 1.3331767328528398e-05, "loss": 0.7395, "step": 2750 }, { "epoch": 0.41032142590797227, "grad_norm": 3.6310646533966064, "learning_rate": 1.3327211270996818e-05, "loss": 0.7012, "step": 2751 }, { "epoch": 0.4104705794615557, "grad_norm": 3.1610610485076904, "learning_rate": 1.332265443670643e-05, "loss": 0.7702, "step": 2752 }, { "epoch": 0.4106197330151391, "grad_norm": 1.8699977397918701, "learning_rate": 1.3318096826721061e-05, "loss": 0.7585, "step": 2753 }, { "epoch": 0.4107688865687225, "grad_norm": 3.2015981674194336, "learning_rate": 1.3313538442104714e-05, "loss": 0.763, "step": 2754 }, { "epoch": 0.4109180401223059, "grad_norm": 2.9691858291625977, "learning_rate": 1.3308979283921568e-05, "loss": 0.7004, "step": 2755 }, { "epoch": 0.41106719367588934, "grad_norm": 2.434900999069214, "learning_rate": 1.3304419353235991e-05, "loss": 0.7246, "step": 2756 }, { "epoch": 0.41121634722947276, "grad_norm": 2.1566319465637207, "learning_rate": 1.3299858651112529e-05, "loss": 0.7189, "step": 2757 }, { "epoch": 0.41136550078305617, "grad_norm": 2.766838788986206, "learning_rate": 1.3295297178615904e-05, "loss": 0.6934, "step": 2758 }, { "epoch": 0.4115146543366396, "grad_norm": 3.761789560317993, "learning_rate": 1.3290734936811027e-05, "loss": 0.6821, "step": 2759 }, { "epoch": 0.411663807890223, "grad_norm": 2.147824287414551, "learning_rate": 1.3286171926762977e-05, "loss": 0.8162, "step": 2760 }, { "epoch": 0.4118129614438064, "grad_norm": 2.1910626888275146, "learning_rate": 1.3281608149537018e-05, "loss": 0.8092, "step": 2761 }, { "epoch": 0.4119621149973898, "grad_norm": 2.8438124656677246, "learning_rate": 1.3277043606198596e-05, "loss": 0.7551, "step": 2762 }, { "epoch": 0.41211126855097324, "grad_norm": 3.502095937728882, "learning_rate": 1.3272478297813334e-05, "loss": 0.7542, "step": 2763 }, { "epoch": 0.41226042210455666, "grad_norm": 3.866265296936035, "learning_rate": 1.3267912225447026e-05, "loss": 0.7638, "step": 2764 }, { "epoch": 0.41240957565814007, "grad_norm": 2.479959726333618, "learning_rate": 1.3263345390165654e-05, "loss": 0.7493, "step": 2765 }, { "epoch": 0.4125587292117235, "grad_norm": 2.3107573986053467, "learning_rate": 1.325877779303538e-05, "loss": 0.8068, "step": 2766 }, { "epoch": 0.4127078827653069, "grad_norm": 2.537980556488037, "learning_rate": 1.3254209435122533e-05, "loss": 0.7204, "step": 2767 }, { "epoch": 0.4128570363188903, "grad_norm": 1.8823951482772827, "learning_rate": 1.3249640317493628e-05, "loss": 0.719, "step": 2768 }, { "epoch": 0.41300618987247373, "grad_norm": 7.245850086212158, "learning_rate": 1.3245070441215355e-05, "loss": 0.6685, "step": 2769 }, { "epoch": 0.41315534342605714, "grad_norm": 4.525510311126709, "learning_rate": 1.3240499807354577e-05, "loss": 0.7102, "step": 2770 }, { "epoch": 0.41330449697964056, "grad_norm": 2.7589080333709717, "learning_rate": 1.3235928416978343e-05, "loss": 0.7093, "step": 2771 }, { "epoch": 0.41345365053322397, "grad_norm": 2.362365484237671, "learning_rate": 1.323135627115387e-05, "loss": 0.8305, "step": 2772 }, { "epoch": 0.4136028040868074, "grad_norm": 2.6057236194610596, "learning_rate": 1.3226783370948559e-05, "loss": 0.6893, "step": 2773 }, { "epoch": 0.4137519576403908, "grad_norm": 3.7372686862945557, "learning_rate": 1.3222209717429974e-05, "loss": 0.7533, "step": 2774 }, { "epoch": 0.4139011111939742, "grad_norm": 2.266864061355591, "learning_rate": 1.3217635311665876e-05, "loss": 0.7037, "step": 2775 }, { "epoch": 0.41405026474755763, "grad_norm": 2.383056163787842, "learning_rate": 1.3213060154724179e-05, "loss": 0.745, "step": 2776 }, { "epoch": 0.41419941830114104, "grad_norm": 4.843761444091797, "learning_rate": 1.3208484247672988e-05, "loss": 0.7892, "step": 2777 }, { "epoch": 0.41434857185472446, "grad_norm": 3.4345617294311523, "learning_rate": 1.3203907591580573e-05, "loss": 0.7496, "step": 2778 }, { "epoch": 0.41449772540830787, "grad_norm": 0.5610184073448181, "learning_rate": 1.3199330187515391e-05, "loss": 0.2764, "step": 2779 }, { "epoch": 0.4146468789618913, "grad_norm": 2.7458741664886475, "learning_rate": 1.3194752036546063e-05, "loss": 0.7106, "step": 2780 }, { "epoch": 0.4147960325154747, "grad_norm": 2.8605692386627197, "learning_rate": 1.3190173139741384e-05, "loss": 0.7072, "step": 2781 }, { "epoch": 0.4149451860690581, "grad_norm": 2.684771776199341, "learning_rate": 1.3185593498170334e-05, "loss": 0.7397, "step": 2782 }, { "epoch": 0.41509433962264153, "grad_norm": 2.583409547805786, "learning_rate": 1.3181013112902052e-05, "loss": 0.6931, "step": 2783 }, { "epoch": 0.41524349317622494, "grad_norm": 1.716774344444275, "learning_rate": 1.3176431985005864e-05, "loss": 0.6819, "step": 2784 }, { "epoch": 0.41539264672980836, "grad_norm": 1.7777010202407837, "learning_rate": 1.317185011555126e-05, "loss": 0.7847, "step": 2785 }, { "epoch": 0.41554180028339177, "grad_norm": 3.5222485065460205, "learning_rate": 1.316726750560791e-05, "loss": 0.7448, "step": 2786 }, { "epoch": 0.4156909538369752, "grad_norm": 2.824028968811035, "learning_rate": 1.3162684156245654e-05, "loss": 0.7462, "step": 2787 }, { "epoch": 0.4158401073905586, "grad_norm": 2.4415488243103027, "learning_rate": 1.31581000685345e-05, "loss": 0.7148, "step": 2788 }, { "epoch": 0.415989260944142, "grad_norm": 2.3729450702667236, "learning_rate": 1.3153515243544635e-05, "loss": 0.7382, "step": 2789 }, { "epoch": 0.41613841449772543, "grad_norm": 2.0624568462371826, "learning_rate": 1.3148929682346418e-05, "loss": 0.7508, "step": 2790 }, { "epoch": 0.41628756805130884, "grad_norm": 8.249366760253906, "learning_rate": 1.3144343386010375e-05, "loss": 0.6954, "step": 2791 }, { "epoch": 0.41643672160489226, "grad_norm": 2.943284749984741, "learning_rate": 1.3139756355607203e-05, "loss": 0.73, "step": 2792 }, { "epoch": 0.41658587515847567, "grad_norm": 2.16818904876709, "learning_rate": 1.3135168592207781e-05, "loss": 0.7315, "step": 2793 }, { "epoch": 0.4167350287120591, "grad_norm": 1.9100550413131714, "learning_rate": 1.313058009688315e-05, "loss": 0.7073, "step": 2794 }, { "epoch": 0.4168841822656425, "grad_norm": 2.8045828342437744, "learning_rate": 1.312599087070452e-05, "loss": 0.7319, "step": 2795 }, { "epoch": 0.4170333358192259, "grad_norm": 2.100945472717285, "learning_rate": 1.3121400914743275e-05, "loss": 0.6888, "step": 2796 }, { "epoch": 0.41718248937280933, "grad_norm": 2.605854034423828, "learning_rate": 1.3116810230070976e-05, "loss": 0.6244, "step": 2797 }, { "epoch": 0.41733164292639274, "grad_norm": 0.5194072723388672, "learning_rate": 1.3112218817759338e-05, "loss": 0.2781, "step": 2798 }, { "epoch": 0.41748079647997616, "grad_norm": 0.605749785900116, "learning_rate": 1.3107626678880267e-05, "loss": 0.2483, "step": 2799 }, { "epoch": 0.41762995003355957, "grad_norm": 2.8152947425842285, "learning_rate": 1.3103033814505817e-05, "loss": 0.718, "step": 2800 }, { "epoch": 0.417779103587143, "grad_norm": 0.5792120695114136, "learning_rate": 1.3098440225708232e-05, "loss": 0.2754, "step": 2801 }, { "epoch": 0.4179282571407264, "grad_norm": 3.630884885787964, "learning_rate": 1.3093845913559906e-05, "loss": 0.6998, "step": 2802 }, { "epoch": 0.4180774106943098, "grad_norm": 6.089999198913574, "learning_rate": 1.3089250879133412e-05, "loss": 0.6637, "step": 2803 }, { "epoch": 0.41822656424789323, "grad_norm": 4.689569473266602, "learning_rate": 1.3084655123501495e-05, "loss": 0.7503, "step": 2804 }, { "epoch": 0.41837571780147664, "grad_norm": 2.0419650077819824, "learning_rate": 1.3080058647737058e-05, "loss": 0.6937, "step": 2805 }, { "epoch": 0.41852487135506006, "grad_norm": 0.6236062049865723, "learning_rate": 1.3075461452913181e-05, "loss": 0.2653, "step": 2806 }, { "epoch": 0.41867402490864347, "grad_norm": 1.9076719284057617, "learning_rate": 1.307086354010311e-05, "loss": 0.7609, "step": 2807 }, { "epoch": 0.4188231784622269, "grad_norm": 2.5534119606018066, "learning_rate": 1.3066264910380251e-05, "loss": 0.7507, "step": 2808 }, { "epoch": 0.4189723320158103, "grad_norm": 1.5529756546020508, "learning_rate": 1.306166556481819e-05, "loss": 0.7298, "step": 2809 }, { "epoch": 0.4191214855693937, "grad_norm": 1.7646970748901367, "learning_rate": 1.3057065504490672e-05, "loss": 0.7791, "step": 2810 }, { "epoch": 0.41927063912297713, "grad_norm": 1.8147717714309692, "learning_rate": 1.3052464730471607e-05, "loss": 0.75, "step": 2811 }, { "epoch": 0.41941979267656054, "grad_norm": 1.4248266220092773, "learning_rate": 1.3047863243835081e-05, "loss": 0.7914, "step": 2812 }, { "epoch": 0.41956894623014396, "grad_norm": 2.0677285194396973, "learning_rate": 1.3043261045655338e-05, "loss": 0.7205, "step": 2813 }, { "epoch": 0.41971809978372737, "grad_norm": 2.005892038345337, "learning_rate": 1.3038658137006788e-05, "loss": 0.7097, "step": 2814 }, { "epoch": 0.4198672533373108, "grad_norm": 1.8596398830413818, "learning_rate": 1.3034054518964014e-05, "loss": 0.8289, "step": 2815 }, { "epoch": 0.4200164068908942, "grad_norm": 2.1304867267608643, "learning_rate": 1.3029450192601758e-05, "loss": 0.6941, "step": 2816 }, { "epoch": 0.4201655604444776, "grad_norm": 3.2176146507263184, "learning_rate": 1.3024845158994927e-05, "loss": 0.7099, "step": 2817 }, { "epoch": 0.42031471399806103, "grad_norm": 1.6367019414901733, "learning_rate": 1.30202394192186e-05, "loss": 0.7856, "step": 2818 }, { "epoch": 0.42046386755164444, "grad_norm": 2.6684508323669434, "learning_rate": 1.3015632974348015e-05, "loss": 0.76, "step": 2819 }, { "epoch": 0.42061302110522786, "grad_norm": 2.564516067504883, "learning_rate": 1.3011025825458576e-05, "loss": 0.7504, "step": 2820 }, { "epoch": 0.42076217465881127, "grad_norm": 2.6209895610809326, "learning_rate": 1.3006417973625853e-05, "loss": 0.7681, "step": 2821 }, { "epoch": 0.4209113282123947, "grad_norm": 1.8951725959777832, "learning_rate": 1.3001809419925575e-05, "loss": 0.7233, "step": 2822 }, { "epoch": 0.4210604817659781, "grad_norm": 2.0035297870635986, "learning_rate": 1.2997200165433639e-05, "loss": 0.6599, "step": 2823 }, { "epoch": 0.4212096353195615, "grad_norm": 2.382328987121582, "learning_rate": 1.2992590211226106e-05, "loss": 0.8149, "step": 2824 }, { "epoch": 0.42135878887314493, "grad_norm": 4.739049434661865, "learning_rate": 1.29879795583792e-05, "loss": 0.7139, "step": 2825 }, { "epoch": 0.42150794242672834, "grad_norm": 1.8962992429733276, "learning_rate": 1.2983368207969309e-05, "loss": 0.7484, "step": 2826 }, { "epoch": 0.42165709598031176, "grad_norm": 2.528228759765625, "learning_rate": 1.2978756161072978e-05, "loss": 0.6993, "step": 2827 }, { "epoch": 0.4218062495338952, "grad_norm": 3.8871212005615234, "learning_rate": 1.2974143418766922e-05, "loss": 0.6587, "step": 2828 }, { "epoch": 0.4219554030874786, "grad_norm": 2.344411611557007, "learning_rate": 1.2969529982128017e-05, "loss": 0.7355, "step": 2829 }, { "epoch": 0.422104556641062, "grad_norm": 0.5934022068977356, "learning_rate": 1.2964915852233295e-05, "loss": 0.3, "step": 2830 }, { "epoch": 0.42225371019464536, "grad_norm": 1.8170950412750244, "learning_rate": 1.2960301030159955e-05, "loss": 0.7976, "step": 2831 }, { "epoch": 0.4224028637482288, "grad_norm": 2.4991202354431152, "learning_rate": 1.295568551698536e-05, "loss": 0.7174, "step": 2832 }, { "epoch": 0.4225520173018122, "grad_norm": 2.491518497467041, "learning_rate": 1.2951069313787029e-05, "loss": 0.65, "step": 2833 }, { "epoch": 0.4227011708553956, "grad_norm": 6.108547210693359, "learning_rate": 1.2946452421642643e-05, "loss": 0.7311, "step": 2834 }, { "epoch": 0.422850324408979, "grad_norm": 1.7076976299285889, "learning_rate": 1.2941834841630046e-05, "loss": 0.7834, "step": 2835 }, { "epoch": 0.42299947796256243, "grad_norm": 1.919295310974121, "learning_rate": 1.2937216574827245e-05, "loss": 0.7218, "step": 2836 }, { "epoch": 0.42314863151614585, "grad_norm": 2.8118703365325928, "learning_rate": 1.2932597622312396e-05, "loss": 0.7485, "step": 2837 }, { "epoch": 0.42329778506972926, "grad_norm": 1.554808259010315, "learning_rate": 1.2927977985163834e-05, "loss": 0.7846, "step": 2838 }, { "epoch": 0.4234469386233127, "grad_norm": 1.9715572595596313, "learning_rate": 1.2923357664460032e-05, "loss": 0.7186, "step": 2839 }, { "epoch": 0.4235960921768961, "grad_norm": 2.4557974338531494, "learning_rate": 1.291873666127964e-05, "loss": 0.751, "step": 2840 }, { "epoch": 0.4237452457304795, "grad_norm": 2.001138925552368, "learning_rate": 1.2914114976701463e-05, "loss": 0.7436, "step": 2841 }, { "epoch": 0.4238943992840629, "grad_norm": 2.3958420753479004, "learning_rate": 1.2909492611804455e-05, "loss": 0.6515, "step": 2842 }, { "epoch": 0.42404355283764633, "grad_norm": 1.9660416841506958, "learning_rate": 1.2904869567667743e-05, "loss": 0.764, "step": 2843 }, { "epoch": 0.42419270639122975, "grad_norm": 1.3809394836425781, "learning_rate": 1.2900245845370603e-05, "loss": 0.7028, "step": 2844 }, { "epoch": 0.42434185994481316, "grad_norm": 5.803722858428955, "learning_rate": 1.2895621445992474e-05, "loss": 0.8078, "step": 2845 }, { "epoch": 0.4244910134983966, "grad_norm": 2.3808350563049316, "learning_rate": 1.2890996370612954e-05, "loss": 0.7189, "step": 2846 }, { "epoch": 0.42464016705198, "grad_norm": 2.928588390350342, "learning_rate": 1.2886370620311789e-05, "loss": 0.8039, "step": 2847 }, { "epoch": 0.4247893206055634, "grad_norm": 1.8089599609375, "learning_rate": 1.28817441961689e-05, "loss": 0.7964, "step": 2848 }, { "epoch": 0.4249384741591468, "grad_norm": 3.3122310638427734, "learning_rate": 1.2877117099264349e-05, "loss": 0.6648, "step": 2849 }, { "epoch": 0.42508762771273023, "grad_norm": 4.644115924835205, "learning_rate": 1.2872489330678363e-05, "loss": 0.6963, "step": 2850 }, { "epoch": 0.42523678126631365, "grad_norm": 1.9196035861968994, "learning_rate": 1.2867860891491326e-05, "loss": 0.7097, "step": 2851 }, { "epoch": 0.42538593481989706, "grad_norm": 2.1313869953155518, "learning_rate": 1.2863231782783774e-05, "loss": 0.6476, "step": 2852 }, { "epoch": 0.4255350883734805, "grad_norm": 0.5308580994606018, "learning_rate": 1.28586020056364e-05, "loss": 0.2443, "step": 2853 }, { "epoch": 0.4256842419270639, "grad_norm": 1.4906573295593262, "learning_rate": 1.2853971561130062e-05, "loss": 0.7684, "step": 2854 }, { "epoch": 0.4258333954806473, "grad_norm": 1.5964525938034058, "learning_rate": 1.2849340450345765e-05, "loss": 0.71, "step": 2855 }, { "epoch": 0.4259825490342307, "grad_norm": 2.021963119506836, "learning_rate": 1.2844708674364665e-05, "loss": 0.8388, "step": 2856 }, { "epoch": 0.42613170258781413, "grad_norm": 4.673083305358887, "learning_rate": 1.2840076234268083e-05, "loss": 0.8047, "step": 2857 }, { "epoch": 0.42628085614139755, "grad_norm": 2.4651427268981934, "learning_rate": 1.2835443131137502e-05, "loss": 0.7577, "step": 2858 }, { "epoch": 0.42643000969498096, "grad_norm": 2.5433247089385986, "learning_rate": 1.2830809366054533e-05, "loss": 0.744, "step": 2859 }, { "epoch": 0.4265791632485644, "grad_norm": 1.9976919889450073, "learning_rate": 1.282617494010097e-05, "loss": 0.7842, "step": 2860 }, { "epoch": 0.4267283168021478, "grad_norm": 2.3624401092529297, "learning_rate": 1.2821539854358745e-05, "loss": 0.76, "step": 2861 }, { "epoch": 0.4268774703557312, "grad_norm": 1.8542568683624268, "learning_rate": 1.2816904109909948e-05, "loss": 0.7647, "step": 2862 }, { "epoch": 0.4270266239093146, "grad_norm": 2.7066709995269775, "learning_rate": 1.2812267707836827e-05, "loss": 0.8032, "step": 2863 }, { "epoch": 0.42717577746289803, "grad_norm": 3.7022318840026855, "learning_rate": 1.2807630649221777e-05, "loss": 0.6385, "step": 2864 }, { "epoch": 0.42732493101648145, "grad_norm": 2.650571584701538, "learning_rate": 1.2802992935147348e-05, "loss": 0.741, "step": 2865 }, { "epoch": 0.42747408457006486, "grad_norm": 1.8536298274993896, "learning_rate": 1.2798354566696245e-05, "loss": 0.75, "step": 2866 }, { "epoch": 0.4276232381236483, "grad_norm": 2.407155990600586, "learning_rate": 1.2793715544951324e-05, "loss": 0.7768, "step": 2867 }, { "epoch": 0.4277723916772317, "grad_norm": 3.6199965476989746, "learning_rate": 1.27890758709956e-05, "loss": 0.749, "step": 2868 }, { "epoch": 0.4279215452308151, "grad_norm": 3.2960734367370605, "learning_rate": 1.2784435545912228e-05, "loss": 0.7212, "step": 2869 }, { "epoch": 0.4280706987843985, "grad_norm": 2.0489368438720703, "learning_rate": 1.277979457078452e-05, "loss": 0.7951, "step": 2870 }, { "epoch": 0.42821985233798193, "grad_norm": 2.1545400619506836, "learning_rate": 1.2775152946695953e-05, "loss": 0.7863, "step": 2871 }, { "epoch": 0.42836900589156535, "grad_norm": 1.8243217468261719, "learning_rate": 1.2770510674730132e-05, "loss": 0.7647, "step": 2872 }, { "epoch": 0.42851815944514876, "grad_norm": 2.565128803253174, "learning_rate": 1.276586775597083e-05, "loss": 0.7286, "step": 2873 }, { "epoch": 0.4286673129987322, "grad_norm": 2.8710923194885254, "learning_rate": 1.2761224191501964e-05, "loss": 0.646, "step": 2874 }, { "epoch": 0.4288164665523156, "grad_norm": 2.3151967525482178, "learning_rate": 1.2756579982407606e-05, "loss": 0.746, "step": 2875 }, { "epoch": 0.428965620105899, "grad_norm": 0.4997352957725525, "learning_rate": 1.2751935129771974e-05, "loss": 0.2493, "step": 2876 }, { "epoch": 0.4291147736594824, "grad_norm": 1.741317868232727, "learning_rate": 1.2747289634679445e-05, "loss": 0.8395, "step": 2877 }, { "epoch": 0.42926392721306583, "grad_norm": 0.5584595799446106, "learning_rate": 1.2742643498214534e-05, "loss": 0.2548, "step": 2878 }, { "epoch": 0.42941308076664925, "grad_norm": 1.9442743062973022, "learning_rate": 1.2737996721461907e-05, "loss": 0.6573, "step": 2879 }, { "epoch": 0.42956223432023266, "grad_norm": 3.2436110973358154, "learning_rate": 1.2733349305506395e-05, "loss": 0.7487, "step": 2880 }, { "epoch": 0.4297113878738161, "grad_norm": 1.866498589515686, "learning_rate": 1.272870125143296e-05, "loss": 0.715, "step": 2881 }, { "epoch": 0.4298605414273995, "grad_norm": 2.8080992698669434, "learning_rate": 1.2724052560326722e-05, "loss": 0.7068, "step": 2882 }, { "epoch": 0.4300096949809829, "grad_norm": 1.4333267211914062, "learning_rate": 1.2719403233272947e-05, "loss": 0.8114, "step": 2883 }, { "epoch": 0.4301588485345663, "grad_norm": 2.429830551147461, "learning_rate": 1.2714753271357047e-05, "loss": 0.644, "step": 2884 }, { "epoch": 0.43030800208814973, "grad_norm": 1.9826788902282715, "learning_rate": 1.2710102675664593e-05, "loss": 0.7434, "step": 2885 }, { "epoch": 0.43045715564173315, "grad_norm": 1.5022034645080566, "learning_rate": 1.2705451447281289e-05, "loss": 0.7336, "step": 2886 }, { "epoch": 0.43060630919531656, "grad_norm": 1.9590057134628296, "learning_rate": 1.2700799587293e-05, "loss": 0.7902, "step": 2887 }, { "epoch": 0.4307554627489, "grad_norm": 2.208606004714966, "learning_rate": 1.2696147096785727e-05, "loss": 0.7447, "step": 2888 }, { "epoch": 0.4309046163024834, "grad_norm": 2.7391340732574463, "learning_rate": 1.2691493976845627e-05, "loss": 0.7724, "step": 2889 }, { "epoch": 0.4310537698560668, "grad_norm": 2.025892734527588, "learning_rate": 1.2686840228559001e-05, "loss": 0.686, "step": 2890 }, { "epoch": 0.4312029234096502, "grad_norm": 2.395388126373291, "learning_rate": 1.2682185853012296e-05, "loss": 0.5922, "step": 2891 }, { "epoch": 0.43135207696323363, "grad_norm": 2.237816095352173, "learning_rate": 1.26775308512921e-05, "loss": 0.7805, "step": 2892 }, { "epoch": 0.43150123051681705, "grad_norm": 2.601940393447876, "learning_rate": 1.2672875224485166e-05, "loss": 0.6825, "step": 2893 }, { "epoch": 0.43165038407040046, "grad_norm": 1.6621969938278198, "learning_rate": 1.266821897367837e-05, "loss": 0.7067, "step": 2894 }, { "epoch": 0.4317995376239839, "grad_norm": 1.5239620208740234, "learning_rate": 1.2663562099958746e-05, "loss": 0.8185, "step": 2895 }, { "epoch": 0.4319486911775673, "grad_norm": 2.0126290321350098, "learning_rate": 1.2658904604413468e-05, "loss": 0.7679, "step": 2896 }, { "epoch": 0.4320978447311507, "grad_norm": 9.17834186553955, "learning_rate": 1.2654246488129864e-05, "loss": 0.7509, "step": 2897 }, { "epoch": 0.4322469982847341, "grad_norm": 2.775290012359619, "learning_rate": 1.2649587752195397e-05, "loss": 0.6726, "step": 2898 }, { "epoch": 0.43239615183831753, "grad_norm": 2.6449432373046875, "learning_rate": 1.2644928397697683e-05, "loss": 0.8519, "step": 2899 }, { "epoch": 0.43254530539190095, "grad_norm": 1.8954576253890991, "learning_rate": 1.2640268425724469e-05, "loss": 0.7099, "step": 2900 }, { "epoch": 0.43269445894548436, "grad_norm": 2.5897889137268066, "learning_rate": 1.2635607837363665e-05, "loss": 0.716, "step": 2901 }, { "epoch": 0.4328436124990678, "grad_norm": 3.018327236175537, "learning_rate": 1.2630946633703314e-05, "loss": 0.7999, "step": 2902 }, { "epoch": 0.4329927660526512, "grad_norm": 1.576436996459961, "learning_rate": 1.2626284815831597e-05, "loss": 0.6759, "step": 2903 }, { "epoch": 0.4331419196062346, "grad_norm": 5.532283782958984, "learning_rate": 1.2621622384836853e-05, "loss": 0.7505, "step": 2904 }, { "epoch": 0.433291073159818, "grad_norm": 1.9286662340164185, "learning_rate": 1.2616959341807553e-05, "loss": 0.7842, "step": 2905 }, { "epoch": 0.43344022671340143, "grad_norm": 0.6875457167625427, "learning_rate": 1.2612295687832315e-05, "loss": 0.2646, "step": 2906 }, { "epoch": 0.43358938026698485, "grad_norm": 1.4930278062820435, "learning_rate": 1.2607631423999898e-05, "loss": 0.7758, "step": 2907 }, { "epoch": 0.43373853382056826, "grad_norm": 4.817904949188232, "learning_rate": 1.2602966551399206e-05, "loss": 0.6528, "step": 2908 }, { "epoch": 0.4338876873741517, "grad_norm": 1.9121471643447876, "learning_rate": 1.2598301071119277e-05, "loss": 0.7132, "step": 2909 }, { "epoch": 0.4340368409277351, "grad_norm": 2.707101583480835, "learning_rate": 1.2593634984249307e-05, "loss": 0.7349, "step": 2910 }, { "epoch": 0.4341859944813185, "grad_norm": 1.6297633647918701, "learning_rate": 1.2588968291878621e-05, "loss": 0.7661, "step": 2911 }, { "epoch": 0.4343351480349019, "grad_norm": 1.8421916961669922, "learning_rate": 1.2584300995096684e-05, "loss": 0.7001, "step": 2912 }, { "epoch": 0.43448430158848533, "grad_norm": 1.5236502885818481, "learning_rate": 1.257963309499311e-05, "loss": 0.6967, "step": 2913 }, { "epoch": 0.43463345514206875, "grad_norm": 2.126485824584961, "learning_rate": 1.2574964592657648e-05, "loss": 0.7238, "step": 2914 }, { "epoch": 0.43478260869565216, "grad_norm": 2.8159918785095215, "learning_rate": 1.257029548918019e-05, "loss": 0.6377, "step": 2915 }, { "epoch": 0.4349317622492356, "grad_norm": 3.5642483234405518, "learning_rate": 1.2565625785650774e-05, "loss": 0.681, "step": 2916 }, { "epoch": 0.435080915802819, "grad_norm": 2.7014026641845703, "learning_rate": 1.2560955483159562e-05, "loss": 0.7251, "step": 2917 }, { "epoch": 0.4352300693564024, "grad_norm": 2.220010995864868, "learning_rate": 1.2556284582796874e-05, "loss": 0.7669, "step": 2918 }, { "epoch": 0.4353792229099858, "grad_norm": 1.7368544340133667, "learning_rate": 1.255161308565316e-05, "loss": 0.6922, "step": 2919 }, { "epoch": 0.43552837646356923, "grad_norm": 1.608953595161438, "learning_rate": 1.254694099281901e-05, "loss": 0.8056, "step": 2920 }, { "epoch": 0.43567753001715265, "grad_norm": 2.0390284061431885, "learning_rate": 1.2542268305385155e-05, "loss": 0.7316, "step": 2921 }, { "epoch": 0.43582668357073606, "grad_norm": 11.474618911743164, "learning_rate": 1.2537595024442462e-05, "loss": 0.7262, "step": 2922 }, { "epoch": 0.4359758371243195, "grad_norm": 2.1470892429351807, "learning_rate": 1.2532921151081935e-05, "loss": 0.7529, "step": 2923 }, { "epoch": 0.4361249906779029, "grad_norm": 0.6674973368644714, "learning_rate": 1.2528246686394732e-05, "loss": 0.2555, "step": 2924 }, { "epoch": 0.4362741442314863, "grad_norm": 1.6275084018707275, "learning_rate": 1.2523571631472123e-05, "loss": 0.7039, "step": 2925 }, { "epoch": 0.4364232977850697, "grad_norm": 2.6631288528442383, "learning_rate": 1.2518895987405539e-05, "loss": 0.7333, "step": 2926 }, { "epoch": 0.43657245133865313, "grad_norm": 1.9147707223892212, "learning_rate": 1.2514219755286531e-05, "loss": 0.7478, "step": 2927 }, { "epoch": 0.43672160489223655, "grad_norm": 2.0397658348083496, "learning_rate": 1.2509542936206802e-05, "loss": 0.6841, "step": 2928 }, { "epoch": 0.43687075844581996, "grad_norm": 1.6511625051498413, "learning_rate": 1.2504865531258186e-05, "loss": 0.7262, "step": 2929 }, { "epoch": 0.4370199119994034, "grad_norm": 1.6423760652542114, "learning_rate": 1.250018754153265e-05, "loss": 0.792, "step": 2930 }, { "epoch": 0.4371690655529868, "grad_norm": 1.8639003038406372, "learning_rate": 1.2495508968122297e-05, "loss": 0.8139, "step": 2931 }, { "epoch": 0.4373182191065702, "grad_norm": 3.43200945854187, "learning_rate": 1.2490829812119376e-05, "loss": 0.7496, "step": 2932 }, { "epoch": 0.4374673726601536, "grad_norm": 1.9960230588912964, "learning_rate": 1.2486150074616268e-05, "loss": 0.7098, "step": 2933 }, { "epoch": 0.43761652621373703, "grad_norm": 1.7810529470443726, "learning_rate": 1.2481469756705478e-05, "loss": 0.7569, "step": 2934 }, { "epoch": 0.43776567976732045, "grad_norm": 1.725061297416687, "learning_rate": 1.2476788859479667e-05, "loss": 0.7255, "step": 2935 }, { "epoch": 0.43791483332090386, "grad_norm": 2.3820137977600098, "learning_rate": 1.247210738403161e-05, "loss": 0.629, "step": 2936 }, { "epoch": 0.4380639868744873, "grad_norm": 1.5726245641708374, "learning_rate": 1.2467425331454237e-05, "loss": 0.8154, "step": 2937 }, { "epoch": 0.4382131404280707, "grad_norm": 1.566288709640503, "learning_rate": 1.2462742702840597e-05, "loss": 0.8053, "step": 2938 }, { "epoch": 0.4383622939816541, "grad_norm": 3.064197540283203, "learning_rate": 1.2458059499283884e-05, "loss": 0.7332, "step": 2939 }, { "epoch": 0.4385114475352375, "grad_norm": 1.5280715227127075, "learning_rate": 1.2453375721877417e-05, "loss": 0.7377, "step": 2940 }, { "epoch": 0.43866060108882093, "grad_norm": 2.262880325317383, "learning_rate": 1.2448691371714661e-05, "loss": 0.7909, "step": 2941 }, { "epoch": 0.43880975464240435, "grad_norm": 0.5704143047332764, "learning_rate": 1.2444006449889198e-05, "loss": 0.2528, "step": 2942 }, { "epoch": 0.43895890819598776, "grad_norm": 1.811890959739685, "learning_rate": 1.2439320957494762e-05, "loss": 0.7063, "step": 2943 }, { "epoch": 0.4391080617495712, "grad_norm": 2.1622166633605957, "learning_rate": 1.2434634895625206e-05, "loss": 0.7176, "step": 2944 }, { "epoch": 0.4392572153031546, "grad_norm": 1.8643194437026978, "learning_rate": 1.242994826537452e-05, "loss": 0.8251, "step": 2945 }, { "epoch": 0.439406368856738, "grad_norm": 3.8533241748809814, "learning_rate": 1.2425261067836835e-05, "loss": 0.8336, "step": 2946 }, { "epoch": 0.4395555224103214, "grad_norm": 2.3923704624176025, "learning_rate": 1.2420573304106402e-05, "loss": 0.7168, "step": 2947 }, { "epoch": 0.43970467596390483, "grad_norm": 1.8473883867263794, "learning_rate": 1.241588497527761e-05, "loss": 0.7164, "step": 2948 }, { "epoch": 0.43985382951748825, "grad_norm": 1.9417610168457031, "learning_rate": 1.2411196082444978e-05, "loss": 0.7642, "step": 2949 }, { "epoch": 0.44000298307107166, "grad_norm": 1.1600141525268555, "learning_rate": 1.2406506626703163e-05, "loss": 0.7503, "step": 2950 }, { "epoch": 0.4401521366246551, "grad_norm": 0.549741268157959, "learning_rate": 1.2401816609146942e-05, "loss": 0.2583, "step": 2951 }, { "epoch": 0.4403012901782385, "grad_norm": 2.1362087726593018, "learning_rate": 1.2397126030871235e-05, "loss": 0.6628, "step": 2952 }, { "epoch": 0.4404504437318219, "grad_norm": 1.6094499826431274, "learning_rate": 1.2392434892971086e-05, "loss": 0.753, "step": 2953 }, { "epoch": 0.4405995972854053, "grad_norm": 1.7817184925079346, "learning_rate": 1.2387743196541669e-05, "loss": 0.6786, "step": 2954 }, { "epoch": 0.44074875083898873, "grad_norm": 1.5262465476989746, "learning_rate": 1.2383050942678295e-05, "loss": 0.7903, "step": 2955 }, { "epoch": 0.44089790439257215, "grad_norm": 2.628392219543457, "learning_rate": 1.2378358132476395e-05, "loss": 0.6452, "step": 2956 }, { "epoch": 0.44104705794615556, "grad_norm": 2.413148880004883, "learning_rate": 1.237366476703154e-05, "loss": 0.7092, "step": 2957 }, { "epoch": 0.441196211499739, "grad_norm": 1.553109884262085, "learning_rate": 1.2368970847439426e-05, "loss": 0.7752, "step": 2958 }, { "epoch": 0.4413453650533224, "grad_norm": 1.5704846382141113, "learning_rate": 1.2364276374795878e-05, "loss": 0.7682, "step": 2959 }, { "epoch": 0.4414945186069058, "grad_norm": 2.392517328262329, "learning_rate": 1.235958135019685e-05, "loss": 0.777, "step": 2960 }, { "epoch": 0.4416436721604892, "grad_norm": 1.798089861869812, "learning_rate": 1.2354885774738428e-05, "loss": 0.7137, "step": 2961 }, { "epoch": 0.44179282571407263, "grad_norm": 0.530399739742279, "learning_rate": 1.2350189649516818e-05, "loss": 0.2657, "step": 2962 }, { "epoch": 0.44194197926765605, "grad_norm": 12.178558349609375, "learning_rate": 1.2345492975628368e-05, "loss": 0.7291, "step": 2963 }, { "epoch": 0.44209113282123946, "grad_norm": 2.493986129760742, "learning_rate": 1.2340795754169544e-05, "loss": 0.7138, "step": 2964 }, { "epoch": 0.4422402863748229, "grad_norm": 2.354215145111084, "learning_rate": 1.233609798623694e-05, "loss": 0.6517, "step": 2965 }, { "epoch": 0.4423894399284063, "grad_norm": 1.4884051084518433, "learning_rate": 1.233139967292728e-05, "loss": 0.7794, "step": 2966 }, { "epoch": 0.4425385934819897, "grad_norm": 1.5678348541259766, "learning_rate": 1.2326700815337422e-05, "loss": 0.8088, "step": 2967 }, { "epoch": 0.4426877470355731, "grad_norm": 3.3623409271240234, "learning_rate": 1.2322001414564336e-05, "loss": 0.7826, "step": 2968 }, { "epoch": 0.44283690058915653, "grad_norm": 1.8973461389541626, "learning_rate": 1.2317301471705134e-05, "loss": 0.6516, "step": 2969 }, { "epoch": 0.44298605414273995, "grad_norm": 1.4248735904693604, "learning_rate": 1.2312600987857041e-05, "loss": 0.7372, "step": 2970 }, { "epoch": 0.44313520769632336, "grad_norm": 2.1137115955352783, "learning_rate": 1.2307899964117422e-05, "loss": 0.7851, "step": 2971 }, { "epoch": 0.4432843612499068, "grad_norm": 2.5940065383911133, "learning_rate": 1.2303198401583759e-05, "loss": 0.7508, "step": 2972 }, { "epoch": 0.4434335148034902, "grad_norm": 2.143420934677124, "learning_rate": 1.2298496301353657e-05, "loss": 0.6908, "step": 2973 }, { "epoch": 0.4435826683570736, "grad_norm": 3.306037664413452, "learning_rate": 1.229379366452486e-05, "loss": 0.6504, "step": 2974 }, { "epoch": 0.443731821910657, "grad_norm": 4.09395170211792, "learning_rate": 1.228909049219522e-05, "loss": 0.6955, "step": 2975 }, { "epoch": 0.44388097546424043, "grad_norm": 2.7503230571746826, "learning_rate": 1.2284386785462728e-05, "loss": 0.6596, "step": 2976 }, { "epoch": 0.44403012901782385, "grad_norm": 3.862863063812256, "learning_rate": 1.2279682545425495e-05, "loss": 0.7296, "step": 2977 }, { "epoch": 0.44417928257140726, "grad_norm": 2.927825689315796, "learning_rate": 1.2274977773181753e-05, "loss": 0.7452, "step": 2978 }, { "epoch": 0.4443284361249907, "grad_norm": 7.35360050201416, "learning_rate": 1.2270272469829862e-05, "loss": 0.7601, "step": 2979 }, { "epoch": 0.4444775896785741, "grad_norm": 2.954904794692993, "learning_rate": 1.2265566636468309e-05, "loss": 0.6453, "step": 2980 }, { "epoch": 0.4446267432321575, "grad_norm": 1.4064351320266724, "learning_rate": 1.2260860274195694e-05, "loss": 0.8305, "step": 2981 }, { "epoch": 0.4447758967857409, "grad_norm": 2.0161526203155518, "learning_rate": 1.2256153384110754e-05, "loss": 0.6637, "step": 2982 }, { "epoch": 0.44492505033932434, "grad_norm": 2.5616955757141113, "learning_rate": 1.2251445967312341e-05, "loss": 0.7509, "step": 2983 }, { "epoch": 0.44507420389290775, "grad_norm": 2.1809909343719482, "learning_rate": 1.2246738024899424e-05, "loss": 0.8434, "step": 2984 }, { "epoch": 0.44522335744649116, "grad_norm": 1.465611219406128, "learning_rate": 1.2242029557971116e-05, "loss": 0.8279, "step": 2985 }, { "epoch": 0.4453725110000746, "grad_norm": 1.7463140487670898, "learning_rate": 1.223732056762663e-05, "loss": 0.7877, "step": 2986 }, { "epoch": 0.445521664553658, "grad_norm": 2.656967878341675, "learning_rate": 1.2232611054965308e-05, "loss": 0.7255, "step": 2987 }, { "epoch": 0.4456708181072414, "grad_norm": 2.1437296867370605, "learning_rate": 1.2227901021086624e-05, "loss": 0.7693, "step": 2988 }, { "epoch": 0.4458199716608248, "grad_norm": 2.2595865726470947, "learning_rate": 1.222319046709016e-05, "loss": 0.7133, "step": 2989 }, { "epoch": 0.44596912521440824, "grad_norm": 1.9084259271621704, "learning_rate": 1.2218479394075624e-05, "loss": 0.7924, "step": 2990 }, { "epoch": 0.44611827876799165, "grad_norm": 1.9125674962997437, "learning_rate": 1.2213767803142854e-05, "loss": 0.7841, "step": 2991 }, { "epoch": 0.44626743232157506, "grad_norm": 1.6819676160812378, "learning_rate": 1.220905569539179e-05, "loss": 0.751, "step": 2992 }, { "epoch": 0.4464165858751585, "grad_norm": 2.1283183097839355, "learning_rate": 1.2204343071922511e-05, "loss": 0.7686, "step": 2993 }, { "epoch": 0.4465657394287419, "grad_norm": 1.8956817388534546, "learning_rate": 1.2199629933835208e-05, "loss": 0.754, "step": 2994 }, { "epoch": 0.4467148929823253, "grad_norm": 2.725151300430298, "learning_rate": 1.2194916282230192e-05, "loss": 0.7569, "step": 2995 }, { "epoch": 0.4468640465359087, "grad_norm": 1.9253865480422974, "learning_rate": 1.21902021182079e-05, "loss": 0.7303, "step": 2996 }, { "epoch": 0.44701320008949214, "grad_norm": 2.3104500770568848, "learning_rate": 1.2185487442868876e-05, "loss": 0.8266, "step": 2997 }, { "epoch": 0.44716235364307555, "grad_norm": 1.9814645051956177, "learning_rate": 1.2180772257313793e-05, "loss": 0.727, "step": 2998 }, { "epoch": 0.44731150719665896, "grad_norm": 3.5004425048828125, "learning_rate": 1.2176056562643448e-05, "loss": 0.7681, "step": 2999 }, { "epoch": 0.4474606607502424, "grad_norm": 1.9153386354446411, "learning_rate": 1.2171340359958742e-05, "loss": 0.7672, "step": 3000 }, { "epoch": 0.4476098143038258, "grad_norm": 1.5810964107513428, "learning_rate": 1.2166623650360707e-05, "loss": 0.7179, "step": 3001 }, { "epoch": 0.4477589678574092, "grad_norm": 2.502894401550293, "learning_rate": 1.216190643495049e-05, "loss": 0.7313, "step": 3002 }, { "epoch": 0.4479081214109926, "grad_norm": 2.0360677242279053, "learning_rate": 1.2157188714829353e-05, "loss": 0.7121, "step": 3003 }, { "epoch": 0.44805727496457604, "grad_norm": 1.8932262659072876, "learning_rate": 1.2152470491098678e-05, "loss": 0.7108, "step": 3004 }, { "epoch": 0.44820642851815945, "grad_norm": 2.408930540084839, "learning_rate": 1.2147751764859967e-05, "loss": 0.7064, "step": 3005 }, { "epoch": 0.44835558207174286, "grad_norm": 2.448472499847412, "learning_rate": 1.2143032537214832e-05, "loss": 0.694, "step": 3006 }, { "epoch": 0.4485047356253263, "grad_norm": 2.5936572551727295, "learning_rate": 1.2138312809265012e-05, "loss": 0.8206, "step": 3007 }, { "epoch": 0.4486538891789097, "grad_norm": 2.1601412296295166, "learning_rate": 1.2133592582112354e-05, "loss": 0.7151, "step": 3008 }, { "epoch": 0.4488030427324931, "grad_norm": 2.1932260990142822, "learning_rate": 1.2128871856858828e-05, "loss": 0.7597, "step": 3009 }, { "epoch": 0.4489521962860765, "grad_norm": 2.3618531227111816, "learning_rate": 1.2124150634606515e-05, "loss": 0.7204, "step": 3010 }, { "epoch": 0.44910134983965994, "grad_norm": 2.1105031967163086, "learning_rate": 1.211942891645762e-05, "loss": 0.7564, "step": 3011 }, { "epoch": 0.44925050339324335, "grad_norm": 1.7367368936538696, "learning_rate": 1.2114706703514452e-05, "loss": 0.7344, "step": 3012 }, { "epoch": 0.44939965694682676, "grad_norm": 2.3091230392456055, "learning_rate": 1.2109983996879446e-05, "loss": 0.6825, "step": 3013 }, { "epoch": 0.4495488105004102, "grad_norm": 1.9528032541275024, "learning_rate": 1.2105260797655144e-05, "loss": 0.7587, "step": 3014 }, { "epoch": 0.4496979640539936, "grad_norm": 1.8726918697357178, "learning_rate": 1.2100537106944213e-05, "loss": 0.7136, "step": 3015 }, { "epoch": 0.449847117607577, "grad_norm": 2.5086276531219482, "learning_rate": 1.2095812925849424e-05, "loss": 0.7453, "step": 3016 }, { "epoch": 0.4499962711611604, "grad_norm": 1.9269722700119019, "learning_rate": 1.2091088255473669e-05, "loss": 0.8317, "step": 3017 }, { "epoch": 0.45014542471474384, "grad_norm": 3.551032781600952, "learning_rate": 1.2086363096919953e-05, "loss": 0.7003, "step": 3018 }, { "epoch": 0.45029457826832725, "grad_norm": 2.7965006828308105, "learning_rate": 1.2081637451291393e-05, "loss": 0.7368, "step": 3019 }, { "epoch": 0.45044373182191066, "grad_norm": 1.7140820026397705, "learning_rate": 1.2076911319691222e-05, "loss": 0.7696, "step": 3020 }, { "epoch": 0.4505928853754941, "grad_norm": 2.026794672012329, "learning_rate": 1.2072184703222791e-05, "loss": 0.8138, "step": 3021 }, { "epoch": 0.4507420389290775, "grad_norm": 0.5164436101913452, "learning_rate": 1.2067457602989552e-05, "loss": 0.2346, "step": 3022 }, { "epoch": 0.4508911924826609, "grad_norm": 1.5268033742904663, "learning_rate": 1.2062730020095073e-05, "loss": 0.7526, "step": 3023 }, { "epoch": 0.4510403460362443, "grad_norm": 1.6121494770050049, "learning_rate": 1.205800195564305e-05, "loss": 0.7351, "step": 3024 }, { "epoch": 0.45118949958982774, "grad_norm": 1.9389973878860474, "learning_rate": 1.2053273410737275e-05, "loss": 0.7023, "step": 3025 }, { "epoch": 0.45133865314341115, "grad_norm": 1.7974575757980347, "learning_rate": 1.2048544386481656e-05, "loss": 0.7245, "step": 3026 }, { "epoch": 0.45148780669699456, "grad_norm": 2.1492044925689697, "learning_rate": 1.204381488398021e-05, "loss": 0.7955, "step": 3027 }, { "epoch": 0.451636960250578, "grad_norm": 2.9780099391937256, "learning_rate": 1.2039084904337082e-05, "loss": 0.7405, "step": 3028 }, { "epoch": 0.4517861138041614, "grad_norm": 2.270310401916504, "learning_rate": 1.2034354448656505e-05, "loss": 0.7021, "step": 3029 }, { "epoch": 0.4519352673577448, "grad_norm": 1.968765377998352, "learning_rate": 1.2029623518042837e-05, "loss": 0.7526, "step": 3030 }, { "epoch": 0.4520844209113282, "grad_norm": 1.834328532218933, "learning_rate": 1.2024892113600544e-05, "loss": 0.6557, "step": 3031 }, { "epoch": 0.45223357446491164, "grad_norm": 1.5030287504196167, "learning_rate": 1.2020160236434203e-05, "loss": 0.7638, "step": 3032 }, { "epoch": 0.45238272801849505, "grad_norm": 1.837376356124878, "learning_rate": 1.2015427887648505e-05, "loss": 0.7276, "step": 3033 }, { "epoch": 0.45253188157207846, "grad_norm": 2.4332756996154785, "learning_rate": 1.2010695068348238e-05, "loss": 0.7751, "step": 3034 }, { "epoch": 0.4526810351256619, "grad_norm": 1.2300859689712524, "learning_rate": 1.2005961779638322e-05, "loss": 0.7947, "step": 3035 }, { "epoch": 0.4528301886792453, "grad_norm": 1.6550242900848389, "learning_rate": 1.2001228022623762e-05, "loss": 0.7124, "step": 3036 }, { "epoch": 0.4529793422328287, "grad_norm": 1.9171794652938843, "learning_rate": 1.1996493798409687e-05, "loss": 0.7182, "step": 3037 }, { "epoch": 0.4531284957864121, "grad_norm": 1.3280667066574097, "learning_rate": 1.1991759108101335e-05, "loss": 0.7753, "step": 3038 }, { "epoch": 0.45327764933999554, "grad_norm": 1.8209114074707031, "learning_rate": 1.1987023952804049e-05, "loss": 0.647, "step": 3039 }, { "epoch": 0.45342680289357895, "grad_norm": 2.8812055587768555, "learning_rate": 1.1982288333623277e-05, "loss": 0.6865, "step": 3040 }, { "epoch": 0.45357595644716237, "grad_norm": 1.952882170677185, "learning_rate": 1.1977552251664585e-05, "loss": 0.7563, "step": 3041 }, { "epoch": 0.4537251100007458, "grad_norm": 2.99402117729187, "learning_rate": 1.197281570803364e-05, "loss": 0.6539, "step": 3042 }, { "epoch": 0.4538742635543292, "grad_norm": 1.8382915258407593, "learning_rate": 1.1968078703836218e-05, "loss": 0.7429, "step": 3043 }, { "epoch": 0.4540234171079126, "grad_norm": 1.5008257627487183, "learning_rate": 1.1963341240178206e-05, "loss": 0.7538, "step": 3044 }, { "epoch": 0.454172570661496, "grad_norm": 1.6572102308273315, "learning_rate": 1.1958603318165586e-05, "loss": 0.6954, "step": 3045 }, { "epoch": 0.45432172421507944, "grad_norm": 6.632633686065674, "learning_rate": 1.1953864938904467e-05, "loss": 0.8208, "step": 3046 }, { "epoch": 0.45447087776866285, "grad_norm": 2.624079942703247, "learning_rate": 1.194912610350105e-05, "loss": 0.7031, "step": 3047 }, { "epoch": 0.45462003132224627, "grad_norm": 1.5357149839401245, "learning_rate": 1.1944386813061644e-05, "loss": 0.6675, "step": 3048 }, { "epoch": 0.4547691848758297, "grad_norm": 1.5382065773010254, "learning_rate": 1.193964706869267e-05, "loss": 0.7787, "step": 3049 }, { "epoch": 0.4549183384294131, "grad_norm": 2.021604061126709, "learning_rate": 1.1934906871500654e-05, "loss": 0.7785, "step": 3050 }, { "epoch": 0.4550674919829965, "grad_norm": 1.8119301795959473, "learning_rate": 1.1930166222592217e-05, "loss": 0.7721, "step": 3051 }, { "epoch": 0.4552166455365799, "grad_norm": 1.8695379495620728, "learning_rate": 1.1925425123074102e-05, "loss": 0.7708, "step": 3052 }, { "epoch": 0.45536579909016334, "grad_norm": 1.6479958295822144, "learning_rate": 1.1920683574053145e-05, "loss": 0.737, "step": 3053 }, { "epoch": 0.45551495264374675, "grad_norm": 1.7700783014297485, "learning_rate": 1.1915941576636293e-05, "loss": 0.7401, "step": 3054 }, { "epoch": 0.45566410619733017, "grad_norm": 1.9977173805236816, "learning_rate": 1.1911199131930593e-05, "loss": 0.7661, "step": 3055 }, { "epoch": 0.4558132597509136, "grad_norm": 1.7925950288772583, "learning_rate": 1.1906456241043203e-05, "loss": 0.7741, "step": 3056 }, { "epoch": 0.455962413304497, "grad_norm": 0.5445159077644348, "learning_rate": 1.190171290508138e-05, "loss": 0.2702, "step": 3057 }, { "epoch": 0.4561115668580804, "grad_norm": 2.6628379821777344, "learning_rate": 1.1896969125152482e-05, "loss": 0.784, "step": 3058 }, { "epoch": 0.4562607204116638, "grad_norm": 1.6131913661956787, "learning_rate": 1.189222490236398e-05, "loss": 0.6828, "step": 3059 }, { "epoch": 0.45640987396524724, "grad_norm": 1.468984842300415, "learning_rate": 1.1887480237823443e-05, "loss": 0.741, "step": 3060 }, { "epoch": 0.45655902751883065, "grad_norm": 2.5153448581695557, "learning_rate": 1.1882735132638544e-05, "loss": 0.7381, "step": 3061 }, { "epoch": 0.45670818107241407, "grad_norm": 2.3075339794158936, "learning_rate": 1.1877989587917046e-05, "loss": 0.6826, "step": 3062 }, { "epoch": 0.4568573346259975, "grad_norm": 1.1277083158493042, "learning_rate": 1.1873243604766846e-05, "loss": 0.8248, "step": 3063 }, { "epoch": 0.4570064881795809, "grad_norm": 1.9049655199050903, "learning_rate": 1.1868497184295916e-05, "loss": 0.7277, "step": 3064 }, { "epoch": 0.4571556417331643, "grad_norm": 1.3225849866867065, "learning_rate": 1.1863750327612333e-05, "loss": 0.7413, "step": 3065 }, { "epoch": 0.4573047952867477, "grad_norm": 1.346001148223877, "learning_rate": 1.1859003035824289e-05, "loss": 0.7736, "step": 3066 }, { "epoch": 0.45745394884033114, "grad_norm": 2.0428590774536133, "learning_rate": 1.1854255310040062e-05, "loss": 0.6758, "step": 3067 }, { "epoch": 0.45760310239391455, "grad_norm": 2.1607987880706787, "learning_rate": 1.1849507151368045e-05, "loss": 0.6913, "step": 3068 }, { "epoch": 0.45775225594749797, "grad_norm": 1.7204784154891968, "learning_rate": 1.1844758560916728e-05, "loss": 0.6869, "step": 3069 }, { "epoch": 0.4579014095010814, "grad_norm": 2.077310562133789, "learning_rate": 1.184000953979469e-05, "loss": 0.6877, "step": 3070 }, { "epoch": 0.4580505630546648, "grad_norm": 1.8522059917449951, "learning_rate": 1.183526008911063e-05, "loss": 0.7555, "step": 3071 }, { "epoch": 0.4581997166082482, "grad_norm": 1.407254695892334, "learning_rate": 1.1830510209973335e-05, "loss": 0.711, "step": 3072 }, { "epoch": 0.4583488701618316, "grad_norm": 1.465773344039917, "learning_rate": 1.1825759903491694e-05, "loss": 0.7335, "step": 3073 }, { "epoch": 0.45849802371541504, "grad_norm": 2.1661665439605713, "learning_rate": 1.1821009170774697e-05, "loss": 0.6709, "step": 3074 }, { "epoch": 0.45864717726899845, "grad_norm": 1.9399945735931396, "learning_rate": 1.1816258012931434e-05, "loss": 0.7929, "step": 3075 }, { "epoch": 0.45879633082258187, "grad_norm": 2.4230523109436035, "learning_rate": 1.1811506431071088e-05, "loss": 0.6745, "step": 3076 }, { "epoch": 0.4589454843761653, "grad_norm": 0.4904533326625824, "learning_rate": 1.1806754426302954e-05, "loss": 0.2447, "step": 3077 }, { "epoch": 0.4590946379297487, "grad_norm": 1.4279043674468994, "learning_rate": 1.1802001999736412e-05, "loss": 0.7284, "step": 3078 }, { "epoch": 0.4592437914833321, "grad_norm": 1.3356138467788696, "learning_rate": 1.179724915248095e-05, "loss": 0.7012, "step": 3079 }, { "epoch": 0.4593929450369155, "grad_norm": 1.8593926429748535, "learning_rate": 1.1792495885646148e-05, "loss": 0.7075, "step": 3080 }, { "epoch": 0.45954209859049894, "grad_norm": 0.5034606456756592, "learning_rate": 1.1787742200341687e-05, "loss": 0.2491, "step": 3081 }, { "epoch": 0.45969125214408235, "grad_norm": 2.4542078971862793, "learning_rate": 1.1782988097677349e-05, "loss": 0.6947, "step": 3082 }, { "epoch": 0.45984040569766577, "grad_norm": 1.4992529153823853, "learning_rate": 1.1778233578763005e-05, "loss": 0.7121, "step": 3083 }, { "epoch": 0.4599895592512492, "grad_norm": 2.540437698364258, "learning_rate": 1.1773478644708631e-05, "loss": 0.7484, "step": 3084 }, { "epoch": 0.4601387128048326, "grad_norm": 2.2880139350891113, "learning_rate": 1.1768723296624293e-05, "loss": 0.7715, "step": 3085 }, { "epoch": 0.460287866358416, "grad_norm": 1.8439743518829346, "learning_rate": 1.1763967535620164e-05, "loss": 0.734, "step": 3086 }, { "epoch": 0.4604370199119994, "grad_norm": 1.523332953453064, "learning_rate": 1.1759211362806501e-05, "loss": 0.6405, "step": 3087 }, { "epoch": 0.46058617346558284, "grad_norm": 2.433140516281128, "learning_rate": 1.1754454779293669e-05, "loss": 0.7339, "step": 3088 }, { "epoch": 0.46073532701916625, "grad_norm": 1.913106918334961, "learning_rate": 1.1749697786192113e-05, "loss": 0.6522, "step": 3089 }, { "epoch": 0.46088448057274967, "grad_norm": 2.11919903755188, "learning_rate": 1.1744940384612394e-05, "loss": 0.7063, "step": 3090 }, { "epoch": 0.4610336341263331, "grad_norm": 1.6279749870300293, "learning_rate": 1.1740182575665154e-05, "loss": 0.7627, "step": 3091 }, { "epoch": 0.4611827876799165, "grad_norm": 1.6560213565826416, "learning_rate": 1.1735424360461134e-05, "loss": 0.7591, "step": 3092 }, { "epoch": 0.4613319412334999, "grad_norm": 1.7440851926803589, "learning_rate": 1.1730665740111164e-05, "loss": 0.6872, "step": 3093 }, { "epoch": 0.4614810947870833, "grad_norm": 2.771949291229248, "learning_rate": 1.1725906715726185e-05, "loss": 0.6795, "step": 3094 }, { "epoch": 0.46163024834066674, "grad_norm": 2.9047653675079346, "learning_rate": 1.1721147288417214e-05, "loss": 0.6969, "step": 3095 }, { "epoch": 0.46177940189425015, "grad_norm": 2.1816370487213135, "learning_rate": 1.1716387459295375e-05, "loss": 0.7584, "step": 3096 }, { "epoch": 0.46192855544783357, "grad_norm": 2.0307443141937256, "learning_rate": 1.1711627229471876e-05, "loss": 0.7534, "step": 3097 }, { "epoch": 0.462077709001417, "grad_norm": 1.5518739223480225, "learning_rate": 1.1706866600058025e-05, "loss": 0.6994, "step": 3098 }, { "epoch": 0.4622268625550004, "grad_norm": 1.4106379747390747, "learning_rate": 1.1702105572165223e-05, "loss": 0.7194, "step": 3099 }, { "epoch": 0.4623760161085838, "grad_norm": 2.092452049255371, "learning_rate": 1.1697344146904964e-05, "loss": 0.7788, "step": 3100 }, { "epoch": 0.4625251696621672, "grad_norm": 1.80832839012146, "learning_rate": 1.1692582325388824e-05, "loss": 0.772, "step": 3101 }, { "epoch": 0.46267432321575064, "grad_norm": 1.8384075164794922, "learning_rate": 1.1687820108728491e-05, "loss": 0.6327, "step": 3102 }, { "epoch": 0.46282347676933405, "grad_norm": 1.6467667818069458, "learning_rate": 1.1683057498035733e-05, "loss": 0.7263, "step": 3103 }, { "epoch": 0.46297263032291747, "grad_norm": 1.3603047132492065, "learning_rate": 1.1678294494422406e-05, "loss": 0.7437, "step": 3104 }, { "epoch": 0.4631217838765009, "grad_norm": 1.5367162227630615, "learning_rate": 1.167353109900047e-05, "loss": 0.7689, "step": 3105 }, { "epoch": 0.4632709374300843, "grad_norm": 1.644187331199646, "learning_rate": 1.1668767312881967e-05, "loss": 0.7512, "step": 3106 }, { "epoch": 0.4634200909836677, "grad_norm": 2.4843504428863525, "learning_rate": 1.1664003137179036e-05, "loss": 0.7202, "step": 3107 }, { "epoch": 0.4635692445372511, "grad_norm": 1.575810432434082, "learning_rate": 1.1659238573003903e-05, "loss": 0.7136, "step": 3108 }, { "epoch": 0.46371839809083454, "grad_norm": 1.7716023921966553, "learning_rate": 1.1654473621468888e-05, "loss": 0.6802, "step": 3109 }, { "epoch": 0.46386755164441795, "grad_norm": 1.489600419998169, "learning_rate": 1.1649708283686394e-05, "loss": 0.8263, "step": 3110 }, { "epoch": 0.46401670519800137, "grad_norm": 1.8485076427459717, "learning_rate": 1.1644942560768926e-05, "loss": 0.7438, "step": 3111 }, { "epoch": 0.4641658587515848, "grad_norm": 1.339643955230713, "learning_rate": 1.1640176453829066e-05, "loss": 0.7609, "step": 3112 }, { "epoch": 0.4643150123051682, "grad_norm": 1.8729406595230103, "learning_rate": 1.16354099639795e-05, "loss": 0.7231, "step": 3113 }, { "epoch": 0.4644641658587516, "grad_norm": 2.352970600128174, "learning_rate": 1.163064309233299e-05, "loss": 0.7886, "step": 3114 }, { "epoch": 0.464613319412335, "grad_norm": 1.8579888343811035, "learning_rate": 1.1625875840002392e-05, "loss": 0.6033, "step": 3115 }, { "epoch": 0.46476247296591844, "grad_norm": 2.8477871417999268, "learning_rate": 1.1621108208100657e-05, "loss": 0.6944, "step": 3116 }, { "epoch": 0.46491162651950185, "grad_norm": 1.6627157926559448, "learning_rate": 1.161634019774082e-05, "loss": 0.7261, "step": 3117 }, { "epoch": 0.46506078007308527, "grad_norm": 1.7235338687896729, "learning_rate": 1.1611571810035991e-05, "loss": 0.801, "step": 3118 }, { "epoch": 0.4652099336266687, "grad_norm": 3.5694539546966553, "learning_rate": 1.1606803046099392e-05, "loss": 0.6832, "step": 3119 }, { "epoch": 0.4653590871802521, "grad_norm": 4.100478649139404, "learning_rate": 1.1602033907044324e-05, "loss": 0.7349, "step": 3120 }, { "epoch": 0.4655082407338355, "grad_norm": 3.050969123840332, "learning_rate": 1.1597264393984165e-05, "loss": 0.6317, "step": 3121 }, { "epoch": 0.4656573942874189, "grad_norm": 1.2830173969268799, "learning_rate": 1.1592494508032393e-05, "loss": 0.7005, "step": 3122 }, { "epoch": 0.46580654784100234, "grad_norm": 1.2933545112609863, "learning_rate": 1.1587724250302564e-05, "loss": 0.6897, "step": 3123 }, { "epoch": 0.46595570139458575, "grad_norm": 1.4784008264541626, "learning_rate": 1.1582953621908328e-05, "loss": 0.7939, "step": 3124 }, { "epoch": 0.46610485494816917, "grad_norm": 1.4448784589767456, "learning_rate": 1.1578182623963422e-05, "loss": 0.81, "step": 3125 }, { "epoch": 0.4662540085017526, "grad_norm": 3.9870901107788086, "learning_rate": 1.1573411257581659e-05, "loss": 0.7486, "step": 3126 }, { "epoch": 0.466403162055336, "grad_norm": 2.2093794345855713, "learning_rate": 1.1568639523876955e-05, "loss": 0.732, "step": 3127 }, { "epoch": 0.4665523156089194, "grad_norm": 3.4541988372802734, "learning_rate": 1.1563867423963291e-05, "loss": 0.7324, "step": 3128 }, { "epoch": 0.46670146916250277, "grad_norm": 2.4263312816619873, "learning_rate": 1.155909495895475e-05, "loss": 0.6807, "step": 3129 }, { "epoch": 0.4668506227160862, "grad_norm": 1.969394326210022, "learning_rate": 1.1554322129965495e-05, "loss": 0.7194, "step": 3130 }, { "epoch": 0.4669997762696696, "grad_norm": 1.75937020778656, "learning_rate": 1.1549548938109775e-05, "loss": 0.7641, "step": 3131 }, { "epoch": 0.467148929823253, "grad_norm": 1.89434814453125, "learning_rate": 1.1544775384501914e-05, "loss": 0.7421, "step": 3132 }, { "epoch": 0.4672980833768364, "grad_norm": 1.5727157592773438, "learning_rate": 1.1540001470256339e-05, "loss": 0.8129, "step": 3133 }, { "epoch": 0.46744723693041984, "grad_norm": 0.5610573887825012, "learning_rate": 1.1535227196487545e-05, "loss": 0.2636, "step": 3134 }, { "epoch": 0.46759639048400325, "grad_norm": 2.440617322921753, "learning_rate": 1.1530452564310117e-05, "loss": 0.705, "step": 3135 }, { "epoch": 0.46774554403758667, "grad_norm": 1.59683358669281, "learning_rate": 1.1525677574838728e-05, "loss": 0.7335, "step": 3136 }, { "epoch": 0.4678946975911701, "grad_norm": 1.992727518081665, "learning_rate": 1.1520902229188122e-05, "loss": 0.6949, "step": 3137 }, { "epoch": 0.4680438511447535, "grad_norm": 1.2635822296142578, "learning_rate": 1.151612652847314e-05, "loss": 0.7739, "step": 3138 }, { "epoch": 0.4681930046983369, "grad_norm": 1.9938188791275024, "learning_rate": 1.1511350473808699e-05, "loss": 0.6515, "step": 3139 }, { "epoch": 0.4683421582519203, "grad_norm": 1.627907633781433, "learning_rate": 1.1506574066309796e-05, "loss": 0.6844, "step": 3140 }, { "epoch": 0.46849131180550374, "grad_norm": 2.1021645069122314, "learning_rate": 1.150179730709152e-05, "loss": 0.711, "step": 3141 }, { "epoch": 0.46864046535908715, "grad_norm": 1.4935399293899536, "learning_rate": 1.1497020197269033e-05, "loss": 0.7869, "step": 3142 }, { "epoch": 0.46878961891267057, "grad_norm": 2.396883487701416, "learning_rate": 1.1492242737957582e-05, "loss": 0.7401, "step": 3143 }, { "epoch": 0.468938772466254, "grad_norm": 1.5655854940414429, "learning_rate": 1.1487464930272496e-05, "loss": 0.6704, "step": 3144 }, { "epoch": 0.4690879260198374, "grad_norm": 1.75323486328125, "learning_rate": 1.1482686775329183e-05, "loss": 0.6724, "step": 3145 }, { "epoch": 0.4692370795734208, "grad_norm": 1.3672226667404175, "learning_rate": 1.1477908274243135e-05, "loss": 0.7634, "step": 3146 }, { "epoch": 0.4693862331270042, "grad_norm": 1.7732123136520386, "learning_rate": 1.1473129428129926e-05, "loss": 0.7464, "step": 3147 }, { "epoch": 0.46953538668058764, "grad_norm": 2.369917869567871, "learning_rate": 1.1468350238105203e-05, "loss": 0.7386, "step": 3148 }, { "epoch": 0.46968454023417106, "grad_norm": 2.954575538635254, "learning_rate": 1.1463570705284705e-05, "loss": 0.7841, "step": 3149 }, { "epoch": 0.46983369378775447, "grad_norm": 1.592024326324463, "learning_rate": 1.1458790830784242e-05, "loss": 0.652, "step": 3150 }, { "epoch": 0.4699828473413379, "grad_norm": 1.8013514280319214, "learning_rate": 1.1454010615719701e-05, "loss": 0.7987, "step": 3151 }, { "epoch": 0.4701320008949213, "grad_norm": 1.544229507446289, "learning_rate": 1.1449230061207064e-05, "loss": 0.6773, "step": 3152 }, { "epoch": 0.4702811544485047, "grad_norm": 1.9808224439620972, "learning_rate": 1.1444449168362375e-05, "loss": 0.761, "step": 3153 }, { "epoch": 0.4704303080020881, "grad_norm": 2.605067253112793, "learning_rate": 1.1439667938301762e-05, "loss": 0.8016, "step": 3154 }, { "epoch": 0.47057946155567154, "grad_norm": 1.6863934993743896, "learning_rate": 1.1434886372141443e-05, "loss": 0.7705, "step": 3155 }, { "epoch": 0.47072861510925496, "grad_norm": 1.565940499305725, "learning_rate": 1.1430104470997698e-05, "loss": 0.8031, "step": 3156 }, { "epoch": 0.47087776866283837, "grad_norm": 1.5259214639663696, "learning_rate": 1.1425322235986893e-05, "loss": 0.751, "step": 3157 }, { "epoch": 0.4710269222164218, "grad_norm": 0.574603796005249, "learning_rate": 1.1420539668225475e-05, "loss": 0.2672, "step": 3158 }, { "epoch": 0.4711760757700052, "grad_norm": 1.4916564226150513, "learning_rate": 1.141575676882996e-05, "loss": 0.6258, "step": 3159 }, { "epoch": 0.4713252293235886, "grad_norm": 2.1986145973205566, "learning_rate": 1.141097353891695e-05, "loss": 0.6794, "step": 3160 }, { "epoch": 0.471474382877172, "grad_norm": 1.9725950956344604, "learning_rate": 1.1406189979603122e-05, "loss": 0.7452, "step": 3161 }, { "epoch": 0.47162353643075544, "grad_norm": 1.693302035331726, "learning_rate": 1.1401406092005226e-05, "loss": 0.7793, "step": 3162 }, { "epoch": 0.47177268998433886, "grad_norm": 1.4514915943145752, "learning_rate": 1.139662187724009e-05, "loss": 0.6828, "step": 3163 }, { "epoch": 0.47192184353792227, "grad_norm": 5.5687079429626465, "learning_rate": 1.1391837336424625e-05, "loss": 0.7376, "step": 3164 }, { "epoch": 0.4720709970915057, "grad_norm": 1.3586657047271729, "learning_rate": 1.1387052470675806e-05, "loss": 0.7364, "step": 3165 }, { "epoch": 0.4722201506450891, "grad_norm": 1.6008988618850708, "learning_rate": 1.1382267281110697e-05, "loss": 0.7159, "step": 3166 }, { "epoch": 0.4723693041986725, "grad_norm": 1.079091191291809, "learning_rate": 1.1377481768846426e-05, "loss": 0.7588, "step": 3167 }, { "epoch": 0.4725184577522559, "grad_norm": 1.8195222616195679, "learning_rate": 1.1372695935000204e-05, "loss": 0.7047, "step": 3168 }, { "epoch": 0.47266761130583934, "grad_norm": 2.0705151557922363, "learning_rate": 1.1367909780689315e-05, "loss": 0.6745, "step": 3169 }, { "epoch": 0.47281676485942276, "grad_norm": 1.440582275390625, "learning_rate": 1.1363123307031118e-05, "loss": 0.7404, "step": 3170 }, { "epoch": 0.47296591841300617, "grad_norm": 1.308807373046875, "learning_rate": 1.135833651514304e-05, "loss": 0.6862, "step": 3171 }, { "epoch": 0.4731150719665896, "grad_norm": 1.9004042148590088, "learning_rate": 1.1353549406142596e-05, "loss": 0.7844, "step": 3172 }, { "epoch": 0.473264225520173, "grad_norm": 1.6017411947250366, "learning_rate": 1.1348761981147366e-05, "loss": 0.6891, "step": 3173 }, { "epoch": 0.4734133790737564, "grad_norm": 2.231832981109619, "learning_rate": 1.1343974241274998e-05, "loss": 0.6858, "step": 3174 }, { "epoch": 0.4735625326273398, "grad_norm": 2.087080478668213, "learning_rate": 1.1339186187643229e-05, "loss": 0.7009, "step": 3175 }, { "epoch": 0.47371168618092324, "grad_norm": 2.438065767288208, "learning_rate": 1.1334397821369858e-05, "loss": 0.7978, "step": 3176 }, { "epoch": 0.47386083973450666, "grad_norm": 1.5210343599319458, "learning_rate": 1.1329609143572757e-05, "loss": 0.7158, "step": 3177 }, { "epoch": 0.47400999328809007, "grad_norm": 1.8951514959335327, "learning_rate": 1.1324820155369878e-05, "loss": 0.6508, "step": 3178 }, { "epoch": 0.4741591468416735, "grad_norm": 2.3727991580963135, "learning_rate": 1.1320030857879238e-05, "loss": 0.6768, "step": 3179 }, { "epoch": 0.4743083003952569, "grad_norm": 2.04838490486145, "learning_rate": 1.1315241252218929e-05, "loss": 0.7462, "step": 3180 }, { "epoch": 0.4744574539488403, "grad_norm": 1.4765616655349731, "learning_rate": 1.131045133950712e-05, "loss": 0.7542, "step": 3181 }, { "epoch": 0.4746066075024237, "grad_norm": 1.803985834121704, "learning_rate": 1.130566112086204e-05, "loss": 0.7996, "step": 3182 }, { "epoch": 0.47475576105600714, "grad_norm": 1.4382790327072144, "learning_rate": 1.1300870597402e-05, "loss": 0.6922, "step": 3183 }, { "epoch": 0.47490491460959056, "grad_norm": 1.4640839099884033, "learning_rate": 1.1296079770245378e-05, "loss": 0.8172, "step": 3184 }, { "epoch": 0.47505406816317397, "grad_norm": 1.8027018308639526, "learning_rate": 1.1291288640510623e-05, "loss": 0.769, "step": 3185 }, { "epoch": 0.4752032217167574, "grad_norm": 1.940299391746521, "learning_rate": 1.1286497209316256e-05, "loss": 0.6765, "step": 3186 }, { "epoch": 0.4753523752703408, "grad_norm": 1.544934630393982, "learning_rate": 1.1281705477780866e-05, "loss": 0.7225, "step": 3187 }, { "epoch": 0.4755015288239242, "grad_norm": 1.6930012702941895, "learning_rate": 1.1276913447023114e-05, "loss": 0.7206, "step": 3188 }, { "epoch": 0.4756506823775076, "grad_norm": 2.268054485321045, "learning_rate": 1.1272121118161729e-05, "loss": 0.7195, "step": 3189 }, { "epoch": 0.47579983593109104, "grad_norm": 2.6220314502716064, "learning_rate": 1.1267328492315513e-05, "loss": 0.7086, "step": 3190 }, { "epoch": 0.47594898948467446, "grad_norm": 1.5968910455703735, "learning_rate": 1.1262535570603335e-05, "loss": 0.6379, "step": 3191 }, { "epoch": 0.47609814303825787, "grad_norm": 1.5533720254898071, "learning_rate": 1.1257742354144132e-05, "loss": 0.6572, "step": 3192 }, { "epoch": 0.4762472965918413, "grad_norm": 1.774082064628601, "learning_rate": 1.1252948844056912e-05, "loss": 0.7068, "step": 3193 }, { "epoch": 0.4763964501454247, "grad_norm": 1.833182454109192, "learning_rate": 1.1248155041460749e-05, "loss": 0.7344, "step": 3194 }, { "epoch": 0.4765456036990081, "grad_norm": 1.6147278547286987, "learning_rate": 1.124336094747479e-05, "loss": 0.7488, "step": 3195 }, { "epoch": 0.47669475725259153, "grad_norm": 0.5931664705276489, "learning_rate": 1.1238566563218244e-05, "loss": 0.2469, "step": 3196 }, { "epoch": 0.47684391080617494, "grad_norm": 4.142011642456055, "learning_rate": 1.1233771889810394e-05, "loss": 0.8007, "step": 3197 }, { "epoch": 0.47699306435975836, "grad_norm": 2.1898350715637207, "learning_rate": 1.1228976928370583e-05, "loss": 0.7411, "step": 3198 }, { "epoch": 0.47714221791334177, "grad_norm": 5.547106742858887, "learning_rate": 1.122418168001823e-05, "loss": 0.5943, "step": 3199 }, { "epoch": 0.4772913714669252, "grad_norm": 2.9408299922943115, "learning_rate": 1.1219386145872812e-05, "loss": 0.6183, "step": 3200 }, { "epoch": 0.4774405250205086, "grad_norm": 2.802184581756592, "learning_rate": 1.121459032705388e-05, "loss": 0.7453, "step": 3201 }, { "epoch": 0.477589678574092, "grad_norm": 1.7804977893829346, "learning_rate": 1.1209794224681048e-05, "loss": 0.7256, "step": 3202 }, { "epoch": 0.47773883212767543, "grad_norm": 1.6961147785186768, "learning_rate": 1.1204997839874e-05, "loss": 0.705, "step": 3203 }, { "epoch": 0.47788798568125884, "grad_norm": 1.6160271167755127, "learning_rate": 1.1200201173752476e-05, "loss": 0.6239, "step": 3204 }, { "epoch": 0.47803713923484226, "grad_norm": 4.132756233215332, "learning_rate": 1.1195404227436295e-05, "loss": 0.6922, "step": 3205 }, { "epoch": 0.47818629278842567, "grad_norm": 2.403907299041748, "learning_rate": 1.1190607002045332e-05, "loss": 0.6908, "step": 3206 }, { "epoch": 0.4783354463420091, "grad_norm": 2.684354305267334, "learning_rate": 1.1185809498699526e-05, "loss": 0.6906, "step": 3207 }, { "epoch": 0.4784845998955925, "grad_norm": 1.7647478580474854, "learning_rate": 1.1181011718518895e-05, "loss": 0.698, "step": 3208 }, { "epoch": 0.4786337534491759, "grad_norm": 1.9484226703643799, "learning_rate": 1.1176213662623502e-05, "loss": 0.7645, "step": 3209 }, { "epoch": 0.47878290700275933, "grad_norm": 1.5789352655410767, "learning_rate": 1.1171415332133488e-05, "loss": 0.7319, "step": 3210 }, { "epoch": 0.47893206055634274, "grad_norm": 1.9309359788894653, "learning_rate": 1.1166616728169052e-05, "loss": 0.7045, "step": 3211 }, { "epoch": 0.47908121410992616, "grad_norm": 2.499622344970703, "learning_rate": 1.1161817851850464e-05, "loss": 0.784, "step": 3212 }, { "epoch": 0.47923036766350957, "grad_norm": 3.7575366497039795, "learning_rate": 1.1157018704298049e-05, "loss": 0.7697, "step": 3213 }, { "epoch": 0.479379521217093, "grad_norm": 2.551877498626709, "learning_rate": 1.1152219286632197e-05, "loss": 0.7293, "step": 3214 }, { "epoch": 0.4795286747706764, "grad_norm": 3.2033019065856934, "learning_rate": 1.1147419599973364e-05, "loss": 0.7137, "step": 3215 }, { "epoch": 0.4796778283242598, "grad_norm": 2.551133155822754, "learning_rate": 1.1142619645442068e-05, "loss": 0.7082, "step": 3216 }, { "epoch": 0.47982698187784323, "grad_norm": 2.5078935623168945, "learning_rate": 1.1137819424158891e-05, "loss": 0.6315, "step": 3217 }, { "epoch": 0.47997613543142664, "grad_norm": 2.0039188861846924, "learning_rate": 1.1133018937244471e-05, "loss": 0.7237, "step": 3218 }, { "epoch": 0.48012528898501006, "grad_norm": 1.7703373432159424, "learning_rate": 1.1128218185819517e-05, "loss": 0.7174, "step": 3219 }, { "epoch": 0.48027444253859347, "grad_norm": 1.6656876802444458, "learning_rate": 1.1123417171004794e-05, "loss": 0.7792, "step": 3220 }, { "epoch": 0.4804235960921769, "grad_norm": 2.3286685943603516, "learning_rate": 1.1118615893921125e-05, "loss": 0.6552, "step": 3221 }, { "epoch": 0.4805727496457603, "grad_norm": 2.2547974586486816, "learning_rate": 1.1113814355689408e-05, "loss": 0.6866, "step": 3222 }, { "epoch": 0.4807219031993437, "grad_norm": 2.4561498165130615, "learning_rate": 1.1109012557430585e-05, "loss": 0.7524, "step": 3223 }, { "epoch": 0.48087105675292713, "grad_norm": 2.240703821182251, "learning_rate": 1.1104210500265668e-05, "loss": 0.6546, "step": 3224 }, { "epoch": 0.48102021030651054, "grad_norm": 3.9061713218688965, "learning_rate": 1.1099408185315734e-05, "loss": 0.6639, "step": 3225 }, { "epoch": 0.48116936386009396, "grad_norm": 1.728479027748108, "learning_rate": 1.1094605613701905e-05, "loss": 0.6503, "step": 3226 }, { "epoch": 0.48131851741367737, "grad_norm": 2.817756414413452, "learning_rate": 1.108980278654538e-05, "loss": 0.6778, "step": 3227 }, { "epoch": 0.4814676709672608, "grad_norm": 2.1310625076293945, "learning_rate": 1.1084999704967406e-05, "loss": 0.7272, "step": 3228 }, { "epoch": 0.4816168245208442, "grad_norm": 1.831081748008728, "learning_rate": 1.1080196370089293e-05, "loss": 0.7796, "step": 3229 }, { "epoch": 0.4817659780744276, "grad_norm": 2.41745662689209, "learning_rate": 1.1075392783032412e-05, "loss": 0.6813, "step": 3230 }, { "epoch": 0.48191513162801103, "grad_norm": 2.417243719100952, "learning_rate": 1.1070588944918193e-05, "loss": 0.7228, "step": 3231 }, { "epoch": 0.48206428518159444, "grad_norm": 1.5688945055007935, "learning_rate": 1.1065784856868116e-05, "loss": 0.7462, "step": 3232 }, { "epoch": 0.48221343873517786, "grad_norm": 2.3839685916900635, "learning_rate": 1.106098052000373e-05, "loss": 0.6853, "step": 3233 }, { "epoch": 0.48236259228876127, "grad_norm": 1.692922592163086, "learning_rate": 1.1056175935446642e-05, "loss": 0.8008, "step": 3234 }, { "epoch": 0.4825117458423447, "grad_norm": 1.956600308418274, "learning_rate": 1.1051371104318507e-05, "loss": 0.7058, "step": 3235 }, { "epoch": 0.4826608993959281, "grad_norm": 2.222158908843994, "learning_rate": 1.1046566027741048e-05, "loss": 0.7556, "step": 3236 }, { "epoch": 0.4828100529495115, "grad_norm": 1.6394529342651367, "learning_rate": 1.1041760706836037e-05, "loss": 0.6782, "step": 3237 }, { "epoch": 0.48295920650309493, "grad_norm": 7.02539587020874, "learning_rate": 1.1036955142725309e-05, "loss": 0.7639, "step": 3238 }, { "epoch": 0.48310836005667834, "grad_norm": 2.0615997314453125, "learning_rate": 1.1032149336530757e-05, "loss": 0.6988, "step": 3239 }, { "epoch": 0.48325751361026176, "grad_norm": 1.4621835947036743, "learning_rate": 1.1027343289374322e-05, "loss": 0.7747, "step": 3240 }, { "epoch": 0.48340666716384517, "grad_norm": 1.61691153049469, "learning_rate": 1.102253700237801e-05, "loss": 0.696, "step": 3241 }, { "epoch": 0.4835558207174286, "grad_norm": 1.3735705614089966, "learning_rate": 1.1017730476663878e-05, "loss": 0.7509, "step": 3242 }, { "epoch": 0.483704974271012, "grad_norm": 0.5618718266487122, "learning_rate": 1.1012923713354039e-05, "loss": 0.2383, "step": 3243 }, { "epoch": 0.4838541278245954, "grad_norm": 2.829066753387451, "learning_rate": 1.1008116713570664e-05, "loss": 0.5998, "step": 3244 }, { "epoch": 0.48400328137817883, "grad_norm": 1.8450647592544556, "learning_rate": 1.1003309478435982e-05, "loss": 0.7159, "step": 3245 }, { "epoch": 0.48415243493176224, "grad_norm": 2.1982462406158447, "learning_rate": 1.0998502009072264e-05, "loss": 0.7091, "step": 3246 }, { "epoch": 0.48430158848534566, "grad_norm": 1.6377393007278442, "learning_rate": 1.0993694306601852e-05, "loss": 0.7376, "step": 3247 }, { "epoch": 0.48445074203892907, "grad_norm": 2.04807186126709, "learning_rate": 1.0988886372147135e-05, "loss": 0.7121, "step": 3248 }, { "epoch": 0.4845998955925125, "grad_norm": 1.900765061378479, "learning_rate": 1.0984078206830548e-05, "loss": 0.7531, "step": 3249 }, { "epoch": 0.4847490491460959, "grad_norm": 2.432833194732666, "learning_rate": 1.0979269811774598e-05, "loss": 0.6876, "step": 3250 }, { "epoch": 0.4848982026996793, "grad_norm": 1.9179781675338745, "learning_rate": 1.0974461188101831e-05, "loss": 0.7948, "step": 3251 }, { "epoch": 0.48504735625326273, "grad_norm": 1.5272663831710815, "learning_rate": 1.096965233693485e-05, "loss": 0.7914, "step": 3252 }, { "epoch": 0.48519650980684614, "grad_norm": 1.7859017848968506, "learning_rate": 1.0964843259396313e-05, "loss": 0.657, "step": 3253 }, { "epoch": 0.48534566336042956, "grad_norm": 0.6160869598388672, "learning_rate": 1.0960033956608931e-05, "loss": 0.2826, "step": 3254 }, { "epoch": 0.48549481691401297, "grad_norm": 2.1919374465942383, "learning_rate": 1.0955224429695466e-05, "loss": 0.7485, "step": 3255 }, { "epoch": 0.4856439704675964, "grad_norm": 1.9079320430755615, "learning_rate": 1.0950414679778736e-05, "loss": 0.7029, "step": 3256 }, { "epoch": 0.4857931240211798, "grad_norm": 1.5120373964309692, "learning_rate": 1.0945604707981601e-05, "loss": 0.7154, "step": 3257 }, { "epoch": 0.4859422775747632, "grad_norm": 1.5386030673980713, "learning_rate": 1.0940794515426986e-05, "loss": 0.6643, "step": 3258 }, { "epoch": 0.48609143112834663, "grad_norm": 2.3268203735351562, "learning_rate": 1.0935984103237857e-05, "loss": 0.7258, "step": 3259 }, { "epoch": 0.48624058468193004, "grad_norm": 2.3659005165100098, "learning_rate": 1.0931173472537237e-05, "loss": 0.7944, "step": 3260 }, { "epoch": 0.48638973823551346, "grad_norm": 1.989534616470337, "learning_rate": 1.0926362624448202e-05, "loss": 0.8231, "step": 3261 }, { "epoch": 0.4865388917890969, "grad_norm": 2.1026225090026855, "learning_rate": 1.0921551560093872e-05, "loss": 0.6208, "step": 3262 }, { "epoch": 0.4866880453426803, "grad_norm": 2.4635164737701416, "learning_rate": 1.0916740280597417e-05, "loss": 0.7206, "step": 3263 }, { "epoch": 0.4868371988962637, "grad_norm": 1.5957015752792358, "learning_rate": 1.091192878708207e-05, "loss": 0.7556, "step": 3264 }, { "epoch": 0.4869863524498471, "grad_norm": 2.1079726219177246, "learning_rate": 1.0907117080671099e-05, "loss": 0.7417, "step": 3265 }, { "epoch": 0.48713550600343053, "grad_norm": 2.2839293479919434, "learning_rate": 1.0902305162487829e-05, "loss": 0.6725, "step": 3266 }, { "epoch": 0.48728465955701394, "grad_norm": 3.696577310562134, "learning_rate": 1.0897493033655636e-05, "loss": 0.6617, "step": 3267 }, { "epoch": 0.48743381311059736, "grad_norm": 2.599727153778076, "learning_rate": 1.0892680695297932e-05, "loss": 0.6573, "step": 3268 }, { "epoch": 0.4875829666641808, "grad_norm": 1.793224811553955, "learning_rate": 1.0887868148538204e-05, "loss": 0.6757, "step": 3269 }, { "epoch": 0.4877321202177642, "grad_norm": 1.6963611841201782, "learning_rate": 1.0883055394499962e-05, "loss": 0.7074, "step": 3270 }, { "epoch": 0.4878812737713476, "grad_norm": 6.037520408630371, "learning_rate": 1.0878242434306772e-05, "loss": 0.7853, "step": 3271 }, { "epoch": 0.488030427324931, "grad_norm": 2.717055320739746, "learning_rate": 1.0873429269082256e-05, "loss": 0.754, "step": 3272 }, { "epoch": 0.48817958087851443, "grad_norm": 2.407322883605957, "learning_rate": 1.086861589995008e-05, "loss": 0.7472, "step": 3273 }, { "epoch": 0.48832873443209784, "grad_norm": 1.9862772226333618, "learning_rate": 1.0863802328033947e-05, "loss": 0.6888, "step": 3274 }, { "epoch": 0.48847788798568126, "grad_norm": 1.765489935874939, "learning_rate": 1.0858988554457626e-05, "loss": 0.653, "step": 3275 }, { "epoch": 0.4886270415392647, "grad_norm": 2.1738131046295166, "learning_rate": 1.0854174580344918e-05, "loss": 0.7195, "step": 3276 }, { "epoch": 0.4887761950928481, "grad_norm": 2.6630795001983643, "learning_rate": 1.0849360406819676e-05, "loss": 0.6692, "step": 3277 }, { "epoch": 0.4889253486464315, "grad_norm": 1.3841874599456787, "learning_rate": 1.0844546035005803e-05, "loss": 0.767, "step": 3278 }, { "epoch": 0.4890745022000149, "grad_norm": 2.191614866256714, "learning_rate": 1.0839731466027242e-05, "loss": 0.6606, "step": 3279 }, { "epoch": 0.48922365575359833, "grad_norm": 1.260330080986023, "learning_rate": 1.0834916701007985e-05, "loss": 0.7664, "step": 3280 }, { "epoch": 0.48937280930718174, "grad_norm": 1.6861523389816284, "learning_rate": 1.0830101741072069e-05, "loss": 0.7583, "step": 3281 }, { "epoch": 0.48952196286076516, "grad_norm": 3.371089220046997, "learning_rate": 1.0825286587343582e-05, "loss": 0.6972, "step": 3282 }, { "epoch": 0.4896711164143486, "grad_norm": 1.3144782781600952, "learning_rate": 1.082047124094665e-05, "loss": 0.7408, "step": 3283 }, { "epoch": 0.489820269967932, "grad_norm": 1.5596603155136108, "learning_rate": 1.0815655703005446e-05, "loss": 0.7641, "step": 3284 }, { "epoch": 0.4899694235215154, "grad_norm": 1.4951192140579224, "learning_rate": 1.0810839974644183e-05, "loss": 0.683, "step": 3285 }, { "epoch": 0.4901185770750988, "grad_norm": 1.8723441362380981, "learning_rate": 1.0806024056987132e-05, "loss": 0.7225, "step": 3286 }, { "epoch": 0.49026773062868223, "grad_norm": 1.492048978805542, "learning_rate": 1.0801207951158599e-05, "loss": 0.7127, "step": 3287 }, { "epoch": 0.49041688418226564, "grad_norm": 1.5292861461639404, "learning_rate": 1.079639165828293e-05, "loss": 0.6955, "step": 3288 }, { "epoch": 0.49056603773584906, "grad_norm": 1.4033946990966797, "learning_rate": 1.0791575179484523e-05, "loss": 0.7295, "step": 3289 }, { "epoch": 0.4907151912894325, "grad_norm": 2.006059408187866, "learning_rate": 1.0786758515887814e-05, "loss": 0.6792, "step": 3290 }, { "epoch": 0.4908643448430159, "grad_norm": 1.5134053230285645, "learning_rate": 1.0781941668617285e-05, "loss": 0.6444, "step": 3291 }, { "epoch": 0.4910134983965993, "grad_norm": 1.8875211477279663, "learning_rate": 1.077712463879746e-05, "loss": 0.7817, "step": 3292 }, { "epoch": 0.4911626519501827, "grad_norm": 1.8130658864974976, "learning_rate": 1.0772307427552903e-05, "loss": 0.6435, "step": 3293 }, { "epoch": 0.49131180550376613, "grad_norm": 1.8091539144515991, "learning_rate": 1.0767490036008225e-05, "loss": 0.6978, "step": 3294 }, { "epoch": 0.49146095905734954, "grad_norm": 2.071532726287842, "learning_rate": 1.0762672465288079e-05, "loss": 0.7282, "step": 3295 }, { "epoch": 0.49161011261093296, "grad_norm": 1.2029953002929688, "learning_rate": 1.0757854716517156e-05, "loss": 0.8228, "step": 3296 }, { "epoch": 0.4917592661645164, "grad_norm": 2.9632558822631836, "learning_rate": 1.075303679082019e-05, "loss": 0.6383, "step": 3297 }, { "epoch": 0.4919084197180998, "grad_norm": 4.3289666175842285, "learning_rate": 1.0748218689321954e-05, "loss": 0.6404, "step": 3298 }, { "epoch": 0.4920575732716832, "grad_norm": 1.774199366569519, "learning_rate": 1.0743400413147269e-05, "loss": 0.7898, "step": 3299 }, { "epoch": 0.4922067268252666, "grad_norm": 1.4603984355926514, "learning_rate": 1.0738581963420994e-05, "loss": 0.7568, "step": 3300 }, { "epoch": 0.49235588037885003, "grad_norm": 1.5523335933685303, "learning_rate": 1.073376334126802e-05, "loss": 0.7592, "step": 3301 }, { "epoch": 0.49250503393243344, "grad_norm": 4.2005133628845215, "learning_rate": 1.0728944547813289e-05, "loss": 0.7495, "step": 3302 }, { "epoch": 0.49265418748601686, "grad_norm": 1.8660484552383423, "learning_rate": 1.072412558418178e-05, "loss": 0.6853, "step": 3303 }, { "epoch": 0.4928033410396003, "grad_norm": 1.8337358236312866, "learning_rate": 1.0719306451498513e-05, "loss": 0.7398, "step": 3304 }, { "epoch": 0.4929524945931837, "grad_norm": 1.6892856359481812, "learning_rate": 1.0714487150888537e-05, "loss": 0.6031, "step": 3305 }, { "epoch": 0.4931016481467671, "grad_norm": 1.9784512519836426, "learning_rate": 1.0709667683476962e-05, "loss": 0.7434, "step": 3306 }, { "epoch": 0.4932508017003505, "grad_norm": 1.771559238433838, "learning_rate": 1.0704848050388905e-05, "loss": 0.7201, "step": 3307 }, { "epoch": 0.49339995525393393, "grad_norm": 1.5424484014511108, "learning_rate": 1.0700028252749559e-05, "loss": 0.7072, "step": 3308 }, { "epoch": 0.49354910880751734, "grad_norm": 1.8284648656845093, "learning_rate": 1.069520829168413e-05, "loss": 0.6713, "step": 3309 }, { "epoch": 0.49369826236110076, "grad_norm": 2.6538422107696533, "learning_rate": 1.0690388168317863e-05, "loss": 0.7005, "step": 3310 }, { "epoch": 0.4938474159146842, "grad_norm": 2.7101683616638184, "learning_rate": 1.0685567883776054e-05, "loss": 0.7618, "step": 3311 }, { "epoch": 0.4939965694682676, "grad_norm": 3.207545042037964, "learning_rate": 1.0680747439184025e-05, "loss": 0.7248, "step": 3312 }, { "epoch": 0.494145723021851, "grad_norm": 1.363322138786316, "learning_rate": 1.0675926835667142e-05, "loss": 0.683, "step": 3313 }, { "epoch": 0.4942948765754344, "grad_norm": 2.334472894668579, "learning_rate": 1.0671106074350805e-05, "loss": 0.6827, "step": 3314 }, { "epoch": 0.49444403012901783, "grad_norm": 2.038658380508423, "learning_rate": 1.0666285156360451e-05, "loss": 0.7342, "step": 3315 }, { "epoch": 0.49459318368260125, "grad_norm": 1.6707812547683716, "learning_rate": 1.0661464082821558e-05, "loss": 0.7581, "step": 3316 }, { "epoch": 0.49474233723618466, "grad_norm": 1.6945194005966187, "learning_rate": 1.065664285485963e-05, "loss": 0.7344, "step": 3317 }, { "epoch": 0.4948914907897681, "grad_norm": 1.6287341117858887, "learning_rate": 1.0651821473600218e-05, "loss": 0.652, "step": 3318 }, { "epoch": 0.4950406443433515, "grad_norm": 1.8568816184997559, "learning_rate": 1.0646999940168908e-05, "loss": 0.7769, "step": 3319 }, { "epoch": 0.4951897978969349, "grad_norm": 1.9873363971710205, "learning_rate": 1.064217825569131e-05, "loss": 0.6886, "step": 3320 }, { "epoch": 0.4953389514505183, "grad_norm": 2.4016613960266113, "learning_rate": 1.0637356421293077e-05, "loss": 0.6499, "step": 3321 }, { "epoch": 0.49548810500410173, "grad_norm": 2.021703004837036, "learning_rate": 1.0632534438099906e-05, "loss": 0.7051, "step": 3322 }, { "epoch": 0.49563725855768515, "grad_norm": 3.7919745445251465, "learning_rate": 1.0627712307237513e-05, "loss": 0.6603, "step": 3323 }, { "epoch": 0.49578641211126856, "grad_norm": 0.5908796787261963, "learning_rate": 1.0622890029831656e-05, "loss": 0.2715, "step": 3324 }, { "epoch": 0.495935565664852, "grad_norm": 1.7248437404632568, "learning_rate": 1.0618067607008127e-05, "loss": 0.7648, "step": 3325 }, { "epoch": 0.4960847192184354, "grad_norm": 1.3198431730270386, "learning_rate": 1.0613245039892755e-05, "loss": 0.6223, "step": 3326 }, { "epoch": 0.4962338727720188, "grad_norm": 1.735366702079773, "learning_rate": 1.0608422329611393e-05, "loss": 0.7035, "step": 3327 }, { "epoch": 0.4963830263256022, "grad_norm": 1.478170394897461, "learning_rate": 1.0603599477289939e-05, "loss": 0.6723, "step": 3328 }, { "epoch": 0.49653217987918563, "grad_norm": 1.5071765184402466, "learning_rate": 1.0598776484054313e-05, "loss": 0.7367, "step": 3329 }, { "epoch": 0.49668133343276905, "grad_norm": 2.932912588119507, "learning_rate": 1.0593953351030481e-05, "loss": 0.6949, "step": 3330 }, { "epoch": 0.49683048698635246, "grad_norm": 2.397260904312134, "learning_rate": 1.0589130079344431e-05, "loss": 0.7984, "step": 3331 }, { "epoch": 0.4969796405399359, "grad_norm": 2.221308708190918, "learning_rate": 1.0584306670122186e-05, "loss": 0.7277, "step": 3332 }, { "epoch": 0.4971287940935193, "grad_norm": 1.8499332666397095, "learning_rate": 1.05794831244898e-05, "loss": 0.6755, "step": 3333 }, { "epoch": 0.4972779476471027, "grad_norm": 1.679248571395874, "learning_rate": 1.0574659443573367e-05, "loss": 0.7764, "step": 3334 }, { "epoch": 0.4974271012006861, "grad_norm": 0.7295615673065186, "learning_rate": 1.0569835628498998e-05, "loss": 0.2425, "step": 3335 }, { "epoch": 0.49757625475426953, "grad_norm": 1.8547061681747437, "learning_rate": 1.0565011680392852e-05, "loss": 0.6581, "step": 3336 }, { "epoch": 0.49772540830785295, "grad_norm": 1.7699201107025146, "learning_rate": 1.0560187600381104e-05, "loss": 0.7546, "step": 3337 }, { "epoch": 0.49787456186143636, "grad_norm": 2.614280939102173, "learning_rate": 1.0555363389589966e-05, "loss": 0.7593, "step": 3338 }, { "epoch": 0.4980237154150198, "grad_norm": 1.9257951974868774, "learning_rate": 1.0550539049145687e-05, "loss": 0.721, "step": 3339 }, { "epoch": 0.4981728689686032, "grad_norm": 3.2454769611358643, "learning_rate": 1.054571458017454e-05, "loss": 0.6699, "step": 3340 }, { "epoch": 0.4983220225221866, "grad_norm": 2.0113909244537354, "learning_rate": 1.054088998380282e-05, "loss": 0.7031, "step": 3341 }, { "epoch": 0.49847117607577, "grad_norm": 0.5548203587532043, "learning_rate": 1.0536065261156864e-05, "loss": 0.2644, "step": 3342 }, { "epoch": 0.49862032962935343, "grad_norm": 1.9350905418395996, "learning_rate": 1.053124041336304e-05, "loss": 0.8339, "step": 3343 }, { "epoch": 0.49876948318293685, "grad_norm": 1.9300392866134644, "learning_rate": 1.0526415441547732e-05, "loss": 0.6073, "step": 3344 }, { "epoch": 0.49891863673652026, "grad_norm": 1.3150237798690796, "learning_rate": 1.0521590346837366e-05, "loss": 0.8359, "step": 3345 }, { "epoch": 0.4990677902901037, "grad_norm": 1.72158682346344, "learning_rate": 1.0516765130358389e-05, "loss": 0.6903, "step": 3346 }, { "epoch": 0.4992169438436871, "grad_norm": 1.496649980545044, "learning_rate": 1.0511939793237275e-05, "loss": 0.8091, "step": 3347 }, { "epoch": 0.4993660973972705, "grad_norm": 2.41050386428833, "learning_rate": 1.0507114336600539e-05, "loss": 0.7371, "step": 3348 }, { "epoch": 0.4995152509508539, "grad_norm": 1.5338939428329468, "learning_rate": 1.0502288761574706e-05, "loss": 0.7703, "step": 3349 }, { "epoch": 0.49966440450443733, "grad_norm": 1.2544947862625122, "learning_rate": 1.0497463069286343e-05, "loss": 0.8055, "step": 3350 }, { "epoch": 0.49981355805802075, "grad_norm": 2.8899881839752197, "learning_rate": 1.0492637260862036e-05, "loss": 0.7162, "step": 3351 }, { "epoch": 0.49996271161160416, "grad_norm": 1.5588089227676392, "learning_rate": 1.04878113374284e-05, "loss": 0.6586, "step": 3352 }, { "epoch": 0.5001118651651876, "grad_norm": 1.5837825536727905, "learning_rate": 1.0482985300112081e-05, "loss": 0.6499, "step": 3353 }, { "epoch": 0.5002610187187709, "grad_norm": 1.7567018270492554, "learning_rate": 1.0478159150039745e-05, "loss": 0.7366, "step": 3354 }, { "epoch": 0.5004101722723544, "grad_norm": 1.5010707378387451, "learning_rate": 1.047333288833809e-05, "loss": 0.5701, "step": 3355 }, { "epoch": 0.5005593258259378, "grad_norm": 2.057220220565796, "learning_rate": 1.046850651613384e-05, "loss": 0.7132, "step": 3356 }, { "epoch": 0.5007084793795212, "grad_norm": 1.464414358139038, "learning_rate": 1.0463680034553738e-05, "loss": 0.7116, "step": 3357 }, { "epoch": 0.5008576329331046, "grad_norm": 3.052220582962036, "learning_rate": 1.045885344472456e-05, "loss": 0.7014, "step": 3358 }, { "epoch": 0.5010067864866881, "grad_norm": 2.219841480255127, "learning_rate": 1.0454026747773103e-05, "loss": 0.6854, "step": 3359 }, { "epoch": 0.5011559400402714, "grad_norm": 1.5549192428588867, "learning_rate": 1.0449199944826185e-05, "loss": 0.6923, "step": 3360 }, { "epoch": 0.5013050935938549, "grad_norm": 2.1834332942962646, "learning_rate": 1.0444373037010667e-05, "loss": 0.7265, "step": 3361 }, { "epoch": 0.5014542471474382, "grad_norm": 1.6465387344360352, "learning_rate": 1.0439546025453411e-05, "loss": 0.7524, "step": 3362 }, { "epoch": 0.5016034007010217, "grad_norm": 1.544236183166504, "learning_rate": 1.0434718911281316e-05, "loss": 0.7916, "step": 3363 }, { "epoch": 0.5017525542546051, "grad_norm": 1.8105897903442383, "learning_rate": 1.0429891695621304e-05, "loss": 0.8014, "step": 3364 }, { "epoch": 0.5019017078081885, "grad_norm": 2.6176044940948486, "learning_rate": 1.042506437960032e-05, "loss": 0.6486, "step": 3365 }, { "epoch": 0.5020508613617719, "grad_norm": 2.001774787902832, "learning_rate": 1.0420236964345332e-05, "loss": 0.6306, "step": 3366 }, { "epoch": 0.5022000149153554, "grad_norm": 1.8408493995666504, "learning_rate": 1.041540945098333e-05, "loss": 0.6285, "step": 3367 }, { "epoch": 0.5023491684689387, "grad_norm": 1.5073113441467285, "learning_rate": 1.0410581840641324e-05, "loss": 0.7164, "step": 3368 }, { "epoch": 0.5024983220225222, "grad_norm": 1.272460699081421, "learning_rate": 1.040575413444636e-05, "loss": 0.7681, "step": 3369 }, { "epoch": 0.5026474755761056, "grad_norm": 1.5835671424865723, "learning_rate": 1.040092633352549e-05, "loss": 0.6854, "step": 3370 }, { "epoch": 0.502796629129689, "grad_norm": 3.1281960010528564, "learning_rate": 1.0396098439005796e-05, "loss": 0.7668, "step": 3371 }, { "epoch": 0.5029457826832724, "grad_norm": 3.282503843307495, "learning_rate": 1.0391270452014382e-05, "loss": 0.6645, "step": 3372 }, { "epoch": 0.5030949362368559, "grad_norm": 1.9713677167892456, "learning_rate": 1.0386442373678372e-05, "loss": 0.7727, "step": 3373 }, { "epoch": 0.5032440897904392, "grad_norm": 1.8522738218307495, "learning_rate": 1.038161420512491e-05, "loss": 0.6568, "step": 3374 }, { "epoch": 0.5033932433440227, "grad_norm": 2.9655535221099854, "learning_rate": 1.0376785947481168e-05, "loss": 0.7609, "step": 3375 }, { "epoch": 0.503542396897606, "grad_norm": 1.8802098035812378, "learning_rate": 1.037195760187433e-05, "loss": 0.6716, "step": 3376 }, { "epoch": 0.5036915504511895, "grad_norm": 1.421863317489624, "learning_rate": 1.03671291694316e-05, "loss": 0.7202, "step": 3377 }, { "epoch": 0.5038407040047729, "grad_norm": 2.2005538940429688, "learning_rate": 1.0362300651280217e-05, "loss": 0.7651, "step": 3378 }, { "epoch": 0.5039898575583563, "grad_norm": 1.546616792678833, "learning_rate": 1.0357472048547423e-05, "loss": 0.6934, "step": 3379 }, { "epoch": 0.5041390111119397, "grad_norm": 1.691002368927002, "learning_rate": 1.0352643362360486e-05, "loss": 0.6477, "step": 3380 }, { "epoch": 0.5042881646655232, "grad_norm": 1.5052564144134521, "learning_rate": 1.0347814593846694e-05, "loss": 0.6992, "step": 3381 }, { "epoch": 0.5044373182191065, "grad_norm": 2.9557366371154785, "learning_rate": 1.0342985744133358e-05, "loss": 0.7006, "step": 3382 }, { "epoch": 0.50458647177269, "grad_norm": 2.8049428462982178, "learning_rate": 1.0338156814347799e-05, "loss": 0.6344, "step": 3383 }, { "epoch": 0.5047356253262734, "grad_norm": 1.9102730751037598, "learning_rate": 1.0333327805617367e-05, "loss": 0.6325, "step": 3384 }, { "epoch": 0.5048847788798568, "grad_norm": 1.680511474609375, "learning_rate": 1.0328498719069416e-05, "loss": 0.7286, "step": 3385 }, { "epoch": 0.5050339324334402, "grad_norm": 2.704206705093384, "learning_rate": 1.0323669555831332e-05, "loss": 0.7484, "step": 3386 }, { "epoch": 0.5051830859870237, "grad_norm": 2.1362547874450684, "learning_rate": 1.0318840317030518e-05, "loss": 0.7005, "step": 3387 }, { "epoch": 0.505332239540607, "grad_norm": 2.043144464492798, "learning_rate": 1.0314011003794386e-05, "loss": 0.7224, "step": 3388 }, { "epoch": 0.5054813930941905, "grad_norm": 2.344668388366699, "learning_rate": 1.0309181617250374e-05, "loss": 0.7938, "step": 3389 }, { "epoch": 0.5056305466477738, "grad_norm": 1.5046324729919434, "learning_rate": 1.030435215852593e-05, "loss": 0.7339, "step": 3390 }, { "epoch": 0.5057797002013573, "grad_norm": 1.7516433000564575, "learning_rate": 1.0299522628748522e-05, "loss": 0.7072, "step": 3391 }, { "epoch": 0.5059288537549407, "grad_norm": 1.988605260848999, "learning_rate": 1.0294693029045636e-05, "loss": 0.685, "step": 3392 }, { "epoch": 0.5060780073085241, "grad_norm": 2.322361707687378, "learning_rate": 1.0289863360544775e-05, "loss": 0.6992, "step": 3393 }, { "epoch": 0.5062271608621075, "grad_norm": 1.689100742340088, "learning_rate": 1.0285033624373453e-05, "loss": 0.7599, "step": 3394 }, { "epoch": 0.506376314415691, "grad_norm": 2.0685291290283203, "learning_rate": 1.0280203821659203e-05, "loss": 0.7024, "step": 3395 }, { "epoch": 0.5065254679692743, "grad_norm": 1.5994294881820679, "learning_rate": 1.0275373953529572e-05, "loss": 0.7576, "step": 3396 }, { "epoch": 0.5066746215228578, "grad_norm": 1.15507972240448, "learning_rate": 1.027054402111213e-05, "loss": 0.7787, "step": 3397 }, { "epoch": 0.5068237750764412, "grad_norm": 3.1324007511138916, "learning_rate": 1.0265714025534451e-05, "loss": 0.7364, "step": 3398 }, { "epoch": 0.5069729286300246, "grad_norm": 2.631925106048584, "learning_rate": 1.0260883967924123e-05, "loss": 0.6775, "step": 3399 }, { "epoch": 0.507122082183608, "grad_norm": 1.2838984727859497, "learning_rate": 1.0256053849408768e-05, "loss": 0.6785, "step": 3400 }, { "epoch": 0.5072712357371915, "grad_norm": 1.4466445446014404, "learning_rate": 1.0251223671115995e-05, "loss": 0.7099, "step": 3401 }, { "epoch": 0.5074203892907748, "grad_norm": 1.809421181678772, "learning_rate": 1.0246393434173446e-05, "loss": 0.657, "step": 3402 }, { "epoch": 0.5075695428443583, "grad_norm": 2.038848876953125, "learning_rate": 1.024156313970877e-05, "loss": 0.7078, "step": 3403 }, { "epoch": 0.5077186963979416, "grad_norm": 1.5946061611175537, "learning_rate": 1.023673278884963e-05, "loss": 0.6895, "step": 3404 }, { "epoch": 0.5078678499515251, "grad_norm": 1.4062825441360474, "learning_rate": 1.0231902382723704e-05, "loss": 0.7204, "step": 3405 }, { "epoch": 0.5080170035051085, "grad_norm": 2.1569643020629883, "learning_rate": 1.022707192245868e-05, "loss": 0.6839, "step": 3406 }, { "epoch": 0.508166157058692, "grad_norm": 1.9131007194519043, "learning_rate": 1.0222241409182256e-05, "loss": 0.7334, "step": 3407 }, { "epoch": 0.5083153106122753, "grad_norm": 1.9568451642990112, "learning_rate": 1.0217410844022154e-05, "loss": 0.7345, "step": 3408 }, { "epoch": 0.5084644641658588, "grad_norm": 2.7167530059814453, "learning_rate": 1.0212580228106094e-05, "loss": 0.7074, "step": 3409 }, { "epoch": 0.5086136177194421, "grad_norm": 2.155876636505127, "learning_rate": 1.0207749562561817e-05, "loss": 0.7365, "step": 3410 }, { "epoch": 0.5087627712730256, "grad_norm": 2.557018280029297, "learning_rate": 1.0202918848517075e-05, "loss": 0.6833, "step": 3411 }, { "epoch": 0.508911924826609, "grad_norm": 2.4267208576202393, "learning_rate": 1.0198088087099624e-05, "loss": 0.6459, "step": 3412 }, { "epoch": 0.5090610783801924, "grad_norm": 2.124199151992798, "learning_rate": 1.0193257279437238e-05, "loss": 0.7777, "step": 3413 }, { "epoch": 0.5092102319337758, "grad_norm": 1.9305486679077148, "learning_rate": 1.0188426426657705e-05, "loss": 0.6623, "step": 3414 }, { "epoch": 0.5093593854873593, "grad_norm": 1.8992012739181519, "learning_rate": 1.0183595529888812e-05, "loss": 0.7166, "step": 3415 }, { "epoch": 0.5095085390409426, "grad_norm": 1.8163341283798218, "learning_rate": 1.0178764590258363e-05, "loss": 0.72, "step": 3416 }, { "epoch": 0.5096576925945261, "grad_norm": 1.60196852684021, "learning_rate": 1.0173933608894177e-05, "loss": 0.6978, "step": 3417 }, { "epoch": 0.5098068461481094, "grad_norm": 1.548319935798645, "learning_rate": 1.0169102586924077e-05, "loss": 0.6909, "step": 3418 }, { "epoch": 0.5099559997016929, "grad_norm": 1.7419800758361816, "learning_rate": 1.016427152547589e-05, "loss": 0.6967, "step": 3419 }, { "epoch": 0.5101051532552763, "grad_norm": 2.3299918174743652, "learning_rate": 1.0159440425677466e-05, "loss": 0.7741, "step": 3420 }, { "epoch": 0.5102543068088597, "grad_norm": 2.3428382873535156, "learning_rate": 1.0154609288656647e-05, "loss": 0.631, "step": 3421 }, { "epoch": 0.5104034603624431, "grad_norm": 1.955151081085205, "learning_rate": 1.01497781155413e-05, "loss": 0.7661, "step": 3422 }, { "epoch": 0.5105526139160266, "grad_norm": 2.20231556892395, "learning_rate": 1.0144946907459294e-05, "loss": 0.6447, "step": 3423 }, { "epoch": 0.5107017674696099, "grad_norm": 1.7737367153167725, "learning_rate": 1.0140115665538502e-05, "loss": 0.8124, "step": 3424 }, { "epoch": 0.5108509210231934, "grad_norm": 1.36978018283844, "learning_rate": 1.0135284390906805e-05, "loss": 0.7062, "step": 3425 }, { "epoch": 0.5110000745767768, "grad_norm": 1.9172247648239136, "learning_rate": 1.0130453084692108e-05, "loss": 0.741, "step": 3426 }, { "epoch": 0.5111492281303602, "grad_norm": 1.7368900775909424, "learning_rate": 1.0125621748022295e-05, "loss": 0.7551, "step": 3427 }, { "epoch": 0.5112983816839436, "grad_norm": 1.3074618577957153, "learning_rate": 1.0120790382025282e-05, "loss": 0.7033, "step": 3428 }, { "epoch": 0.5114475352375271, "grad_norm": 2.058091878890991, "learning_rate": 1.0115958987828977e-05, "loss": 0.6954, "step": 3429 }, { "epoch": 0.5115966887911104, "grad_norm": 1.8810315132141113, "learning_rate": 1.0111127566561305e-05, "loss": 0.6752, "step": 3430 }, { "epoch": 0.5117458423446939, "grad_norm": 1.655214548110962, "learning_rate": 1.0106296119350192e-05, "loss": 0.788, "step": 3431 }, { "epoch": 0.5118949958982772, "grad_norm": 2.021977663040161, "learning_rate": 1.0101464647323567e-05, "loss": 0.6799, "step": 3432 }, { "epoch": 0.5120441494518607, "grad_norm": 1.6175659894943237, "learning_rate": 1.009663315160937e-05, "loss": 0.7575, "step": 3433 }, { "epoch": 0.5121933030054441, "grad_norm": 1.9609363079071045, "learning_rate": 1.0091801633335544e-05, "loss": 0.7523, "step": 3434 }, { "epoch": 0.5123424565590275, "grad_norm": 2.79010009765625, "learning_rate": 1.0086970093630036e-05, "loss": 0.7196, "step": 3435 }, { "epoch": 0.5124916101126109, "grad_norm": 0.5802003741264343, "learning_rate": 1.0082138533620803e-05, "loss": 0.252, "step": 3436 }, { "epoch": 0.5126407636661944, "grad_norm": 2.234968662261963, "learning_rate": 1.0077306954435804e-05, "loss": 0.6975, "step": 3437 }, { "epoch": 0.5127899172197777, "grad_norm": 2.156080484390259, "learning_rate": 1.0072475357202998e-05, "loss": 0.7028, "step": 3438 }, { "epoch": 0.5129390707733612, "grad_norm": 1.5588791370391846, "learning_rate": 1.0067643743050357e-05, "loss": 0.6611, "step": 3439 }, { "epoch": 0.5130882243269446, "grad_norm": 1.5667775869369507, "learning_rate": 1.006281211310585e-05, "loss": 0.8385, "step": 3440 }, { "epoch": 0.513237377880528, "grad_norm": 1.6738715171813965, "learning_rate": 1.0057980468497453e-05, "loss": 0.5874, "step": 3441 }, { "epoch": 0.5133865314341114, "grad_norm": 1.7612308263778687, "learning_rate": 1.0053148810353146e-05, "loss": 0.6291, "step": 3442 }, { "epoch": 0.5135356849876949, "grad_norm": 1.5337746143341064, "learning_rate": 1.0048317139800906e-05, "loss": 0.8047, "step": 3443 }, { "epoch": 0.5136848385412782, "grad_norm": 1.7839057445526123, "learning_rate": 1.0043485457968717e-05, "loss": 0.7007, "step": 3444 }, { "epoch": 0.5138339920948617, "grad_norm": 1.5471974611282349, "learning_rate": 1.0038653765984573e-05, "loss": 0.6897, "step": 3445 }, { "epoch": 0.513983145648445, "grad_norm": 1.6419175863265991, "learning_rate": 1.0033822064976457e-05, "loss": 0.6794, "step": 3446 }, { "epoch": 0.5141322992020285, "grad_norm": 2.661245584487915, "learning_rate": 1.0028990356072364e-05, "loss": 0.7204, "step": 3447 }, { "epoch": 0.5142814527556119, "grad_norm": 1.4997076988220215, "learning_rate": 1.0024158640400288e-05, "loss": 0.689, "step": 3448 }, { "epoch": 0.5144306063091953, "grad_norm": 1.832845687866211, "learning_rate": 1.0019326919088221e-05, "loss": 0.6725, "step": 3449 }, { "epoch": 0.5145797598627787, "grad_norm": 2.9066686630249023, "learning_rate": 1.0014495193264162e-05, "loss": 0.6629, "step": 3450 }, { "epoch": 0.5147289134163622, "grad_norm": 1.8610471487045288, "learning_rate": 1.0009663464056108e-05, "loss": 0.6981, "step": 3451 }, { "epoch": 0.5148780669699455, "grad_norm": 2.464007616043091, "learning_rate": 1.0004831732592053e-05, "loss": 0.6515, "step": 3452 }, { "epoch": 0.515027220523529, "grad_norm": 1.793543815612793, "learning_rate": 1e-05, "loss": 0.6937, "step": 3453 }, { "epoch": 0.5151763740771124, "grad_norm": 2.5370538234710693, "learning_rate": 9.99516826740795e-06, "loss": 0.744, "step": 3454 }, { "epoch": 0.5153255276306958, "grad_norm": 1.6140114068984985, "learning_rate": 9.990336535943897e-06, "loss": 0.7111, "step": 3455 }, { "epoch": 0.5154746811842792, "grad_norm": 2.444795608520508, "learning_rate": 9.985504806735841e-06, "loss": 0.7284, "step": 3456 }, { "epoch": 0.5156238347378627, "grad_norm": 1.772559404373169, "learning_rate": 9.98067308091178e-06, "loss": 0.7066, "step": 3457 }, { "epoch": 0.515772988291446, "grad_norm": 1.826404094696045, "learning_rate": 9.975841359599712e-06, "loss": 0.7288, "step": 3458 }, { "epoch": 0.5159221418450295, "grad_norm": 1.7714037895202637, "learning_rate": 9.971009643927636e-06, "loss": 0.6183, "step": 3459 }, { "epoch": 0.5160712953986129, "grad_norm": 1.422187328338623, "learning_rate": 9.966177935023545e-06, "loss": 0.7495, "step": 3460 }, { "epoch": 0.5162204489521963, "grad_norm": 2.3142473697662354, "learning_rate": 9.96134623401543e-06, "loss": 0.7015, "step": 3461 }, { "epoch": 0.5163696025057797, "grad_norm": 1.9133673906326294, "learning_rate": 9.956514542031286e-06, "loss": 0.7254, "step": 3462 }, { "epoch": 0.5165187560593631, "grad_norm": 1.7928401231765747, "learning_rate": 9.9516828601991e-06, "loss": 0.6787, "step": 3463 }, { "epoch": 0.5166679096129465, "grad_norm": 1.094786286354065, "learning_rate": 9.94685118964686e-06, "loss": 0.7665, "step": 3464 }, { "epoch": 0.51681706316653, "grad_norm": 1.4576809406280518, "learning_rate": 9.942019531502552e-06, "loss": 0.7398, "step": 3465 }, { "epoch": 0.5169662167201133, "grad_norm": 1.5560585260391235, "learning_rate": 9.937187886894153e-06, "loss": 0.6954, "step": 3466 }, { "epoch": 0.5171153702736968, "grad_norm": 1.5424326658248901, "learning_rate": 9.932356256949643e-06, "loss": 0.821, "step": 3467 }, { "epoch": 0.5172645238272802, "grad_norm": 1.7845503091812134, "learning_rate": 9.927524642797003e-06, "loss": 0.7168, "step": 3468 }, { "epoch": 0.5174136773808636, "grad_norm": 1.3804558515548706, "learning_rate": 9.9226930455642e-06, "loss": 0.746, "step": 3469 }, { "epoch": 0.517562830934447, "grad_norm": 1.8238136768341064, "learning_rate": 9.9178614663792e-06, "loss": 0.7387, "step": 3470 }, { "epoch": 0.5177119844880305, "grad_norm": 1.3135260343551636, "learning_rate": 9.91302990636997e-06, "loss": 0.6578, "step": 3471 }, { "epoch": 0.5178611380416138, "grad_norm": 0.6403598785400391, "learning_rate": 9.908198366664461e-06, "loss": 0.2704, "step": 3472 }, { "epoch": 0.5180102915951973, "grad_norm": 1.7428603172302246, "learning_rate": 9.903366848390635e-06, "loss": 0.722, "step": 3473 }, { "epoch": 0.5181594451487807, "grad_norm": 2.2747063636779785, "learning_rate": 9.898535352676438e-06, "loss": 0.7422, "step": 3474 }, { "epoch": 0.5183085987023641, "grad_norm": 1.8513877391815186, "learning_rate": 9.893703880649808e-06, "loss": 0.6563, "step": 3475 }, { "epoch": 0.5184577522559475, "grad_norm": 1.793891429901123, "learning_rate": 9.888872433438695e-06, "loss": 0.6667, "step": 3476 }, { "epoch": 0.518606905809531, "grad_norm": 3.604379415512085, "learning_rate": 9.884041012171023e-06, "loss": 0.754, "step": 3477 }, { "epoch": 0.5187560593631143, "grad_norm": 2.3501882553100586, "learning_rate": 9.879209617974721e-06, "loss": 0.6956, "step": 3478 }, { "epoch": 0.5189052129166978, "grad_norm": 2.3058061599731445, "learning_rate": 9.874378251977709e-06, "loss": 0.8614, "step": 3479 }, { "epoch": 0.5190543664702811, "grad_norm": 1.9110368490219116, "learning_rate": 9.869546915307897e-06, "loss": 0.7959, "step": 3480 }, { "epoch": 0.5192035200238646, "grad_norm": 1.612473726272583, "learning_rate": 9.864715609093196e-06, "loss": 0.7422, "step": 3481 }, { "epoch": 0.519352673577448, "grad_norm": 1.527597188949585, "learning_rate": 9.859884334461503e-06, "loss": 0.788, "step": 3482 }, { "epoch": 0.5195018271310314, "grad_norm": 2.4392240047454834, "learning_rate": 9.85505309254071e-06, "loss": 0.689, "step": 3483 }, { "epoch": 0.5196509806846148, "grad_norm": 1.5156619548797607, "learning_rate": 9.8502218844587e-06, "loss": 0.6779, "step": 3484 }, { "epoch": 0.5198001342381983, "grad_norm": 1.6228950023651123, "learning_rate": 9.845390711343356e-06, "loss": 0.706, "step": 3485 }, { "epoch": 0.5199492877917816, "grad_norm": 1.500841736793518, "learning_rate": 9.840559574322538e-06, "loss": 0.6911, "step": 3486 }, { "epoch": 0.5200984413453651, "grad_norm": 1.5559784173965454, "learning_rate": 9.835728474524113e-06, "loss": 0.6818, "step": 3487 }, { "epoch": 0.5202475948989485, "grad_norm": 1.3280019760131836, "learning_rate": 9.830897413075926e-06, "loss": 0.7995, "step": 3488 }, { "epoch": 0.5203967484525319, "grad_norm": 1.9883924722671509, "learning_rate": 9.826066391105824e-06, "loss": 0.7789, "step": 3489 }, { "epoch": 0.5205459020061153, "grad_norm": 2.521171808242798, "learning_rate": 9.82123540974164e-06, "loss": 0.7144, "step": 3490 }, { "epoch": 0.5206950555596987, "grad_norm": 1.8304057121276855, "learning_rate": 9.816404470111191e-06, "loss": 0.7585, "step": 3491 }, { "epoch": 0.5208442091132821, "grad_norm": 1.9216959476470947, "learning_rate": 9.8115735733423e-06, "loss": 0.7116, "step": 3492 }, { "epoch": 0.5209933626668656, "grad_norm": 5.396724700927734, "learning_rate": 9.806742720562762e-06, "loss": 0.6874, "step": 3493 }, { "epoch": 0.5211425162204489, "grad_norm": 1.650060772895813, "learning_rate": 9.801911912900378e-06, "loss": 0.7975, "step": 3494 }, { "epoch": 0.5212916697740324, "grad_norm": 1.7681878805160522, "learning_rate": 9.797081151482928e-06, "loss": 0.7071, "step": 3495 }, { "epoch": 0.5214408233276158, "grad_norm": 1.8288781642913818, "learning_rate": 9.792250437438186e-06, "loss": 0.6658, "step": 3496 }, { "epoch": 0.5215899768811992, "grad_norm": 1.679120659828186, "learning_rate": 9.787419771893907e-06, "loss": 0.6587, "step": 3497 }, { "epoch": 0.5217391304347826, "grad_norm": 1.6499536037445068, "learning_rate": 9.78258915597785e-06, "loss": 0.7382, "step": 3498 }, { "epoch": 0.5218882839883661, "grad_norm": 3.5921106338500977, "learning_rate": 9.777758590817746e-06, "loss": 0.7565, "step": 3499 }, { "epoch": 0.5220374375419494, "grad_norm": 2.642549514770508, "learning_rate": 9.772928077541325e-06, "loss": 0.6843, "step": 3500 }, { "epoch": 0.5221865910955329, "grad_norm": 1.4709789752960205, "learning_rate": 9.768097617276303e-06, "loss": 0.7673, "step": 3501 }, { "epoch": 0.5223357446491163, "grad_norm": 1.4245041608810425, "learning_rate": 9.763267211150372e-06, "loss": 0.6792, "step": 3502 }, { "epoch": 0.5224848982026997, "grad_norm": 2.117687702178955, "learning_rate": 9.758436860291232e-06, "loss": 0.6637, "step": 3503 }, { "epoch": 0.5226340517562831, "grad_norm": 1.9803581237792969, "learning_rate": 9.753606565826556e-06, "loss": 0.6357, "step": 3504 }, { "epoch": 0.5227832053098665, "grad_norm": 1.7586249113082886, "learning_rate": 9.748776328884008e-06, "loss": 0.7049, "step": 3505 }, { "epoch": 0.5229323588634499, "grad_norm": 1.5012303590774536, "learning_rate": 9.743946150591237e-06, "loss": 0.7176, "step": 3506 }, { "epoch": 0.5230815124170334, "grad_norm": 4.152196407318115, "learning_rate": 9.739116032075879e-06, "loss": 0.686, "step": 3507 }, { "epoch": 0.5232306659706167, "grad_norm": 1.38535475730896, "learning_rate": 9.734285974465554e-06, "loss": 0.7274, "step": 3508 }, { "epoch": 0.5233798195242002, "grad_norm": 3.223958730697632, "learning_rate": 9.729455978887877e-06, "loss": 0.7378, "step": 3509 }, { "epoch": 0.5235289730777836, "grad_norm": 1.3177540302276611, "learning_rate": 9.72462604647043e-06, "loss": 0.768, "step": 3510 }, { "epoch": 0.523678126631367, "grad_norm": 1.9008607864379883, "learning_rate": 9.719796178340799e-06, "loss": 0.7568, "step": 3511 }, { "epoch": 0.5238272801849504, "grad_norm": 1.54548180103302, "learning_rate": 9.71496637562655e-06, "loss": 0.6096, "step": 3512 }, { "epoch": 0.5239764337385339, "grad_norm": 1.895930290222168, "learning_rate": 9.710136639455229e-06, "loss": 0.6959, "step": 3513 }, { "epoch": 0.5241255872921172, "grad_norm": 2.2917447090148926, "learning_rate": 9.705306970954365e-06, "loss": 0.7559, "step": 3514 }, { "epoch": 0.5242747408457007, "grad_norm": 1.5414263010025024, "learning_rate": 9.700477371251481e-06, "loss": 0.7503, "step": 3515 }, { "epoch": 0.524423894399284, "grad_norm": 2.4811010360717773, "learning_rate": 9.695647841474073e-06, "loss": 0.7011, "step": 3516 }, { "epoch": 0.5245730479528675, "grad_norm": 1.4317994117736816, "learning_rate": 9.69081838274963e-06, "loss": 0.7032, "step": 3517 }, { "epoch": 0.5247222015064509, "grad_norm": 1.794439673423767, "learning_rate": 9.685988996205616e-06, "loss": 0.7349, "step": 3518 }, { "epoch": 0.5248713550600344, "grad_norm": 1.6983376741409302, "learning_rate": 9.681159682969483e-06, "loss": 0.6875, "step": 3519 }, { "epoch": 0.5250205086136177, "grad_norm": 1.8310259580612183, "learning_rate": 9.676330444168668e-06, "loss": 0.6605, "step": 3520 }, { "epoch": 0.5251696621672012, "grad_norm": 1.5968644618988037, "learning_rate": 9.671501280930588e-06, "loss": 0.7647, "step": 3521 }, { "epoch": 0.5253188157207845, "grad_norm": 2.10583758354187, "learning_rate": 9.666672194382639e-06, "loss": 0.682, "step": 3522 }, { "epoch": 0.525467969274368, "grad_norm": 2.2510695457458496, "learning_rate": 9.661843185652202e-06, "loss": 0.661, "step": 3523 }, { "epoch": 0.5256171228279514, "grad_norm": 1.3669170141220093, "learning_rate": 9.657014255866643e-06, "loss": 0.7876, "step": 3524 }, { "epoch": 0.5257662763815348, "grad_norm": 1.8591009378433228, "learning_rate": 9.652185406153307e-06, "loss": 0.7695, "step": 3525 }, { "epoch": 0.5259154299351182, "grad_norm": 1.5887867212295532, "learning_rate": 9.647356637639518e-06, "loss": 0.7186, "step": 3526 }, { "epoch": 0.5260645834887017, "grad_norm": 2.267925500869751, "learning_rate": 9.64252795145258e-06, "loss": 0.7308, "step": 3527 }, { "epoch": 0.526213737042285, "grad_norm": 1.9759211540222168, "learning_rate": 9.637699348719783e-06, "loss": 0.6172, "step": 3528 }, { "epoch": 0.5263628905958685, "grad_norm": 1.6756607294082642, "learning_rate": 9.632870830568399e-06, "loss": 0.8029, "step": 3529 }, { "epoch": 0.5265120441494519, "grad_norm": 1.7165535688400269, "learning_rate": 9.628042398125673e-06, "loss": 0.6967, "step": 3530 }, { "epoch": 0.5266611977030353, "grad_norm": 1.2171882390975952, "learning_rate": 9.623214052518836e-06, "loss": 0.7351, "step": 3531 }, { "epoch": 0.5268103512566187, "grad_norm": 1.289023756980896, "learning_rate": 9.618385794875094e-06, "loss": 0.6924, "step": 3532 }, { "epoch": 0.5269595048102022, "grad_norm": 1.4405983686447144, "learning_rate": 9.613557626321633e-06, "loss": 0.6814, "step": 3533 }, { "epoch": 0.5271086583637855, "grad_norm": 1.341544270515442, "learning_rate": 9.608729547985623e-06, "loss": 0.791, "step": 3534 }, { "epoch": 0.527257811917369, "grad_norm": 1.5862761735916138, "learning_rate": 9.60390156099421e-06, "loss": 0.7395, "step": 3535 }, { "epoch": 0.5274069654709523, "grad_norm": 1.525604009628296, "learning_rate": 9.599073666474516e-06, "loss": 0.6698, "step": 3536 }, { "epoch": 0.5275561190245358, "grad_norm": 1.6164274215698242, "learning_rate": 9.594245865553641e-06, "loss": 0.7226, "step": 3537 }, { "epoch": 0.5277052725781192, "grad_norm": 4.107484340667725, "learning_rate": 9.589418159358677e-06, "loss": 0.723, "step": 3538 }, { "epoch": 0.5278544261317026, "grad_norm": 1.6487946510314941, "learning_rate": 9.584590549016674e-06, "loss": 0.7238, "step": 3539 }, { "epoch": 0.528003579685286, "grad_norm": 1.9746050834655762, "learning_rate": 9.579763035654671e-06, "loss": 0.7122, "step": 3540 }, { "epoch": 0.5281527332388695, "grad_norm": 1.5293192863464355, "learning_rate": 9.574935620399681e-06, "loss": 0.6569, "step": 3541 }, { "epoch": 0.5283018867924528, "grad_norm": 2.2687041759490967, "learning_rate": 9.5701083043787e-06, "loss": 0.6778, "step": 3542 }, { "epoch": 0.5284510403460363, "grad_norm": 2.6340079307556152, "learning_rate": 9.56528108871869e-06, "loss": 0.6923, "step": 3543 }, { "epoch": 0.5286001938996197, "grad_norm": 0.6132571697235107, "learning_rate": 9.560453974546594e-06, "loss": 0.2413, "step": 3544 }, { "epoch": 0.5287493474532031, "grad_norm": 3.53940486907959, "learning_rate": 9.555626962989335e-06, "loss": 0.7252, "step": 3545 }, { "epoch": 0.5288985010067865, "grad_norm": 1.4242452383041382, "learning_rate": 9.550800055173815e-06, "loss": 0.7208, "step": 3546 }, { "epoch": 0.52904765456037, "grad_norm": 2.014026641845703, "learning_rate": 9.5459732522269e-06, "loss": 0.6851, "step": 3547 }, { "epoch": 0.5291968081139533, "grad_norm": 1.3295259475708008, "learning_rate": 9.541146555275444e-06, "loss": 0.6771, "step": 3548 }, { "epoch": 0.5293459616675368, "grad_norm": 2.040443181991577, "learning_rate": 9.536319965446265e-06, "loss": 0.746, "step": 3549 }, { "epoch": 0.5294951152211201, "grad_norm": 2.989140033721924, "learning_rate": 9.531493483866163e-06, "loss": 0.8047, "step": 3550 }, { "epoch": 0.5296442687747036, "grad_norm": 0.5606538653373718, "learning_rate": 9.526667111661912e-06, "loss": 0.239, "step": 3551 }, { "epoch": 0.529793422328287, "grad_norm": 1.9535839557647705, "learning_rate": 9.521840849960256e-06, "loss": 0.6664, "step": 3552 }, { "epoch": 0.5299425758818704, "grad_norm": 1.8215339183807373, "learning_rate": 9.517014699887924e-06, "loss": 0.7254, "step": 3553 }, { "epoch": 0.5300917294354538, "grad_norm": 1.7625226974487305, "learning_rate": 9.512188662571601e-06, "loss": 0.6568, "step": 3554 }, { "epoch": 0.5302408829890373, "grad_norm": 2.5398380756378174, "learning_rate": 9.50736273913797e-06, "loss": 0.6827, "step": 3555 }, { "epoch": 0.5303900365426206, "grad_norm": 1.9008301496505737, "learning_rate": 9.502536930713659e-06, "loss": 0.8173, "step": 3556 }, { "epoch": 0.5305391900962041, "grad_norm": 2.2006967067718506, "learning_rate": 9.497711238425296e-06, "loss": 0.6719, "step": 3557 }, { "epoch": 0.5306883436497875, "grad_norm": 1.8962758779525757, "learning_rate": 9.492885663399465e-06, "loss": 0.684, "step": 3558 }, { "epoch": 0.5308374972033709, "grad_norm": 1.3660752773284912, "learning_rate": 9.488060206762727e-06, "loss": 0.801, "step": 3559 }, { "epoch": 0.5309866507569543, "grad_norm": 1.4002035856246948, "learning_rate": 9.483234869641616e-06, "loss": 0.6159, "step": 3560 }, { "epoch": 0.5311358043105378, "grad_norm": 0.5567858815193176, "learning_rate": 9.478409653162639e-06, "loss": 0.2435, "step": 3561 }, { "epoch": 0.5312849578641211, "grad_norm": 2.0762553215026855, "learning_rate": 9.473584558452273e-06, "loss": 0.7459, "step": 3562 }, { "epoch": 0.5314341114177046, "grad_norm": 1.6483365297317505, "learning_rate": 9.468759586636963e-06, "loss": 0.7227, "step": 3563 }, { "epoch": 0.5315832649712879, "grad_norm": 1.8610960245132446, "learning_rate": 9.463934738843135e-06, "loss": 0.7309, "step": 3564 }, { "epoch": 0.5317324185248714, "grad_norm": 3.1872682571411133, "learning_rate": 9.459110016197184e-06, "loss": 0.7481, "step": 3565 }, { "epoch": 0.5318815720784548, "grad_norm": 1.631185531616211, "learning_rate": 9.454285419825464e-06, "loss": 0.7491, "step": 3566 }, { "epoch": 0.5320307256320382, "grad_norm": 1.5683586597442627, "learning_rate": 9.449460950854315e-06, "loss": 0.7787, "step": 3567 }, { "epoch": 0.5321798791856216, "grad_norm": 2.045043706893921, "learning_rate": 9.444636610410036e-06, "loss": 0.7437, "step": 3568 }, { "epoch": 0.5323290327392051, "grad_norm": 2.1713407039642334, "learning_rate": 9.439812399618901e-06, "loss": 0.7357, "step": 3569 }, { "epoch": 0.5324781862927884, "grad_norm": 5.127035140991211, "learning_rate": 9.434988319607153e-06, "loss": 0.6667, "step": 3570 }, { "epoch": 0.5326273398463719, "grad_norm": 1.6420881748199463, "learning_rate": 9.430164371501002e-06, "loss": 0.7312, "step": 3571 }, { "epoch": 0.5327764933999553, "grad_norm": 2.0100502967834473, "learning_rate": 9.425340556426635e-06, "loss": 0.6218, "step": 3572 }, { "epoch": 0.5329256469535387, "grad_norm": 1.6560795307159424, "learning_rate": 9.420516875510201e-06, "loss": 0.8029, "step": 3573 }, { "epoch": 0.5330748005071221, "grad_norm": 2.626089096069336, "learning_rate": 9.415693329877818e-06, "loss": 0.7033, "step": 3574 }, { "epoch": 0.5332239540607056, "grad_norm": 1.9530608654022217, "learning_rate": 9.41086992065557e-06, "loss": 0.7306, "step": 3575 }, { "epoch": 0.5333731076142889, "grad_norm": 1.3812346458435059, "learning_rate": 9.40604664896952e-06, "loss": 0.7137, "step": 3576 }, { "epoch": 0.5335222611678723, "grad_norm": 1.291420340538025, "learning_rate": 9.401223515945688e-06, "loss": 0.7819, "step": 3577 }, { "epoch": 0.5336714147214557, "grad_norm": 0.5463979244232178, "learning_rate": 9.396400522710066e-06, "loss": 0.2799, "step": 3578 }, { "epoch": 0.5338205682750391, "grad_norm": 1.5228617191314697, "learning_rate": 9.391577670388612e-06, "loss": 0.6529, "step": 3579 }, { "epoch": 0.5339697218286226, "grad_norm": 1.7894257307052612, "learning_rate": 9.38675496010725e-06, "loss": 0.7102, "step": 3580 }, { "epoch": 0.5341188753822059, "grad_norm": 1.3846936225891113, "learning_rate": 9.381932392991874e-06, "loss": 0.6931, "step": 3581 }, { "epoch": 0.5342680289357894, "grad_norm": 1.4691951274871826, "learning_rate": 9.377109970168348e-06, "loss": 0.7303, "step": 3582 }, { "epoch": 0.5344171824893728, "grad_norm": 1.862562656402588, "learning_rate": 9.372287692762489e-06, "loss": 0.7744, "step": 3583 }, { "epoch": 0.5345663360429562, "grad_norm": 1.5823639631271362, "learning_rate": 9.367465561900097e-06, "loss": 0.7586, "step": 3584 }, { "epoch": 0.5347154895965396, "grad_norm": 1.6431199312210083, "learning_rate": 9.362643578706926e-06, "loss": 0.6411, "step": 3585 }, { "epoch": 0.534864643150123, "grad_norm": 1.412437915802002, "learning_rate": 9.357821744308696e-06, "loss": 0.7365, "step": 3586 }, { "epoch": 0.5350137967037064, "grad_norm": 1.8465375900268555, "learning_rate": 9.353000059831097e-06, "loss": 0.694, "step": 3587 }, { "epoch": 0.5351629502572899, "grad_norm": 2.1955997943878174, "learning_rate": 9.348178526399783e-06, "loss": 0.6784, "step": 3588 }, { "epoch": 0.5353121038108732, "grad_norm": 0.5219040513038635, "learning_rate": 9.343357145140368e-06, "loss": 0.2484, "step": 3589 }, { "epoch": 0.5354612573644567, "grad_norm": 1.6263405084609985, "learning_rate": 9.338535917178444e-06, "loss": 0.6954, "step": 3590 }, { "epoch": 0.5356104109180401, "grad_norm": 1.7858760356903076, "learning_rate": 9.33371484363955e-06, "loss": 0.7009, "step": 3591 }, { "epoch": 0.5357595644716235, "grad_norm": 1.90546452999115, "learning_rate": 9.328893925649196e-06, "loss": 0.778, "step": 3592 }, { "epoch": 0.5359087180252069, "grad_norm": 4.072181224822998, "learning_rate": 9.324073164332861e-06, "loss": 0.6569, "step": 3593 }, { "epoch": 0.5360578715787904, "grad_norm": 1.4507479667663574, "learning_rate": 9.319252560815977e-06, "loss": 0.717, "step": 3594 }, { "epoch": 0.5362070251323737, "grad_norm": 0.5528897047042847, "learning_rate": 9.31443211622395e-06, "loss": 0.2599, "step": 3595 }, { "epoch": 0.5363561786859572, "grad_norm": 2.125397205352783, "learning_rate": 9.309611831682142e-06, "loss": 0.6459, "step": 3596 }, { "epoch": 0.5365053322395406, "grad_norm": 1.9360449314117432, "learning_rate": 9.304791708315876e-06, "loss": 0.6837, "step": 3597 }, { "epoch": 0.536654485793124, "grad_norm": 2.386007785797119, "learning_rate": 9.29997174725044e-06, "loss": 0.7446, "step": 3598 }, { "epoch": 0.5368036393467074, "grad_norm": 1.7942208051681519, "learning_rate": 9.295151949611095e-06, "loss": 0.7096, "step": 3599 }, { "epoch": 0.5369527929002909, "grad_norm": 3.20825457572937, "learning_rate": 9.290332316523043e-06, "loss": 0.5957, "step": 3600 }, { "epoch": 0.5371019464538742, "grad_norm": 1.9941400289535522, "learning_rate": 9.285512849111465e-06, "loss": 0.7174, "step": 3601 }, { "epoch": 0.5372511000074577, "grad_norm": 1.1349592208862305, "learning_rate": 9.28069354850149e-06, "loss": 0.7159, "step": 3602 }, { "epoch": 0.537400253561041, "grad_norm": 0.552884578704834, "learning_rate": 9.275874415818222e-06, "loss": 0.2626, "step": 3603 }, { "epoch": 0.5375494071146245, "grad_norm": 1.8024629354476929, "learning_rate": 9.271055452186716e-06, "loss": 0.7456, "step": 3604 }, { "epoch": 0.5376985606682079, "grad_norm": 2.6325695514678955, "learning_rate": 9.266236658731985e-06, "loss": 0.794, "step": 3605 }, { "epoch": 0.5378477142217913, "grad_norm": 1.5614746809005737, "learning_rate": 9.261418036579008e-06, "loss": 0.7439, "step": 3606 }, { "epoch": 0.5379968677753747, "grad_norm": 1.2656532526016235, "learning_rate": 9.256599586852731e-06, "loss": 0.7392, "step": 3607 }, { "epoch": 0.5381460213289582, "grad_norm": 1.5642619132995605, "learning_rate": 9.251781310678046e-06, "loss": 0.7655, "step": 3608 }, { "epoch": 0.5382951748825415, "grad_norm": 1.9983147382736206, "learning_rate": 9.246963209179813e-06, "loss": 0.6414, "step": 3609 }, { "epoch": 0.538444328436125, "grad_norm": 2.605685234069824, "learning_rate": 9.242145283482848e-06, "loss": 0.6889, "step": 3610 }, { "epoch": 0.5385934819897084, "grad_norm": 2.0177338123321533, "learning_rate": 9.237327534711922e-06, "loss": 0.7092, "step": 3611 }, { "epoch": 0.5387426355432918, "grad_norm": 2.4482486248016357, "learning_rate": 9.232509963991776e-06, "loss": 0.7192, "step": 3612 }, { "epoch": 0.5388917890968752, "grad_norm": 1.680896520614624, "learning_rate": 9.2276925724471e-06, "loss": 0.7992, "step": 3613 }, { "epoch": 0.5390409426504587, "grad_norm": 5.10906982421875, "learning_rate": 9.222875361202546e-06, "loss": 0.6908, "step": 3614 }, { "epoch": 0.539190096204042, "grad_norm": 1.728209376335144, "learning_rate": 9.218058331382717e-06, "loss": 0.811, "step": 3615 }, { "epoch": 0.5393392497576255, "grad_norm": 0.5350586175918579, "learning_rate": 9.213241484112188e-06, "loss": 0.2511, "step": 3616 }, { "epoch": 0.5394884033112088, "grad_norm": 1.1979527473449707, "learning_rate": 9.208424820515478e-06, "loss": 0.7468, "step": 3617 }, { "epoch": 0.5396375568647923, "grad_norm": 1.7399457693099976, "learning_rate": 9.203608341717073e-06, "loss": 0.7675, "step": 3618 }, { "epoch": 0.5397867104183757, "grad_norm": 1.4185596704483032, "learning_rate": 9.198792048841403e-06, "loss": 0.6778, "step": 3619 }, { "epoch": 0.5399358639719591, "grad_norm": 1.688645362854004, "learning_rate": 9.19397594301287e-06, "loss": 0.7268, "step": 3620 }, { "epoch": 0.5400850175255425, "grad_norm": 1.275475263595581, "learning_rate": 9.18916002535582e-06, "loss": 0.7569, "step": 3621 }, { "epoch": 0.540234171079126, "grad_norm": 1.4488502740859985, "learning_rate": 9.184344296994559e-06, "loss": 0.7562, "step": 3622 }, { "epoch": 0.5403833246327093, "grad_norm": 2.2997241020202637, "learning_rate": 9.179528759053355e-06, "loss": 0.7338, "step": 3623 }, { "epoch": 0.5405324781862928, "grad_norm": 2.0208189487457275, "learning_rate": 9.174713412656418e-06, "loss": 0.7401, "step": 3624 }, { "epoch": 0.5406816317398762, "grad_norm": 1.290565848350525, "learning_rate": 9.16989825892793e-06, "loss": 0.705, "step": 3625 }, { "epoch": 0.5408307852934596, "grad_norm": 1.4692543745040894, "learning_rate": 9.165083298992019e-06, "loss": 0.7643, "step": 3626 }, { "epoch": 0.540979938847043, "grad_norm": 1.7669098377227783, "learning_rate": 9.160268533972763e-06, "loss": 0.7222, "step": 3627 }, { "epoch": 0.5411290924006265, "grad_norm": 1.262935996055603, "learning_rate": 9.155453964994202e-06, "loss": 0.7416, "step": 3628 }, { "epoch": 0.5412782459542098, "grad_norm": 1.6294143199920654, "learning_rate": 9.150639593180327e-06, "loss": 0.7089, "step": 3629 }, { "epoch": 0.5414273995077933, "grad_norm": 1.741641879081726, "learning_rate": 9.145825419655086e-06, "loss": 0.7028, "step": 3630 }, { "epoch": 0.5415765530613766, "grad_norm": 1.973541498184204, "learning_rate": 9.141011445542377e-06, "loss": 0.7125, "step": 3631 }, { "epoch": 0.5417257066149601, "grad_norm": 1.6993296146392822, "learning_rate": 9.136197671966058e-06, "loss": 0.7194, "step": 3632 }, { "epoch": 0.5418748601685435, "grad_norm": 1.5071898698806763, "learning_rate": 9.131384100049924e-06, "loss": 0.7136, "step": 3633 }, { "epoch": 0.5420240137221269, "grad_norm": 2.400486469268799, "learning_rate": 9.126570730917744e-06, "loss": 0.7708, "step": 3634 }, { "epoch": 0.5421731672757103, "grad_norm": 1.233373761177063, "learning_rate": 9.12175756569323e-06, "loss": 0.7271, "step": 3635 }, { "epoch": 0.5423223208292938, "grad_norm": 1.3257489204406738, "learning_rate": 9.116944605500041e-06, "loss": 0.6549, "step": 3636 }, { "epoch": 0.5424714743828771, "grad_norm": 1.8350000381469727, "learning_rate": 9.1121318514618e-06, "loss": 0.7477, "step": 3637 }, { "epoch": 0.5426206279364606, "grad_norm": 1.8555419445037842, "learning_rate": 9.10731930470207e-06, "loss": 0.7902, "step": 3638 }, { "epoch": 0.542769781490044, "grad_norm": 1.4330530166625977, "learning_rate": 9.10250696634437e-06, "loss": 0.6209, "step": 3639 }, { "epoch": 0.5429189350436274, "grad_norm": 2.816071033477783, "learning_rate": 9.097694837512175e-06, "loss": 0.6844, "step": 3640 }, { "epoch": 0.5430680885972108, "grad_norm": 1.7764291763305664, "learning_rate": 9.092882919328901e-06, "loss": 0.6372, "step": 3641 }, { "epoch": 0.5432172421507943, "grad_norm": 2.076425075531006, "learning_rate": 9.08807121291793e-06, "loss": 0.6882, "step": 3642 }, { "epoch": 0.5433663957043776, "grad_norm": 1.5978840589523315, "learning_rate": 9.083259719402583e-06, "loss": 0.7916, "step": 3643 }, { "epoch": 0.5435155492579611, "grad_norm": 0.5042641162872314, "learning_rate": 9.07844843990613e-06, "loss": 0.2544, "step": 3644 }, { "epoch": 0.5436647028115444, "grad_norm": 2.762676477432251, "learning_rate": 9.0736373755518e-06, "loss": 0.6594, "step": 3645 }, { "epoch": 0.5438138563651279, "grad_norm": 1.6426795721054077, "learning_rate": 9.068826527462766e-06, "loss": 0.633, "step": 3646 }, { "epoch": 0.5439630099187113, "grad_norm": 1.5248552560806274, "learning_rate": 9.064015896762146e-06, "loss": 0.6934, "step": 3647 }, { "epoch": 0.5441121634722947, "grad_norm": 1.9539908170700073, "learning_rate": 9.059205484573019e-06, "loss": 0.7419, "step": 3648 }, { "epoch": 0.5442613170258781, "grad_norm": 5.736156463623047, "learning_rate": 9.054395292018402e-06, "loss": 0.7581, "step": 3649 }, { "epoch": 0.5444104705794616, "grad_norm": 1.4293334484100342, "learning_rate": 9.049585320221266e-06, "loss": 0.7266, "step": 3650 }, { "epoch": 0.5445596241330449, "grad_norm": 1.6883680820465088, "learning_rate": 9.044775570304534e-06, "loss": 0.6357, "step": 3651 }, { "epoch": 0.5447087776866284, "grad_norm": 1.543062448501587, "learning_rate": 9.03996604339107e-06, "loss": 0.7033, "step": 3652 }, { "epoch": 0.5448579312402118, "grad_norm": 2.6661369800567627, "learning_rate": 9.035156740603689e-06, "loss": 0.6187, "step": 3653 }, { "epoch": 0.5450070847937952, "grad_norm": 1.740500569343567, "learning_rate": 9.030347663065152e-06, "loss": 0.633, "step": 3654 }, { "epoch": 0.5451562383473786, "grad_norm": 1.4008313417434692, "learning_rate": 9.025538811898172e-06, "loss": 0.6942, "step": 3655 }, { "epoch": 0.545305391900962, "grad_norm": 1.8198364973068237, "learning_rate": 9.020730188225405e-06, "loss": 0.618, "step": 3656 }, { "epoch": 0.5454545454545454, "grad_norm": 1.931496262550354, "learning_rate": 9.015921793169455e-06, "loss": 0.7011, "step": 3657 }, { "epoch": 0.5456036990081289, "grad_norm": 1.8838574886322021, "learning_rate": 9.01111362785287e-06, "loss": 0.7223, "step": 3658 }, { "epoch": 0.5457528525617122, "grad_norm": 2.271688222885132, "learning_rate": 9.006305693398148e-06, "loss": 0.7728, "step": 3659 }, { "epoch": 0.5459020061152957, "grad_norm": 1.9953796863555908, "learning_rate": 9.001497990927738e-06, "loss": 0.6697, "step": 3660 }, { "epoch": 0.5460511596688791, "grad_norm": 1.9642930030822754, "learning_rate": 8.996690521564021e-06, "loss": 0.6419, "step": 3661 }, { "epoch": 0.5462003132224625, "grad_norm": 1.9051960706710815, "learning_rate": 8.991883286429337e-06, "loss": 0.705, "step": 3662 }, { "epoch": 0.5463494667760459, "grad_norm": 2.0027894973754883, "learning_rate": 8.987076286645965e-06, "loss": 0.6814, "step": 3663 }, { "epoch": 0.5464986203296294, "grad_norm": 2.2803986072540283, "learning_rate": 8.982269523336126e-06, "loss": 0.6062, "step": 3664 }, { "epoch": 0.5466477738832127, "grad_norm": 1.959107756614685, "learning_rate": 8.977462997621994e-06, "loss": 0.7642, "step": 3665 }, { "epoch": 0.5467969274367962, "grad_norm": 1.9298306703567505, "learning_rate": 8.972656710625682e-06, "loss": 0.7128, "step": 3666 }, { "epoch": 0.5469460809903796, "grad_norm": 2.238124370574951, "learning_rate": 8.967850663469248e-06, "loss": 0.7415, "step": 3667 }, { "epoch": 0.547095234543963, "grad_norm": 1.9017740488052368, "learning_rate": 8.963044857274691e-06, "loss": 0.6737, "step": 3668 }, { "epoch": 0.5472443880975464, "grad_norm": 1.7869994640350342, "learning_rate": 8.958239293163966e-06, "loss": 0.6971, "step": 3669 }, { "epoch": 0.5473935416511299, "grad_norm": 1.5244758129119873, "learning_rate": 8.953433972258955e-06, "loss": 0.7411, "step": 3670 }, { "epoch": 0.5475426952047132, "grad_norm": 2.131707191467285, "learning_rate": 8.948628895681498e-06, "loss": 0.7101, "step": 3671 }, { "epoch": 0.5476918487582967, "grad_norm": 0.5552185773849487, "learning_rate": 8.943824064553361e-06, "loss": 0.25, "step": 3672 }, { "epoch": 0.54784100231188, "grad_norm": 2.4863452911376953, "learning_rate": 8.939019479996272e-06, "loss": 0.7271, "step": 3673 }, { "epoch": 0.5479901558654635, "grad_norm": 1.7818865776062012, "learning_rate": 8.934215143131891e-06, "loss": 0.7326, "step": 3674 }, { "epoch": 0.5481393094190469, "grad_norm": 1.4858100414276123, "learning_rate": 8.929411055081812e-06, "loss": 0.7483, "step": 3675 }, { "epoch": 0.5482884629726303, "grad_norm": 2.2612223625183105, "learning_rate": 8.924607216967588e-06, "loss": 0.7211, "step": 3676 }, { "epoch": 0.5484376165262137, "grad_norm": 1.7118947505950928, "learning_rate": 8.919803629910709e-06, "loss": 0.685, "step": 3677 }, { "epoch": 0.5485867700797972, "grad_norm": 0.5211814045906067, "learning_rate": 8.915000295032594e-06, "loss": 0.2219, "step": 3678 }, { "epoch": 0.5487359236333805, "grad_norm": 3.7676968574523926, "learning_rate": 8.910197213454622e-06, "loss": 0.6732, "step": 3679 }, { "epoch": 0.548885077186964, "grad_norm": 1.8218871355056763, "learning_rate": 8.905394386298098e-06, "loss": 0.6375, "step": 3680 }, { "epoch": 0.5490342307405474, "grad_norm": 2.5643837451934814, "learning_rate": 8.900591814684269e-06, "loss": 0.8061, "step": 3681 }, { "epoch": 0.5491833842941308, "grad_norm": 2.2539703845977783, "learning_rate": 8.895789499734335e-06, "loss": 0.7011, "step": 3682 }, { "epoch": 0.5493325378477142, "grad_norm": 1.7527645826339722, "learning_rate": 8.890987442569419e-06, "loss": 0.7149, "step": 3683 }, { "epoch": 0.5494816914012977, "grad_norm": 1.6215862035751343, "learning_rate": 8.886185644310597e-06, "loss": 0.6992, "step": 3684 }, { "epoch": 0.549630844954881, "grad_norm": 1.6098459959030151, "learning_rate": 8.881384106078875e-06, "loss": 0.7295, "step": 3685 }, { "epoch": 0.5497799985084645, "grad_norm": 2.7194011211395264, "learning_rate": 8.876582828995211e-06, "loss": 0.6928, "step": 3686 }, { "epoch": 0.5499291520620478, "grad_norm": 1.5338184833526611, "learning_rate": 8.871781814180486e-06, "loss": 0.708, "step": 3687 }, { "epoch": 0.5500783056156313, "grad_norm": 2.135246515274048, "learning_rate": 8.866981062755532e-06, "loss": 0.7508, "step": 3688 }, { "epoch": 0.5502274591692147, "grad_norm": 1.4545507431030273, "learning_rate": 8.862180575841112e-06, "loss": 0.6014, "step": 3689 }, { "epoch": 0.5503766127227981, "grad_norm": 2.674943208694458, "learning_rate": 8.857380354557937e-06, "loss": 0.7174, "step": 3690 }, { "epoch": 0.5505257662763815, "grad_norm": 1.3275295495986938, "learning_rate": 8.85258040002664e-06, "loss": 0.6684, "step": 3691 }, { "epoch": 0.550674919829965, "grad_norm": 1.9883731603622437, "learning_rate": 8.847780713367808e-06, "loss": 0.7043, "step": 3692 }, { "epoch": 0.5508240733835483, "grad_norm": 3.1635327339172363, "learning_rate": 8.842981295701956e-06, "loss": 0.6162, "step": 3693 }, { "epoch": 0.5509732269371318, "grad_norm": 1.8907151222229004, "learning_rate": 8.838182148149537e-06, "loss": 0.7175, "step": 3694 }, { "epoch": 0.5511223804907152, "grad_norm": 2.731407880783081, "learning_rate": 8.833383271830946e-06, "loss": 0.6636, "step": 3695 }, { "epoch": 0.5512715340442986, "grad_norm": 1.8289650678634644, "learning_rate": 8.828584667866514e-06, "loss": 0.6992, "step": 3696 }, { "epoch": 0.551420687597882, "grad_norm": 1.3592363595962524, "learning_rate": 8.8237863373765e-06, "loss": 0.7064, "step": 3697 }, { "epoch": 0.5515698411514655, "grad_norm": 1.82905912399292, "learning_rate": 8.818988281481109e-06, "loss": 0.6824, "step": 3698 }, { "epoch": 0.5517189947050488, "grad_norm": 1.571868658065796, "learning_rate": 8.814190501300475e-06, "loss": 0.7521, "step": 3699 }, { "epoch": 0.5518681482586323, "grad_norm": 1.6480895280838013, "learning_rate": 8.809392997954673e-06, "loss": 0.669, "step": 3700 }, { "epoch": 0.5520173018122156, "grad_norm": 1.328823208808899, "learning_rate": 8.80459577256371e-06, "loss": 0.7937, "step": 3701 }, { "epoch": 0.5521664553657991, "grad_norm": 1.501028060913086, "learning_rate": 8.799798826247526e-06, "loss": 0.7192, "step": 3702 }, { "epoch": 0.5523156089193825, "grad_norm": 1.733740210533142, "learning_rate": 8.795002160126002e-06, "loss": 0.7443, "step": 3703 }, { "epoch": 0.5524647624729659, "grad_norm": 2.4533751010894775, "learning_rate": 8.790205775318952e-06, "loss": 0.6874, "step": 3704 }, { "epoch": 0.5526139160265493, "grad_norm": 2.237820625305176, "learning_rate": 8.785409672946123e-06, "loss": 0.7087, "step": 3705 }, { "epoch": 0.5527630695801328, "grad_norm": 2.0153636932373047, "learning_rate": 8.78061385412719e-06, "loss": 0.718, "step": 3706 }, { "epoch": 0.5529122231337161, "grad_norm": 2.4533376693725586, "learning_rate": 8.775818319981776e-06, "loss": 0.6583, "step": 3707 }, { "epoch": 0.5530613766872996, "grad_norm": 1.5492249727249146, "learning_rate": 8.77102307162942e-06, "loss": 0.6948, "step": 3708 }, { "epoch": 0.553210530240883, "grad_norm": 1.8687622547149658, "learning_rate": 8.76622811018961e-06, "loss": 0.778, "step": 3709 }, { "epoch": 0.5533596837944664, "grad_norm": 2.782144069671631, "learning_rate": 8.76143343678176e-06, "loss": 0.7097, "step": 3710 }, { "epoch": 0.5535088373480498, "grad_norm": 1.5655790567398071, "learning_rate": 8.756639052525213e-06, "loss": 0.7418, "step": 3711 }, { "epoch": 0.5536579909016333, "grad_norm": 1.7705808877944946, "learning_rate": 8.751844958539251e-06, "loss": 0.739, "step": 3712 }, { "epoch": 0.5538071444552166, "grad_norm": 1.8424291610717773, "learning_rate": 8.747051155943091e-06, "loss": 0.6942, "step": 3713 }, { "epoch": 0.5539562980088001, "grad_norm": 2.266449451446533, "learning_rate": 8.74225764585587e-06, "loss": 0.845, "step": 3714 }, { "epoch": 0.5541054515623834, "grad_norm": 1.760221242904663, "learning_rate": 8.737464429396668e-06, "loss": 0.7405, "step": 3715 }, { "epoch": 0.5542546051159669, "grad_norm": 1.9791351556777954, "learning_rate": 8.73267150768449e-06, "loss": 0.6217, "step": 3716 }, { "epoch": 0.5544037586695503, "grad_norm": 1.4533275365829468, "learning_rate": 8.727878881838273e-06, "loss": 0.726, "step": 3717 }, { "epoch": 0.5545529122231337, "grad_norm": 2.567040205001831, "learning_rate": 8.72308655297689e-06, "loss": 0.7707, "step": 3718 }, { "epoch": 0.5547020657767171, "grad_norm": 2.2075915336608887, "learning_rate": 8.718294522219137e-06, "loss": 0.6775, "step": 3719 }, { "epoch": 0.5548512193303006, "grad_norm": 1.8873659372329712, "learning_rate": 8.713502790683743e-06, "loss": 0.7213, "step": 3720 }, { "epoch": 0.5550003728838839, "grad_norm": 0.6175308227539062, "learning_rate": 8.708711359489377e-06, "loss": 0.2725, "step": 3721 }, { "epoch": 0.5551495264374674, "grad_norm": 2.7451491355895996, "learning_rate": 8.703920229754624e-06, "loss": 0.6976, "step": 3722 }, { "epoch": 0.5552986799910508, "grad_norm": 3.368654727935791, "learning_rate": 8.699129402598001e-06, "loss": 0.6819, "step": 3723 }, { "epoch": 0.5554478335446342, "grad_norm": 1.4264625310897827, "learning_rate": 8.694338879137962e-06, "loss": 0.715, "step": 3724 }, { "epoch": 0.5555969870982176, "grad_norm": 3.1380550861358643, "learning_rate": 8.689548660492882e-06, "loss": 0.6552, "step": 3725 }, { "epoch": 0.5557461406518011, "grad_norm": 2.4055585861206055, "learning_rate": 8.684758747781073e-06, "loss": 0.6895, "step": 3726 }, { "epoch": 0.5558952942053844, "grad_norm": 2.1874420642852783, "learning_rate": 8.679969142120765e-06, "loss": 0.6714, "step": 3727 }, { "epoch": 0.5560444477589679, "grad_norm": 1.4521681070327759, "learning_rate": 8.675179844630125e-06, "loss": 0.6836, "step": 3728 }, { "epoch": 0.5561936013125512, "grad_norm": 1.89670991897583, "learning_rate": 8.670390856427242e-06, "loss": 0.6834, "step": 3729 }, { "epoch": 0.5563427548661347, "grad_norm": 1.9351245164871216, "learning_rate": 8.665602178630146e-06, "loss": 0.7755, "step": 3730 }, { "epoch": 0.5564919084197181, "grad_norm": 1.807674765586853, "learning_rate": 8.660813812356773e-06, "loss": 0.6344, "step": 3731 }, { "epoch": 0.5566410619733015, "grad_norm": 3.9844772815704346, "learning_rate": 8.656025758725004e-06, "loss": 0.7718, "step": 3732 }, { "epoch": 0.5567902155268849, "grad_norm": 1.513313889503479, "learning_rate": 8.651238018852638e-06, "loss": 0.7178, "step": 3733 }, { "epoch": 0.5569393690804684, "grad_norm": 2.381739854812622, "learning_rate": 8.646450593857407e-06, "loss": 0.8044, "step": 3734 }, { "epoch": 0.5570885226340517, "grad_norm": 2.2192625999450684, "learning_rate": 8.641663484856964e-06, "loss": 0.6423, "step": 3735 }, { "epoch": 0.5572376761876352, "grad_norm": 2.4010374546051025, "learning_rate": 8.636876692968887e-06, "loss": 0.677, "step": 3736 }, { "epoch": 0.5573868297412186, "grad_norm": 2.0908377170562744, "learning_rate": 8.632090219310688e-06, "loss": 0.6388, "step": 3737 }, { "epoch": 0.557535983294802, "grad_norm": 2.1788454055786133, "learning_rate": 8.627304064999798e-06, "loss": 0.6843, "step": 3738 }, { "epoch": 0.5576851368483854, "grad_norm": 0.49958521127700806, "learning_rate": 8.622518231153574e-06, "loss": 0.2921, "step": 3739 }, { "epoch": 0.5578342904019689, "grad_norm": 2.6564128398895264, "learning_rate": 8.617732718889305e-06, "loss": 0.7183, "step": 3740 }, { "epoch": 0.5579834439555522, "grad_norm": 2.0996620655059814, "learning_rate": 8.612947529324196e-06, "loss": 0.6951, "step": 3741 }, { "epoch": 0.5581325975091357, "grad_norm": 2.6661055088043213, "learning_rate": 8.608162663575378e-06, "loss": 0.7516, "step": 3742 }, { "epoch": 0.558281751062719, "grad_norm": 2.1812093257904053, "learning_rate": 8.603378122759912e-06, "loss": 0.6774, "step": 3743 }, { "epoch": 0.5584309046163025, "grad_norm": 1.9749552011489868, "learning_rate": 8.598593907994778e-06, "loss": 0.7687, "step": 3744 }, { "epoch": 0.5585800581698859, "grad_norm": 1.8095687627792358, "learning_rate": 8.593810020396882e-06, "loss": 0.7149, "step": 3745 }, { "epoch": 0.5587292117234693, "grad_norm": 1.750532627105713, "learning_rate": 8.58902646108305e-06, "loss": 0.6742, "step": 3746 }, { "epoch": 0.5588783652770527, "grad_norm": 2.255873203277588, "learning_rate": 8.584243231170042e-06, "loss": 0.7745, "step": 3747 }, { "epoch": 0.5590275188306362, "grad_norm": 1.7797257900238037, "learning_rate": 8.579460331774529e-06, "loss": 0.7322, "step": 3748 }, { "epoch": 0.5591766723842195, "grad_norm": 0.5623656511306763, "learning_rate": 8.57467776401311e-06, "loss": 0.2633, "step": 3749 }, { "epoch": 0.559325825937803, "grad_norm": 1.9962756633758545, "learning_rate": 8.569895529002305e-06, "loss": 0.6003, "step": 3750 }, { "epoch": 0.5594749794913864, "grad_norm": 1.643848180770874, "learning_rate": 8.565113627858562e-06, "loss": 0.7303, "step": 3751 }, { "epoch": 0.5596241330449698, "grad_norm": 1.4805148839950562, "learning_rate": 8.560332061698242e-06, "loss": 0.7386, "step": 3752 }, { "epoch": 0.5597732865985532, "grad_norm": 1.7199268341064453, "learning_rate": 8.55555083163763e-06, "loss": 0.7113, "step": 3753 }, { "epoch": 0.5599224401521367, "grad_norm": 1.6140964031219482, "learning_rate": 8.550769938792943e-06, "loss": 0.7605, "step": 3754 }, { "epoch": 0.56007159370572, "grad_norm": 1.589288592338562, "learning_rate": 8.5459893842803e-06, "loss": 0.6295, "step": 3755 }, { "epoch": 0.5602207472593035, "grad_norm": 1.9492323398590088, "learning_rate": 8.54120916921576e-06, "loss": 0.674, "step": 3756 }, { "epoch": 0.5603699008128868, "grad_norm": 3.573601484298706, "learning_rate": 8.536429294715296e-06, "loss": 0.6846, "step": 3757 }, { "epoch": 0.5605190543664703, "grad_norm": 2.873673677444458, "learning_rate": 8.5316497618948e-06, "loss": 0.749, "step": 3758 }, { "epoch": 0.5606682079200537, "grad_norm": 1.5727475881576538, "learning_rate": 8.526870571870077e-06, "loss": 0.6906, "step": 3759 }, { "epoch": 0.5608173614736371, "grad_norm": 1.4702345132827759, "learning_rate": 8.522091725756868e-06, "loss": 0.7269, "step": 3760 }, { "epoch": 0.5609665150272205, "grad_norm": 1.7032101154327393, "learning_rate": 8.51731322467082e-06, "loss": 0.6894, "step": 3761 }, { "epoch": 0.561115668580804, "grad_norm": 2.1529083251953125, "learning_rate": 8.51253506972751e-06, "loss": 0.7216, "step": 3762 }, { "epoch": 0.5612648221343873, "grad_norm": 1.4309476613998413, "learning_rate": 8.507757262042423e-06, "loss": 0.6934, "step": 3763 }, { "epoch": 0.5614139756879708, "grad_norm": 0.5713330507278442, "learning_rate": 8.502979802730968e-06, "loss": 0.2643, "step": 3764 }, { "epoch": 0.5615631292415542, "grad_norm": 5.014711856842041, "learning_rate": 8.49820269290848e-06, "loss": 0.6463, "step": 3765 }, { "epoch": 0.5617122827951376, "grad_norm": 2.0044524669647217, "learning_rate": 8.493425933690205e-06, "loss": 0.7836, "step": 3766 }, { "epoch": 0.561861436348721, "grad_norm": 1.2022265195846558, "learning_rate": 8.488649526191303e-06, "loss": 0.7127, "step": 3767 }, { "epoch": 0.5620105899023045, "grad_norm": 1.3674122095108032, "learning_rate": 8.483873471526865e-06, "loss": 0.7588, "step": 3768 }, { "epoch": 0.5621597434558878, "grad_norm": 1.8329880237579346, "learning_rate": 8.479097770811881e-06, "loss": 0.7217, "step": 3769 }, { "epoch": 0.5623088970094713, "grad_norm": 1.5366486310958862, "learning_rate": 8.474322425161279e-06, "loss": 0.7853, "step": 3770 }, { "epoch": 0.5624580505630546, "grad_norm": 0.560156524181366, "learning_rate": 8.469547435689888e-06, "loss": 0.2673, "step": 3771 }, { "epoch": 0.5626072041166381, "grad_norm": 2.3153014183044434, "learning_rate": 8.464772803512458e-06, "loss": 0.7581, "step": 3772 }, { "epoch": 0.5627563576702215, "grad_norm": 2.050569534301758, "learning_rate": 8.459998529743661e-06, "loss": 0.7128, "step": 3773 }, { "epoch": 0.5629055112238049, "grad_norm": 1.7008299827575684, "learning_rate": 8.455224615498086e-06, "loss": 0.6995, "step": 3774 }, { "epoch": 0.5630546647773883, "grad_norm": 2.5953798294067383, "learning_rate": 8.450451061890228e-06, "loss": 0.7212, "step": 3775 }, { "epoch": 0.5632038183309718, "grad_norm": 0.5232828855514526, "learning_rate": 8.445677870034506e-06, "loss": 0.2484, "step": 3776 }, { "epoch": 0.5633529718845551, "grad_norm": 1.5019723176956177, "learning_rate": 8.440905041045253e-06, "loss": 0.6826, "step": 3777 }, { "epoch": 0.5635021254381386, "grad_norm": 1.589329481124878, "learning_rate": 8.43613257603671e-06, "loss": 0.7429, "step": 3778 }, { "epoch": 0.563651278991722, "grad_norm": 2.9986729621887207, "learning_rate": 8.43136047612305e-06, "loss": 0.6594, "step": 3779 }, { "epoch": 0.5638004325453054, "grad_norm": 2.286280632019043, "learning_rate": 8.426588742418343e-06, "loss": 0.7347, "step": 3780 }, { "epoch": 0.5639495860988888, "grad_norm": 1.9058029651641846, "learning_rate": 8.421817376036578e-06, "loss": 0.7012, "step": 3781 }, { "epoch": 0.5640987396524723, "grad_norm": 2.312730550765991, "learning_rate": 8.417046378091674e-06, "loss": 0.712, "step": 3782 }, { "epoch": 0.5642478932060556, "grad_norm": 2.098970413208008, "learning_rate": 8.41227574969744e-06, "loss": 0.7419, "step": 3783 }, { "epoch": 0.5643970467596391, "grad_norm": 2.1316747665405273, "learning_rate": 8.40750549196761e-06, "loss": 0.7135, "step": 3784 }, { "epoch": 0.5645462003132224, "grad_norm": 1.7111142873764038, "learning_rate": 8.40273560601584e-06, "loss": 0.7567, "step": 3785 }, { "epoch": 0.5646953538668059, "grad_norm": 1.2664203643798828, "learning_rate": 8.397966092955678e-06, "loss": 0.7166, "step": 3786 }, { "epoch": 0.5648445074203893, "grad_norm": 1.8189977407455444, "learning_rate": 8.39319695390061e-06, "loss": 0.7316, "step": 3787 }, { "epoch": 0.5649936609739727, "grad_norm": 1.605750560760498, "learning_rate": 8.388428189964014e-06, "loss": 0.7501, "step": 3788 }, { "epoch": 0.5651428145275561, "grad_norm": 0.6013856530189514, "learning_rate": 8.383659802259187e-06, "loss": 0.2739, "step": 3789 }, { "epoch": 0.5652919680811396, "grad_norm": 1.5207902193069458, "learning_rate": 8.378891791899343e-06, "loss": 0.6413, "step": 3790 }, { "epoch": 0.5654411216347229, "grad_norm": 1.5043851137161255, "learning_rate": 8.37412415999761e-06, "loss": 0.7139, "step": 3791 }, { "epoch": 0.5655902751883064, "grad_norm": 2.643291473388672, "learning_rate": 8.369356907667013e-06, "loss": 0.6966, "step": 3792 }, { "epoch": 0.5657394287418898, "grad_norm": 1.5622364282608032, "learning_rate": 8.364590036020503e-06, "loss": 0.7505, "step": 3793 }, { "epoch": 0.5658885822954732, "grad_norm": 2.503899097442627, "learning_rate": 8.359823546170936e-06, "loss": 0.7179, "step": 3794 }, { "epoch": 0.5660377358490566, "grad_norm": 2.172168016433716, "learning_rate": 8.355057439231078e-06, "loss": 0.6579, "step": 3795 }, { "epoch": 0.5661868894026401, "grad_norm": 0.5627760887145996, "learning_rate": 8.35029171631361e-06, "loss": 0.2568, "step": 3796 }, { "epoch": 0.5663360429562234, "grad_norm": 1.7682746648788452, "learning_rate": 8.345526378531117e-06, "loss": 0.7194, "step": 3797 }, { "epoch": 0.5664851965098069, "grad_norm": 3.009763479232788, "learning_rate": 8.3407614269961e-06, "loss": 0.7315, "step": 3798 }, { "epoch": 0.5666343500633902, "grad_norm": 2.004267692565918, "learning_rate": 8.335996862820964e-06, "loss": 0.7228, "step": 3799 }, { "epoch": 0.5667835036169737, "grad_norm": 1.5314327478408813, "learning_rate": 8.331232687118035e-06, "loss": 0.6886, "step": 3800 }, { "epoch": 0.5669326571705571, "grad_norm": 2.6287484169006348, "learning_rate": 8.326468900999532e-06, "loss": 0.8065, "step": 3801 }, { "epoch": 0.5670818107241405, "grad_norm": 1.94427490234375, "learning_rate": 8.321705505577597e-06, "loss": 0.6673, "step": 3802 }, { "epoch": 0.5672309642777239, "grad_norm": 1.7519357204437256, "learning_rate": 8.31694250196427e-06, "loss": 0.7092, "step": 3803 }, { "epoch": 0.5673801178313074, "grad_norm": 2.0105552673339844, "learning_rate": 8.312179891271512e-06, "loss": 0.673, "step": 3804 }, { "epoch": 0.5675292713848907, "grad_norm": 2.7611019611358643, "learning_rate": 8.30741767461118e-06, "loss": 0.7962, "step": 3805 }, { "epoch": 0.5676784249384742, "grad_norm": 1.9339401721954346, "learning_rate": 8.302655853095043e-06, "loss": 0.7025, "step": 3806 }, { "epoch": 0.5678275784920576, "grad_norm": 6.515270233154297, "learning_rate": 8.297894427834777e-06, "loss": 0.6818, "step": 3807 }, { "epoch": 0.567976732045641, "grad_norm": 2.2288496494293213, "learning_rate": 8.293133399941977e-06, "loss": 0.6445, "step": 3808 }, { "epoch": 0.5681258855992244, "grad_norm": 2.008420944213867, "learning_rate": 8.288372770528125e-06, "loss": 0.7009, "step": 3809 }, { "epoch": 0.5682750391528079, "grad_norm": 3.8445186614990234, "learning_rate": 8.283612540704628e-06, "loss": 0.6834, "step": 3810 }, { "epoch": 0.5684241927063912, "grad_norm": 1.6557999849319458, "learning_rate": 8.27885271158279e-06, "loss": 0.7061, "step": 3811 }, { "epoch": 0.5685733462599747, "grad_norm": 1.8114877939224243, "learning_rate": 8.274093284273819e-06, "loss": 0.7002, "step": 3812 }, { "epoch": 0.568722499813558, "grad_norm": 1.3924981355667114, "learning_rate": 8.26933425988884e-06, "loss": 0.7347, "step": 3813 }, { "epoch": 0.5688716533671415, "grad_norm": 1.9740315675735474, "learning_rate": 8.264575639538873e-06, "loss": 0.6555, "step": 3814 }, { "epoch": 0.5690208069207249, "grad_norm": 1.8530036211013794, "learning_rate": 8.259817424334851e-06, "loss": 0.7138, "step": 3815 }, { "epoch": 0.5691699604743083, "grad_norm": 1.9462255239486694, "learning_rate": 8.255059615387606e-06, "loss": 0.7696, "step": 3816 }, { "epoch": 0.5693191140278917, "grad_norm": 2.0976734161376953, "learning_rate": 8.250302213807886e-06, "loss": 0.7614, "step": 3817 }, { "epoch": 0.5694682675814752, "grad_norm": 1.4978973865509033, "learning_rate": 8.245545220706334e-06, "loss": 0.7043, "step": 3818 }, { "epoch": 0.5696174211350585, "grad_norm": 1.6699931621551514, "learning_rate": 8.2407886371935e-06, "loss": 0.6534, "step": 3819 }, { "epoch": 0.569766574688642, "grad_norm": 1.814379334449768, "learning_rate": 8.236032464379838e-06, "loss": 0.6306, "step": 3820 }, { "epoch": 0.5699157282422254, "grad_norm": 1.7858954668045044, "learning_rate": 8.231276703375708e-06, "loss": 0.5822, "step": 3821 }, { "epoch": 0.5700648817958088, "grad_norm": 2.029982328414917, "learning_rate": 8.226521355291372e-06, "loss": 0.6538, "step": 3822 }, { "epoch": 0.5702140353493922, "grad_norm": 1.6219335794448853, "learning_rate": 8.221766421237e-06, "loss": 0.6683, "step": 3823 }, { "epoch": 0.5703631889029757, "grad_norm": 1.505981683731079, "learning_rate": 8.217011902322656e-06, "loss": 0.7323, "step": 3824 }, { "epoch": 0.570512342456559, "grad_norm": 1.9916837215423584, "learning_rate": 8.212257799658315e-06, "loss": 0.7234, "step": 3825 }, { "epoch": 0.5706614960101425, "grad_norm": 2.208219051361084, "learning_rate": 8.207504114353854e-06, "loss": 0.6361, "step": 3826 }, { "epoch": 0.5708106495637258, "grad_norm": 1.5548590421676636, "learning_rate": 8.202750847519055e-06, "loss": 0.7277, "step": 3827 }, { "epoch": 0.5709598031173093, "grad_norm": 1.6686533689498901, "learning_rate": 8.197998000263591e-06, "loss": 0.6463, "step": 3828 }, { "epoch": 0.5711089566708927, "grad_norm": 2.050408363342285, "learning_rate": 8.193245573697051e-06, "loss": 0.6735, "step": 3829 }, { "epoch": 0.5712581102244761, "grad_norm": 1.659718632698059, "learning_rate": 8.188493568928916e-06, "loss": 0.657, "step": 3830 }, { "epoch": 0.5714072637780595, "grad_norm": 2.3099021911621094, "learning_rate": 8.18374198706857e-06, "loss": 0.5988, "step": 3831 }, { "epoch": 0.571556417331643, "grad_norm": 1.969160795211792, "learning_rate": 8.178990829225308e-06, "loss": 0.5661, "step": 3832 }, { "epoch": 0.5717055708852263, "grad_norm": 1.7593683004379272, "learning_rate": 8.17424009650831e-06, "loss": 0.7768, "step": 3833 }, { "epoch": 0.5718547244388098, "grad_norm": 2.286020278930664, "learning_rate": 8.169489790026664e-06, "loss": 0.6553, "step": 3834 }, { "epoch": 0.5720038779923932, "grad_norm": 1.6840384006500244, "learning_rate": 8.16473991088937e-06, "loss": 0.7407, "step": 3835 }, { "epoch": 0.5721530315459766, "grad_norm": 1.726454257965088, "learning_rate": 8.159990460205312e-06, "loss": 0.7036, "step": 3836 }, { "epoch": 0.57230218509956, "grad_norm": 1.8046543598175049, "learning_rate": 8.155241439083277e-06, "loss": 0.6912, "step": 3837 }, { "epoch": 0.5724513386531435, "grad_norm": 1.4980729818344116, "learning_rate": 8.150492848631958e-06, "loss": 0.7574, "step": 3838 }, { "epoch": 0.5726004922067268, "grad_norm": 1.9585530757904053, "learning_rate": 8.14574468995994e-06, "loss": 0.6868, "step": 3839 }, { "epoch": 0.5727496457603103, "grad_norm": 1.6419490575790405, "learning_rate": 8.140996964175716e-06, "loss": 0.6649, "step": 3840 }, { "epoch": 0.5728987993138936, "grad_norm": 2.057501792907715, "learning_rate": 8.136249672387673e-06, "loss": 0.7092, "step": 3841 }, { "epoch": 0.5730479528674771, "grad_norm": 1.2174545526504517, "learning_rate": 8.131502815704087e-06, "loss": 0.7389, "step": 3842 }, { "epoch": 0.5731971064210605, "grad_norm": 3.4706499576568604, "learning_rate": 8.126756395233154e-06, "loss": 0.7234, "step": 3843 }, { "epoch": 0.573346259974644, "grad_norm": 0.6159731149673462, "learning_rate": 8.122010412082952e-06, "loss": 0.2743, "step": 3844 }, { "epoch": 0.5734954135282273, "grad_norm": 1.5970946550369263, "learning_rate": 8.117264867361461e-06, "loss": 0.7593, "step": 3845 }, { "epoch": 0.5736445670818108, "grad_norm": 1.765227198600769, "learning_rate": 8.112519762176559e-06, "loss": 0.7236, "step": 3846 }, { "epoch": 0.5737937206353941, "grad_norm": 2.000227212905884, "learning_rate": 8.107775097636023e-06, "loss": 0.7502, "step": 3847 }, { "epoch": 0.5739428741889776, "grad_norm": 2.1018924713134766, "learning_rate": 8.103030874847521e-06, "loss": 0.6745, "step": 3848 }, { "epoch": 0.574092027742561, "grad_norm": 1.8911349773406982, "learning_rate": 8.098287094918625e-06, "loss": 0.6728, "step": 3849 }, { "epoch": 0.5742411812961444, "grad_norm": 1.4128532409667969, "learning_rate": 8.093543758956802e-06, "loss": 0.7017, "step": 3850 }, { "epoch": 0.5743903348497278, "grad_norm": 1.4283119440078735, "learning_rate": 8.088800868069406e-06, "loss": 0.7231, "step": 3851 }, { "epoch": 0.5745394884033113, "grad_norm": 1.60612952709198, "learning_rate": 8.084058423363709e-06, "loss": 0.7372, "step": 3852 }, { "epoch": 0.5746886419568946, "grad_norm": 1.6442772150039673, "learning_rate": 8.079316425946858e-06, "loss": 0.7299, "step": 3853 }, { "epoch": 0.5748377955104781, "grad_norm": 1.49176025390625, "learning_rate": 8.0745748769259e-06, "loss": 0.8225, "step": 3854 }, { "epoch": 0.5749869490640614, "grad_norm": 2.1827163696289062, "learning_rate": 8.069833777407786e-06, "loss": 0.7776, "step": 3855 }, { "epoch": 0.5751361026176449, "grad_norm": 1.3986265659332275, "learning_rate": 8.065093128499351e-06, "loss": 0.7706, "step": 3856 }, { "epoch": 0.5752852561712283, "grad_norm": 2.121901035308838, "learning_rate": 8.060352931307332e-06, "loss": 0.743, "step": 3857 }, { "epoch": 0.5754344097248117, "grad_norm": 0.5063158869743347, "learning_rate": 8.055613186938357e-06, "loss": 0.2524, "step": 3858 }, { "epoch": 0.5755835632783951, "grad_norm": 1.2531960010528564, "learning_rate": 8.050873896498955e-06, "loss": 0.7682, "step": 3859 }, { "epoch": 0.5757327168319786, "grad_norm": 2.8251235485076904, "learning_rate": 8.046135061095534e-06, "loss": 0.6358, "step": 3860 }, { "epoch": 0.5758818703855619, "grad_norm": 1.6541701555252075, "learning_rate": 8.041396681834415e-06, "loss": 0.7119, "step": 3861 }, { "epoch": 0.5760310239391454, "grad_norm": 2.131650924682617, "learning_rate": 8.036658759821799e-06, "loss": 0.6611, "step": 3862 }, { "epoch": 0.5761801774927288, "grad_norm": 1.411014437675476, "learning_rate": 8.031921296163785e-06, "loss": 0.6483, "step": 3863 }, { "epoch": 0.5763293310463122, "grad_norm": 2.2336323261260986, "learning_rate": 8.027184291966361e-06, "loss": 0.6545, "step": 3864 }, { "epoch": 0.5764784845998956, "grad_norm": 2.567856550216675, "learning_rate": 8.022447748335418e-06, "loss": 0.7371, "step": 3865 }, { "epoch": 0.5766276381534791, "grad_norm": 2.0339434146881104, "learning_rate": 8.017711666376726e-06, "loss": 0.6479, "step": 3866 }, { "epoch": 0.5767767917070624, "grad_norm": 1.7938176393508911, "learning_rate": 8.012976047195955e-06, "loss": 0.7025, "step": 3867 }, { "epoch": 0.5769259452606459, "grad_norm": 1.9182754755020142, "learning_rate": 8.00824089189867e-06, "loss": 0.6879, "step": 3868 }, { "epoch": 0.5770750988142292, "grad_norm": 1.6121547222137451, "learning_rate": 8.003506201590315e-06, "loss": 0.707, "step": 3869 }, { "epoch": 0.5772242523678127, "grad_norm": 0.5666581392288208, "learning_rate": 7.99877197737624e-06, "loss": 0.2761, "step": 3870 }, { "epoch": 0.5773734059213961, "grad_norm": 1.5676562786102295, "learning_rate": 7.994038220361682e-06, "loss": 0.7058, "step": 3871 }, { "epoch": 0.5775225594749795, "grad_norm": 2.61928391456604, "learning_rate": 7.989304931651763e-06, "loss": 0.7445, "step": 3872 }, { "epoch": 0.5776717130285629, "grad_norm": 1.6773537397384644, "learning_rate": 7.984572112351499e-06, "loss": 0.769, "step": 3873 }, { "epoch": 0.5778208665821463, "grad_norm": 1.635866403579712, "learning_rate": 7.9798397635658e-06, "loss": 0.7488, "step": 3874 }, { "epoch": 0.5779700201357297, "grad_norm": 2.0060019493103027, "learning_rate": 7.975107886399457e-06, "loss": 0.5951, "step": 3875 }, { "epoch": 0.5781191736893131, "grad_norm": 1.8277469873428345, "learning_rate": 7.970376481957166e-06, "loss": 0.7364, "step": 3876 }, { "epoch": 0.5782683272428966, "grad_norm": 1.9141792058944702, "learning_rate": 7.965645551343497e-06, "loss": 0.6351, "step": 3877 }, { "epoch": 0.5784174807964799, "grad_norm": 2.443897008895874, "learning_rate": 7.960915095662922e-06, "loss": 0.624, "step": 3878 }, { "epoch": 0.5785666343500634, "grad_norm": 1.3098751306533813, "learning_rate": 7.956185116019787e-06, "loss": 0.8482, "step": 3879 }, { "epoch": 0.5787157879036468, "grad_norm": 2.0621554851531982, "learning_rate": 7.951455613518348e-06, "loss": 0.6235, "step": 3880 }, { "epoch": 0.5788649414572302, "grad_norm": 1.8850924968719482, "learning_rate": 7.946726589262726e-06, "loss": 0.7082, "step": 3881 }, { "epoch": 0.5790140950108136, "grad_norm": 1.3871525526046753, "learning_rate": 7.941998044356951e-06, "loss": 0.8156, "step": 3882 }, { "epoch": 0.579163248564397, "grad_norm": 1.9429855346679688, "learning_rate": 7.937269979904928e-06, "loss": 0.7018, "step": 3883 }, { "epoch": 0.5793124021179804, "grad_norm": 1.6200242042541504, "learning_rate": 7.932542397010453e-06, "loss": 0.6777, "step": 3884 }, { "epoch": 0.5794615556715639, "grad_norm": 2.571787118911743, "learning_rate": 7.927815296777216e-06, "loss": 0.6564, "step": 3885 }, { "epoch": 0.5796107092251472, "grad_norm": 0.5711537003517151, "learning_rate": 7.923088680308777e-06, "loss": 0.2475, "step": 3886 }, { "epoch": 0.5797598627787307, "grad_norm": 5.121997833251953, "learning_rate": 7.918362548708607e-06, "loss": 0.6736, "step": 3887 }, { "epoch": 0.5799090163323141, "grad_norm": 1.4316216707229614, "learning_rate": 7.91363690308005e-06, "loss": 0.5896, "step": 3888 }, { "epoch": 0.5800581698858975, "grad_norm": 2.9267826080322266, "learning_rate": 7.908911744526334e-06, "loss": 0.7031, "step": 3889 }, { "epoch": 0.5802073234394809, "grad_norm": 1.368135690689087, "learning_rate": 7.90418707415058e-06, "loss": 0.7437, "step": 3890 }, { "epoch": 0.5803564769930644, "grad_norm": 1.422475814819336, "learning_rate": 7.899462893055792e-06, "loss": 0.7709, "step": 3891 }, { "epoch": 0.5805056305466477, "grad_norm": 1.6914973258972168, "learning_rate": 7.894739202344857e-06, "loss": 0.6619, "step": 3892 }, { "epoch": 0.5806547841002312, "grad_norm": 1.3406238555908203, "learning_rate": 7.890016003120559e-06, "loss": 0.784, "step": 3893 }, { "epoch": 0.5808039376538146, "grad_norm": 1.7352644205093384, "learning_rate": 7.885293296485551e-06, "loss": 0.6311, "step": 3894 }, { "epoch": 0.580953091207398, "grad_norm": 1.508895754814148, "learning_rate": 7.880571083542381e-06, "loss": 0.6557, "step": 3895 }, { "epoch": 0.5811022447609814, "grad_norm": 2.0226972103118896, "learning_rate": 7.875849365393484e-06, "loss": 0.7239, "step": 3896 }, { "epoch": 0.5812513983145648, "grad_norm": 1.7025877237319946, "learning_rate": 7.871128143141175e-06, "loss": 0.7169, "step": 3897 }, { "epoch": 0.5814005518681482, "grad_norm": 2.6978237628936768, "learning_rate": 7.866407417887647e-06, "loss": 0.6499, "step": 3898 }, { "epoch": 0.5815497054217317, "grad_norm": 2.5355308055877686, "learning_rate": 7.861687190734992e-06, "loss": 0.6982, "step": 3899 }, { "epoch": 0.581698858975315, "grad_norm": 2.611577033996582, "learning_rate": 7.85696746278517e-06, "loss": 0.6738, "step": 3900 }, { "epoch": 0.5818480125288985, "grad_norm": 1.617555022239685, "learning_rate": 7.852248235140038e-06, "loss": 0.7284, "step": 3901 }, { "epoch": 0.5819971660824819, "grad_norm": 1.7125986814498901, "learning_rate": 7.847529508901327e-06, "loss": 0.7173, "step": 3902 }, { "epoch": 0.5821463196360653, "grad_norm": 1.645580768585205, "learning_rate": 7.84281128517065e-06, "loss": 0.6385, "step": 3903 }, { "epoch": 0.5822954731896487, "grad_norm": 1.5958614349365234, "learning_rate": 7.83809356504951e-06, "loss": 0.6817, "step": 3904 }, { "epoch": 0.5824446267432322, "grad_norm": 2.0625507831573486, "learning_rate": 7.833376349639295e-06, "loss": 0.7283, "step": 3905 }, { "epoch": 0.5825937802968155, "grad_norm": 0.5585536956787109, "learning_rate": 7.82865964004126e-06, "loss": 0.2641, "step": 3906 }, { "epoch": 0.582742933850399, "grad_norm": 1.963940978050232, "learning_rate": 7.823943437356556e-06, "loss": 0.6844, "step": 3907 }, { "epoch": 0.5828920874039824, "grad_norm": 1.27061927318573, "learning_rate": 7.81922774268621e-06, "loss": 0.632, "step": 3908 }, { "epoch": 0.5830412409575658, "grad_norm": 2.009888172149658, "learning_rate": 7.81451255713113e-06, "loss": 0.7838, "step": 3909 }, { "epoch": 0.5831903945111492, "grad_norm": 1.7355901002883911, "learning_rate": 7.809797881792108e-06, "loss": 0.669, "step": 3910 }, { "epoch": 0.5833395480647326, "grad_norm": 0.5221549272537231, "learning_rate": 7.80508371776981e-06, "loss": 0.2414, "step": 3911 }, { "epoch": 0.583488701618316, "grad_norm": 3.546488046646118, "learning_rate": 7.800370066164793e-06, "loss": 0.6969, "step": 3912 }, { "epoch": 0.5836378551718995, "grad_norm": 1.485827922821045, "learning_rate": 7.79565692807749e-06, "loss": 0.699, "step": 3913 }, { "epoch": 0.5837870087254828, "grad_norm": 1.3516594171524048, "learning_rate": 7.790944304608214e-06, "loss": 0.7403, "step": 3914 }, { "epoch": 0.5839361622790663, "grad_norm": 1.553840160369873, "learning_rate": 7.786232196857151e-06, "loss": 0.7555, "step": 3915 }, { "epoch": 0.5840853158326497, "grad_norm": 1.5586384534835815, "learning_rate": 7.781520605924378e-06, "loss": 0.7268, "step": 3916 }, { "epoch": 0.5842344693862331, "grad_norm": 2.004723310470581, "learning_rate": 7.776809532909843e-06, "loss": 0.5683, "step": 3917 }, { "epoch": 0.5843836229398165, "grad_norm": 1.3750096559524536, "learning_rate": 7.772098978913381e-06, "loss": 0.7088, "step": 3918 }, { "epoch": 0.5845327764934, "grad_norm": 1.7664002180099487, "learning_rate": 7.767388945034695e-06, "loss": 0.6055, "step": 3919 }, { "epoch": 0.5846819300469833, "grad_norm": 1.597292423248291, "learning_rate": 7.762679432373376e-06, "loss": 0.7213, "step": 3920 }, { "epoch": 0.5848310836005668, "grad_norm": 1.3283518552780151, "learning_rate": 7.757970442028886e-06, "loss": 0.7873, "step": 3921 }, { "epoch": 0.5849802371541502, "grad_norm": 1.7736663818359375, "learning_rate": 7.753261975100577e-06, "loss": 0.6425, "step": 3922 }, { "epoch": 0.5851293907077336, "grad_norm": 1.6303248405456543, "learning_rate": 7.748554032687664e-06, "loss": 0.7227, "step": 3923 }, { "epoch": 0.585278544261317, "grad_norm": 1.9928339719772339, "learning_rate": 7.74384661588925e-06, "loss": 0.6248, "step": 3924 }, { "epoch": 0.5854276978149004, "grad_norm": 1.9333276748657227, "learning_rate": 7.73913972580431e-06, "loss": 0.6996, "step": 3925 }, { "epoch": 0.5855768513684838, "grad_norm": 0.5217165350914001, "learning_rate": 7.734433363531694e-06, "loss": 0.2624, "step": 3926 }, { "epoch": 0.5857260049220673, "grad_norm": 2.9917964935302734, "learning_rate": 7.729727530170141e-06, "loss": 0.6538, "step": 3927 }, { "epoch": 0.5858751584756506, "grad_norm": 1.3708359003067017, "learning_rate": 7.72502222681825e-06, "loss": 0.6808, "step": 3928 }, { "epoch": 0.5860243120292341, "grad_norm": 1.6151431798934937, "learning_rate": 7.72031745457451e-06, "loss": 0.7165, "step": 3929 }, { "epoch": 0.5861734655828175, "grad_norm": 1.4086570739746094, "learning_rate": 7.715613214537272e-06, "loss": 0.6769, "step": 3930 }, { "epoch": 0.5863226191364009, "grad_norm": 1.6238110065460205, "learning_rate": 7.710909507804782e-06, "loss": 0.7339, "step": 3931 }, { "epoch": 0.5864717726899843, "grad_norm": 2.5790138244628906, "learning_rate": 7.706206335475143e-06, "loss": 0.7208, "step": 3932 }, { "epoch": 0.5866209262435678, "grad_norm": 13.168998718261719, "learning_rate": 7.701503698646345e-06, "loss": 0.7036, "step": 3933 }, { "epoch": 0.5867700797971511, "grad_norm": 1.8345178365707397, "learning_rate": 7.696801598416245e-06, "loss": 0.7382, "step": 3934 }, { "epoch": 0.5869192333507346, "grad_norm": 2.5030879974365234, "learning_rate": 7.692100035882581e-06, "loss": 0.7602, "step": 3935 }, { "epoch": 0.587068386904318, "grad_norm": 3.889665126800537, "learning_rate": 7.687399012142964e-06, "loss": 0.6348, "step": 3936 }, { "epoch": 0.5872175404579014, "grad_norm": 1.68043053150177, "learning_rate": 7.682698528294872e-06, "loss": 0.6634, "step": 3937 }, { "epoch": 0.5873666940114848, "grad_norm": 2.244962453842163, "learning_rate": 7.677998585435669e-06, "loss": 0.7436, "step": 3938 }, { "epoch": 0.5875158475650682, "grad_norm": 1.6625018119812012, "learning_rate": 7.673299184662582e-06, "loss": 0.666, "step": 3939 }, { "epoch": 0.5876650011186516, "grad_norm": 1.4299719333648682, "learning_rate": 7.668600327072721e-06, "loss": 0.7623, "step": 3940 }, { "epoch": 0.5878141546722351, "grad_norm": 1.7078981399536133, "learning_rate": 7.663902013763064e-06, "loss": 0.726, "step": 3941 }, { "epoch": 0.5879633082258184, "grad_norm": 2.9367806911468506, "learning_rate": 7.65920424583046e-06, "loss": 0.6007, "step": 3942 }, { "epoch": 0.5881124617794019, "grad_norm": 1.931456446647644, "learning_rate": 7.654507024371635e-06, "loss": 0.6706, "step": 3943 }, { "epoch": 0.5882616153329853, "grad_norm": 1.4188965559005737, "learning_rate": 7.649810350483187e-06, "loss": 0.7614, "step": 3944 }, { "epoch": 0.5884107688865687, "grad_norm": 1.7989565134048462, "learning_rate": 7.645114225261577e-06, "loss": 0.7147, "step": 3945 }, { "epoch": 0.5885599224401521, "grad_norm": 1.5966877937316895, "learning_rate": 7.640418649803155e-06, "loss": 0.8113, "step": 3946 }, { "epoch": 0.5887090759937356, "grad_norm": 1.8143314123153687, "learning_rate": 7.635723625204124e-06, "loss": 0.6697, "step": 3947 }, { "epoch": 0.5888582295473189, "grad_norm": 3.620640516281128, "learning_rate": 7.631029152560574e-06, "loss": 0.7066, "step": 3948 }, { "epoch": 0.5890073831009024, "grad_norm": 0.519180417060852, "learning_rate": 7.62633523296846e-06, "loss": 0.2351, "step": 3949 }, { "epoch": 0.5891565366544858, "grad_norm": 1.5886335372924805, "learning_rate": 7.621641867523608e-06, "loss": 0.7264, "step": 3950 }, { "epoch": 0.5893056902080692, "grad_norm": 2.298377513885498, "learning_rate": 7.6169490573217085e-06, "loss": 0.7343, "step": 3951 }, { "epoch": 0.5894548437616526, "grad_norm": 1.9720733165740967, "learning_rate": 7.612256803458335e-06, "loss": 0.7039, "step": 3952 }, { "epoch": 0.589603997315236, "grad_norm": 2.3261306285858154, "learning_rate": 7.607565107028918e-06, "loss": 0.7756, "step": 3953 }, { "epoch": 0.5897531508688194, "grad_norm": 1.556676983833313, "learning_rate": 7.602873969128769e-06, "loss": 0.7576, "step": 3954 }, { "epoch": 0.5899023044224029, "grad_norm": 1.8181921243667603, "learning_rate": 7.598183390853063e-06, "loss": 0.6233, "step": 3955 }, { "epoch": 0.5900514579759862, "grad_norm": 1.6347100734710693, "learning_rate": 7.593493373296841e-06, "loss": 0.629, "step": 3956 }, { "epoch": 0.5902006115295697, "grad_norm": 0.5270280241966248, "learning_rate": 7.588803917555023e-06, "loss": 0.2425, "step": 3957 }, { "epoch": 0.5903497650831531, "grad_norm": 2.047816753387451, "learning_rate": 7.584115024722392e-06, "loss": 0.6813, "step": 3958 }, { "epoch": 0.5904989186367365, "grad_norm": 2.3577768802642822, "learning_rate": 7.579426695893599e-06, "loss": 0.6482, "step": 3959 }, { "epoch": 0.5906480721903199, "grad_norm": 1.714807152748108, "learning_rate": 7.574738932163167e-06, "loss": 0.7281, "step": 3960 }, { "epoch": 0.5907972257439034, "grad_norm": 1.4603724479675293, "learning_rate": 7.570051734625481e-06, "loss": 0.7266, "step": 3961 }, { "epoch": 0.5909463792974867, "grad_norm": 1.806136965751648, "learning_rate": 7.565365104374798e-06, "loss": 0.7035, "step": 3962 }, { "epoch": 0.5910955328510702, "grad_norm": 1.7390921115875244, "learning_rate": 7.560679042505242e-06, "loss": 0.7042, "step": 3963 }, { "epoch": 0.5912446864046536, "grad_norm": 1.1908626556396484, "learning_rate": 7.555993550110805e-06, "loss": 0.7412, "step": 3964 }, { "epoch": 0.591393839958237, "grad_norm": 1.6944888830184937, "learning_rate": 7.551308628285341e-06, "loss": 0.7813, "step": 3965 }, { "epoch": 0.5915429935118204, "grad_norm": 1.6120522022247314, "learning_rate": 7.546624278122583e-06, "loss": 0.6354, "step": 3966 }, { "epoch": 0.5916921470654039, "grad_norm": 1.8322749137878418, "learning_rate": 7.5419405007161195e-06, "loss": 0.7961, "step": 3967 }, { "epoch": 0.5918413006189872, "grad_norm": 2.378995418548584, "learning_rate": 7.537257297159404e-06, "loss": 0.7474, "step": 3968 }, { "epoch": 0.5919904541725707, "grad_norm": 2.047290325164795, "learning_rate": 7.532574668545767e-06, "loss": 0.6386, "step": 3969 }, { "epoch": 0.592139607726154, "grad_norm": 1.9097446203231812, "learning_rate": 7.527892615968392e-06, "loss": 0.7239, "step": 3970 }, { "epoch": 0.5922887612797375, "grad_norm": 1.859856367111206, "learning_rate": 7.523211140520339e-06, "loss": 0.7229, "step": 3971 }, { "epoch": 0.5924379148333209, "grad_norm": 1.4817503690719604, "learning_rate": 7.518530243294526e-06, "loss": 0.6789, "step": 3972 }, { "epoch": 0.5925870683869043, "grad_norm": 1.8249187469482422, "learning_rate": 7.513849925383736e-06, "loss": 0.653, "step": 3973 }, { "epoch": 0.5927362219404877, "grad_norm": 3.002032518386841, "learning_rate": 7.509170187880623e-06, "loss": 0.6786, "step": 3974 }, { "epoch": 0.5928853754940712, "grad_norm": 1.4238044023513794, "learning_rate": 7.504491031877704e-06, "loss": 0.7145, "step": 3975 }, { "epoch": 0.5930345290476545, "grad_norm": 2.4355671405792236, "learning_rate": 7.499812458467353e-06, "loss": 0.6902, "step": 3976 }, { "epoch": 0.593183682601238, "grad_norm": 1.8129183053970337, "learning_rate": 7.495134468741816e-06, "loss": 0.7982, "step": 3977 }, { "epoch": 0.5933328361548214, "grad_norm": 2.482367515563965, "learning_rate": 7.490457063793199e-06, "loss": 0.5918, "step": 3978 }, { "epoch": 0.5934819897084048, "grad_norm": 1.2964963912963867, "learning_rate": 7.4857802447134706e-06, "loss": 0.7367, "step": 3979 }, { "epoch": 0.5936311432619882, "grad_norm": 0.5823512077331543, "learning_rate": 7.481104012594466e-06, "loss": 0.2671, "step": 3980 }, { "epoch": 0.5937802968155717, "grad_norm": 1.9798545837402344, "learning_rate": 7.476428368527879e-06, "loss": 0.7002, "step": 3981 }, { "epoch": 0.593929450369155, "grad_norm": 1.3457262516021729, "learning_rate": 7.47175331360527e-06, "loss": 0.8085, "step": 3982 }, { "epoch": 0.5940786039227385, "grad_norm": 0.5372493863105774, "learning_rate": 7.467078848918065e-06, "loss": 0.2641, "step": 3983 }, { "epoch": 0.5942277574763218, "grad_norm": 3.2242746353149414, "learning_rate": 7.46240497555754e-06, "loss": 0.7073, "step": 3984 }, { "epoch": 0.5943769110299053, "grad_norm": 2.385279417037964, "learning_rate": 7.457731694614848e-06, "loss": 0.7703, "step": 3985 }, { "epoch": 0.5945260645834887, "grad_norm": 1.6394786834716797, "learning_rate": 7.453059007180994e-06, "loss": 0.7722, "step": 3986 }, { "epoch": 0.5946752181370721, "grad_norm": 2.1070995330810547, "learning_rate": 7.448386914346842e-06, "loss": 0.647, "step": 3987 }, { "epoch": 0.5948243716906555, "grad_norm": 1.4018882513046265, "learning_rate": 7.443715417203128e-06, "loss": 0.7484, "step": 3988 }, { "epoch": 0.594973525244239, "grad_norm": 2.0480270385742188, "learning_rate": 7.439044516840439e-06, "loss": 0.8151, "step": 3989 }, { "epoch": 0.5951226787978223, "grad_norm": 1.8082526922225952, "learning_rate": 7.434374214349232e-06, "loss": 0.781, "step": 3990 }, { "epoch": 0.5952718323514058, "grad_norm": 1.6899211406707764, "learning_rate": 7.42970451081981e-06, "loss": 0.652, "step": 3991 }, { "epoch": 0.5954209859049892, "grad_norm": 1.8148953914642334, "learning_rate": 7.425035407342355e-06, "loss": 0.7133, "step": 3992 }, { "epoch": 0.5955701394585726, "grad_norm": 2.8260414600372314, "learning_rate": 7.420366905006893e-06, "loss": 0.6723, "step": 3993 }, { "epoch": 0.595719293012156, "grad_norm": 1.3879754543304443, "learning_rate": 7.415699004903319e-06, "loss": 0.7286, "step": 3994 }, { "epoch": 0.5958684465657395, "grad_norm": 1.9358408451080322, "learning_rate": 7.4110317081213825e-06, "loss": 0.6704, "step": 3995 }, { "epoch": 0.5960176001193228, "grad_norm": 1.586374044418335, "learning_rate": 7.406365015750696e-06, "loss": 0.7374, "step": 3996 }, { "epoch": 0.5961667536729063, "grad_norm": 1.4313595294952393, "learning_rate": 7.401698928880726e-06, "loss": 0.7583, "step": 3997 }, { "epoch": 0.5963159072264896, "grad_norm": 3.7822282314300537, "learning_rate": 7.3970334486008e-06, "loss": 0.6891, "step": 3998 }, { "epoch": 0.5964650607800731, "grad_norm": 1.9363207817077637, "learning_rate": 7.3923685760001085e-06, "loss": 0.7012, "step": 3999 }, { "epoch": 0.5966142143336565, "grad_norm": 3.017584800720215, "learning_rate": 7.387704312167687e-06, "loss": 0.6445, "step": 4000 }, { "epoch": 0.5967633678872399, "grad_norm": 2.0005428791046143, "learning_rate": 7.383040658192449e-06, "loss": 0.7019, "step": 4001 }, { "epoch": 0.5969125214408233, "grad_norm": 1.6618291139602661, "learning_rate": 7.378377615163148e-06, "loss": 0.757, "step": 4002 }, { "epoch": 0.5970616749944068, "grad_norm": 1.6044782400131226, "learning_rate": 7.373715184168405e-06, "loss": 0.7232, "step": 4003 }, { "epoch": 0.5972108285479901, "grad_norm": 1.223394751548767, "learning_rate": 7.36905336629669e-06, "loss": 0.6483, "step": 4004 }, { "epoch": 0.5973599821015736, "grad_norm": 1.6144193410873413, "learning_rate": 7.364392162636338e-06, "loss": 0.7855, "step": 4005 }, { "epoch": 0.597509135655157, "grad_norm": 2.4348442554473877, "learning_rate": 7.359731574275533e-06, "loss": 0.7962, "step": 4006 }, { "epoch": 0.5976582892087404, "grad_norm": 1.47750985622406, "learning_rate": 7.355071602302324e-06, "loss": 0.6769, "step": 4007 }, { "epoch": 0.5978074427623238, "grad_norm": 1.4105534553527832, "learning_rate": 7.350412247804603e-06, "loss": 0.7758, "step": 4008 }, { "epoch": 0.5979565963159073, "grad_norm": 1.5183075666427612, "learning_rate": 7.345753511870139e-06, "loss": 0.6623, "step": 4009 }, { "epoch": 0.5981057498694906, "grad_norm": 2.0407514572143555, "learning_rate": 7.3410953955865324e-06, "loss": 0.6881, "step": 4010 }, { "epoch": 0.5982549034230741, "grad_norm": 1.420008897781372, "learning_rate": 7.336437900041258e-06, "loss": 0.751, "step": 4011 }, { "epoch": 0.5984040569766574, "grad_norm": 1.766586422920227, "learning_rate": 7.331781026321631e-06, "loss": 0.6877, "step": 4012 }, { "epoch": 0.5985532105302409, "grad_norm": 1.3281668424606323, "learning_rate": 7.327124775514837e-06, "loss": 0.7181, "step": 4013 }, { "epoch": 0.5987023640838243, "grad_norm": 1.1739262342453003, "learning_rate": 7.3224691487079e-06, "loss": 0.6604, "step": 4014 }, { "epoch": 0.5988515176374077, "grad_norm": 2.749162435531616, "learning_rate": 7.317814146987708e-06, "loss": 0.7244, "step": 4015 }, { "epoch": 0.5990006711909911, "grad_norm": 1.4995930194854736, "learning_rate": 7.313159771441003e-06, "loss": 0.711, "step": 4016 }, { "epoch": 0.5991498247445746, "grad_norm": 1.6344549655914307, "learning_rate": 7.308506023154375e-06, "loss": 0.7146, "step": 4017 }, { "epoch": 0.5992989782981579, "grad_norm": 2.2936196327209473, "learning_rate": 7.303852903214274e-06, "loss": 0.6663, "step": 4018 }, { "epoch": 0.5994481318517414, "grad_norm": 0.5837433338165283, "learning_rate": 7.299200412707004e-06, "loss": 0.2656, "step": 4019 }, { "epoch": 0.5995972854053248, "grad_norm": 2.202550172805786, "learning_rate": 7.294548552718714e-06, "loss": 0.6719, "step": 4020 }, { "epoch": 0.5997464389589082, "grad_norm": 2.0357911586761475, "learning_rate": 7.289897324335411e-06, "loss": 0.6342, "step": 4021 }, { "epoch": 0.5998955925124916, "grad_norm": 1.4275243282318115, "learning_rate": 7.285246728642956e-06, "loss": 0.7378, "step": 4022 }, { "epoch": 0.600044746066075, "grad_norm": 1.3783434629440308, "learning_rate": 7.280596766727057e-06, "loss": 0.7595, "step": 4023 }, { "epoch": 0.6001938996196584, "grad_norm": 2.2552006244659424, "learning_rate": 7.2759474396732835e-06, "loss": 0.646, "step": 4024 }, { "epoch": 0.6003430531732419, "grad_norm": 1.4987882375717163, "learning_rate": 7.271298748567043e-06, "loss": 0.7015, "step": 4025 }, { "epoch": 0.6004922067268252, "grad_norm": 1.512794852256775, "learning_rate": 7.2666506944936045e-06, "loss": 0.72, "step": 4026 }, { "epoch": 0.6006413602804087, "grad_norm": 2.4998366832733154, "learning_rate": 7.262003278538092e-06, "loss": 0.6063, "step": 4027 }, { "epoch": 0.6007905138339921, "grad_norm": 1.4216142892837524, "learning_rate": 7.25735650178547e-06, "loss": 0.6931, "step": 4028 }, { "epoch": 0.6009396673875755, "grad_norm": 1.6273059844970703, "learning_rate": 7.252710365320557e-06, "loss": 0.6233, "step": 4029 }, { "epoch": 0.6010888209411589, "grad_norm": 1.3127095699310303, "learning_rate": 7.248064870228028e-06, "loss": 0.7042, "step": 4030 }, { "epoch": 0.6012379744947424, "grad_norm": 1.598621129989624, "learning_rate": 7.243420017592397e-06, "loss": 0.6544, "step": 4031 }, { "epoch": 0.6013871280483257, "grad_norm": 1.6604175567626953, "learning_rate": 7.2387758084980405e-06, "loss": 0.6609, "step": 4032 }, { "epoch": 0.6015362816019092, "grad_norm": 1.9978601932525635, "learning_rate": 7.234132244029177e-06, "loss": 0.7008, "step": 4033 }, { "epoch": 0.6016854351554926, "grad_norm": 1.8154696226119995, "learning_rate": 7.229489325269874e-06, "loss": 0.6922, "step": 4034 }, { "epoch": 0.601834588709076, "grad_norm": 1.325527310371399, "learning_rate": 7.224847053304049e-06, "loss": 0.6782, "step": 4035 }, { "epoch": 0.6019837422626594, "grad_norm": 2.1064717769622803, "learning_rate": 7.22020542921548e-06, "loss": 0.713, "step": 4036 }, { "epoch": 0.6021328958162429, "grad_norm": 1.7215259075164795, "learning_rate": 7.215564454087775e-06, "loss": 0.7376, "step": 4037 }, { "epoch": 0.6022820493698262, "grad_norm": 1.4486976861953735, "learning_rate": 7.210924129004404e-06, "loss": 0.682, "step": 4038 }, { "epoch": 0.6024312029234097, "grad_norm": 15.797464370727539, "learning_rate": 7.206284455048677e-06, "loss": 0.6929, "step": 4039 }, { "epoch": 0.602580356476993, "grad_norm": 1.723679780960083, "learning_rate": 7.2016454333037585e-06, "loss": 0.6961, "step": 4040 }, { "epoch": 0.6027295100305765, "grad_norm": 0.5364365577697754, "learning_rate": 7.1970070648526565e-06, "loss": 0.2483, "step": 4041 }, { "epoch": 0.6028786635841599, "grad_norm": 1.5877629518508911, "learning_rate": 7.1923693507782276e-06, "loss": 0.6689, "step": 4042 }, { "epoch": 0.6030278171377433, "grad_norm": 1.5674418210983276, "learning_rate": 7.187732292163173e-06, "loss": 0.7309, "step": 4043 }, { "epoch": 0.6031769706913267, "grad_norm": 1.4503952264785767, "learning_rate": 7.183095890090052e-06, "loss": 0.7061, "step": 4044 }, { "epoch": 0.6033261242449102, "grad_norm": 2.124354839324951, "learning_rate": 7.178460145641257e-06, "loss": 0.6569, "step": 4045 }, { "epoch": 0.6034752777984935, "grad_norm": 1.7164536714553833, "learning_rate": 7.173825059899031e-06, "loss": 0.6158, "step": 4046 }, { "epoch": 0.603624431352077, "grad_norm": 1.7024486064910889, "learning_rate": 7.1691906339454685e-06, "loss": 0.7181, "step": 4047 }, { "epoch": 0.6037735849056604, "grad_norm": 2.0695152282714844, "learning_rate": 7.164556868862502e-06, "loss": 0.6703, "step": 4048 }, { "epoch": 0.6039227384592438, "grad_norm": 0.5331042408943176, "learning_rate": 7.159923765731917e-06, "loss": 0.2583, "step": 4049 }, { "epoch": 0.6040718920128272, "grad_norm": 1.7795480489730835, "learning_rate": 7.1552913256353405e-06, "loss": 0.6907, "step": 4050 }, { "epoch": 0.6042210455664107, "grad_norm": 1.568605661392212, "learning_rate": 7.150659549654242e-06, "loss": 0.7087, "step": 4051 }, { "epoch": 0.604370199119994, "grad_norm": 1.6556283235549927, "learning_rate": 7.146028438869938e-06, "loss": 0.7818, "step": 4052 }, { "epoch": 0.6045193526735775, "grad_norm": 2.050433874130249, "learning_rate": 7.141397994363602e-06, "loss": 0.7554, "step": 4053 }, { "epoch": 0.6046685062271608, "grad_norm": 1.6040081977844238, "learning_rate": 7.136768217216227e-06, "loss": 0.8029, "step": 4054 }, { "epoch": 0.6048176597807443, "grad_norm": 1.4951035976409912, "learning_rate": 7.132139108508678e-06, "loss": 0.7555, "step": 4055 }, { "epoch": 0.6049668133343277, "grad_norm": 1.5550098419189453, "learning_rate": 7.12751066932164e-06, "loss": 0.6928, "step": 4056 }, { "epoch": 0.6051159668879111, "grad_norm": 1.2981834411621094, "learning_rate": 7.122882900735653e-06, "loss": 0.8071, "step": 4057 }, { "epoch": 0.6052651204414945, "grad_norm": 1.1296002864837646, "learning_rate": 7.118255803831104e-06, "loss": 0.7541, "step": 4058 }, { "epoch": 0.605414273995078, "grad_norm": 7.800332069396973, "learning_rate": 7.113629379688212e-06, "loss": 0.7437, "step": 4059 }, { "epoch": 0.6055634275486613, "grad_norm": 1.6512999534606934, "learning_rate": 7.109003629387052e-06, "loss": 0.7462, "step": 4060 }, { "epoch": 0.6057125811022448, "grad_norm": 2.7030067443847656, "learning_rate": 7.104378554007527e-06, "loss": 0.7033, "step": 4061 }, { "epoch": 0.6058617346558282, "grad_norm": 1.4231940507888794, "learning_rate": 7.099754154629399e-06, "loss": 0.6762, "step": 4062 }, { "epoch": 0.6060108882094116, "grad_norm": 2.09629487991333, "learning_rate": 7.09513043233226e-06, "loss": 0.6257, "step": 4063 }, { "epoch": 0.606160041762995, "grad_norm": 1.9033805131912231, "learning_rate": 7.090507388195549e-06, "loss": 0.7012, "step": 4064 }, { "epoch": 0.6063091953165785, "grad_norm": 2.2056517601013184, "learning_rate": 7.085885023298541e-06, "loss": 0.7161, "step": 4065 }, { "epoch": 0.6064583488701618, "grad_norm": 1.9018460512161255, "learning_rate": 7.081263338720362e-06, "loss": 0.7044, "step": 4066 }, { "epoch": 0.6066075024237453, "grad_norm": 2.1680760383605957, "learning_rate": 7.076642335539969e-06, "loss": 0.7424, "step": 4067 }, { "epoch": 0.6067566559773286, "grad_norm": 2.4540352821350098, "learning_rate": 7.072022014836172e-06, "loss": 0.6702, "step": 4068 }, { "epoch": 0.6069058095309121, "grad_norm": 1.8168591260910034, "learning_rate": 7.0674023776876086e-06, "loss": 0.6389, "step": 4069 }, { "epoch": 0.6070549630844955, "grad_norm": 1.2842986583709717, "learning_rate": 7.062783425172759e-06, "loss": 0.7726, "step": 4070 }, { "epoch": 0.6072041166380789, "grad_norm": 1.7744426727294922, "learning_rate": 7.058165158369955e-06, "loss": 0.7284, "step": 4071 }, { "epoch": 0.6073532701916623, "grad_norm": 1.8909424543380737, "learning_rate": 7.0535475783573606e-06, "loss": 0.673, "step": 4072 }, { "epoch": 0.6075024237452458, "grad_norm": 1.5588964223861694, "learning_rate": 7.048930686212974e-06, "loss": 0.6968, "step": 4073 }, { "epoch": 0.6076515772988291, "grad_norm": 2.2113289833068848, "learning_rate": 7.044314483014642e-06, "loss": 0.7609, "step": 4074 }, { "epoch": 0.6078007308524126, "grad_norm": 1.8253417015075684, "learning_rate": 7.039698969840049e-06, "loss": 0.7395, "step": 4075 }, { "epoch": 0.607949884405996, "grad_norm": 1.7374591827392578, "learning_rate": 7.035084147766709e-06, "loss": 0.7446, "step": 4076 }, { "epoch": 0.6080990379595794, "grad_norm": 1.3961905241012573, "learning_rate": 7.030470017871989e-06, "loss": 0.6651, "step": 4077 }, { "epoch": 0.6082481915131628, "grad_norm": 1.3693164587020874, "learning_rate": 7.025856581233078e-06, "loss": 0.6904, "step": 4078 }, { "epoch": 0.6083973450667463, "grad_norm": 1.7458916902542114, "learning_rate": 7.021243838927021e-06, "loss": 0.7007, "step": 4079 }, { "epoch": 0.6085464986203296, "grad_norm": 1.5940829515457153, "learning_rate": 7.016631792030692e-06, "loss": 0.7672, "step": 4080 }, { "epoch": 0.6086956521739131, "grad_norm": 2.1612017154693604, "learning_rate": 7.012020441620801e-06, "loss": 0.7128, "step": 4081 }, { "epoch": 0.6088448057274964, "grad_norm": 1.374117136001587, "learning_rate": 7.007409788773895e-06, "loss": 0.6882, "step": 4082 }, { "epoch": 0.6089939592810799, "grad_norm": 1.434301495552063, "learning_rate": 7.002799834566365e-06, "loss": 0.7479, "step": 4083 }, { "epoch": 0.6091431128346633, "grad_norm": 1.3231159448623657, "learning_rate": 6.998190580074429e-06, "loss": 0.7156, "step": 4084 }, { "epoch": 0.6092922663882467, "grad_norm": 1.385677695274353, "learning_rate": 6.993582026374152e-06, "loss": 0.7363, "step": 4085 }, { "epoch": 0.6094414199418301, "grad_norm": 1.058082938194275, "learning_rate": 6.988974174541428e-06, "loss": 0.6774, "step": 4086 }, { "epoch": 0.6095905734954136, "grad_norm": 2.0287163257598877, "learning_rate": 6.9843670256519855e-06, "loss": 0.7284, "step": 4087 }, { "epoch": 0.6097397270489969, "grad_norm": 1.596834421157837, "learning_rate": 6.979760580781399e-06, "loss": 0.6957, "step": 4088 }, { "epoch": 0.6098888806025804, "grad_norm": 1.5063209533691406, "learning_rate": 6.975154841005074e-06, "loss": 0.7244, "step": 4089 }, { "epoch": 0.6100380341561638, "grad_norm": 1.7935199737548828, "learning_rate": 6.970549807398244e-06, "loss": 0.7191, "step": 4090 }, { "epoch": 0.6101871877097472, "grad_norm": 1.851332426071167, "learning_rate": 6.965945481035989e-06, "loss": 0.6884, "step": 4091 }, { "epoch": 0.6103363412633306, "grad_norm": 1.5971406698226929, "learning_rate": 6.961341862993215e-06, "loss": 0.8345, "step": 4092 }, { "epoch": 0.610485494816914, "grad_norm": 1.986403226852417, "learning_rate": 6.9567389543446665e-06, "loss": 0.7664, "step": 4093 }, { "epoch": 0.6106346483704974, "grad_norm": 1.5124565362930298, "learning_rate": 6.952136756164922e-06, "loss": 0.7866, "step": 4094 }, { "epoch": 0.6107838019240809, "grad_norm": 1.8153799772262573, "learning_rate": 6.947535269528396e-06, "loss": 0.7818, "step": 4095 }, { "epoch": 0.6109329554776642, "grad_norm": 1.1053889989852905, "learning_rate": 6.942934495509329e-06, "loss": 0.7477, "step": 4096 }, { "epoch": 0.6110821090312477, "grad_norm": 1.3469126224517822, "learning_rate": 6.938334435181812e-06, "loss": 0.7284, "step": 4097 }, { "epoch": 0.6112312625848311, "grad_norm": 1.686099648475647, "learning_rate": 6.933735089619751e-06, "loss": 0.6566, "step": 4098 }, { "epoch": 0.6113804161384145, "grad_norm": 1.6823227405548096, "learning_rate": 6.929136459896893e-06, "loss": 0.759, "step": 4099 }, { "epoch": 0.6115295696919979, "grad_norm": 3.2010021209716797, "learning_rate": 6.924538547086822e-06, "loss": 0.6783, "step": 4100 }, { "epoch": 0.6116787232455814, "grad_norm": 1.627236247062683, "learning_rate": 6.919941352262944e-06, "loss": 0.6418, "step": 4101 }, { "epoch": 0.6118278767991647, "grad_norm": 1.7597182989120483, "learning_rate": 6.915344876498509e-06, "loss": 0.6959, "step": 4102 }, { "epoch": 0.6119770303527482, "grad_norm": 2.1792237758636475, "learning_rate": 6.910749120866592e-06, "loss": 0.7898, "step": 4103 }, { "epoch": 0.6121261839063316, "grad_norm": 1.8679797649383545, "learning_rate": 6.9061540864400986e-06, "loss": 0.6356, "step": 4104 }, { "epoch": 0.612275337459915, "grad_norm": 1.6867222785949707, "learning_rate": 6.901559774291769e-06, "loss": 0.8179, "step": 4105 }, { "epoch": 0.6124244910134984, "grad_norm": 3.1172282695770264, "learning_rate": 6.8969661854941826e-06, "loss": 0.69, "step": 4106 }, { "epoch": 0.6125736445670819, "grad_norm": 2.277776002883911, "learning_rate": 6.892373321119734e-06, "loss": 0.6444, "step": 4107 }, { "epoch": 0.6127227981206652, "grad_norm": 1.2720495462417603, "learning_rate": 6.8877811822406625e-06, "loss": 0.7341, "step": 4108 }, { "epoch": 0.6128719516742487, "grad_norm": 1.6133532524108887, "learning_rate": 6.883189769929028e-06, "loss": 0.6911, "step": 4109 }, { "epoch": 0.613021105227832, "grad_norm": 1.8459198474884033, "learning_rate": 6.878599085256728e-06, "loss": 0.6472, "step": 4110 }, { "epoch": 0.6131702587814155, "grad_norm": 1.4426215887069702, "learning_rate": 6.874009129295487e-06, "loss": 0.7162, "step": 4111 }, { "epoch": 0.6133194123349989, "grad_norm": 1.7601100206375122, "learning_rate": 6.8694199031168555e-06, "loss": 0.714, "step": 4112 }, { "epoch": 0.6134685658885823, "grad_norm": 1.1940059661865234, "learning_rate": 6.864831407792218e-06, "loss": 0.6621, "step": 4113 }, { "epoch": 0.6136177194421657, "grad_norm": 1.6119135618209839, "learning_rate": 6.8602436443927975e-06, "loss": 0.6421, "step": 4114 }, { "epoch": 0.6137668729957492, "grad_norm": 1.4676165580749512, "learning_rate": 6.855656613989627e-06, "loss": 0.7351, "step": 4115 }, { "epoch": 0.6139160265493325, "grad_norm": 1.654343605041504, "learning_rate": 6.851070317653585e-06, "loss": 0.7583, "step": 4116 }, { "epoch": 0.614065180102916, "grad_norm": 1.603097677230835, "learning_rate": 6.846484756455368e-06, "loss": 0.6878, "step": 4117 }, { "epoch": 0.6142143336564994, "grad_norm": 2.7016677856445312, "learning_rate": 6.841899931465503e-06, "loss": 0.7888, "step": 4118 }, { "epoch": 0.6143634872100828, "grad_norm": 3.1135501861572266, "learning_rate": 6.837315843754351e-06, "loss": 0.6985, "step": 4119 }, { "epoch": 0.6145126407636662, "grad_norm": 0.5303614735603333, "learning_rate": 6.832732494392092e-06, "loss": 0.2352, "step": 4120 }, { "epoch": 0.6146617943172497, "grad_norm": 1.673911690711975, "learning_rate": 6.828149884448743e-06, "loss": 0.729, "step": 4121 }, { "epoch": 0.614810947870833, "grad_norm": 1.176283359527588, "learning_rate": 6.823568014994138e-06, "loss": 0.7167, "step": 4122 }, { "epoch": 0.6149601014244165, "grad_norm": 1.4266936779022217, "learning_rate": 6.818986887097949e-06, "loss": 0.7467, "step": 4123 }, { "epoch": 0.6151092549779998, "grad_norm": 2.1434988975524902, "learning_rate": 6.814406501829668e-06, "loss": 0.7098, "step": 4124 }, { "epoch": 0.6152584085315833, "grad_norm": 1.4566802978515625, "learning_rate": 6.809826860258617e-06, "loss": 0.7265, "step": 4125 }, { "epoch": 0.6154075620851667, "grad_norm": 1.5443789958953857, "learning_rate": 6.8052479634539395e-06, "loss": 0.6518, "step": 4126 }, { "epoch": 0.6155567156387501, "grad_norm": 0.5141080617904663, "learning_rate": 6.8006698124846106e-06, "loss": 0.2583, "step": 4127 }, { "epoch": 0.6157058691923335, "grad_norm": 1.3666936159133911, "learning_rate": 6.796092408419429e-06, "loss": 0.7607, "step": 4128 }, { "epoch": 0.615855022745917, "grad_norm": 2.352780342102051, "learning_rate": 6.791515752327016e-06, "loss": 0.7359, "step": 4129 }, { "epoch": 0.6160041762995003, "grad_norm": 1.6706708669662476, "learning_rate": 6.786939845275826e-06, "loss": 0.7139, "step": 4130 }, { "epoch": 0.6161533298530838, "grad_norm": 1.3947056531906128, "learning_rate": 6.782364688334127e-06, "loss": 0.7056, "step": 4131 }, { "epoch": 0.6163024834066672, "grad_norm": 2.976505756378174, "learning_rate": 6.777790282570025e-06, "loss": 0.7328, "step": 4132 }, { "epoch": 0.6164516369602506, "grad_norm": 1.7498209476470947, "learning_rate": 6.773216629051444e-06, "loss": 0.7406, "step": 4133 }, { "epoch": 0.616600790513834, "grad_norm": 1.3208063840866089, "learning_rate": 6.768643728846132e-06, "loss": 0.6802, "step": 4134 }, { "epoch": 0.6167499440674175, "grad_norm": 1.906700611114502, "learning_rate": 6.764071583021659e-06, "loss": 0.659, "step": 4135 }, { "epoch": 0.6168990976210008, "grad_norm": 2.34427809715271, "learning_rate": 6.759500192645425e-06, "loss": 0.7148, "step": 4136 }, { "epoch": 0.6170482511745843, "grad_norm": 1.7877485752105713, "learning_rate": 6.754929558784648e-06, "loss": 0.6856, "step": 4137 }, { "epoch": 0.6171974047281676, "grad_norm": 1.455389380455017, "learning_rate": 6.750359682506376e-06, "loss": 0.7406, "step": 4138 }, { "epoch": 0.6173465582817511, "grad_norm": 1.8641157150268555, "learning_rate": 6.745790564877471e-06, "loss": 0.6505, "step": 4139 }, { "epoch": 0.6174957118353345, "grad_norm": 1.8092398643493652, "learning_rate": 6.741222206964622e-06, "loss": 0.6693, "step": 4140 }, { "epoch": 0.6176448653889179, "grad_norm": 1.671208381652832, "learning_rate": 6.7366546098343455e-06, "loss": 0.629, "step": 4141 }, { "epoch": 0.6177940189425013, "grad_norm": 2.2770802974700928, "learning_rate": 6.732087774552978e-06, "loss": 0.7479, "step": 4142 }, { "epoch": 0.6179431724960848, "grad_norm": 1.3466647863388062, "learning_rate": 6.7275217021866705e-06, "loss": 0.6622, "step": 4143 }, { "epoch": 0.6180923260496681, "grad_norm": 1.6983642578125, "learning_rate": 6.722956393801408e-06, "loss": 0.7689, "step": 4144 }, { "epoch": 0.6182414796032516, "grad_norm": 0.4905639588832855, "learning_rate": 6.718391850462986e-06, "loss": 0.2554, "step": 4145 }, { "epoch": 0.618390633156835, "grad_norm": 1.2014120817184448, "learning_rate": 6.7138280732370274e-06, "loss": 0.7161, "step": 4146 }, { "epoch": 0.6185397867104184, "grad_norm": 1.6246998310089111, "learning_rate": 6.709265063188978e-06, "loss": 0.7202, "step": 4147 }, { "epoch": 0.6186889402640018, "grad_norm": 1.866754174232483, "learning_rate": 6.704702821384096e-06, "loss": 0.6727, "step": 4148 }, { "epoch": 0.6188380938175853, "grad_norm": 1.943662166595459, "learning_rate": 6.700141348887472e-06, "loss": 0.6942, "step": 4149 }, { "epoch": 0.6189872473711686, "grad_norm": 2.2672715187072754, "learning_rate": 6.69558064676401e-06, "loss": 0.6236, "step": 4150 }, { "epoch": 0.6191364009247521, "grad_norm": 1.8329319953918457, "learning_rate": 6.691020716078434e-06, "loss": 0.7578, "step": 4151 }, { "epoch": 0.6192855544783354, "grad_norm": 1.4190112352371216, "learning_rate": 6.68646155789529e-06, "loss": 0.7055, "step": 4152 }, { "epoch": 0.6194347080319189, "grad_norm": 1.2315040826797485, "learning_rate": 6.6819031732789405e-06, "loss": 0.7883, "step": 4153 }, { "epoch": 0.6195838615855023, "grad_norm": 2.364542245864868, "learning_rate": 6.677345563293571e-06, "loss": 0.7561, "step": 4154 }, { "epoch": 0.6197330151390857, "grad_norm": 1.7971916198730469, "learning_rate": 6.6727887290031865e-06, "loss": 0.7138, "step": 4155 }, { "epoch": 0.6198821686926691, "grad_norm": 1.4521169662475586, "learning_rate": 6.668232671471605e-06, "loss": 0.6102, "step": 4156 }, { "epoch": 0.6200313222462526, "grad_norm": 2.1520872116088867, "learning_rate": 6.663677391762468e-06, "loss": 0.6929, "step": 4157 }, { "epoch": 0.6201804757998359, "grad_norm": 1.7144724130630493, "learning_rate": 6.65912289093924e-06, "loss": 0.6475, "step": 4158 }, { "epoch": 0.6203296293534194, "grad_norm": 2.2046384811401367, "learning_rate": 6.654569170065195e-06, "loss": 0.6201, "step": 4159 }, { "epoch": 0.6204787829070028, "grad_norm": 1.2096822261810303, "learning_rate": 6.6500162302034265e-06, "loss": 0.6945, "step": 4160 }, { "epoch": 0.6206279364605862, "grad_norm": 1.2983509302139282, "learning_rate": 6.6454640724168514e-06, "loss": 0.7659, "step": 4161 }, { "epoch": 0.6207770900141696, "grad_norm": 3.1565816402435303, "learning_rate": 6.640912697768196e-06, "loss": 0.7574, "step": 4162 }, { "epoch": 0.6209262435677531, "grad_norm": 1.510305643081665, "learning_rate": 6.636362107320011e-06, "loss": 0.7865, "step": 4163 }, { "epoch": 0.6210753971213364, "grad_norm": 4.772711277008057, "learning_rate": 6.631812302134662e-06, "loss": 0.6975, "step": 4164 }, { "epoch": 0.6212245506749199, "grad_norm": 1.6188076734542847, "learning_rate": 6.6272632832743234e-06, "loss": 0.761, "step": 4165 }, { "epoch": 0.6213737042285032, "grad_norm": 1.6626391410827637, "learning_rate": 6.6227150518009965e-06, "loss": 0.6978, "step": 4166 }, { "epoch": 0.6215228577820867, "grad_norm": 1.815168023109436, "learning_rate": 6.6181676087765e-06, "loss": 0.6181, "step": 4167 }, { "epoch": 0.6216720113356701, "grad_norm": 2.499821662902832, "learning_rate": 6.613620955262459e-06, "loss": 0.6311, "step": 4168 }, { "epoch": 0.6218211648892535, "grad_norm": 3.0541067123413086, "learning_rate": 6.60907509232032e-06, "loss": 0.6613, "step": 4169 }, { "epoch": 0.6219703184428369, "grad_norm": 1.7416387796401978, "learning_rate": 6.604530021011344e-06, "loss": 0.6787, "step": 4170 }, { "epoch": 0.6221194719964204, "grad_norm": 1.6571048498153687, "learning_rate": 6.599985742396604e-06, "loss": 0.6164, "step": 4171 }, { "epoch": 0.6222686255500037, "grad_norm": 1.7597018480300903, "learning_rate": 6.595442257536995e-06, "loss": 0.7291, "step": 4172 }, { "epoch": 0.6224177791035871, "grad_norm": 1.6560146808624268, "learning_rate": 6.590899567493221e-06, "loss": 0.7153, "step": 4173 }, { "epoch": 0.6225669326571706, "grad_norm": 1.9885329008102417, "learning_rate": 6.586357673325798e-06, "loss": 0.7728, "step": 4174 }, { "epoch": 0.6227160862107539, "grad_norm": 2.0963521003723145, "learning_rate": 6.58181657609507e-06, "loss": 0.6692, "step": 4175 }, { "epoch": 0.6228652397643374, "grad_norm": 2.0630199909210205, "learning_rate": 6.57727627686118e-06, "loss": 0.6619, "step": 4176 }, { "epoch": 0.6230143933179207, "grad_norm": 1.8982168436050415, "learning_rate": 6.572736776684087e-06, "loss": 0.7149, "step": 4177 }, { "epoch": 0.6231635468715042, "grad_norm": 1.6669375896453857, "learning_rate": 6.568198076623571e-06, "loss": 0.707, "step": 4178 }, { "epoch": 0.6233127004250876, "grad_norm": 1.3763481378555298, "learning_rate": 6.563660177739217e-06, "loss": 0.7041, "step": 4179 }, { "epoch": 0.623461853978671, "grad_norm": 1.8038784265518188, "learning_rate": 6.5591230810904316e-06, "loss": 0.6829, "step": 4180 }, { "epoch": 0.6236110075322544, "grad_norm": 1.8976051807403564, "learning_rate": 6.554586787736425e-06, "loss": 0.6881, "step": 4181 }, { "epoch": 0.6237601610858379, "grad_norm": 8.987482070922852, "learning_rate": 6.550051298736223e-06, "loss": 0.7305, "step": 4182 }, { "epoch": 0.6239093146394212, "grad_norm": 2.295105218887329, "learning_rate": 6.5455166151486645e-06, "loss": 0.7123, "step": 4183 }, { "epoch": 0.6240584681930047, "grad_norm": 2.6433680057525635, "learning_rate": 6.540982738032406e-06, "loss": 0.7074, "step": 4184 }, { "epoch": 0.6242076217465881, "grad_norm": 1.1517900228500366, "learning_rate": 6.536449668445905e-06, "loss": 0.7813, "step": 4185 }, { "epoch": 0.6243567753001715, "grad_norm": 1.518724799156189, "learning_rate": 6.531917407447441e-06, "loss": 0.6584, "step": 4186 }, { "epoch": 0.6245059288537549, "grad_norm": 1.6058101654052734, "learning_rate": 6.527385956095094e-06, "loss": 0.6418, "step": 4187 }, { "epoch": 0.6246550824073384, "grad_norm": 2.157137632369995, "learning_rate": 6.52285531544676e-06, "loss": 0.6616, "step": 4188 }, { "epoch": 0.6248042359609217, "grad_norm": 0.5251557230949402, "learning_rate": 6.518325486560151e-06, "loss": 0.2621, "step": 4189 }, { "epoch": 0.6249533895145052, "grad_norm": 2.3681161403656006, "learning_rate": 6.5137964704927795e-06, "loss": 0.6618, "step": 4190 }, { "epoch": 0.6251025430680885, "grad_norm": 1.3086875677108765, "learning_rate": 6.509268268301976e-06, "loss": 0.7038, "step": 4191 }, { "epoch": 0.625251696621672, "grad_norm": 2.5470619201660156, "learning_rate": 6.504740881044875e-06, "loss": 0.6876, "step": 4192 }, { "epoch": 0.6254008501752554, "grad_norm": 2.641162157058716, "learning_rate": 6.500214309778432e-06, "loss": 0.7478, "step": 4193 }, { "epoch": 0.6255500037288388, "grad_norm": 1.5060312747955322, "learning_rate": 6.495688555559396e-06, "loss": 0.7013, "step": 4194 }, { "epoch": 0.6256991572824222, "grad_norm": 1.6886192560195923, "learning_rate": 6.491163619444341e-06, "loss": 0.7624, "step": 4195 }, { "epoch": 0.6258483108360057, "grad_norm": 3.0904040336608887, "learning_rate": 6.4866395024896335e-06, "loss": 0.6206, "step": 4196 }, { "epoch": 0.625997464389589, "grad_norm": 2.2801060676574707, "learning_rate": 6.4821162057514635e-06, "loss": 0.6983, "step": 4197 }, { "epoch": 0.6261466179431725, "grad_norm": 1.7159005403518677, "learning_rate": 6.477593730285821e-06, "loss": 0.744, "step": 4198 }, { "epoch": 0.6262957714967559, "grad_norm": 1.812872290611267, "learning_rate": 6.4730720771485104e-06, "loss": 0.7024, "step": 4199 }, { "epoch": 0.6264449250503393, "grad_norm": 1.5252591371536255, "learning_rate": 6.468551247395136e-06, "loss": 0.6507, "step": 4200 }, { "epoch": 0.6265940786039227, "grad_norm": 2.1406259536743164, "learning_rate": 6.464031242081114e-06, "loss": 0.6616, "step": 4201 }, { "epoch": 0.6267432321575062, "grad_norm": 2.0472278594970703, "learning_rate": 6.459512062261674e-06, "loss": 0.7082, "step": 4202 }, { "epoch": 0.6268923857110895, "grad_norm": 1.8938335180282593, "learning_rate": 6.4549937089918464e-06, "loss": 0.749, "step": 4203 }, { "epoch": 0.627041539264673, "grad_norm": 2.091913938522339, "learning_rate": 6.450476183326466e-06, "loss": 0.7003, "step": 4204 }, { "epoch": 0.6271906928182563, "grad_norm": 1.9300293922424316, "learning_rate": 6.445959486320184e-06, "loss": 0.6875, "step": 4205 }, { "epoch": 0.6273398463718398, "grad_norm": 2.95285701751709, "learning_rate": 6.441443619027445e-06, "loss": 0.6773, "step": 4206 }, { "epoch": 0.6274889999254232, "grad_norm": 1.9782322645187378, "learning_rate": 6.4369285825025115e-06, "loss": 0.587, "step": 4207 }, { "epoch": 0.6276381534790066, "grad_norm": 1.636971354484558, "learning_rate": 6.432414377799449e-06, "loss": 0.761, "step": 4208 }, { "epoch": 0.62778730703259, "grad_norm": 3.0430407524108887, "learning_rate": 6.4279010059721194e-06, "loss": 0.6354, "step": 4209 }, { "epoch": 0.6279364605861735, "grad_norm": 1.2264729738235474, "learning_rate": 6.423388468074207e-06, "loss": 0.7118, "step": 4210 }, { "epoch": 0.6280856141397568, "grad_norm": 1.6233872175216675, "learning_rate": 6.418876765159195e-06, "loss": 0.6537, "step": 4211 }, { "epoch": 0.6282347676933403, "grad_norm": 1.8954890966415405, "learning_rate": 6.414365898280362e-06, "loss": 0.7842, "step": 4212 }, { "epoch": 0.6283839212469237, "grad_norm": 2.2501943111419678, "learning_rate": 6.409855868490799e-06, "loss": 0.7085, "step": 4213 }, { "epoch": 0.6285330748005071, "grad_norm": 2.346285581588745, "learning_rate": 6.405346676843406e-06, "loss": 0.7464, "step": 4214 }, { "epoch": 0.6286822283540905, "grad_norm": 1.5228614807128906, "learning_rate": 6.400838324390878e-06, "loss": 0.6736, "step": 4215 }, { "epoch": 0.628831381907674, "grad_norm": 1.4624254703521729, "learning_rate": 6.3963308121857234e-06, "loss": 0.6678, "step": 4216 }, { "epoch": 0.6289805354612573, "grad_norm": 2.6819489002227783, "learning_rate": 6.391824141280247e-06, "loss": 0.6504, "step": 4217 }, { "epoch": 0.6291296890148408, "grad_norm": 1.6049575805664062, "learning_rate": 6.387318312726558e-06, "loss": 0.7401, "step": 4218 }, { "epoch": 0.6292788425684241, "grad_norm": 1.4335554838180542, "learning_rate": 6.382813327576574e-06, "loss": 0.6493, "step": 4219 }, { "epoch": 0.6294279961220076, "grad_norm": 1.3205571174621582, "learning_rate": 6.378309186882016e-06, "loss": 0.6819, "step": 4220 }, { "epoch": 0.629577149675591, "grad_norm": 1.5087835788726807, "learning_rate": 6.373805891694398e-06, "loss": 0.6879, "step": 4221 }, { "epoch": 0.6297263032291744, "grad_norm": 1.6704767942428589, "learning_rate": 6.369303443065047e-06, "loss": 0.7345, "step": 4222 }, { "epoch": 0.6298754567827578, "grad_norm": 1.3554821014404297, "learning_rate": 6.364801842045088e-06, "loss": 0.6262, "step": 4223 }, { "epoch": 0.6300246103363413, "grad_norm": 1.6386882066726685, "learning_rate": 6.360301089685445e-06, "loss": 0.653, "step": 4224 }, { "epoch": 0.6301737638899246, "grad_norm": 1.9462547302246094, "learning_rate": 6.355801187036854e-06, "loss": 0.732, "step": 4225 }, { "epoch": 0.6303229174435081, "grad_norm": 2.1085288524627686, "learning_rate": 6.3513021351498404e-06, "loss": 0.6755, "step": 4226 }, { "epoch": 0.6304720709970915, "grad_norm": 1.5857281684875488, "learning_rate": 6.346803935074737e-06, "loss": 0.6108, "step": 4227 }, { "epoch": 0.6306212245506749, "grad_norm": 1.5459142923355103, "learning_rate": 6.342306587861683e-06, "loss": 0.6423, "step": 4228 }, { "epoch": 0.6307703781042583, "grad_norm": 4.363780975341797, "learning_rate": 6.337810094560609e-06, "loss": 0.6644, "step": 4229 }, { "epoch": 0.6309195316578418, "grad_norm": 1.3243211507797241, "learning_rate": 6.333314456221249e-06, "loss": 0.763, "step": 4230 }, { "epoch": 0.6310686852114251, "grad_norm": 1.745556116104126, "learning_rate": 6.328819673893143e-06, "loss": 0.6896, "step": 4231 }, { "epoch": 0.6312178387650086, "grad_norm": 1.4658805131912231, "learning_rate": 6.324325748625619e-06, "loss": 0.7693, "step": 4232 }, { "epoch": 0.631366992318592, "grad_norm": 2.5462191104888916, "learning_rate": 6.3198326814678225e-06, "loss": 0.6809, "step": 4233 }, { "epoch": 0.6315161458721754, "grad_norm": 1.5253355503082275, "learning_rate": 6.31534047346868e-06, "loss": 0.7556, "step": 4234 }, { "epoch": 0.6316652994257588, "grad_norm": 1.5879379510879517, "learning_rate": 6.310849125676934e-06, "loss": 0.7494, "step": 4235 }, { "epoch": 0.6318144529793422, "grad_norm": 2.6166160106658936, "learning_rate": 6.306358639141109e-06, "loss": 0.7144, "step": 4236 }, { "epoch": 0.6319636065329256, "grad_norm": 1.5289918184280396, "learning_rate": 6.301869014909548e-06, "loss": 0.768, "step": 4237 }, { "epoch": 0.6321127600865091, "grad_norm": 4.532857894897461, "learning_rate": 6.297380254030376e-06, "loss": 0.5993, "step": 4238 }, { "epoch": 0.6322619136400924, "grad_norm": 1.919459342956543, "learning_rate": 6.292892357551527e-06, "loss": 0.6598, "step": 4239 }, { "epoch": 0.6324110671936759, "grad_norm": 1.2460975646972656, "learning_rate": 6.288405326520726e-06, "loss": 0.6913, "step": 4240 }, { "epoch": 0.6325602207472593, "grad_norm": 2.0021581649780273, "learning_rate": 6.283919161985501e-06, "loss": 0.6105, "step": 4241 }, { "epoch": 0.6327093743008427, "grad_norm": 2.3681044578552246, "learning_rate": 6.279433864993176e-06, "loss": 0.6714, "step": 4242 }, { "epoch": 0.6328585278544261, "grad_norm": 1.5565961599349976, "learning_rate": 6.274949436590869e-06, "loss": 0.7471, "step": 4243 }, { "epoch": 0.6330076814080096, "grad_norm": 2.531525135040283, "learning_rate": 6.2704658778255e-06, "loss": 0.7262, "step": 4244 }, { "epoch": 0.6331568349615929, "grad_norm": 1.5310461521148682, "learning_rate": 6.2659831897437895e-06, "loss": 0.7219, "step": 4245 }, { "epoch": 0.6333059885151764, "grad_norm": 1.8918356895446777, "learning_rate": 6.261501373392245e-06, "loss": 0.6386, "step": 4246 }, { "epoch": 0.6334551420687597, "grad_norm": 2.4693963527679443, "learning_rate": 6.257020429817177e-06, "loss": 0.6762, "step": 4247 }, { "epoch": 0.6336042956223432, "grad_norm": 1.3142776489257812, "learning_rate": 6.252540360064689e-06, "loss": 0.7229, "step": 4248 }, { "epoch": 0.6337534491759266, "grad_norm": 1.5629382133483887, "learning_rate": 6.248061165180682e-06, "loss": 0.6951, "step": 4249 }, { "epoch": 0.63390260272951, "grad_norm": 1.8097251653671265, "learning_rate": 6.243582846210856e-06, "loss": 0.7605, "step": 4250 }, { "epoch": 0.6340517562830934, "grad_norm": 1.4359604120254517, "learning_rate": 6.239105404200698e-06, "loss": 0.721, "step": 4251 }, { "epoch": 0.6342009098366769, "grad_norm": 1.3283536434173584, "learning_rate": 6.2346288401955e-06, "loss": 0.751, "step": 4252 }, { "epoch": 0.6343500633902602, "grad_norm": 2.0604841709136963, "learning_rate": 6.230153155240339e-06, "loss": 0.6464, "step": 4253 }, { "epoch": 0.6344992169438437, "grad_norm": 2.451479434967041, "learning_rate": 6.225678350380102e-06, "loss": 0.7448, "step": 4254 }, { "epoch": 0.6346483704974271, "grad_norm": 1.3146766424179077, "learning_rate": 6.221204426659452e-06, "loss": 0.684, "step": 4255 }, { "epoch": 0.6347975240510105, "grad_norm": 1.4732213020324707, "learning_rate": 6.21673138512286e-06, "loss": 0.6642, "step": 4256 }, { "epoch": 0.6349466776045939, "grad_norm": 1.434781789779663, "learning_rate": 6.212259226814583e-06, "loss": 0.6484, "step": 4257 }, { "epoch": 0.6350958311581774, "grad_norm": 1.376650333404541, "learning_rate": 6.207787952778679e-06, "loss": 0.7738, "step": 4258 }, { "epoch": 0.6352449847117607, "grad_norm": 2.1216845512390137, "learning_rate": 6.203317564058993e-06, "loss": 0.7467, "step": 4259 }, { "epoch": 0.6353941382653442, "grad_norm": 2.103663682937622, "learning_rate": 6.1988480616991635e-06, "loss": 0.657, "step": 4260 }, { "epoch": 0.6355432918189275, "grad_norm": 1.8096301555633545, "learning_rate": 6.19437944674263e-06, "loss": 0.6382, "step": 4261 }, { "epoch": 0.635692445372511, "grad_norm": 1.2352467775344849, "learning_rate": 6.189911720232612e-06, "loss": 0.6589, "step": 4262 }, { "epoch": 0.6358415989260944, "grad_norm": 1.8598921298980713, "learning_rate": 6.185444883212135e-06, "loss": 0.739, "step": 4263 }, { "epoch": 0.6359907524796778, "grad_norm": 1.7905588150024414, "learning_rate": 6.180978936724011e-06, "loss": 0.6819, "step": 4264 }, { "epoch": 0.6361399060332612, "grad_norm": 1.7892320156097412, "learning_rate": 6.176513881810844e-06, "loss": 0.6205, "step": 4265 }, { "epoch": 0.6362890595868447, "grad_norm": 1.4306457042694092, "learning_rate": 6.172049719515023e-06, "loss": 0.6707, "step": 4266 }, { "epoch": 0.636438213140428, "grad_norm": 1.6417509317398071, "learning_rate": 6.167586450878743e-06, "loss": 0.5965, "step": 4267 }, { "epoch": 0.6365873666940115, "grad_norm": 1.9593991041183472, "learning_rate": 6.163124076943978e-06, "loss": 0.7641, "step": 4268 }, { "epoch": 0.6367365202475949, "grad_norm": 1.6888298988342285, "learning_rate": 6.158662598752501e-06, "loss": 0.6641, "step": 4269 }, { "epoch": 0.6368856738011783, "grad_norm": 1.5274288654327393, "learning_rate": 6.154202017345872e-06, "loss": 0.6466, "step": 4270 }, { "epoch": 0.6370348273547617, "grad_norm": 1.7501877546310425, "learning_rate": 6.1497423337654365e-06, "loss": 0.6686, "step": 4271 }, { "epoch": 0.6371839809083452, "grad_norm": 2.055830240249634, "learning_rate": 6.145283549052342e-06, "loss": 0.6622, "step": 4272 }, { "epoch": 0.6373331344619285, "grad_norm": 1.6631487607955933, "learning_rate": 6.140825664247523e-06, "loss": 0.766, "step": 4273 }, { "epoch": 0.637482288015512, "grad_norm": 1.4579617977142334, "learning_rate": 6.136368680391695e-06, "loss": 0.7752, "step": 4274 }, { "epoch": 0.6376314415690953, "grad_norm": 2.194614887237549, "learning_rate": 6.1319125985253754e-06, "loss": 0.6825, "step": 4275 }, { "epoch": 0.6377805951226788, "grad_norm": 3.1887459754943848, "learning_rate": 6.1274574196888606e-06, "loss": 0.7397, "step": 4276 }, { "epoch": 0.6379297486762622, "grad_norm": 1.5666182041168213, "learning_rate": 6.123003144922242e-06, "loss": 0.6612, "step": 4277 }, { "epoch": 0.6380789022298456, "grad_norm": 2.104363203048706, "learning_rate": 6.1185497752654e-06, "loss": 0.7255, "step": 4278 }, { "epoch": 0.638228055783429, "grad_norm": 1.0319231748580933, "learning_rate": 6.114097311757996e-06, "loss": 0.7317, "step": 4279 }, { "epoch": 0.6383772093370125, "grad_norm": 0.5360146760940552, "learning_rate": 6.109645755439495e-06, "loss": 0.2654, "step": 4280 }, { "epoch": 0.6385263628905958, "grad_norm": 1.6783788204193115, "learning_rate": 6.105195107349137e-06, "loss": 0.7167, "step": 4281 }, { "epoch": 0.6386755164441793, "grad_norm": 1.6648074388504028, "learning_rate": 6.100745368525955e-06, "loss": 0.593, "step": 4282 }, { "epoch": 0.6388246699977627, "grad_norm": 1.4632936716079712, "learning_rate": 6.09629654000877e-06, "loss": 0.7352, "step": 4283 }, { "epoch": 0.6389738235513461, "grad_norm": 1.31744384765625, "learning_rate": 6.091848622836187e-06, "loss": 0.6449, "step": 4284 }, { "epoch": 0.6391229771049295, "grad_norm": 2.5209293365478516, "learning_rate": 6.087401618046602e-06, "loss": 0.7527, "step": 4285 }, { "epoch": 0.639272130658513, "grad_norm": 1.9284700155258179, "learning_rate": 6.082955526678199e-06, "loss": 0.595, "step": 4286 }, { "epoch": 0.6394212842120963, "grad_norm": 1.6877341270446777, "learning_rate": 6.078510349768942e-06, "loss": 0.6697, "step": 4287 }, { "epoch": 0.6395704377656798, "grad_norm": 1.8198860883712769, "learning_rate": 6.074066088356587e-06, "loss": 0.7078, "step": 4288 }, { "epoch": 0.6397195913192631, "grad_norm": 1.5439482927322388, "learning_rate": 6.069622743478681e-06, "loss": 0.602, "step": 4289 }, { "epoch": 0.6398687448728466, "grad_norm": 1.5218864679336548, "learning_rate": 6.065180316172547e-06, "loss": 0.6071, "step": 4290 }, { "epoch": 0.64001789842643, "grad_norm": 1.5184732675552368, "learning_rate": 6.060738807475295e-06, "loss": 0.6519, "step": 4291 }, { "epoch": 0.6401670519800134, "grad_norm": 2.1369028091430664, "learning_rate": 6.056298218423831e-06, "loss": 0.7567, "step": 4292 }, { "epoch": 0.6403162055335968, "grad_norm": 0.5536779761314392, "learning_rate": 6.051858550054832e-06, "loss": 0.2399, "step": 4293 }, { "epoch": 0.6404653590871803, "grad_norm": 1.8487671613693237, "learning_rate": 6.047419803404772e-06, "loss": 0.6672, "step": 4294 }, { "epoch": 0.6406145126407636, "grad_norm": 1.6636244058609009, "learning_rate": 6.042981979509904e-06, "loss": 0.6498, "step": 4295 }, { "epoch": 0.6407636661943471, "grad_norm": 0.5224639177322388, "learning_rate": 6.038545079406264e-06, "loss": 0.2483, "step": 4296 }, { "epoch": 0.6409128197479305, "grad_norm": 2.137129545211792, "learning_rate": 6.034109104129673e-06, "loss": 0.5795, "step": 4297 }, { "epoch": 0.6410619733015139, "grad_norm": 1.9409979581832886, "learning_rate": 6.029674054715744e-06, "loss": 0.7647, "step": 4298 }, { "epoch": 0.6412111268550973, "grad_norm": 2.21157169342041, "learning_rate": 6.025239932199864e-06, "loss": 0.76, "step": 4299 }, { "epoch": 0.6413602804086808, "grad_norm": 1.3808739185333252, "learning_rate": 6.020806737617211e-06, "loss": 0.6499, "step": 4300 }, { "epoch": 0.6415094339622641, "grad_norm": 1.696022391319275, "learning_rate": 6.016374472002739e-06, "loss": 0.7383, "step": 4301 }, { "epoch": 0.6416585875158476, "grad_norm": 1.594355821609497, "learning_rate": 6.0119431363911875e-06, "loss": 0.6849, "step": 4302 }, { "epoch": 0.641807741069431, "grad_norm": 1.7869912385940552, "learning_rate": 6.007512731817085e-06, "loss": 0.7262, "step": 4303 }, { "epoch": 0.6419568946230144, "grad_norm": 3.974478006362915, "learning_rate": 6.0030832593147326e-06, "loss": 0.6247, "step": 4304 }, { "epoch": 0.6421060481765978, "grad_norm": 1.6670024394989014, "learning_rate": 5.998654719918223e-06, "loss": 0.7388, "step": 4305 }, { "epoch": 0.6422552017301812, "grad_norm": 2.0094988346099854, "learning_rate": 5.994227114661423e-06, "loss": 0.6907, "step": 4306 }, { "epoch": 0.6424043552837646, "grad_norm": 1.8419015407562256, "learning_rate": 5.989800444577991e-06, "loss": 0.7037, "step": 4307 }, { "epoch": 0.6425535088373481, "grad_norm": 1.6608576774597168, "learning_rate": 5.985374710701358e-06, "loss": 0.7093, "step": 4308 }, { "epoch": 0.6427026623909314, "grad_norm": 1.7019952535629272, "learning_rate": 5.980949914064742e-06, "loss": 0.7524, "step": 4309 }, { "epoch": 0.6428518159445149, "grad_norm": 1.49445378780365, "learning_rate": 5.976526055701137e-06, "loss": 0.6902, "step": 4310 }, { "epoch": 0.6430009694980983, "grad_norm": 2.013037919998169, "learning_rate": 5.972103136643326e-06, "loss": 0.7129, "step": 4311 }, { "epoch": 0.6431501230516817, "grad_norm": 1.3182475566864014, "learning_rate": 5.967681157923864e-06, "loss": 0.6543, "step": 4312 }, { "epoch": 0.6432992766052651, "grad_norm": 1.4475274085998535, "learning_rate": 5.963260120575089e-06, "loss": 0.6517, "step": 4313 }, { "epoch": 0.6434484301588486, "grad_norm": 1.861732006072998, "learning_rate": 5.9588400256291204e-06, "loss": 0.6593, "step": 4314 }, { "epoch": 0.6435975837124319, "grad_norm": 2.8832626342773438, "learning_rate": 5.954420874117864e-06, "loss": 0.7274, "step": 4315 }, { "epoch": 0.6437467372660154, "grad_norm": 2.8090217113494873, "learning_rate": 5.950002667072994e-06, "loss": 0.6797, "step": 4316 }, { "epoch": 0.6438958908195987, "grad_norm": 1.72067129611969, "learning_rate": 5.945585405525971e-06, "loss": 0.7188, "step": 4317 }, { "epoch": 0.6440450443731822, "grad_norm": 2.476433277130127, "learning_rate": 5.941169090508032e-06, "loss": 0.7692, "step": 4318 }, { "epoch": 0.6441941979267656, "grad_norm": 1.6948060989379883, "learning_rate": 5.936753723050192e-06, "loss": 0.6856, "step": 4319 }, { "epoch": 0.644343351480349, "grad_norm": 0.5921309590339661, "learning_rate": 5.932339304183251e-06, "loss": 0.2735, "step": 4320 }, { "epoch": 0.6444925050339324, "grad_norm": 2.0060911178588867, "learning_rate": 5.92792583493778e-06, "loss": 0.715, "step": 4321 }, { "epoch": 0.6446416585875159, "grad_norm": 2.3900673389434814, "learning_rate": 5.923513316344135e-06, "loss": 0.6949, "step": 4322 }, { "epoch": 0.6447908121410992, "grad_norm": 1.2868684530258179, "learning_rate": 5.919101749432441e-06, "loss": 0.6493, "step": 4323 }, { "epoch": 0.6449399656946827, "grad_norm": 1.8211215734481812, "learning_rate": 5.914691135232613e-06, "loss": 0.7589, "step": 4324 }, { "epoch": 0.6450891192482661, "grad_norm": 1.8054903745651245, "learning_rate": 5.910281474774335e-06, "loss": 0.6546, "step": 4325 }, { "epoch": 0.6452382728018495, "grad_norm": 1.8043298721313477, "learning_rate": 5.905872769087071e-06, "loss": 0.7301, "step": 4326 }, { "epoch": 0.6453874263554329, "grad_norm": 1.6385784149169922, "learning_rate": 5.901465019200059e-06, "loss": 0.7024, "step": 4327 }, { "epoch": 0.6455365799090164, "grad_norm": 1.195682406425476, "learning_rate": 5.897058226142321e-06, "loss": 0.6608, "step": 4328 }, { "epoch": 0.6456857334625997, "grad_norm": 2.75079345703125, "learning_rate": 5.892652390942645e-06, "loss": 0.7056, "step": 4329 }, { "epoch": 0.6458348870161832, "grad_norm": 1.1999355554580688, "learning_rate": 5.888247514629607e-06, "loss": 0.7231, "step": 4330 }, { "epoch": 0.6459840405697665, "grad_norm": 2.6289050579071045, "learning_rate": 5.883843598231551e-06, "loss": 0.6489, "step": 4331 }, { "epoch": 0.64613319412335, "grad_norm": 1.6355366706848145, "learning_rate": 5.879440642776597e-06, "loss": 0.6704, "step": 4332 }, { "epoch": 0.6462823476769334, "grad_norm": 2.225332498550415, "learning_rate": 5.875038649292648e-06, "loss": 0.5884, "step": 4333 }, { "epoch": 0.6464315012305168, "grad_norm": 1.47904634475708, "learning_rate": 5.8706376188073775e-06, "loss": 0.6781, "step": 4334 }, { "epoch": 0.6465806547841002, "grad_norm": 4.253615379333496, "learning_rate": 5.866237552348231e-06, "loss": 0.7087, "step": 4335 }, { "epoch": 0.6467298083376837, "grad_norm": 1.4903998374938965, "learning_rate": 5.861838450942434e-06, "loss": 0.7439, "step": 4336 }, { "epoch": 0.646878961891267, "grad_norm": 1.5703983306884766, "learning_rate": 5.857440315616987e-06, "loss": 0.6952, "step": 4337 }, { "epoch": 0.6470281154448505, "grad_norm": 1.6998728513717651, "learning_rate": 5.853043147398656e-06, "loss": 0.589, "step": 4338 }, { "epoch": 0.6471772689984339, "grad_norm": 1.4484593868255615, "learning_rate": 5.848646947313996e-06, "loss": 0.6947, "step": 4339 }, { "epoch": 0.6473264225520173, "grad_norm": 1.4568895101547241, "learning_rate": 5.844251716389324e-06, "loss": 0.6942, "step": 4340 }, { "epoch": 0.6474755761056007, "grad_norm": 0.5374563336372375, "learning_rate": 5.839857455650732e-06, "loss": 0.2617, "step": 4341 }, { "epoch": 0.6476247296591842, "grad_norm": 1.9671188592910767, "learning_rate": 5.835464166124096e-06, "loss": 0.6708, "step": 4342 }, { "epoch": 0.6477738832127675, "grad_norm": 1.9561405181884766, "learning_rate": 5.831071848835053e-06, "loss": 0.6339, "step": 4343 }, { "epoch": 0.647923036766351, "grad_norm": 1.7458710670471191, "learning_rate": 5.8266805048090216e-06, "loss": 0.8145, "step": 4344 }, { "epoch": 0.6480721903199343, "grad_norm": 0.5626246929168701, "learning_rate": 5.82229013507118e-06, "loss": 0.2354, "step": 4345 }, { "epoch": 0.6482213438735178, "grad_norm": 1.8117691278457642, "learning_rate": 5.817900740646496e-06, "loss": 0.7063, "step": 4346 }, { "epoch": 0.6483704974271012, "grad_norm": 1.3932774066925049, "learning_rate": 5.813512322559699e-06, "loss": 0.6169, "step": 4347 }, { "epoch": 0.6485196509806846, "grad_norm": 1.8160942792892456, "learning_rate": 5.809124881835299e-06, "loss": 0.6945, "step": 4348 }, { "epoch": 0.648668804534268, "grad_norm": 2.7967183589935303, "learning_rate": 5.804738419497558e-06, "loss": 0.6663, "step": 4349 }, { "epoch": 0.6488179580878515, "grad_norm": 1.339351773262024, "learning_rate": 5.800352936570543e-06, "loss": 0.684, "step": 4350 }, { "epoch": 0.6489671116414348, "grad_norm": 1.600953221321106, "learning_rate": 5.795968434078059e-06, "loss": 0.7149, "step": 4351 }, { "epoch": 0.6491162651950183, "grad_norm": 1.1067110300064087, "learning_rate": 5.791584913043699e-06, "loss": 0.7214, "step": 4352 }, { "epoch": 0.6492654187486017, "grad_norm": 1.515522837638855, "learning_rate": 5.787202374490826e-06, "loss": 0.695, "step": 4353 }, { "epoch": 0.6494145723021851, "grad_norm": 2.008253335952759, "learning_rate": 5.782820819442576e-06, "loss": 0.685, "step": 4354 }, { "epoch": 0.6495637258557685, "grad_norm": 1.4341086149215698, "learning_rate": 5.778440248921842e-06, "loss": 0.7312, "step": 4355 }, { "epoch": 0.649712879409352, "grad_norm": 1.6886094808578491, "learning_rate": 5.7740606639513e-06, "loss": 0.7272, "step": 4356 }, { "epoch": 0.6498620329629353, "grad_norm": 3.2093379497528076, "learning_rate": 5.7696820655533984e-06, "loss": 0.6993, "step": 4357 }, { "epoch": 0.6500111865165188, "grad_norm": 1.4095252752304077, "learning_rate": 5.765304454750333e-06, "loss": 0.7732, "step": 4358 }, { "epoch": 0.6501603400701021, "grad_norm": 1.3840166330337524, "learning_rate": 5.760927832564103e-06, "loss": 0.6686, "step": 4359 }, { "epoch": 0.6503094936236856, "grad_norm": 1.387215495109558, "learning_rate": 5.756552200016454e-06, "loss": 0.6557, "step": 4360 }, { "epoch": 0.650458647177269, "grad_norm": 1.3501533269882202, "learning_rate": 5.752177558128899e-06, "loss": 0.6977, "step": 4361 }, { "epoch": 0.6506078007308524, "grad_norm": 1.4571022987365723, "learning_rate": 5.74780390792273e-06, "loss": 0.6087, "step": 4362 }, { "epoch": 0.6507569542844358, "grad_norm": 1.3017090559005737, "learning_rate": 5.743431250419007e-06, "loss": 0.7292, "step": 4363 }, { "epoch": 0.6509061078380193, "grad_norm": 1.2918291091918945, "learning_rate": 5.73905958663855e-06, "loss": 0.7116, "step": 4364 }, { "epoch": 0.6510552613916026, "grad_norm": 1.408965826034546, "learning_rate": 5.734688917601952e-06, "loss": 0.7057, "step": 4365 }, { "epoch": 0.6512044149451861, "grad_norm": 1.6046184301376343, "learning_rate": 5.7303192443295805e-06, "loss": 0.6902, "step": 4366 }, { "epoch": 0.6513535684987695, "grad_norm": 2.303786277770996, "learning_rate": 5.725950567841552e-06, "loss": 0.7291, "step": 4367 }, { "epoch": 0.6515027220523529, "grad_norm": 2.242067813873291, "learning_rate": 5.7215828891577705e-06, "loss": 0.6914, "step": 4368 }, { "epoch": 0.6516518756059363, "grad_norm": 2.1367833614349365, "learning_rate": 5.717216209297902e-06, "loss": 0.7336, "step": 4369 }, { "epoch": 0.6518010291595198, "grad_norm": 0.5371888279914856, "learning_rate": 5.712850529281366e-06, "loss": 0.2521, "step": 4370 }, { "epoch": 0.6519501827131031, "grad_norm": 1.9100117683410645, "learning_rate": 5.708485850127365e-06, "loss": 0.6429, "step": 4371 }, { "epoch": 0.6520993362666866, "grad_norm": 1.8519777059555054, "learning_rate": 5.704122172854863e-06, "loss": 0.6804, "step": 4372 }, { "epoch": 0.65224848982027, "grad_norm": 1.6606851816177368, "learning_rate": 5.6997594984825795e-06, "loss": 0.657, "step": 4373 }, { "epoch": 0.6523976433738534, "grad_norm": 1.978601098060608, "learning_rate": 5.695397828029016e-06, "loss": 0.6637, "step": 4374 }, { "epoch": 0.6525467969274368, "grad_norm": 1.8835886716842651, "learning_rate": 5.69103716251243e-06, "loss": 0.661, "step": 4375 }, { "epoch": 0.6526959504810202, "grad_norm": 0.5494411587715149, "learning_rate": 5.686677502950848e-06, "loss": 0.2519, "step": 4376 }, { "epoch": 0.6528451040346036, "grad_norm": 3.0177996158599854, "learning_rate": 5.682318850362061e-06, "loss": 0.6991, "step": 4377 }, { "epoch": 0.6529942575881871, "grad_norm": 1.4293850660324097, "learning_rate": 5.677961205763626e-06, "loss": 0.756, "step": 4378 }, { "epoch": 0.6531434111417704, "grad_norm": 1.226680874824524, "learning_rate": 5.673604570172857e-06, "loss": 0.6721, "step": 4379 }, { "epoch": 0.6532925646953539, "grad_norm": 1.548054814338684, "learning_rate": 5.669248944606842e-06, "loss": 0.7521, "step": 4380 }, { "epoch": 0.6534417182489373, "grad_norm": 0.5232499837875366, "learning_rate": 5.66489433008243e-06, "loss": 0.2461, "step": 4381 }, { "epoch": 0.6535908718025207, "grad_norm": 1.2550864219665527, "learning_rate": 5.660540727616237e-06, "loss": 0.7554, "step": 4382 }, { "epoch": 0.6537400253561041, "grad_norm": 2.3822433948516846, "learning_rate": 5.656188138224633e-06, "loss": 0.6823, "step": 4383 }, { "epoch": 0.6538891789096876, "grad_norm": 2.0275983810424805, "learning_rate": 5.651836562923761e-06, "loss": 0.6851, "step": 4384 }, { "epoch": 0.6540383324632709, "grad_norm": 1.1303808689117432, "learning_rate": 5.647486002729523e-06, "loss": 0.7109, "step": 4385 }, { "epoch": 0.6541874860168544, "grad_norm": 1.4208344221115112, "learning_rate": 5.643136458657586e-06, "loss": 0.657, "step": 4386 }, { "epoch": 0.6543366395704378, "grad_norm": 2.322322368621826, "learning_rate": 5.638787931723379e-06, "loss": 0.718, "step": 4387 }, { "epoch": 0.6544857931240212, "grad_norm": 1.9910364151000977, "learning_rate": 5.634440422942098e-06, "loss": 0.5916, "step": 4388 }, { "epoch": 0.6546349466776046, "grad_norm": 1.2649263143539429, "learning_rate": 5.630093933328688e-06, "loss": 0.7098, "step": 4389 }, { "epoch": 0.654784100231188, "grad_norm": 1.6163370609283447, "learning_rate": 5.625748463897871e-06, "loss": 0.6977, "step": 4390 }, { "epoch": 0.6549332537847714, "grad_norm": 1.9872795343399048, "learning_rate": 5.621404015664125e-06, "loss": 0.756, "step": 4391 }, { "epoch": 0.6550824073383549, "grad_norm": 3.004342555999756, "learning_rate": 5.617060589641685e-06, "loss": 0.6976, "step": 4392 }, { "epoch": 0.6552315608919382, "grad_norm": 1.8155876398086548, "learning_rate": 5.612718186844548e-06, "loss": 0.7403, "step": 4393 }, { "epoch": 0.6553807144455217, "grad_norm": 2.0058257579803467, "learning_rate": 5.608376808286491e-06, "loss": 0.7269, "step": 4394 }, { "epoch": 0.6555298679991051, "grad_norm": 1.7006341218948364, "learning_rate": 5.604036454981024e-06, "loss": 0.7244, "step": 4395 }, { "epoch": 0.6556790215526885, "grad_norm": 1.8929401636123657, "learning_rate": 5.599697127941432e-06, "loss": 0.6578, "step": 4396 }, { "epoch": 0.6558281751062719, "grad_norm": 1.5723003149032593, "learning_rate": 5.5953588281807644e-06, "loss": 0.6407, "step": 4397 }, { "epoch": 0.6559773286598554, "grad_norm": 1.5597765445709229, "learning_rate": 5.591021556711818e-06, "loss": 0.7581, "step": 4398 }, { "epoch": 0.6561264822134387, "grad_norm": 2.192612886428833, "learning_rate": 5.586685314547159e-06, "loss": 0.716, "step": 4399 }, { "epoch": 0.6562756357670222, "grad_norm": 5.551145553588867, "learning_rate": 5.582350102699112e-06, "loss": 0.7387, "step": 4400 }, { "epoch": 0.6564247893206056, "grad_norm": 2.2038049697875977, "learning_rate": 5.578015922179764e-06, "loss": 0.6198, "step": 4401 }, { "epoch": 0.656573942874189, "grad_norm": 4.019809246063232, "learning_rate": 5.573682774000944e-06, "loss": 0.762, "step": 4402 }, { "epoch": 0.6567230964277724, "grad_norm": 2.802577495574951, "learning_rate": 5.5693506591742705e-06, "loss": 0.6686, "step": 4403 }, { "epoch": 0.6568722499813558, "grad_norm": 0.5388402342796326, "learning_rate": 5.5650195787110915e-06, "loss": 0.2577, "step": 4404 }, { "epoch": 0.6570214035349392, "grad_norm": 3.0188417434692383, "learning_rate": 5.560689533622529e-06, "loss": 0.6968, "step": 4405 }, { "epoch": 0.6571705570885227, "grad_norm": 2.498260259628296, "learning_rate": 5.55636052491946e-06, "loss": 0.7026, "step": 4406 }, { "epoch": 0.657319710642106, "grad_norm": 1.9908671379089355, "learning_rate": 5.552032553612523e-06, "loss": 0.6939, "step": 4407 }, { "epoch": 0.6574688641956895, "grad_norm": 1.8239784240722656, "learning_rate": 5.547705620712103e-06, "loss": 0.776, "step": 4408 }, { "epoch": 0.6576180177492729, "grad_norm": 2.454294204711914, "learning_rate": 5.543379727228354e-06, "loss": 0.6473, "step": 4409 }, { "epoch": 0.6577671713028563, "grad_norm": 1.3614141941070557, "learning_rate": 5.539054874171183e-06, "loss": 0.6808, "step": 4410 }, { "epoch": 0.6579163248564397, "grad_norm": 1.8461554050445557, "learning_rate": 5.534731062550257e-06, "loss": 0.615, "step": 4411 }, { "epoch": 0.6580654784100232, "grad_norm": 1.4894474744796753, "learning_rate": 5.530408293374995e-06, "loss": 0.6336, "step": 4412 }, { "epoch": 0.6582146319636065, "grad_norm": 2.3852522373199463, "learning_rate": 5.526086567654581e-06, "loss": 0.6716, "step": 4413 }, { "epoch": 0.65836378551719, "grad_norm": 1.8563623428344727, "learning_rate": 5.521765886397938e-06, "loss": 0.6523, "step": 4414 }, { "epoch": 0.6585129390707734, "grad_norm": 1.4187408685684204, "learning_rate": 5.517446250613766e-06, "loss": 0.7333, "step": 4415 }, { "epoch": 0.6586620926243568, "grad_norm": 1.7962132692337036, "learning_rate": 5.513127661310512e-06, "loss": 0.6783, "step": 4416 }, { "epoch": 0.6588112461779402, "grad_norm": 1.52881920337677, "learning_rate": 5.508810119496372e-06, "loss": 0.7154, "step": 4417 }, { "epoch": 0.6589603997315236, "grad_norm": 1.8403565883636475, "learning_rate": 5.504493626179307e-06, "loss": 0.6652, "step": 4418 }, { "epoch": 0.659109553285107, "grad_norm": 1.3446712493896484, "learning_rate": 5.5001781823670305e-06, "loss": 0.7205, "step": 4419 }, { "epoch": 0.6592587068386905, "grad_norm": 4.2628703117370605, "learning_rate": 5.4958637890670105e-06, "loss": 0.679, "step": 4420 }, { "epoch": 0.6594078603922738, "grad_norm": 1.3929723501205444, "learning_rate": 5.491550447286469e-06, "loss": 0.6268, "step": 4421 }, { "epoch": 0.6595570139458573, "grad_norm": 1.5407100915908813, "learning_rate": 5.487238158032388e-06, "loss": 0.6882, "step": 4422 }, { "epoch": 0.6597061674994407, "grad_norm": 1.88955819606781, "learning_rate": 5.482926922311491e-06, "loss": 0.6688, "step": 4423 }, { "epoch": 0.6598553210530241, "grad_norm": 1.5667777061462402, "learning_rate": 5.478616741130269e-06, "loss": 0.6275, "step": 4424 }, { "epoch": 0.6600044746066075, "grad_norm": 3.023026704788208, "learning_rate": 5.474307615494958e-06, "loss": 0.7046, "step": 4425 }, { "epoch": 0.660153628160191, "grad_norm": 1.275750994682312, "learning_rate": 5.469999546411557e-06, "loss": 0.6391, "step": 4426 }, { "epoch": 0.6603027817137743, "grad_norm": 2.451975107192993, "learning_rate": 5.465692534885807e-06, "loss": 0.7061, "step": 4427 }, { "epoch": 0.6604519352673578, "grad_norm": 2.8647964000701904, "learning_rate": 5.461386581923207e-06, "loss": 0.6161, "step": 4428 }, { "epoch": 0.6606010888209412, "grad_norm": 1.899825096130371, "learning_rate": 5.457081688529011e-06, "loss": 0.6067, "step": 4429 }, { "epoch": 0.6607502423745246, "grad_norm": 1.5848779678344727, "learning_rate": 5.452777855708224e-06, "loss": 0.7034, "step": 4430 }, { "epoch": 0.660899395928108, "grad_norm": 1.3122140169143677, "learning_rate": 5.448475084465605e-06, "loss": 0.7381, "step": 4431 }, { "epoch": 0.6610485494816914, "grad_norm": 3.0870187282562256, "learning_rate": 5.4441733758056655e-06, "loss": 0.7093, "step": 4432 }, { "epoch": 0.6611977030352748, "grad_norm": 1.6702085733413696, "learning_rate": 5.439872730732659e-06, "loss": 0.6674, "step": 4433 }, { "epoch": 0.6613468565888583, "grad_norm": 1.2916197776794434, "learning_rate": 5.4355731502506035e-06, "loss": 0.7212, "step": 4434 }, { "epoch": 0.6614960101424416, "grad_norm": 1.277906060218811, "learning_rate": 5.431274635363268e-06, "loss": 0.6928, "step": 4435 }, { "epoch": 0.6616451636960251, "grad_norm": 1.8570852279663086, "learning_rate": 5.426977187074158e-06, "loss": 0.6794, "step": 4436 }, { "epoch": 0.6617943172496085, "grad_norm": 1.7203264236450195, "learning_rate": 5.422680806386544e-06, "loss": 0.7027, "step": 4437 }, { "epoch": 0.6619434708031919, "grad_norm": 3.706315517425537, "learning_rate": 5.418385494303453e-06, "loss": 0.6942, "step": 4438 }, { "epoch": 0.6620926243567753, "grad_norm": 1.7634986639022827, "learning_rate": 5.414091251827642e-06, "loss": 0.6583, "step": 4439 }, { "epoch": 0.6622417779103588, "grad_norm": 2.0648608207702637, "learning_rate": 5.409798079961632e-06, "loss": 0.6209, "step": 4440 }, { "epoch": 0.6623909314639421, "grad_norm": 2.0157723426818848, "learning_rate": 5.405505979707698e-06, "loss": 0.7119, "step": 4441 }, { "epoch": 0.6625400850175256, "grad_norm": 1.5698504447937012, "learning_rate": 5.401214952067849e-06, "loss": 0.757, "step": 4442 }, { "epoch": 0.662689238571109, "grad_norm": 2.9247608184814453, "learning_rate": 5.396924998043858e-06, "loss": 0.6554, "step": 4443 }, { "epoch": 0.6628383921246924, "grad_norm": 1.229015827178955, "learning_rate": 5.392636118637242e-06, "loss": 0.7319, "step": 4444 }, { "epoch": 0.6629875456782758, "grad_norm": 1.3179643154144287, "learning_rate": 5.388348314849261e-06, "loss": 0.7101, "step": 4445 }, { "epoch": 0.6631366992318593, "grad_norm": 1.7080414295196533, "learning_rate": 5.38406158768094e-06, "loss": 0.6954, "step": 4446 }, { "epoch": 0.6632858527854426, "grad_norm": 1.6592848300933838, "learning_rate": 5.379775938133043e-06, "loss": 0.7554, "step": 4447 }, { "epoch": 0.6634350063390261, "grad_norm": 1.5362216234207153, "learning_rate": 5.375491367206074e-06, "loss": 0.6796, "step": 4448 }, { "epoch": 0.6635841598926094, "grad_norm": 1.7156093120574951, "learning_rate": 5.371207875900298e-06, "loss": 0.6054, "step": 4449 }, { "epoch": 0.6637333134461929, "grad_norm": 1.103496789932251, "learning_rate": 5.366925465215728e-06, "loss": 0.7394, "step": 4450 }, { "epoch": 0.6638824669997763, "grad_norm": 1.2533541917800903, "learning_rate": 5.362644136152111e-06, "loss": 0.7156, "step": 4451 }, { "epoch": 0.6640316205533597, "grad_norm": 1.2523480653762817, "learning_rate": 5.358363889708954e-06, "loss": 0.7032, "step": 4452 }, { "epoch": 0.6641807741069431, "grad_norm": 2.0762224197387695, "learning_rate": 5.354084726885511e-06, "loss": 0.75, "step": 4453 }, { "epoch": 0.6643299276605266, "grad_norm": 1.6956455707550049, "learning_rate": 5.349806648680778e-06, "loss": 0.6737, "step": 4454 }, { "epoch": 0.6644790812141099, "grad_norm": 1.7725576162338257, "learning_rate": 5.3455296560935e-06, "loss": 0.7099, "step": 4455 }, { "epoch": 0.6646282347676934, "grad_norm": 1.797861099243164, "learning_rate": 5.34125375012217e-06, "loss": 0.7229, "step": 4456 }, { "epoch": 0.6647773883212768, "grad_norm": 1.4869180917739868, "learning_rate": 5.336978931765023e-06, "loss": 0.6449, "step": 4457 }, { "epoch": 0.6649265418748602, "grad_norm": 1.5367627143859863, "learning_rate": 5.332705202020043e-06, "loss": 0.6222, "step": 4458 }, { "epoch": 0.6650756954284436, "grad_norm": 1.8204538822174072, "learning_rate": 5.328432561884962e-06, "loss": 0.6566, "step": 4459 }, { "epoch": 0.665224848982027, "grad_norm": 3.4206371307373047, "learning_rate": 5.324161012357256e-06, "loss": 0.7236, "step": 4460 }, { "epoch": 0.6653740025356104, "grad_norm": 1.86432683467865, "learning_rate": 5.31989055443414e-06, "loss": 0.6812, "step": 4461 }, { "epoch": 0.6655231560891939, "grad_norm": 3.143601894378662, "learning_rate": 5.315621189112582e-06, "loss": 0.6328, "step": 4462 }, { "epoch": 0.6656723096427772, "grad_norm": 2.4312217235565186, "learning_rate": 5.3113529173892945e-06, "loss": 0.6807, "step": 4463 }, { "epoch": 0.6658214631963607, "grad_norm": 1.8169423341751099, "learning_rate": 5.307085740260731e-06, "loss": 0.6506, "step": 4464 }, { "epoch": 0.6659706167499441, "grad_norm": 1.389554500579834, "learning_rate": 5.302819658723095e-06, "loss": 0.6678, "step": 4465 }, { "epoch": 0.6661197703035275, "grad_norm": 2.0191240310668945, "learning_rate": 5.29855467377233e-06, "loss": 0.7667, "step": 4466 }, { "epoch": 0.6662689238571109, "grad_norm": 1.5544750690460205, "learning_rate": 5.294290786404119e-06, "loss": 0.7256, "step": 4467 }, { "epoch": 0.6664180774106944, "grad_norm": 1.2893950939178467, "learning_rate": 5.290027997613898e-06, "loss": 0.7364, "step": 4468 }, { "epoch": 0.6665672309642777, "grad_norm": 2.7751972675323486, "learning_rate": 5.285766308396845e-06, "loss": 0.6973, "step": 4469 }, { "epoch": 0.6667163845178611, "grad_norm": 1.541918396949768, "learning_rate": 5.28150571974787e-06, "loss": 0.6642, "step": 4470 }, { "epoch": 0.6668655380714446, "grad_norm": 1.4370489120483398, "learning_rate": 5.277246232661641e-06, "loss": 0.737, "step": 4471 }, { "epoch": 0.6670146916250279, "grad_norm": 1.5606262683868408, "learning_rate": 5.272987848132562e-06, "loss": 0.6651, "step": 4472 }, { "epoch": 0.6671638451786114, "grad_norm": 1.767815113067627, "learning_rate": 5.268730567154778e-06, "loss": 0.7302, "step": 4473 }, { "epoch": 0.6673129987321947, "grad_norm": 1.9948269128799438, "learning_rate": 5.264474390722181e-06, "loss": 0.7377, "step": 4474 }, { "epoch": 0.6674621522857782, "grad_norm": 3.610396146774292, "learning_rate": 5.260219319828405e-06, "loss": 0.6936, "step": 4475 }, { "epoch": 0.6676113058393616, "grad_norm": 2.0315263271331787, "learning_rate": 5.2559653554668184e-06, "loss": 0.7137, "step": 4476 }, { "epoch": 0.667760459392945, "grad_norm": 2.364027738571167, "learning_rate": 5.251712498630537e-06, "loss": 0.7075, "step": 4477 }, { "epoch": 0.6679096129465284, "grad_norm": 2.066331624984741, "learning_rate": 5.24746075031242e-06, "loss": 0.63, "step": 4478 }, { "epoch": 0.6680587665001119, "grad_norm": 2.014753580093384, "learning_rate": 5.243210111505068e-06, "loss": 0.8215, "step": 4479 }, { "epoch": 0.6682079200536952, "grad_norm": 1.906602382659912, "learning_rate": 5.238960583200807e-06, "loss": 0.6609, "step": 4480 }, { "epoch": 0.6683570736072787, "grad_norm": 1.5053049325942993, "learning_rate": 5.234712166391735e-06, "loss": 0.7337, "step": 4481 }, { "epoch": 0.668506227160862, "grad_norm": 1.8446515798568726, "learning_rate": 5.230464862069658e-06, "loss": 0.6568, "step": 4482 }, { "epoch": 0.6686553807144455, "grad_norm": 1.299201488494873, "learning_rate": 5.226218671226142e-06, "loss": 0.737, "step": 4483 }, { "epoch": 0.6688045342680289, "grad_norm": 2.5208263397216797, "learning_rate": 5.221973594852488e-06, "loss": 0.7435, "step": 4484 }, { "epoch": 0.6689536878216124, "grad_norm": 1.7094491720199585, "learning_rate": 5.217729633939737e-06, "loss": 0.7496, "step": 4485 }, { "epoch": 0.6691028413751957, "grad_norm": 1.5567823648452759, "learning_rate": 5.213486789478665e-06, "loss": 0.6949, "step": 4486 }, { "epoch": 0.6692519949287792, "grad_norm": 0.711086630821228, "learning_rate": 5.209245062459791e-06, "loss": 0.2311, "step": 4487 }, { "epoch": 0.6694011484823625, "grad_norm": 1.6125377416610718, "learning_rate": 5.205004453873381e-06, "loss": 0.6767, "step": 4488 }, { "epoch": 0.669550302035946, "grad_norm": 1.9184825420379639, "learning_rate": 5.2007649647094195e-06, "loss": 0.6587, "step": 4489 }, { "epoch": 0.6696994555895294, "grad_norm": 1.6250901222229004, "learning_rate": 5.196526595957654e-06, "loss": 0.7142, "step": 4490 }, { "epoch": 0.6698486091431128, "grad_norm": 1.5786240100860596, "learning_rate": 5.192289348607557e-06, "loss": 0.6595, "step": 4491 }, { "epoch": 0.6699977626966962, "grad_norm": 1.533821940422058, "learning_rate": 5.188053223648337e-06, "loss": 0.736, "step": 4492 }, { "epoch": 0.6701469162502797, "grad_norm": 2.227269411087036, "learning_rate": 5.183818222068944e-06, "loss": 0.6299, "step": 4493 }, { "epoch": 0.670296069803863, "grad_norm": 2.6510868072509766, "learning_rate": 5.179584344858074e-06, "loss": 0.6737, "step": 4494 }, { "epoch": 0.6704452233574465, "grad_norm": 1.468064308166504, "learning_rate": 5.175351593004143e-06, "loss": 0.6246, "step": 4495 }, { "epoch": 0.6705943769110299, "grad_norm": 1.8979164361953735, "learning_rate": 5.171119967495319e-06, "loss": 0.7145, "step": 4496 }, { "epoch": 0.6707435304646133, "grad_norm": 1.4669830799102783, "learning_rate": 5.1668894693195045e-06, "loss": 0.639, "step": 4497 }, { "epoch": 0.6708926840181967, "grad_norm": 1.313421368598938, "learning_rate": 5.162660099464327e-06, "loss": 0.7585, "step": 4498 }, { "epoch": 0.6710418375717802, "grad_norm": 2.5665056705474854, "learning_rate": 5.158431858917169e-06, "loss": 0.776, "step": 4499 }, { "epoch": 0.6711909911253635, "grad_norm": 0.5351099967956543, "learning_rate": 5.1542047486651415e-06, "loss": 0.29, "step": 4500 }, { "epoch": 0.671340144678947, "grad_norm": 1.3235180377960205, "learning_rate": 5.149978769695084e-06, "loss": 0.6599, "step": 4501 }, { "epoch": 0.6714892982325303, "grad_norm": 2.1096720695495605, "learning_rate": 5.145753922993582e-06, "loss": 0.573, "step": 4502 }, { "epoch": 0.6716384517861138, "grad_norm": 0.4911508560180664, "learning_rate": 5.141530209546954e-06, "loss": 0.2335, "step": 4503 }, { "epoch": 0.6717876053396972, "grad_norm": 1.8578084707260132, "learning_rate": 5.137307630341248e-06, "loss": 0.6634, "step": 4504 }, { "epoch": 0.6719367588932806, "grad_norm": 1.6388272047042847, "learning_rate": 5.133086186362257e-06, "loss": 0.6633, "step": 4505 }, { "epoch": 0.672085912446864, "grad_norm": 1.7341417074203491, "learning_rate": 5.128865878595502e-06, "loss": 0.6926, "step": 4506 }, { "epoch": 0.6722350660004475, "grad_norm": 1.8133784532546997, "learning_rate": 5.124646708026241e-06, "loss": 0.6234, "step": 4507 }, { "epoch": 0.6723842195540308, "grad_norm": 1.9107444286346436, "learning_rate": 5.120428675639466e-06, "loss": 0.7943, "step": 4508 }, { "epoch": 0.6725333731076143, "grad_norm": 1.5586607456207275, "learning_rate": 5.116211782419911e-06, "loss": 0.5505, "step": 4509 }, { "epoch": 0.6726825266611977, "grad_norm": 2.7187726497650146, "learning_rate": 5.111996029352025e-06, "loss": 0.6877, "step": 4510 }, { "epoch": 0.6728316802147811, "grad_norm": 1.7824113368988037, "learning_rate": 5.107781417420008e-06, "loss": 0.6383, "step": 4511 }, { "epoch": 0.6729808337683645, "grad_norm": 1.696698546409607, "learning_rate": 5.103567947607788e-06, "loss": 0.7616, "step": 4512 }, { "epoch": 0.673129987321948, "grad_norm": 1.2763831615447998, "learning_rate": 5.099355620899032e-06, "loss": 0.6925, "step": 4513 }, { "epoch": 0.6732791408755313, "grad_norm": 2.145531177520752, "learning_rate": 5.095144438277124e-06, "loss": 0.6824, "step": 4514 }, { "epoch": 0.6734282944291148, "grad_norm": 1.2037262916564941, "learning_rate": 5.090934400725194e-06, "loss": 0.6927, "step": 4515 }, { "epoch": 0.6735774479826981, "grad_norm": 2.1955480575561523, "learning_rate": 5.086725509226111e-06, "loss": 0.7517, "step": 4516 }, { "epoch": 0.6737266015362816, "grad_norm": 2.092460870742798, "learning_rate": 5.08251776476246e-06, "loss": 0.7202, "step": 4517 }, { "epoch": 0.673875755089865, "grad_norm": 2.577289342880249, "learning_rate": 5.0783111683165676e-06, "loss": 0.6351, "step": 4518 }, { "epoch": 0.6740249086434484, "grad_norm": 1.6134214401245117, "learning_rate": 5.074105720870495e-06, "loss": 0.665, "step": 4519 }, { "epoch": 0.6741740621970318, "grad_norm": 0.5580251216888428, "learning_rate": 5.069901423406023e-06, "loss": 0.2546, "step": 4520 }, { "epoch": 0.6743232157506153, "grad_norm": 1.9448801279067993, "learning_rate": 5.065698276904676e-06, "loss": 0.6849, "step": 4521 }, { "epoch": 0.6744723693041986, "grad_norm": 2.315091609954834, "learning_rate": 5.061496282347709e-06, "loss": 0.5518, "step": 4522 }, { "epoch": 0.6746215228577821, "grad_norm": 3.3176565170288086, "learning_rate": 5.0572954407160954e-06, "loss": 0.692, "step": 4523 }, { "epoch": 0.6747706764113655, "grad_norm": 1.8562496900558472, "learning_rate": 5.0530957529905515e-06, "loss": 0.6712, "step": 4524 }, { "epoch": 0.6749198299649489, "grad_norm": 2.2315571308135986, "learning_rate": 5.048897220151532e-06, "loss": 0.6825, "step": 4525 }, { "epoch": 0.6750689835185323, "grad_norm": 2.399383306503296, "learning_rate": 5.044699843179197e-06, "loss": 0.7197, "step": 4526 }, { "epoch": 0.6752181370721158, "grad_norm": 1.3960278034210205, "learning_rate": 5.040503623053458e-06, "loss": 0.5973, "step": 4527 }, { "epoch": 0.6753672906256991, "grad_norm": 1.5118615627288818, "learning_rate": 5.036308560753955e-06, "loss": 0.7528, "step": 4528 }, { "epoch": 0.6755164441792826, "grad_norm": 1.8871530294418335, "learning_rate": 5.03211465726004e-06, "loss": 0.6778, "step": 4529 }, { "epoch": 0.6756655977328659, "grad_norm": 4.964208126068115, "learning_rate": 5.027921913550813e-06, "loss": 0.7305, "step": 4530 }, { "epoch": 0.6758147512864494, "grad_norm": 1.5002994537353516, "learning_rate": 5.023730330605095e-06, "loss": 0.7243, "step": 4531 }, { "epoch": 0.6759639048400328, "grad_norm": 2.454526662826538, "learning_rate": 5.019539909401445e-06, "loss": 0.639, "step": 4532 }, { "epoch": 0.6761130583936162, "grad_norm": 0.5099467039108276, "learning_rate": 5.015350650918129e-06, "loss": 0.2605, "step": 4533 }, { "epoch": 0.6762622119471996, "grad_norm": 1.6115611791610718, "learning_rate": 5.011162556133174e-06, "loss": 0.673, "step": 4534 }, { "epoch": 0.6764113655007831, "grad_norm": 1.493699312210083, "learning_rate": 5.006975626024304e-06, "loss": 0.6962, "step": 4535 }, { "epoch": 0.6765605190543664, "grad_norm": 0.5331735014915466, "learning_rate": 5.00278986156899e-06, "loss": 0.2517, "step": 4536 }, { "epoch": 0.6767096726079499, "grad_norm": 1.747756838798523, "learning_rate": 4.998605263744426e-06, "loss": 0.7093, "step": 4537 }, { "epoch": 0.6768588261615333, "grad_norm": 2.103405237197876, "learning_rate": 4.994421833527536e-06, "loss": 0.6209, "step": 4538 }, { "epoch": 0.6770079797151167, "grad_norm": 2.570828676223755, "learning_rate": 4.99023957189496e-06, "loss": 0.7506, "step": 4539 }, { "epoch": 0.6771571332687001, "grad_norm": 4.258380889892578, "learning_rate": 4.986058479823079e-06, "loss": 0.6603, "step": 4540 }, { "epoch": 0.6773062868222836, "grad_norm": 1.6926804780960083, "learning_rate": 4.9818785582880006e-06, "loss": 0.5667, "step": 4541 }, { "epoch": 0.6774554403758669, "grad_norm": 1.4645648002624512, "learning_rate": 4.9776998082655405e-06, "loss": 0.6956, "step": 4542 }, { "epoch": 0.6776045939294504, "grad_norm": 2.6049416065216064, "learning_rate": 4.973522230731267e-06, "loss": 0.7108, "step": 4543 }, { "epoch": 0.6777537474830337, "grad_norm": 2.4595770835876465, "learning_rate": 4.969345826660462e-06, "loss": 0.7822, "step": 4544 }, { "epoch": 0.6779029010366172, "grad_norm": 1.3491767644882202, "learning_rate": 4.9651705970281286e-06, "loss": 0.7787, "step": 4545 }, { "epoch": 0.6780520545902006, "grad_norm": 1.8972301483154297, "learning_rate": 4.960996542809001e-06, "loss": 0.6724, "step": 4546 }, { "epoch": 0.678201208143784, "grad_norm": 2.433553457260132, "learning_rate": 4.956823664977545e-06, "loss": 0.6991, "step": 4547 }, { "epoch": 0.6783503616973674, "grad_norm": 2.04032039642334, "learning_rate": 4.9526519645079376e-06, "loss": 0.6277, "step": 4548 }, { "epoch": 0.6784995152509509, "grad_norm": 2.532601833343506, "learning_rate": 4.948481442374093e-06, "loss": 0.6406, "step": 4549 }, { "epoch": 0.6786486688045342, "grad_norm": 1.7413110733032227, "learning_rate": 4.944312099549647e-06, "loss": 0.714, "step": 4550 }, { "epoch": 0.6787978223581177, "grad_norm": 3.7007620334625244, "learning_rate": 4.940143937007957e-06, "loss": 0.6368, "step": 4551 }, { "epoch": 0.6789469759117011, "grad_norm": 2.1213743686676025, "learning_rate": 4.935976955722109e-06, "loss": 0.6342, "step": 4552 }, { "epoch": 0.6790961294652845, "grad_norm": 1.7663294076919556, "learning_rate": 4.931811156664916e-06, "loss": 0.656, "step": 4553 }, { "epoch": 0.6792452830188679, "grad_norm": 1.6787015199661255, "learning_rate": 4.927646540808903e-06, "loss": 0.6564, "step": 4554 }, { "epoch": 0.6793944365724514, "grad_norm": 1.4577088356018066, "learning_rate": 4.923483109126328e-06, "loss": 0.6745, "step": 4555 }, { "epoch": 0.6795435901260347, "grad_norm": 0.5436162352561951, "learning_rate": 4.919320862589172e-06, "loss": 0.2542, "step": 4556 }, { "epoch": 0.6796927436796182, "grad_norm": 1.4416730403900146, "learning_rate": 4.915159802169143e-06, "loss": 0.701, "step": 4557 }, { "epoch": 0.6798418972332015, "grad_norm": 2.1256182193756104, "learning_rate": 4.910999928837656e-06, "loss": 0.7158, "step": 4558 }, { "epoch": 0.679991050786785, "grad_norm": 1.830815076828003, "learning_rate": 4.906841243565869e-06, "loss": 0.7488, "step": 4559 }, { "epoch": 0.6801402043403684, "grad_norm": 1.5445514917373657, "learning_rate": 4.90268374732465e-06, "loss": 0.6981, "step": 4560 }, { "epoch": 0.6802893578939518, "grad_norm": 1.3661521673202515, "learning_rate": 4.898527441084595e-06, "loss": 0.6903, "step": 4561 }, { "epoch": 0.6804385114475352, "grad_norm": 2.6519265174865723, "learning_rate": 4.894372325816019e-06, "loss": 0.6263, "step": 4562 }, { "epoch": 0.6805876650011187, "grad_norm": 3.0289931297302246, "learning_rate": 4.890218402488966e-06, "loss": 0.6807, "step": 4563 }, { "epoch": 0.680736818554702, "grad_norm": 1.3586912155151367, "learning_rate": 4.886065672073186e-06, "loss": 0.7241, "step": 4564 }, { "epoch": 0.6808859721082855, "grad_norm": 1.8510888814926147, "learning_rate": 4.881914135538166e-06, "loss": 0.7635, "step": 4565 }, { "epoch": 0.6810351256618689, "grad_norm": 1.5755443572998047, "learning_rate": 4.877763793853112e-06, "loss": 0.6622, "step": 4566 }, { "epoch": 0.6811842792154523, "grad_norm": 1.6679240465164185, "learning_rate": 4.8736146479869404e-06, "loss": 0.6515, "step": 4567 }, { "epoch": 0.6813334327690357, "grad_norm": 2.032113790512085, "learning_rate": 4.869466698908297e-06, "loss": 0.622, "step": 4568 }, { "epoch": 0.6814825863226192, "grad_norm": 1.4645836353302002, "learning_rate": 4.865319947585556e-06, "loss": 0.7413, "step": 4569 }, { "epoch": 0.6816317398762025, "grad_norm": 1.7383720874786377, "learning_rate": 4.861174394986795e-06, "loss": 0.6523, "step": 4570 }, { "epoch": 0.681780893429786, "grad_norm": 2.0257692337036133, "learning_rate": 4.857030042079821e-06, "loss": 0.5946, "step": 4571 }, { "epoch": 0.6819300469833693, "grad_norm": 1.6888623237609863, "learning_rate": 4.852886889832163e-06, "loss": 0.6449, "step": 4572 }, { "epoch": 0.6820792005369528, "grad_norm": 1.8313699960708618, "learning_rate": 4.848744939211062e-06, "loss": 0.6117, "step": 4573 }, { "epoch": 0.6822283540905362, "grad_norm": 1.2960141897201538, "learning_rate": 4.844604191183485e-06, "loss": 0.7448, "step": 4574 }, { "epoch": 0.6823775076441196, "grad_norm": 1.640860676765442, "learning_rate": 4.8404646467161184e-06, "loss": 0.5678, "step": 4575 }, { "epoch": 0.682526661197703, "grad_norm": 1.7687914371490479, "learning_rate": 4.836326306775357e-06, "loss": 0.7482, "step": 4576 }, { "epoch": 0.6826758147512865, "grad_norm": 1.561454176902771, "learning_rate": 4.832189172327333e-06, "loss": 0.6159, "step": 4577 }, { "epoch": 0.6828249683048698, "grad_norm": 3.0940113067626953, "learning_rate": 4.8280532443378855e-06, "loss": 0.6938, "step": 4578 }, { "epoch": 0.6829741218584533, "grad_norm": 1.7313040494918823, "learning_rate": 4.823918523772567e-06, "loss": 0.6538, "step": 4579 }, { "epoch": 0.6831232754120367, "grad_norm": 2.4776008129119873, "learning_rate": 4.819785011596659e-06, "loss": 0.6639, "step": 4580 }, { "epoch": 0.6832724289656201, "grad_norm": 1.6771782636642456, "learning_rate": 4.81565270877516e-06, "loss": 0.5686, "step": 4581 }, { "epoch": 0.6834215825192035, "grad_norm": 1.5788743495941162, "learning_rate": 4.811521616272774e-06, "loss": 0.7012, "step": 4582 }, { "epoch": 0.683570736072787, "grad_norm": 1.846442699432373, "learning_rate": 4.807391735053936e-06, "loss": 0.6052, "step": 4583 }, { "epoch": 0.6837198896263703, "grad_norm": 1.71721613407135, "learning_rate": 4.8032630660827914e-06, "loss": 0.6864, "step": 4584 }, { "epoch": 0.6838690431799538, "grad_norm": 1.5929064750671387, "learning_rate": 4.799135610323207e-06, "loss": 0.6973, "step": 4585 }, { "epoch": 0.6840181967335371, "grad_norm": 1.98740816116333, "learning_rate": 4.795009368738761e-06, "loss": 0.6867, "step": 4586 }, { "epoch": 0.6841673502871206, "grad_norm": 1.373948335647583, "learning_rate": 4.790884342292758e-06, "loss": 0.6923, "step": 4587 }, { "epoch": 0.684316503840704, "grad_norm": 1.601185917854309, "learning_rate": 4.7867605319482014e-06, "loss": 0.7276, "step": 4588 }, { "epoch": 0.6844656573942874, "grad_norm": 1.2216637134552002, "learning_rate": 4.782637938667825e-06, "loss": 0.7308, "step": 4589 }, { "epoch": 0.6846148109478708, "grad_norm": 1.504313588142395, "learning_rate": 4.778516563414078e-06, "loss": 0.7846, "step": 4590 }, { "epoch": 0.6847639645014543, "grad_norm": 1.8480186462402344, "learning_rate": 4.7743964071491224e-06, "loss": 0.6234, "step": 4591 }, { "epoch": 0.6849131180550376, "grad_norm": 2.5995213985443115, "learning_rate": 4.770277470834829e-06, "loss": 0.6041, "step": 4592 }, { "epoch": 0.6850622716086211, "grad_norm": 2.0116560459136963, "learning_rate": 4.766159755432793e-06, "loss": 0.7162, "step": 4593 }, { "epoch": 0.6852114251622045, "grad_norm": 1.78677499294281, "learning_rate": 4.762043261904321e-06, "loss": 0.6735, "step": 4594 }, { "epoch": 0.6853605787157879, "grad_norm": 1.940709114074707, "learning_rate": 4.757927991210436e-06, "loss": 0.6696, "step": 4595 }, { "epoch": 0.6855097322693713, "grad_norm": 1.9565117359161377, "learning_rate": 4.753813944311873e-06, "loss": 0.5751, "step": 4596 }, { "epoch": 0.6856588858229548, "grad_norm": 1.3355927467346191, "learning_rate": 4.749701122169089e-06, "loss": 0.6707, "step": 4597 }, { "epoch": 0.6858080393765381, "grad_norm": 1.2333154678344727, "learning_rate": 4.745589525742238e-06, "loss": 0.6632, "step": 4598 }, { "epoch": 0.6859571929301216, "grad_norm": 1.396898865699768, "learning_rate": 4.741479155991204e-06, "loss": 0.7264, "step": 4599 }, { "epoch": 0.6861063464837049, "grad_norm": 1.2409266233444214, "learning_rate": 4.737370013875583e-06, "loss": 0.8111, "step": 4600 }, { "epoch": 0.6862555000372884, "grad_norm": 1.9136013984680176, "learning_rate": 4.7332621003546716e-06, "loss": 0.6854, "step": 4601 }, { "epoch": 0.6864046535908718, "grad_norm": 2.182199001312256, "learning_rate": 4.729155416387495e-06, "loss": 0.6926, "step": 4602 }, { "epoch": 0.6865538071444552, "grad_norm": 1.3126243352890015, "learning_rate": 4.725049962932782e-06, "loss": 0.6928, "step": 4603 }, { "epoch": 0.6867029606980386, "grad_norm": 2.80901837348938, "learning_rate": 4.720945740948979e-06, "loss": 0.6201, "step": 4604 }, { "epoch": 0.6868521142516221, "grad_norm": 1.583672046661377, "learning_rate": 4.716842751394241e-06, "loss": 0.6268, "step": 4605 }, { "epoch": 0.6870012678052054, "grad_norm": 0.5252143144607544, "learning_rate": 4.7127409952264445e-06, "loss": 0.2708, "step": 4606 }, { "epoch": 0.6871504213587889, "grad_norm": 1.5048472881317139, "learning_rate": 4.70864047340316e-06, "loss": 0.6817, "step": 4607 }, { "epoch": 0.6872995749123723, "grad_norm": 2.2624847888946533, "learning_rate": 4.704541186881685e-06, "loss": 0.6833, "step": 4608 }, { "epoch": 0.6874487284659557, "grad_norm": 2.9987120628356934, "learning_rate": 4.700443136619024e-06, "loss": 0.7015, "step": 4609 }, { "epoch": 0.6875978820195391, "grad_norm": 1.483548641204834, "learning_rate": 4.696346323571899e-06, "loss": 0.6545, "step": 4610 }, { "epoch": 0.6877470355731226, "grad_norm": 1.73288094997406, "learning_rate": 4.692250748696723e-06, "loss": 0.6987, "step": 4611 }, { "epoch": 0.6878961891267059, "grad_norm": 1.8294559717178345, "learning_rate": 4.688156412949651e-06, "loss": 0.6878, "step": 4612 }, { "epoch": 0.6880453426802894, "grad_norm": 2.984530448913574, "learning_rate": 4.684063317286521e-06, "loss": 0.6757, "step": 4613 }, { "epoch": 0.6881944962338727, "grad_norm": 2.3688297271728516, "learning_rate": 4.679971462662896e-06, "loss": 0.6225, "step": 4614 }, { "epoch": 0.6883436497874562, "grad_norm": 1.2374690771102905, "learning_rate": 4.675880850034045e-06, "loss": 0.6617, "step": 4615 }, { "epoch": 0.6884928033410396, "grad_norm": 1.6371433734893799, "learning_rate": 4.67179148035495e-06, "loss": 0.6965, "step": 4616 }, { "epoch": 0.688641956894623, "grad_norm": 1.2002118825912476, "learning_rate": 4.667703354580297e-06, "loss": 0.7724, "step": 4617 }, { "epoch": 0.6887911104482064, "grad_norm": 1.4455115795135498, "learning_rate": 4.663616473664485e-06, "loss": 0.702, "step": 4618 }, { "epoch": 0.6889402640017899, "grad_norm": 2.5461347103118896, "learning_rate": 4.659530838561629e-06, "loss": 0.7703, "step": 4619 }, { "epoch": 0.6890894175553732, "grad_norm": 1.5194875001907349, "learning_rate": 4.6554464502255345e-06, "loss": 0.704, "step": 4620 }, { "epoch": 0.6892385711089567, "grad_norm": 1.673726201057434, "learning_rate": 4.65136330960974e-06, "loss": 0.6319, "step": 4621 }, { "epoch": 0.6893877246625401, "grad_norm": 1.9906342029571533, "learning_rate": 4.64728141766748e-06, "loss": 0.6516, "step": 4622 }, { "epoch": 0.6895368782161235, "grad_norm": 1.261168122291565, "learning_rate": 4.6432007753516904e-06, "loss": 0.7118, "step": 4623 }, { "epoch": 0.6896860317697069, "grad_norm": 1.7074706554412842, "learning_rate": 4.6391213836150284e-06, "loss": 0.6508, "step": 4624 }, { "epoch": 0.6898351853232904, "grad_norm": 0.5801412463188171, "learning_rate": 4.635043243409857e-06, "loss": 0.217, "step": 4625 }, { "epoch": 0.6899843388768737, "grad_norm": 0.5457383990287781, "learning_rate": 4.6309663556882365e-06, "loss": 0.2459, "step": 4626 }, { "epoch": 0.6901334924304572, "grad_norm": 2.979233980178833, "learning_rate": 4.626890721401948e-06, "loss": 0.6992, "step": 4627 }, { "epoch": 0.6902826459840405, "grad_norm": 1.794230580329895, "learning_rate": 4.622816341502475e-06, "loss": 0.7378, "step": 4628 }, { "epoch": 0.690431799537624, "grad_norm": 1.8717834949493408, "learning_rate": 4.618743216941e-06, "loss": 0.7456, "step": 4629 }, { "epoch": 0.6905809530912074, "grad_norm": 1.5112415552139282, "learning_rate": 4.614671348668429e-06, "loss": 0.7445, "step": 4630 }, { "epoch": 0.6907301066447908, "grad_norm": 1.4070017337799072, "learning_rate": 4.610600737635367e-06, "loss": 0.6749, "step": 4631 }, { "epoch": 0.6908792601983742, "grad_norm": 2.3177194595336914, "learning_rate": 4.606531384792114e-06, "loss": 0.6191, "step": 4632 }, { "epoch": 0.6910284137519577, "grad_norm": 1.4852030277252197, "learning_rate": 4.602463291088695e-06, "loss": 0.686, "step": 4633 }, { "epoch": 0.691177567305541, "grad_norm": 1.191622257232666, "learning_rate": 4.5983964574748315e-06, "loss": 0.6314, "step": 4634 }, { "epoch": 0.6913267208591245, "grad_norm": 3.7512459754943848, "learning_rate": 4.594330884899948e-06, "loss": 0.694, "step": 4635 }, { "epoch": 0.6914758744127079, "grad_norm": 2.552114725112915, "learning_rate": 4.59026657431318e-06, "loss": 0.6667, "step": 4636 }, { "epoch": 0.6916250279662913, "grad_norm": 1.2182300090789795, "learning_rate": 4.586203526663368e-06, "loss": 0.6744, "step": 4637 }, { "epoch": 0.6917741815198747, "grad_norm": 1.325941801071167, "learning_rate": 4.582141742899056e-06, "loss": 0.6858, "step": 4638 }, { "epoch": 0.6919233350734582, "grad_norm": 1.6104624271392822, "learning_rate": 4.578081223968494e-06, "loss": 0.7747, "step": 4639 }, { "epoch": 0.6920724886270415, "grad_norm": 1.666830062866211, "learning_rate": 4.574021970819635e-06, "loss": 0.667, "step": 4640 }, { "epoch": 0.692221642180625, "grad_norm": 1.5387407541275024, "learning_rate": 4.569963984400143e-06, "loss": 0.6617, "step": 4641 }, { "epoch": 0.6923707957342083, "grad_norm": 1.9295637607574463, "learning_rate": 4.565907265657372e-06, "loss": 0.7086, "step": 4642 }, { "epoch": 0.6925199492877918, "grad_norm": 1.7462129592895508, "learning_rate": 4.561851815538394e-06, "loss": 0.7728, "step": 4643 }, { "epoch": 0.6926691028413752, "grad_norm": 5.018485069274902, "learning_rate": 4.557797634989982e-06, "loss": 0.6844, "step": 4644 }, { "epoch": 0.6928182563949586, "grad_norm": 1.7330728769302368, "learning_rate": 4.553744724958605e-06, "loss": 0.6738, "step": 4645 }, { "epoch": 0.692967409948542, "grad_norm": 1.522827386856079, "learning_rate": 4.54969308639044e-06, "loss": 0.7314, "step": 4646 }, { "epoch": 0.6931165635021255, "grad_norm": 1.220299482345581, "learning_rate": 4.545642720231378e-06, "loss": 0.8066, "step": 4647 }, { "epoch": 0.6932657170557088, "grad_norm": 2.0719187259674072, "learning_rate": 4.541593627426993e-06, "loss": 0.6068, "step": 4648 }, { "epoch": 0.6934148706092923, "grad_norm": 2.1869940757751465, "learning_rate": 4.537545808922577e-06, "loss": 0.7144, "step": 4649 }, { "epoch": 0.6935640241628757, "grad_norm": 2.269731044769287, "learning_rate": 4.5334992656631184e-06, "loss": 0.764, "step": 4650 }, { "epoch": 0.6937131777164591, "grad_norm": 2.303900957107544, "learning_rate": 4.529453998593305e-06, "loss": 0.6379, "step": 4651 }, { "epoch": 0.6938623312700425, "grad_norm": 1.6528596878051758, "learning_rate": 4.525410008657534e-06, "loss": 0.7454, "step": 4652 }, { "epoch": 0.694011484823626, "grad_norm": 1.918312907218933, "learning_rate": 4.521367296799902e-06, "loss": 0.737, "step": 4653 }, { "epoch": 0.6941606383772093, "grad_norm": 1.5340315103530884, "learning_rate": 4.517325863964201e-06, "loss": 0.7526, "step": 4654 }, { "epoch": 0.6943097919307928, "grad_norm": 1.3858803510665894, "learning_rate": 4.5132857110939275e-06, "loss": 0.7934, "step": 4655 }, { "epoch": 0.6944589454843761, "grad_norm": 0.4916435182094574, "learning_rate": 4.509246839132294e-06, "loss": 0.2271, "step": 4656 }, { "epoch": 0.6946080990379596, "grad_norm": 1.856598138809204, "learning_rate": 4.5052092490221885e-06, "loss": 0.6306, "step": 4657 }, { "epoch": 0.694757252591543, "grad_norm": 1.275337815284729, "learning_rate": 4.501172941706218e-06, "loss": 0.7347, "step": 4658 }, { "epoch": 0.6949064061451264, "grad_norm": 1.3353900909423828, "learning_rate": 4.497137918126685e-06, "loss": 0.7167, "step": 4659 }, { "epoch": 0.6950555596987098, "grad_norm": 2.03244948387146, "learning_rate": 4.4931041792255855e-06, "loss": 0.7008, "step": 4660 }, { "epoch": 0.6952047132522933, "grad_norm": 2.317187547683716, "learning_rate": 4.489071725944627e-06, "loss": 0.6558, "step": 4661 }, { "epoch": 0.6953538668058766, "grad_norm": 0.5058684349060059, "learning_rate": 4.485040559225211e-06, "loss": 0.2313, "step": 4662 }, { "epoch": 0.6955030203594601, "grad_norm": 1.3792054653167725, "learning_rate": 4.48101068000844e-06, "loss": 0.6276, "step": 4663 }, { "epoch": 0.6956521739130435, "grad_norm": 1.205427646636963, "learning_rate": 4.476982089235109e-06, "loss": 0.6404, "step": 4664 }, { "epoch": 0.6958013274666269, "grad_norm": 0.5145953297615051, "learning_rate": 4.472954787845729e-06, "loss": 0.2514, "step": 4665 }, { "epoch": 0.6959504810202103, "grad_norm": 3.3088784217834473, "learning_rate": 4.468928776780489e-06, "loss": 0.6717, "step": 4666 }, { "epoch": 0.6960996345737938, "grad_norm": 1.923409104347229, "learning_rate": 4.464904056979293e-06, "loss": 0.7066, "step": 4667 }, { "epoch": 0.6962487881273771, "grad_norm": 1.8708903789520264, "learning_rate": 4.460880629381736e-06, "loss": 0.7292, "step": 4668 }, { "epoch": 0.6963979416809606, "grad_norm": 1.1461952924728394, "learning_rate": 4.456858494927116e-06, "loss": 0.6919, "step": 4669 }, { "epoch": 0.696547095234544, "grad_norm": 1.5305912494659424, "learning_rate": 4.452837654554419e-06, "loss": 0.6423, "step": 4670 }, { "epoch": 0.6966962487881274, "grad_norm": 4.147036552429199, "learning_rate": 4.448818109202341e-06, "loss": 0.7451, "step": 4671 }, { "epoch": 0.6968454023417108, "grad_norm": 1.6934434175491333, "learning_rate": 4.444799859809274e-06, "loss": 0.6949, "step": 4672 }, { "epoch": 0.6969945558952942, "grad_norm": 1.6084880828857422, "learning_rate": 4.440782907313291e-06, "loss": 0.6232, "step": 4673 }, { "epoch": 0.6971437094488776, "grad_norm": 2.148697853088379, "learning_rate": 4.436767252652189e-06, "loss": 0.6399, "step": 4674 }, { "epoch": 0.6972928630024611, "grad_norm": 6.002870082855225, "learning_rate": 4.432752896763447e-06, "loss": 0.7097, "step": 4675 }, { "epoch": 0.6974420165560444, "grad_norm": 1.716395616531372, "learning_rate": 4.428739840584235e-06, "loss": 0.7235, "step": 4676 }, { "epoch": 0.6975911701096279, "grad_norm": 1.3277884721755981, "learning_rate": 4.42472808505143e-06, "loss": 0.6467, "step": 4677 }, { "epoch": 0.6977403236632113, "grad_norm": 1.5515141487121582, "learning_rate": 4.420717631101607e-06, "loss": 0.7379, "step": 4678 }, { "epoch": 0.6978894772167947, "grad_norm": 1.7967392206192017, "learning_rate": 4.416708479671022e-06, "loss": 0.6902, "step": 4679 }, { "epoch": 0.6980386307703781, "grad_norm": 1.560671329498291, "learning_rate": 4.412700631695645e-06, "loss": 0.6686, "step": 4680 }, { "epoch": 0.6981877843239616, "grad_norm": 2.069873332977295, "learning_rate": 4.4086940881111294e-06, "loss": 0.7217, "step": 4681 }, { "epoch": 0.6983369378775449, "grad_norm": 1.3004437685012817, "learning_rate": 4.404688849852832e-06, "loss": 0.6363, "step": 4682 }, { "epoch": 0.6984860914311284, "grad_norm": 2.228435754776001, "learning_rate": 4.4006849178558e-06, "loss": 0.6823, "step": 4683 }, { "epoch": 0.6986352449847117, "grad_norm": 2.421523094177246, "learning_rate": 4.396682293054779e-06, "loss": 0.6653, "step": 4684 }, { "epoch": 0.6987843985382952, "grad_norm": 1.9754871129989624, "learning_rate": 4.392680976384204e-06, "loss": 0.7494, "step": 4685 }, { "epoch": 0.6989335520918786, "grad_norm": 1.5583831071853638, "learning_rate": 4.388680968778207e-06, "loss": 0.7179, "step": 4686 }, { "epoch": 0.699082705645462, "grad_norm": 1.5933129787445068, "learning_rate": 4.384682271170619e-06, "loss": 0.6553, "step": 4687 }, { "epoch": 0.6992318591990454, "grad_norm": 1.2807397842407227, "learning_rate": 4.380684884494965e-06, "loss": 0.6925, "step": 4688 }, { "epoch": 0.6993810127526289, "grad_norm": 1.441238522529602, "learning_rate": 4.376688809684452e-06, "loss": 0.7219, "step": 4689 }, { "epoch": 0.6995301663062122, "grad_norm": 2.9361610412597656, "learning_rate": 4.3726940476719925e-06, "loss": 0.6989, "step": 4690 }, { "epoch": 0.6996793198597957, "grad_norm": 1.3195685148239136, "learning_rate": 4.3687005993901895e-06, "loss": 0.7538, "step": 4691 }, { "epoch": 0.6998284734133791, "grad_norm": 1.5080928802490234, "learning_rate": 4.364708465771341e-06, "loss": 0.7299, "step": 4692 }, { "epoch": 0.6999776269669625, "grad_norm": 4.4935455322265625, "learning_rate": 4.360717647747434e-06, "loss": 0.6479, "step": 4693 }, { "epoch": 0.7001267805205459, "grad_norm": 1.5784097909927368, "learning_rate": 4.3567281462501555e-06, "loss": 0.65, "step": 4694 }, { "epoch": 0.7002759340741294, "grad_norm": 2.3991751670837402, "learning_rate": 4.352739962210872e-06, "loss": 0.6969, "step": 4695 }, { "epoch": 0.7004250876277127, "grad_norm": 1.3224393129348755, "learning_rate": 4.348753096560655e-06, "loss": 0.621, "step": 4696 }, { "epoch": 0.7005742411812962, "grad_norm": 2.7135300636291504, "learning_rate": 4.344767550230268e-06, "loss": 0.6923, "step": 4697 }, { "epoch": 0.7007233947348795, "grad_norm": 1.8810158967971802, "learning_rate": 4.340783324150153e-06, "loss": 0.7644, "step": 4698 }, { "epoch": 0.700872548288463, "grad_norm": 1.8964574337005615, "learning_rate": 4.3368004192504554e-06, "loss": 0.6342, "step": 4699 }, { "epoch": 0.7010217018420464, "grad_norm": 2.9491512775421143, "learning_rate": 4.332818836461019e-06, "loss": 0.7227, "step": 4700 }, { "epoch": 0.7011708553956298, "grad_norm": 1.6958675384521484, "learning_rate": 4.32883857671136e-06, "loss": 0.6664, "step": 4701 }, { "epoch": 0.7013200089492132, "grad_norm": 3.124800682067871, "learning_rate": 4.3248596409306995e-06, "loss": 0.6229, "step": 4702 }, { "epoch": 0.7014691625027967, "grad_norm": 1.4989665746688843, "learning_rate": 4.3208820300479495e-06, "loss": 0.7572, "step": 4703 }, { "epoch": 0.70161831605638, "grad_norm": 2.0648598670959473, "learning_rate": 4.316905744991699e-06, "loss": 0.6623, "step": 4704 }, { "epoch": 0.7017674696099635, "grad_norm": 1.773672103881836, "learning_rate": 4.312930786690244e-06, "loss": 0.7196, "step": 4705 }, { "epoch": 0.7019166231635469, "grad_norm": 2.0571134090423584, "learning_rate": 4.308957156071565e-06, "loss": 0.6913, "step": 4706 }, { "epoch": 0.7020657767171303, "grad_norm": 3.167879104614258, "learning_rate": 4.304984854063326e-06, "loss": 0.7143, "step": 4707 }, { "epoch": 0.7022149302707137, "grad_norm": 1.6163334846496582, "learning_rate": 4.301013881592885e-06, "loss": 0.7084, "step": 4708 }, { "epoch": 0.7023640838242972, "grad_norm": 3.724363327026367, "learning_rate": 4.297044239587304e-06, "loss": 0.6396, "step": 4709 }, { "epoch": 0.7025132373778805, "grad_norm": 1.4343218803405762, "learning_rate": 4.293075928973308e-06, "loss": 0.6879, "step": 4710 }, { "epoch": 0.702662390931464, "grad_norm": 2.1691830158233643, "learning_rate": 4.28910895067733e-06, "loss": 0.6402, "step": 4711 }, { "epoch": 0.7028115444850473, "grad_norm": 2.4877307415008545, "learning_rate": 4.285143305625489e-06, "loss": 0.6837, "step": 4712 }, { "epoch": 0.7029606980386308, "grad_norm": 1.7918410301208496, "learning_rate": 4.281178994743584e-06, "loss": 0.6783, "step": 4713 }, { "epoch": 0.7031098515922142, "grad_norm": 1.4420539140701294, "learning_rate": 4.277216018957112e-06, "loss": 0.777, "step": 4714 }, { "epoch": 0.7032590051457976, "grad_norm": 1.6689730882644653, "learning_rate": 4.273254379191255e-06, "loss": 0.7052, "step": 4715 }, { "epoch": 0.703408158699381, "grad_norm": 0.5155194997787476, "learning_rate": 4.269294076370884e-06, "loss": 0.2447, "step": 4716 }, { "epoch": 0.7035573122529645, "grad_norm": 1.3120684623718262, "learning_rate": 4.265335111420554e-06, "loss": 0.7192, "step": 4717 }, { "epoch": 0.7037064658065478, "grad_norm": 1.7005383968353271, "learning_rate": 4.26137748526452e-06, "loss": 0.7194, "step": 4718 }, { "epoch": 0.7038556193601313, "grad_norm": 1.7282484769821167, "learning_rate": 4.257421198826703e-06, "loss": 0.6859, "step": 4719 }, { "epoch": 0.7040047729137147, "grad_norm": 2.158315896987915, "learning_rate": 4.253466253030728e-06, "loss": 0.7071, "step": 4720 }, { "epoch": 0.7041539264672981, "grad_norm": 1.3246339559555054, "learning_rate": 4.249512648799904e-06, "loss": 0.7671, "step": 4721 }, { "epoch": 0.7043030800208815, "grad_norm": 3.3532252311706543, "learning_rate": 4.245560387057228e-06, "loss": 0.663, "step": 4722 }, { "epoch": 0.704452233574465, "grad_norm": 3.660529851913452, "learning_rate": 4.241609468725374e-06, "loss": 0.6656, "step": 4723 }, { "epoch": 0.7046013871280483, "grad_norm": 1.5308586359024048, "learning_rate": 4.2376598947267124e-06, "loss": 0.5974, "step": 4724 }, { "epoch": 0.7047505406816318, "grad_norm": 1.5714521408081055, "learning_rate": 4.233711665983297e-06, "loss": 0.6786, "step": 4725 }, { "epoch": 0.7048996942352151, "grad_norm": 1.7245597839355469, "learning_rate": 4.229764783416867e-06, "loss": 0.6326, "step": 4726 }, { "epoch": 0.7050488477887986, "grad_norm": 2.555037260055542, "learning_rate": 4.225819247948846e-06, "loss": 0.6721, "step": 4727 }, { "epoch": 0.705198001342382, "grad_norm": 1.9718531370162964, "learning_rate": 4.22187506050035e-06, "loss": 0.6792, "step": 4728 }, { "epoch": 0.7053471548959654, "grad_norm": 1.6969339847564697, "learning_rate": 4.2179322219921684e-06, "loss": 0.6415, "step": 4729 }, { "epoch": 0.7054963084495488, "grad_norm": 1.6587623357772827, "learning_rate": 4.213990733344783e-06, "loss": 0.7519, "step": 4730 }, { "epoch": 0.7056454620031323, "grad_norm": 1.2128292322158813, "learning_rate": 4.210050595478365e-06, "loss": 0.6783, "step": 4731 }, { "epoch": 0.7057946155567156, "grad_norm": 3.2326271533966064, "learning_rate": 4.206111809312757e-06, "loss": 0.5952, "step": 4732 }, { "epoch": 0.7059437691102991, "grad_norm": 2.6256771087646484, "learning_rate": 4.202174375767498e-06, "loss": 0.5835, "step": 4733 }, { "epoch": 0.7060929226638825, "grad_norm": 1.253093957901001, "learning_rate": 4.198238295761807e-06, "loss": 0.8185, "step": 4734 }, { "epoch": 0.7062420762174659, "grad_norm": 1.5911357402801514, "learning_rate": 4.194303570214586e-06, "loss": 0.6537, "step": 4735 }, { "epoch": 0.7063912297710493, "grad_norm": 1.8024919033050537, "learning_rate": 4.1903702000444235e-06, "loss": 0.7845, "step": 4736 }, { "epoch": 0.7065403833246328, "grad_norm": 1.8052432537078857, "learning_rate": 4.1864381861695934e-06, "loss": 0.6487, "step": 4737 }, { "epoch": 0.7066895368782161, "grad_norm": 1.7043884992599487, "learning_rate": 4.182507529508042e-06, "loss": 0.6313, "step": 4738 }, { "epoch": 0.7068386904317996, "grad_norm": 1.2263211011886597, "learning_rate": 4.178578230977409e-06, "loss": 0.6903, "step": 4739 }, { "epoch": 0.706987843985383, "grad_norm": 1.7097184658050537, "learning_rate": 4.174650291495015e-06, "loss": 0.6594, "step": 4740 }, { "epoch": 0.7071369975389664, "grad_norm": 0.5012102723121643, "learning_rate": 4.170723711977867e-06, "loss": 0.2531, "step": 4741 }, { "epoch": 0.7072861510925498, "grad_norm": 1.6003881692886353, "learning_rate": 4.166798493342642e-06, "loss": 0.7451, "step": 4742 }, { "epoch": 0.7074353046461332, "grad_norm": 1.4633945226669312, "learning_rate": 4.162874636505713e-06, "loss": 0.6788, "step": 4743 }, { "epoch": 0.7075844581997166, "grad_norm": 2.022059679031372, "learning_rate": 4.1589521423831254e-06, "loss": 0.6824, "step": 4744 }, { "epoch": 0.7077336117533001, "grad_norm": 2.711886405944824, "learning_rate": 4.1550310118906145e-06, "loss": 0.6771, "step": 4745 }, { "epoch": 0.7078827653068834, "grad_norm": 1.8920217752456665, "learning_rate": 4.151111245943592e-06, "loss": 0.6767, "step": 4746 }, { "epoch": 0.7080319188604669, "grad_norm": 1.2822151184082031, "learning_rate": 4.1471928454571565e-06, "loss": 0.6213, "step": 4747 }, { "epoch": 0.7081810724140503, "grad_norm": 2.3437044620513916, "learning_rate": 4.143275811346076e-06, "loss": 0.694, "step": 4748 }, { "epoch": 0.7083302259676337, "grad_norm": 2.629436731338501, "learning_rate": 4.13936014452481e-06, "loss": 0.6724, "step": 4749 }, { "epoch": 0.7084793795212171, "grad_norm": 1.2776457071304321, "learning_rate": 4.1354458459075005e-06, "loss": 0.6787, "step": 4750 }, { "epoch": 0.7086285330748006, "grad_norm": 1.4009428024291992, "learning_rate": 4.131532916407955e-06, "loss": 0.6736, "step": 4751 }, { "epoch": 0.7087776866283839, "grad_norm": 1.7578363418579102, "learning_rate": 4.127621356939683e-06, "loss": 0.6899, "step": 4752 }, { "epoch": 0.7089268401819674, "grad_norm": 0.5482547283172607, "learning_rate": 4.1237111684158625e-06, "loss": 0.2445, "step": 4753 }, { "epoch": 0.7090759937355507, "grad_norm": 2.013422966003418, "learning_rate": 4.119802351749346e-06, "loss": 0.6178, "step": 4754 }, { "epoch": 0.7092251472891342, "grad_norm": 1.5243746042251587, "learning_rate": 4.1158949078526734e-06, "loss": 0.6772, "step": 4755 }, { "epoch": 0.7093743008427176, "grad_norm": 1.5281425714492798, "learning_rate": 4.111988837638067e-06, "loss": 0.7521, "step": 4756 }, { "epoch": 0.709523454396301, "grad_norm": 1.9295631647109985, "learning_rate": 4.1080841420174175e-06, "loss": 0.7539, "step": 4757 }, { "epoch": 0.7096726079498844, "grad_norm": 1.5014079809188843, "learning_rate": 4.104180821902305e-06, "loss": 0.6144, "step": 4758 }, { "epoch": 0.7098217615034679, "grad_norm": 2.532740592956543, "learning_rate": 4.100278878203986e-06, "loss": 0.689, "step": 4759 }, { "epoch": 0.7099709150570512, "grad_norm": 1.4906775951385498, "learning_rate": 4.096378311833386e-06, "loss": 0.6565, "step": 4760 }, { "epoch": 0.7101200686106347, "grad_norm": 1.2089900970458984, "learning_rate": 4.092479123701126e-06, "loss": 0.6553, "step": 4761 }, { "epoch": 0.7102692221642181, "grad_norm": 2.2200753688812256, "learning_rate": 4.088581314717498e-06, "loss": 0.6706, "step": 4762 }, { "epoch": 0.7104183757178015, "grad_norm": 2.7608273029327393, "learning_rate": 4.084684885792462e-06, "loss": 0.7562, "step": 4763 }, { "epoch": 0.7105675292713849, "grad_norm": 1.7012124061584473, "learning_rate": 4.08078983783567e-06, "loss": 0.6244, "step": 4764 }, { "epoch": 0.7107166828249684, "grad_norm": 3.452840566635132, "learning_rate": 4.076896171756444e-06, "loss": 0.6438, "step": 4765 }, { "epoch": 0.7108658363785517, "grad_norm": 1.6139156818389893, "learning_rate": 4.073003888463789e-06, "loss": 0.6679, "step": 4766 }, { "epoch": 0.7110149899321352, "grad_norm": 1.7300487756729126, "learning_rate": 4.069112988866377e-06, "loss": 0.6958, "step": 4767 }, { "epoch": 0.7111641434857185, "grad_norm": 1.524121642112732, "learning_rate": 4.065223473872567e-06, "loss": 0.6232, "step": 4768 }, { "epoch": 0.7113132970393019, "grad_norm": 1.5990604162216187, "learning_rate": 4.061335344390391e-06, "loss": 0.7662, "step": 4769 }, { "epoch": 0.7114624505928854, "grad_norm": 1.2594093084335327, "learning_rate": 4.0574486013275586e-06, "loss": 0.686, "step": 4770 }, { "epoch": 0.7116116041464687, "grad_norm": 2.631582736968994, "learning_rate": 4.053563245591452e-06, "loss": 0.7371, "step": 4771 }, { "epoch": 0.7117607577000522, "grad_norm": 2.0418033599853516, "learning_rate": 4.049679278089139e-06, "loss": 0.7895, "step": 4772 }, { "epoch": 0.7119099112536356, "grad_norm": 1.3257232904434204, "learning_rate": 4.045796699727349e-06, "loss": 0.6607, "step": 4773 }, { "epoch": 0.712059064807219, "grad_norm": 1.5659743547439575, "learning_rate": 4.0419155114124985e-06, "loss": 0.7196, "step": 4774 }, { "epoch": 0.7122082183608024, "grad_norm": 1.8577489852905273, "learning_rate": 4.038035714050678e-06, "loss": 0.6601, "step": 4775 }, { "epoch": 0.7123573719143859, "grad_norm": 1.332371711730957, "learning_rate": 4.034157308547645e-06, "loss": 0.6684, "step": 4776 }, { "epoch": 0.7125065254679692, "grad_norm": 2.043855667114258, "learning_rate": 4.030280295808838e-06, "loss": 0.7237, "step": 4777 }, { "epoch": 0.7126556790215527, "grad_norm": 1.9596699476242065, "learning_rate": 4.0264046767393815e-06, "loss": 0.6873, "step": 4778 }, { "epoch": 0.712804832575136, "grad_norm": 1.9085620641708374, "learning_rate": 4.022530452244052e-06, "loss": 0.5968, "step": 4779 }, { "epoch": 0.7129539861287195, "grad_norm": 1.3225762844085693, "learning_rate": 4.018657623227317e-06, "loss": 0.7592, "step": 4780 }, { "epoch": 0.7131031396823029, "grad_norm": 1.9454847574234009, "learning_rate": 4.0147861905933146e-06, "loss": 0.6806, "step": 4781 }, { "epoch": 0.7132522932358863, "grad_norm": 1.8581095933914185, "learning_rate": 4.010916155245851e-06, "loss": 0.7534, "step": 4782 }, { "epoch": 0.7134014467894697, "grad_norm": 1.8155044317245483, "learning_rate": 4.007047518088413e-06, "loss": 0.6784, "step": 4783 }, { "epoch": 0.7135506003430532, "grad_norm": 2.6568357944488525, "learning_rate": 4.003180280024163e-06, "loss": 0.6799, "step": 4784 }, { "epoch": 0.7136997538966365, "grad_norm": 1.8338370323181152, "learning_rate": 3.9993144419559234e-06, "loss": 0.7628, "step": 4785 }, { "epoch": 0.71384890745022, "grad_norm": 1.3471972942352295, "learning_rate": 3.995450004786201e-06, "loss": 0.7139, "step": 4786 }, { "epoch": 0.7139980610038034, "grad_norm": 1.9797979593276978, "learning_rate": 3.991586969417184e-06, "loss": 0.5699, "step": 4787 }, { "epoch": 0.7141472145573868, "grad_norm": 1.5451158285140991, "learning_rate": 3.9877253367507104e-06, "loss": 0.7627, "step": 4788 }, { "epoch": 0.7142963681109702, "grad_norm": 1.865011215209961, "learning_rate": 3.98386510768831e-06, "loss": 0.6503, "step": 4789 }, { "epoch": 0.7144455216645537, "grad_norm": 1.7070443630218506, "learning_rate": 3.980006283131178e-06, "loss": 0.6523, "step": 4790 }, { "epoch": 0.714594675218137, "grad_norm": 1.369995355606079, "learning_rate": 3.976148863980176e-06, "loss": 0.7255, "step": 4791 }, { "epoch": 0.7147438287717205, "grad_norm": 1.4655377864837646, "learning_rate": 3.972292851135847e-06, "loss": 0.6416, "step": 4792 }, { "epoch": 0.7148929823253038, "grad_norm": 1.1777821779251099, "learning_rate": 3.9684382454984015e-06, "loss": 0.6619, "step": 4793 }, { "epoch": 0.7150421358788873, "grad_norm": 1.4671989679336548, "learning_rate": 3.9645850479677264e-06, "loss": 0.7088, "step": 4794 }, { "epoch": 0.7151912894324707, "grad_norm": 1.5300490856170654, "learning_rate": 3.960733259443365e-06, "loss": 0.6006, "step": 4795 }, { "epoch": 0.7153404429860541, "grad_norm": 1.1821588277816772, "learning_rate": 3.956882880824553e-06, "loss": 0.6527, "step": 4796 }, { "epoch": 0.7154895965396375, "grad_norm": 1.7145073413848877, "learning_rate": 3.953033913010179e-06, "loss": 0.6625, "step": 4797 }, { "epoch": 0.715638750093221, "grad_norm": 1.8942207098007202, "learning_rate": 3.949186356898811e-06, "loss": 0.7603, "step": 4798 }, { "epoch": 0.7157879036468043, "grad_norm": 1.6523650884628296, "learning_rate": 3.945340213388687e-06, "loss": 0.6312, "step": 4799 }, { "epoch": 0.7159370572003878, "grad_norm": 2.432328939437866, "learning_rate": 3.941495483377714e-06, "loss": 0.6733, "step": 4800 }, { "epoch": 0.7160862107539712, "grad_norm": 0.5665059089660645, "learning_rate": 3.937652167763466e-06, "loss": 0.2588, "step": 4801 }, { "epoch": 0.7162353643075546, "grad_norm": 0.5216538906097412, "learning_rate": 3.933810267443191e-06, "loss": 0.2324, "step": 4802 }, { "epoch": 0.716384517861138, "grad_norm": 1.5149363279342651, "learning_rate": 3.9299697833138094e-06, "loss": 0.5851, "step": 4803 }, { "epoch": 0.7165336714147215, "grad_norm": 1.5894584655761719, "learning_rate": 3.926130716271896e-06, "loss": 0.6929, "step": 4804 }, { "epoch": 0.7166828249683048, "grad_norm": 1.72272789478302, "learning_rate": 3.9222930672137175e-06, "loss": 0.6527, "step": 4805 }, { "epoch": 0.7168319785218883, "grad_norm": 1.3177356719970703, "learning_rate": 3.918456837035195e-06, "loss": 0.7978, "step": 4806 }, { "epoch": 0.7169811320754716, "grad_norm": 1.196113109588623, "learning_rate": 3.914622026631916e-06, "loss": 0.757, "step": 4807 }, { "epoch": 0.7171302856290551, "grad_norm": 2.026520252227783, "learning_rate": 3.910788636899143e-06, "loss": 0.7254, "step": 4808 }, { "epoch": 0.7172794391826385, "grad_norm": 2.156013011932373, "learning_rate": 3.906956668731813e-06, "loss": 0.6896, "step": 4809 }, { "epoch": 0.717428592736222, "grad_norm": 1.6463013887405396, "learning_rate": 3.903126123024512e-06, "loss": 0.6246, "step": 4810 }, { "epoch": 0.7175777462898053, "grad_norm": 1.8732985258102417, "learning_rate": 3.899297000671511e-06, "loss": 0.7193, "step": 4811 }, { "epoch": 0.7177268998433888, "grad_norm": 2.4012672901153564, "learning_rate": 3.895469302566745e-06, "loss": 0.6661, "step": 4812 }, { "epoch": 0.7178760533969721, "grad_norm": 1.6134401559829712, "learning_rate": 3.891643029603811e-06, "loss": 0.6741, "step": 4813 }, { "epoch": 0.7180252069505556, "grad_norm": 2.198369264602661, "learning_rate": 3.88781818267598e-06, "loss": 0.7202, "step": 4814 }, { "epoch": 0.718174360504139, "grad_norm": 1.6608952283859253, "learning_rate": 3.883994762676189e-06, "loss": 0.7161, "step": 4815 }, { "epoch": 0.7183235140577224, "grad_norm": 0.4804899990558624, "learning_rate": 3.880172770497033e-06, "loss": 0.2615, "step": 4816 }, { "epoch": 0.7184726676113058, "grad_norm": 2.029961585998535, "learning_rate": 3.8763522070307835e-06, "loss": 0.7027, "step": 4817 }, { "epoch": 0.7186218211648893, "grad_norm": 1.3294658660888672, "learning_rate": 3.872533073169377e-06, "loss": 0.6439, "step": 4818 }, { "epoch": 0.7187709747184726, "grad_norm": 1.5531938076019287, "learning_rate": 3.868715369804418e-06, "loss": 0.7404, "step": 4819 }, { "epoch": 0.7189201282720561, "grad_norm": 1.5382217168807983, "learning_rate": 3.8648990978271646e-06, "loss": 0.649, "step": 4820 }, { "epoch": 0.7190692818256395, "grad_norm": 0.5343941450119019, "learning_rate": 3.861084258128558e-06, "loss": 0.2515, "step": 4821 }, { "epoch": 0.7192184353792229, "grad_norm": 1.5202596187591553, "learning_rate": 3.857270851599193e-06, "loss": 0.6873, "step": 4822 }, { "epoch": 0.7193675889328063, "grad_norm": 1.4478263854980469, "learning_rate": 3.853458879129335e-06, "loss": 0.7484, "step": 4823 }, { "epoch": 0.7195167424863897, "grad_norm": 1.5823311805725098, "learning_rate": 3.849648341608914e-06, "loss": 0.6727, "step": 4824 }, { "epoch": 0.7196658960399731, "grad_norm": 1.8822031021118164, "learning_rate": 3.845839239927527e-06, "loss": 0.7562, "step": 4825 }, { "epoch": 0.7198150495935566, "grad_norm": 1.615904450416565, "learning_rate": 3.842031574974426e-06, "loss": 0.7069, "step": 4826 }, { "epoch": 0.7199642031471399, "grad_norm": 1.6460202932357788, "learning_rate": 3.83822534763854e-06, "loss": 0.6759, "step": 4827 }, { "epoch": 0.7201133567007234, "grad_norm": 2.128080129623413, "learning_rate": 3.834420558808459e-06, "loss": 0.6609, "step": 4828 }, { "epoch": 0.7202625102543068, "grad_norm": 1.6809730529785156, "learning_rate": 3.830617209372429e-06, "loss": 0.6957, "step": 4829 }, { "epoch": 0.7204116638078902, "grad_norm": 1.6809113025665283, "learning_rate": 3.826815300218367e-06, "loss": 0.6792, "step": 4830 }, { "epoch": 0.7205608173614736, "grad_norm": 2.813279867172241, "learning_rate": 3.8230148322338625e-06, "loss": 0.6038, "step": 4831 }, { "epoch": 0.7207099709150571, "grad_norm": 2.2278807163238525, "learning_rate": 3.819215806306148e-06, "loss": 0.6619, "step": 4832 }, { "epoch": 0.7208591244686404, "grad_norm": 1.789292812347412, "learning_rate": 3.815418223322136e-06, "loss": 0.7074, "step": 4833 }, { "epoch": 0.7210082780222239, "grad_norm": 3.7808430194854736, "learning_rate": 3.811622084168399e-06, "loss": 0.6527, "step": 4834 }, { "epoch": 0.7211574315758073, "grad_norm": 1.699558138847351, "learning_rate": 3.8078273897311626e-06, "loss": 0.6876, "step": 4835 }, { "epoch": 0.7213065851293907, "grad_norm": 1.7283191680908203, "learning_rate": 3.8040341408963265e-06, "loss": 0.6381, "step": 4836 }, { "epoch": 0.7214557386829741, "grad_norm": 1.445152997970581, "learning_rate": 3.8002423385494534e-06, "loss": 0.6712, "step": 4837 }, { "epoch": 0.7216048922365575, "grad_norm": 2.634998321533203, "learning_rate": 3.7964519835757554e-06, "loss": 0.5575, "step": 4838 }, { "epoch": 0.7217540457901409, "grad_norm": 1.5241069793701172, "learning_rate": 3.792663076860116e-06, "loss": 0.7419, "step": 4839 }, { "epoch": 0.7219031993437244, "grad_norm": 1.7332139015197754, "learning_rate": 3.788875619287089e-06, "loss": 0.7086, "step": 4840 }, { "epoch": 0.7220523528973077, "grad_norm": 1.5402952432632446, "learning_rate": 3.785089611740872e-06, "loss": 0.6334, "step": 4841 }, { "epoch": 0.7222015064508912, "grad_norm": 1.3145670890808105, "learning_rate": 3.7813050551053344e-06, "loss": 0.6968, "step": 4842 }, { "epoch": 0.7223506600044746, "grad_norm": 1.2229820489883423, "learning_rate": 3.7775219502640105e-06, "loss": 0.7513, "step": 4843 }, { "epoch": 0.722499813558058, "grad_norm": 1.4598931074142456, "learning_rate": 3.7737402981000827e-06, "loss": 0.6205, "step": 4844 }, { "epoch": 0.7226489671116414, "grad_norm": 2.139437437057495, "learning_rate": 3.7699600994964046e-06, "loss": 0.6456, "step": 4845 }, { "epoch": 0.7227981206652249, "grad_norm": 1.138352632522583, "learning_rate": 3.766181355335489e-06, "loss": 0.7907, "step": 4846 }, { "epoch": 0.7229472742188082, "grad_norm": 1.6479636430740356, "learning_rate": 3.7624040664995075e-06, "loss": 0.6276, "step": 4847 }, { "epoch": 0.7230964277723917, "grad_norm": 3.805224657058716, "learning_rate": 3.7586282338702918e-06, "loss": 0.7283, "step": 4848 }, { "epoch": 0.723245581325975, "grad_norm": 1.5246987342834473, "learning_rate": 3.754853858329336e-06, "loss": 0.7394, "step": 4849 }, { "epoch": 0.7233947348795585, "grad_norm": 1.519824504852295, "learning_rate": 3.7510809407577932e-06, "loss": 0.6441, "step": 4850 }, { "epoch": 0.7235438884331419, "grad_norm": 0.526759147644043, "learning_rate": 3.7473094820364707e-06, "loss": 0.2316, "step": 4851 }, { "epoch": 0.7236930419867253, "grad_norm": 1.6079243421554565, "learning_rate": 3.7435394830458414e-06, "loss": 0.697, "step": 4852 }, { "epoch": 0.7238421955403087, "grad_norm": 1.7443846464157104, "learning_rate": 3.73977094466604e-06, "loss": 0.5633, "step": 4853 }, { "epoch": 0.7239913490938922, "grad_norm": 1.291777491569519, "learning_rate": 3.7360038677768495e-06, "loss": 0.7212, "step": 4854 }, { "epoch": 0.7241405026474755, "grad_norm": 0.5313031077384949, "learning_rate": 3.7322382532577206e-06, "loss": 0.2394, "step": 4855 }, { "epoch": 0.724289656201059, "grad_norm": 0.5955619812011719, "learning_rate": 3.72847410198776e-06, "loss": 0.2555, "step": 4856 }, { "epoch": 0.7244388097546424, "grad_norm": 1.8597893714904785, "learning_rate": 3.7247114148457342e-06, "loss": 0.7759, "step": 4857 }, { "epoch": 0.7245879633082258, "grad_norm": 1.1972180604934692, "learning_rate": 3.7209501927100666e-06, "loss": 0.7036, "step": 4858 }, { "epoch": 0.7247371168618092, "grad_norm": 1.5687410831451416, "learning_rate": 3.7171904364588405e-06, "loss": 0.6985, "step": 4859 }, { "epoch": 0.7248862704153927, "grad_norm": 1.5539051294326782, "learning_rate": 3.7134321469697886e-06, "loss": 0.751, "step": 4860 }, { "epoch": 0.725035423968976, "grad_norm": 1.7765949964523315, "learning_rate": 3.7096753251203134e-06, "loss": 0.6628, "step": 4861 }, { "epoch": 0.7251845775225595, "grad_norm": 1.2591749429702759, "learning_rate": 3.7059199717874693e-06, "loss": 0.7277, "step": 4862 }, { "epoch": 0.7253337310761429, "grad_norm": 3.262563943862915, "learning_rate": 3.7021660878479628e-06, "loss": 0.6566, "step": 4863 }, { "epoch": 0.7254828846297263, "grad_norm": 1.3418991565704346, "learning_rate": 3.698413674178165e-06, "loss": 0.7241, "step": 4864 }, { "epoch": 0.7256320381833097, "grad_norm": 1.6909159421920776, "learning_rate": 3.6946627316541017e-06, "loss": 0.7247, "step": 4865 }, { "epoch": 0.7257811917368931, "grad_norm": 1.2781214714050293, "learning_rate": 3.690913261151453e-06, "loss": 0.6963, "step": 4866 }, { "epoch": 0.7259303452904765, "grad_norm": 1.7492492198944092, "learning_rate": 3.6871652635455577e-06, "loss": 0.6313, "step": 4867 }, { "epoch": 0.72607949884406, "grad_norm": 1.3744221925735474, "learning_rate": 3.683418739711413e-06, "loss": 0.6257, "step": 4868 }, { "epoch": 0.7262286523976433, "grad_norm": 1.6885571479797363, "learning_rate": 3.6796736905236618e-06, "loss": 0.6537, "step": 4869 }, { "epoch": 0.7263778059512268, "grad_norm": 1.905976414680481, "learning_rate": 3.6759301168566152e-06, "loss": 0.7165, "step": 4870 }, { "epoch": 0.7265269595048102, "grad_norm": 3.3835532665252686, "learning_rate": 3.6721880195842317e-06, "loss": 0.686, "step": 4871 }, { "epoch": 0.7266761130583936, "grad_norm": 1.9282557964324951, "learning_rate": 3.668447399580133e-06, "loss": 0.6226, "step": 4872 }, { "epoch": 0.726825266611977, "grad_norm": 1.1920303106307983, "learning_rate": 3.664708257717583e-06, "loss": 0.7852, "step": 4873 }, { "epoch": 0.7269744201655605, "grad_norm": 1.3234282732009888, "learning_rate": 3.660970594869513e-06, "loss": 0.6644, "step": 4874 }, { "epoch": 0.7271235737191438, "grad_norm": 1.4736438989639282, "learning_rate": 3.6572344119085033e-06, "loss": 0.705, "step": 4875 }, { "epoch": 0.7272727272727273, "grad_norm": 1.9430279731750488, "learning_rate": 3.6534997097067913e-06, "loss": 0.6503, "step": 4876 }, { "epoch": 0.7274218808263107, "grad_norm": 1.3839378356933594, "learning_rate": 3.649766489136265e-06, "loss": 0.7493, "step": 4877 }, { "epoch": 0.7275710343798941, "grad_norm": 2.431945323944092, "learning_rate": 3.6460347510684736e-06, "loss": 0.6969, "step": 4878 }, { "epoch": 0.7277201879334775, "grad_norm": 1.9144729375839233, "learning_rate": 3.642304496374608e-06, "loss": 0.6668, "step": 4879 }, { "epoch": 0.727869341487061, "grad_norm": 1.5535203218460083, "learning_rate": 3.638575725925523e-06, "loss": 0.7119, "step": 4880 }, { "epoch": 0.7280184950406443, "grad_norm": 1.8989776372909546, "learning_rate": 3.634848440591728e-06, "loss": 0.5968, "step": 4881 }, { "epoch": 0.7281676485942278, "grad_norm": 1.643371343612671, "learning_rate": 3.631122641243372e-06, "loss": 0.6728, "step": 4882 }, { "epoch": 0.7283168021478111, "grad_norm": 1.4787664413452148, "learning_rate": 3.6273983287502756e-06, "loss": 0.6237, "step": 4883 }, { "epoch": 0.7284659557013946, "grad_norm": 1.5521117448806763, "learning_rate": 3.623675503981905e-06, "loss": 0.6099, "step": 4884 }, { "epoch": 0.728615109254978, "grad_norm": 1.5876655578613281, "learning_rate": 3.619954167807369e-06, "loss": 0.6837, "step": 4885 }, { "epoch": 0.7287642628085614, "grad_norm": 2.261718273162842, "learning_rate": 3.616234321095441e-06, "loss": 0.7145, "step": 4886 }, { "epoch": 0.7289134163621448, "grad_norm": 2.801173210144043, "learning_rate": 3.612515964714548e-06, "loss": 0.6702, "step": 4887 }, { "epoch": 0.7290625699157283, "grad_norm": 1.449162244796753, "learning_rate": 3.608799099532757e-06, "loss": 0.6198, "step": 4888 }, { "epoch": 0.7292117234693116, "grad_norm": 2.0348546504974365, "learning_rate": 3.6050837264177952e-06, "loss": 0.6749, "step": 4889 }, { "epoch": 0.7293608770228951, "grad_norm": 2.072033405303955, "learning_rate": 3.6013698462370426e-06, "loss": 0.7219, "step": 4890 }, { "epoch": 0.7295100305764785, "grad_norm": 1.361330509185791, "learning_rate": 3.5976574598575288e-06, "loss": 0.7731, "step": 4891 }, { "epoch": 0.7296591841300619, "grad_norm": 1.8661701679229736, "learning_rate": 3.593946568145932e-06, "loss": 0.6921, "step": 4892 }, { "epoch": 0.7298083376836453, "grad_norm": 1.2891581058502197, "learning_rate": 3.590237171968588e-06, "loss": 0.7226, "step": 4893 }, { "epoch": 0.7299574912372288, "grad_norm": 1.5752534866333008, "learning_rate": 3.5865292721914724e-06, "loss": 0.7097, "step": 4894 }, { "epoch": 0.7301066447908121, "grad_norm": 2.197068452835083, "learning_rate": 3.5828228696802226e-06, "loss": 0.6295, "step": 4895 }, { "epoch": 0.7302557983443956, "grad_norm": 1.3552398681640625, "learning_rate": 3.5791179653001195e-06, "loss": 0.6729, "step": 4896 }, { "epoch": 0.7304049518979789, "grad_norm": 2.0226709842681885, "learning_rate": 3.5754145599161026e-06, "loss": 0.6522, "step": 4897 }, { "epoch": 0.7305541054515624, "grad_norm": 2.4261932373046875, "learning_rate": 3.5717126543927484e-06, "loss": 0.6554, "step": 4898 }, { "epoch": 0.7307032590051458, "grad_norm": 1.5315459966659546, "learning_rate": 3.5680122495942925e-06, "loss": 0.6211, "step": 4899 }, { "epoch": 0.7308524125587292, "grad_norm": 1.546151876449585, "learning_rate": 3.5643133463846193e-06, "loss": 0.7219, "step": 4900 }, { "epoch": 0.7310015661123126, "grad_norm": 1.67578125, "learning_rate": 3.5606159456272613e-06, "loss": 0.7058, "step": 4901 }, { "epoch": 0.7311507196658961, "grad_norm": 1.6955194473266602, "learning_rate": 3.5569200481854003e-06, "loss": 0.7798, "step": 4902 }, { "epoch": 0.7312998732194794, "grad_norm": 1.5084625482559204, "learning_rate": 3.5532256549218715e-06, "loss": 0.6687, "step": 4903 }, { "epoch": 0.7314490267730629, "grad_norm": 2.1829400062561035, "learning_rate": 3.549532766699146e-06, "loss": 0.6873, "step": 4904 }, { "epoch": 0.7315981803266463, "grad_norm": 1.5182386636734009, "learning_rate": 3.5458413843793583e-06, "loss": 0.7236, "step": 4905 }, { "epoch": 0.7317473338802297, "grad_norm": 2.2980401515960693, "learning_rate": 3.5421515088242855e-06, "loss": 0.603, "step": 4906 }, { "epoch": 0.7318964874338131, "grad_norm": 1.1620439291000366, "learning_rate": 3.5384631408953483e-06, "loss": 0.7034, "step": 4907 }, { "epoch": 0.7320456409873966, "grad_norm": 1.3408235311508179, "learning_rate": 3.5347762814536224e-06, "loss": 0.6589, "step": 4908 }, { "epoch": 0.7321947945409799, "grad_norm": 1.3319282531738281, "learning_rate": 3.5310909313598287e-06, "loss": 0.7085, "step": 4909 }, { "epoch": 0.7323439480945634, "grad_norm": 2.0386369228363037, "learning_rate": 3.5274070914743362e-06, "loss": 0.6954, "step": 4910 }, { "epoch": 0.7324931016481467, "grad_norm": 1.5974948406219482, "learning_rate": 3.5237247626571604e-06, "loss": 0.6917, "step": 4911 }, { "epoch": 0.7326422552017302, "grad_norm": 2.721423387527466, "learning_rate": 3.520043945767968e-06, "loss": 0.7517, "step": 4912 }, { "epoch": 0.7327914087553136, "grad_norm": 1.597348928451538, "learning_rate": 3.5163646416660634e-06, "loss": 0.7261, "step": 4913 }, { "epoch": 0.732940562308897, "grad_norm": 1.6120909452438354, "learning_rate": 3.512686851210406e-06, "loss": 0.676, "step": 4914 }, { "epoch": 0.7330897158624804, "grad_norm": 1.604748249053955, "learning_rate": 3.509010575259604e-06, "loss": 0.627, "step": 4915 }, { "epoch": 0.7332388694160639, "grad_norm": 1.7513474225997925, "learning_rate": 3.5053358146719e-06, "loss": 0.6267, "step": 4916 }, { "epoch": 0.7333880229696472, "grad_norm": 1.8617902994155884, "learning_rate": 3.501662570305191e-06, "loss": 0.7207, "step": 4917 }, { "epoch": 0.7335371765232307, "grad_norm": 1.2604480981826782, "learning_rate": 3.4979908430170285e-06, "loss": 0.7356, "step": 4918 }, { "epoch": 0.733686330076814, "grad_norm": 2.3308465480804443, "learning_rate": 3.4943206336645917e-06, "loss": 0.6687, "step": 4919 }, { "epoch": 0.7338354836303975, "grad_norm": 1.6474708318710327, "learning_rate": 3.490651943104718e-06, "loss": 0.6734, "step": 4920 }, { "epoch": 0.7339846371839809, "grad_norm": 2.0006890296936035, "learning_rate": 3.4869847721938897e-06, "loss": 0.777, "step": 4921 }, { "epoch": 0.7341337907375644, "grad_norm": 1.477189540863037, "learning_rate": 3.4833191217882247e-06, "loss": 0.6725, "step": 4922 }, { "epoch": 0.7342829442911477, "grad_norm": 2.144035816192627, "learning_rate": 3.479654992743495e-06, "loss": 0.7183, "step": 4923 }, { "epoch": 0.7344320978447312, "grad_norm": 2.208404302597046, "learning_rate": 3.4759923859151167e-06, "loss": 0.6885, "step": 4924 }, { "epoch": 0.7345812513983145, "grad_norm": 1.5403738021850586, "learning_rate": 3.4723313021581517e-06, "loss": 0.7693, "step": 4925 }, { "epoch": 0.734730404951898, "grad_norm": 1.5952805280685425, "learning_rate": 3.4686717423272932e-06, "loss": 0.6891, "step": 4926 }, { "epoch": 0.7348795585054814, "grad_norm": 1.429093837738037, "learning_rate": 3.465013707276902e-06, "loss": 0.6269, "step": 4927 }, { "epoch": 0.7350287120590648, "grad_norm": 1.9802924394607544, "learning_rate": 3.4613571978609595e-06, "loss": 0.6082, "step": 4928 }, { "epoch": 0.7351778656126482, "grad_norm": 1.5002896785736084, "learning_rate": 3.4577022149331065e-06, "loss": 0.6976, "step": 4929 }, { "epoch": 0.7353270191662317, "grad_norm": 1.3859171867370605, "learning_rate": 3.4540487593466197e-06, "loss": 0.6077, "step": 4930 }, { "epoch": 0.735476172719815, "grad_norm": 2.5222864151000977, "learning_rate": 3.4503968319544266e-06, "loss": 0.6726, "step": 4931 }, { "epoch": 0.7356253262733985, "grad_norm": 1.2106963396072388, "learning_rate": 3.4467464336090863e-06, "loss": 0.6528, "step": 4932 }, { "epoch": 0.7357744798269819, "grad_norm": 1.7497116327285767, "learning_rate": 3.443097565162811e-06, "loss": 0.629, "step": 4933 }, { "epoch": 0.7359236333805653, "grad_norm": 1.7012567520141602, "learning_rate": 3.4394502274674544e-06, "loss": 0.7072, "step": 4934 }, { "epoch": 0.7360727869341487, "grad_norm": 1.6094810962677002, "learning_rate": 3.435804421374502e-06, "loss": 0.7358, "step": 4935 }, { "epoch": 0.7362219404877322, "grad_norm": 1.2289350032806396, "learning_rate": 3.4321601477351017e-06, "loss": 0.6736, "step": 4936 }, { "epoch": 0.7363710940413155, "grad_norm": 2.1155009269714355, "learning_rate": 3.4285174074000317e-06, "loss": 0.6596, "step": 4937 }, { "epoch": 0.736520247594899, "grad_norm": 1.9624170064926147, "learning_rate": 3.4248762012197047e-06, "loss": 0.6442, "step": 4938 }, { "epoch": 0.7366694011484823, "grad_norm": 2.295825719833374, "learning_rate": 3.42123653004419e-06, "loss": 0.6206, "step": 4939 }, { "epoch": 0.7368185547020658, "grad_norm": 6.336740493774414, "learning_rate": 3.417598394723193e-06, "loss": 0.681, "step": 4940 }, { "epoch": 0.7369677082556492, "grad_norm": 3.333146810531616, "learning_rate": 3.4139617961060546e-06, "loss": 0.6322, "step": 4941 }, { "epoch": 0.7371168618092326, "grad_norm": 1.7437856197357178, "learning_rate": 3.4103267350417645e-06, "loss": 0.6433, "step": 4942 }, { "epoch": 0.737266015362816, "grad_norm": 1.802078127861023, "learning_rate": 3.406693212378951e-06, "loss": 0.6198, "step": 4943 }, { "epoch": 0.7374151689163995, "grad_norm": 1.2776471376419067, "learning_rate": 3.4030612289658836e-06, "loss": 0.6943, "step": 4944 }, { "epoch": 0.7375643224699828, "grad_norm": 1.5335756540298462, "learning_rate": 3.399430785650473e-06, "loss": 0.6771, "step": 4945 }, { "epoch": 0.7377134760235663, "grad_norm": 1.5845118761062622, "learning_rate": 3.395801883280271e-06, "loss": 0.6578, "step": 4946 }, { "epoch": 0.7378626295771497, "grad_norm": 1.4425889253616333, "learning_rate": 3.3921745227024626e-06, "loss": 0.6676, "step": 4947 }, { "epoch": 0.7380117831307331, "grad_norm": 1.8459115028381348, "learning_rate": 3.388548704763882e-06, "loss": 0.5935, "step": 4948 }, { "epoch": 0.7381609366843165, "grad_norm": 2.7688488960266113, "learning_rate": 3.3849244303109986e-06, "loss": 0.621, "step": 4949 }, { "epoch": 0.7383100902379, "grad_norm": 1.726754903793335, "learning_rate": 3.381301700189927e-06, "loss": 0.6692, "step": 4950 }, { "epoch": 0.7384592437914833, "grad_norm": 1.4330254793167114, "learning_rate": 3.3776805152464087e-06, "loss": 0.7453, "step": 4951 }, { "epoch": 0.7386083973450668, "grad_norm": 1.8322275876998901, "learning_rate": 3.3740608763258375e-06, "loss": 0.6535, "step": 4952 }, { "epoch": 0.7387575508986501, "grad_norm": 1.3251410722732544, "learning_rate": 3.3704427842732403e-06, "loss": 0.7346, "step": 4953 }, { "epoch": 0.7389067044522336, "grad_norm": 1.355395793914795, "learning_rate": 3.366826239933283e-06, "loss": 0.6939, "step": 4954 }, { "epoch": 0.739055858005817, "grad_norm": 1.7743303775787354, "learning_rate": 3.363211244150273e-06, "loss": 0.6838, "step": 4955 }, { "epoch": 0.7392050115594004, "grad_norm": 3.9373044967651367, "learning_rate": 3.359597797768157e-06, "loss": 0.7027, "step": 4956 }, { "epoch": 0.7393541651129838, "grad_norm": 1.3613828420639038, "learning_rate": 3.3559859016305094e-06, "loss": 0.7008, "step": 4957 }, { "epoch": 0.7395033186665673, "grad_norm": 1.636875867843628, "learning_rate": 3.352375556580556e-06, "loss": 0.676, "step": 4958 }, { "epoch": 0.7396524722201506, "grad_norm": 2.228189468383789, "learning_rate": 3.3487667634611555e-06, "loss": 0.6165, "step": 4959 }, { "epoch": 0.7398016257737341, "grad_norm": 1.4116812944412231, "learning_rate": 3.3451595231148005e-06, "loss": 0.7181, "step": 4960 }, { "epoch": 0.7399507793273175, "grad_norm": 1.4908535480499268, "learning_rate": 3.341553836383621e-06, "loss": 0.6758, "step": 4961 }, { "epoch": 0.7400999328809009, "grad_norm": 2.2762720584869385, "learning_rate": 3.3379497041094e-06, "loss": 0.694, "step": 4962 }, { "epoch": 0.7402490864344843, "grad_norm": 5.720376968383789, "learning_rate": 3.334347127133534e-06, "loss": 0.7251, "step": 4963 }, { "epoch": 0.7403982399880678, "grad_norm": 1.6626170873641968, "learning_rate": 3.3307461062970726e-06, "loss": 0.6578, "step": 4964 }, { "epoch": 0.7405473935416511, "grad_norm": 2.1493875980377197, "learning_rate": 3.3271466424406984e-06, "loss": 0.637, "step": 4965 }, { "epoch": 0.7406965470952346, "grad_norm": 1.3172506093978882, "learning_rate": 3.323548736404725e-06, "loss": 0.7433, "step": 4966 }, { "epoch": 0.7408457006488179, "grad_norm": 1.5917984247207642, "learning_rate": 3.3199523890291074e-06, "loss": 0.7098, "step": 4967 }, { "epoch": 0.7409948542024014, "grad_norm": 1.2729474306106567, "learning_rate": 3.3163576011534417e-06, "loss": 0.6828, "step": 4968 }, { "epoch": 0.7411440077559848, "grad_norm": 1.3903692960739136, "learning_rate": 3.312764373616946e-06, "loss": 0.7259, "step": 4969 }, { "epoch": 0.7412931613095682, "grad_norm": 1.2282217741012573, "learning_rate": 3.3091727072584825e-06, "loss": 0.6849, "step": 4970 }, { "epoch": 0.7414423148631516, "grad_norm": 1.8679122924804688, "learning_rate": 3.305582602916558e-06, "loss": 0.677, "step": 4971 }, { "epoch": 0.7415914684167351, "grad_norm": 3.1926169395446777, "learning_rate": 3.3019940614292977e-06, "loss": 0.7282, "step": 4972 }, { "epoch": 0.7417406219703184, "grad_norm": 1.6288890838623047, "learning_rate": 3.2984070836344717e-06, "loss": 0.6532, "step": 4973 }, { "epoch": 0.7418897755239019, "grad_norm": 1.1773794889450073, "learning_rate": 3.2948216703694836e-06, "loss": 0.7192, "step": 4974 }, { "epoch": 0.7420389290774853, "grad_norm": 1.3549846410751343, "learning_rate": 3.2912378224713727e-06, "loss": 0.6091, "step": 4975 }, { "epoch": 0.7421880826310687, "grad_norm": 1.1118943691253662, "learning_rate": 3.287655540776805e-06, "loss": 0.6549, "step": 4976 }, { "epoch": 0.7423372361846521, "grad_norm": 1.73173987865448, "learning_rate": 3.284074826122092e-06, "loss": 0.6655, "step": 4977 }, { "epoch": 0.7424863897382356, "grad_norm": 2.1239054203033447, "learning_rate": 3.280495679343173e-06, "loss": 0.6995, "step": 4978 }, { "epoch": 0.7426355432918189, "grad_norm": 0.5194622278213501, "learning_rate": 3.2769181012756248e-06, "loss": 0.2445, "step": 4979 }, { "epoch": 0.7427846968454024, "grad_norm": 1.3660404682159424, "learning_rate": 3.2733420927546533e-06, "loss": 0.691, "step": 4980 }, { "epoch": 0.7429338503989857, "grad_norm": 1.8152834177017212, "learning_rate": 3.2697676546151045e-06, "loss": 0.7008, "step": 4981 }, { "epoch": 0.7430830039525692, "grad_norm": 2.1687135696411133, "learning_rate": 3.266194787691449e-06, "loss": 0.6737, "step": 4982 }, { "epoch": 0.7432321575061526, "grad_norm": 1.4208048582077026, "learning_rate": 3.262623492817798e-06, "loss": 0.6976, "step": 4983 }, { "epoch": 0.743381311059736, "grad_norm": 1.530199408531189, "learning_rate": 3.2590537708278956e-06, "loss": 0.69, "step": 4984 }, { "epoch": 0.7435304646133194, "grad_norm": 1.673867106437683, "learning_rate": 3.25548562255511e-06, "loss": 0.6858, "step": 4985 }, { "epoch": 0.7436796181669029, "grad_norm": 1.694718599319458, "learning_rate": 3.2519190488324528e-06, "loss": 0.6781, "step": 4986 }, { "epoch": 0.7438287717204862, "grad_norm": 1.6363760232925415, "learning_rate": 3.2483540504925616e-06, "loss": 0.7232, "step": 4987 }, { "epoch": 0.7439779252740697, "grad_norm": 1.4444175958633423, "learning_rate": 3.24479062836771e-06, "loss": 0.6435, "step": 4988 }, { "epoch": 0.744127078827653, "grad_norm": 1.5737664699554443, "learning_rate": 3.2412287832898004e-06, "loss": 0.6478, "step": 4989 }, { "epoch": 0.7442762323812365, "grad_norm": 1.5023373365402222, "learning_rate": 3.237668516090372e-06, "loss": 0.6514, "step": 4990 }, { "epoch": 0.7444253859348199, "grad_norm": 1.5534909963607788, "learning_rate": 3.2341098276005856e-06, "loss": 0.6562, "step": 4991 }, { "epoch": 0.7445745394884034, "grad_norm": 1.2811555862426758, "learning_rate": 3.2305527186512432e-06, "loss": 0.6533, "step": 4992 }, { "epoch": 0.7447236930419867, "grad_norm": 1.5049370527267456, "learning_rate": 3.226997190072777e-06, "loss": 0.691, "step": 4993 }, { "epoch": 0.7448728465955702, "grad_norm": 3.0051589012145996, "learning_rate": 3.2234432426952432e-06, "loss": 0.6929, "step": 4994 }, { "epoch": 0.7450220001491535, "grad_norm": 1.7581428289413452, "learning_rate": 3.219890877348336e-06, "loss": 0.6724, "step": 4995 }, { "epoch": 0.745171153702737, "grad_norm": 2.2561347484588623, "learning_rate": 3.216340094861378e-06, "loss": 0.6953, "step": 4996 }, { "epoch": 0.7453203072563204, "grad_norm": 2.4084436893463135, "learning_rate": 3.212790896063321e-06, "loss": 0.5511, "step": 4997 }, { "epoch": 0.7454694608099038, "grad_norm": 2.2233340740203857, "learning_rate": 3.2092432817827502e-06, "loss": 0.722, "step": 4998 }, { "epoch": 0.7456186143634872, "grad_norm": 1.1752979755401611, "learning_rate": 3.2056972528478802e-06, "loss": 0.8125, "step": 4999 }, { "epoch": 0.7457677679170707, "grad_norm": 1.7348535060882568, "learning_rate": 3.2021528100865483e-06, "loss": 0.706, "step": 5000 }, { "epoch": 0.745916921470654, "grad_norm": 1.9210377931594849, "learning_rate": 3.198609954326232e-06, "loss": 0.6328, "step": 5001 }, { "epoch": 0.7460660750242375, "grad_norm": 1.3841195106506348, "learning_rate": 3.1950686863940315e-06, "loss": 0.6567, "step": 5002 }, { "epoch": 0.7462152285778209, "grad_norm": 0.5469581484794617, "learning_rate": 3.1915290071166836e-06, "loss": 0.231, "step": 5003 }, { "epoch": 0.7463643821314043, "grad_norm": 1.8762807846069336, "learning_rate": 3.1879909173205425e-06, "loss": 0.6852, "step": 5004 }, { "epoch": 0.7465135356849877, "grad_norm": 1.6195948123931885, "learning_rate": 3.1844544178315995e-06, "loss": 0.6553, "step": 5005 }, { "epoch": 0.7466626892385712, "grad_norm": 2.164243459701538, "learning_rate": 3.1809195094754754e-06, "loss": 0.6321, "step": 5006 }, { "epoch": 0.7468118427921545, "grad_norm": 2.1316683292388916, "learning_rate": 3.177386193077415e-06, "loss": 0.726, "step": 5007 }, { "epoch": 0.746960996345738, "grad_norm": 1.3797911405563354, "learning_rate": 3.1738544694622955e-06, "loss": 0.6388, "step": 5008 }, { "epoch": 0.7471101498993213, "grad_norm": 1.690096139907837, "learning_rate": 3.170324339454621e-06, "loss": 0.7215, "step": 5009 }, { "epoch": 0.7472593034529048, "grad_norm": 1.4919441938400269, "learning_rate": 3.1667958038785206e-06, "loss": 0.7588, "step": 5010 }, { "epoch": 0.7474084570064882, "grad_norm": 1.7760390043258667, "learning_rate": 3.1632688635577535e-06, "loss": 0.6085, "step": 5011 }, { "epoch": 0.7475576105600716, "grad_norm": 1.5388494729995728, "learning_rate": 3.15974351931571e-06, "loss": 0.6907, "step": 5012 }, { "epoch": 0.747706764113655, "grad_norm": 2.001389503479004, "learning_rate": 3.156219771975397e-06, "loss": 0.7096, "step": 5013 }, { "epoch": 0.7478559176672385, "grad_norm": 1.3696134090423584, "learning_rate": 3.152697622359463e-06, "loss": 0.6969, "step": 5014 }, { "epoch": 0.7480050712208218, "grad_norm": 1.4973281621932983, "learning_rate": 3.149177071290178e-06, "loss": 0.7172, "step": 5015 }, { "epoch": 0.7481542247744053, "grad_norm": 1.6145000457763672, "learning_rate": 3.14565811958943e-06, "loss": 0.6737, "step": 5016 }, { "epoch": 0.7483033783279887, "grad_norm": 1.6761184930801392, "learning_rate": 3.142140768078744e-06, "loss": 0.7938, "step": 5017 }, { "epoch": 0.7484525318815721, "grad_norm": 1.9230211973190308, "learning_rate": 3.138625017579272e-06, "loss": 0.7014, "step": 5018 }, { "epoch": 0.7486016854351555, "grad_norm": 1.8877824544906616, "learning_rate": 3.1351108689117813e-06, "loss": 0.7041, "step": 5019 }, { "epoch": 0.748750838988739, "grad_norm": 1.867502212524414, "learning_rate": 3.1315983228966774e-06, "loss": 0.7145, "step": 5020 }, { "epoch": 0.7488999925423223, "grad_norm": 1.7579102516174316, "learning_rate": 3.1280873803539845e-06, "loss": 0.6365, "step": 5021 }, { "epoch": 0.7490491460959058, "grad_norm": 1.3961807489395142, "learning_rate": 3.1245780421033557e-06, "loss": 0.7221, "step": 5022 }, { "epoch": 0.7491982996494891, "grad_norm": 1.537582278251648, "learning_rate": 3.121070308964069e-06, "loss": 0.6957, "step": 5023 }, { "epoch": 0.7493474532030726, "grad_norm": 1.7169479131698608, "learning_rate": 3.1175641817550295e-06, "loss": 0.6357, "step": 5024 }, { "epoch": 0.749496606756656, "grad_norm": 2.6281092166900635, "learning_rate": 3.1140596612947582e-06, "loss": 0.6675, "step": 5025 }, { "epoch": 0.7496457603102394, "grad_norm": 1.8119837045669556, "learning_rate": 3.1105567484014133e-06, "loss": 0.6559, "step": 5026 }, { "epoch": 0.7497949138638228, "grad_norm": 1.6611311435699463, "learning_rate": 3.1070554438927703e-06, "loss": 0.6371, "step": 5027 }, { "epoch": 0.7499440674174063, "grad_norm": 1.4698675870895386, "learning_rate": 3.1035557485862343e-06, "loss": 0.658, "step": 5028 }, { "epoch": 0.7500932209709896, "grad_norm": 1.6011688709259033, "learning_rate": 3.1000576632988265e-06, "loss": 0.7513, "step": 5029 }, { "epoch": 0.7502423745245731, "grad_norm": 3.1141412258148193, "learning_rate": 3.0965611888471993e-06, "loss": 0.6818, "step": 5030 }, { "epoch": 0.7503915280781565, "grad_norm": 2.163206100463867, "learning_rate": 3.093066326047628e-06, "loss": 0.6806, "step": 5031 }, { "epoch": 0.7505406816317399, "grad_norm": 2.215420961380005, "learning_rate": 3.0895730757160104e-06, "loss": 0.7279, "step": 5032 }, { "epoch": 0.7506898351853233, "grad_norm": 1.5136805772781372, "learning_rate": 3.0860814386678683e-06, "loss": 0.7073, "step": 5033 }, { "epoch": 0.7508389887389068, "grad_norm": 1.452886939048767, "learning_rate": 3.0825914157183502e-06, "loss": 0.7299, "step": 5034 }, { "epoch": 0.7509881422924901, "grad_norm": 0.5288633704185486, "learning_rate": 3.079103007682217e-06, "loss": 0.2383, "step": 5035 }, { "epoch": 0.7511372958460736, "grad_norm": 2.115394353866577, "learning_rate": 3.0756162153738633e-06, "loss": 0.6933, "step": 5036 }, { "epoch": 0.7512864493996569, "grad_norm": 2.3039591312408447, "learning_rate": 3.072131039607308e-06, "loss": 0.648, "step": 5037 }, { "epoch": 0.7514356029532404, "grad_norm": 1.436305284500122, "learning_rate": 3.0686474811961787e-06, "loss": 0.7524, "step": 5038 }, { "epoch": 0.7515847565068238, "grad_norm": 1.5086421966552734, "learning_rate": 3.0651655409537394e-06, "loss": 0.7132, "step": 5039 }, { "epoch": 0.7517339100604072, "grad_norm": 1.2614079713821411, "learning_rate": 3.0616852196928714e-06, "loss": 0.7545, "step": 5040 }, { "epoch": 0.7518830636139906, "grad_norm": 0.5612931847572327, "learning_rate": 3.0582065182260777e-06, "loss": 0.2472, "step": 5041 }, { "epoch": 0.7520322171675741, "grad_norm": 1.799038290977478, "learning_rate": 3.054729437365482e-06, "loss": 0.6913, "step": 5042 }, { "epoch": 0.7521813707211574, "grad_norm": 1.3564401865005493, "learning_rate": 3.051253977922838e-06, "loss": 0.6778, "step": 5043 }, { "epoch": 0.7523305242747409, "grad_norm": 2.038909912109375, "learning_rate": 3.047780140709503e-06, "loss": 0.6505, "step": 5044 }, { "epoch": 0.7524796778283243, "grad_norm": 0.5338819622993469, "learning_rate": 3.044307926536474e-06, "loss": 0.2442, "step": 5045 }, { "epoch": 0.7526288313819077, "grad_norm": 1.507637619972229, "learning_rate": 3.0408373362143617e-06, "loss": 0.703, "step": 5046 }, { "epoch": 0.7527779849354911, "grad_norm": 1.8754016160964966, "learning_rate": 3.037368370553393e-06, "loss": 0.5244, "step": 5047 }, { "epoch": 0.7529271384890746, "grad_norm": 1.629431962966919, "learning_rate": 3.0339010303634186e-06, "loss": 0.6741, "step": 5048 }, { "epoch": 0.7530762920426579, "grad_norm": 1.5540764331817627, "learning_rate": 3.0304353164539224e-06, "loss": 0.6091, "step": 5049 }, { "epoch": 0.7532254455962414, "grad_norm": 1.7750765085220337, "learning_rate": 3.026971229633988e-06, "loss": 0.6243, "step": 5050 }, { "epoch": 0.7533745991498247, "grad_norm": 1.3332713842391968, "learning_rate": 3.023508770712331e-06, "loss": 0.7019, "step": 5051 }, { "epoch": 0.7535237527034082, "grad_norm": 3.6881754398345947, "learning_rate": 3.020047940497288e-06, "loss": 0.6979, "step": 5052 }, { "epoch": 0.7536729062569916, "grad_norm": 2.046842098236084, "learning_rate": 3.0165887397968064e-06, "loss": 0.6673, "step": 5053 }, { "epoch": 0.753822059810575, "grad_norm": 1.2343189716339111, "learning_rate": 3.0131311694184617e-06, "loss": 0.6882, "step": 5054 }, { "epoch": 0.7539712133641584, "grad_norm": 2.648679733276367, "learning_rate": 3.009675230169444e-06, "loss": 0.6266, "step": 5055 }, { "epoch": 0.7541203669177419, "grad_norm": 1.4448513984680176, "learning_rate": 3.006220922856571e-06, "loss": 0.6098, "step": 5056 }, { "epoch": 0.7542695204713252, "grad_norm": 1.9691001176834106, "learning_rate": 3.0027682482862606e-06, "loss": 0.6514, "step": 5057 }, { "epoch": 0.7544186740249087, "grad_norm": 2.1852762699127197, "learning_rate": 2.999317207264575e-06, "loss": 0.6875, "step": 5058 }, { "epoch": 0.7545678275784921, "grad_norm": 1.445947289466858, "learning_rate": 2.9958678005971744e-06, "loss": 0.6725, "step": 5059 }, { "epoch": 0.7547169811320755, "grad_norm": 2.592069387435913, "learning_rate": 2.9924200290893447e-06, "loss": 0.7151, "step": 5060 }, { "epoch": 0.7548661346856589, "grad_norm": 1.4153465032577515, "learning_rate": 2.9889738935459934e-06, "loss": 0.681, "step": 5061 }, { "epoch": 0.7550152882392424, "grad_norm": 1.2795034646987915, "learning_rate": 2.9855293947716446e-06, "loss": 0.6509, "step": 5062 }, { "epoch": 0.7551644417928257, "grad_norm": 1.5929635763168335, "learning_rate": 2.9820865335704318e-06, "loss": 0.6985, "step": 5063 }, { "epoch": 0.7553135953464092, "grad_norm": 1.3963947296142578, "learning_rate": 2.9786453107461166e-06, "loss": 0.7132, "step": 5064 }, { "epoch": 0.7554627488999925, "grad_norm": 1.661252737045288, "learning_rate": 2.9752057271020785e-06, "loss": 0.6108, "step": 5065 }, { "epoch": 0.7556119024535759, "grad_norm": 1.3357112407684326, "learning_rate": 2.9717677834413006e-06, "loss": 0.7293, "step": 5066 }, { "epoch": 0.7557610560071594, "grad_norm": 1.56090247631073, "learning_rate": 2.968331480566402e-06, "loss": 0.5718, "step": 5067 }, { "epoch": 0.7559102095607427, "grad_norm": 1.8816829919815063, "learning_rate": 2.96489681927961e-06, "loss": 0.6646, "step": 5068 }, { "epoch": 0.7560593631143262, "grad_norm": 1.266309142112732, "learning_rate": 2.961463800382761e-06, "loss": 0.6917, "step": 5069 }, { "epoch": 0.7562085166679096, "grad_norm": 0.5384680032730103, "learning_rate": 2.9580324246773195e-06, "loss": 0.2563, "step": 5070 }, { "epoch": 0.756357670221493, "grad_norm": 2.0554356575012207, "learning_rate": 2.9546026929643645e-06, "loss": 0.6522, "step": 5071 }, { "epoch": 0.7565068237750764, "grad_norm": 1.3449229001998901, "learning_rate": 2.9511746060445834e-06, "loss": 0.686, "step": 5072 }, { "epoch": 0.7566559773286599, "grad_norm": 1.753295660018921, "learning_rate": 2.947748164718288e-06, "loss": 0.7157, "step": 5073 }, { "epoch": 0.7568051308822432, "grad_norm": 1.961133360862732, "learning_rate": 2.9443233697854036e-06, "loss": 0.7107, "step": 5074 }, { "epoch": 0.7569542844358267, "grad_norm": 1.5808746814727783, "learning_rate": 2.9409002220454686e-06, "loss": 0.687, "step": 5075 }, { "epoch": 0.75710343798941, "grad_norm": 1.8539170026779175, "learning_rate": 2.9374787222976397e-06, "loss": 0.6476, "step": 5076 }, { "epoch": 0.7572525915429935, "grad_norm": 2.080857038497925, "learning_rate": 2.9340588713406927e-06, "loss": 0.5738, "step": 5077 }, { "epoch": 0.7574017450965769, "grad_norm": 1.2426832914352417, "learning_rate": 2.930640669973005e-06, "loss": 0.7492, "step": 5078 }, { "epoch": 0.7575508986501603, "grad_norm": 0.5216078162193298, "learning_rate": 2.927224118992582e-06, "loss": 0.2456, "step": 5079 }, { "epoch": 0.7577000522037437, "grad_norm": 2.128455877304077, "learning_rate": 2.92380921919704e-06, "loss": 0.6284, "step": 5080 }, { "epoch": 0.7578492057573272, "grad_norm": 1.2963236570358276, "learning_rate": 2.920395971383612e-06, "loss": 0.6695, "step": 5081 }, { "epoch": 0.7579983593109105, "grad_norm": 1.1915509700775146, "learning_rate": 2.9169843763491345e-06, "loss": 0.6255, "step": 5082 }, { "epoch": 0.758147512864494, "grad_norm": 1.319331407546997, "learning_rate": 2.9135744348900717e-06, "loss": 0.7038, "step": 5083 }, { "epoch": 0.7582966664180774, "grad_norm": 1.4149802923202515, "learning_rate": 2.9101661478024958e-06, "loss": 0.6556, "step": 5084 }, { "epoch": 0.7584458199716608, "grad_norm": 1.4916712045669556, "learning_rate": 2.9067595158820925e-06, "loss": 0.5857, "step": 5085 }, { "epoch": 0.7585949735252442, "grad_norm": 2.1741561889648438, "learning_rate": 2.9033545399241625e-06, "loss": 0.7567, "step": 5086 }, { "epoch": 0.7587441270788277, "grad_norm": 1.8433398008346558, "learning_rate": 2.8999512207236226e-06, "loss": 0.7325, "step": 5087 }, { "epoch": 0.758893280632411, "grad_norm": 1.692814588546753, "learning_rate": 2.8965495590749925e-06, "loss": 0.6109, "step": 5088 }, { "epoch": 0.7590424341859945, "grad_norm": 2.100382089614868, "learning_rate": 2.8931495557724154e-06, "loss": 0.644, "step": 5089 }, { "epoch": 0.7591915877395778, "grad_norm": 1.4549609422683716, "learning_rate": 2.8897512116096473e-06, "loss": 0.7278, "step": 5090 }, { "epoch": 0.7593407412931613, "grad_norm": 2.0979812145233154, "learning_rate": 2.8863545273800462e-06, "loss": 0.683, "step": 5091 }, { "epoch": 0.7594898948467447, "grad_norm": 1.7259933948516846, "learning_rate": 2.8829595038765914e-06, "loss": 0.6333, "step": 5092 }, { "epoch": 0.7596390484003281, "grad_norm": 1.9575622081756592, "learning_rate": 2.8795661418918806e-06, "loss": 0.6921, "step": 5093 }, { "epoch": 0.7597882019539115, "grad_norm": 1.4045408964157104, "learning_rate": 2.876174442218107e-06, "loss": 0.6952, "step": 5094 }, { "epoch": 0.759937355507495, "grad_norm": 1.698024034500122, "learning_rate": 2.8727844056470886e-06, "loss": 0.6888, "step": 5095 }, { "epoch": 0.7600865090610783, "grad_norm": 11.051603317260742, "learning_rate": 2.8693960329702542e-06, "loss": 0.6256, "step": 5096 }, { "epoch": 0.7602356626146618, "grad_norm": 1.3637218475341797, "learning_rate": 2.866009324978635e-06, "loss": 0.6377, "step": 5097 }, { "epoch": 0.7603848161682452, "grad_norm": 2.040456771850586, "learning_rate": 2.862624282462881e-06, "loss": 0.6102, "step": 5098 }, { "epoch": 0.7605339697218286, "grad_norm": 1.8490490913391113, "learning_rate": 2.859240906213254e-06, "loss": 0.7276, "step": 5099 }, { "epoch": 0.760683123275412, "grad_norm": 2.1777665615081787, "learning_rate": 2.855859197019627e-06, "loss": 0.623, "step": 5100 }, { "epoch": 0.7608322768289955, "grad_norm": 1.6899175643920898, "learning_rate": 2.8524791556714736e-06, "loss": 0.6466, "step": 5101 }, { "epoch": 0.7609814303825788, "grad_norm": 2.794332265853882, "learning_rate": 2.8491007829578965e-06, "loss": 0.7079, "step": 5102 }, { "epoch": 0.7611305839361623, "grad_norm": 2.0005383491516113, "learning_rate": 2.845724079667591e-06, "loss": 0.7137, "step": 5103 }, { "epoch": 0.7612797374897456, "grad_norm": 1.6181368827819824, "learning_rate": 2.8423490465888727e-06, "loss": 0.7112, "step": 5104 }, { "epoch": 0.7614288910433291, "grad_norm": 3.14265775680542, "learning_rate": 2.8389756845096637e-06, "loss": 0.6294, "step": 5105 }, { "epoch": 0.7615780445969125, "grad_norm": 2.635558843612671, "learning_rate": 2.835603994217502e-06, "loss": 0.6783, "step": 5106 }, { "epoch": 0.7617271981504959, "grad_norm": 1.5247243642807007, "learning_rate": 2.8322339764995235e-06, "loss": 0.7064, "step": 5107 }, { "epoch": 0.7618763517040793, "grad_norm": 0.5381494760513306, "learning_rate": 2.8288656321424824e-06, "loss": 0.2234, "step": 5108 }, { "epoch": 0.7620255052576628, "grad_norm": 1.4599275588989258, "learning_rate": 2.825498961932743e-06, "loss": 0.6901, "step": 5109 }, { "epoch": 0.7621746588112461, "grad_norm": 2.3479933738708496, "learning_rate": 2.8221339666562695e-06, "loss": 0.7492, "step": 5110 }, { "epoch": 0.7623238123648296, "grad_norm": 1.293577790260315, "learning_rate": 2.8187706470986496e-06, "loss": 0.7234, "step": 5111 }, { "epoch": 0.762472965918413, "grad_norm": 6.104905128479004, "learning_rate": 2.815409004045071e-06, "loss": 0.682, "step": 5112 }, { "epoch": 0.7626221194719964, "grad_norm": 2.1711745262145996, "learning_rate": 2.8120490382803244e-06, "loss": 0.6284, "step": 5113 }, { "epoch": 0.7627712730255798, "grad_norm": 1.715943455696106, "learning_rate": 2.8086907505888205e-06, "loss": 0.6884, "step": 5114 }, { "epoch": 0.7629204265791633, "grad_norm": 1.6648759841918945, "learning_rate": 2.8053341417545744e-06, "loss": 0.7827, "step": 5115 }, { "epoch": 0.7630695801327466, "grad_norm": 1.3333275318145752, "learning_rate": 2.801979212561202e-06, "loss": 0.7244, "step": 5116 }, { "epoch": 0.7632187336863301, "grad_norm": 1.5077836513519287, "learning_rate": 2.7986259637919365e-06, "loss": 0.7327, "step": 5117 }, { "epoch": 0.7633678872399134, "grad_norm": 1.7336407899856567, "learning_rate": 2.7952743962296146e-06, "loss": 0.6754, "step": 5118 }, { "epoch": 0.7635170407934969, "grad_norm": 2.048262596130371, "learning_rate": 2.7919245106566827e-06, "loss": 0.638, "step": 5119 }, { "epoch": 0.7636661943470803, "grad_norm": 1.5361459255218506, "learning_rate": 2.788576307855192e-06, "loss": 0.6444, "step": 5120 }, { "epoch": 0.7638153479006637, "grad_norm": 1.5958269834518433, "learning_rate": 2.785229788606806e-06, "loss": 0.6633, "step": 5121 }, { "epoch": 0.7639645014542471, "grad_norm": 1.660749077796936, "learning_rate": 2.7818849536927827e-06, "loss": 0.737, "step": 5122 }, { "epoch": 0.7641136550078306, "grad_norm": 1.3227134943008423, "learning_rate": 2.7785418038940004e-06, "loss": 0.6376, "step": 5123 }, { "epoch": 0.7642628085614139, "grad_norm": 1.2895358800888062, "learning_rate": 2.7752003399909423e-06, "loss": 0.7695, "step": 5124 }, { "epoch": 0.7644119621149974, "grad_norm": 1.7559384107589722, "learning_rate": 2.771860562763686e-06, "loss": 0.6332, "step": 5125 }, { "epoch": 0.7645611156685808, "grad_norm": 0.5317772030830383, "learning_rate": 2.768522472991929e-06, "loss": 0.2271, "step": 5126 }, { "epoch": 0.7647102692221642, "grad_norm": 1.2822517156600952, "learning_rate": 2.7651860714549695e-06, "loss": 0.6235, "step": 5127 }, { "epoch": 0.7648594227757476, "grad_norm": 1.9227359294891357, "learning_rate": 2.761851358931711e-06, "loss": 0.6007, "step": 5128 }, { "epoch": 0.7650085763293311, "grad_norm": 1.4219118356704712, "learning_rate": 2.758518336200664e-06, "loss": 0.6905, "step": 5129 }, { "epoch": 0.7651577298829144, "grad_norm": 1.2249077558517456, "learning_rate": 2.7551870040399475e-06, "loss": 0.6634, "step": 5130 }, { "epoch": 0.7653068834364979, "grad_norm": 1.3455406427383423, "learning_rate": 2.751857363227276e-06, "loss": 0.6586, "step": 5131 }, { "epoch": 0.7654560369900812, "grad_norm": 1.3951798677444458, "learning_rate": 2.7485294145399778e-06, "loss": 0.6791, "step": 5132 }, { "epoch": 0.7656051905436647, "grad_norm": 1.5257030725479126, "learning_rate": 2.7452031587549844e-06, "loss": 0.674, "step": 5133 }, { "epoch": 0.7657543440972481, "grad_norm": 1.5891464948654175, "learning_rate": 2.7418785966488347e-06, "loss": 0.6111, "step": 5134 }, { "epoch": 0.7659034976508315, "grad_norm": 1.5426981449127197, "learning_rate": 2.738555728997664e-06, "loss": 0.6376, "step": 5135 }, { "epoch": 0.7660526512044149, "grad_norm": 1.8277708292007446, "learning_rate": 2.7352345565772175e-06, "loss": 0.6488, "step": 5136 }, { "epoch": 0.7662018047579984, "grad_norm": 1.834139108657837, "learning_rate": 2.731915080162847e-06, "loss": 0.7024, "step": 5137 }, { "epoch": 0.7663509583115817, "grad_norm": 1.7974604368209839, "learning_rate": 2.728597300529503e-06, "loss": 0.5656, "step": 5138 }, { "epoch": 0.7665001118651652, "grad_norm": 1.1968920230865479, "learning_rate": 2.7252812184517454e-06, "loss": 0.6626, "step": 5139 }, { "epoch": 0.7666492654187486, "grad_norm": 1.3684724569320679, "learning_rate": 2.721966834703734e-06, "loss": 0.7882, "step": 5140 }, { "epoch": 0.766798418972332, "grad_norm": 1.6061028242111206, "learning_rate": 2.718654150059231e-06, "loss": 0.7371, "step": 5141 }, { "epoch": 0.7669475725259154, "grad_norm": 1.5682530403137207, "learning_rate": 2.715343165291604e-06, "loss": 0.6566, "step": 5142 }, { "epoch": 0.7670967260794989, "grad_norm": 0.5318388938903809, "learning_rate": 2.7120338811738277e-06, "loss": 0.2348, "step": 5143 }, { "epoch": 0.7672458796330822, "grad_norm": 2.075836181640625, "learning_rate": 2.708726298478469e-06, "loss": 0.6058, "step": 5144 }, { "epoch": 0.7673950331866657, "grad_norm": 1.582465410232544, "learning_rate": 2.7054204179777054e-06, "loss": 0.6684, "step": 5145 }, { "epoch": 0.767544186740249, "grad_norm": 1.2969040870666504, "learning_rate": 2.7021162404433243e-06, "loss": 0.7859, "step": 5146 }, { "epoch": 0.7676933402938325, "grad_norm": 1.120339274406433, "learning_rate": 2.6988137666466983e-06, "loss": 0.7735, "step": 5147 }, { "epoch": 0.7678424938474159, "grad_norm": 1.818491816520691, "learning_rate": 2.6955129973588136e-06, "loss": 0.6347, "step": 5148 }, { "epoch": 0.7679916474009993, "grad_norm": 2.1866281032562256, "learning_rate": 2.6922139333502594e-06, "loss": 0.6117, "step": 5149 }, { "epoch": 0.7681408009545827, "grad_norm": 1.7690799236297607, "learning_rate": 2.6889165753912173e-06, "loss": 0.6682, "step": 5150 }, { "epoch": 0.7682899545081662, "grad_norm": 1.8884261846542358, "learning_rate": 2.6856209242514797e-06, "loss": 0.651, "step": 5151 }, { "epoch": 0.7684391080617495, "grad_norm": 1.10110604763031, "learning_rate": 2.682326980700437e-06, "loss": 0.7473, "step": 5152 }, { "epoch": 0.768588261615333, "grad_norm": 1.5081795454025269, "learning_rate": 2.679034745507082e-06, "loss": 0.733, "step": 5153 }, { "epoch": 0.7687374151689164, "grad_norm": 3.4323136806488037, "learning_rate": 2.6757442194400087e-06, "loss": 0.6346, "step": 5154 }, { "epoch": 0.7688865687224998, "grad_norm": 1.5378254652023315, "learning_rate": 2.6724554032674133e-06, "loss": 0.662, "step": 5155 }, { "epoch": 0.7690357222760832, "grad_norm": 2.266279935836792, "learning_rate": 2.6691682977570855e-06, "loss": 0.7329, "step": 5156 }, { "epoch": 0.7691848758296667, "grad_norm": 1.5034401416778564, "learning_rate": 2.6658829036764232e-06, "loss": 0.7848, "step": 5157 }, { "epoch": 0.76933402938325, "grad_norm": 1.433694839477539, "learning_rate": 2.6625992217924245e-06, "loss": 0.6559, "step": 5158 }, { "epoch": 0.7694831829368335, "grad_norm": 2.8011586666107178, "learning_rate": 2.6593172528716884e-06, "loss": 0.6207, "step": 5159 }, { "epoch": 0.7696323364904168, "grad_norm": 1.388243317604065, "learning_rate": 2.6560369976804045e-06, "loss": 0.7919, "step": 5160 }, { "epoch": 0.7697814900440003, "grad_norm": 2.2767529487609863, "learning_rate": 2.6527584569843746e-06, "loss": 0.5986, "step": 5161 }, { "epoch": 0.7699306435975837, "grad_norm": 1.7233208417892456, "learning_rate": 2.6494816315489923e-06, "loss": 0.6689, "step": 5162 }, { "epoch": 0.7700797971511671, "grad_norm": 1.5470077991485596, "learning_rate": 2.6462065221392564e-06, "loss": 0.6482, "step": 5163 }, { "epoch": 0.7702289507047505, "grad_norm": 4.053182125091553, "learning_rate": 2.6429331295197593e-06, "loss": 0.7444, "step": 5164 }, { "epoch": 0.770378104258334, "grad_norm": 1.9315940141677856, "learning_rate": 2.6396614544547005e-06, "loss": 0.6608, "step": 5165 }, { "epoch": 0.7705272578119173, "grad_norm": 1.4786536693572998, "learning_rate": 2.6363914977078665e-06, "loss": 0.7068, "step": 5166 }, { "epoch": 0.7706764113655008, "grad_norm": 1.6178321838378906, "learning_rate": 2.6331232600426535e-06, "loss": 0.7392, "step": 5167 }, { "epoch": 0.7708255649190842, "grad_norm": 1.6542840003967285, "learning_rate": 2.6298567422220556e-06, "loss": 0.6039, "step": 5168 }, { "epoch": 0.7709747184726676, "grad_norm": 1.8989869356155396, "learning_rate": 2.6265919450086553e-06, "loss": 0.6561, "step": 5169 }, { "epoch": 0.771123872026251, "grad_norm": 0.49548158049583435, "learning_rate": 2.623328869164644e-06, "loss": 0.2593, "step": 5170 }, { "epoch": 0.7712730255798345, "grad_norm": 2.7582223415374756, "learning_rate": 2.6200675154518075e-06, "loss": 0.6588, "step": 5171 }, { "epoch": 0.7714221791334178, "grad_norm": 1.3782870769500732, "learning_rate": 2.6168078846315303e-06, "loss": 0.6303, "step": 5172 }, { "epoch": 0.7715713326870013, "grad_norm": 1.7923169136047363, "learning_rate": 2.613549977464793e-06, "loss": 0.6866, "step": 5173 }, { "epoch": 0.7717204862405846, "grad_norm": 1.6311924457550049, "learning_rate": 2.6102937947121798e-06, "loss": 0.6066, "step": 5174 }, { "epoch": 0.7718696397941681, "grad_norm": 2.5037784576416016, "learning_rate": 2.607039337133859e-06, "loss": 0.6699, "step": 5175 }, { "epoch": 0.7720187933477515, "grad_norm": 1.3678598403930664, "learning_rate": 2.60378660548961e-06, "loss": 0.7111, "step": 5176 }, { "epoch": 0.772167946901335, "grad_norm": 1.7879912853240967, "learning_rate": 2.6005356005388047e-06, "loss": 0.6593, "step": 5177 }, { "epoch": 0.7723171004549183, "grad_norm": 1.5374109745025635, "learning_rate": 2.5972863230404066e-06, "loss": 0.7181, "step": 5178 }, { "epoch": 0.7724662540085018, "grad_norm": 1.4517385959625244, "learning_rate": 2.594038773752984e-06, "loss": 0.6344, "step": 5179 }, { "epoch": 0.7726154075620851, "grad_norm": 1.305513858795166, "learning_rate": 2.590792953434695e-06, "loss": 0.7186, "step": 5180 }, { "epoch": 0.7727645611156686, "grad_norm": 1.9772592782974243, "learning_rate": 2.5875488628433e-06, "loss": 0.6593, "step": 5181 }, { "epoch": 0.772913714669252, "grad_norm": 1.5702152252197266, "learning_rate": 2.5843065027361526e-06, "loss": 0.671, "step": 5182 }, { "epoch": 0.7730628682228354, "grad_norm": 1.7553781270980835, "learning_rate": 2.581065873870203e-06, "loss": 0.7013, "step": 5183 }, { "epoch": 0.7732120217764188, "grad_norm": 1.5645884275436401, "learning_rate": 2.577826977001995e-06, "loss": 0.6147, "step": 5184 }, { "epoch": 0.7733611753300023, "grad_norm": 1.587423324584961, "learning_rate": 2.574589812887669e-06, "loss": 0.6481, "step": 5185 }, { "epoch": 0.7735103288835856, "grad_norm": 1.8008334636688232, "learning_rate": 2.5713543822829636e-06, "loss": 0.5853, "step": 5186 }, { "epoch": 0.7736594824371691, "grad_norm": 2.2465708255767822, "learning_rate": 2.5681206859432127e-06, "loss": 0.6695, "step": 5187 }, { "epoch": 0.7738086359907524, "grad_norm": 1.3511104583740234, "learning_rate": 2.5648887246233357e-06, "loss": 0.6995, "step": 5188 }, { "epoch": 0.7739577895443359, "grad_norm": 1.2024282217025757, "learning_rate": 2.5616584990778625e-06, "loss": 0.6864, "step": 5189 }, { "epoch": 0.7741069430979193, "grad_norm": 1.3164724111557007, "learning_rate": 2.5584300100609116e-06, "loss": 0.746, "step": 5190 }, { "epoch": 0.7742560966515027, "grad_norm": 1.6794952154159546, "learning_rate": 2.5552032583261867e-06, "loss": 0.7163, "step": 5191 }, { "epoch": 0.7744052502050861, "grad_norm": 8.964137077331543, "learning_rate": 2.551978244626998e-06, "loss": 0.6777, "step": 5192 }, { "epoch": 0.7745544037586696, "grad_norm": 1.8450753688812256, "learning_rate": 2.548754969716248e-06, "loss": 0.7202, "step": 5193 }, { "epoch": 0.7747035573122529, "grad_norm": 1.648600697517395, "learning_rate": 2.5455334343464246e-06, "loss": 0.6169, "step": 5194 }, { "epoch": 0.7748527108658364, "grad_norm": 1.5710448026657104, "learning_rate": 2.54231363926962e-06, "loss": 0.6252, "step": 5195 }, { "epoch": 0.7750018644194198, "grad_norm": 1.3758755922317505, "learning_rate": 2.5390955852375177e-06, "loss": 0.6578, "step": 5196 }, { "epoch": 0.7751510179730032, "grad_norm": 1.5838381052017212, "learning_rate": 2.5358792730013847e-06, "loss": 0.6382, "step": 5197 }, { "epoch": 0.7753001715265866, "grad_norm": 2.172877788543701, "learning_rate": 2.532664703312099e-06, "loss": 0.6647, "step": 5198 }, { "epoch": 0.7754493250801701, "grad_norm": 1.1434075832366943, "learning_rate": 2.5294518769201213e-06, "loss": 0.6622, "step": 5199 }, { "epoch": 0.7755984786337534, "grad_norm": 1.4838238954544067, "learning_rate": 2.5262407945755017e-06, "loss": 0.7668, "step": 5200 }, { "epoch": 0.7757476321873369, "grad_norm": 1.9584819078445435, "learning_rate": 2.5230314570278914e-06, "loss": 0.6301, "step": 5201 }, { "epoch": 0.7758967857409202, "grad_norm": 2.593740463256836, "learning_rate": 2.5198238650265317e-06, "loss": 0.7026, "step": 5202 }, { "epoch": 0.7760459392945037, "grad_norm": 1.4952434301376343, "learning_rate": 2.5166180193202517e-06, "loss": 0.6825, "step": 5203 }, { "epoch": 0.7761950928480871, "grad_norm": 1.2997734546661377, "learning_rate": 2.5134139206574793e-06, "loss": 0.6366, "step": 5204 }, { "epoch": 0.7763442464016705, "grad_norm": 0.5186955332756042, "learning_rate": 2.5102115697862304e-06, "loss": 0.2495, "step": 5205 }, { "epoch": 0.7764933999552539, "grad_norm": 2.5337250232696533, "learning_rate": 2.5070109674541155e-06, "loss": 0.7214, "step": 5206 }, { "epoch": 0.7766425535088374, "grad_norm": 1.7197351455688477, "learning_rate": 2.503812114408336e-06, "loss": 0.6539, "step": 5207 }, { "epoch": 0.7767917070624207, "grad_norm": 1.3872871398925781, "learning_rate": 2.5006150113956874e-06, "loss": 0.6859, "step": 5208 }, { "epoch": 0.7769408606160042, "grad_norm": 0.500211238861084, "learning_rate": 2.4974196591625467e-06, "loss": 0.2535, "step": 5209 }, { "epoch": 0.7770900141695876, "grad_norm": 2.164395332336426, "learning_rate": 2.494226058454894e-06, "loss": 0.6865, "step": 5210 }, { "epoch": 0.777239167723171, "grad_norm": 1.7892996072769165, "learning_rate": 2.491034210018295e-06, "loss": 0.7184, "step": 5211 }, { "epoch": 0.7773883212767544, "grad_norm": 1.2678252458572388, "learning_rate": 2.4878441145979115e-06, "loss": 0.739, "step": 5212 }, { "epoch": 0.7775374748303379, "grad_norm": 1.4855960607528687, "learning_rate": 2.4846557729384835e-06, "loss": 0.6229, "step": 5213 }, { "epoch": 0.7776866283839212, "grad_norm": 1.1382373571395874, "learning_rate": 2.4814691857843544e-06, "loss": 0.6729, "step": 5214 }, { "epoch": 0.7778357819375047, "grad_norm": 3.0549631118774414, "learning_rate": 2.478284353879453e-06, "loss": 0.5945, "step": 5215 }, { "epoch": 0.777984935491088, "grad_norm": 1.4513944387435913, "learning_rate": 2.475101277967299e-06, "loss": 0.7601, "step": 5216 }, { "epoch": 0.7781340890446715, "grad_norm": 1.7471706867218018, "learning_rate": 2.471919958791e-06, "loss": 0.5873, "step": 5217 }, { "epoch": 0.7782832425982549, "grad_norm": 1.3031257390975952, "learning_rate": 2.4687403970932622e-06, "loss": 0.7038, "step": 5218 }, { "epoch": 0.7784323961518383, "grad_norm": 1.2547146081924438, "learning_rate": 2.465562593616365e-06, "loss": 0.6911, "step": 5219 }, { "epoch": 0.7785815497054217, "grad_norm": 1.1001343727111816, "learning_rate": 2.4623865491021913e-06, "loss": 0.654, "step": 5220 }, { "epoch": 0.7787307032590052, "grad_norm": 1.3492555618286133, "learning_rate": 2.4592122642922134e-06, "loss": 0.6187, "step": 5221 }, { "epoch": 0.7788798568125885, "grad_norm": 1.154210090637207, "learning_rate": 2.456039739927479e-06, "loss": 0.6748, "step": 5222 }, { "epoch": 0.779029010366172, "grad_norm": 10.343118667602539, "learning_rate": 2.452868976748639e-06, "loss": 0.6853, "step": 5223 }, { "epoch": 0.7791781639197554, "grad_norm": 1.5627433061599731, "learning_rate": 2.449699975495934e-06, "loss": 0.621, "step": 5224 }, { "epoch": 0.7793273174733388, "grad_norm": 1.774337649345398, "learning_rate": 2.4465327369091784e-06, "loss": 0.6975, "step": 5225 }, { "epoch": 0.7794764710269222, "grad_norm": 1.707335114479065, "learning_rate": 2.4433672617277892e-06, "loss": 0.6641, "step": 5226 }, { "epoch": 0.7796256245805057, "grad_norm": 1.2483612298965454, "learning_rate": 2.4402035506907697e-06, "loss": 0.7527, "step": 5227 }, { "epoch": 0.779774778134089, "grad_norm": 1.3804057836532593, "learning_rate": 2.437041604536702e-06, "loss": 0.696, "step": 5228 }, { "epoch": 0.7799239316876725, "grad_norm": 1.3723536729812622, "learning_rate": 2.4338814240037643e-06, "loss": 0.7236, "step": 5229 }, { "epoch": 0.7800730852412558, "grad_norm": 2.3807103633880615, "learning_rate": 2.430723009829724e-06, "loss": 0.5991, "step": 5230 }, { "epoch": 0.7802222387948393, "grad_norm": 2.2104663848876953, "learning_rate": 2.427566362751934e-06, "loss": 0.6819, "step": 5231 }, { "epoch": 0.7803713923484227, "grad_norm": 0.552172064781189, "learning_rate": 2.424411483507325e-06, "loss": 0.275, "step": 5232 }, { "epoch": 0.7805205459020061, "grad_norm": 2.093196392059326, "learning_rate": 2.4212583728324367e-06, "loss": 0.615, "step": 5233 }, { "epoch": 0.7806696994555895, "grad_norm": 0.5064256191253662, "learning_rate": 2.4181070314633727e-06, "loss": 0.2532, "step": 5234 }, { "epoch": 0.780818853009173, "grad_norm": 1.2843811511993408, "learning_rate": 2.4149574601358383e-06, "loss": 0.6976, "step": 5235 }, { "epoch": 0.7809680065627563, "grad_norm": 1.2826594114303589, "learning_rate": 2.4118096595851205e-06, "loss": 0.6849, "step": 5236 }, { "epoch": 0.7811171601163398, "grad_norm": 2.343630313873291, "learning_rate": 2.408663630546095e-06, "loss": 0.6563, "step": 5237 }, { "epoch": 0.7812663136699232, "grad_norm": 1.540708065032959, "learning_rate": 2.405519373753219e-06, "loss": 0.6835, "step": 5238 }, { "epoch": 0.7814154672235066, "grad_norm": 3.194546937942505, "learning_rate": 2.4023768899405407e-06, "loss": 0.7129, "step": 5239 }, { "epoch": 0.78156462077709, "grad_norm": 1.732308030128479, "learning_rate": 2.3992361798416974e-06, "loss": 0.6605, "step": 5240 }, { "epoch": 0.7817137743306735, "grad_norm": 2.037419080734253, "learning_rate": 2.3960972441898976e-06, "loss": 0.6525, "step": 5241 }, { "epoch": 0.7818629278842568, "grad_norm": 1.6220290660858154, "learning_rate": 2.392960083717957e-06, "loss": 0.6233, "step": 5242 }, { "epoch": 0.7820120814378403, "grad_norm": 1.763848900794983, "learning_rate": 2.389824699158263e-06, "loss": 0.6266, "step": 5243 }, { "epoch": 0.7821612349914236, "grad_norm": 1.7658193111419678, "learning_rate": 2.3866910912427875e-06, "loss": 0.6913, "step": 5244 }, { "epoch": 0.7823103885450071, "grad_norm": 1.7073665857315063, "learning_rate": 2.383559260703093e-06, "loss": 0.6934, "step": 5245 }, { "epoch": 0.7824595420985905, "grad_norm": 0.5163313746452332, "learning_rate": 2.3804292082703295e-06, "loss": 0.2545, "step": 5246 }, { "epoch": 0.782608695652174, "grad_norm": 1.230303406715393, "learning_rate": 2.3773009346752207e-06, "loss": 0.6518, "step": 5247 }, { "epoch": 0.7827578492057573, "grad_norm": 1.7319250106811523, "learning_rate": 2.374174440648086e-06, "loss": 0.6349, "step": 5248 }, { "epoch": 0.7829070027593408, "grad_norm": 1.6000502109527588, "learning_rate": 2.3710497269188258e-06, "loss": 0.6655, "step": 5249 }, { "epoch": 0.7830561563129241, "grad_norm": 1.403842568397522, "learning_rate": 2.3679267942169237e-06, "loss": 0.6576, "step": 5250 }, { "epoch": 0.7832053098665076, "grad_norm": 1.8012070655822754, "learning_rate": 2.3648056432714483e-06, "loss": 0.704, "step": 5251 }, { "epoch": 0.783354463420091, "grad_norm": 1.868768572807312, "learning_rate": 2.361686274811056e-06, "loss": 0.6102, "step": 5252 }, { "epoch": 0.7835036169736744, "grad_norm": 2.593427896499634, "learning_rate": 2.3585686895639757e-06, "loss": 0.5776, "step": 5253 }, { "epoch": 0.7836527705272578, "grad_norm": 2.713892936706543, "learning_rate": 2.355452888258033e-06, "loss": 0.6738, "step": 5254 }, { "epoch": 0.7838019240808413, "grad_norm": 1.9322834014892578, "learning_rate": 2.352338871620634e-06, "loss": 0.6889, "step": 5255 }, { "epoch": 0.7839510776344246, "grad_norm": 1.6813840866088867, "learning_rate": 2.349226640378759e-06, "loss": 0.7096, "step": 5256 }, { "epoch": 0.7841002311880081, "grad_norm": 1.3619582653045654, "learning_rate": 2.346116195258982e-06, "loss": 0.7306, "step": 5257 }, { "epoch": 0.7842493847415914, "grad_norm": 1.3959652185440063, "learning_rate": 2.3430075369874563e-06, "loss": 0.6604, "step": 5258 }, { "epoch": 0.7843985382951749, "grad_norm": 1.3267347812652588, "learning_rate": 2.339900666289918e-06, "loss": 0.6993, "step": 5259 }, { "epoch": 0.7845476918487583, "grad_norm": 2.881722927093506, "learning_rate": 2.3367955838916855e-06, "loss": 0.6479, "step": 5260 }, { "epoch": 0.7846968454023417, "grad_norm": 2.0190529823303223, "learning_rate": 2.333692290517664e-06, "loss": 0.6872, "step": 5261 }, { "epoch": 0.7848459989559251, "grad_norm": 1.2697200775146484, "learning_rate": 2.3305907868923306e-06, "loss": 0.6653, "step": 5262 }, { "epoch": 0.7849951525095086, "grad_norm": 2.2111425399780273, "learning_rate": 2.327491073739755e-06, "loss": 0.6929, "step": 5263 }, { "epoch": 0.7851443060630919, "grad_norm": 1.7932466268539429, "learning_rate": 2.324393151783585e-06, "loss": 0.7016, "step": 5264 }, { "epoch": 0.7852934596166754, "grad_norm": 1.2595760822296143, "learning_rate": 2.321297021747052e-06, "loss": 0.8106, "step": 5265 }, { "epoch": 0.7854426131702588, "grad_norm": 1.7683005332946777, "learning_rate": 2.318202684352964e-06, "loss": 0.6605, "step": 5266 }, { "epoch": 0.7855917667238422, "grad_norm": 1.2968169450759888, "learning_rate": 2.315110140323713e-06, "loss": 0.7068, "step": 5267 }, { "epoch": 0.7857409202774256, "grad_norm": 1.4209256172180176, "learning_rate": 2.312019390381277e-06, "loss": 0.6397, "step": 5268 }, { "epoch": 0.7858900738310091, "grad_norm": 1.5179423093795776, "learning_rate": 2.3089304352472095e-06, "loss": 0.7431, "step": 5269 }, { "epoch": 0.7860392273845924, "grad_norm": 1.8450928926467896, "learning_rate": 2.3058432756426473e-06, "loss": 0.7062, "step": 5270 }, { "epoch": 0.7861883809381759, "grad_norm": 1.4740056991577148, "learning_rate": 2.3027579122883114e-06, "loss": 0.6764, "step": 5271 }, { "epoch": 0.7863375344917592, "grad_norm": 1.995815634727478, "learning_rate": 2.2996743459044925e-06, "loss": 0.6126, "step": 5272 }, { "epoch": 0.7864866880453427, "grad_norm": 2.2822394371032715, "learning_rate": 2.296592577211072e-06, "loss": 0.7147, "step": 5273 }, { "epoch": 0.7866358415989261, "grad_norm": 2.2236061096191406, "learning_rate": 2.2935126069275116e-06, "loss": 0.7055, "step": 5274 }, { "epoch": 0.7867849951525095, "grad_norm": 1.7025032043457031, "learning_rate": 2.290434435772845e-06, "loss": 0.6698, "step": 5275 }, { "epoch": 0.7869341487060929, "grad_norm": 1.2468805313110352, "learning_rate": 2.28735806446569e-06, "loss": 0.6926, "step": 5276 }, { "epoch": 0.7870833022596764, "grad_norm": 1.351550579071045, "learning_rate": 2.284283493724255e-06, "loss": 0.669, "step": 5277 }, { "epoch": 0.7872324558132597, "grad_norm": 2.113064765930176, "learning_rate": 2.2812107242663082e-06, "loss": 0.7006, "step": 5278 }, { "epoch": 0.7873816093668432, "grad_norm": 1.9498002529144287, "learning_rate": 2.2781397568092113e-06, "loss": 0.736, "step": 5279 }, { "epoch": 0.7875307629204266, "grad_norm": 1.5815337896347046, "learning_rate": 2.2750705920699044e-06, "loss": 0.6795, "step": 5280 }, { "epoch": 0.78767991647401, "grad_norm": 1.239050030708313, "learning_rate": 2.2720032307648967e-06, "loss": 0.7156, "step": 5281 }, { "epoch": 0.7878290700275934, "grad_norm": 1.6969035863876343, "learning_rate": 2.2689376736102874e-06, "loss": 0.694, "step": 5282 }, { "epoch": 0.7879782235811769, "grad_norm": 1.1393269300460815, "learning_rate": 2.2658739213217496e-06, "loss": 0.72, "step": 5283 }, { "epoch": 0.7881273771347602, "grad_norm": 1.6573718786239624, "learning_rate": 2.262811974614537e-06, "loss": 0.6507, "step": 5284 }, { "epoch": 0.7882765306883437, "grad_norm": 1.698103427886963, "learning_rate": 2.2597518342034797e-06, "loss": 0.7551, "step": 5285 }, { "epoch": 0.788425684241927, "grad_norm": 1.5246353149414062, "learning_rate": 2.25669350080299e-06, "loss": 0.6297, "step": 5286 }, { "epoch": 0.7885748377955105, "grad_norm": 1.4883803129196167, "learning_rate": 2.2536369751270514e-06, "loss": 0.7032, "step": 5287 }, { "epoch": 0.7887239913490939, "grad_norm": 1.3484443426132202, "learning_rate": 2.25058225788923e-06, "loss": 0.6858, "step": 5288 }, { "epoch": 0.7888731449026773, "grad_norm": 2.135321855545044, "learning_rate": 2.2475293498026697e-06, "loss": 0.6401, "step": 5289 }, { "epoch": 0.7890222984562607, "grad_norm": 1.8762850761413574, "learning_rate": 2.2444782515800946e-06, "loss": 0.6891, "step": 5290 }, { "epoch": 0.7891714520098442, "grad_norm": 2.043137311935425, "learning_rate": 2.2414289639337983e-06, "loss": 0.623, "step": 5291 }, { "epoch": 0.7893206055634275, "grad_norm": 1.747446060180664, "learning_rate": 2.2383814875756583e-06, "loss": 0.6548, "step": 5292 }, { "epoch": 0.789469759117011, "grad_norm": 1.602907419204712, "learning_rate": 2.235335823217127e-06, "loss": 0.6716, "step": 5293 }, { "epoch": 0.7896189126705944, "grad_norm": 1.1074146032333374, "learning_rate": 2.2322919715692358e-06, "loss": 0.6605, "step": 5294 }, { "epoch": 0.7897680662241778, "grad_norm": 1.2474974393844604, "learning_rate": 2.229249933342591e-06, "loss": 0.65, "step": 5295 }, { "epoch": 0.7899172197777612, "grad_norm": 1.6325199604034424, "learning_rate": 2.2262097092473776e-06, "loss": 0.5945, "step": 5296 }, { "epoch": 0.7900663733313447, "grad_norm": 1.3719757795333862, "learning_rate": 2.2231712999933506e-06, "loss": 0.725, "step": 5297 }, { "epoch": 0.790215526884928, "grad_norm": 1.5190999507904053, "learning_rate": 2.2201347062898505e-06, "loss": 0.7261, "step": 5298 }, { "epoch": 0.7903646804385115, "grad_norm": 1.9558838605880737, "learning_rate": 2.2170999288457896e-06, "loss": 0.6066, "step": 5299 }, { "epoch": 0.7905138339920948, "grad_norm": 2.089646339416504, "learning_rate": 2.2140669683696513e-06, "loss": 0.7585, "step": 5300 }, { "epoch": 0.7906629875456783, "grad_norm": 1.7110397815704346, "learning_rate": 2.211035825569503e-06, "loss": 0.6433, "step": 5301 }, { "epoch": 0.7908121410992617, "grad_norm": 0.5350414514541626, "learning_rate": 2.2080065011529848e-06, "loss": 0.2417, "step": 5302 }, { "epoch": 0.7909612946528451, "grad_norm": 1.9965349435806274, "learning_rate": 2.2049789958273117e-06, "loss": 0.6311, "step": 5303 }, { "epoch": 0.7911104482064285, "grad_norm": 1.4470839500427246, "learning_rate": 2.201953310299274e-06, "loss": 0.6906, "step": 5304 }, { "epoch": 0.791259601760012, "grad_norm": 1.308815360069275, "learning_rate": 2.1989294452752398e-06, "loss": 0.7347, "step": 5305 }, { "epoch": 0.7914087553135953, "grad_norm": 1.0800697803497314, "learning_rate": 2.1959074014611447e-06, "loss": 0.6606, "step": 5306 }, { "epoch": 0.7915579088671788, "grad_norm": 1.967681884765625, "learning_rate": 2.192887179562506e-06, "loss": 0.5693, "step": 5307 }, { "epoch": 0.7917070624207622, "grad_norm": 1.396614670753479, "learning_rate": 2.1898687802844187e-06, "loss": 0.7254, "step": 5308 }, { "epoch": 0.7918562159743456, "grad_norm": 1.2969939708709717, "learning_rate": 2.186852204331541e-06, "loss": 0.724, "step": 5309 }, { "epoch": 0.792005369527929, "grad_norm": 2.0337140560150146, "learning_rate": 2.183837452408113e-06, "loss": 0.6756, "step": 5310 }, { "epoch": 0.7921545230815125, "grad_norm": 2.2902767658233643, "learning_rate": 2.1808245252179503e-06, "loss": 0.673, "step": 5311 }, { "epoch": 0.7923036766350958, "grad_norm": 1.636951208114624, "learning_rate": 2.177813423464439e-06, "loss": 0.6751, "step": 5312 }, { "epoch": 0.7924528301886793, "grad_norm": 1.4615540504455566, "learning_rate": 2.1748041478505386e-06, "loss": 0.676, "step": 5313 }, { "epoch": 0.7926019837422627, "grad_norm": 1.7344300746917725, "learning_rate": 2.1717966990787877e-06, "loss": 0.5945, "step": 5314 }, { "epoch": 0.7927511372958461, "grad_norm": 1.372976541519165, "learning_rate": 2.168791077851293e-06, "loss": 0.6751, "step": 5315 }, { "epoch": 0.7929002908494295, "grad_norm": 1.9318337440490723, "learning_rate": 2.1657872848697336e-06, "loss": 0.6397, "step": 5316 }, { "epoch": 0.793049444403013, "grad_norm": 1.4270819425582886, "learning_rate": 2.1627853208353655e-06, "loss": 0.5365, "step": 5317 }, { "epoch": 0.7931985979565963, "grad_norm": 1.2417192459106445, "learning_rate": 2.1597851864490193e-06, "loss": 0.6634, "step": 5318 }, { "epoch": 0.7933477515101798, "grad_norm": 3.3594985008239746, "learning_rate": 2.156786882411087e-06, "loss": 0.5925, "step": 5319 }, { "epoch": 0.7934969050637631, "grad_norm": 1.4431394338607788, "learning_rate": 2.1537904094215512e-06, "loss": 0.6661, "step": 5320 }, { "epoch": 0.7936460586173466, "grad_norm": 1.5105409622192383, "learning_rate": 2.1507957681799574e-06, "loss": 0.6899, "step": 5321 }, { "epoch": 0.79379521217093, "grad_norm": 1.469994306564331, "learning_rate": 2.147802959385419e-06, "loss": 0.6762, "step": 5322 }, { "epoch": 0.7939443657245134, "grad_norm": 1.309552788734436, "learning_rate": 2.1448119837366266e-06, "loss": 0.6465, "step": 5323 }, { "epoch": 0.7940935192780968, "grad_norm": 1.6226563453674316, "learning_rate": 2.1418228419318486e-06, "loss": 0.6703, "step": 5324 }, { "epoch": 0.7942426728316803, "grad_norm": 2.5664501190185547, "learning_rate": 2.1388355346689118e-06, "loss": 0.6306, "step": 5325 }, { "epoch": 0.7943918263852636, "grad_norm": 2.0003273487091064, "learning_rate": 2.135850062645225e-06, "loss": 0.6583, "step": 5326 }, { "epoch": 0.7945409799388471, "grad_norm": 1.31466543674469, "learning_rate": 2.1328664265577694e-06, "loss": 0.6347, "step": 5327 }, { "epoch": 0.7946901334924305, "grad_norm": 2.083696126937866, "learning_rate": 2.1298846271030847e-06, "loss": 0.6375, "step": 5328 }, { "epoch": 0.7948392870460139, "grad_norm": 1.4198278188705444, "learning_rate": 2.126904664977302e-06, "loss": 0.5769, "step": 5329 }, { "epoch": 0.7949884405995973, "grad_norm": 1.231972098350525, "learning_rate": 2.123926540876109e-06, "loss": 0.6692, "step": 5330 }, { "epoch": 0.7951375941531807, "grad_norm": 1.4739465713500977, "learning_rate": 2.1209502554947636e-06, "loss": 0.6358, "step": 5331 }, { "epoch": 0.7952867477067641, "grad_norm": 1.3280391693115234, "learning_rate": 2.1179758095281023e-06, "loss": 0.6905, "step": 5332 }, { "epoch": 0.7954359012603476, "grad_norm": 1.7963173389434814, "learning_rate": 2.1150032036705316e-06, "loss": 0.7276, "step": 5333 }, { "epoch": 0.7955850548139309, "grad_norm": 0.5552616715431213, "learning_rate": 2.1120324386160187e-06, "loss": 0.2783, "step": 5334 }, { "epoch": 0.7957342083675144, "grad_norm": 1.7875940799713135, "learning_rate": 2.109063515058111e-06, "loss": 0.7836, "step": 5335 }, { "epoch": 0.7958833619210978, "grad_norm": 1.2740893363952637, "learning_rate": 2.1060964336899216e-06, "loss": 0.6383, "step": 5336 }, { "epoch": 0.7960325154746812, "grad_norm": 0.4674937129020691, "learning_rate": 2.1031311952041366e-06, "loss": 0.2469, "step": 5337 }, { "epoch": 0.7961816690282646, "grad_norm": 1.5004280805587769, "learning_rate": 2.1001678002930093e-06, "loss": 0.6827, "step": 5338 }, { "epoch": 0.7963308225818481, "grad_norm": 2.0685794353485107, "learning_rate": 2.0972062496483657e-06, "loss": 0.638, "step": 5339 }, { "epoch": 0.7964799761354314, "grad_norm": 1.5984338521957397, "learning_rate": 2.0942465439615935e-06, "loss": 0.6691, "step": 5340 }, { "epoch": 0.7966291296890149, "grad_norm": 1.9006760120391846, "learning_rate": 2.0912886839236567e-06, "loss": 0.7128, "step": 5341 }, { "epoch": 0.7967782832425983, "grad_norm": 1.3038604259490967, "learning_rate": 2.0883326702250885e-06, "loss": 0.7359, "step": 5342 }, { "epoch": 0.7969274367961817, "grad_norm": 0.4851776957511902, "learning_rate": 2.0853785035559903e-06, "loss": 0.2545, "step": 5343 }, { "epoch": 0.7970765903497651, "grad_norm": 1.5599749088287354, "learning_rate": 2.082426184606027e-06, "loss": 0.7302, "step": 5344 }, { "epoch": 0.7972257439033485, "grad_norm": 1.2509701251983643, "learning_rate": 2.0794757140644397e-06, "loss": 0.6437, "step": 5345 }, { "epoch": 0.7973748974569319, "grad_norm": 2.968257427215576, "learning_rate": 2.076527092620032e-06, "loss": 0.6117, "step": 5346 }, { "epoch": 0.7975240510105154, "grad_norm": 1.4301555156707764, "learning_rate": 2.0735803209611805e-06, "loss": 0.6118, "step": 5347 }, { "epoch": 0.7976732045640987, "grad_norm": 2.113762140274048, "learning_rate": 2.070635399775828e-06, "loss": 0.6562, "step": 5348 }, { "epoch": 0.7978223581176822, "grad_norm": 3.3009562492370605, "learning_rate": 2.0676923297514874e-06, "loss": 0.6428, "step": 5349 }, { "epoch": 0.7979715116712656, "grad_norm": 1.4880518913269043, "learning_rate": 2.064751111575232e-06, "loss": 0.7507, "step": 5350 }, { "epoch": 0.798120665224849, "grad_norm": 1.6363990306854248, "learning_rate": 2.0618117459337107e-06, "loss": 0.6458, "step": 5351 }, { "epoch": 0.7982698187784324, "grad_norm": 2.001044511795044, "learning_rate": 2.0588742335131397e-06, "loss": 0.7018, "step": 5352 }, { "epoch": 0.7984189723320159, "grad_norm": 1.2535425424575806, "learning_rate": 2.0559385749992956e-06, "loss": 0.7165, "step": 5353 }, { "epoch": 0.7985681258855992, "grad_norm": 1.686537265777588, "learning_rate": 2.053004771077525e-06, "loss": 0.5975, "step": 5354 }, { "epoch": 0.7987172794391827, "grad_norm": 2.1358373165130615, "learning_rate": 2.0500728224327537e-06, "loss": 0.6259, "step": 5355 }, { "epoch": 0.798866432992766, "grad_norm": 1.2851699590682983, "learning_rate": 2.047142729749454e-06, "loss": 0.602, "step": 5356 }, { "epoch": 0.7990155865463495, "grad_norm": 1.654598593711853, "learning_rate": 2.044214493711677e-06, "loss": 0.6492, "step": 5357 }, { "epoch": 0.7991647400999329, "grad_norm": 1.654331922531128, "learning_rate": 2.041288115003043e-06, "loss": 0.6867, "step": 5358 }, { "epoch": 0.7993138936535163, "grad_norm": 3.026122570037842, "learning_rate": 2.038363594306727e-06, "loss": 0.6962, "step": 5359 }, { "epoch": 0.7994630472070997, "grad_norm": 1.5341163873672485, "learning_rate": 2.0354409323054814e-06, "loss": 0.6441, "step": 5360 }, { "epoch": 0.7996122007606832, "grad_norm": 2.274946689605713, "learning_rate": 2.0325201296816177e-06, "loss": 0.6573, "step": 5361 }, { "epoch": 0.7997613543142665, "grad_norm": 1.1923420429229736, "learning_rate": 2.0296011871170208e-06, "loss": 0.7784, "step": 5362 }, { "epoch": 0.79991050786785, "grad_norm": 1.2621748447418213, "learning_rate": 2.0266841052931275e-06, "loss": 0.6449, "step": 5363 }, { "epoch": 0.8000596614214334, "grad_norm": 1.405401587486267, "learning_rate": 2.0237688848909607e-06, "loss": 0.6205, "step": 5364 }, { "epoch": 0.8002088149750167, "grad_norm": 2.269179344177246, "learning_rate": 2.020855526591089e-06, "loss": 0.6448, "step": 5365 }, { "epoch": 0.8003579685286002, "grad_norm": 2.02608585357666, "learning_rate": 2.0179440310736575e-06, "loss": 0.6912, "step": 5366 }, { "epoch": 0.8005071220821836, "grad_norm": 1.4769456386566162, "learning_rate": 2.015034399018373e-06, "loss": 0.7035, "step": 5367 }, { "epoch": 0.800656275635767, "grad_norm": 1.6321487426757812, "learning_rate": 2.0121266311045106e-06, "loss": 0.7238, "step": 5368 }, { "epoch": 0.8008054291893504, "grad_norm": 1.3136062622070312, "learning_rate": 2.009220728010901e-06, "loss": 0.7273, "step": 5369 }, { "epoch": 0.8009545827429339, "grad_norm": 1.4816021919250488, "learning_rate": 2.0063166904159516e-06, "loss": 0.6754, "step": 5370 }, { "epoch": 0.8011037362965172, "grad_norm": 1.1838620901107788, "learning_rate": 2.0034145189976275e-06, "loss": 0.6639, "step": 5371 }, { "epoch": 0.8012528898501007, "grad_norm": 1.603472113609314, "learning_rate": 2.0005142144334533e-06, "loss": 0.7194, "step": 5372 }, { "epoch": 0.801402043403684, "grad_norm": 1.577704906463623, "learning_rate": 1.9976157774005323e-06, "loss": 0.6055, "step": 5373 }, { "epoch": 0.8015511969572675, "grad_norm": 2.316282272338867, "learning_rate": 1.994719208575522e-06, "loss": 0.7385, "step": 5374 }, { "epoch": 0.8017003505108509, "grad_norm": 1.3556417226791382, "learning_rate": 1.9918245086346387e-06, "loss": 0.6126, "step": 5375 }, { "epoch": 0.8018495040644343, "grad_norm": 3.3434622287750244, "learning_rate": 1.9889316782536737e-06, "loss": 0.6513, "step": 5376 }, { "epoch": 0.8019986576180177, "grad_norm": 1.4716237783432007, "learning_rate": 1.9860407181079787e-06, "loss": 0.8474, "step": 5377 }, { "epoch": 0.8021478111716012, "grad_norm": 1.5578887462615967, "learning_rate": 1.9831516288724607e-06, "loss": 0.6502, "step": 5378 }, { "epoch": 0.8022969647251845, "grad_norm": 2.1762335300445557, "learning_rate": 1.9802644112215996e-06, "loss": 0.671, "step": 5379 }, { "epoch": 0.802446118278768, "grad_norm": 2.0256001949310303, "learning_rate": 1.9773790658294368e-06, "loss": 0.6155, "step": 5380 }, { "epoch": 0.8025952718323514, "grad_norm": 1.9654604196548462, "learning_rate": 1.9744955933695663e-06, "loss": 0.6215, "step": 5381 }, { "epoch": 0.8027444253859348, "grad_norm": 1.5726157426834106, "learning_rate": 1.9716139945151634e-06, "loss": 0.6959, "step": 5382 }, { "epoch": 0.8028935789395182, "grad_norm": 2.3014562129974365, "learning_rate": 1.9687342699389542e-06, "loss": 0.6365, "step": 5383 }, { "epoch": 0.8030427324931017, "grad_norm": 1.6085920333862305, "learning_rate": 1.9658564203132235e-06, "loss": 0.6278, "step": 5384 }, { "epoch": 0.803191886046685, "grad_norm": 1.5206668376922607, "learning_rate": 1.962980446309827e-06, "loss": 0.6552, "step": 5385 }, { "epoch": 0.8033410396002685, "grad_norm": 4.675626277923584, "learning_rate": 1.9601063486001815e-06, "loss": 0.6947, "step": 5386 }, { "epoch": 0.8034901931538518, "grad_norm": 1.3148542642593384, "learning_rate": 1.9572341278552575e-06, "loss": 0.6839, "step": 5387 }, { "epoch": 0.8036393467074353, "grad_norm": 3.4996771812438965, "learning_rate": 1.9543637847455976e-06, "loss": 0.6407, "step": 5388 }, { "epoch": 0.8037885002610187, "grad_norm": 1.718584418296814, "learning_rate": 1.9514953199413013e-06, "loss": 0.7384, "step": 5389 }, { "epoch": 0.8039376538146021, "grad_norm": 1.49325692653656, "learning_rate": 1.948628734112029e-06, "loss": 0.5963, "step": 5390 }, { "epoch": 0.8040868073681855, "grad_norm": 1.4561784267425537, "learning_rate": 1.9457640279270053e-06, "loss": 0.6344, "step": 5391 }, { "epoch": 0.804235960921769, "grad_norm": 1.2083340883255005, "learning_rate": 1.942901202055015e-06, "loss": 0.7618, "step": 5392 }, { "epoch": 0.8043851144753523, "grad_norm": 1.4084802865982056, "learning_rate": 1.9400402571644005e-06, "loss": 0.7127, "step": 5393 }, { "epoch": 0.8045342680289358, "grad_norm": 1.7985233068466187, "learning_rate": 1.937181193923068e-06, "loss": 0.6234, "step": 5394 }, { "epoch": 0.8046834215825192, "grad_norm": 1.3324172496795654, "learning_rate": 1.9343240129984843e-06, "loss": 0.6632, "step": 5395 }, { "epoch": 0.8048325751361026, "grad_norm": 1.6229157447814941, "learning_rate": 1.9314687150576806e-06, "loss": 0.6275, "step": 5396 }, { "epoch": 0.804981728689686, "grad_norm": 1.3061914443969727, "learning_rate": 1.928615300767237e-06, "loss": 0.7125, "step": 5397 }, { "epoch": 0.8051308822432695, "grad_norm": 1.976295828819275, "learning_rate": 1.9257637707933043e-06, "loss": 0.6834, "step": 5398 }, { "epoch": 0.8052800357968528, "grad_norm": 1.2118785381317139, "learning_rate": 1.922914125801596e-06, "loss": 0.6869, "step": 5399 }, { "epoch": 0.8054291893504363, "grad_norm": 0.5324331521987915, "learning_rate": 1.920066366457374e-06, "loss": 0.2674, "step": 5400 }, { "epoch": 0.8055783429040196, "grad_norm": 1.4005733728408813, "learning_rate": 1.917220493425467e-06, "loss": 0.7608, "step": 5401 }, { "epoch": 0.8057274964576031, "grad_norm": 1.1754721403121948, "learning_rate": 1.9143765073702646e-06, "loss": 0.6001, "step": 5402 }, { "epoch": 0.8058766500111865, "grad_norm": 2.1084420680999756, "learning_rate": 1.911534408955711e-06, "loss": 0.691, "step": 5403 }, { "epoch": 0.8060258035647699, "grad_norm": 2.911450147628784, "learning_rate": 1.908694198845312e-06, "loss": 0.6544, "step": 5404 }, { "epoch": 0.8061749571183533, "grad_norm": 1.234612226486206, "learning_rate": 1.9058558777021363e-06, "loss": 0.7053, "step": 5405 }, { "epoch": 0.8063241106719368, "grad_norm": 1.7314496040344238, "learning_rate": 1.9030194461888041e-06, "loss": 0.6105, "step": 5406 }, { "epoch": 0.8064732642255201, "grad_norm": 1.3334636688232422, "learning_rate": 1.900184904967498e-06, "loss": 0.6307, "step": 5407 }, { "epoch": 0.8066224177791036, "grad_norm": 0.4895336329936981, "learning_rate": 1.8973522546999667e-06, "loss": 0.2514, "step": 5408 }, { "epoch": 0.806771571332687, "grad_norm": 1.2773691415786743, "learning_rate": 1.8945214960475034e-06, "loss": 0.6958, "step": 5409 }, { "epoch": 0.8069207248862704, "grad_norm": 1.2023124694824219, "learning_rate": 1.8916926296709692e-06, "loss": 0.7879, "step": 5410 }, { "epoch": 0.8070698784398538, "grad_norm": 1.5770745277404785, "learning_rate": 1.8888656562307849e-06, "loss": 0.683, "step": 5411 }, { "epoch": 0.8072190319934373, "grad_norm": 1.2563502788543701, "learning_rate": 1.8860405763869183e-06, "loss": 0.7402, "step": 5412 }, { "epoch": 0.8073681855470206, "grad_norm": 2.6795084476470947, "learning_rate": 1.883217390798907e-06, "loss": 0.6634, "step": 5413 }, { "epoch": 0.8075173391006041, "grad_norm": 1.5323148965835571, "learning_rate": 1.8803961001258408e-06, "loss": 0.7301, "step": 5414 }, { "epoch": 0.8076664926541874, "grad_norm": 2.625885486602783, "learning_rate": 1.8775767050263683e-06, "loss": 0.6992, "step": 5415 }, { "epoch": 0.8078156462077709, "grad_norm": 1.4418400526046753, "learning_rate": 1.874759206158695e-06, "loss": 0.594, "step": 5416 }, { "epoch": 0.8079647997613543, "grad_norm": 1.9759573936462402, "learning_rate": 1.8719436041805872e-06, "loss": 0.6427, "step": 5417 }, { "epoch": 0.8081139533149377, "grad_norm": 1.4104843139648438, "learning_rate": 1.869129899749359e-06, "loss": 0.6456, "step": 5418 }, { "epoch": 0.8082631068685211, "grad_norm": 0.5154120922088623, "learning_rate": 1.8663180935218927e-06, "loss": 0.2369, "step": 5419 }, { "epoch": 0.8084122604221046, "grad_norm": 1.379485845565796, "learning_rate": 1.86350818615462e-06, "loss": 0.6787, "step": 5420 }, { "epoch": 0.8085614139756879, "grad_norm": 1.8138185739517212, "learning_rate": 1.860700178303535e-06, "loss": 0.6477, "step": 5421 }, { "epoch": 0.8087105675292714, "grad_norm": 1.418731689453125, "learning_rate": 1.857894070624181e-06, "loss": 0.637, "step": 5422 }, { "epoch": 0.8088597210828548, "grad_norm": 2.3491251468658447, "learning_rate": 1.855089863771663e-06, "loss": 0.6514, "step": 5423 }, { "epoch": 0.8090088746364382, "grad_norm": 1.3161710500717163, "learning_rate": 1.8522875584006417e-06, "loss": 0.7544, "step": 5424 }, { "epoch": 0.8091580281900216, "grad_norm": 1.958604097366333, "learning_rate": 1.8494871551653338e-06, "loss": 0.5522, "step": 5425 }, { "epoch": 0.809307181743605, "grad_norm": 1.2249606847763062, "learning_rate": 1.8466886547195106e-06, "loss": 0.7743, "step": 5426 }, { "epoch": 0.8094563352971884, "grad_norm": 4.809506893157959, "learning_rate": 1.8438920577165032e-06, "loss": 0.657, "step": 5427 }, { "epoch": 0.8096054888507719, "grad_norm": 2.0025241374969482, "learning_rate": 1.84109736480919e-06, "loss": 0.6619, "step": 5428 }, { "epoch": 0.8097546424043552, "grad_norm": 1.8745304346084595, "learning_rate": 1.8383045766500117e-06, "loss": 0.687, "step": 5429 }, { "epoch": 0.8099037959579387, "grad_norm": 1.1913214921951294, "learning_rate": 1.8355136938909656e-06, "loss": 0.6088, "step": 5430 }, { "epoch": 0.8100529495115221, "grad_norm": 1.8191167116165161, "learning_rate": 1.8327247171835961e-06, "loss": 0.6614, "step": 5431 }, { "epoch": 0.8102021030651055, "grad_norm": 1.5911319255828857, "learning_rate": 1.8299376471790097e-06, "loss": 0.5957, "step": 5432 }, { "epoch": 0.8103512566186889, "grad_norm": 1.9607263803482056, "learning_rate": 1.8271524845278676e-06, "loss": 0.7231, "step": 5433 }, { "epoch": 0.8105004101722724, "grad_norm": 1.531153917312622, "learning_rate": 1.8243692298803816e-06, "loss": 0.731, "step": 5434 }, { "epoch": 0.8106495637258557, "grad_norm": 1.6059856414794922, "learning_rate": 1.821587883886321e-06, "loss": 0.6366, "step": 5435 }, { "epoch": 0.8107987172794392, "grad_norm": 2.0053882598876953, "learning_rate": 1.818808447195013e-06, "loss": 0.6744, "step": 5436 }, { "epoch": 0.8109478708330226, "grad_norm": 1.4027076959609985, "learning_rate": 1.8160309204553272e-06, "loss": 0.6956, "step": 5437 }, { "epoch": 0.811097024386606, "grad_norm": 1.7179205417633057, "learning_rate": 1.8132553043156997e-06, "loss": 0.6323, "step": 5438 }, { "epoch": 0.8112461779401894, "grad_norm": 1.5448615550994873, "learning_rate": 1.8104815994241155e-06, "loss": 0.744, "step": 5439 }, { "epoch": 0.8113953314937729, "grad_norm": 1.7567778825759888, "learning_rate": 1.807709806428115e-06, "loss": 0.6832, "step": 5440 }, { "epoch": 0.8115444850473562, "grad_norm": 1.2939467430114746, "learning_rate": 1.8049399259747869e-06, "loss": 0.6759, "step": 5441 }, { "epoch": 0.8116936386009397, "grad_norm": 2.8213460445404053, "learning_rate": 1.8021719587107811e-06, "loss": 0.6312, "step": 5442 }, { "epoch": 0.811842792154523, "grad_norm": 1.3112393617630005, "learning_rate": 1.7994059052822953e-06, "loss": 0.6789, "step": 5443 }, { "epoch": 0.8119919457081065, "grad_norm": 2.6548731327056885, "learning_rate": 1.7966417663350843e-06, "loss": 0.6396, "step": 5444 }, { "epoch": 0.8121410992616899, "grad_norm": 2.072939872741699, "learning_rate": 1.7938795425144529e-06, "loss": 0.6755, "step": 5445 }, { "epoch": 0.8122902528152733, "grad_norm": 1.2565749883651733, "learning_rate": 1.7911192344652616e-06, "loss": 0.6003, "step": 5446 }, { "epoch": 0.8124394063688567, "grad_norm": 1.2646695375442505, "learning_rate": 1.78836084283192e-06, "loss": 0.6478, "step": 5447 }, { "epoch": 0.8125885599224402, "grad_norm": 1.262319803237915, "learning_rate": 1.7856043682583913e-06, "loss": 0.6701, "step": 5448 }, { "epoch": 0.8127377134760235, "grad_norm": 2.6613082885742188, "learning_rate": 1.7828498113881976e-06, "loss": 0.6557, "step": 5449 }, { "epoch": 0.812886867029607, "grad_norm": 1.5655313730239868, "learning_rate": 1.780097172864399e-06, "loss": 0.7789, "step": 5450 }, { "epoch": 0.8130360205831904, "grad_norm": 1.6491245031356812, "learning_rate": 1.7773464533296237e-06, "loss": 0.628, "step": 5451 }, { "epoch": 0.8131851741367738, "grad_norm": 0.49869465827941895, "learning_rate": 1.7745976534260457e-06, "loss": 0.2601, "step": 5452 }, { "epoch": 0.8133343276903572, "grad_norm": 2.433018207550049, "learning_rate": 1.7718507737953838e-06, "loss": 0.6816, "step": 5453 }, { "epoch": 0.8134834812439407, "grad_norm": 1.3742929697036743, "learning_rate": 1.7691058150789186e-06, "loss": 0.6926, "step": 5454 }, { "epoch": 0.813632634797524, "grad_norm": 1.513948678970337, "learning_rate": 1.7663627779174797e-06, "loss": 0.7567, "step": 5455 }, { "epoch": 0.8137817883511075, "grad_norm": 2.668703556060791, "learning_rate": 1.7636216629514435e-06, "loss": 0.6858, "step": 5456 }, { "epoch": 0.8139309419046908, "grad_norm": 1.749893307685852, "learning_rate": 1.7608824708207405e-06, "loss": 0.6715, "step": 5457 }, { "epoch": 0.8140800954582743, "grad_norm": 1.7671481370925903, "learning_rate": 1.758145202164857e-06, "loss": 0.7668, "step": 5458 }, { "epoch": 0.8142292490118577, "grad_norm": 1.434035301208496, "learning_rate": 1.7554098576228185e-06, "loss": 0.7281, "step": 5459 }, { "epoch": 0.8143784025654411, "grad_norm": 1.7653005123138428, "learning_rate": 1.752676437833216e-06, "loss": 0.6895, "step": 5460 }, { "epoch": 0.8145275561190245, "grad_norm": 1.744134783744812, "learning_rate": 1.7499449434341843e-06, "loss": 0.6651, "step": 5461 }, { "epoch": 0.814676709672608, "grad_norm": 1.2702093124389648, "learning_rate": 1.7472153750634014e-06, "loss": 0.6805, "step": 5462 }, { "epoch": 0.8148258632261913, "grad_norm": 1.2991526126861572, "learning_rate": 1.7444877333581067e-06, "loss": 0.6792, "step": 5463 }, { "epoch": 0.8149750167797748, "grad_norm": 2.5081071853637695, "learning_rate": 1.7417620189550877e-06, "loss": 0.651, "step": 5464 }, { "epoch": 0.8151241703333582, "grad_norm": 2.3612098693847656, "learning_rate": 1.7390382324906752e-06, "loss": 0.6439, "step": 5465 }, { "epoch": 0.8152733238869416, "grad_norm": 1.7667484283447266, "learning_rate": 1.7363163746007572e-06, "loss": 0.6476, "step": 5466 }, { "epoch": 0.815422477440525, "grad_norm": 1.4981048107147217, "learning_rate": 1.7335964459207688e-06, "loss": 0.7435, "step": 5467 }, { "epoch": 0.8155716309941085, "grad_norm": 1.9897420406341553, "learning_rate": 1.7308784470856944e-06, "loss": 0.721, "step": 5468 }, { "epoch": 0.8157207845476918, "grad_norm": 2.000847339630127, "learning_rate": 1.7281623787300672e-06, "loss": 0.709, "step": 5469 }, { "epoch": 0.8158699381012753, "grad_norm": 2.7248215675354004, "learning_rate": 1.725448241487976e-06, "loss": 0.6055, "step": 5470 }, { "epoch": 0.8160190916548586, "grad_norm": 1.4754194021224976, "learning_rate": 1.7227360359930468e-06, "loss": 0.7019, "step": 5471 }, { "epoch": 0.8161682452084421, "grad_norm": 1.5063751935958862, "learning_rate": 1.7200257628784633e-06, "loss": 0.6682, "step": 5472 }, { "epoch": 0.8163173987620255, "grad_norm": 1.2377967834472656, "learning_rate": 1.7173174227769574e-06, "loss": 0.7713, "step": 5473 }, { "epoch": 0.8164665523156089, "grad_norm": 1.4943021535873413, "learning_rate": 1.7146110163208108e-06, "loss": 0.6952, "step": 5474 }, { "epoch": 0.8166157058691923, "grad_norm": 0.49556222558021545, "learning_rate": 1.711906544141846e-06, "loss": 0.2534, "step": 5475 }, { "epoch": 0.8167648594227758, "grad_norm": 1.6596224308013916, "learning_rate": 1.7092040068714421e-06, "loss": 0.6835, "step": 5476 }, { "epoch": 0.8169140129763591, "grad_norm": 1.5693435668945312, "learning_rate": 1.7065034051405239e-06, "loss": 0.6756, "step": 5477 }, { "epoch": 0.8170631665299426, "grad_norm": 3.6811106204986572, "learning_rate": 1.703804739579563e-06, "loss": 0.7459, "step": 5478 }, { "epoch": 0.817212320083526, "grad_norm": 1.7472279071807861, "learning_rate": 1.701108010818583e-06, "loss": 0.556, "step": 5479 }, { "epoch": 0.8173614736371094, "grad_norm": 1.8426169157028198, "learning_rate": 1.6984132194871516e-06, "loss": 0.6544, "step": 5480 }, { "epoch": 0.8175106271906928, "grad_norm": 1.2247825860977173, "learning_rate": 1.6957203662143818e-06, "loss": 0.6111, "step": 5481 }, { "epoch": 0.8176597807442763, "grad_norm": 1.2143923044204712, "learning_rate": 1.6930294516289403e-06, "loss": 0.6467, "step": 5482 }, { "epoch": 0.8178089342978596, "grad_norm": 1.4054304361343384, "learning_rate": 1.6903404763590403e-06, "loss": 0.6748, "step": 5483 }, { "epoch": 0.8179580878514431, "grad_norm": 1.7573248147964478, "learning_rate": 1.6876534410324352e-06, "loss": 0.7087, "step": 5484 }, { "epoch": 0.8181072414050264, "grad_norm": 1.8631480932235718, "learning_rate": 1.684968346276431e-06, "loss": 0.7081, "step": 5485 }, { "epoch": 0.8182563949586099, "grad_norm": 2.390059232711792, "learning_rate": 1.6822851927178874e-06, "loss": 0.6825, "step": 5486 }, { "epoch": 0.8184055485121933, "grad_norm": 1.7360121011734009, "learning_rate": 1.6796039809831977e-06, "loss": 0.6379, "step": 5487 }, { "epoch": 0.8185547020657767, "grad_norm": 1.4836806058883667, "learning_rate": 1.6769247116983079e-06, "loss": 0.6326, "step": 5488 }, { "epoch": 0.8187038556193601, "grad_norm": 1.563904881477356, "learning_rate": 1.6742473854887154e-06, "loss": 0.7332, "step": 5489 }, { "epoch": 0.8188530091729436, "grad_norm": 4.2223992347717285, "learning_rate": 1.6715720029794525e-06, "loss": 0.6683, "step": 5490 }, { "epoch": 0.8190021627265269, "grad_norm": 11.214815139770508, "learning_rate": 1.6688985647951085e-06, "loss": 0.7183, "step": 5491 }, { "epoch": 0.8191513162801104, "grad_norm": 0.5475833415985107, "learning_rate": 1.666227071559814e-06, "loss": 0.279, "step": 5492 }, { "epoch": 0.8193004698336938, "grad_norm": 1.5963908433914185, "learning_rate": 1.6635575238972478e-06, "loss": 0.6888, "step": 5493 }, { "epoch": 0.8194496233872772, "grad_norm": 1.429506778717041, "learning_rate": 1.6608899224306264e-06, "loss": 0.6705, "step": 5494 }, { "epoch": 0.8195987769408606, "grad_norm": 1.699530839920044, "learning_rate": 1.6582242677827286e-06, "loss": 0.6603, "step": 5495 }, { "epoch": 0.819747930494444, "grad_norm": 1.1721888780593872, "learning_rate": 1.6555605605758606e-06, "loss": 0.65, "step": 5496 }, { "epoch": 0.8198970840480274, "grad_norm": 0.5064939260482788, "learning_rate": 1.6528988014318848e-06, "loss": 0.2762, "step": 5497 }, { "epoch": 0.8200462376016109, "grad_norm": 1.5794563293457031, "learning_rate": 1.650238990972205e-06, "loss": 0.683, "step": 5498 }, { "epoch": 0.8201953911551942, "grad_norm": 1.2565793991088867, "learning_rate": 1.6475811298177747e-06, "loss": 0.7416, "step": 5499 }, { "epoch": 0.8203445447087777, "grad_norm": 1.5194308757781982, "learning_rate": 1.644925218589083e-06, "loss": 0.6735, "step": 5500 }, { "epoch": 0.8204936982623611, "grad_norm": 1.386824131011963, "learning_rate": 1.6422712579061727e-06, "loss": 0.6067, "step": 5501 }, { "epoch": 0.8206428518159445, "grad_norm": 2.600724458694458, "learning_rate": 1.6396192483886285e-06, "loss": 0.6919, "step": 5502 }, { "epoch": 0.8207920053695279, "grad_norm": 1.230589747428894, "learning_rate": 1.636969190655574e-06, "loss": 0.6455, "step": 5503 }, { "epoch": 0.8209411589231114, "grad_norm": 1.3882901668548584, "learning_rate": 1.6343210853256885e-06, "loss": 0.6552, "step": 5504 }, { "epoch": 0.8210903124766947, "grad_norm": 1.3490962982177734, "learning_rate": 1.6316749330171888e-06, "loss": 0.5832, "step": 5505 }, { "epoch": 0.8212394660302782, "grad_norm": 1.780900001525879, "learning_rate": 1.6290307343478318e-06, "loss": 0.6763, "step": 5506 }, { "epoch": 0.8213886195838616, "grad_norm": 1.5425047874450684, "learning_rate": 1.6263884899349248e-06, "loss": 0.651, "step": 5507 }, { "epoch": 0.821537773137445, "grad_norm": 1.6001585721969604, "learning_rate": 1.6237482003953187e-06, "loss": 0.6837, "step": 5508 }, { "epoch": 0.8216869266910284, "grad_norm": 2.2403740882873535, "learning_rate": 1.6211098663454016e-06, "loss": 0.6754, "step": 5509 }, { "epoch": 0.8218360802446119, "grad_norm": 1.5930367708206177, "learning_rate": 1.6184734884011123e-06, "loss": 0.6456, "step": 5510 }, { "epoch": 0.8219852337981952, "grad_norm": 1.2722400426864624, "learning_rate": 1.6158390671779322e-06, "loss": 0.6009, "step": 5511 }, { "epoch": 0.8221343873517787, "grad_norm": 1.7167900800704956, "learning_rate": 1.6132066032908766e-06, "loss": 0.6933, "step": 5512 }, { "epoch": 0.822283540905362, "grad_norm": 1.190559983253479, "learning_rate": 1.6105760973545181e-06, "loss": 0.7127, "step": 5513 }, { "epoch": 0.8224326944589455, "grad_norm": 1.9381464719772339, "learning_rate": 1.6079475499829655e-06, "loss": 0.6564, "step": 5514 }, { "epoch": 0.8225818480125289, "grad_norm": 0.467787504196167, "learning_rate": 1.6053209617898646e-06, "loss": 0.2288, "step": 5515 }, { "epoch": 0.8227310015661123, "grad_norm": 1.5651326179504395, "learning_rate": 1.6026963333884127e-06, "loss": 0.6571, "step": 5516 }, { "epoch": 0.8228801551196957, "grad_norm": 1.537028193473816, "learning_rate": 1.6000736653913485e-06, "loss": 0.5728, "step": 5517 }, { "epoch": 0.8230293086732792, "grad_norm": 1.5900534391403198, "learning_rate": 1.5974529584109444e-06, "loss": 0.6199, "step": 5518 }, { "epoch": 0.8231784622268625, "grad_norm": 1.793529987335205, "learning_rate": 1.5948342130590256e-06, "loss": 0.6085, "step": 5519 }, { "epoch": 0.823327615780446, "grad_norm": 1.2618249654769897, "learning_rate": 1.5922174299469528e-06, "loss": 0.7299, "step": 5520 }, { "epoch": 0.8234767693340294, "grad_norm": 0.5118345618247986, "learning_rate": 1.5896026096856321e-06, "loss": 0.2361, "step": 5521 }, { "epoch": 0.8236259228876128, "grad_norm": 1.5933377742767334, "learning_rate": 1.5869897528855106e-06, "loss": 0.6673, "step": 5522 }, { "epoch": 0.8237750764411962, "grad_norm": 0.5366636514663696, "learning_rate": 1.5843788601565757e-06, "loss": 0.2457, "step": 5523 }, { "epoch": 0.8239242299947797, "grad_norm": 1.4511809349060059, "learning_rate": 1.58176993210836e-06, "loss": 0.6967, "step": 5524 }, { "epoch": 0.824073383548363, "grad_norm": 1.334044337272644, "learning_rate": 1.5791629693499289e-06, "loss": 0.6256, "step": 5525 }, { "epoch": 0.8242225371019465, "grad_norm": 1.8908145427703857, "learning_rate": 1.5765579724898973e-06, "loss": 0.6205, "step": 5526 }, { "epoch": 0.8243716906555298, "grad_norm": 2.343773365020752, "learning_rate": 1.5739549421364196e-06, "loss": 0.6326, "step": 5527 }, { "epoch": 0.8245208442091133, "grad_norm": 2.032282590866089, "learning_rate": 1.5713538788971882e-06, "loss": 0.7371, "step": 5528 }, { "epoch": 0.8246699977626967, "grad_norm": 1.5168758630752563, "learning_rate": 1.5687547833794349e-06, "loss": 0.6682, "step": 5529 }, { "epoch": 0.8248191513162801, "grad_norm": 1.2815461158752441, "learning_rate": 1.5661576561899438e-06, "loss": 0.6547, "step": 5530 }, { "epoch": 0.8249683048698635, "grad_norm": 1.4624489545822144, "learning_rate": 1.563562497935025e-06, "loss": 0.6553, "step": 5531 }, { "epoch": 0.825117458423447, "grad_norm": 1.3004529476165771, "learning_rate": 1.5609693092205347e-06, "loss": 0.7313, "step": 5532 }, { "epoch": 0.8252666119770303, "grad_norm": 1.3552219867706299, "learning_rate": 1.558378090651872e-06, "loss": 0.6778, "step": 5533 }, { "epoch": 0.8254157655306138, "grad_norm": 1.3310987949371338, "learning_rate": 1.5557888428339706e-06, "loss": 0.5101, "step": 5534 }, { "epoch": 0.8255649190841972, "grad_norm": 1.5092034339904785, "learning_rate": 1.5532015663713085e-06, "loss": 0.672, "step": 5535 }, { "epoch": 0.8257140726377806, "grad_norm": 1.8447239398956299, "learning_rate": 1.5506162618679043e-06, "loss": 0.571, "step": 5536 }, { "epoch": 0.825863226191364, "grad_norm": 1.8259892463684082, "learning_rate": 1.548032929927309e-06, "loss": 0.6525, "step": 5537 }, { "epoch": 0.8260123797449475, "grad_norm": 1.598663330078125, "learning_rate": 1.5454515711526187e-06, "loss": 0.7032, "step": 5538 }, { "epoch": 0.8261615332985308, "grad_norm": 1.4685604572296143, "learning_rate": 1.5428721861464746e-06, "loss": 0.735, "step": 5539 }, { "epoch": 0.8263106868521143, "grad_norm": 1.7372264862060547, "learning_rate": 1.540294775511043e-06, "loss": 0.6541, "step": 5540 }, { "epoch": 0.8264598404056976, "grad_norm": 1.401978611946106, "learning_rate": 1.5377193398480406e-06, "loss": 0.6692, "step": 5541 }, { "epoch": 0.8266089939592811, "grad_norm": 1.2045485973358154, "learning_rate": 1.5351458797587205e-06, "loss": 0.6768, "step": 5542 }, { "epoch": 0.8267581475128645, "grad_norm": 2.2893145084381104, "learning_rate": 1.5325743958438698e-06, "loss": 0.6635, "step": 5543 }, { "epoch": 0.8269073010664479, "grad_norm": 1.4070699214935303, "learning_rate": 1.53000488870382e-06, "loss": 0.613, "step": 5544 }, { "epoch": 0.8270564546200313, "grad_norm": 1.6646349430084229, "learning_rate": 1.5274373589384384e-06, "loss": 0.6643, "step": 5545 }, { "epoch": 0.8272056081736148, "grad_norm": 2.3418684005737305, "learning_rate": 1.5248718071471346e-06, "loss": 0.7374, "step": 5546 }, { "epoch": 0.8273547617271981, "grad_norm": 2.646747350692749, "learning_rate": 1.5223082339288452e-06, "loss": 0.6631, "step": 5547 }, { "epoch": 0.8275039152807816, "grad_norm": 1.357661247253418, "learning_rate": 1.5197466398820625e-06, "loss": 0.7357, "step": 5548 }, { "epoch": 0.827653068834365, "grad_norm": 1.7929767370224, "learning_rate": 1.5171870256048005e-06, "loss": 0.6991, "step": 5549 }, { "epoch": 0.8278022223879484, "grad_norm": 1.6019221544265747, "learning_rate": 1.5146293916946185e-06, "loss": 0.7022, "step": 5550 }, { "epoch": 0.8279513759415318, "grad_norm": 1.7144529819488525, "learning_rate": 1.512073738748614e-06, "loss": 0.7565, "step": 5551 }, { "epoch": 0.8281005294951153, "grad_norm": 1.9336354732513428, "learning_rate": 1.5095200673634224e-06, "loss": 0.6652, "step": 5552 }, { "epoch": 0.8282496830486986, "grad_norm": 1.271200180053711, "learning_rate": 1.5069683781352106e-06, "loss": 0.6634, "step": 5553 }, { "epoch": 0.8283988366022821, "grad_norm": 1.2818703651428223, "learning_rate": 1.5044186716596888e-06, "loss": 0.6847, "step": 5554 }, { "epoch": 0.8285479901558654, "grad_norm": 1.8822909593582153, "learning_rate": 1.5018709485321004e-06, "loss": 0.7943, "step": 5555 }, { "epoch": 0.8286971437094489, "grad_norm": 0.5000333786010742, "learning_rate": 1.499325209347231e-06, "loss": 0.2462, "step": 5556 }, { "epoch": 0.8288462972630323, "grad_norm": 6.137055397033691, "learning_rate": 1.4967814546993975e-06, "loss": 0.7982, "step": 5557 }, { "epoch": 0.8289954508166157, "grad_norm": 1.5818285942077637, "learning_rate": 1.4942396851824582e-06, "loss": 0.7491, "step": 5558 }, { "epoch": 0.8291446043701991, "grad_norm": 1.5679149627685547, "learning_rate": 1.4916999013898027e-06, "loss": 0.6793, "step": 5559 }, { "epoch": 0.8292937579237826, "grad_norm": 1.4027825593948364, "learning_rate": 1.4891621039143589e-06, "loss": 0.6565, "step": 5560 }, { "epoch": 0.8294429114773659, "grad_norm": 1.3167682886123657, "learning_rate": 1.4866262933485975e-06, "loss": 0.67, "step": 5561 }, { "epoch": 0.8295920650309494, "grad_norm": 1.8058347702026367, "learning_rate": 1.4840924702845128e-06, "loss": 0.7155, "step": 5562 }, { "epoch": 0.8297412185845328, "grad_norm": 1.5752511024475098, "learning_rate": 1.4815606353136459e-06, "loss": 0.6785, "step": 5563 }, { "epoch": 0.8298903721381162, "grad_norm": 1.2485496997833252, "learning_rate": 1.4790307890270694e-06, "loss": 0.7033, "step": 5564 }, { "epoch": 0.8300395256916996, "grad_norm": 1.3871899843215942, "learning_rate": 1.4765029320153912e-06, "loss": 0.6571, "step": 5565 }, { "epoch": 0.8301886792452831, "grad_norm": 1.6460869312286377, "learning_rate": 1.4739770648687568e-06, "loss": 0.6668, "step": 5566 }, { "epoch": 0.8303378327988664, "grad_norm": 3.7902603149414062, "learning_rate": 1.4714531881768478e-06, "loss": 0.6428, "step": 5567 }, { "epoch": 0.8304869863524499, "grad_norm": 2.149383068084717, "learning_rate": 1.4689313025288754e-06, "loss": 0.6617, "step": 5568 }, { "epoch": 0.8306361399060332, "grad_norm": 0.4667070508003235, "learning_rate": 1.4664114085135916e-06, "loss": 0.2479, "step": 5569 }, { "epoch": 0.8307852934596167, "grad_norm": 2.4460041522979736, "learning_rate": 1.4638935067192828e-06, "loss": 0.7057, "step": 5570 }, { "epoch": 0.8309344470132001, "grad_norm": 1.6047115325927734, "learning_rate": 1.4613775977337707e-06, "loss": 0.7231, "step": 5571 }, { "epoch": 0.8310836005667835, "grad_norm": 1.0446288585662842, "learning_rate": 1.4588636821444059e-06, "loss": 0.6776, "step": 5572 }, { "epoch": 0.8312327541203669, "grad_norm": 1.7513904571533203, "learning_rate": 1.4563517605380805e-06, "loss": 0.6631, "step": 5573 }, { "epoch": 0.8313819076739504, "grad_norm": 1.4694700241088867, "learning_rate": 1.4538418335012194e-06, "loss": 0.6285, "step": 5574 }, { "epoch": 0.8315310612275337, "grad_norm": 1.5997374057769775, "learning_rate": 1.4513339016197802e-06, "loss": 0.6841, "step": 5575 }, { "epoch": 0.8316802147811172, "grad_norm": 1.6030991077423096, "learning_rate": 1.4488279654792558e-06, "loss": 0.6606, "step": 5576 }, { "epoch": 0.8318293683347006, "grad_norm": 2.5137033462524414, "learning_rate": 1.446324025664676e-06, "loss": 0.5984, "step": 5577 }, { "epoch": 0.831978521888284, "grad_norm": 5.992222309112549, "learning_rate": 1.4438220827605965e-06, "loss": 0.6822, "step": 5578 }, { "epoch": 0.8321276754418674, "grad_norm": 2.243316888809204, "learning_rate": 1.4413221373511132e-06, "loss": 0.5889, "step": 5579 }, { "epoch": 0.8322768289954509, "grad_norm": 1.786265254020691, "learning_rate": 1.4388241900198597e-06, "loss": 0.7258, "step": 5580 }, { "epoch": 0.8324259825490342, "grad_norm": 1.3698625564575195, "learning_rate": 1.4363282413499902e-06, "loss": 0.7281, "step": 5581 }, { "epoch": 0.8325751361026177, "grad_norm": 1.361393928527832, "learning_rate": 1.433834291924201e-06, "loss": 0.6826, "step": 5582 }, { "epoch": 0.832724289656201, "grad_norm": 1.7428456544876099, "learning_rate": 1.4313423423247275e-06, "loss": 0.6846, "step": 5583 }, { "epoch": 0.8328734432097845, "grad_norm": 1.7227756977081299, "learning_rate": 1.4288523931333242e-06, "loss": 0.6908, "step": 5584 }, { "epoch": 0.8330225967633679, "grad_norm": 1.5712858438491821, "learning_rate": 1.4263644449312896e-06, "loss": 0.6545, "step": 5585 }, { "epoch": 0.8331717503169513, "grad_norm": 1.4947222471237183, "learning_rate": 1.4238784982994503e-06, "loss": 0.6724, "step": 5586 }, { "epoch": 0.8333209038705347, "grad_norm": 1.3760019540786743, "learning_rate": 1.4213945538181651e-06, "loss": 0.6688, "step": 5587 }, { "epoch": 0.8334700574241182, "grad_norm": 1.8472089767456055, "learning_rate": 1.418912612067327e-06, "loss": 0.6453, "step": 5588 }, { "epoch": 0.8336192109777015, "grad_norm": 1.8291268348693848, "learning_rate": 1.4164326736263645e-06, "loss": 0.5221, "step": 5589 }, { "epoch": 0.833768364531285, "grad_norm": 1.5183846950531006, "learning_rate": 1.413954739074227e-06, "loss": 0.6542, "step": 5590 }, { "epoch": 0.8339175180848684, "grad_norm": 1.4072704315185547, "learning_rate": 1.4114788089894128e-06, "loss": 0.6836, "step": 5591 }, { "epoch": 0.8340666716384518, "grad_norm": 0.49510079622268677, "learning_rate": 1.4090048839499426e-06, "loss": 0.2326, "step": 5592 }, { "epoch": 0.8342158251920352, "grad_norm": 1.388728141784668, "learning_rate": 1.4065329645333658e-06, "loss": 0.6905, "step": 5593 }, { "epoch": 0.8343649787456187, "grad_norm": 1.9896445274353027, "learning_rate": 1.4040630513167697e-06, "loss": 0.6896, "step": 5594 }, { "epoch": 0.834514132299202, "grad_norm": 2.18670916557312, "learning_rate": 1.401595144876775e-06, "loss": 0.7339, "step": 5595 }, { "epoch": 0.8346632858527855, "grad_norm": 2.4845738410949707, "learning_rate": 1.3991292457895234e-06, "loss": 0.667, "step": 5596 }, { "epoch": 0.8348124394063688, "grad_norm": 1.278974175453186, "learning_rate": 1.3966653546306997e-06, "loss": 0.6998, "step": 5597 }, { "epoch": 0.8349615929599523, "grad_norm": 2.14621901512146, "learning_rate": 1.3942034719755127e-06, "loss": 0.7265, "step": 5598 }, { "epoch": 0.8351107465135357, "grad_norm": 1.4625717401504517, "learning_rate": 1.391743598398707e-06, "loss": 0.6552, "step": 5599 }, { "epoch": 0.8352599000671191, "grad_norm": 1.3470667600631714, "learning_rate": 1.3892857344745537e-06, "loss": 0.6826, "step": 5600 }, { "epoch": 0.8354090536207025, "grad_norm": 0.5055582523345947, "learning_rate": 1.386829880776861e-06, "loss": 0.2494, "step": 5601 }, { "epoch": 0.835558207174286, "grad_norm": 1.4701844453811646, "learning_rate": 1.3843760378789583e-06, "loss": 0.7168, "step": 5602 }, { "epoch": 0.8357073607278693, "grad_norm": 1.3993552923202515, "learning_rate": 1.3819242063537131e-06, "loss": 0.7676, "step": 5603 }, { "epoch": 0.8358565142814528, "grad_norm": 1.3691238164901733, "learning_rate": 1.3794743867735206e-06, "loss": 0.6126, "step": 5604 }, { "epoch": 0.8360056678350362, "grad_norm": 1.4416382312774658, "learning_rate": 1.37702657971031e-06, "loss": 0.7297, "step": 5605 }, { "epoch": 0.8361548213886196, "grad_norm": 1.756352186203003, "learning_rate": 1.3745807857355342e-06, "loss": 0.6681, "step": 5606 }, { "epoch": 0.836303974942203, "grad_norm": 1.5228084325790405, "learning_rate": 1.3721370054201788e-06, "loss": 0.671, "step": 5607 }, { "epoch": 0.8364531284957865, "grad_norm": 1.2336171865463257, "learning_rate": 1.3696952393347629e-06, "loss": 0.6416, "step": 5608 }, { "epoch": 0.8366022820493698, "grad_norm": 1.6986241340637207, "learning_rate": 1.3672554880493305e-06, "loss": 0.7574, "step": 5609 }, { "epoch": 0.8367514356029533, "grad_norm": 1.3026576042175293, "learning_rate": 1.3648177521334582e-06, "loss": 0.6255, "step": 5610 }, { "epoch": 0.8369005891565366, "grad_norm": 1.3723092079162598, "learning_rate": 1.3623820321562531e-06, "loss": 0.7078, "step": 5611 }, { "epoch": 0.8370497427101201, "grad_norm": 1.9934064149856567, "learning_rate": 1.3599483286863458e-06, "loss": 0.6095, "step": 5612 }, { "epoch": 0.8371988962637035, "grad_norm": 2.3041703701019287, "learning_rate": 1.3575166422919006e-06, "loss": 0.744, "step": 5613 }, { "epoch": 0.8373480498172869, "grad_norm": 1.3622621297836304, "learning_rate": 1.3550869735406124e-06, "loss": 0.6019, "step": 5614 }, { "epoch": 0.8374972033708703, "grad_norm": 0.5243021249771118, "learning_rate": 1.352659322999701e-06, "loss": 0.2324, "step": 5615 }, { "epoch": 0.8376463569244538, "grad_norm": 1.6239824295043945, "learning_rate": 1.350233691235915e-06, "loss": 0.6788, "step": 5616 }, { "epoch": 0.8377955104780371, "grad_norm": 2.4014713764190674, "learning_rate": 1.3478100788155413e-06, "loss": 0.6395, "step": 5617 }, { "epoch": 0.8379446640316206, "grad_norm": 1.4406126737594604, "learning_rate": 1.3453884863043798e-06, "loss": 0.7147, "step": 5618 }, { "epoch": 0.838093817585204, "grad_norm": 0.5010892152786255, "learning_rate": 1.3429689142677704e-06, "loss": 0.2281, "step": 5619 }, { "epoch": 0.8382429711387874, "grad_norm": 1.7226265668869019, "learning_rate": 1.3405513632705792e-06, "loss": 0.6664, "step": 5620 }, { "epoch": 0.8383921246923708, "grad_norm": 1.383056640625, "learning_rate": 1.3381358338771954e-06, "loss": 0.7333, "step": 5621 }, { "epoch": 0.8385412782459543, "grad_norm": 1.308534860610962, "learning_rate": 1.3357223266515406e-06, "loss": 0.6826, "step": 5622 }, { "epoch": 0.8386904317995376, "grad_norm": 1.6195423603057861, "learning_rate": 1.333310842157064e-06, "loss": 0.7092, "step": 5623 }, { "epoch": 0.8388395853531211, "grad_norm": 1.7320187091827393, "learning_rate": 1.330901380956745e-06, "loss": 0.5704, "step": 5624 }, { "epoch": 0.8389887389067044, "grad_norm": 1.6342532634735107, "learning_rate": 1.3284939436130816e-06, "loss": 0.676, "step": 5625 }, { "epoch": 0.8391378924602879, "grad_norm": 1.4983696937561035, "learning_rate": 1.3260885306881122e-06, "loss": 0.6949, "step": 5626 }, { "epoch": 0.8392870460138713, "grad_norm": 1.8425556421279907, "learning_rate": 1.3236851427433917e-06, "loss": 0.5926, "step": 5627 }, { "epoch": 0.8394361995674547, "grad_norm": 0.5263767242431641, "learning_rate": 1.3212837803400068e-06, "loss": 0.2576, "step": 5628 }, { "epoch": 0.8395853531210381, "grad_norm": 1.5032817125320435, "learning_rate": 1.3188844440385716e-06, "loss": 0.6161, "step": 5629 }, { "epoch": 0.8397345066746216, "grad_norm": 1.7275261878967285, "learning_rate": 1.3164871343992292e-06, "loss": 0.6752, "step": 5630 }, { "epoch": 0.8398836602282049, "grad_norm": 1.7851146459579468, "learning_rate": 1.3140918519816415e-06, "loss": 0.6947, "step": 5631 }, { "epoch": 0.8400328137817884, "grad_norm": 1.490297794342041, "learning_rate": 1.3116985973450058e-06, "loss": 0.686, "step": 5632 }, { "epoch": 0.8401819673353718, "grad_norm": 1.5021450519561768, "learning_rate": 1.3093073710480442e-06, "loss": 0.6257, "step": 5633 }, { "epoch": 0.8403311208889552, "grad_norm": 1.9896395206451416, "learning_rate": 1.3069181736489978e-06, "loss": 0.6812, "step": 5634 }, { "epoch": 0.8404802744425386, "grad_norm": 1.74856436252594, "learning_rate": 1.3045310057056459e-06, "loss": 0.6934, "step": 5635 }, { "epoch": 0.8406294279961221, "grad_norm": 1.6081206798553467, "learning_rate": 1.3021458677752884e-06, "loss": 0.6489, "step": 5636 }, { "epoch": 0.8407785815497054, "grad_norm": 1.8100324869155884, "learning_rate": 1.2997627604147468e-06, "loss": 0.7329, "step": 5637 }, { "epoch": 0.8409277351032889, "grad_norm": 1.8703540563583374, "learning_rate": 1.2973816841803756e-06, "loss": 0.7204, "step": 5638 }, { "epoch": 0.8410768886568722, "grad_norm": 1.4510746002197266, "learning_rate": 1.2950026396280536e-06, "loss": 0.6915, "step": 5639 }, { "epoch": 0.8412260422104557, "grad_norm": 1.9027276039123535, "learning_rate": 1.2926256273131799e-06, "loss": 0.5982, "step": 5640 }, { "epoch": 0.8413751957640391, "grad_norm": 2.117297410964966, "learning_rate": 1.2902506477906862e-06, "loss": 0.6156, "step": 5641 }, { "epoch": 0.8415243493176225, "grad_norm": 0.5105044841766357, "learning_rate": 1.2878777016150267e-06, "loss": 0.2715, "step": 5642 }, { "epoch": 0.8416735028712059, "grad_norm": 2.190408229827881, "learning_rate": 1.2855067893401773e-06, "loss": 0.5437, "step": 5643 }, { "epoch": 0.8418226564247894, "grad_norm": 1.4996038675308228, "learning_rate": 1.2831379115196473e-06, "loss": 0.5995, "step": 5644 }, { "epoch": 0.8419718099783727, "grad_norm": 2.565772533416748, "learning_rate": 1.2807710687064667e-06, "loss": 0.684, "step": 5645 }, { "epoch": 0.8421209635319562, "grad_norm": 1.4231927394866943, "learning_rate": 1.2784062614531866e-06, "loss": 0.6734, "step": 5646 }, { "epoch": 0.8422701170855396, "grad_norm": 2.1416780948638916, "learning_rate": 1.2760434903118868e-06, "loss": 0.5745, "step": 5647 }, { "epoch": 0.842419270639123, "grad_norm": 1.4781179428100586, "learning_rate": 1.2736827558341735e-06, "loss": 0.6348, "step": 5648 }, { "epoch": 0.8425684241927064, "grad_norm": 1.3715364933013916, "learning_rate": 1.2713240585711762e-06, "loss": 0.6905, "step": 5649 }, { "epoch": 0.8427175777462899, "grad_norm": 1.6327552795410156, "learning_rate": 1.2689673990735428e-06, "loss": 0.6037, "step": 5650 }, { "epoch": 0.8428667312998732, "grad_norm": 1.450487494468689, "learning_rate": 1.266612777891455e-06, "loss": 0.6843, "step": 5651 }, { "epoch": 0.8430158848534567, "grad_norm": 1.253922939300537, "learning_rate": 1.2642601955746126e-06, "loss": 0.7803, "step": 5652 }, { "epoch": 0.84316503840704, "grad_norm": 1.5905134677886963, "learning_rate": 1.2619096526722418e-06, "loss": 0.6832, "step": 5653 }, { "epoch": 0.8433141919606235, "grad_norm": 1.2501786947250366, "learning_rate": 1.2595611497330917e-06, "loss": 0.625, "step": 5654 }, { "epoch": 0.8434633455142069, "grad_norm": 0.5152080059051514, "learning_rate": 1.257214687305437e-06, "loss": 0.2883, "step": 5655 }, { "epoch": 0.8436124990677903, "grad_norm": 1.4200917482376099, "learning_rate": 1.2548702659370703e-06, "loss": 0.6289, "step": 5656 }, { "epoch": 0.8437616526213737, "grad_norm": 1.6905244588851929, "learning_rate": 1.2525278861753142e-06, "loss": 0.7327, "step": 5657 }, { "epoch": 0.8439108061749572, "grad_norm": 1.7622151374816895, "learning_rate": 1.2501875485670145e-06, "loss": 0.7015, "step": 5658 }, { "epoch": 0.8440599597285405, "grad_norm": 3.9984724521636963, "learning_rate": 1.247849253658533e-06, "loss": 0.568, "step": 5659 }, { "epoch": 0.844209113282124, "grad_norm": 1.5454974174499512, "learning_rate": 1.2455130019957607e-06, "loss": 0.563, "step": 5660 }, { "epoch": 0.8443582668357074, "grad_norm": 1.821709394454956, "learning_rate": 1.2431787941241157e-06, "loss": 0.6951, "step": 5661 }, { "epoch": 0.8445074203892907, "grad_norm": 0.5672516226768494, "learning_rate": 1.240846630588529e-06, "loss": 0.2431, "step": 5662 }, { "epoch": 0.8446565739428742, "grad_norm": 1.3616896867752075, "learning_rate": 1.2385165119334607e-06, "loss": 0.6966, "step": 5663 }, { "epoch": 0.8448057274964575, "grad_norm": 1.852510690689087, "learning_rate": 1.2361884387028933e-06, "loss": 0.6064, "step": 5664 }, { "epoch": 0.844954881050041, "grad_norm": 1.4554622173309326, "learning_rate": 1.2338624114403263e-06, "loss": 0.5947, "step": 5665 }, { "epoch": 0.8451040346036244, "grad_norm": 1.5178816318511963, "learning_rate": 1.231538430688789e-06, "loss": 0.6826, "step": 5666 }, { "epoch": 0.8452531881572078, "grad_norm": 2.2442493438720703, "learning_rate": 1.2292164969908294e-06, "loss": 0.695, "step": 5667 }, { "epoch": 0.8454023417107912, "grad_norm": 1.9737601280212402, "learning_rate": 1.226896610888516e-06, "loss": 0.686, "step": 5668 }, { "epoch": 0.8455514952643747, "grad_norm": 3.6936848163604736, "learning_rate": 1.2245787729234404e-06, "loss": 0.7177, "step": 5669 }, { "epoch": 0.845700648817958, "grad_norm": 1.330443263053894, "learning_rate": 1.2222629836367227e-06, "loss": 0.6391, "step": 5670 }, { "epoch": 0.8458498023715415, "grad_norm": 1.2362900972366333, "learning_rate": 1.2199492435689918e-06, "loss": 0.6653, "step": 5671 }, { "epoch": 0.8459989559251249, "grad_norm": 1.3359825611114502, "learning_rate": 1.217637553260409e-06, "loss": 0.65, "step": 5672 }, { "epoch": 0.8461481094787083, "grad_norm": 2.6414718627929688, "learning_rate": 1.2153279132506535e-06, "loss": 0.6493, "step": 5673 }, { "epoch": 0.8462972630322917, "grad_norm": 1.4701260328292847, "learning_rate": 1.2130203240789228e-06, "loss": 0.709, "step": 5674 }, { "epoch": 0.8464464165858752, "grad_norm": 1.558929681777954, "learning_rate": 1.2107147862839396e-06, "loss": 0.671, "step": 5675 }, { "epoch": 0.8465955701394585, "grad_norm": 2.105651617050171, "learning_rate": 1.2084113004039467e-06, "loss": 0.6381, "step": 5676 }, { "epoch": 0.846744723693042, "grad_norm": 1.4014371633529663, "learning_rate": 1.20610986697671e-06, "loss": 0.6424, "step": 5677 }, { "epoch": 0.8468938772466253, "grad_norm": 2.0824289321899414, "learning_rate": 1.2038104865395072e-06, "loss": 0.6394, "step": 5678 }, { "epoch": 0.8470430308002088, "grad_norm": 1.8851923942565918, "learning_rate": 1.2015131596291518e-06, "loss": 0.6593, "step": 5679 }, { "epoch": 0.8471921843537922, "grad_norm": 1.355863094329834, "learning_rate": 1.1992178867819636e-06, "loss": 0.616, "step": 5680 }, { "epoch": 0.8473413379073756, "grad_norm": 1.4794343709945679, "learning_rate": 1.1969246685337909e-06, "loss": 0.6458, "step": 5681 }, { "epoch": 0.847490491460959, "grad_norm": 2.3357856273651123, "learning_rate": 1.1946335054199999e-06, "loss": 0.6556, "step": 5682 }, { "epoch": 0.8476396450145425, "grad_norm": 2.5646393299102783, "learning_rate": 1.192344397975479e-06, "loss": 0.6001, "step": 5683 }, { "epoch": 0.8477887985681258, "grad_norm": 1.326732873916626, "learning_rate": 1.1900573467346322e-06, "loss": 0.5995, "step": 5684 }, { "epoch": 0.8479379521217093, "grad_norm": 2.0888028144836426, "learning_rate": 1.1877723522313867e-06, "loss": 0.6622, "step": 5685 }, { "epoch": 0.8480871056752927, "grad_norm": 1.6377720832824707, "learning_rate": 1.1854894149991902e-06, "loss": 0.6456, "step": 5686 }, { "epoch": 0.8482362592288761, "grad_norm": 1.7456417083740234, "learning_rate": 1.1832085355710087e-06, "loss": 0.5718, "step": 5687 }, { "epoch": 0.8483854127824595, "grad_norm": 1.6721340417861938, "learning_rate": 1.1809297144793285e-06, "loss": 0.7307, "step": 5688 }, { "epoch": 0.848534566336043, "grad_norm": 1.7924435138702393, "learning_rate": 1.1786529522561564e-06, "loss": 0.7068, "step": 5689 }, { "epoch": 0.8486837198896263, "grad_norm": 1.2942982912063599, "learning_rate": 1.1763782494330135e-06, "loss": 0.6342, "step": 5690 }, { "epoch": 0.8488328734432098, "grad_norm": 2.0877256393432617, "learning_rate": 1.174105606540945e-06, "loss": 0.6196, "step": 5691 }, { "epoch": 0.8489820269967931, "grad_norm": 1.597952127456665, "learning_rate": 1.171835024110517e-06, "loss": 0.6326, "step": 5692 }, { "epoch": 0.8491311805503766, "grad_norm": 1.9658573865890503, "learning_rate": 1.1695665026718073e-06, "loss": 0.7271, "step": 5693 }, { "epoch": 0.84928033410396, "grad_norm": 1.8189787864685059, "learning_rate": 1.167300042754419e-06, "loss": 0.6773, "step": 5694 }, { "epoch": 0.8494294876575434, "grad_norm": 1.2613369226455688, "learning_rate": 1.16503564488747e-06, "loss": 0.7044, "step": 5695 }, { "epoch": 0.8495786412111268, "grad_norm": 1.7969306707382202, "learning_rate": 1.1627733095996008e-06, "loss": 0.6434, "step": 5696 }, { "epoch": 0.8497277947647103, "grad_norm": 1.4559684991836548, "learning_rate": 1.1605130374189676e-06, "loss": 0.7027, "step": 5697 }, { "epoch": 0.8498769483182936, "grad_norm": 1.8151276111602783, "learning_rate": 1.1582548288732465e-06, "loss": 0.6246, "step": 5698 }, { "epoch": 0.8500261018718771, "grad_norm": 2.1116111278533936, "learning_rate": 1.1559986844896265e-06, "loss": 0.6189, "step": 5699 }, { "epoch": 0.8501752554254605, "grad_norm": 1.2759202718734741, "learning_rate": 1.153744604794822e-06, "loss": 0.7224, "step": 5700 }, { "epoch": 0.8503244089790439, "grad_norm": 1.4904427528381348, "learning_rate": 1.151492590315062e-06, "loss": 0.6561, "step": 5701 }, { "epoch": 0.8504735625326273, "grad_norm": 1.309300422668457, "learning_rate": 1.149242641576096e-06, "loss": 0.6599, "step": 5702 }, { "epoch": 0.8506227160862108, "grad_norm": 1.1575607061386108, "learning_rate": 1.1469947591031848e-06, "loss": 0.6648, "step": 5703 }, { "epoch": 0.8507718696397941, "grad_norm": 1.5995666980743408, "learning_rate": 1.1447489434211124e-06, "loss": 0.7015, "step": 5704 }, { "epoch": 0.8509210231933776, "grad_norm": 1.5169572830200195, "learning_rate": 1.1425051950541798e-06, "loss": 0.6702, "step": 5705 }, { "epoch": 0.851070176746961, "grad_norm": 1.2029987573623657, "learning_rate": 1.1402635145262043e-06, "loss": 0.6877, "step": 5706 }, { "epoch": 0.8512193303005444, "grad_norm": 0.530342698097229, "learning_rate": 1.13802390236052e-06, "loss": 0.2419, "step": 5707 }, { "epoch": 0.8513684838541278, "grad_norm": 2.162001371383667, "learning_rate": 1.13578635907998e-06, "loss": 0.5649, "step": 5708 }, { "epoch": 0.8515176374077112, "grad_norm": 2.0372202396392822, "learning_rate": 1.133550885206951e-06, "loss": 0.6865, "step": 5709 }, { "epoch": 0.8516667909612946, "grad_norm": 1.7307581901550293, "learning_rate": 1.13131748126332e-06, "loss": 0.6717, "step": 5710 }, { "epoch": 0.8518159445148781, "grad_norm": 4.742397308349609, "learning_rate": 1.1290861477704918e-06, "loss": 0.631, "step": 5711 }, { "epoch": 0.8519650980684614, "grad_norm": 1.240922212600708, "learning_rate": 1.12685688524938e-06, "loss": 0.7341, "step": 5712 }, { "epoch": 0.8521142516220449, "grad_norm": 1.4442754983901978, "learning_rate": 1.1246296942204216e-06, "loss": 0.7026, "step": 5713 }, { "epoch": 0.8522634051756283, "grad_norm": 1.6768399477005005, "learning_rate": 1.122404575203574e-06, "loss": 0.6119, "step": 5714 }, { "epoch": 0.8524125587292117, "grad_norm": 1.869980812072754, "learning_rate": 1.1201815287183005e-06, "loss": 0.6907, "step": 5715 }, { "epoch": 0.8525617122827951, "grad_norm": 1.6100565195083618, "learning_rate": 1.1179605552835859e-06, "loss": 0.6757, "step": 5716 }, { "epoch": 0.8527108658363786, "grad_norm": 1.9128847122192383, "learning_rate": 1.1157416554179345e-06, "loss": 0.5991, "step": 5717 }, { "epoch": 0.8528600193899619, "grad_norm": 1.8784149885177612, "learning_rate": 1.1135248296393574e-06, "loss": 0.6586, "step": 5718 }, { "epoch": 0.8530091729435454, "grad_norm": 2.1903529167175293, "learning_rate": 1.1113100784653895e-06, "loss": 0.6317, "step": 5719 }, { "epoch": 0.8531583264971287, "grad_norm": 1.8832390308380127, "learning_rate": 1.1090974024130795e-06, "loss": 0.7222, "step": 5720 }, { "epoch": 0.8533074800507122, "grad_norm": 1.4997928142547607, "learning_rate": 1.1068868019989864e-06, "loss": 0.728, "step": 5721 }, { "epoch": 0.8534566336042956, "grad_norm": 1.9771325588226318, "learning_rate": 1.1046782777391951e-06, "loss": 0.6431, "step": 5722 }, { "epoch": 0.853605787157879, "grad_norm": 1.6526519060134888, "learning_rate": 1.1024718301492975e-06, "loss": 0.6614, "step": 5723 }, { "epoch": 0.8537549407114624, "grad_norm": 2.2805838584899902, "learning_rate": 1.1002674597444019e-06, "loss": 0.6958, "step": 5724 }, { "epoch": 0.8539040942650459, "grad_norm": 1.3929485082626343, "learning_rate": 1.0980651670391317e-06, "loss": 0.6513, "step": 5725 }, { "epoch": 0.8540532478186292, "grad_norm": 0.5392072200775146, "learning_rate": 1.0958649525476306e-06, "loss": 0.2399, "step": 5726 }, { "epoch": 0.8542024013722127, "grad_norm": 1.679125189781189, "learning_rate": 1.0936668167835484e-06, "loss": 0.6802, "step": 5727 }, { "epoch": 0.8543515549257961, "grad_norm": 1.8233290910720825, "learning_rate": 1.0914707602600549e-06, "loss": 0.718, "step": 5728 }, { "epoch": 0.8545007084793795, "grad_norm": 1.7528408765792847, "learning_rate": 1.0892767834898343e-06, "loss": 0.704, "step": 5729 }, { "epoch": 0.8546498620329629, "grad_norm": 1.663938045501709, "learning_rate": 1.0870848869850847e-06, "loss": 0.7362, "step": 5730 }, { "epoch": 0.8547990155865464, "grad_norm": 1.8347063064575195, "learning_rate": 1.084895071257518e-06, "loss": 0.6339, "step": 5731 }, { "epoch": 0.8549481691401297, "grad_norm": 1.580165982246399, "learning_rate": 1.0827073368183627e-06, "loss": 0.6792, "step": 5732 }, { "epoch": 0.8550973226937132, "grad_norm": 1.591011643409729, "learning_rate": 1.080521684178356e-06, "loss": 0.6382, "step": 5733 }, { "epoch": 0.8552464762472965, "grad_norm": 1.528718113899231, "learning_rate": 1.0783381138477544e-06, "loss": 0.7283, "step": 5734 }, { "epoch": 0.85539562980088, "grad_norm": 1.7741081714630127, "learning_rate": 1.0761566263363254e-06, "loss": 0.743, "step": 5735 }, { "epoch": 0.8555447833544634, "grad_norm": 1.2882431745529175, "learning_rate": 1.073977222153355e-06, "loss": 0.7605, "step": 5736 }, { "epoch": 0.8556939369080468, "grad_norm": 1.3725534677505493, "learning_rate": 1.071799901807633e-06, "loss": 0.6164, "step": 5737 }, { "epoch": 0.8558430904616302, "grad_norm": 1.6311594247817993, "learning_rate": 1.0696246658074728e-06, "loss": 0.7343, "step": 5738 }, { "epoch": 0.8559922440152137, "grad_norm": 1.5933282375335693, "learning_rate": 1.0674515146606957e-06, "loss": 0.5954, "step": 5739 }, { "epoch": 0.856141397568797, "grad_norm": 1.8758671283721924, "learning_rate": 1.0652804488746382e-06, "loss": 0.5747, "step": 5740 }, { "epoch": 0.8562905511223805, "grad_norm": 0.4734366238117218, "learning_rate": 1.0631114689561496e-06, "loss": 0.2188, "step": 5741 }, { "epoch": 0.8564397046759639, "grad_norm": 1.6358630657196045, "learning_rate": 1.0609445754115944e-06, "loss": 0.7113, "step": 5742 }, { "epoch": 0.8565888582295473, "grad_norm": 1.6275771856307983, "learning_rate": 1.0587797687468438e-06, "loss": 0.6467, "step": 5743 }, { "epoch": 0.8567380117831307, "grad_norm": 1.4248425960540771, "learning_rate": 1.0566170494672878e-06, "loss": 0.6489, "step": 5744 }, { "epoch": 0.8568871653367142, "grad_norm": 1.5330424308776855, "learning_rate": 1.0544564180778283e-06, "loss": 0.5429, "step": 5745 }, { "epoch": 0.8570363188902975, "grad_norm": 1.7837092876434326, "learning_rate": 1.0522978750828761e-06, "loss": 0.7121, "step": 5746 }, { "epoch": 0.857185472443881, "grad_norm": 1.7632014751434326, "learning_rate": 1.050141420986357e-06, "loss": 0.6923, "step": 5747 }, { "epoch": 0.8573346259974644, "grad_norm": 1.4001561403274536, "learning_rate": 1.0479870562917105e-06, "loss": 0.7152, "step": 5748 }, { "epoch": 0.8574837795510478, "grad_norm": 1.8089020252227783, "learning_rate": 1.0458347815018855e-06, "loss": 0.6697, "step": 5749 }, { "epoch": 0.8576329331046312, "grad_norm": 1.696367859840393, "learning_rate": 1.0436845971193465e-06, "loss": 0.6636, "step": 5750 }, { "epoch": 0.8577820866582146, "grad_norm": 0.5245295166969299, "learning_rate": 1.0415365036460679e-06, "loss": 0.2405, "step": 5751 }, { "epoch": 0.857931240211798, "grad_norm": 1.2497903108596802, "learning_rate": 1.0393905015835325e-06, "loss": 0.6783, "step": 5752 }, { "epoch": 0.8580803937653815, "grad_norm": 1.5822250843048096, "learning_rate": 1.0372465914327402e-06, "loss": 0.7795, "step": 5753 }, { "epoch": 0.8582295473189648, "grad_norm": 1.8754111528396606, "learning_rate": 1.0351047736942e-06, "loss": 0.6871, "step": 5754 }, { "epoch": 0.8583787008725483, "grad_norm": 2.1365201473236084, "learning_rate": 1.0329650488679366e-06, "loss": 0.6189, "step": 5755 }, { "epoch": 0.8585278544261317, "grad_norm": 1.7100785970687866, "learning_rate": 1.030827417453475e-06, "loss": 0.7308, "step": 5756 }, { "epoch": 0.8586770079797151, "grad_norm": 1.7833575010299683, "learning_rate": 1.028691879949868e-06, "loss": 0.59, "step": 5757 }, { "epoch": 0.8588261615332985, "grad_norm": 2.863273859024048, "learning_rate": 1.0265584368556636e-06, "loss": 0.7616, "step": 5758 }, { "epoch": 0.858975315086882, "grad_norm": 1.4790767431259155, "learning_rate": 1.02442708866893e-06, "loss": 0.6656, "step": 5759 }, { "epoch": 0.8591244686404653, "grad_norm": 1.6773145198822021, "learning_rate": 1.0222978358872448e-06, "loss": 0.6287, "step": 5760 }, { "epoch": 0.8592736221940488, "grad_norm": 1.7139923572540283, "learning_rate": 1.020170679007697e-06, "loss": 0.6272, "step": 5761 }, { "epoch": 0.8594227757476322, "grad_norm": 1.5856083631515503, "learning_rate": 1.0180456185268805e-06, "loss": 0.7669, "step": 5762 }, { "epoch": 0.8595719293012156, "grad_norm": 1.6838057041168213, "learning_rate": 1.0159226549409074e-06, "loss": 0.6165, "step": 5763 }, { "epoch": 0.859721082854799, "grad_norm": 1.441233515739441, "learning_rate": 1.0138017887453988e-06, "loss": 0.667, "step": 5764 }, { "epoch": 0.8598702364083824, "grad_norm": 1.3732049465179443, "learning_rate": 1.011683020435479e-06, "loss": 0.7329, "step": 5765 }, { "epoch": 0.8600193899619658, "grad_norm": 2.133324146270752, "learning_rate": 1.009566350505793e-06, "loss": 0.6456, "step": 5766 }, { "epoch": 0.8601685435155493, "grad_norm": 1.5540698766708374, "learning_rate": 1.0074517794504913e-06, "loss": 0.6586, "step": 5767 }, { "epoch": 0.8603176970691326, "grad_norm": 1.9387919902801514, "learning_rate": 1.0053393077632302e-06, "loss": 0.6591, "step": 5768 }, { "epoch": 0.8604668506227161, "grad_norm": 1.9433212280273438, "learning_rate": 1.0032289359371816e-06, "loss": 0.7183, "step": 5769 }, { "epoch": 0.8606160041762995, "grad_norm": 0.5044158697128296, "learning_rate": 1.0011206644650273e-06, "loss": 0.2386, "step": 5770 }, { "epoch": 0.8607651577298829, "grad_norm": 1.7895532846450806, "learning_rate": 9.990144938389546e-07, "loss": 0.692, "step": 5771 }, { "epoch": 0.8609143112834663, "grad_norm": 1.3254098892211914, "learning_rate": 9.96910424550661e-07, "loss": 0.7278, "step": 5772 }, { "epoch": 0.8610634648370498, "grad_norm": 3.629270553588867, "learning_rate": 9.948084570913585e-07, "loss": 0.7057, "step": 5773 }, { "epoch": 0.8612126183906331, "grad_norm": 1.5406177043914795, "learning_rate": 9.92708591951762e-07, "loss": 0.6605, "step": 5774 }, { "epoch": 0.8613617719442166, "grad_norm": 1.1999378204345703, "learning_rate": 9.906108296221007e-07, "loss": 0.7045, "step": 5775 }, { "epoch": 0.8615109254978, "grad_norm": 2.2691521644592285, "learning_rate": 9.885151705921115e-07, "loss": 0.6633, "step": 5776 }, { "epoch": 0.8616600790513834, "grad_norm": 1.3165968656539917, "learning_rate": 9.864216153510364e-07, "loss": 0.6691, "step": 5777 }, { "epoch": 0.8618092326049668, "grad_norm": 1.2055171728134155, "learning_rate": 9.843301643876292e-07, "loss": 0.6614, "step": 5778 }, { "epoch": 0.8619583861585502, "grad_norm": 1.4463399648666382, "learning_rate": 9.822408181901544e-07, "loss": 0.6014, "step": 5779 }, { "epoch": 0.8621075397121336, "grad_norm": 1.3710534572601318, "learning_rate": 9.801535772463856e-07, "loss": 0.7044, "step": 5780 }, { "epoch": 0.8622566932657171, "grad_norm": 2.686055898666382, "learning_rate": 9.78068442043597e-07, "loss": 0.6897, "step": 5781 }, { "epoch": 0.8624058468193004, "grad_norm": 1.423835039138794, "learning_rate": 9.759854130685798e-07, "loss": 0.7066, "step": 5782 }, { "epoch": 0.8625550003728839, "grad_norm": 1.3115005493164062, "learning_rate": 9.739044908076301e-07, "loss": 0.6849, "step": 5783 }, { "epoch": 0.8627041539264673, "grad_norm": 1.4272087812423706, "learning_rate": 9.718256757465526e-07, "loss": 0.6305, "step": 5784 }, { "epoch": 0.8628533074800507, "grad_norm": 0.5117992162704468, "learning_rate": 9.697489683706607e-07, "loss": 0.231, "step": 5785 }, { "epoch": 0.8630024610336341, "grad_norm": 1.4258272647857666, "learning_rate": 9.67674369164776e-07, "loss": 0.6656, "step": 5786 }, { "epoch": 0.8631516145872176, "grad_norm": 1.737663745880127, "learning_rate": 9.656018786132236e-07, "loss": 0.7604, "step": 5787 }, { "epoch": 0.8633007681408009, "grad_norm": 1.3228046894073486, "learning_rate": 9.63531497199841e-07, "loss": 0.6508, "step": 5788 }, { "epoch": 0.8634499216943844, "grad_norm": 1.4290175437927246, "learning_rate": 9.614632254079748e-07, "loss": 0.6858, "step": 5789 }, { "epoch": 0.8635990752479678, "grad_norm": 2.572284460067749, "learning_rate": 9.59397063720472e-07, "loss": 0.6451, "step": 5790 }, { "epoch": 0.8637482288015512, "grad_norm": 2.476447105407715, "learning_rate": 9.573330126196912e-07, "loss": 0.6519, "step": 5791 }, { "epoch": 0.8638973823551346, "grad_norm": 1.916027307510376, "learning_rate": 9.552710725875047e-07, "loss": 0.6684, "step": 5792 }, { "epoch": 0.864046535908718, "grad_norm": 1.6816232204437256, "learning_rate": 9.532112441052799e-07, "loss": 0.7294, "step": 5793 }, { "epoch": 0.8641956894623014, "grad_norm": 2.175873279571533, "learning_rate": 9.511535276538986e-07, "loss": 0.6342, "step": 5794 }, { "epoch": 0.8643448430158849, "grad_norm": 1.4115056991577148, "learning_rate": 9.490979237137487e-07, "loss": 0.6417, "step": 5795 }, { "epoch": 0.8644939965694682, "grad_norm": 2.4045822620391846, "learning_rate": 9.470444327647221e-07, "loss": 0.6495, "step": 5796 }, { "epoch": 0.8646431501230517, "grad_norm": 1.53602933883667, "learning_rate": 9.449930552862208e-07, "loss": 0.7017, "step": 5797 }, { "epoch": 0.8647923036766351, "grad_norm": 1.260985016822815, "learning_rate": 9.429437917571526e-07, "loss": 0.6812, "step": 5798 }, { "epoch": 0.8649414572302185, "grad_norm": 1.6511350870132446, "learning_rate": 9.408966426559296e-07, "loss": 0.6767, "step": 5799 }, { "epoch": 0.8650906107838019, "grad_norm": 2.579115152359009, "learning_rate": 9.388516084604704e-07, "loss": 0.6069, "step": 5800 }, { "epoch": 0.8652397643373854, "grad_norm": 1.4687743186950684, "learning_rate": 9.368086896482065e-07, "loss": 0.6347, "step": 5801 }, { "epoch": 0.8653889178909687, "grad_norm": 1.3009988069534302, "learning_rate": 9.347678866960664e-07, "loss": 0.7385, "step": 5802 }, { "epoch": 0.8655380714445522, "grad_norm": 1.997689127922058, "learning_rate": 9.3272920008049e-07, "loss": 0.674, "step": 5803 }, { "epoch": 0.8656872249981356, "grad_norm": 1.683837890625, "learning_rate": 9.306926302774233e-07, "loss": 0.7254, "step": 5804 }, { "epoch": 0.865836378551719, "grad_norm": 1.6708941459655762, "learning_rate": 9.286581777623127e-07, "loss": 0.6696, "step": 5805 }, { "epoch": 0.8659855321053024, "grad_norm": 2.7730965614318848, "learning_rate": 9.26625843010116e-07, "loss": 0.6908, "step": 5806 }, { "epoch": 0.8661346856588859, "grad_norm": 1.6586319208145142, "learning_rate": 9.24595626495296e-07, "loss": 0.6113, "step": 5807 }, { "epoch": 0.8662838392124692, "grad_norm": 1.5277800559997559, "learning_rate": 9.225675286918201e-07, "loss": 0.7096, "step": 5808 }, { "epoch": 0.8664329927660527, "grad_norm": 2.279780864715576, "learning_rate": 9.205415500731551e-07, "loss": 0.7569, "step": 5809 }, { "epoch": 0.866582146319636, "grad_norm": 1.4346230030059814, "learning_rate": 9.185176911122873e-07, "loss": 0.7115, "step": 5810 }, { "epoch": 0.8667312998732195, "grad_norm": 0.9982880353927612, "learning_rate": 9.164959522816941e-07, "loss": 0.6461, "step": 5811 }, { "epoch": 0.8668804534268029, "grad_norm": 1.3679332733154297, "learning_rate": 9.144763340533635e-07, "loss": 0.6813, "step": 5812 }, { "epoch": 0.8670296069803863, "grad_norm": 0.49789100885391235, "learning_rate": 9.124588368987896e-07, "loss": 0.2358, "step": 5813 }, { "epoch": 0.8671787605339697, "grad_norm": 1.6024305820465088, "learning_rate": 9.104434612889723e-07, "loss": 0.7217, "step": 5814 }, { "epoch": 0.8673279140875532, "grad_norm": 2.7345523834228516, "learning_rate": 9.084302076944096e-07, "loss": 0.6841, "step": 5815 }, { "epoch": 0.8674770676411365, "grad_norm": 1.2582836151123047, "learning_rate": 9.0641907658511e-07, "loss": 0.7103, "step": 5816 }, { "epoch": 0.86762622119472, "grad_norm": 1.5274667739868164, "learning_rate": 9.044100684305857e-07, "loss": 0.6096, "step": 5817 }, { "epoch": 0.8677753747483034, "grad_norm": 1.6853368282318115, "learning_rate": 9.024031836998525e-07, "loss": 0.6788, "step": 5818 }, { "epoch": 0.8679245283018868, "grad_norm": 1.5466699600219727, "learning_rate": 9.003984228614293e-07, "loss": 0.7335, "step": 5819 }, { "epoch": 0.8680736818554702, "grad_norm": 1.4389275312423706, "learning_rate": 8.983957863833437e-07, "loss": 0.6066, "step": 5820 }, { "epoch": 0.8682228354090537, "grad_norm": 1.4112470149993896, "learning_rate": 8.963952747331195e-07, "loss": 0.6331, "step": 5821 }, { "epoch": 0.868371988962637, "grad_norm": 1.8781111240386963, "learning_rate": 8.943968883777909e-07, "loss": 0.6668, "step": 5822 }, { "epoch": 0.8685211425162205, "grad_norm": 2.4709255695343018, "learning_rate": 8.92400627783897e-07, "loss": 0.6661, "step": 5823 }, { "epoch": 0.8686702960698038, "grad_norm": 1.4668099880218506, "learning_rate": 8.904064934174717e-07, "loss": 0.616, "step": 5824 }, { "epoch": 0.8688194496233873, "grad_norm": 1.9500662088394165, "learning_rate": 8.884144857440624e-07, "loss": 0.5775, "step": 5825 }, { "epoch": 0.8689686031769707, "grad_norm": 1.7827317714691162, "learning_rate": 8.864246052287151e-07, "loss": 0.6383, "step": 5826 }, { "epoch": 0.8691177567305541, "grad_norm": 1.1983288526535034, "learning_rate": 8.844368523359803e-07, "loss": 0.6712, "step": 5827 }, { "epoch": 0.8692669102841375, "grad_norm": 2.3327770233154297, "learning_rate": 8.824512275299114e-07, "loss": 0.6705, "step": 5828 }, { "epoch": 0.869416063837721, "grad_norm": 1.291890025138855, "learning_rate": 8.804677312740673e-07, "loss": 0.7444, "step": 5829 }, { "epoch": 0.8695652173913043, "grad_norm": 1.505776047706604, "learning_rate": 8.784863640315045e-07, "loss": 0.7718, "step": 5830 }, { "epoch": 0.8697143709448878, "grad_norm": 2.7097198963165283, "learning_rate": 8.765071262647873e-07, "loss": 0.6838, "step": 5831 }, { "epoch": 0.8698635244984712, "grad_norm": 1.8750156164169312, "learning_rate": 8.745300184359817e-07, "loss": 0.7157, "step": 5832 }, { "epoch": 0.8700126780520546, "grad_norm": 1.7511183023452759, "learning_rate": 8.725550410066575e-07, "loss": 0.6203, "step": 5833 }, { "epoch": 0.870161831605638, "grad_norm": 1.7329866886138916, "learning_rate": 8.705821944378834e-07, "loss": 0.7004, "step": 5834 }, { "epoch": 0.8703109851592215, "grad_norm": 1.599068522453308, "learning_rate": 8.686114791902334e-07, "loss": 0.6855, "step": 5835 }, { "epoch": 0.8704601387128048, "grad_norm": 1.3676378726959229, "learning_rate": 8.666428957237849e-07, "loss": 0.6688, "step": 5836 }, { "epoch": 0.8706092922663883, "grad_norm": 1.4480459690093994, "learning_rate": 8.64676444498116e-07, "loss": 0.6792, "step": 5837 }, { "epoch": 0.8707584458199716, "grad_norm": 2.8048136234283447, "learning_rate": 8.627121259723071e-07, "loss": 0.6941, "step": 5838 }, { "epoch": 0.8709075993735551, "grad_norm": 0.46646788716316223, "learning_rate": 8.607499406049424e-07, "loss": 0.2429, "step": 5839 }, { "epoch": 0.8710567529271385, "grad_norm": 1.4618544578552246, "learning_rate": 8.587898888541035e-07, "loss": 0.6957, "step": 5840 }, { "epoch": 0.8712059064807219, "grad_norm": 3.7863969802856445, "learning_rate": 8.568319711773787e-07, "loss": 0.6651, "step": 5841 }, { "epoch": 0.8713550600343053, "grad_norm": 1.607879638671875, "learning_rate": 8.54876188031859e-07, "loss": 0.7474, "step": 5842 }, { "epoch": 0.8715042135878888, "grad_norm": 1.6979683637619019, "learning_rate": 8.529225398741303e-07, "loss": 0.6889, "step": 5843 }, { "epoch": 0.8716533671414721, "grad_norm": 2.0106406211853027, "learning_rate": 8.509710271602833e-07, "loss": 0.6112, "step": 5844 }, { "epoch": 0.8718025206950556, "grad_norm": 1.6651607751846313, "learning_rate": 8.490216503459181e-07, "loss": 0.7022, "step": 5845 }, { "epoch": 0.871951674248639, "grad_norm": 1.3706026077270508, "learning_rate": 8.470744098861239e-07, "loss": 0.7671, "step": 5846 }, { "epoch": 0.8721008278022224, "grad_norm": 1.597765326499939, "learning_rate": 8.45129306235497e-07, "loss": 0.6778, "step": 5847 }, { "epoch": 0.8722499813558058, "grad_norm": 2.0865280628204346, "learning_rate": 8.431863398481366e-07, "loss": 0.7185, "step": 5848 }, { "epoch": 0.8723991349093893, "grad_norm": 3.799424886703491, "learning_rate": 8.412455111776374e-07, "loss": 0.7269, "step": 5849 }, { "epoch": 0.8725482884629726, "grad_norm": 1.1336872577667236, "learning_rate": 8.393068206770993e-07, "loss": 0.7449, "step": 5850 }, { "epoch": 0.8726974420165561, "grad_norm": 1.3960529565811157, "learning_rate": 8.373702687991247e-07, "loss": 0.724, "step": 5851 }, { "epoch": 0.8728465955701394, "grad_norm": 1.5356009006500244, "learning_rate": 8.354358559958087e-07, "loss": 0.6421, "step": 5852 }, { "epoch": 0.8729957491237229, "grad_norm": 1.997287631034851, "learning_rate": 8.335035827187577e-07, "loss": 0.5965, "step": 5853 }, { "epoch": 0.8731449026773063, "grad_norm": 1.548675537109375, "learning_rate": 8.31573449419073e-07, "loss": 0.7337, "step": 5854 }, { "epoch": 0.8732940562308897, "grad_norm": 1.4889984130859375, "learning_rate": 8.296454565473522e-07, "loss": 0.6681, "step": 5855 }, { "epoch": 0.8734432097844731, "grad_norm": 1.605482816696167, "learning_rate": 8.277196045537006e-07, "loss": 0.7029, "step": 5856 }, { "epoch": 0.8735923633380566, "grad_norm": 1.5788276195526123, "learning_rate": 8.25795893887722e-07, "loss": 0.6848, "step": 5857 }, { "epoch": 0.8737415168916399, "grad_norm": 1.1561882495880127, "learning_rate": 8.238743249985159e-07, "loss": 0.6929, "step": 5858 }, { "epoch": 0.8738906704452234, "grad_norm": 1.2360608577728271, "learning_rate": 8.219548983346859e-07, "loss": 0.6551, "step": 5859 }, { "epoch": 0.8740398239988068, "grad_norm": 2.5769920349121094, "learning_rate": 8.200376143443356e-07, "loss": 0.6411, "step": 5860 }, { "epoch": 0.8741889775523902, "grad_norm": 1.916981816291809, "learning_rate": 8.181224734750659e-07, "loss": 0.5749, "step": 5861 }, { "epoch": 0.8743381311059736, "grad_norm": 1.876320481300354, "learning_rate": 8.162094761739792e-07, "loss": 0.6412, "step": 5862 }, { "epoch": 0.874487284659557, "grad_norm": 1.1725873947143555, "learning_rate": 8.14298622887677e-07, "loss": 0.6656, "step": 5863 }, { "epoch": 0.8746364382131404, "grad_norm": 1.7041171789169312, "learning_rate": 8.123899140622616e-07, "loss": 0.7135, "step": 5864 }, { "epoch": 0.8747855917667239, "grad_norm": 1.6243089437484741, "learning_rate": 8.104833501433318e-07, "loss": 0.7179, "step": 5865 }, { "epoch": 0.8749347453203072, "grad_norm": 1.5544588565826416, "learning_rate": 8.085789315759862e-07, "loss": 0.6843, "step": 5866 }, { "epoch": 0.8750838988738907, "grad_norm": 1.6539610624313354, "learning_rate": 8.06676658804827e-07, "loss": 0.6916, "step": 5867 }, { "epoch": 0.8752330524274741, "grad_norm": 1.423124074935913, "learning_rate": 8.047765322739476e-07, "loss": 0.7162, "step": 5868 }, { "epoch": 0.8753822059810575, "grad_norm": 1.7804741859436035, "learning_rate": 8.028785524269466e-07, "loss": 0.6419, "step": 5869 }, { "epoch": 0.8755313595346409, "grad_norm": 1.982264518737793, "learning_rate": 8.009827197069209e-07, "loss": 0.7549, "step": 5870 }, { "epoch": 0.8756805130882244, "grad_norm": 1.2983739376068115, "learning_rate": 7.990890345564628e-07, "loss": 0.6493, "step": 5871 }, { "epoch": 0.8758296666418077, "grad_norm": 1.3298444747924805, "learning_rate": 7.971974974176666e-07, "loss": 0.6708, "step": 5872 }, { "epoch": 0.8759788201953912, "grad_norm": 1.9574185609817505, "learning_rate": 7.953081087321257e-07, "loss": 0.583, "step": 5873 }, { "epoch": 0.8761279737489746, "grad_norm": 2.148850917816162, "learning_rate": 7.934208689409251e-07, "loss": 0.5992, "step": 5874 }, { "epoch": 0.876277127302558, "grad_norm": 1.8096052408218384, "learning_rate": 7.915357784846556e-07, "loss": 0.6841, "step": 5875 }, { "epoch": 0.8764262808561414, "grad_norm": 1.6980416774749756, "learning_rate": 7.896528378034052e-07, "loss": 0.6913, "step": 5876 }, { "epoch": 0.8765754344097249, "grad_norm": 0.46410155296325684, "learning_rate": 7.877720473367556e-07, "loss": 0.2702, "step": 5877 }, { "epoch": 0.8767245879633082, "grad_norm": 1.681817889213562, "learning_rate": 7.858934075237901e-07, "loss": 0.646, "step": 5878 }, { "epoch": 0.8768737415168917, "grad_norm": 0.5030238032341003, "learning_rate": 7.840169188030899e-07, "loss": 0.226, "step": 5879 }, { "epoch": 0.877022895070475, "grad_norm": 1.919119954109192, "learning_rate": 7.821425816127337e-07, "loss": 0.6613, "step": 5880 }, { "epoch": 0.8771720486240585, "grad_norm": 3.5303003787994385, "learning_rate": 7.802703963902968e-07, "loss": 0.6543, "step": 5881 }, { "epoch": 0.8773212021776419, "grad_norm": 1.7003285884857178, "learning_rate": 7.784003635728555e-07, "loss": 0.7113, "step": 5882 }, { "epoch": 0.8774703557312253, "grad_norm": 0.5340738892555237, "learning_rate": 7.765324835969757e-07, "loss": 0.2473, "step": 5883 }, { "epoch": 0.8776195092848087, "grad_norm": 1.5763802528381348, "learning_rate": 7.746667568987287e-07, "loss": 0.6535, "step": 5884 }, { "epoch": 0.8777686628383922, "grad_norm": 2.01004958152771, "learning_rate": 7.728031839136818e-07, "loss": 0.59, "step": 5885 }, { "epoch": 0.8779178163919755, "grad_norm": 1.3210378885269165, "learning_rate": 7.70941765076898e-07, "loss": 0.7049, "step": 5886 }, { "epoch": 0.878066969945559, "grad_norm": 2.0531551837921143, "learning_rate": 7.690825008229319e-07, "loss": 0.673, "step": 5887 }, { "epoch": 0.8782161234991424, "grad_norm": 1.5450327396392822, "learning_rate": 7.672253915858496e-07, "loss": 0.6709, "step": 5888 }, { "epoch": 0.8783652770527258, "grad_norm": 1.2397346496582031, "learning_rate": 7.653704377991977e-07, "loss": 0.6649, "step": 5889 }, { "epoch": 0.8785144306063092, "grad_norm": 1.2843250036239624, "learning_rate": 7.635176398960308e-07, "loss": 0.5956, "step": 5890 }, { "epoch": 0.8786635841598927, "grad_norm": 1.6451939344406128, "learning_rate": 7.616669983088953e-07, "loss": 0.6134, "step": 5891 }, { "epoch": 0.878812737713476, "grad_norm": 1.3319048881530762, "learning_rate": 7.598185134698366e-07, "loss": 0.6239, "step": 5892 }, { "epoch": 0.8789618912670595, "grad_norm": 1.5582935810089111, "learning_rate": 7.579721858103928e-07, "loss": 0.7077, "step": 5893 }, { "epoch": 0.8791110448206428, "grad_norm": 1.1851454973220825, "learning_rate": 7.561280157616036e-07, "loss": 0.6152, "step": 5894 }, { "epoch": 0.8792601983742263, "grad_norm": 2.562108039855957, "learning_rate": 7.542860037540012e-07, "loss": 0.5733, "step": 5895 }, { "epoch": 0.8794093519278097, "grad_norm": 1.4580967426300049, "learning_rate": 7.524461502176128e-07, "loss": 0.634, "step": 5896 }, { "epoch": 0.8795585054813931, "grad_norm": 2.47883677482605, "learning_rate": 7.506084555819682e-07, "loss": 0.7438, "step": 5897 }, { "epoch": 0.8797076590349765, "grad_norm": 1.781768798828125, "learning_rate": 7.487729202760874e-07, "loss": 0.685, "step": 5898 }, { "epoch": 0.87985681258856, "grad_norm": 1.1655936241149902, "learning_rate": 7.469395447284866e-07, "loss": 0.6372, "step": 5899 }, { "epoch": 0.8800059661421433, "grad_norm": 1.7481794357299805, "learning_rate": 7.451083293671801e-07, "loss": 0.7184, "step": 5900 }, { "epoch": 0.8801551196957268, "grad_norm": 1.3829066753387451, "learning_rate": 7.432792746196793e-07, "loss": 0.7483, "step": 5901 }, { "epoch": 0.8803042732493102, "grad_norm": 1.514121174812317, "learning_rate": 7.414523809129836e-07, "loss": 0.6892, "step": 5902 }, { "epoch": 0.8804534268028936, "grad_norm": 3.2644081115722656, "learning_rate": 7.396276486735965e-07, "loss": 0.6017, "step": 5903 }, { "epoch": 0.880602580356477, "grad_norm": 2.0313243865966797, "learning_rate": 7.378050783275115e-07, "loss": 0.5659, "step": 5904 }, { "epoch": 0.8807517339100605, "grad_norm": 3.1241860389709473, "learning_rate": 7.359846703002216e-07, "loss": 0.6052, "step": 5905 }, { "epoch": 0.8809008874636438, "grad_norm": 2.1249327659606934, "learning_rate": 7.341664250167113e-07, "loss": 0.7419, "step": 5906 }, { "epoch": 0.8810500410172273, "grad_norm": 1.9925826787948608, "learning_rate": 7.323503429014633e-07, "loss": 0.5773, "step": 5907 }, { "epoch": 0.8811991945708106, "grad_norm": 1.1841599941253662, "learning_rate": 7.305364243784507e-07, "loss": 0.6937, "step": 5908 }, { "epoch": 0.8813483481243941, "grad_norm": 1.3568499088287354, "learning_rate": 7.287246698711459e-07, "loss": 0.638, "step": 5909 }, { "epoch": 0.8814975016779775, "grad_norm": 4.918546199798584, "learning_rate": 7.269150798025148e-07, "loss": 0.748, "step": 5910 }, { "epoch": 0.8816466552315609, "grad_norm": 1.333423376083374, "learning_rate": 7.251076545950198e-07, "loss": 0.6829, "step": 5911 }, { "epoch": 0.8817958087851443, "grad_norm": 1.1126041412353516, "learning_rate": 7.233023946706108e-07, "loss": 0.629, "step": 5912 }, { "epoch": 0.8819449623387278, "grad_norm": 0.5264179706573486, "learning_rate": 7.214993004507409e-07, "loss": 0.2491, "step": 5913 }, { "epoch": 0.8820941158923111, "grad_norm": 1.2730450630187988, "learning_rate": 7.196983723563544e-07, "loss": 0.7401, "step": 5914 }, { "epoch": 0.8822432694458946, "grad_norm": 1.633336067199707, "learning_rate": 7.178996108078873e-07, "loss": 0.7221, "step": 5915 }, { "epoch": 0.882392422999478, "grad_norm": 1.5305922031402588, "learning_rate": 7.161030162252735e-07, "loss": 0.6447, "step": 5916 }, { "epoch": 0.8825415765530614, "grad_norm": 1.8783878087997437, "learning_rate": 7.143085890279411e-07, "loss": 0.6027, "step": 5917 }, { "epoch": 0.8826907301066448, "grad_norm": 1.8260247707366943, "learning_rate": 7.12516329634807e-07, "loss": 0.5709, "step": 5918 }, { "epoch": 0.8828398836602283, "grad_norm": 2.0242605209350586, "learning_rate": 7.107262384642877e-07, "loss": 0.7102, "step": 5919 }, { "epoch": 0.8829890372138116, "grad_norm": 5.228055000305176, "learning_rate": 7.089383159342933e-07, "loss": 0.6456, "step": 5920 }, { "epoch": 0.8831381907673951, "grad_norm": 0.4784458875656128, "learning_rate": 7.071525624622211e-07, "loss": 0.2445, "step": 5921 }, { "epoch": 0.8832873443209784, "grad_norm": 1.6976070404052734, "learning_rate": 7.053689784649676e-07, "loss": 0.6205, "step": 5922 }, { "epoch": 0.8834364978745619, "grad_norm": 1.4926424026489258, "learning_rate": 7.035875643589274e-07, "loss": 0.6863, "step": 5923 }, { "epoch": 0.8835856514281453, "grad_norm": 0.5631885528564453, "learning_rate": 7.018083205599779e-07, "loss": 0.254, "step": 5924 }, { "epoch": 0.8837348049817287, "grad_norm": 1.4592877626419067, "learning_rate": 7.000312474834959e-07, "loss": 0.7412, "step": 5925 }, { "epoch": 0.8838839585353121, "grad_norm": 1.5975227355957031, "learning_rate": 6.982563455443525e-07, "loss": 0.5954, "step": 5926 }, { "epoch": 0.8840331120888956, "grad_norm": 0.5443398356437683, "learning_rate": 6.964836151569066e-07, "loss": 0.2573, "step": 5927 }, { "epoch": 0.8841822656424789, "grad_norm": 1.2957910299301147, "learning_rate": 6.947130567350147e-07, "loss": 0.7052, "step": 5928 }, { "epoch": 0.8843314191960624, "grad_norm": 1.4948463439941406, "learning_rate": 6.929446706920285e-07, "loss": 0.6904, "step": 5929 }, { "epoch": 0.8844805727496458, "grad_norm": 1.3288910388946533, "learning_rate": 6.911784574407832e-07, "loss": 0.6617, "step": 5930 }, { "epoch": 0.8846297263032292, "grad_norm": 1.3359169960021973, "learning_rate": 6.894144173936146e-07, "loss": 0.6752, "step": 5931 }, { "epoch": 0.8847788798568126, "grad_norm": 1.518280267715454, "learning_rate": 6.876525509623532e-07, "loss": 0.6389, "step": 5932 }, { "epoch": 0.884928033410396, "grad_norm": 2.44998836517334, "learning_rate": 6.858928585583135e-07, "loss": 0.6742, "step": 5933 }, { "epoch": 0.8850771869639794, "grad_norm": 1.3149770498275757, "learning_rate": 6.841353405923079e-07, "loss": 0.6836, "step": 5934 }, { "epoch": 0.8852263405175629, "grad_norm": 2.055126905441284, "learning_rate": 6.823799974746425e-07, "loss": 0.6649, "step": 5935 }, { "epoch": 0.8853754940711462, "grad_norm": 3.214480400085449, "learning_rate": 6.806268296151097e-07, "loss": 0.6787, "step": 5936 }, { "epoch": 0.8855246476247297, "grad_norm": 2.4973044395446777, "learning_rate": 6.788758374229998e-07, "loss": 0.6564, "step": 5937 }, { "epoch": 0.8856738011783131, "grad_norm": 1.3346240520477295, "learning_rate": 6.771270213070935e-07, "loss": 0.6821, "step": 5938 }, { "epoch": 0.8858229547318965, "grad_norm": 1.335082769393921, "learning_rate": 6.753803816756643e-07, "loss": 0.7304, "step": 5939 }, { "epoch": 0.8859721082854799, "grad_norm": 2.27835750579834, "learning_rate": 6.736359189364716e-07, "loss": 0.7371, "step": 5940 }, { "epoch": 0.8861212618390634, "grad_norm": 1.568376064300537, "learning_rate": 6.718936334967774e-07, "loss": 0.7106, "step": 5941 }, { "epoch": 0.8862704153926467, "grad_norm": 1.9574744701385498, "learning_rate": 6.701535257633252e-07, "loss": 0.6314, "step": 5942 }, { "epoch": 0.8864195689462302, "grad_norm": 1.3960280418395996, "learning_rate": 6.684155961423555e-07, "loss": 0.6455, "step": 5943 }, { "epoch": 0.8865687224998136, "grad_norm": 1.59869384765625, "learning_rate": 6.666798450395995e-07, "loss": 0.5919, "step": 5944 }, { "epoch": 0.886717876053397, "grad_norm": 1.1649994850158691, "learning_rate": 6.649462728602807e-07, "loss": 0.6805, "step": 5945 }, { "epoch": 0.8868670296069804, "grad_norm": 1.4619725942611694, "learning_rate": 6.632148800091099e-07, "loss": 0.6551, "step": 5946 }, { "epoch": 0.8870161831605639, "grad_norm": 1.5137485265731812, "learning_rate": 6.614856668902924e-07, "loss": 0.7672, "step": 5947 }, { "epoch": 0.8871653367141472, "grad_norm": 1.6954728364944458, "learning_rate": 6.597586339075279e-07, "loss": 0.6555, "step": 5948 }, { "epoch": 0.8873144902677307, "grad_norm": 1.7580208778381348, "learning_rate": 6.580337814639959e-07, "loss": 0.709, "step": 5949 }, { "epoch": 0.887463643821314, "grad_norm": 1.373799204826355, "learning_rate": 6.563111099623809e-07, "loss": 0.691, "step": 5950 }, { "epoch": 0.8876127973748975, "grad_norm": 1.1450865268707275, "learning_rate": 6.545906198048502e-07, "loss": 0.6604, "step": 5951 }, { "epoch": 0.8877619509284809, "grad_norm": 1.9612237215042114, "learning_rate": 6.528723113930613e-07, "loss": 0.6849, "step": 5952 }, { "epoch": 0.8879111044820643, "grad_norm": 1.639773964881897, "learning_rate": 6.511561851281656e-07, "loss": 0.7341, "step": 5953 }, { "epoch": 0.8880602580356477, "grad_norm": 1.5643446445465088, "learning_rate": 6.494422414108048e-07, "loss": 0.6929, "step": 5954 }, { "epoch": 0.8882094115892312, "grad_norm": 1.5867584943771362, "learning_rate": 6.477304806411078e-07, "loss": 0.6634, "step": 5955 }, { "epoch": 0.8883585651428145, "grad_norm": 1.7282602787017822, "learning_rate": 6.460209032186971e-07, "loss": 0.6698, "step": 5956 }, { "epoch": 0.888507718696398, "grad_norm": 2.170366048812866, "learning_rate": 6.443135095426845e-07, "loss": 0.711, "step": 5957 }, { "epoch": 0.8886568722499814, "grad_norm": 0.49989110231399536, "learning_rate": 6.426083000116723e-07, "loss": 0.231, "step": 5958 }, { "epoch": 0.8888060258035648, "grad_norm": 1.8050535917282104, "learning_rate": 6.409052750237521e-07, "loss": 0.613, "step": 5959 }, { "epoch": 0.8889551793571482, "grad_norm": 1.7046360969543457, "learning_rate": 6.39204434976507e-07, "loss": 0.7029, "step": 5960 }, { "epoch": 0.8891043329107315, "grad_norm": 1.277226209640503, "learning_rate": 6.375057802670081e-07, "loss": 0.7089, "step": 5961 }, { "epoch": 0.889253486464315, "grad_norm": 1.438496470451355, "learning_rate": 6.358093112918174e-07, "loss": 0.6678, "step": 5962 }, { "epoch": 0.8894026400178984, "grad_norm": 2.3852663040161133, "learning_rate": 6.341150284469855e-07, "loss": 0.5093, "step": 5963 }, { "epoch": 0.8895517935714818, "grad_norm": 1.7865631580352783, "learning_rate": 6.324229321280572e-07, "loss": 0.5854, "step": 5964 }, { "epoch": 0.8897009471250652, "grad_norm": 1.7496143579483032, "learning_rate": 6.307330227300579e-07, "loss": 0.6907, "step": 5965 }, { "epoch": 0.8898501006786487, "grad_norm": 1.3525499105453491, "learning_rate": 6.290453006475117e-07, "loss": 0.6932, "step": 5966 }, { "epoch": 0.889999254232232, "grad_norm": 1.812572956085205, "learning_rate": 6.273597662744269e-07, "loss": 0.6688, "step": 5967 }, { "epoch": 0.8901484077858155, "grad_norm": 1.854041576385498, "learning_rate": 6.25676420004302e-07, "loss": 0.7471, "step": 5968 }, { "epoch": 0.8902975613393989, "grad_norm": 2.5834550857543945, "learning_rate": 6.239952622301248e-07, "loss": 0.6893, "step": 5969 }, { "epoch": 0.8904467148929823, "grad_norm": 2.746077060699463, "learning_rate": 6.22316293344375e-07, "loss": 0.6584, "step": 5970 }, { "epoch": 0.8905958684465657, "grad_norm": 1.27880859375, "learning_rate": 6.206395137390153e-07, "loss": 0.7067, "step": 5971 }, { "epoch": 0.8907450220001492, "grad_norm": 2.184706687927246, "learning_rate": 6.189649238055018e-07, "loss": 0.5788, "step": 5972 }, { "epoch": 0.8908941755537325, "grad_norm": 1.416093111038208, "learning_rate": 6.172925239347793e-07, "loss": 0.6632, "step": 5973 }, { "epoch": 0.891043329107316, "grad_norm": 1.3094667196273804, "learning_rate": 6.15622314517278e-07, "loss": 0.5964, "step": 5974 }, { "epoch": 0.8911924826608993, "grad_norm": 1.8380647897720337, "learning_rate": 6.13954295942919e-07, "loss": 0.6303, "step": 5975 }, { "epoch": 0.8913416362144828, "grad_norm": 1.4479479789733887, "learning_rate": 6.122884686011166e-07, "loss": 0.6481, "step": 5976 }, { "epoch": 0.8914907897680662, "grad_norm": 1.7784814834594727, "learning_rate": 6.10624832880764e-07, "loss": 0.6994, "step": 5977 }, { "epoch": 0.8916399433216496, "grad_norm": 1.606401801109314, "learning_rate": 6.089633891702496e-07, "loss": 0.6384, "step": 5978 }, { "epoch": 0.891789096875233, "grad_norm": 1.3648568391799927, "learning_rate": 6.073041378574485e-07, "loss": 0.7335, "step": 5979 }, { "epoch": 0.8919382504288165, "grad_norm": 2.0499460697174072, "learning_rate": 6.056470793297209e-07, "loss": 0.6587, "step": 5980 }, { "epoch": 0.8920874039823998, "grad_norm": 1.9365731477737427, "learning_rate": 6.039922139739196e-07, "loss": 0.557, "step": 5981 }, { "epoch": 0.8922365575359833, "grad_norm": 1.3174885511398315, "learning_rate": 6.023395421763856e-07, "loss": 0.7019, "step": 5982 }, { "epoch": 0.8923857110895667, "grad_norm": 1.5665061473846436, "learning_rate": 6.006890643229424e-07, "loss": 0.6264, "step": 5983 }, { "epoch": 0.8925348646431501, "grad_norm": 1.5306485891342163, "learning_rate": 5.99040780798904e-07, "loss": 0.6754, "step": 5984 }, { "epoch": 0.8926840181967335, "grad_norm": 1.789031744003296, "learning_rate": 5.973946919890772e-07, "loss": 0.719, "step": 5985 }, { "epoch": 0.892833171750317, "grad_norm": 1.6414965391159058, "learning_rate": 5.957507982777477e-07, "loss": 0.6037, "step": 5986 }, { "epoch": 0.8929823253039003, "grad_norm": 1.559435486793518, "learning_rate": 5.941091000486953e-07, "loss": 0.7142, "step": 5987 }, { "epoch": 0.8931314788574838, "grad_norm": 1.16508948802948, "learning_rate": 5.924695976851846e-07, "loss": 0.5613, "step": 5988 }, { "epoch": 0.8932806324110671, "grad_norm": 1.8224014043807983, "learning_rate": 5.908322915699694e-07, "loss": 0.694, "step": 5989 }, { "epoch": 0.8934297859646506, "grad_norm": 1.6396493911743164, "learning_rate": 5.89197182085286e-07, "loss": 0.7024, "step": 5990 }, { "epoch": 0.893578939518234, "grad_norm": 1.621559500694275, "learning_rate": 5.875642696128625e-07, "loss": 0.6925, "step": 5991 }, { "epoch": 0.8937280930718174, "grad_norm": 1.9816261529922485, "learning_rate": 5.859335545339129e-07, "loss": 0.7264, "step": 5992 }, { "epoch": 0.8938772466254008, "grad_norm": 2.6096630096435547, "learning_rate": 5.843050372291381e-07, "loss": 0.6322, "step": 5993 }, { "epoch": 0.8940264001789843, "grad_norm": 2.047025203704834, "learning_rate": 5.826787180787274e-07, "loss": 0.6794, "step": 5994 }, { "epoch": 0.8941755537325676, "grad_norm": 1.5859453678131104, "learning_rate": 5.810545974623549e-07, "loss": 0.6241, "step": 5995 }, { "epoch": 0.8943247072861511, "grad_norm": 0.518303632736206, "learning_rate": 5.794326757591795e-07, "loss": 0.2495, "step": 5996 }, { "epoch": 0.8944738608397345, "grad_norm": 1.8879975080490112, "learning_rate": 5.778129533478516e-07, "loss": 0.6381, "step": 5997 }, { "epoch": 0.8946230143933179, "grad_norm": 1.6106517314910889, "learning_rate": 5.761954306065065e-07, "loss": 0.6314, "step": 5998 }, { "epoch": 0.8947721679469013, "grad_norm": 1.8084863424301147, "learning_rate": 5.745801079127622e-07, "loss": 0.6493, "step": 5999 }, { "epoch": 0.8949213215004848, "grad_norm": 1.470719814300537, "learning_rate": 5.729669856437281e-07, "loss": 0.569, "step": 6000 }, { "epoch": 0.8950704750540681, "grad_norm": 1.6022526025772095, "learning_rate": 5.713560641759975e-07, "loss": 0.7446, "step": 6001 }, { "epoch": 0.8952196286076516, "grad_norm": 1.1771738529205322, "learning_rate": 5.697473438856505e-07, "loss": 0.7044, "step": 6002 }, { "epoch": 0.8953687821612349, "grad_norm": 2.342536449432373, "learning_rate": 5.681408251482523e-07, "loss": 0.621, "step": 6003 }, { "epoch": 0.8955179357148184, "grad_norm": 1.2660982608795166, "learning_rate": 5.665365083388586e-07, "loss": 0.6546, "step": 6004 }, { "epoch": 0.8956670892684018, "grad_norm": 1.6462057828903198, "learning_rate": 5.649343938320029e-07, "loss": 0.8004, "step": 6005 }, { "epoch": 0.8958162428219852, "grad_norm": 1.2619446516036987, "learning_rate": 5.633344820017106e-07, "loss": 0.6396, "step": 6006 }, { "epoch": 0.8959653963755686, "grad_norm": 1.6726421117782593, "learning_rate": 5.61736773221494e-07, "loss": 0.7075, "step": 6007 }, { "epoch": 0.8961145499291521, "grad_norm": 1.1974372863769531, "learning_rate": 5.601412678643447e-07, "loss": 0.7919, "step": 6008 }, { "epoch": 0.8962637034827354, "grad_norm": 1.560433030128479, "learning_rate": 5.585479663027437e-07, "loss": 0.6735, "step": 6009 }, { "epoch": 0.8964128570363189, "grad_norm": 1.2346205711364746, "learning_rate": 5.569568689086602e-07, "loss": 0.6812, "step": 6010 }, { "epoch": 0.8965620105899023, "grad_norm": 1.6349204778671265, "learning_rate": 5.553679760535447e-07, "loss": 0.6789, "step": 6011 }, { "epoch": 0.8967111641434857, "grad_norm": 1.7393641471862793, "learning_rate": 5.537812881083349e-07, "loss": 0.7539, "step": 6012 }, { "epoch": 0.8968603176970691, "grad_norm": 1.601990818977356, "learning_rate": 5.521968054434534e-07, "loss": 0.6261, "step": 6013 }, { "epoch": 0.8970094712506526, "grad_norm": 2.1439266204833984, "learning_rate": 5.506145284288056e-07, "loss": 0.656, "step": 6014 }, { "epoch": 0.8971586248042359, "grad_norm": 1.3619965314865112, "learning_rate": 5.49034457433787e-07, "loss": 0.6963, "step": 6015 }, { "epoch": 0.8973077783578194, "grad_norm": 3.0017006397247314, "learning_rate": 5.474565928272735e-07, "loss": 0.6614, "step": 6016 }, { "epoch": 0.8974569319114027, "grad_norm": 1.7066000699996948, "learning_rate": 5.458809349776306e-07, "loss": 0.6013, "step": 6017 }, { "epoch": 0.8976060854649862, "grad_norm": 6.0885701179504395, "learning_rate": 5.443074842527007e-07, "loss": 0.6111, "step": 6018 }, { "epoch": 0.8977552390185696, "grad_norm": 1.4952656030654907, "learning_rate": 5.427362410198212e-07, "loss": 0.6998, "step": 6019 }, { "epoch": 0.897904392572153, "grad_norm": 2.456369400024414, "learning_rate": 5.411672056458051e-07, "loss": 0.7455, "step": 6020 }, { "epoch": 0.8980535461257364, "grad_norm": 2.3238728046417236, "learning_rate": 5.396003784969551e-07, "loss": 0.6094, "step": 6021 }, { "epoch": 0.8982026996793199, "grad_norm": 1.510000467300415, "learning_rate": 5.380357599390573e-07, "loss": 0.5958, "step": 6022 }, { "epoch": 0.8983518532329032, "grad_norm": 2.2908027172088623, "learning_rate": 5.364733503373842e-07, "loss": 0.7118, "step": 6023 }, { "epoch": 0.8985010067864867, "grad_norm": 1.9371888637542725, "learning_rate": 5.34913150056685e-07, "loss": 0.6497, "step": 6024 }, { "epoch": 0.8986501603400701, "grad_norm": 1.4485286474227905, "learning_rate": 5.333551594612018e-07, "loss": 0.6883, "step": 6025 }, { "epoch": 0.8987993138936535, "grad_norm": 5.648247241973877, "learning_rate": 5.317993789146591e-07, "loss": 0.6801, "step": 6026 }, { "epoch": 0.8989484674472369, "grad_norm": 1.4635722637176514, "learning_rate": 5.302458087802587e-07, "loss": 0.5848, "step": 6027 }, { "epoch": 0.8990976210008204, "grad_norm": 1.3943527936935425, "learning_rate": 5.286944494206969e-07, "loss": 0.7633, "step": 6028 }, { "epoch": 0.8992467745544037, "grad_norm": 1.2426657676696777, "learning_rate": 5.271453011981464e-07, "loss": 0.6107, "step": 6029 }, { "epoch": 0.8993959281079872, "grad_norm": 1.3263585567474365, "learning_rate": 5.255983644742646e-07, "loss": 0.6296, "step": 6030 }, { "epoch": 0.8995450816615705, "grad_norm": 1.9340437650680542, "learning_rate": 5.240536396101948e-07, "loss": 0.7119, "step": 6031 }, { "epoch": 0.899694235215154, "grad_norm": 1.4898762702941895, "learning_rate": 5.225111269665651e-07, "loss": 0.7038, "step": 6032 }, { "epoch": 0.8998433887687374, "grad_norm": 1.7918195724487305, "learning_rate": 5.209708269034797e-07, "loss": 0.7216, "step": 6033 }, { "epoch": 0.8999925423223208, "grad_norm": 0.48667436838150024, "learning_rate": 5.194327397805365e-07, "loss": 0.2605, "step": 6034 }, { "epoch": 0.9001416958759042, "grad_norm": 1.8644510507583618, "learning_rate": 5.178968659568084e-07, "loss": 0.6672, "step": 6035 }, { "epoch": 0.9002908494294877, "grad_norm": 1.4238231182098389, "learning_rate": 5.163632057908574e-07, "loss": 0.7718, "step": 6036 }, { "epoch": 0.900440002983071, "grad_norm": 0.58991938829422, "learning_rate": 5.148317596407259e-07, "loss": 0.2587, "step": 6037 }, { "epoch": 0.9005891565366545, "grad_norm": 2.293846368789673, "learning_rate": 5.133025278639403e-07, "loss": 0.7689, "step": 6038 }, { "epoch": 0.9007383100902379, "grad_norm": 1.3496073484420776, "learning_rate": 5.117755108175071e-07, "loss": 0.6879, "step": 6039 }, { "epoch": 0.9008874636438213, "grad_norm": 1.623340368270874, "learning_rate": 5.102507088579189e-07, "loss": 0.6388, "step": 6040 }, { "epoch": 0.9010366171974047, "grad_norm": 1.9236235618591309, "learning_rate": 5.087281223411522e-07, "loss": 0.6203, "step": 6041 }, { "epoch": 0.9011857707509882, "grad_norm": 1.2097411155700684, "learning_rate": 5.072077516226648e-07, "loss": 0.729, "step": 6042 }, { "epoch": 0.9013349243045715, "grad_norm": 1.3776875734329224, "learning_rate": 5.05689597057395e-07, "loss": 0.7008, "step": 6043 }, { "epoch": 0.901484077858155, "grad_norm": 2.010237216949463, "learning_rate": 5.04173658999767e-07, "loss": 0.6066, "step": 6044 }, { "epoch": 0.9016332314117383, "grad_norm": 1.8291559219360352, "learning_rate": 5.026599378036845e-07, "loss": 0.5375, "step": 6045 }, { "epoch": 0.9017823849653218, "grad_norm": 1.5584975481033325, "learning_rate": 5.011484338225381e-07, "loss": 0.6665, "step": 6046 }, { "epoch": 0.9019315385189052, "grad_norm": 1.4397122859954834, "learning_rate": 4.996391474091966e-07, "loss": 0.6093, "step": 6047 }, { "epoch": 0.9020806920724886, "grad_norm": 1.8401719331741333, "learning_rate": 4.981320789160138e-07, "loss": 0.6584, "step": 6048 }, { "epoch": 0.902229845626072, "grad_norm": 0.4515456557273865, "learning_rate": 4.966272286948215e-07, "loss": 0.2773, "step": 6049 }, { "epoch": 0.9023789991796555, "grad_norm": 1.7883405685424805, "learning_rate": 4.951245970969399e-07, "loss": 0.7439, "step": 6050 }, { "epoch": 0.9025281527332388, "grad_norm": 1.4726003408432007, "learning_rate": 4.936241844731671e-07, "loss": 0.5994, "step": 6051 }, { "epoch": 0.9026773062868223, "grad_norm": 2.5735628604888916, "learning_rate": 4.921259911737831e-07, "loss": 0.7272, "step": 6052 }, { "epoch": 0.9028264598404057, "grad_norm": 2.0836524963378906, "learning_rate": 4.906300175485501e-07, "loss": 0.6364, "step": 6053 }, { "epoch": 0.9029756133939891, "grad_norm": 2.2507495880126953, "learning_rate": 4.891362639467156e-07, "loss": 0.5868, "step": 6054 }, { "epoch": 0.9031247669475725, "grad_norm": 2.4207184314727783, "learning_rate": 4.87644730717004e-07, "loss": 0.6526, "step": 6055 }, { "epoch": 0.903273920501156, "grad_norm": 1.329693078994751, "learning_rate": 4.861554182076222e-07, "loss": 0.6278, "step": 6056 }, { "epoch": 0.9034230740547393, "grad_norm": 1.4157270193099976, "learning_rate": 4.846683267662632e-07, "loss": 0.7282, "step": 6057 }, { "epoch": 0.9035722276083228, "grad_norm": 1.3650728464126587, "learning_rate": 4.83183456740095e-07, "loss": 0.6545, "step": 6058 }, { "epoch": 0.9037213811619061, "grad_norm": 1.889258861541748, "learning_rate": 4.817008084757713e-07, "loss": 0.6443, "step": 6059 }, { "epoch": 0.9038705347154896, "grad_norm": 1.3157083988189697, "learning_rate": 4.802203823194263e-07, "loss": 0.6121, "step": 6060 }, { "epoch": 0.904019688269073, "grad_norm": 3.3181979656219482, "learning_rate": 4.787421786166724e-07, "loss": 0.6238, "step": 6061 }, { "epoch": 0.9041688418226564, "grad_norm": 1.9663805961608887, "learning_rate": 4.77266197712607e-07, "loss": 0.7571, "step": 6062 }, { "epoch": 0.9043179953762398, "grad_norm": 2.107212781906128, "learning_rate": 4.757924399518099e-07, "loss": 0.6695, "step": 6063 }, { "epoch": 0.9044671489298233, "grad_norm": 1.7824286222457886, "learning_rate": 4.743209056783371e-07, "loss": 0.6491, "step": 6064 }, { "epoch": 0.9046163024834066, "grad_norm": 1.5422929525375366, "learning_rate": 4.72851595235726e-07, "loss": 0.6194, "step": 6065 }, { "epoch": 0.9047654560369901, "grad_norm": 1.5332515239715576, "learning_rate": 4.7138450896700105e-07, "loss": 0.6886, "step": 6066 }, { "epoch": 0.9049146095905735, "grad_norm": 1.1573171615600586, "learning_rate": 4.6991964721465944e-07, "loss": 0.6806, "step": 6067 }, { "epoch": 0.9050637631441569, "grad_norm": 1.8881187438964844, "learning_rate": 4.68457010320682e-07, "loss": 0.6374, "step": 6068 }, { "epoch": 0.9052129166977403, "grad_norm": 2.1055092811584473, "learning_rate": 4.6699659862653347e-07, "loss": 0.6973, "step": 6069 }, { "epoch": 0.9053620702513238, "grad_norm": 2.7318248748779297, "learning_rate": 4.6553841247315544e-07, "loss": 0.6783, "step": 6070 }, { "epoch": 0.9055112238049071, "grad_norm": 5.055338382720947, "learning_rate": 4.6408245220096795e-07, "loss": 0.6937, "step": 6071 }, { "epoch": 0.9056603773584906, "grad_norm": 1.5456172227859497, "learning_rate": 4.6262871814987895e-07, "loss": 0.7201, "step": 6072 }, { "epoch": 0.905809530912074, "grad_norm": 1.9176024198532104, "learning_rate": 4.6117721065926824e-07, "loss": 0.6358, "step": 6073 }, { "epoch": 0.9059586844656574, "grad_norm": 1.4600099325180054, "learning_rate": 4.597279300680013e-07, "loss": 0.6759, "step": 6074 }, { "epoch": 0.9061078380192408, "grad_norm": 1.5289583206176758, "learning_rate": 4.58280876714422e-07, "loss": 0.6508, "step": 6075 }, { "epoch": 0.9062569915728242, "grad_norm": 1.5901590585708618, "learning_rate": 4.568360509363545e-07, "loss": 0.6488, "step": 6076 }, { "epoch": 0.9064061451264076, "grad_norm": 3.2263259887695312, "learning_rate": 4.5539345307110125e-07, "loss": 0.6542, "step": 6077 }, { "epoch": 0.9065552986799911, "grad_norm": 0.5865125060081482, "learning_rate": 4.539530834554473e-07, "loss": 0.274, "step": 6078 }, { "epoch": 0.9067044522335744, "grad_norm": 1.8514907360076904, "learning_rate": 4.5251494242565587e-07, "loss": 0.6473, "step": 6079 }, { "epoch": 0.9068536057871579, "grad_norm": 1.460381031036377, "learning_rate": 4.510790303174672e-07, "loss": 0.7295, "step": 6080 }, { "epoch": 0.9070027593407413, "grad_norm": 1.4737919569015503, "learning_rate": 4.496453474661089e-07, "loss": 0.6607, "step": 6081 }, { "epoch": 0.9071519128943247, "grad_norm": 1.0287777185440063, "learning_rate": 4.48213894206283e-07, "loss": 0.7212, "step": 6082 }, { "epoch": 0.9073010664479081, "grad_norm": 1.234641432762146, "learning_rate": 4.4678467087216794e-07, "loss": 0.7581, "step": 6083 }, { "epoch": 0.9074502200014916, "grad_norm": 1.543872594833374, "learning_rate": 4.453576777974278e-07, "loss": 0.7202, "step": 6084 }, { "epoch": 0.9075993735550749, "grad_norm": 1.8012452125549316, "learning_rate": 4.439329153152028e-07, "loss": 0.7076, "step": 6085 }, { "epoch": 0.9077485271086584, "grad_norm": 1.5551234483718872, "learning_rate": 4.425103837581124e-07, "loss": 0.6279, "step": 6086 }, { "epoch": 0.9078976806622417, "grad_norm": 1.5382804870605469, "learning_rate": 4.410900834582543e-07, "loss": 0.666, "step": 6087 }, { "epoch": 0.9080468342158252, "grad_norm": 1.6625351905822754, "learning_rate": 4.3967201474721e-07, "loss": 0.6584, "step": 6088 }, { "epoch": 0.9081959877694086, "grad_norm": 1.572860598564148, "learning_rate": 4.382561779560335e-07, "loss": 0.6553, "step": 6089 }, { "epoch": 0.908345141322992, "grad_norm": 0.5037521719932556, "learning_rate": 4.3684257341526373e-07, "loss": 0.2123, "step": 6090 }, { "epoch": 0.9084942948765754, "grad_norm": 1.3369735479354858, "learning_rate": 4.3543120145491555e-07, "loss": 0.729, "step": 6091 }, { "epoch": 0.9086434484301589, "grad_norm": 1.2362890243530273, "learning_rate": 4.3402206240447997e-07, "loss": 0.6957, "step": 6092 }, { "epoch": 0.9087926019837422, "grad_norm": 4.374029636383057, "learning_rate": 4.326151565929315e-07, "loss": 0.6661, "step": 6093 }, { "epoch": 0.9089417555373257, "grad_norm": 1.3124253749847412, "learning_rate": 4.3121048434872083e-07, "loss": 0.6754, "step": 6094 }, { "epoch": 0.9090909090909091, "grad_norm": 2.724825143814087, "learning_rate": 4.2980804599978e-07, "loss": 0.5772, "step": 6095 }, { "epoch": 0.9092400626444925, "grad_norm": 3.0967578887939453, "learning_rate": 4.284078418735138e-07, "loss": 0.6764, "step": 6096 }, { "epoch": 0.9093892161980759, "grad_norm": 1.8144564628601074, "learning_rate": 4.270098722968108e-07, "loss": 0.6474, "step": 6097 }, { "epoch": 0.9095383697516594, "grad_norm": 1.1990973949432373, "learning_rate": 4.256141375960343e-07, "loss": 0.666, "step": 6098 }, { "epoch": 0.9096875233052427, "grad_norm": 1.4803587198257446, "learning_rate": 4.2422063809702927e-07, "loss": 0.7291, "step": 6099 }, { "epoch": 0.9098366768588262, "grad_norm": 1.9431148767471313, "learning_rate": 4.228293741251166e-07, "loss": 0.7599, "step": 6100 }, { "epoch": 0.9099858304124095, "grad_norm": 1.9943301677703857, "learning_rate": 4.214403460050964e-07, "loss": 0.6776, "step": 6101 }, { "epoch": 0.910134983965993, "grad_norm": 2.3705451488494873, "learning_rate": 4.200535540612449e-07, "loss": 0.6316, "step": 6102 }, { "epoch": 0.9102841375195764, "grad_norm": 1.5676225423812866, "learning_rate": 4.1866899861731867e-07, "loss": 0.7722, "step": 6103 }, { "epoch": 0.9104332910731598, "grad_norm": 2.007136821746826, "learning_rate": 4.1728667999655027e-07, "loss": 0.7719, "step": 6104 }, { "epoch": 0.9105824446267432, "grad_norm": 4.336796283721924, "learning_rate": 4.159065985216515e-07, "loss": 0.5919, "step": 6105 }, { "epoch": 0.9107315981803267, "grad_norm": 1.2188483476638794, "learning_rate": 4.14528754514808e-07, "loss": 0.6186, "step": 6106 }, { "epoch": 0.91088075173391, "grad_norm": 2.601198196411133, "learning_rate": 4.131531482976925e-07, "loss": 0.6568, "step": 6107 }, { "epoch": 0.9110299052874935, "grad_norm": 1.2245081663131714, "learning_rate": 4.117797801914447e-07, "loss": 0.6711, "step": 6108 }, { "epoch": 0.9111790588410769, "grad_norm": 1.726948857307434, "learning_rate": 4.104086505166871e-07, "loss": 0.6826, "step": 6109 }, { "epoch": 0.9113282123946603, "grad_norm": 1.839072585105896, "learning_rate": 4.0903975959352026e-07, "loss": 0.5935, "step": 6110 }, { "epoch": 0.9114773659482437, "grad_norm": 1.4215044975280762, "learning_rate": 4.0767310774151746e-07, "loss": 0.678, "step": 6111 }, { "epoch": 0.9116265195018272, "grad_norm": 1.3578723669052124, "learning_rate": 4.063086952797346e-07, "loss": 0.5822, "step": 6112 }, { "epoch": 0.9117756730554105, "grad_norm": 1.2909653186798096, "learning_rate": 4.049465225267013e-07, "loss": 0.6695, "step": 6113 }, { "epoch": 0.911924826608994, "grad_norm": 2.511538028717041, "learning_rate": 4.0358658980042765e-07, "loss": 0.5767, "step": 6114 }, { "epoch": 0.9120739801625773, "grad_norm": 1.6422334909439087, "learning_rate": 4.022288974183941e-07, "loss": 0.6474, "step": 6115 }, { "epoch": 0.9122231337161608, "grad_norm": 1.4820259809494019, "learning_rate": 4.0087344569756934e-07, "loss": 0.7033, "step": 6116 }, { "epoch": 0.9123722872697442, "grad_norm": 2.1184449195861816, "learning_rate": 3.99520234954387e-07, "loss": 0.7254, "step": 6117 }, { "epoch": 0.9125214408233276, "grad_norm": 2.0581369400024414, "learning_rate": 3.981692655047642e-07, "loss": 0.6581, "step": 6118 }, { "epoch": 0.912670594376911, "grad_norm": 0.5030192136764526, "learning_rate": 3.968205376640932e-07, "loss": 0.2335, "step": 6119 }, { "epoch": 0.9128197479304945, "grad_norm": 1.5204672813415527, "learning_rate": 3.954740517472455e-07, "loss": 0.6828, "step": 6120 }, { "epoch": 0.9129689014840778, "grad_norm": 1.3610575199127197, "learning_rate": 3.94129808068564e-07, "loss": 0.729, "step": 6121 }, { "epoch": 0.9131180550376613, "grad_norm": 2.239823579788208, "learning_rate": 3.9278780694187114e-07, "loss": 0.6821, "step": 6122 }, { "epoch": 0.9132672085912447, "grad_norm": 3.359269142150879, "learning_rate": 3.9144804868046724e-07, "loss": 0.6316, "step": 6123 }, { "epoch": 0.9134163621448281, "grad_norm": 2.102123737335205, "learning_rate": 3.901105335971267e-07, "loss": 0.691, "step": 6124 }, { "epoch": 0.9135655156984115, "grad_norm": 0.540596604347229, "learning_rate": 3.887752620041008e-07, "loss": 0.2704, "step": 6125 }, { "epoch": 0.913714669251995, "grad_norm": 2.1417384147644043, "learning_rate": 3.8744223421311787e-07, "loss": 0.6249, "step": 6126 }, { "epoch": 0.9138638228055783, "grad_norm": 1.2872520685195923, "learning_rate": 3.8611145053538134e-07, "loss": 0.705, "step": 6127 }, { "epoch": 0.9140129763591618, "grad_norm": 1.2518770694732666, "learning_rate": 3.8478291128157155e-07, "loss": 0.6695, "step": 6128 }, { "epoch": 0.9141621299127451, "grad_norm": 1.4646977186203003, "learning_rate": 3.8345661676184475e-07, "loss": 0.6082, "step": 6129 }, { "epoch": 0.9143112834663286, "grad_norm": 1.3697909116744995, "learning_rate": 3.8213256728583115e-07, "loss": 0.6051, "step": 6130 }, { "epoch": 0.914460437019912, "grad_norm": 1.338006854057312, "learning_rate": 3.808107631626401e-07, "loss": 0.7141, "step": 6131 }, { "epoch": 0.9146095905734954, "grad_norm": 1.4798094034194946, "learning_rate": 3.7949120470085586e-07, "loss": 0.6789, "step": 6132 }, { "epoch": 0.9147587441270788, "grad_norm": 1.7467541694641113, "learning_rate": 3.781738922085354e-07, "loss": 0.634, "step": 6133 }, { "epoch": 0.9149078976806623, "grad_norm": 1.4721684455871582, "learning_rate": 3.76858825993216e-07, "loss": 0.7321, "step": 6134 }, { "epoch": 0.9150570512342456, "grad_norm": 1.971047043800354, "learning_rate": 3.7554600636190876e-07, "loss": 0.6114, "step": 6135 }, { "epoch": 0.9152062047878291, "grad_norm": 1.5349056720733643, "learning_rate": 3.742354336210974e-07, "loss": 0.5947, "step": 6136 }, { "epoch": 0.9153553583414125, "grad_norm": 1.083014726638794, "learning_rate": 3.7292710807674493e-07, "loss": 0.7768, "step": 6137 }, { "epoch": 0.9155045118949959, "grad_norm": 1.5190051794052124, "learning_rate": 3.716210300342893e-07, "loss": 0.6826, "step": 6138 }, { "epoch": 0.9156536654485793, "grad_norm": 1.7565560340881348, "learning_rate": 3.70317199798641e-07, "loss": 0.785, "step": 6139 }, { "epoch": 0.9158028190021628, "grad_norm": 1.9309207201004028, "learning_rate": 3.690156176741877e-07, "loss": 0.6412, "step": 6140 }, { "epoch": 0.9159519725557461, "grad_norm": 1.4598734378814697, "learning_rate": 3.6771628396479295e-07, "loss": 0.7065, "step": 6141 }, { "epoch": 0.9161011261093296, "grad_norm": 1.3233200311660767, "learning_rate": 3.664191989737942e-07, "loss": 0.6622, "step": 6142 }, { "epoch": 0.916250279662913, "grad_norm": 2.9834249019622803, "learning_rate": 3.651243630040047e-07, "loss": 0.7261, "step": 6143 }, { "epoch": 0.9163994332164964, "grad_norm": 1.272229552268982, "learning_rate": 3.638317763577126e-07, "loss": 0.632, "step": 6144 }, { "epoch": 0.9165485867700798, "grad_norm": 1.4132258892059326, "learning_rate": 3.6254143933667886e-07, "loss": 0.7041, "step": 6145 }, { "epoch": 0.9166977403236632, "grad_norm": 0.53647381067276, "learning_rate": 3.6125335224214133e-07, "loss": 0.2459, "step": 6146 }, { "epoch": 0.9168468938772466, "grad_norm": 1.3407455682754517, "learning_rate": 3.5996751537481277e-07, "loss": 0.7209, "step": 6147 }, { "epoch": 0.9169960474308301, "grad_norm": 2.3037264347076416, "learning_rate": 3.586839290348809e-07, "loss": 0.7078, "step": 6148 }, { "epoch": 0.9171452009844134, "grad_norm": 1.6498109102249146, "learning_rate": 3.57402593522006e-07, "loss": 0.6658, "step": 6149 }, { "epoch": 0.9172943545379969, "grad_norm": 2.8767993450164795, "learning_rate": 3.561235091353243e-07, "loss": 0.6564, "step": 6150 }, { "epoch": 0.9174435080915803, "grad_norm": 2.2882189750671387, "learning_rate": 3.548466761734459e-07, "loss": 0.6557, "step": 6151 }, { "epoch": 0.9175926616451637, "grad_norm": 1.6806575059890747, "learning_rate": 3.535720949344557e-07, "loss": 0.6676, "step": 6152 }, { "epoch": 0.9177418151987471, "grad_norm": 0.5513409972190857, "learning_rate": 3.522997657159133e-07, "loss": 0.2642, "step": 6153 }, { "epoch": 0.9178909687523306, "grad_norm": 1.6598119735717773, "learning_rate": 3.5102968881485344e-07, "loss": 0.6225, "step": 6154 }, { "epoch": 0.9180401223059139, "grad_norm": 1.2564301490783691, "learning_rate": 3.4976186452778116e-07, "loss": 0.6385, "step": 6155 }, { "epoch": 0.9181892758594974, "grad_norm": 1.533797025680542, "learning_rate": 3.4849629315067856e-07, "loss": 0.6866, "step": 6156 }, { "epoch": 0.9183384294130807, "grad_norm": 1.41595458984375, "learning_rate": 3.4723297497900487e-07, "loss": 0.5958, "step": 6157 }, { "epoch": 0.9184875829666642, "grad_norm": 2.022536277770996, "learning_rate": 3.459719103076831e-07, "loss": 0.7212, "step": 6158 }, { "epoch": 0.9186367365202476, "grad_norm": 1.5848757028579712, "learning_rate": 3.447130994311232e-07, "loss": 0.691, "step": 6159 }, { "epoch": 0.918785890073831, "grad_norm": 2.5838544368743896, "learning_rate": 3.4345654264320017e-07, "loss": 0.6022, "step": 6160 }, { "epoch": 0.9189350436274144, "grad_norm": 1.3261101245880127, "learning_rate": 3.422022402372649e-07, "loss": 0.7882, "step": 6161 }, { "epoch": 0.9190841971809979, "grad_norm": 2.5150368213653564, "learning_rate": 3.4095019250614316e-07, "loss": 0.721, "step": 6162 }, { "epoch": 0.9192333507345812, "grad_norm": 2.276285409927368, "learning_rate": 3.397003997421344e-07, "loss": 0.7539, "step": 6163 }, { "epoch": 0.9193825042881647, "grad_norm": 1.4259929656982422, "learning_rate": 3.3845286223700757e-07, "loss": 0.5592, "step": 6164 }, { "epoch": 0.9195316578417481, "grad_norm": 1.6336089372634888, "learning_rate": 3.372075802820107e-07, "loss": 0.6356, "step": 6165 }, { "epoch": 0.9196808113953315, "grad_norm": 3.098937511444092, "learning_rate": 3.3596455416786245e-07, "loss": 0.636, "step": 6166 }, { "epoch": 0.9198299649489149, "grad_norm": 1.5338687896728516, "learning_rate": 3.34723784184755e-07, "loss": 0.6789, "step": 6167 }, { "epoch": 0.9199791185024984, "grad_norm": 1.4153518676757812, "learning_rate": 3.334852706223546e-07, "loss": 0.6724, "step": 6168 }, { "epoch": 0.9201282720560817, "grad_norm": 1.8375704288482666, "learning_rate": 3.322490137697998e-07, "loss": 0.6567, "step": 6169 }, { "epoch": 0.9202774256096652, "grad_norm": 1.3284140825271606, "learning_rate": 3.310150139157031e-07, "loss": 0.6545, "step": 6170 }, { "epoch": 0.9204265791632485, "grad_norm": 1.830802321434021, "learning_rate": 3.297832713481486e-07, "loss": 0.7607, "step": 6171 }, { "epoch": 0.920575732716832, "grad_norm": 1.4576609134674072, "learning_rate": 3.2855378635469503e-07, "loss": 0.5928, "step": 6172 }, { "epoch": 0.9207248862704154, "grad_norm": 2.0201878547668457, "learning_rate": 3.273265592223751e-07, "loss": 0.6001, "step": 6173 }, { "epoch": 0.9208740398239988, "grad_norm": 2.6783528327941895, "learning_rate": 3.261015902376896e-07, "loss": 0.7714, "step": 6174 }, { "epoch": 0.9210231933775822, "grad_norm": 1.819715976715088, "learning_rate": 3.2487887968661866e-07, "loss": 0.8101, "step": 6175 }, { "epoch": 0.9211723469311657, "grad_norm": 1.388832449913025, "learning_rate": 3.2365842785460954e-07, "loss": 0.6965, "step": 6176 }, { "epoch": 0.921321500484749, "grad_norm": 1.4392623901367188, "learning_rate": 3.2244023502658537e-07, "loss": 0.7279, "step": 6177 }, { "epoch": 0.9214706540383325, "grad_norm": 1.8015687465667725, "learning_rate": 3.2122430148694203e-07, "loss": 0.6992, "step": 6178 }, { "epoch": 0.9216198075919159, "grad_norm": 1.364176630973816, "learning_rate": 3.2001062751954583e-07, "loss": 0.6585, "step": 6179 }, { "epoch": 0.9217689611454993, "grad_norm": 1.3597105741500854, "learning_rate": 3.1879921340773776e-07, "loss": 0.7171, "step": 6180 }, { "epoch": 0.9219181146990827, "grad_norm": 2.1056575775146484, "learning_rate": 3.175900594343284e-07, "loss": 0.6514, "step": 6181 }, { "epoch": 0.9220672682526662, "grad_norm": 1.3774750232696533, "learning_rate": 3.163831658816052e-07, "loss": 0.6921, "step": 6182 }, { "epoch": 0.9222164218062495, "grad_norm": 23.920351028442383, "learning_rate": 3.151785330313217e-07, "loss": 0.6422, "step": 6183 }, { "epoch": 0.922365575359833, "grad_norm": 1.9211349487304688, "learning_rate": 3.1397616116470964e-07, "loss": 0.6939, "step": 6184 }, { "epoch": 0.9225147289134163, "grad_norm": 1.752777099609375, "learning_rate": 3.1277605056246994e-07, "loss": 0.6404, "step": 6185 }, { "epoch": 0.9226638824669998, "grad_norm": 1.371679425239563, "learning_rate": 3.1157820150477634e-07, "loss": 0.6778, "step": 6186 }, { "epoch": 0.9228130360205832, "grad_norm": 1.6286433935165405, "learning_rate": 3.10382614271274e-07, "loss": 0.7049, "step": 6187 }, { "epoch": 0.9229621895741666, "grad_norm": 1.876528024673462, "learning_rate": 3.091892891410808e-07, "loss": 0.6389, "step": 6188 }, { "epoch": 0.92311134312775, "grad_norm": 1.3685739040374756, "learning_rate": 3.079982263927861e-07, "loss": 0.6925, "step": 6189 }, { "epoch": 0.9232604966813335, "grad_norm": 1.47667396068573, "learning_rate": 3.0680942630444965e-07, "loss": 0.629, "step": 6190 }, { "epoch": 0.9234096502349168, "grad_norm": 1.4253754615783691, "learning_rate": 3.0562288915360837e-07, "loss": 0.7212, "step": 6191 }, { "epoch": 0.9235588037885003, "grad_norm": 1.3477240800857544, "learning_rate": 3.0443861521726183e-07, "loss": 0.7385, "step": 6192 }, { "epoch": 0.9237079573420837, "grad_norm": 1.7665998935699463, "learning_rate": 3.0325660477188767e-07, "loss": 0.6279, "step": 6193 }, { "epoch": 0.9238571108956671, "grad_norm": 1.8372712135314941, "learning_rate": 3.020768580934386e-07, "loss": 0.6724, "step": 6194 }, { "epoch": 0.9240062644492505, "grad_norm": 1.5444672107696533, "learning_rate": 3.008993754573286e-07, "loss": 0.7092, "step": 6195 }, { "epoch": 0.924155418002834, "grad_norm": 1.4168999195098877, "learning_rate": 2.9972415713845016e-07, "loss": 0.6576, "step": 6196 }, { "epoch": 0.9243045715564173, "grad_norm": 2.5250678062438965, "learning_rate": 2.9855120341116706e-07, "loss": 0.6668, "step": 6197 }, { "epoch": 0.9244537251100008, "grad_norm": 1.4353126287460327, "learning_rate": 2.973805145493103e-07, "loss": 0.7527, "step": 6198 }, { "epoch": 0.9246028786635841, "grad_norm": 1.503208875656128, "learning_rate": 2.962120908261856e-07, "loss": 0.6622, "step": 6199 }, { "epoch": 0.9247520322171676, "grad_norm": 1.3931010961532593, "learning_rate": 2.950459325145705e-07, "loss": 0.6935, "step": 6200 }, { "epoch": 0.924901185770751, "grad_norm": 2.136873960494995, "learning_rate": 2.9388203988671037e-07, "loss": 0.6131, "step": 6201 }, { "epoch": 0.9250503393243344, "grad_norm": 1.4830045700073242, "learning_rate": 2.9272041321432353e-07, "loss": 0.6165, "step": 6202 }, { "epoch": 0.9251994928779178, "grad_norm": 2.2134363651275635, "learning_rate": 2.915610527685997e-07, "loss": 0.6848, "step": 6203 }, { "epoch": 0.9253486464315013, "grad_norm": 1.2033382654190063, "learning_rate": 2.904039588202001e-07, "loss": 0.6605, "step": 6204 }, { "epoch": 0.9254977999850846, "grad_norm": 2.4825146198272705, "learning_rate": 2.892491316392543e-07, "loss": 0.6984, "step": 6205 }, { "epoch": 0.9256469535386681, "grad_norm": 1.3591711521148682, "learning_rate": 2.880965714953643e-07, "loss": 0.6657, "step": 6206 }, { "epoch": 0.9257961070922515, "grad_norm": 1.4931035041809082, "learning_rate": 2.869462786576027e-07, "loss": 0.6868, "step": 6207 }, { "epoch": 0.9259452606458349, "grad_norm": 1.4548834562301636, "learning_rate": 2.857982533945125e-07, "loss": 0.6456, "step": 6208 }, { "epoch": 0.9260944141994183, "grad_norm": 1.3545396327972412, "learning_rate": 2.8465249597410816e-07, "loss": 0.6706, "step": 6209 }, { "epoch": 0.9262435677530018, "grad_norm": 1.3444935083389282, "learning_rate": 2.835090066638746e-07, "loss": 0.6172, "step": 6210 }, { "epoch": 0.9263927213065851, "grad_norm": 1.8213642835617065, "learning_rate": 2.823677857307638e-07, "loss": 0.6454, "step": 6211 }, { "epoch": 0.9265418748601686, "grad_norm": 1.9161300659179688, "learning_rate": 2.812288334412039e-07, "loss": 0.5953, "step": 6212 }, { "epoch": 0.926691028413752, "grad_norm": 1.2246432304382324, "learning_rate": 2.80092150061092e-07, "loss": 0.7449, "step": 6213 }, { "epoch": 0.9268401819673354, "grad_norm": 1.4413717985153198, "learning_rate": 2.7895773585579047e-07, "loss": 0.6554, "step": 6214 }, { "epoch": 0.9269893355209188, "grad_norm": 1.212165117263794, "learning_rate": 2.778255910901362e-07, "loss": 0.6992, "step": 6215 }, { "epoch": 0.9271384890745022, "grad_norm": 2.0138609409332275, "learning_rate": 2.766957160284389e-07, "loss": 0.5662, "step": 6216 }, { "epoch": 0.9272876426280856, "grad_norm": 1.8885475397109985, "learning_rate": 2.75568110934471e-07, "loss": 0.5512, "step": 6217 }, { "epoch": 0.9274367961816691, "grad_norm": 2.3558928966522217, "learning_rate": 2.744427760714818e-07, "loss": 0.6656, "step": 6218 }, { "epoch": 0.9275859497352524, "grad_norm": 1.393354058265686, "learning_rate": 2.7331971170218684e-07, "loss": 0.7297, "step": 6219 }, { "epoch": 0.9277351032888359, "grad_norm": 2.063938856124878, "learning_rate": 2.72198918088773e-07, "loss": 0.6777, "step": 6220 }, { "epoch": 0.9278842568424193, "grad_norm": 1.242203950881958, "learning_rate": 2.7108039549289754e-07, "loss": 0.7205, "step": 6221 }, { "epoch": 0.9280334103960027, "grad_norm": 1.845240831375122, "learning_rate": 2.699641441756862e-07, "loss": 0.6524, "step": 6222 }, { "epoch": 0.9281825639495861, "grad_norm": 1.6758992671966553, "learning_rate": 2.688501643977337e-07, "loss": 0.6843, "step": 6223 }, { "epoch": 0.9283317175031696, "grad_norm": 1.7260775566101074, "learning_rate": 2.6773845641910655e-07, "loss": 0.6103, "step": 6224 }, { "epoch": 0.9284808710567529, "grad_norm": 1.4968184232711792, "learning_rate": 2.6662902049934047e-07, "loss": 0.7643, "step": 6225 }, { "epoch": 0.9286300246103364, "grad_norm": 1.7083029747009277, "learning_rate": 2.655218568974416e-07, "loss": 0.7216, "step": 6226 }, { "epoch": 0.9287791781639197, "grad_norm": 1.5962998867034912, "learning_rate": 2.64416965871882e-07, "loss": 0.6894, "step": 6227 }, { "epoch": 0.9289283317175032, "grad_norm": 1.500799536705017, "learning_rate": 2.633143476806066e-07, "loss": 0.6679, "step": 6228 }, { "epoch": 0.9290774852710866, "grad_norm": 1.7958539724349976, "learning_rate": 2.6221400258102826e-07, "loss": 0.6559, "step": 6229 }, { "epoch": 0.92922663882467, "grad_norm": 1.2219370603561401, "learning_rate": 2.611159308300304e-07, "loss": 0.7212, "step": 6230 }, { "epoch": 0.9293757923782534, "grad_norm": 1.5392429828643799, "learning_rate": 2.600201326839646e-07, "loss": 0.575, "step": 6231 }, { "epoch": 0.9295249459318369, "grad_norm": 1.4384740591049194, "learning_rate": 2.58926608398653e-07, "loss": 0.6573, "step": 6232 }, { "epoch": 0.9296740994854202, "grad_norm": 1.7299139499664307, "learning_rate": 2.5783535822938354e-07, "loss": 0.6175, "step": 6233 }, { "epoch": 0.9298232530390037, "grad_norm": 1.1252857446670532, "learning_rate": 2.56746382430918e-07, "loss": 0.6797, "step": 6234 }, { "epoch": 0.9299724065925871, "grad_norm": 1.4885915517807007, "learning_rate": 2.55659681257483e-07, "loss": 0.6585, "step": 6235 }, { "epoch": 0.9301215601461705, "grad_norm": 1.7080068588256836, "learning_rate": 2.545752549627767e-07, "loss": 0.6001, "step": 6236 }, { "epoch": 0.9302707136997539, "grad_norm": 2.500730514526367, "learning_rate": 2.534931037999633e-07, "loss": 0.6531, "step": 6237 }, { "epoch": 0.9304198672533374, "grad_norm": 2.5992281436920166, "learning_rate": 2.524132280216818e-07, "loss": 0.665, "step": 6238 }, { "epoch": 0.9305690208069207, "grad_norm": 1.740548014640808, "learning_rate": 2.5133562788003276e-07, "loss": 0.6966, "step": 6239 }, { "epoch": 0.9307181743605042, "grad_norm": 1.3115116357803345, "learning_rate": 2.5026030362659157e-07, "loss": 0.6532, "step": 6240 }, { "epoch": 0.9308673279140876, "grad_norm": 1.861311912536621, "learning_rate": 2.491872555123975e-07, "loss": 0.6298, "step": 6241 }, { "epoch": 0.931016481467671, "grad_norm": 1.2872360944747925, "learning_rate": 2.4811648378796127e-07, "loss": 0.5989, "step": 6242 }, { "epoch": 0.9311656350212544, "grad_norm": 1.7253528833389282, "learning_rate": 2.4704798870326174e-07, "loss": 0.7062, "step": 6243 }, { "epoch": 0.9313147885748378, "grad_norm": 1.426487684249878, "learning_rate": 2.4598177050774495e-07, "loss": 0.6616, "step": 6244 }, { "epoch": 0.9314639421284212, "grad_norm": 1.8540977239608765, "learning_rate": 2.449178294503274e-07, "loss": 0.6965, "step": 6245 }, { "epoch": 0.9316130956820047, "grad_norm": 1.3400565385818481, "learning_rate": 2.438561657793914e-07, "loss": 0.7675, "step": 6246 }, { "epoch": 0.931762249235588, "grad_norm": 1.3736696243286133, "learning_rate": 2.4279677974279214e-07, "loss": 0.7084, "step": 6247 }, { "epoch": 0.9319114027891715, "grad_norm": 1.366308569908142, "learning_rate": 2.417396715878462e-07, "loss": 0.6536, "step": 6248 }, { "epoch": 0.9320605563427549, "grad_norm": 1.5889034271240234, "learning_rate": 2.40684841561345e-07, "loss": 0.6291, "step": 6249 }, { "epoch": 0.9322097098963383, "grad_norm": 1.456682801246643, "learning_rate": 2.396322899095449e-07, "loss": 0.6467, "step": 6250 }, { "epoch": 0.9323588634499217, "grad_norm": 1.4907375574111938, "learning_rate": 2.3858201687817164e-07, "loss": 0.6182, "step": 6251 }, { "epoch": 0.9325080170035052, "grad_norm": 0.5325120091438293, "learning_rate": 2.3753402271241566e-07, "loss": 0.2857, "step": 6252 }, { "epoch": 0.9326571705570885, "grad_norm": 1.4534746408462524, "learning_rate": 2.3648830765693908e-07, "loss": 0.6178, "step": 6253 }, { "epoch": 0.932806324110672, "grad_norm": 3.691328763961792, "learning_rate": 2.3544487195587108e-07, "loss": 0.6523, "step": 6254 }, { "epoch": 0.9329554776642554, "grad_norm": 2.347562313079834, "learning_rate": 2.3440371585280896e-07, "loss": 0.7352, "step": 6255 }, { "epoch": 0.9331046312178388, "grad_norm": 2.0671026706695557, "learning_rate": 2.3336483959081612e-07, "loss": 0.7222, "step": 6256 }, { "epoch": 0.9332537847714222, "grad_norm": 1.2882661819458008, "learning_rate": 2.3232824341242743e-07, "loss": 0.6077, "step": 6257 }, { "epoch": 0.9334029383250055, "grad_norm": 3.026398181915283, "learning_rate": 2.312939275596393e-07, "loss": 0.7046, "step": 6258 }, { "epoch": 0.933552091878589, "grad_norm": 1.4515855312347412, "learning_rate": 2.3026189227392083e-07, "loss": 0.7009, "step": 6259 }, { "epoch": 0.9337012454321724, "grad_norm": 2.053558111190796, "learning_rate": 2.2923213779620924e-07, "loss": 0.667, "step": 6260 }, { "epoch": 0.9338503989857558, "grad_norm": 0.495357483625412, "learning_rate": 2.2820466436690447e-07, "loss": 0.2187, "step": 6261 }, { "epoch": 0.9339995525393392, "grad_norm": 2.332462787628174, "learning_rate": 2.27179472225878e-07, "loss": 0.7249, "step": 6262 }, { "epoch": 0.9341487060929227, "grad_norm": 1.7472535371780396, "learning_rate": 2.2615656161246613e-07, "loss": 0.6747, "step": 6263 }, { "epoch": 0.934297859646506, "grad_norm": 3.7739763259887695, "learning_rate": 2.2513593276547673e-07, "loss": 0.6171, "step": 6264 }, { "epoch": 0.9344470132000895, "grad_norm": 1.55128812789917, "learning_rate": 2.2411758592318033e-07, "loss": 0.6256, "step": 6265 }, { "epoch": 0.9345961667536729, "grad_norm": 2.040235757827759, "learning_rate": 2.2310152132331676e-07, "loss": 0.6392, "step": 6266 }, { "epoch": 0.9347453203072563, "grad_norm": 1.2479431629180908, "learning_rate": 2.220877392030929e-07, "loss": 0.7293, "step": 6267 }, { "epoch": 0.9348944738608397, "grad_norm": 2.269524574279785, "learning_rate": 2.210762397991828e-07, "loss": 0.6358, "step": 6268 }, { "epoch": 0.9350436274144232, "grad_norm": 1.962674617767334, "learning_rate": 2.2006702334772755e-07, "loss": 0.6825, "step": 6269 }, { "epoch": 0.9351927809680065, "grad_norm": 2.0116159915924072, "learning_rate": 2.1906009008433427e-07, "loss": 0.6788, "step": 6270 }, { "epoch": 0.93534193452159, "grad_norm": 1.3842992782592773, "learning_rate": 2.1805544024407933e-07, "loss": 0.7495, "step": 6271 }, { "epoch": 0.9354910880751733, "grad_norm": 1.3363319635391235, "learning_rate": 2.17053074061504e-07, "loss": 0.6674, "step": 6272 }, { "epoch": 0.9356402416287568, "grad_norm": 3.064415454864502, "learning_rate": 2.1605299177061668e-07, "loss": 0.6807, "step": 6273 }, { "epoch": 0.9357893951823402, "grad_norm": 1.4797190427780151, "learning_rate": 2.150551936048928e-07, "loss": 0.7362, "step": 6274 }, { "epoch": 0.9359385487359236, "grad_norm": 1.6862696409225464, "learning_rate": 2.1405967979727715e-07, "loss": 0.6724, "step": 6275 }, { "epoch": 0.936087702289507, "grad_norm": 1.5070616006851196, "learning_rate": 2.1306645058017607e-07, "loss": 0.6528, "step": 6276 }, { "epoch": 0.9362368558430905, "grad_norm": 0.5182616710662842, "learning_rate": 2.1207550618546624e-07, "loss": 0.2327, "step": 6277 }, { "epoch": 0.9363860093966738, "grad_norm": 1.5072764158248901, "learning_rate": 2.1108684684448932e-07, "loss": 0.6734, "step": 6278 }, { "epoch": 0.9365351629502573, "grad_norm": 2.3223607540130615, "learning_rate": 2.1010047278805735e-07, "loss": 0.6027, "step": 6279 }, { "epoch": 0.9366843165038407, "grad_norm": 1.7034162282943726, "learning_rate": 2.0911638424644055e-07, "loss": 0.5582, "step": 6280 }, { "epoch": 0.9368334700574241, "grad_norm": 1.79264235496521, "learning_rate": 2.0813458144938514e-07, "loss": 0.7013, "step": 6281 }, { "epoch": 0.9369826236110075, "grad_norm": 0.490109920501709, "learning_rate": 2.0715506462609557e-07, "loss": 0.2502, "step": 6282 }, { "epoch": 0.937131777164591, "grad_norm": 1.8065193891525269, "learning_rate": 2.0617783400525003e-07, "loss": 0.6879, "step": 6283 }, { "epoch": 0.9372809307181743, "grad_norm": 1.5465095043182373, "learning_rate": 2.0520288981498605e-07, "loss": 0.7047, "step": 6284 }, { "epoch": 0.9374300842717578, "grad_norm": 1.1532281637191772, "learning_rate": 2.0423023228291373e-07, "loss": 0.7265, "step": 6285 }, { "epoch": 0.9375792378253411, "grad_norm": 1.643582820892334, "learning_rate": 2.0325986163610367e-07, "loss": 0.6898, "step": 6286 }, { "epoch": 0.9377283913789246, "grad_norm": 1.218015193939209, "learning_rate": 2.022917781010958e-07, "loss": 0.6654, "step": 6287 }, { "epoch": 0.937877544932508, "grad_norm": 1.1237905025482178, "learning_rate": 2.0132598190389596e-07, "loss": 0.6725, "step": 6288 }, { "epoch": 0.9380266984860914, "grad_norm": 1.3866294622421265, "learning_rate": 2.0036247326997383e-07, "loss": 0.5682, "step": 6289 }, { "epoch": 0.9381758520396748, "grad_norm": 3.765944242477417, "learning_rate": 1.9940125242426834e-07, "loss": 0.6616, "step": 6290 }, { "epoch": 0.9383250055932583, "grad_norm": 1.7662010192871094, "learning_rate": 1.9844231959118444e-07, "loss": 0.6096, "step": 6291 }, { "epoch": 0.9384741591468416, "grad_norm": 0.5096643567085266, "learning_rate": 1.9748567499458639e-07, "loss": 0.2336, "step": 6292 }, { "epoch": 0.9386233127004251, "grad_norm": 2.1012871265411377, "learning_rate": 1.9653131885781328e-07, "loss": 0.6118, "step": 6293 }, { "epoch": 0.9387724662540085, "grad_norm": 1.4371243715286255, "learning_rate": 1.9557925140366363e-07, "loss": 0.6805, "step": 6294 }, { "epoch": 0.9389216198075919, "grad_norm": 1.5699506998062134, "learning_rate": 1.9462947285440405e-07, "loss": 0.5763, "step": 6295 }, { "epoch": 0.9390707733611753, "grad_norm": 1.9237325191497803, "learning_rate": 1.9368198343176604e-07, "loss": 0.685, "step": 6296 }, { "epoch": 0.9392199269147588, "grad_norm": 1.497771143913269, "learning_rate": 1.9273678335694712e-07, "loss": 0.7134, "step": 6297 }, { "epoch": 0.9393690804683421, "grad_norm": 1.482182264328003, "learning_rate": 1.917938728506108e-07, "loss": 0.6459, "step": 6298 }, { "epoch": 0.9395182340219256, "grad_norm": 1.327410340309143, "learning_rate": 1.9085325213288542e-07, "loss": 0.6319, "step": 6299 }, { "epoch": 0.9396673875755089, "grad_norm": 1.3028209209442139, "learning_rate": 1.8991492142336644e-07, "loss": 0.6573, "step": 6300 }, { "epoch": 0.9398165411290924, "grad_norm": 2.28493595123291, "learning_rate": 1.8897888094110972e-07, "loss": 0.664, "step": 6301 }, { "epoch": 0.9399656946826758, "grad_norm": 1.988660454750061, "learning_rate": 1.880451309046427e-07, "loss": 0.6945, "step": 6302 }, { "epoch": 0.9401148482362592, "grad_norm": 1.3468215465545654, "learning_rate": 1.8711367153195436e-07, "loss": 0.678, "step": 6303 }, { "epoch": 0.9402640017898426, "grad_norm": 1.6059609651565552, "learning_rate": 1.8618450304050074e-07, "loss": 0.6515, "step": 6304 }, { "epoch": 0.9404131553434261, "grad_norm": 1.6571879386901855, "learning_rate": 1.852576256472005e-07, "loss": 0.6882, "step": 6305 }, { "epoch": 0.9405623088970094, "grad_norm": 5.086423873901367, "learning_rate": 1.8433303956843952e-07, "loss": 0.5816, "step": 6306 }, { "epoch": 0.9407114624505929, "grad_norm": 1.5397593975067139, "learning_rate": 1.834107450200695e-07, "loss": 0.6283, "step": 6307 }, { "epoch": 0.9408606160041763, "grad_norm": 2.6844429969787598, "learning_rate": 1.8249074221740494e-07, "loss": 0.619, "step": 6308 }, { "epoch": 0.9410097695577597, "grad_norm": 1.4468824863433838, "learning_rate": 1.815730313752273e-07, "loss": 0.7229, "step": 6309 }, { "epoch": 0.9411589231113431, "grad_norm": 1.3601130247116089, "learning_rate": 1.8065761270778303e-07, "loss": 0.692, "step": 6310 }, { "epoch": 0.9413080766649266, "grad_norm": 1.2362717390060425, "learning_rate": 1.7974448642877894e-07, "loss": 0.7221, "step": 6311 }, { "epoch": 0.9414572302185099, "grad_norm": 1.7978365421295166, "learning_rate": 1.788336527513934e-07, "loss": 0.6324, "step": 6312 }, { "epoch": 0.9416063837720934, "grad_norm": 1.6291614770889282, "learning_rate": 1.7792511188826522e-07, "loss": 0.7285, "step": 6313 }, { "epoch": 0.9417555373256767, "grad_norm": 1.4611629247665405, "learning_rate": 1.7701886405149914e-07, "loss": 0.6718, "step": 6314 }, { "epoch": 0.9419046908792602, "grad_norm": 1.3948941230773926, "learning_rate": 1.7611490945266375e-07, "loss": 0.7274, "step": 6315 }, { "epoch": 0.9420538444328436, "grad_norm": 1.3208879232406616, "learning_rate": 1.7521324830279463e-07, "loss": 0.7434, "step": 6316 }, { "epoch": 0.942202997986427, "grad_norm": 1.377601146697998, "learning_rate": 1.7431388081238898e-07, "loss": 0.5968, "step": 6317 }, { "epoch": 0.9423521515400104, "grad_norm": 1.3495733737945557, "learning_rate": 1.7341680719141106e-07, "loss": 0.6827, "step": 6318 }, { "epoch": 0.9425013050935939, "grad_norm": 1.7417136430740356, "learning_rate": 1.7252202764928893e-07, "loss": 0.618, "step": 6319 }, { "epoch": 0.9426504586471772, "grad_norm": 1.720023274421692, "learning_rate": 1.7162954239491213e-07, "loss": 0.5904, "step": 6320 }, { "epoch": 0.9427996122007607, "grad_norm": 1.6695897579193115, "learning_rate": 1.7073935163663847e-07, "loss": 0.6773, "step": 6321 }, { "epoch": 0.942948765754344, "grad_norm": 1.6047072410583496, "learning_rate": 1.6985145558228942e-07, "loss": 0.5844, "step": 6322 }, { "epoch": 0.9430979193079275, "grad_norm": 1.560164213180542, "learning_rate": 1.6896585443914927e-07, "loss": 0.7264, "step": 6323 }, { "epoch": 0.9432470728615109, "grad_norm": 1.5105066299438477, "learning_rate": 1.6808254841396587e-07, "loss": 0.6958, "step": 6324 }, { "epoch": 0.9433962264150944, "grad_norm": 1.6196095943450928, "learning_rate": 1.6720153771295656e-07, "loss": 0.7083, "step": 6325 }, { "epoch": 0.9435453799686777, "grad_norm": 0.5178859829902649, "learning_rate": 1.6632282254179456e-07, "loss": 0.2449, "step": 6326 }, { "epoch": 0.9436945335222612, "grad_norm": 1.420709490776062, "learning_rate": 1.6544640310562466e-07, "loss": 0.6783, "step": 6327 }, { "epoch": 0.9438436870758445, "grad_norm": 1.2757364511489868, "learning_rate": 1.6457227960905097e-07, "loss": 0.7315, "step": 6328 }, { "epoch": 0.943992840629428, "grad_norm": 1.4421683549880981, "learning_rate": 1.6370045225614474e-07, "loss": 0.6595, "step": 6329 }, { "epoch": 0.9441419941830114, "grad_norm": 1.6280646324157715, "learning_rate": 1.6283092125043754e-07, "loss": 0.6363, "step": 6330 }, { "epoch": 0.9442911477365948, "grad_norm": 1.710094690322876, "learning_rate": 1.6196368679492815e-07, "loss": 0.708, "step": 6331 }, { "epoch": 0.9444403012901782, "grad_norm": 4.864290714263916, "learning_rate": 1.6109874909207901e-07, "loss": 0.7077, "step": 6332 }, { "epoch": 0.9445894548437617, "grad_norm": 2.3305792808532715, "learning_rate": 1.6023610834381197e-07, "loss": 0.7218, "step": 6333 }, { "epoch": 0.944738608397345, "grad_norm": 1.406248688697815, "learning_rate": 1.593757647515204e-07, "loss": 0.7128, "step": 6334 }, { "epoch": 0.9448877619509285, "grad_norm": 4.714359283447266, "learning_rate": 1.585177185160547e-07, "loss": 0.673, "step": 6335 }, { "epoch": 0.9450369155045119, "grad_norm": 1.3056868314743042, "learning_rate": 1.576619698377313e-07, "loss": 0.6877, "step": 6336 }, { "epoch": 0.9451860690580953, "grad_norm": 1.4001213312149048, "learning_rate": 1.5680851891633042e-07, "loss": 0.646, "step": 6337 }, { "epoch": 0.9453352226116787, "grad_norm": 1.3299338817596436, "learning_rate": 1.55957365951096e-07, "loss": 0.745, "step": 6338 }, { "epoch": 0.9454843761652622, "grad_norm": 2.7451798915863037, "learning_rate": 1.5510851114073467e-07, "loss": 0.6888, "step": 6339 }, { "epoch": 0.9456335297188455, "grad_norm": 1.5026720762252808, "learning_rate": 1.5426195468341675e-07, "loss": 0.677, "step": 6340 }, { "epoch": 0.945782683272429, "grad_norm": 1.72223699092865, "learning_rate": 1.5341769677677753e-07, "loss": 0.662, "step": 6341 }, { "epoch": 0.9459318368260123, "grad_norm": 1.6931438446044922, "learning_rate": 1.5257573761791265e-07, "loss": 0.7346, "step": 6342 }, { "epoch": 0.9460809903795958, "grad_norm": 1.7333922386169434, "learning_rate": 1.5173607740338382e-07, "loss": 0.684, "step": 6343 }, { "epoch": 0.9462301439331792, "grad_norm": 1.7104963064193726, "learning_rate": 1.5089871632921638e-07, "loss": 0.6501, "step": 6344 }, { "epoch": 0.9463792974867626, "grad_norm": 1.8890354633331299, "learning_rate": 1.5006365459089622e-07, "loss": 0.6002, "step": 6345 }, { "epoch": 0.946528451040346, "grad_norm": 1.282618522644043, "learning_rate": 1.4923089238337296e-07, "loss": 0.6557, "step": 6346 }, { "epoch": 0.9466776045939295, "grad_norm": 1.2584365606307983, "learning_rate": 1.484004299010633e-07, "loss": 0.6137, "step": 6347 }, { "epoch": 0.9468267581475128, "grad_norm": 1.4539045095443726, "learning_rate": 1.4757226733783992e-07, "loss": 0.6218, "step": 6348 }, { "epoch": 0.9469759117010963, "grad_norm": 1.734647274017334, "learning_rate": 1.4674640488704596e-07, "loss": 0.6574, "step": 6349 }, { "epoch": 0.9471250652546797, "grad_norm": 2.177579402923584, "learning_rate": 1.4592284274148273e-07, "loss": 0.6724, "step": 6350 }, { "epoch": 0.9472742188082631, "grad_norm": 1.6140590906143188, "learning_rate": 1.4510158109341644e-07, "loss": 0.79, "step": 6351 }, { "epoch": 0.9474233723618465, "grad_norm": 1.7589491605758667, "learning_rate": 1.4428262013457706e-07, "loss": 0.6528, "step": 6352 }, { "epoch": 0.94757252591543, "grad_norm": 1.9656516313552856, "learning_rate": 1.4346596005615499e-07, "loss": 0.6063, "step": 6353 }, { "epoch": 0.9477216794690133, "grad_norm": 1.5961230993270874, "learning_rate": 1.4265160104880438e-07, "loss": 0.727, "step": 6354 }, { "epoch": 0.9478708330225968, "grad_norm": 1.975625991821289, "learning_rate": 1.4183954330264317e-07, "loss": 0.707, "step": 6355 }, { "epoch": 0.9480199865761801, "grad_norm": 1.4931150674819946, "learning_rate": 1.410297870072508e-07, "loss": 0.6838, "step": 6356 }, { "epoch": 0.9481691401297636, "grad_norm": 2.5965628623962402, "learning_rate": 1.402223323516727e-07, "loss": 0.5738, "step": 6357 }, { "epoch": 0.948318293683347, "grad_norm": 2.06892466545105, "learning_rate": 1.3941717952441146e-07, "loss": 0.7258, "step": 6358 }, { "epoch": 0.9484674472369304, "grad_norm": 1.6908622980117798, "learning_rate": 1.386143287134356e-07, "loss": 0.6691, "step": 6359 }, { "epoch": 0.9486166007905138, "grad_norm": 1.5764943361282349, "learning_rate": 1.378137801061763e-07, "loss": 0.6032, "step": 6360 }, { "epoch": 0.9487657543440973, "grad_norm": 1.6222246885299683, "learning_rate": 1.3701553388952627e-07, "loss": 0.6011, "step": 6361 }, { "epoch": 0.9489149078976806, "grad_norm": 1.7369260787963867, "learning_rate": 1.362195902498431e-07, "loss": 0.6553, "step": 6362 }, { "epoch": 0.9490640614512641, "grad_norm": 1.3238664865493774, "learning_rate": 1.354259493729426e-07, "loss": 0.7384, "step": 6363 }, { "epoch": 0.9492132150048475, "grad_norm": 1.497809648513794, "learning_rate": 1.346346114441066e-07, "loss": 0.7007, "step": 6364 }, { "epoch": 0.9493623685584309, "grad_norm": 1.7143412828445435, "learning_rate": 1.3384557664807729e-07, "loss": 0.571, "step": 6365 }, { "epoch": 0.9495115221120143, "grad_norm": 1.4854780435562134, "learning_rate": 1.3305884516906065e-07, "loss": 0.7601, "step": 6366 }, { "epoch": 0.9496606756655978, "grad_norm": 2.3190295696258545, "learning_rate": 1.322744171907242e-07, "loss": 0.7097, "step": 6367 }, { "epoch": 0.9498098292191811, "grad_norm": 1.7539561986923218, "learning_rate": 1.3149229289619593e-07, "loss": 0.6338, "step": 6368 }, { "epoch": 0.9499589827727646, "grad_norm": 1.6013107299804688, "learning_rate": 1.3071247246806972e-07, "loss": 0.6988, "step": 6369 }, { "epoch": 0.9501081363263479, "grad_norm": 1.3242874145507812, "learning_rate": 1.299349560883989e-07, "loss": 0.6994, "step": 6370 }, { "epoch": 0.9502572898799314, "grad_norm": 1.655655860900879, "learning_rate": 1.2915974393870046e-07, "loss": 0.7006, "step": 6371 }, { "epoch": 0.9504064434335148, "grad_norm": 1.4434409141540527, "learning_rate": 1.2838683619995185e-07, "loss": 0.721, "step": 6372 }, { "epoch": 0.9505555969870982, "grad_norm": 2.0333361625671387, "learning_rate": 1.276162330525932e-07, "loss": 0.7133, "step": 6373 }, { "epoch": 0.9507047505406816, "grad_norm": 1.1754850149154663, "learning_rate": 1.2684793467652722e-07, "loss": 0.6667, "step": 6374 }, { "epoch": 0.9508539040942651, "grad_norm": 1.3590848445892334, "learning_rate": 1.2608194125111716e-07, "loss": 0.6237, "step": 6375 }, { "epoch": 0.9510030576478484, "grad_norm": 1.5099419355392456, "learning_rate": 1.2531825295519106e-07, "loss": 0.6942, "step": 6376 }, { "epoch": 0.9511522112014319, "grad_norm": 1.5407177209854126, "learning_rate": 1.2455686996703409e-07, "loss": 0.5908, "step": 6377 }, { "epoch": 0.9513013647550153, "grad_norm": 1.614343285560608, "learning_rate": 1.237977924643985e-07, "loss": 0.6487, "step": 6378 }, { "epoch": 0.9514505183085987, "grad_norm": 2.12199068069458, "learning_rate": 1.2304102062449475e-07, "loss": 0.5965, "step": 6379 }, { "epoch": 0.9515996718621821, "grad_norm": 1.5182231664657593, "learning_rate": 1.2228655462399598e-07, "loss": 0.7343, "step": 6380 }, { "epoch": 0.9517488254157656, "grad_norm": 1.5365610122680664, "learning_rate": 1.2153439463903793e-07, "loss": 0.6486, "step": 6381 }, { "epoch": 0.9518979789693489, "grad_norm": 2.0078272819519043, "learning_rate": 1.2078454084521575e-07, "loss": 0.6647, "step": 6382 }, { "epoch": 0.9520471325229324, "grad_norm": 1.5339514017105103, "learning_rate": 1.2003699341758934e-07, "loss": 0.6319, "step": 6383 }, { "epoch": 0.9521962860765157, "grad_norm": 15.010842323303223, "learning_rate": 1.192917525306758e-07, "loss": 0.6571, "step": 6384 }, { "epoch": 0.9523454396300992, "grad_norm": 1.5317440032958984, "learning_rate": 1.1854881835846044e-07, "loss": 0.5756, "step": 6385 }, { "epoch": 0.9524945931836826, "grad_norm": 1.3636451959609985, "learning_rate": 1.1780819107438112e-07, "loss": 0.6252, "step": 6386 }, { "epoch": 0.952643746737266, "grad_norm": 1.6386688947677612, "learning_rate": 1.1706987085134624e-07, "loss": 0.6183, "step": 6387 }, { "epoch": 0.9527929002908494, "grad_norm": 1.8601127862930298, "learning_rate": 1.1633385786171903e-07, "loss": 0.6806, "step": 6388 }, { "epoch": 0.9529420538444329, "grad_norm": 1.313779592514038, "learning_rate": 1.1560015227732757e-07, "loss": 0.6691, "step": 6389 }, { "epoch": 0.9530912073980162, "grad_norm": 0.5279671549797058, "learning_rate": 1.1486875426945931e-07, "loss": 0.2279, "step": 6390 }, { "epoch": 0.9532403609515997, "grad_norm": 1.9111956357955933, "learning_rate": 1.1413966400886544e-07, "loss": 0.6592, "step": 6391 }, { "epoch": 0.9533895145051831, "grad_norm": 1.3970086574554443, "learning_rate": 1.1341288166575425e-07, "loss": 0.6585, "step": 6392 }, { "epoch": 0.9535386680587665, "grad_norm": 0.5017992258071899, "learning_rate": 1.1268840740979891e-07, "loss": 0.2449, "step": 6393 }, { "epoch": 0.9536878216123499, "grad_norm": 1.4657090902328491, "learning_rate": 1.1196624141013301e-07, "loss": 0.5723, "step": 6394 }, { "epoch": 0.9538369751659334, "grad_norm": 5.324614524841309, "learning_rate": 1.1124638383534947e-07, "loss": 0.5924, "step": 6395 }, { "epoch": 0.9539861287195167, "grad_norm": 1.3458468914031982, "learning_rate": 1.1052883485350607e-07, "loss": 0.693, "step": 6396 }, { "epoch": 0.9541352822731002, "grad_norm": 1.33037269115448, "learning_rate": 1.0981359463211772e-07, "loss": 0.6425, "step": 6397 }, { "epoch": 0.9542844358266835, "grad_norm": 1.2903584241867065, "learning_rate": 1.0910066333816194e-07, "loss": 0.6953, "step": 6398 }, { "epoch": 0.954433589380267, "grad_norm": 1.3841595649719238, "learning_rate": 1.083900411380756e-07, "loss": 0.5909, "step": 6399 }, { "epoch": 0.9545827429338504, "grad_norm": 1.2287966012954712, "learning_rate": 1.0768172819776158e-07, "loss": 0.7225, "step": 6400 }, { "epoch": 0.9547318964874338, "grad_norm": 2.1844563484191895, "learning_rate": 1.0697572468257755e-07, "loss": 0.7615, "step": 6401 }, { "epoch": 0.9548810500410172, "grad_norm": 1.752983808517456, "learning_rate": 1.0627203075734394e-07, "loss": 0.6083, "step": 6402 }, { "epoch": 0.9550302035946007, "grad_norm": 1.620792031288147, "learning_rate": 1.0557064658634486e-07, "loss": 0.5957, "step": 6403 }, { "epoch": 0.955179357148184, "grad_norm": 1.3858425617218018, "learning_rate": 1.0487157233332046e-07, "loss": 0.6428, "step": 6404 }, { "epoch": 0.9553285107017675, "grad_norm": 1.5559390783309937, "learning_rate": 1.0417480816147574e-07, "loss": 0.6984, "step": 6405 }, { "epoch": 0.9554776642553509, "grad_norm": 1.8925327062606812, "learning_rate": 1.0348035423347613e-07, "loss": 0.7335, "step": 6406 }, { "epoch": 0.9556268178089343, "grad_norm": 1.7057000398635864, "learning_rate": 1.0278821071144306e-07, "loss": 0.5397, "step": 6407 }, { "epoch": 0.9557759713625177, "grad_norm": 1.5994633436203003, "learning_rate": 1.0209837775696396e-07, "loss": 0.6428, "step": 6408 }, { "epoch": 0.9559251249161012, "grad_norm": 2.0751020908355713, "learning_rate": 1.0141085553108443e-07, "loss": 0.6711, "step": 6409 }, { "epoch": 0.9560742784696845, "grad_norm": 1.5205585956573486, "learning_rate": 1.0072564419431053e-07, "loss": 0.6372, "step": 6410 }, { "epoch": 0.956223432023268, "grad_norm": 1.7398879528045654, "learning_rate": 1.0004274390660984e-07, "loss": 0.73, "step": 6411 }, { "epoch": 0.9563725855768513, "grad_norm": 3.3153345584869385, "learning_rate": 9.936215482740819e-08, "loss": 0.6789, "step": 6412 }, { "epoch": 0.9565217391304348, "grad_norm": 1.579089641571045, "learning_rate": 9.868387711559624e-08, "loss": 0.6394, "step": 6413 }, { "epoch": 0.9566708926840182, "grad_norm": 1.7675471305847168, "learning_rate": 9.800791092951956e-08, "loss": 0.697, "step": 6414 }, { "epoch": 0.9568200462376016, "grad_norm": 1.2366116046905518, "learning_rate": 9.733425642698857e-08, "loss": 0.6482, "step": 6415 }, { "epoch": 0.956969199791185, "grad_norm": 1.5991110801696777, "learning_rate": 9.666291376527304e-08, "loss": 0.707, "step": 6416 }, { "epoch": 0.9571183533447685, "grad_norm": 2.3119571208953857, "learning_rate": 9.59938831010998e-08, "loss": 0.725, "step": 6417 }, { "epoch": 0.9572675068983518, "grad_norm": 1.678763508796692, "learning_rate": 9.532716459065838e-08, "loss": 0.6925, "step": 6418 }, { "epoch": 0.9574166604519353, "grad_norm": 1.2177515029907227, "learning_rate": 9.466275838960093e-08, "loss": 0.6898, "step": 6419 }, { "epoch": 0.9575658140055187, "grad_norm": 1.3644778728485107, "learning_rate": 9.400066465303448e-08, "loss": 0.6896, "step": 6420 }, { "epoch": 0.9577149675591021, "grad_norm": 1.7024229764938354, "learning_rate": 9.334088353553206e-08, "loss": 0.6051, "step": 6421 }, { "epoch": 0.9578641211126855, "grad_norm": 3.0362892150878906, "learning_rate": 9.268341519112156e-08, "loss": 0.641, "step": 6422 }, { "epoch": 0.958013274666269, "grad_norm": 1.359682321548462, "learning_rate": 9.202825977329355e-08, "loss": 0.7521, "step": 6423 }, { "epoch": 0.9581624282198523, "grad_norm": 1.4246450662612915, "learning_rate": 9.1375417434999e-08, "loss": 0.6787, "step": 6424 }, { "epoch": 0.9583115817734358, "grad_norm": 5.3778815269470215, "learning_rate": 9.072488832864823e-08, "loss": 0.683, "step": 6425 }, { "epoch": 0.9584607353270191, "grad_norm": 1.7870302200317383, "learning_rate": 9.007667260610975e-08, "loss": 0.7225, "step": 6426 }, { "epoch": 0.9586098888806026, "grad_norm": 2.7159173488616943, "learning_rate": 8.943077041871584e-08, "loss": 0.6614, "step": 6427 }, { "epoch": 0.958759042434186, "grad_norm": 1.9771792888641357, "learning_rate": 8.878718191725478e-08, "loss": 0.6572, "step": 6428 }, { "epoch": 0.9589081959877694, "grad_norm": 1.471194863319397, "learning_rate": 8.814590725197636e-08, "loss": 0.6905, "step": 6429 }, { "epoch": 0.9590573495413528, "grad_norm": 1.6194010972976685, "learning_rate": 8.750694657259195e-08, "loss": 0.6504, "step": 6430 }, { "epoch": 0.9592065030949363, "grad_norm": 1.3096964359283447, "learning_rate": 8.687030002827113e-08, "loss": 0.6913, "step": 6431 }, { "epoch": 0.9593556566485196, "grad_norm": 1.5332295894622803, "learning_rate": 8.623596776764165e-08, "loss": 0.6567, "step": 6432 }, { "epoch": 0.9595048102021031, "grad_norm": 1.7869088649749756, "learning_rate": 8.560394993879173e-08, "loss": 0.6351, "step": 6433 }, { "epoch": 0.9596539637556865, "grad_norm": 1.3150495290756226, "learning_rate": 8.497424668927224e-08, "loss": 0.8083, "step": 6434 }, { "epoch": 0.9598031173092699, "grad_norm": 1.2136865854263306, "learning_rate": 8.434685816609e-08, "loss": 0.6727, "step": 6435 }, { "epoch": 0.9599522708628533, "grad_norm": 1.4129923582077026, "learning_rate": 8.372178451571344e-08, "loss": 0.6645, "step": 6436 }, { "epoch": 0.9601014244164368, "grad_norm": 1.2312028408050537, "learning_rate": 8.309902588407026e-08, "loss": 0.699, "step": 6437 }, { "epoch": 0.9602505779700201, "grad_norm": 1.9761031866073608, "learning_rate": 8.247858241654638e-08, "loss": 0.6901, "step": 6438 }, { "epoch": 0.9603997315236036, "grad_norm": 2.0344126224517822, "learning_rate": 8.186045425798817e-08, "loss": 0.6196, "step": 6439 }, { "epoch": 0.9605488850771869, "grad_norm": 1.5615888833999634, "learning_rate": 8.124464155270351e-08, "loss": 0.6875, "step": 6440 }, { "epoch": 0.9606980386307704, "grad_norm": 1.379276990890503, "learning_rate": 8.063114444445741e-08, "loss": 0.6857, "step": 6441 }, { "epoch": 0.9608471921843538, "grad_norm": 1.2180546522140503, "learning_rate": 8.001996307647197e-08, "loss": 0.6044, "step": 6442 }, { "epoch": 0.9609963457379372, "grad_norm": 1.251340627670288, "learning_rate": 7.941109759143528e-08, "loss": 0.6021, "step": 6443 }, { "epoch": 0.9611454992915206, "grad_norm": 1.474552035331726, "learning_rate": 7.880454813148807e-08, "loss": 0.6112, "step": 6444 }, { "epoch": 0.9612946528451041, "grad_norm": 2.5364511013031006, "learning_rate": 7.820031483823487e-08, "loss": 0.5918, "step": 6445 }, { "epoch": 0.9614438063986874, "grad_norm": 1.7646650075912476, "learning_rate": 7.759839785273615e-08, "loss": 0.7112, "step": 6446 }, { "epoch": 0.9615929599522709, "grad_norm": 1.3002485036849976, "learning_rate": 7.699879731551397e-08, "loss": 0.686, "step": 6447 }, { "epoch": 0.9617421135058543, "grad_norm": 2.352428913116455, "learning_rate": 7.640151336654966e-08, "loss": 0.6843, "step": 6448 }, { "epoch": 0.9618912670594377, "grad_norm": 1.5292768478393555, "learning_rate": 7.580654614528282e-08, "loss": 0.6393, "step": 6449 }, { "epoch": 0.9620404206130211, "grad_norm": 1.814801573753357, "learning_rate": 7.521389579061234e-08, "loss": 0.6718, "step": 6450 }, { "epoch": 0.9621895741666046, "grad_norm": 3.839423418045044, "learning_rate": 7.462356244089642e-08, "loss": 0.5744, "step": 6451 }, { "epoch": 0.9623387277201879, "grad_norm": 1.3790194988250732, "learning_rate": 7.403554623395038e-08, "loss": 0.6235, "step": 6452 }, { "epoch": 0.9624878812737714, "grad_norm": 1.7723556756973267, "learning_rate": 7.344984730705218e-08, "loss": 0.6774, "step": 6453 }, { "epoch": 0.9626370348273547, "grad_norm": 2.531189441680908, "learning_rate": 7.286646579693691e-08, "loss": 0.6545, "step": 6454 }, { "epoch": 0.9627861883809382, "grad_norm": 1.7957183122634888, "learning_rate": 7.228540183979782e-08, "loss": 0.5981, "step": 6455 }, { "epoch": 0.9629353419345216, "grad_norm": 1.4970524311065674, "learning_rate": 7.170665557128975e-08, "loss": 0.6324, "step": 6456 }, { "epoch": 0.963084495488105, "grad_norm": 1.6375694274902344, "learning_rate": 7.11302271265224e-08, "loss": 0.6946, "step": 6457 }, { "epoch": 0.9632336490416884, "grad_norm": 2.0180554389953613, "learning_rate": 7.055611664006701e-08, "loss": 0.6674, "step": 6458 }, { "epoch": 0.9633828025952719, "grad_norm": 1.599161982536316, "learning_rate": 6.998432424595524e-08, "loss": 0.6787, "step": 6459 }, { "epoch": 0.9635319561488552, "grad_norm": 1.3593558073043823, "learning_rate": 6.941485007767479e-08, "loss": 0.717, "step": 6460 }, { "epoch": 0.9636811097024387, "grad_norm": 1.4473761320114136, "learning_rate": 6.884769426817261e-08, "loss": 0.6487, "step": 6461 }, { "epoch": 0.9638302632560221, "grad_norm": 1.413630485534668, "learning_rate": 6.828285694985504e-08, "loss": 0.6784, "step": 6462 }, { "epoch": 0.9639794168096055, "grad_norm": 3.321483612060547, "learning_rate": 6.772033825458769e-08, "loss": 0.6433, "step": 6463 }, { "epoch": 0.9641285703631889, "grad_norm": 0.5000460743904114, "learning_rate": 6.716013831369217e-08, "loss": 0.2633, "step": 6464 }, { "epoch": 0.9642777239167724, "grad_norm": 1.2969802618026733, "learning_rate": 6.660225725795278e-08, "loss": 0.6444, "step": 6465 }, { "epoch": 0.9644268774703557, "grad_norm": 1.6131266355514526, "learning_rate": 6.604669521760975e-08, "loss": 0.6727, "step": 6466 }, { "epoch": 0.9645760310239392, "grad_norm": 1.842363953590393, "learning_rate": 6.54934523223627e-08, "loss": 0.6763, "step": 6467 }, { "epoch": 0.9647251845775225, "grad_norm": 1.6948798894882202, "learning_rate": 6.494252870136942e-08, "loss": 0.6726, "step": 6468 }, { "epoch": 0.964874338131106, "grad_norm": 1.4170020818710327, "learning_rate": 6.439392448324699e-08, "loss": 0.746, "step": 6469 }, { "epoch": 0.9650234916846894, "grad_norm": 1.8450533151626587, "learning_rate": 6.384763979607078e-08, "loss": 0.7068, "step": 6470 }, { "epoch": 0.9651726452382728, "grad_norm": 1.1674963235855103, "learning_rate": 6.330367476737321e-08, "loss": 0.6524, "step": 6471 }, { "epoch": 0.9653217987918562, "grad_norm": 1.935728907585144, "learning_rate": 6.276202952414823e-08, "loss": 0.6182, "step": 6472 }, { "epoch": 0.9654709523454397, "grad_norm": 1.702606439590454, "learning_rate": 6.222270419284359e-08, "loss": 0.6283, "step": 6473 }, { "epoch": 0.965620105899023, "grad_norm": 1.284050464630127, "learning_rate": 6.168569889937081e-08, "loss": 0.5925, "step": 6474 }, { "epoch": 0.9657692594526065, "grad_norm": 2.3135790824890137, "learning_rate": 6.115101376909738e-08, "loss": 0.7541, "step": 6475 }, { "epoch": 0.9659184130061899, "grad_norm": 1.3385021686553955, "learning_rate": 6.061864892684788e-08, "loss": 0.6664, "step": 6476 }, { "epoch": 0.9660675665597733, "grad_norm": 2.2026193141937256, "learning_rate": 6.008860449690512e-08, "loss": 0.6491, "step": 6477 }, { "epoch": 0.9662167201133567, "grad_norm": 2.897037982940674, "learning_rate": 5.956088060301457e-08, "loss": 0.666, "step": 6478 }, { "epoch": 0.9663658736669402, "grad_norm": 1.571772813796997, "learning_rate": 5.903547736837323e-08, "loss": 0.5997, "step": 6479 }, { "epoch": 0.9665150272205235, "grad_norm": 1.5242961645126343, "learning_rate": 5.851239491564298e-08, "loss": 0.6215, "step": 6480 }, { "epoch": 0.966664180774107, "grad_norm": 1.9185118675231934, "learning_rate": 5.799163336693836e-08, "loss": 0.6527, "step": 6481 }, { "epoch": 0.9668133343276903, "grad_norm": 1.7623786926269531, "learning_rate": 5.7473192843835454e-08, "loss": 0.6682, "step": 6482 }, { "epoch": 0.9669624878812738, "grad_norm": 1.2563387155532837, "learning_rate": 5.6957073467367454e-08, "loss": 0.6243, "step": 6483 }, { "epoch": 0.9671116414348572, "grad_norm": 1.3687199354171753, "learning_rate": 5.644327535802685e-08, "loss": 0.6789, "step": 6484 }, { "epoch": 0.9672607949884406, "grad_norm": 1.2949169874191284, "learning_rate": 5.5931798635761036e-08, "loss": 0.6396, "step": 6485 }, { "epoch": 0.967409948542024, "grad_norm": 1.280173659324646, "learning_rate": 5.542264341997894e-08, "loss": 0.6569, "step": 6486 }, { "epoch": 0.9675591020956075, "grad_norm": 1.3101993799209595, "learning_rate": 5.491580982954547e-08, "loss": 0.6567, "step": 6487 }, { "epoch": 0.9677082556491908, "grad_norm": 1.1647769212722778, "learning_rate": 5.441129798278488e-08, "loss": 0.8302, "step": 6488 }, { "epoch": 0.9678574092027743, "grad_norm": 1.8296993970870972, "learning_rate": 5.3909107997477395e-08, "loss": 0.6543, "step": 6489 }, { "epoch": 0.9680065627563577, "grad_norm": 2.1541337966918945, "learning_rate": 5.3409239990863673e-08, "loss": 0.6375, "step": 6490 }, { "epoch": 0.9681557163099411, "grad_norm": 1.4314217567443848, "learning_rate": 5.291169407964147e-08, "loss": 0.6754, "step": 6491 }, { "epoch": 0.9683048698635245, "grad_norm": 1.5291283130645752, "learning_rate": 5.2416470379964556e-08, "loss": 0.7801, "step": 6492 }, { "epoch": 0.968454023417108, "grad_norm": 2.707916736602783, "learning_rate": 5.192356900744711e-08, "loss": 0.6721, "step": 6493 }, { "epoch": 0.9686031769706913, "grad_norm": 1.3193244934082031, "learning_rate": 5.1432990077160405e-08, "loss": 0.6613, "step": 6494 }, { "epoch": 0.9687523305242748, "grad_norm": 1.2450473308563232, "learning_rate": 5.0944733703632845e-08, "loss": 0.6937, "step": 6495 }, { "epoch": 0.9689014840778581, "grad_norm": 1.2478764057159424, "learning_rate": 5.04588000008499e-08, "loss": 0.6362, "step": 6496 }, { "epoch": 0.9690506376314416, "grad_norm": 1.4063745737075806, "learning_rate": 4.9975189082258625e-08, "loss": 0.6805, "step": 6497 }, { "epoch": 0.969199791185025, "grad_norm": 1.674002766609192, "learning_rate": 4.949390106075758e-08, "loss": 0.7151, "step": 6498 }, { "epoch": 0.9693489447386084, "grad_norm": 1.3044099807739258, "learning_rate": 4.901493604870799e-08, "loss": 0.6306, "step": 6499 }, { "epoch": 0.9694980982921918, "grad_norm": 2.2860023975372314, "learning_rate": 4.853829415792932e-08, "loss": 0.706, "step": 6500 }, { "epoch": 0.9696472518457753, "grad_norm": 1.398478388786316, "learning_rate": 4.8063975499694774e-08, "loss": 0.6679, "step": 6501 }, { "epoch": 0.9697964053993586, "grad_norm": 1.4376466274261475, "learning_rate": 4.7591980184736874e-08, "loss": 0.6884, "step": 6502 }, { "epoch": 0.9699455589529421, "grad_norm": 1.514479637145996, "learning_rate": 4.7122308323246377e-08, "loss": 0.6439, "step": 6503 }, { "epoch": 0.9700947125065255, "grad_norm": 1.6193369626998901, "learning_rate": 4.6654960024871134e-08, "loss": 0.703, "step": 6504 }, { "epoch": 0.9702438660601089, "grad_norm": 1.2601851224899292, "learning_rate": 4.618993539871719e-08, "loss": 0.6908, "step": 6505 }, { "epoch": 0.9703930196136923, "grad_norm": 1.475164771080017, "learning_rate": 4.57272345533466e-08, "loss": 0.6598, "step": 6506 }, { "epoch": 0.9705421731672758, "grad_norm": 1.6167230606079102, "learning_rate": 4.526685759678073e-08, "loss": 0.647, "step": 6507 }, { "epoch": 0.9706913267208591, "grad_norm": 1.6703344583511353, "learning_rate": 4.480880463649584e-08, "loss": 0.6174, "step": 6508 }, { "epoch": 0.9708404802744426, "grad_norm": 1.815641164779663, "learning_rate": 4.4353075779429713e-08, "loss": 0.6446, "step": 6509 }, { "epoch": 0.9709896338280259, "grad_norm": 1.7372397184371948, "learning_rate": 4.389967113197391e-08, "loss": 0.639, "step": 6510 }, { "epoch": 0.9711387873816094, "grad_norm": 1.3941384553909302, "learning_rate": 4.3448590799978205e-08, "loss": 0.6156, "step": 6511 }, { "epoch": 0.9712879409351928, "grad_norm": 1.509088397026062, "learning_rate": 4.299983488875059e-08, "loss": 0.6851, "step": 6512 }, { "epoch": 0.9714370944887762, "grad_norm": 1.7262693643569946, "learning_rate": 4.255340350305726e-08, "loss": 0.6533, "step": 6513 }, { "epoch": 0.9715862480423596, "grad_norm": 1.7081005573272705, "learning_rate": 4.210929674711817e-08, "loss": 0.6322, "step": 6514 }, { "epoch": 0.9717354015959431, "grad_norm": 0.5494892597198486, "learning_rate": 4.166751472461483e-08, "loss": 0.2437, "step": 6515 }, { "epoch": 0.9718845551495264, "grad_norm": 1.4193624258041382, "learning_rate": 4.1228057538683644e-08, "loss": 0.6761, "step": 6516 }, { "epoch": 0.9720337087031099, "grad_norm": 1.2643038034439087, "learning_rate": 4.0790925291918084e-08, "loss": 0.702, "step": 6517 }, { "epoch": 0.9721828622566933, "grad_norm": 1.9061353206634521, "learning_rate": 4.035611808636986e-08, "loss": 0.6278, "step": 6518 }, { "epoch": 0.9723320158102767, "grad_norm": 1.789146900177002, "learning_rate": 3.9923636023547765e-08, "loss": 0.7527, "step": 6519 }, { "epoch": 0.9724811693638601, "grad_norm": 1.8545371294021606, "learning_rate": 3.94934792044166e-08, "loss": 0.6699, "step": 6520 }, { "epoch": 0.9726303229174436, "grad_norm": 1.2280304431915283, "learning_rate": 3.906564772939936e-08, "loss": 0.6748, "step": 6521 }, { "epoch": 0.9727794764710269, "grad_norm": 1.4045737981796265, "learning_rate": 3.8640141698378376e-08, "loss": 0.7078, "step": 6522 }, { "epoch": 0.9729286300246104, "grad_norm": 1.5745350122451782, "learning_rate": 3.821696121068752e-08, "loss": 0.6615, "step": 6523 }, { "epoch": 0.9730777835781937, "grad_norm": 1.5055869817733765, "learning_rate": 3.779610636512221e-08, "loss": 0.6208, "step": 6524 }, { "epoch": 0.9732269371317772, "grad_norm": 2.196641445159912, "learning_rate": 3.737757725993496e-08, "loss": 0.662, "step": 6525 }, { "epoch": 0.9733760906853606, "grad_norm": 1.6533865928649902, "learning_rate": 3.696137399283206e-08, "loss": 0.6323, "step": 6526 }, { "epoch": 0.973525244238944, "grad_norm": 1.7282915115356445, "learning_rate": 3.654749666098023e-08, "loss": 0.7076, "step": 6527 }, { "epoch": 0.9736743977925274, "grad_norm": 1.2113324403762817, "learning_rate": 3.613594536100107e-08, "loss": 0.701, "step": 6528 }, { "epoch": 0.9738235513461109, "grad_norm": 1.5335389375686646, "learning_rate": 3.5726720188974384e-08, "loss": 0.6189, "step": 6529 }, { "epoch": 0.9739727048996942, "grad_norm": 1.3832650184631348, "learning_rate": 3.531982124043598e-08, "loss": 0.6789, "step": 6530 }, { "epoch": 0.9741218584532777, "grad_norm": 1.5184857845306396, "learning_rate": 3.491524861037876e-08, "loss": 0.6482, "step": 6531 }, { "epoch": 0.9742710120068611, "grad_norm": 1.4965540170669556, "learning_rate": 3.451300239325384e-08, "loss": 0.6592, "step": 6532 }, { "epoch": 0.9744201655604445, "grad_norm": 1.8796719312667847, "learning_rate": 3.41130826829672e-08, "loss": 0.7401, "step": 6533 }, { "epoch": 0.9745693191140279, "grad_norm": 1.3212231397628784, "learning_rate": 3.371548957288418e-08, "loss": 0.7626, "step": 6534 }, { "epoch": 0.9747184726676114, "grad_norm": 1.0522807836532593, "learning_rate": 3.332022315582273e-08, "loss": 0.6642, "step": 6535 }, { "epoch": 0.9748676262211947, "grad_norm": 1.4529505968093872, "learning_rate": 3.292728352406238e-08, "loss": 0.5666, "step": 6536 }, { "epoch": 0.9750167797747782, "grad_norm": 1.8461374044418335, "learning_rate": 3.253667076933753e-08, "loss": 0.652, "step": 6537 }, { "epoch": 0.9751659333283615, "grad_norm": 1.9515430927276611, "learning_rate": 3.214838498283857e-08, "loss": 0.6508, "step": 6538 }, { "epoch": 0.975315086881945, "grad_norm": 1.708074688911438, "learning_rate": 3.176242625521297e-08, "loss": 0.6326, "step": 6539 }, { "epoch": 0.9754642404355284, "grad_norm": 1.2766838073730469, "learning_rate": 3.137879467656535e-08, "loss": 0.743, "step": 6540 }, { "epoch": 0.9756133939891118, "grad_norm": 1.2188665866851807, "learning_rate": 3.099749033645738e-08, "loss": 0.6969, "step": 6541 }, { "epoch": 0.9757625475426952, "grad_norm": 2.7052390575408936, "learning_rate": 3.061851332390786e-08, "loss": 0.7352, "step": 6542 }, { "epoch": 0.9759117010962787, "grad_norm": 1.3569910526275635, "learning_rate": 3.024186372738935e-08, "loss": 0.6211, "step": 6543 }, { "epoch": 0.976060854649862, "grad_norm": 1.8329333066940308, "learning_rate": 2.986754163483485e-08, "loss": 0.6391, "step": 6544 }, { "epoch": 0.9762100082034455, "grad_norm": 1.7140649557113647, "learning_rate": 2.949554713363112e-08, "loss": 0.6712, "step": 6545 }, { "epoch": 0.9763591617570289, "grad_norm": 1.8784676790237427, "learning_rate": 2.9125880310623132e-08, "loss": 0.6937, "step": 6546 }, { "epoch": 0.9765083153106123, "grad_norm": 2.042551040649414, "learning_rate": 2.875854125211297e-08, "loss": 0.6631, "step": 6547 }, { "epoch": 0.9766574688641957, "grad_norm": 1.4100693464279175, "learning_rate": 2.8393530043856476e-08, "loss": 0.635, "step": 6548 }, { "epoch": 0.9768066224177792, "grad_norm": 1.9182924032211304, "learning_rate": 2.803084677106882e-08, "loss": 0.6162, "step": 6549 }, { "epoch": 0.9769557759713625, "grad_norm": 1.4762517213821411, "learning_rate": 2.767049151842005e-08, "loss": 0.598, "step": 6550 }, { "epoch": 0.977104929524946, "grad_norm": 1.306009292602539, "learning_rate": 2.731246437003843e-08, "loss": 0.6732, "step": 6551 }, { "epoch": 0.9772540830785293, "grad_norm": 1.5191779136657715, "learning_rate": 2.695676540950709e-08, "loss": 0.6437, "step": 6552 }, { "epoch": 0.9774032366321128, "grad_norm": 1.4467896223068237, "learning_rate": 2.660339471986739e-08, "loss": 0.681, "step": 6553 }, { "epoch": 0.9775523901856962, "grad_norm": 1.4009984731674194, "learning_rate": 2.6252352383613346e-08, "loss": 0.7267, "step": 6554 }, { "epoch": 0.9777015437392796, "grad_norm": 1.5314245223999023, "learning_rate": 2.59036384827005e-08, "loss": 0.6451, "step": 6555 }, { "epoch": 0.977850697292863, "grad_norm": 0.48760414123535156, "learning_rate": 2.555725309853818e-08, "loss": 0.2681, "step": 6556 }, { "epoch": 0.9779998508464464, "grad_norm": 1.3690111637115479, "learning_rate": 2.5213196311990595e-08, "loss": 0.7234, "step": 6557 }, { "epoch": 0.9781490044000298, "grad_norm": 1.3749032020568848, "learning_rate": 2.4871468203382376e-08, "loss": 0.5448, "step": 6558 }, { "epoch": 0.9782981579536132, "grad_norm": 1.303972601890564, "learning_rate": 2.4532068852489708e-08, "loss": 0.7493, "step": 6559 }, { "epoch": 0.9784473115071967, "grad_norm": 1.366420865058899, "learning_rate": 2.4194998338548103e-08, "loss": 0.6999, "step": 6560 }, { "epoch": 0.97859646506078, "grad_norm": 2.1814815998077393, "learning_rate": 2.3860256740250166e-08, "loss": 0.6431, "step": 6561 }, { "epoch": 0.9787456186143635, "grad_norm": 1.3130985498428345, "learning_rate": 2.352784413574227e-08, "loss": 0.6694, "step": 6562 }, { "epoch": 0.9788947721679468, "grad_norm": 1.4959808588027954, "learning_rate": 2.3197760602629015e-08, "loss": 0.7181, "step": 6563 }, { "epoch": 0.9790439257215303, "grad_norm": 1.691279411315918, "learning_rate": 2.2870006217969864e-08, "loss": 0.6116, "step": 6564 }, { "epoch": 0.9791930792751137, "grad_norm": 1.4928724765777588, "learning_rate": 2.25445810582825e-08, "loss": 0.6131, "step": 6565 }, { "epoch": 0.9793422328286971, "grad_norm": 1.1363680362701416, "learning_rate": 2.2221485199537262e-08, "loss": 0.6015, "step": 6566 }, { "epoch": 0.9794913863822805, "grad_norm": 1.527871012687683, "learning_rate": 2.1900718717164927e-08, "loss": 0.7062, "step": 6567 }, { "epoch": 0.979640539935864, "grad_norm": 2.122734546661377, "learning_rate": 2.1582281686048924e-08, "loss": 0.6575, "step": 6568 }, { "epoch": 0.9797896934894473, "grad_norm": 2.7743115425109863, "learning_rate": 2.1266174180532006e-08, "loss": 0.6549, "step": 6569 }, { "epoch": 0.9799388470430308, "grad_norm": 2.1743874549865723, "learning_rate": 2.09523962744107e-08, "loss": 0.6918, "step": 6570 }, { "epoch": 0.9800880005966142, "grad_norm": 1.5838372707366943, "learning_rate": 2.0640948040937525e-08, "loss": 0.5756, "step": 6571 }, { "epoch": 0.9802371541501976, "grad_norm": 1.4895191192626953, "learning_rate": 2.0331829552824313e-08, "loss": 0.7184, "step": 6572 }, { "epoch": 0.980386307703781, "grad_norm": 1.9142117500305176, "learning_rate": 2.0025040882234447e-08, "loss": 0.756, "step": 6573 }, { "epoch": 0.9805354612573645, "grad_norm": 2.6301591396331787, "learning_rate": 1.9720582100791753e-08, "loss": 0.6745, "step": 6574 }, { "epoch": 0.9806846148109478, "grad_norm": 2.041356086730957, "learning_rate": 1.94184532795727e-08, "loss": 0.6732, "step": 6575 }, { "epoch": 0.9808337683645313, "grad_norm": 0.5188751816749573, "learning_rate": 1.9118654489110877e-08, "loss": 0.2387, "step": 6576 }, { "epoch": 0.9809829219181146, "grad_norm": 1.3645062446594238, "learning_rate": 1.8821185799398067e-08, "loss": 0.6571, "step": 6577 }, { "epoch": 0.9811320754716981, "grad_norm": 1.764573574066162, "learning_rate": 1.8526047279878723e-08, "loss": 0.6434, "step": 6578 }, { "epoch": 0.9812812290252815, "grad_norm": 2.0818657875061035, "learning_rate": 1.8233238999454394e-08, "loss": 0.6698, "step": 6579 }, { "epoch": 0.981430382578865, "grad_norm": 1.3012402057647705, "learning_rate": 1.7942761026484845e-08, "loss": 0.6782, "step": 6580 }, { "epoch": 0.9815795361324483, "grad_norm": 1.2175127267837524, "learning_rate": 1.7654613428782498e-08, "loss": 0.6813, "step": 6581 }, { "epoch": 0.9817286896860318, "grad_norm": 2.1455252170562744, "learning_rate": 1.7368796273617982e-08, "loss": 0.6033, "step": 6582 }, { "epoch": 0.9818778432396151, "grad_norm": 2.330303907394409, "learning_rate": 1.708530962771793e-08, "loss": 0.7199, "step": 6583 }, { "epoch": 0.9820269967931986, "grad_norm": 1.5027215480804443, "learning_rate": 1.6804153557261615e-08, "loss": 0.6674, "step": 6584 }, { "epoch": 0.982176150346782, "grad_norm": 2.3196003437042236, "learning_rate": 1.652532812788987e-08, "loss": 0.7541, "step": 6585 }, { "epoch": 0.9823253039003654, "grad_norm": 1.259651780128479, "learning_rate": 1.6248833404692856e-08, "loss": 0.6985, "step": 6586 }, { "epoch": 0.9824744574539488, "grad_norm": 1.5488301515579224, "learning_rate": 1.597466945222337e-08, "loss": 0.6228, "step": 6587 }, { "epoch": 0.9826236110075323, "grad_norm": 1.8107165098190308, "learning_rate": 1.570283633448466e-08, "loss": 0.7111, "step": 6588 }, { "epoch": 0.9827727645611156, "grad_norm": 2.0559661388397217, "learning_rate": 1.5433334114938193e-08, "loss": 0.521, "step": 6589 }, { "epoch": 0.9829219181146991, "grad_norm": 1.4974714517593384, "learning_rate": 1.516616285650141e-08, "loss": 0.6171, "step": 6590 }, { "epoch": 0.9830710716682824, "grad_norm": 1.755441427230835, "learning_rate": 1.4901322621547753e-08, "loss": 0.6125, "step": 6591 }, { "epoch": 0.9832202252218659, "grad_norm": 1.7174835205078125, "learning_rate": 1.4638813471904435e-08, "loss": 0.6631, "step": 6592 }, { "epoch": 0.9833693787754493, "grad_norm": 1.8339409828186035, "learning_rate": 1.4378635468855762e-08, "loss": 0.613, "step": 6593 }, { "epoch": 0.9835185323290327, "grad_norm": 1.4531294107437134, "learning_rate": 1.412078867314426e-08, "loss": 0.6526, "step": 6594 }, { "epoch": 0.9836676858826161, "grad_norm": 1.362888216972351, "learning_rate": 1.3865273144963998e-08, "loss": 0.6763, "step": 6595 }, { "epoch": 0.9838168394361996, "grad_norm": 2.443516731262207, "learning_rate": 1.3612088943967262e-08, "loss": 0.6431, "step": 6596 }, { "epoch": 0.9839659929897829, "grad_norm": 0.99629807472229, "learning_rate": 1.3361236129261212e-08, "loss": 0.7128, "step": 6597 }, { "epoch": 0.9841151465433664, "grad_norm": 3.376962184906006, "learning_rate": 1.3112714759409006e-08, "loss": 0.6465, "step": 6598 }, { "epoch": 0.9842643000969498, "grad_norm": 1.3277277946472168, "learning_rate": 1.2866524892430898e-08, "loss": 0.7151, "step": 6599 }, { "epoch": 0.9844134536505332, "grad_norm": 1.4433153867721558, "learning_rate": 1.2622666585799803e-08, "loss": 0.6941, "step": 6600 }, { "epoch": 0.9845626072041166, "grad_norm": 1.8802790641784668, "learning_rate": 1.2381139896445737e-08, "loss": 0.7029, "step": 6601 }, { "epoch": 0.9847117607577001, "grad_norm": 1.4860526323318481, "learning_rate": 1.2141944880756928e-08, "loss": 0.6641, "step": 6602 }, { "epoch": 0.9848609143112834, "grad_norm": 1.3681905269622803, "learning_rate": 1.1905081594573153e-08, "loss": 0.6547, "step": 6603 }, { "epoch": 0.9850100678648669, "grad_norm": 1.2611932754516602, "learning_rate": 1.167055009319129e-08, "loss": 0.5681, "step": 6604 }, { "epoch": 0.9851592214184502, "grad_norm": 0.5331334471702576, "learning_rate": 1.143835043136643e-08, "loss": 0.2365, "step": 6605 }, { "epoch": 0.9853083749720337, "grad_norm": 1.2922743558883667, "learning_rate": 1.120848266330521e-08, "loss": 0.6412, "step": 6606 }, { "epoch": 0.9854575285256171, "grad_norm": 1.2165637016296387, "learning_rate": 1.098094684267137e-08, "loss": 0.6851, "step": 6607 }, { "epoch": 0.9856066820792005, "grad_norm": 1.691084861755371, "learning_rate": 1.0755743022585751e-08, "loss": 0.6818, "step": 6608 }, { "epoch": 0.9857558356327839, "grad_norm": 1.6440651416778564, "learning_rate": 1.0532871255624077e-08, "loss": 0.6023, "step": 6609 }, { "epoch": 0.9859049891863674, "grad_norm": 1.5611330270767212, "learning_rate": 1.0312331593815839e-08, "loss": 0.7473, "step": 6610 }, { "epoch": 0.9860541427399507, "grad_norm": 1.644195795059204, "learning_rate": 1.0094124088648739e-08, "loss": 0.6536, "step": 6611 }, { "epoch": 0.9862032962935342, "grad_norm": 1.4742316007614136, "learning_rate": 9.878248791063138e-09, "loss": 0.7051, "step": 6612 }, { "epoch": 0.9863524498471176, "grad_norm": 1.7448066473007202, "learning_rate": 9.664705751457615e-09, "loss": 0.7027, "step": 6613 }, { "epoch": 0.986501603400701, "grad_norm": 1.281140685081482, "learning_rate": 9.45349501968451e-09, "loss": 0.674, "step": 6614 }, { "epoch": 0.9866507569542844, "grad_norm": 1.4871649742126465, "learning_rate": 9.244616645053272e-09, "loss": 0.7036, "step": 6615 }, { "epoch": 0.9867999105078679, "grad_norm": 1.766883134841919, "learning_rate": 9.038070676328226e-09, "loss": 0.6375, "step": 6616 }, { "epoch": 0.9869490640614512, "grad_norm": 1.6624665260314941, "learning_rate": 8.833857161726355e-09, "loss": 0.6334, "step": 6617 }, { "epoch": 0.9870982176150347, "grad_norm": 1.7320311069488525, "learning_rate": 8.631976148925081e-09, "loss": 0.605, "step": 6618 }, { "epoch": 0.987247371168618, "grad_norm": 0.5957942008972168, "learning_rate": 8.432427685054479e-09, "loss": 0.2453, "step": 6619 }, { "epoch": 0.9873965247222015, "grad_norm": 1.4177342653274536, "learning_rate": 8.235211816699506e-09, "loss": 0.7121, "step": 6620 }, { "epoch": 0.9875456782757849, "grad_norm": 4.197437763214111, "learning_rate": 8.040328589901114e-09, "loss": 0.7629, "step": 6621 }, { "epoch": 0.9876948318293683, "grad_norm": 2.0037100315093994, "learning_rate": 7.847778050157351e-09, "loss": 0.6531, "step": 6622 }, { "epoch": 0.9878439853829517, "grad_norm": 0.5205103754997253, "learning_rate": 7.657560242420037e-09, "loss": 0.25, "step": 6623 }, { "epoch": 0.9879931389365352, "grad_norm": 0.5453811883926392, "learning_rate": 7.469675211096983e-09, "loss": 0.2604, "step": 6624 }, { "epoch": 0.9881422924901185, "grad_norm": 2.356492280960083, "learning_rate": 7.28412300004977e-09, "loss": 0.6871, "step": 6625 }, { "epoch": 0.988291446043702, "grad_norm": 2.3255279064178467, "learning_rate": 7.10090365259819e-09, "loss": 0.5565, "step": 6626 }, { "epoch": 0.9884405995972854, "grad_norm": 0.4984394609928131, "learning_rate": 6.920017211515806e-09, "loss": 0.2528, "step": 6627 }, { "epoch": 0.9885897531508688, "grad_norm": 1.2164465188980103, "learning_rate": 6.7414637190310605e-09, "loss": 0.7176, "step": 6628 }, { "epoch": 0.9887389067044522, "grad_norm": 2.3341827392578125, "learning_rate": 6.5652432168283875e-09, "loss": 0.6673, "step": 6629 }, { "epoch": 0.9888880602580357, "grad_norm": 1.4179353713989258, "learning_rate": 6.391355746048211e-09, "loss": 0.6432, "step": 6630 }, { "epoch": 0.989037213811619, "grad_norm": 1.5636992454528809, "learning_rate": 6.219801347285837e-09, "loss": 0.717, "step": 6631 }, { "epoch": 0.9891863673652025, "grad_norm": 1.8043196201324463, "learning_rate": 6.050580060590339e-09, "loss": 0.6517, "step": 6632 }, { "epoch": 0.9893355209187858, "grad_norm": 1.939574122428894, "learning_rate": 5.883691925469004e-09, "loss": 0.7094, "step": 6633 }, { "epoch": 0.9894846744723693, "grad_norm": 2.2525081634521484, "learning_rate": 5.719136980882889e-09, "loss": 0.5998, "step": 6634 }, { "epoch": 0.9896338280259527, "grad_norm": 3.223072052001953, "learning_rate": 5.556915265247931e-09, "loss": 0.6462, "step": 6635 }, { "epoch": 0.9897829815795361, "grad_norm": 1.4500138759613037, "learning_rate": 5.397026816434947e-09, "loss": 0.6716, "step": 6636 }, { "epoch": 0.9899321351331195, "grad_norm": 1.5132025480270386, "learning_rate": 5.239471671772967e-09, "loss": 0.565, "step": 6637 }, { "epoch": 0.990081288686703, "grad_norm": 1.6754741668701172, "learning_rate": 5.084249868042568e-09, "loss": 0.6527, "step": 6638 }, { "epoch": 0.9902304422402863, "grad_norm": 1.7496129274368286, "learning_rate": 4.93136144148143e-09, "loss": 0.6621, "step": 6639 }, { "epoch": 0.9903795957938698, "grad_norm": 1.3195013999938965, "learning_rate": 4.780806427783224e-09, "loss": 0.6688, "step": 6640 }, { "epoch": 0.9905287493474532, "grad_norm": 1.4069883823394775, "learning_rate": 4.632584862095391e-09, "loss": 0.7222, "step": 6641 }, { "epoch": 0.9906779029010366, "grad_norm": 1.9413018226623535, "learning_rate": 4.48669677902025e-09, "loss": 0.6375, "step": 6642 }, { "epoch": 0.99082705645462, "grad_norm": 1.8596904277801514, "learning_rate": 4.3431422126183344e-09, "loss": 0.7466, "step": 6643 }, { "epoch": 0.9909762100082035, "grad_norm": 1.4521260261535645, "learning_rate": 4.201921196402836e-09, "loss": 0.6855, "step": 6644 }, { "epoch": 0.9911253635617868, "grad_norm": 1.3038419485092163, "learning_rate": 4.063033763341828e-09, "loss": 0.6685, "step": 6645 }, { "epoch": 0.9912745171153703, "grad_norm": 0.545604944229126, "learning_rate": 3.9264799458593736e-09, "loss": 0.2527, "step": 6646 }, { "epoch": 0.9914236706689536, "grad_norm": 1.1618077754974365, "learning_rate": 3.7922597758355275e-09, "loss": 0.667, "step": 6647 }, { "epoch": 0.9915728242225371, "grad_norm": 1.980218768119812, "learning_rate": 3.660373284605223e-09, "loss": 0.6005, "step": 6648 }, { "epoch": 0.9917219777761205, "grad_norm": 1.6801865100860596, "learning_rate": 3.5308205029571663e-09, "loss": 0.7403, "step": 6649 }, { "epoch": 0.991871131329704, "grad_norm": 1.6387490034103394, "learning_rate": 3.4036014611371624e-09, "loss": 0.7151, "step": 6650 }, { "epoch": 0.9920202848832873, "grad_norm": 1.6068006753921509, "learning_rate": 3.2787161888447883e-09, "loss": 0.6238, "step": 6651 }, { "epoch": 0.9921694384368708, "grad_norm": 2.0280652046203613, "learning_rate": 3.15616471523561e-09, "loss": 0.6918, "step": 6652 }, { "epoch": 0.9923185919904541, "grad_norm": 1.4479981660842896, "learning_rate": 3.035947068920075e-09, "loss": 0.7176, "step": 6653 }, { "epoch": 0.9924677455440376, "grad_norm": 1.3140614032745361, "learning_rate": 2.9180632779624017e-09, "loss": 0.6095, "step": 6654 }, { "epoch": 0.992616899097621, "grad_norm": 1.1769652366638184, "learning_rate": 2.8025133698861282e-09, "loss": 0.6327, "step": 6655 }, { "epoch": 0.9927660526512044, "grad_norm": 2.4316213130950928, "learning_rate": 2.6892973716641235e-09, "loss": 0.6115, "step": 6656 }, { "epoch": 0.9929152062047878, "grad_norm": 1.4997793436050415, "learning_rate": 2.578415309729687e-09, "loss": 0.6182, "step": 6657 }, { "epoch": 0.9930643597583713, "grad_norm": 1.3803917169570923, "learning_rate": 2.469867209967669e-09, "loss": 0.7027, "step": 6658 }, { "epoch": 0.9932135133119546, "grad_norm": 3.3067567348480225, "learning_rate": 2.36365309772002e-09, "loss": 0.6395, "step": 6659 }, { "epoch": 0.9933626668655381, "grad_norm": 1.824347972869873, "learning_rate": 2.259772997782461e-09, "loss": 0.6312, "step": 6660 }, { "epoch": 0.9935118204191214, "grad_norm": 1.6259453296661377, "learning_rate": 2.1582269344067043e-09, "loss": 0.6907, "step": 6661 }, { "epoch": 0.9936609739727049, "grad_norm": 1.2530416250228882, "learning_rate": 2.0590149312993412e-09, "loss": 0.6929, "step": 6662 }, { "epoch": 0.9938101275262883, "grad_norm": 1.469118595123291, "learning_rate": 1.9621370116218453e-09, "loss": 0.6482, "step": 6663 }, { "epoch": 0.9939592810798717, "grad_norm": 1.6492596864700317, "learning_rate": 1.8675931979916794e-09, "loss": 0.6524, "step": 6664 }, { "epoch": 0.9941084346334551, "grad_norm": 1.5472605228424072, "learning_rate": 1.7753835124800778e-09, "loss": 0.7266, "step": 6665 }, { "epoch": 0.9942575881870386, "grad_norm": 1.3061776161193848, "learning_rate": 1.6855079766142646e-09, "loss": 0.6285, "step": 6666 }, { "epoch": 0.9944067417406219, "grad_norm": 1.5922590494155884, "learning_rate": 1.5979666113763448e-09, "loss": 0.6565, "step": 6667 }, { "epoch": 0.9945558952942054, "grad_norm": 1.1843360662460327, "learning_rate": 1.5127594372033038e-09, "loss": 0.6358, "step": 6668 }, { "epoch": 0.9947050488477888, "grad_norm": 1.4402717351913452, "learning_rate": 1.4298864739870078e-09, "loss": 0.754, "step": 6669 }, { "epoch": 0.9948542024013722, "grad_norm": 1.7544065713882446, "learning_rate": 1.349347741075313e-09, "loss": 0.643, "step": 6670 }, { "epoch": 0.9950033559549556, "grad_norm": 1.629891037940979, "learning_rate": 1.2711432572698468e-09, "loss": 0.6354, "step": 6671 }, { "epoch": 0.9951525095085391, "grad_norm": 1.4190497398376465, "learning_rate": 1.1952730408282264e-09, "loss": 0.6807, "step": 6672 }, { "epoch": 0.9953016630621224, "grad_norm": 2.27390193939209, "learning_rate": 1.12173710946184e-09, "loss": 0.6922, "step": 6673 }, { "epoch": 0.9954508166157059, "grad_norm": 1.2268120050430298, "learning_rate": 1.0505354803402867e-09, "loss": 0.7164, "step": 6674 }, { "epoch": 0.9955999701692893, "grad_norm": 3.6840908527374268, "learning_rate": 9.816681700847152e-10, "loss": 0.6841, "step": 6675 }, { "epoch": 0.9957491237228727, "grad_norm": 1.5584170818328857, "learning_rate": 9.151351947722653e-10, "loss": 0.71, "step": 6676 }, { "epoch": 0.9958982772764561, "grad_norm": 1.517064094543457, "learning_rate": 8.509365699360672e-10, "loss": 0.599, "step": 6677 }, { "epoch": 0.9960474308300395, "grad_norm": 1.2011682987213135, "learning_rate": 7.890723105641318e-10, "loss": 0.7318, "step": 6678 }, { "epoch": 0.9961965843836229, "grad_norm": 1.6266292333602905, "learning_rate": 7.295424310982402e-10, "loss": 0.6534, "step": 6679 }, { "epoch": 0.9963457379372064, "grad_norm": 1.8771405220031738, "learning_rate": 6.723469454372744e-10, "loss": 0.642, "step": 6680 }, { "epoch": 0.9964948914907897, "grad_norm": 1.2881919145584106, "learning_rate": 6.174858669316664e-10, "loss": 0.6493, "step": 6681 }, { "epoch": 0.9966440450443732, "grad_norm": 2.2868926525115967, "learning_rate": 5.649592083911693e-10, "loss": 0.726, "step": 6682 }, { "epoch": 0.9967931985979566, "grad_norm": 1.6260236501693726, "learning_rate": 5.147669820770861e-10, "loss": 0.7144, "step": 6683 }, { "epoch": 0.99694235215154, "grad_norm": 1.422614574432373, "learning_rate": 4.669091997078212e-10, "loss": 0.5898, "step": 6684 }, { "epoch": 0.9970915057051234, "grad_norm": 3.1014745235443115, "learning_rate": 4.2138587245665883e-10, "loss": 0.6583, "step": 6685 }, { "epoch": 0.9972406592587069, "grad_norm": 1.6508026123046875, "learning_rate": 3.7819701094954365e-10, "loss": 0.6446, "step": 6686 }, { "epoch": 0.9973898128122902, "grad_norm": 1.2129007577896118, "learning_rate": 3.373426252706313e-10, "loss": 0.6888, "step": 6687 }, { "epoch": 0.9975389663658737, "grad_norm": 1.2705974578857422, "learning_rate": 2.988227249578479e-10, "loss": 0.6831, "step": 6688 }, { "epoch": 0.997688119919457, "grad_norm": 1.5055255889892578, "learning_rate": 2.626373190028897e-10, "loss": 0.6216, "step": 6689 }, { "epoch": 0.9978372734730405, "grad_norm": 1.5076733827590942, "learning_rate": 2.2878641585455385e-10, "loss": 0.5963, "step": 6690 }, { "epoch": 0.9979864270266239, "grad_norm": 1.7773542404174805, "learning_rate": 1.9727002341429768e-10, "loss": 0.5963, "step": 6691 }, { "epoch": 0.9981355805802073, "grad_norm": 1.4263250827789307, "learning_rate": 1.680881490406794e-10, "loss": 0.6531, "step": 6692 }, { "epoch": 0.9982847341337907, "grad_norm": 1.3878896236419678, "learning_rate": 1.4124079954602743e-10, "loss": 0.6849, "step": 6693 }, { "epoch": 0.9984338876873742, "grad_norm": 1.9181843996047974, "learning_rate": 1.167279811975508e-10, "loss": 0.6147, "step": 6694 }, { "epoch": 0.9985830412409575, "grad_norm": 1.5627009868621826, "learning_rate": 9.454969971955941e-11, "loss": 0.642, "step": 6695 }, { "epoch": 0.998732194794541, "grad_norm": 1.3943243026733398, "learning_rate": 7.470596028902321e-11, "loss": 0.6427, "step": 6696 }, { "epoch": 0.9988813483481244, "grad_norm": 2.0661191940307617, "learning_rate": 5.719676753668246e-11, "loss": 0.6677, "step": 6697 }, { "epoch": 0.9990305019017078, "grad_norm": 1.6954338550567627, "learning_rate": 4.202212555259877e-11, "loss": 0.6224, "step": 6698 }, { "epoch": 0.9991796554552912, "grad_norm": 1.3393056392669678, "learning_rate": 2.9182037879493805e-11, "loss": 0.689, "step": 6699 }, { "epoch": 0.9993288090088747, "grad_norm": 1.2595808506011963, "learning_rate": 1.8676507512749297e-11, "loss": 0.6593, "step": 6700 }, { "epoch": 0.999477962562458, "grad_norm": 1.245774507522583, "learning_rate": 1.0505536907068347e-11, "loss": 0.6502, "step": 6701 }, { "epoch": 0.9996271161160415, "grad_norm": 3.1390316486358643, "learning_rate": 4.6691279687038905e-12, "loss": 0.6525, "step": 6702 }, { "epoch": 0.9997762696696249, "grad_norm": 1.4007090330123901, "learning_rate": 1.167282059899577e-12, "loss": 0.6054, "step": 6703 }, { "epoch": 0.9999254232232083, "grad_norm": 1.548366904258728, "learning_rate": 0.0, "loss": 0.6333, "step": 6704 }, { "epoch": 0.9999254232232083, "step": 6704, "total_flos": 3.3460776513300333e+19, "train_loss": 0.7159054304377525, "train_runtime": 85273.2052, "train_samples_per_second": 10.064, "train_steps_per_second": 0.079 } ], "logging_steps": 1.0, "max_steps": 6704, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 3.3460776513300333e+19, "train_batch_size": 2, "trial_name": null, "trial_params": null }