{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9941646855413875,
  "eval_steps": 500,
  "global_step": 4600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0002161227577263886,
      "grad_norm": 0.7498325109481812,
      "learning_rate": 2.877697841726619e-07,
      "loss": 1.6207,
      "step": 1
    },
    {
      "epoch": 0.0004322455154527772,
      "grad_norm": 0.6986605525016785,
      "learning_rate": 5.755395683453238e-07,
      "loss": 1.6011,
      "step": 2
    },
    {
      "epoch": 0.0006483682731791658,
      "grad_norm": 0.7146493792533875,
      "learning_rate": 8.633093525179857e-07,
      "loss": 1.5284,
      "step": 3
    },
    {
      "epoch": 0.0008644910309055544,
      "grad_norm": 0.8592181205749512,
      "learning_rate": 1.1510791366906476e-06,
      "loss": 1.5683,
      "step": 4
    },
    {
      "epoch": 0.001080613788631943,
      "grad_norm": 0.6243698000907898,
      "learning_rate": 1.4388489208633094e-06,
      "loss": 1.4792,
      "step": 5
    },
    {
      "epoch": 0.0012967365463583316,
      "grad_norm": 0.7416863441467285,
      "learning_rate": 1.7266187050359715e-06,
      "loss": 1.5804,
      "step": 6
    },
    {
      "epoch": 0.0015128593040847202,
      "grad_norm": 0.510182797908783,
      "learning_rate": 2.0143884892086333e-06,
      "loss": 1.6262,
      "step": 7
    },
    {
      "epoch": 0.0017289820618111088,
      "grad_norm": 0.7753154635429382,
      "learning_rate": 2.302158273381295e-06,
      "loss": 1.5323,
      "step": 8
    },
    {
      "epoch": 0.0019451048195374973,
      "grad_norm": 0.5771347284317017,
      "learning_rate": 2.589928057553957e-06,
      "loss": 1.4445,
      "step": 9
    },
    {
      "epoch": 0.002161227577263886,
      "grad_norm": 0.7808551788330078,
      "learning_rate": 2.877697841726619e-06,
      "loss": 1.6863,
      "step": 10
    },
    {
      "epoch": 0.0023773503349902745,
      "grad_norm": 0.7155261039733887,
      "learning_rate": 3.1654676258992807e-06,
      "loss": 1.505,
      "step": 11
    },
    {
      "epoch": 0.002593473092716663,
      "grad_norm": 0.7685397863388062,
      "learning_rate": 3.453237410071943e-06,
      "loss": 1.683,
      "step": 12
    },
    {
      "epoch": 0.0028095958504430517,
      "grad_norm": 0.6781991720199585,
      "learning_rate": 3.741007194244605e-06,
      "loss": 1.5397,
      "step": 13
    },
    {
      "epoch": 0.0030257186081694403,
      "grad_norm": 0.6643094420433044,
      "learning_rate": 4.028776978417267e-06,
      "loss": 1.3852,
      "step": 14
    },
    {
      "epoch": 0.003241841365895829,
      "grad_norm": 0.5720023512840271,
      "learning_rate": 4.316546762589928e-06,
      "loss": 1.3075,
      "step": 15
    },
    {
      "epoch": 0.0034579641236222175,
      "grad_norm": 0.6695529222488403,
      "learning_rate": 4.60431654676259e-06,
      "loss": 1.4687,
      "step": 16
    },
    {
      "epoch": 0.003674086881348606,
      "grad_norm": 0.8202911615371704,
      "learning_rate": 4.892086330935253e-06,
      "loss": 1.6027,
      "step": 17
    },
    {
      "epoch": 0.0038902096390749947,
      "grad_norm": 0.7854058146476746,
      "learning_rate": 5.179856115107914e-06,
      "loss": 1.4183,
      "step": 18
    },
    {
      "epoch": 0.004106332396801383,
      "grad_norm": 0.7073310613632202,
      "learning_rate": 5.467625899280576e-06,
      "loss": 1.545,
      "step": 19
    },
    {
      "epoch": 0.004322455154527772,
      "grad_norm": 0.7040787935256958,
      "learning_rate": 5.755395683453238e-06,
      "loss": 1.6758,
      "step": 20
    },
    {
      "epoch": 0.0045385779122541605,
      "grad_norm": 0.6893643140792847,
      "learning_rate": 6.0431654676259e-06,
      "loss": 1.4593,
      "step": 21
    },
    {
      "epoch": 0.004754700669980549,
      "grad_norm": 0.751395583152771,
      "learning_rate": 6.330935251798561e-06,
      "loss": 1.5057,
      "step": 22
    },
    {
      "epoch": 0.004970823427706938,
      "grad_norm": 0.8289034962654114,
      "learning_rate": 6.618705035971224e-06,
      "loss": 1.5668,
      "step": 23
    },
    {
      "epoch": 0.005186946185433326,
      "grad_norm": 0.6555783748626709,
      "learning_rate": 6.906474820143886e-06,
      "loss": 1.5488,
      "step": 24
    },
    {
      "epoch": 0.005403068943159715,
      "grad_norm": 0.789090096950531,
      "learning_rate": 7.194244604316547e-06,
      "loss": 1.4694,
      "step": 25
    },
    {
      "epoch": 0.0056191917008861034,
      "grad_norm": 0.7591240406036377,
      "learning_rate": 7.48201438848921e-06,
      "loss": 1.6212,
      "step": 26
    },
    {
      "epoch": 0.005835314458612492,
      "grad_norm": 0.6710547208786011,
      "learning_rate": 7.769784172661872e-06,
      "loss": 1.39,
      "step": 27
    },
    {
      "epoch": 0.006051437216338881,
      "grad_norm": 0.6780836582183838,
      "learning_rate": 8.057553956834533e-06,
      "loss": 1.3066,
      "step": 28
    },
    {
      "epoch": 0.006267559974065269,
      "grad_norm": 0.75581955909729,
      "learning_rate": 8.345323741007195e-06,
      "loss": 1.6376,
      "step": 29
    },
    {
      "epoch": 0.006483682731791658,
      "grad_norm": 0.7257469892501831,
      "learning_rate": 8.633093525179856e-06,
      "loss": 1.6985,
      "step": 30
    },
    {
      "epoch": 0.006699805489518046,
      "grad_norm": 0.8537693023681641,
      "learning_rate": 8.92086330935252e-06,
      "loss": 1.4547,
      "step": 31
    },
    {
      "epoch": 0.006915928247244435,
      "grad_norm": 0.7649074196815491,
      "learning_rate": 9.20863309352518e-06,
      "loss": 1.7011,
      "step": 32
    },
    {
      "epoch": 0.007132051004970824,
      "grad_norm": 0.6618505716323853,
      "learning_rate": 9.496402877697842e-06,
      "loss": 1.4064,
      "step": 33
    },
    {
      "epoch": 0.007348173762697212,
      "grad_norm": 0.8622448444366455,
      "learning_rate": 9.784172661870505e-06,
      "loss": 1.3952,
      "step": 34
    },
    {
      "epoch": 0.007564296520423601,
      "grad_norm": 0.8482533097267151,
      "learning_rate": 1.0071942446043167e-05,
      "loss": 1.3496,
      "step": 35
    },
    {
      "epoch": 0.007780419278149989,
      "grad_norm": 0.7652527093887329,
      "learning_rate": 1.0359712230215828e-05,
      "loss": 1.4609,
      "step": 36
    },
    {
      "epoch": 0.007996542035876377,
      "grad_norm": 0.7646381855010986,
      "learning_rate": 1.0647482014388491e-05,
      "loss": 1.3516,
      "step": 37
    },
    {
      "epoch": 0.008212664793602767,
      "grad_norm": 0.8664438724517822,
      "learning_rate": 1.0935251798561153e-05,
      "loss": 1.2758,
      "step": 38
    },
    {
      "epoch": 0.008428787551329154,
      "grad_norm": 0.6816773414611816,
      "learning_rate": 1.1223021582733812e-05,
      "loss": 1.2794,
      "step": 39
    },
    {
      "epoch": 0.008644910309055544,
      "grad_norm": 0.7363021373748779,
      "learning_rate": 1.1510791366906475e-05,
      "loss": 1.3211,
      "step": 40
    },
    {
      "epoch": 0.008861033066781931,
      "grad_norm": 0.6895145177841187,
      "learning_rate": 1.1798561151079137e-05,
      "loss": 1.3894,
      "step": 41
    },
    {
      "epoch": 0.009077155824508321,
      "grad_norm": 0.6489687561988831,
      "learning_rate": 1.20863309352518e-05,
      "loss": 1.3483,
      "step": 42
    },
    {
      "epoch": 0.009293278582234709,
      "grad_norm": 0.9342665076255798,
      "learning_rate": 1.2374100719424463e-05,
      "loss": 1.3172,
      "step": 43
    },
    {
      "epoch": 0.009509401339961098,
      "grad_norm": 0.9434204697608948,
      "learning_rate": 1.2661870503597123e-05,
      "loss": 1.3531,
      "step": 44
    },
    {
      "epoch": 0.009725524097687486,
      "grad_norm": 0.7507467865943909,
      "learning_rate": 1.2949640287769784e-05,
      "loss": 1.4414,
      "step": 45
    },
    {
      "epoch": 0.009941646855413875,
      "grad_norm": 0.8019313812255859,
      "learning_rate": 1.3237410071942447e-05,
      "loss": 1.449,
      "step": 46
    },
    {
      "epoch": 0.010157769613140263,
      "grad_norm": 0.725066065788269,
      "learning_rate": 1.3525179856115109e-05,
      "loss": 1.2374,
      "step": 47
    },
    {
      "epoch": 0.010373892370866653,
      "grad_norm": 0.8231765627861023,
      "learning_rate": 1.3812949640287772e-05,
      "loss": 1.4651,
      "step": 48
    },
    {
      "epoch": 0.01059001512859304,
      "grad_norm": 0.7059705853462219,
      "learning_rate": 1.4100719424460432e-05,
      "loss": 1.3966,
      "step": 49
    },
    {
      "epoch": 0.01080613788631943,
      "grad_norm": 0.8703414797782898,
      "learning_rate": 1.4388489208633095e-05,
      "loss": 1.0098,
      "step": 50
    },
    {
      "epoch": 0.011022260644045817,
      "grad_norm": 0.9461300373077393,
      "learning_rate": 1.4676258992805756e-05,
      "loss": 1.3343,
      "step": 51
    },
    {
      "epoch": 0.011238383401772207,
      "grad_norm": 0.8059316277503967,
      "learning_rate": 1.496402877697842e-05,
      "loss": 1.4206,
      "step": 52
    },
    {
      "epoch": 0.011454506159498595,
      "grad_norm": 0.8721457123756409,
      "learning_rate": 1.525179856115108e-05,
      "loss": 1.3473,
      "step": 53
    },
    {
      "epoch": 0.011670628917224984,
      "grad_norm": 0.7327573299407959,
      "learning_rate": 1.5539568345323744e-05,
      "loss": 1.38,
      "step": 54
    },
    {
      "epoch": 0.011886751674951372,
      "grad_norm": 0.792658805847168,
      "learning_rate": 1.5827338129496403e-05,
      "loss": 1.2197,
      "step": 55
    },
    {
      "epoch": 0.012102874432677761,
      "grad_norm": 0.746330976486206,
      "learning_rate": 1.6115107913669067e-05,
      "loss": 1.1263,
      "step": 56
    },
    {
      "epoch": 0.012318997190404149,
      "grad_norm": 0.6460121870040894,
      "learning_rate": 1.640287769784173e-05,
      "loss": 1.2812,
      "step": 57
    },
    {
      "epoch": 0.012535119948130538,
      "grad_norm": 0.8302263617515564,
      "learning_rate": 1.669064748201439e-05,
      "loss": 1.1794,
      "step": 58
    },
    {
      "epoch": 0.012751242705856926,
      "grad_norm": 0.8536649346351624,
      "learning_rate": 1.6978417266187053e-05,
      "loss": 1.237,
      "step": 59
    },
    {
      "epoch": 0.012967365463583316,
      "grad_norm": 0.7866637706756592,
      "learning_rate": 1.7266187050359712e-05,
      "loss": 1.3387,
      "step": 60
    },
    {
      "epoch": 0.013183488221309703,
      "grad_norm": 0.7334522008895874,
      "learning_rate": 1.7553956834532375e-05,
      "loss": 1.1289,
      "step": 61
    },
    {
      "epoch": 0.013399610979036093,
      "grad_norm": 0.8383240699768066,
      "learning_rate": 1.784172661870504e-05,
      "loss": 1.3147,
      "step": 62
    },
    {
      "epoch": 0.01361573373676248,
      "grad_norm": 0.8260761499404907,
      "learning_rate": 1.8129496402877698e-05,
      "loss": 1.0473,
      "step": 63
    },
    {
      "epoch": 0.01383185649448887,
      "grad_norm": 0.8201937675476074,
      "learning_rate": 1.841726618705036e-05,
      "loss": 1.3302,
      "step": 64
    },
    {
      "epoch": 0.014047979252215258,
      "grad_norm": 0.7809607982635498,
      "learning_rate": 1.870503597122302e-05,
      "loss": 0.9791,
      "step": 65
    },
    {
      "epoch": 0.014264102009941647,
      "grad_norm": 0.9497784972190857,
      "learning_rate": 1.8992805755395684e-05,
      "loss": 1.1998,
      "step": 66
    },
    {
      "epoch": 0.014480224767668035,
      "grad_norm": 0.8034746050834656,
      "learning_rate": 1.9280575539568347e-05,
      "loss": 1.1849,
      "step": 67
    },
    {
      "epoch": 0.014696347525394424,
      "grad_norm": 0.8192304372787476,
      "learning_rate": 1.956834532374101e-05,
      "loss": 1.1532,
      "step": 68
    },
    {
      "epoch": 0.014912470283120812,
      "grad_norm": 0.8304838538169861,
      "learning_rate": 1.985611510791367e-05,
      "loss": 1.2936,
      "step": 69
    },
    {
      "epoch": 0.015128593040847202,
      "grad_norm": 0.932195246219635,
      "learning_rate": 2.0143884892086333e-05,
      "loss": 1.165,
      "step": 70
    },
    {
      "epoch": 0.01534471579857359,
      "grad_norm": 0.9050421118736267,
      "learning_rate": 2.0431654676258996e-05,
      "loss": 1.1442,
      "step": 71
    },
    {
      "epoch": 0.015560838556299979,
      "grad_norm": 0.7831349968910217,
      "learning_rate": 2.0719424460431656e-05,
      "loss": 0.9148,
      "step": 72
    },
    {
      "epoch": 0.015776961314026366,
      "grad_norm": 0.8111123442649841,
      "learning_rate": 2.1007194244604316e-05,
      "loss": 1.2934,
      "step": 73
    },
    {
      "epoch": 0.015993084071752754,
      "grad_norm": 1.0379811525344849,
      "learning_rate": 2.1294964028776982e-05,
      "loss": 1.3094,
      "step": 74
    },
    {
      "epoch": 0.016209206829479145,
      "grad_norm": 0.9090933799743652,
      "learning_rate": 2.1582733812949642e-05,
      "loss": 1.1518,
      "step": 75
    },
    {
      "epoch": 0.016425329587205533,
      "grad_norm": 0.8984608054161072,
      "learning_rate": 2.1870503597122305e-05,
      "loss": 1.2224,
      "step": 76
    },
    {
      "epoch": 0.01664145234493192,
      "grad_norm": 0.9421352744102478,
      "learning_rate": 2.2158273381294965e-05,
      "loss": 1.0569,
      "step": 77
    },
    {
      "epoch": 0.01685757510265831,
      "grad_norm": 1.1250900030136108,
      "learning_rate": 2.2446043165467625e-05,
      "loss": 1.1747,
      "step": 78
    },
    {
      "epoch": 0.0170736978603847,
      "grad_norm": 0.9288277626037598,
      "learning_rate": 2.273381294964029e-05,
      "loss": 1.0783,
      "step": 79
    },
    {
      "epoch": 0.017289820618111088,
      "grad_norm": 0.8962733149528503,
      "learning_rate": 2.302158273381295e-05,
      "loss": 1.0767,
      "step": 80
    },
    {
      "epoch": 0.017505943375837475,
      "grad_norm": 0.8960233926773071,
      "learning_rate": 2.3309352517985614e-05,
      "loss": 1.1613,
      "step": 81
    },
    {
      "epoch": 0.017722066133563863,
      "grad_norm": 0.9017160534858704,
      "learning_rate": 2.3597122302158274e-05,
      "loss": 1.2361,
      "step": 82
    },
    {
      "epoch": 0.017938188891290254,
      "grad_norm": 0.7587832808494568,
      "learning_rate": 2.3884892086330937e-05,
      "loss": 1.1694,
      "step": 83
    },
    {
      "epoch": 0.018154311649016642,
      "grad_norm": 0.8145380616188049,
      "learning_rate": 2.41726618705036e-05,
      "loss": 1.063,
      "step": 84
    },
    {
      "epoch": 0.01837043440674303,
      "grad_norm": 0.863741397857666,
      "learning_rate": 2.446043165467626e-05,
      "loss": 0.9647,
      "step": 85
    },
    {
      "epoch": 0.018586557164469417,
      "grad_norm": 0.8460249304771423,
      "learning_rate": 2.4748201438848926e-05,
      "loss": 1.1474,
      "step": 86
    },
    {
      "epoch": 0.01880267992219581,
      "grad_norm": 0.8994437456130981,
      "learning_rate": 2.5035971223021586e-05,
      "loss": 1.0045,
      "step": 87
    },
    {
      "epoch": 0.019018802679922196,
      "grad_norm": 0.8365037441253662,
      "learning_rate": 2.5323741007194246e-05,
      "loss": 1.0582,
      "step": 88
    },
    {
      "epoch": 0.019234925437648584,
      "grad_norm": 1.2148154973983765,
      "learning_rate": 2.561151079136691e-05,
      "loss": 0.9929,
      "step": 89
    },
    {
      "epoch": 0.01945104819537497,
      "grad_norm": 1.068932056427002,
      "learning_rate": 2.589928057553957e-05,
      "loss": 1.0284,
      "step": 90
    },
    {
      "epoch": 0.019667170953101363,
      "grad_norm": 0.9524146318435669,
      "learning_rate": 2.6187050359712235e-05,
      "loss": 0.9298,
      "step": 91
    },
    {
      "epoch": 0.01988329371082775,
      "grad_norm": 0.9831689596176147,
      "learning_rate": 2.6474820143884895e-05,
      "loss": 0.891,
      "step": 92
    },
    {
      "epoch": 0.02009941646855414,
      "grad_norm": 0.8392059803009033,
      "learning_rate": 2.6762589928057554e-05,
      "loss": 0.7688,
      "step": 93
    },
    {
      "epoch": 0.020315539226280526,
      "grad_norm": 0.9849358797073364,
      "learning_rate": 2.7050359712230217e-05,
      "loss": 1.166,
      "step": 94
    },
    {
      "epoch": 0.020531661984006917,
      "grad_norm": 0.8717548847198486,
      "learning_rate": 2.7338129496402877e-05,
      "loss": 1.0336,
      "step": 95
    },
    {
      "epoch": 0.020747784741733305,
      "grad_norm": 0.9101350903511047,
      "learning_rate": 2.7625899280575544e-05,
      "loss": 1.0679,
      "step": 96
    },
    {
      "epoch": 0.020963907499459693,
      "grad_norm": 0.8745354413986206,
      "learning_rate": 2.7913669064748203e-05,
      "loss": 1.1726,
      "step": 97
    },
    {
      "epoch": 0.02118003025718608,
      "grad_norm": 0.8927585482597351,
      "learning_rate": 2.8201438848920863e-05,
      "loss": 1.15,
      "step": 98
    },
    {
      "epoch": 0.02139615301491247,
      "grad_norm": 0.888699471950531,
      "learning_rate": 2.848920863309353e-05,
      "loss": 0.9032,
      "step": 99
    },
    {
      "epoch": 0.02161227577263886,
      "grad_norm": 0.9794796705245972,
      "learning_rate": 2.877697841726619e-05,
      "loss": 1.1491,
      "step": 100
    },
    {
      "epoch": 0.021828398530365247,
      "grad_norm": 0.8439841270446777,
      "learning_rate": 2.9064748201438852e-05,
      "loss": 1.1291,
      "step": 101
    },
    {
      "epoch": 0.022044521288091635,
      "grad_norm": 0.9384711980819702,
      "learning_rate": 2.9352517985611512e-05,
      "loss": 0.9793,
      "step": 102
    },
    {
      "epoch": 0.022260644045818026,
      "grad_norm": 0.8711330890655518,
      "learning_rate": 2.9640287769784172e-05,
      "loss": 1.0932,
      "step": 103
    },
    {
      "epoch": 0.022476766803544414,
      "grad_norm": 1.1253970861434937,
      "learning_rate": 2.992805755395684e-05,
      "loss": 1.118,
      "step": 104
    },
    {
      "epoch": 0.0226928895612708,
      "grad_norm": 0.8921772837638855,
      "learning_rate": 3.0215827338129498e-05,
      "loss": 0.8669,
      "step": 105
    },
    {
      "epoch": 0.02290901231899719,
      "grad_norm": 0.9359428882598877,
      "learning_rate": 3.050359712230216e-05,
      "loss": 1.0879,
      "step": 106
    },
    {
      "epoch": 0.02312513507672358,
      "grad_norm": 0.9697002172470093,
      "learning_rate": 3.0791366906474824e-05,
      "loss": 1.064,
      "step": 107
    },
    {
      "epoch": 0.023341257834449968,
      "grad_norm": 0.9988358616828918,
      "learning_rate": 3.107913669064749e-05,
      "loss": 1.0661,
      "step": 108
    },
    {
      "epoch": 0.023557380592176356,
      "grad_norm": 1.1203874349594116,
      "learning_rate": 3.1366906474820144e-05,
      "loss": 1.1429,
      "step": 109
    },
    {
      "epoch": 0.023773503349902744,
      "grad_norm": 0.9657348990440369,
      "learning_rate": 3.165467625899281e-05,
      "loss": 1.2262,
      "step": 110
    },
    {
      "epoch": 0.023989626107629135,
      "grad_norm": 0.7940213084220886,
      "learning_rate": 3.194244604316547e-05,
      "loss": 0.9388,
      "step": 111
    },
    {
      "epoch": 0.024205748865355523,
      "grad_norm": 0.9472448229789734,
      "learning_rate": 3.223021582733813e-05,
      "loss": 1.1237,
      "step": 112
    },
    {
      "epoch": 0.02442187162308191,
      "grad_norm": 0.9700066447257996,
      "learning_rate": 3.2517985611510796e-05,
      "loss": 1.2015,
      "step": 113
    },
    {
      "epoch": 0.024637994380808298,
      "grad_norm": 1.0106992721557617,
      "learning_rate": 3.280575539568346e-05,
      "loss": 1.1162,
      "step": 114
    },
    {
      "epoch": 0.02485411713853469,
      "grad_norm": 1.0185657739639282,
      "learning_rate": 3.3093525179856116e-05,
      "loss": 1.0547,
      "step": 115
    },
    {
      "epoch": 0.025070239896261077,
      "grad_norm": 1.0063570737838745,
      "learning_rate": 3.338129496402878e-05,
      "loss": 1.0538,
      "step": 116
    },
    {
      "epoch": 0.025286362653987465,
      "grad_norm": 0.9898773431777954,
      "learning_rate": 3.366906474820144e-05,
      "loss": 1.1855,
      "step": 117
    },
    {
      "epoch": 0.025502485411713852,
      "grad_norm": 0.8569101095199585,
      "learning_rate": 3.3956834532374105e-05,
      "loss": 0.9859,
      "step": 118
    },
    {
      "epoch": 0.025718608169440244,
      "grad_norm": 0.8109889030456543,
      "learning_rate": 3.424460431654677e-05,
      "loss": 0.9956,
      "step": 119
    },
    {
      "epoch": 0.02593473092716663,
      "grad_norm": 0.9044827818870544,
      "learning_rate": 3.4532374100719424e-05,
      "loss": 1.1331,
      "step": 120
    },
    {
      "epoch": 0.02615085368489302,
      "grad_norm": 1.191148042678833,
      "learning_rate": 3.482014388489209e-05,
      "loss": 1.0748,
      "step": 121
    },
    {
      "epoch": 0.026366976442619407,
      "grad_norm": 1.043940544128418,
      "learning_rate": 3.510791366906475e-05,
      "loss": 1.1331,
      "step": 122
    },
    {
      "epoch": 0.026583099200345798,
      "grad_norm": 0.8819486498832703,
      "learning_rate": 3.5395683453237414e-05,
      "loss": 1.0659,
      "step": 123
    },
    {
      "epoch": 0.026799221958072186,
      "grad_norm": 0.9887312650680542,
      "learning_rate": 3.568345323741008e-05,
      "loss": 1.1115,
      "step": 124
    },
    {
      "epoch": 0.027015344715798573,
      "grad_norm": 0.9711753726005554,
      "learning_rate": 3.597122302158273e-05,
      "loss": 1.1175,
      "step": 125
    },
    {
      "epoch": 0.02723146747352496,
      "grad_norm": 1.0508410930633545,
      "learning_rate": 3.6258992805755396e-05,
      "loss": 1.1413,
      "step": 126
    },
    {
      "epoch": 0.027447590231251352,
      "grad_norm": 0.9886671304702759,
      "learning_rate": 3.654676258992806e-05,
      "loss": 0.9326,
      "step": 127
    },
    {
      "epoch": 0.02766371298897774,
      "grad_norm": 0.9078131318092346,
      "learning_rate": 3.683453237410072e-05,
      "loss": 1.1397,
      "step": 128
    },
    {
      "epoch": 0.027879835746704128,
      "grad_norm": 1.1544677019119263,
      "learning_rate": 3.7122302158273386e-05,
      "loss": 0.9968,
      "step": 129
    },
    {
      "epoch": 0.028095958504430515,
      "grad_norm": 0.9644024968147278,
      "learning_rate": 3.741007194244604e-05,
      "loss": 1.1007,
      "step": 130
    },
    {
      "epoch": 0.028312081262156907,
      "grad_norm": 0.7962532043457031,
      "learning_rate": 3.769784172661871e-05,
      "loss": 0.9139,
      "step": 131
    },
    {
      "epoch": 0.028528204019883294,
      "grad_norm": 0.9570571780204773,
      "learning_rate": 3.798561151079137e-05,
      "loss": 1.0106,
      "step": 132
    },
    {
      "epoch": 0.028744326777609682,
      "grad_norm": 0.9263657331466675,
      "learning_rate": 3.827338129496403e-05,
      "loss": 0.7735,
      "step": 133
    },
    {
      "epoch": 0.02896044953533607,
      "grad_norm": 0.8990103602409363,
      "learning_rate": 3.8561151079136694e-05,
      "loss": 0.9535,
      "step": 134
    },
    {
      "epoch": 0.02917657229306246,
      "grad_norm": 0.9810959696769714,
      "learning_rate": 3.884892086330935e-05,
      "loss": 1.0279,
      "step": 135
    },
    {
      "epoch": 0.02939269505078885,
      "grad_norm": 1.0959899425506592,
      "learning_rate": 3.913669064748202e-05,
      "loss": 0.8303,
      "step": 136
    },
    {
      "epoch": 0.029608817808515236,
      "grad_norm": 0.9222844839096069,
      "learning_rate": 3.942446043165468e-05,
      "loss": 1.05,
      "step": 137
    },
    {
      "epoch": 0.029824940566241624,
      "grad_norm": 0.9906018972396851,
      "learning_rate": 3.971223021582734e-05,
      "loss": 1.043,
      "step": 138
    },
    {
      "epoch": 0.030041063323968015,
      "grad_norm": 1.038453459739685,
      "learning_rate": 4e-05,
      "loss": 1.0572,
      "step": 139
    },
    {
      "epoch": 0.030257186081694403,
      "grad_norm": 0.9111128449440002,
      "learning_rate": 3.9999995100023115e-05,
      "loss": 0.987,
      "step": 140
    },
    {
      "epoch": 0.03047330883942079,
      "grad_norm": 1.0118787288665771,
      "learning_rate": 3.9999980400094876e-05,
      "loss": 1.2349,
      "step": 141
    },
    {
      "epoch": 0.03068943159714718,
      "grad_norm": 0.8447703719139099,
      "learning_rate": 3.9999955900222456e-05,
      "loss": 0.8415,
      "step": 142
    },
    {
      "epoch": 0.03090555435487357,
      "grad_norm": 1.0229984521865845,
      "learning_rate": 3.999992160041789e-05,
      "loss": 0.8446,
      "step": 143
    },
    {
      "epoch": 0.031121677112599958,
      "grad_norm": 1.0735254287719727,
      "learning_rate": 3.999987750069797e-05,
      "loss": 1.3394,
      "step": 144
    },
    {
      "epoch": 0.031337799870326345,
      "grad_norm": 1.1151766777038574,
      "learning_rate": 3.9999823601084306e-05,
      "loss": 1.1003,
      "step": 145
    },
    {
      "epoch": 0.03155392262805273,
      "grad_norm": 0.959318995475769,
      "learning_rate": 3.999975990160331e-05,
      "loss": 1.1259,
      "step": 146
    },
    {
      "epoch": 0.03177004538577912,
      "grad_norm": 0.9324452877044678,
      "learning_rate": 3.99996864022862e-05,
      "loss": 0.9367,
      "step": 147
    },
    {
      "epoch": 0.03198616814350551,
      "grad_norm": 1.0011591911315918,
      "learning_rate": 3.999960310316898e-05,
      "loss": 1.0493,
      "step": 148
    },
    {
      "epoch": 0.0322022909012319,
      "grad_norm": 0.979171633720398,
      "learning_rate": 3.9999510004292474e-05,
      "loss": 0.9986,
      "step": 149
    },
    {
      "epoch": 0.03241841365895829,
      "grad_norm": 1.1019887924194336,
      "learning_rate": 3.99994071057023e-05,
      "loss": 1.141,
      "step": 150
    },
    {
      "epoch": 0.03263453641668468,
      "grad_norm": 1.1715887784957886,
      "learning_rate": 3.999929440744887e-05,
      "loss": 0.9725,
      "step": 151
    },
    {
      "epoch": 0.032850659174411066,
      "grad_norm": 0.9511465430259705,
      "learning_rate": 3.999917190958742e-05,
      "loss": 1.0759,
      "step": 152
    },
    {
      "epoch": 0.033066781932137454,
      "grad_norm": 1.1758034229278564,
      "learning_rate": 3.999903961217796e-05,
      "loss": 0.9378,
      "step": 153
    },
    {
      "epoch": 0.03328290468986384,
      "grad_norm": 0.9852985739707947,
      "learning_rate": 3.9998897515285323e-05,
      "loss": 0.9593,
      "step": 154
    },
    {
      "epoch": 0.03349902744759023,
      "grad_norm": 0.929764449596405,
      "learning_rate": 3.999874561897913e-05,
      "loss": 1.0765,
      "step": 155
    },
    {
      "epoch": 0.03371515020531662,
      "grad_norm": 1.136412262916565,
      "learning_rate": 3.999858392333382e-05,
      "loss": 1.0034,
      "step": 156
    },
    {
      "epoch": 0.03393127296304301,
      "grad_norm": 0.9158518314361572,
      "learning_rate": 3.9998412428428613e-05,
      "loss": 1.1276,
      "step": 157
    },
    {
      "epoch": 0.0341473957207694,
      "grad_norm": 1.185773253440857,
      "learning_rate": 3.9998231134347554e-05,
      "loss": 1.1234,
      "step": 158
    },
    {
      "epoch": 0.03436351847849579,
      "grad_norm": 1.0396409034729004,
      "learning_rate": 3.999804004117946e-05,
      "loss": 0.9214,
      "step": 159
    },
    {
      "epoch": 0.034579641236222175,
      "grad_norm": 1.094385027885437,
      "learning_rate": 3.999783914901798e-05,
      "loss": 1.0425,
      "step": 160
    },
    {
      "epoch": 0.03479576399394856,
      "grad_norm": 1.138527512550354,
      "learning_rate": 3.999762845796154e-05,
      "loss": 1.1496,
      "step": 161
    },
    {
      "epoch": 0.03501188675167495,
      "grad_norm": 1.072544813156128,
      "learning_rate": 3.999740796811339e-05,
      "loss": 1.2448,
      "step": 162
    },
    {
      "epoch": 0.03522800950940134,
      "grad_norm": 1.015540599822998,
      "learning_rate": 3.9997177679581555e-05,
      "loss": 0.9786,
      "step": 163
    },
    {
      "epoch": 0.035444132267127726,
      "grad_norm": 0.9477831721305847,
      "learning_rate": 3.999693759247889e-05,
      "loss": 1.1633,
      "step": 164
    },
    {
      "epoch": 0.03566025502485412,
      "grad_norm": 1.0140697956085205,
      "learning_rate": 3.999668770692303e-05,
      "loss": 1.1194,
      "step": 165
    },
    {
      "epoch": 0.03587637778258051,
      "grad_norm": 1.0042285919189453,
      "learning_rate": 3.9996428023036415e-05,
      "loss": 1.0049,
      "step": 166
    },
    {
      "epoch": 0.036092500540306896,
      "grad_norm": 0.965667724609375,
      "learning_rate": 3.99961585409463e-05,
      "loss": 0.9693,
      "step": 167
    },
    {
      "epoch": 0.036308623298033284,
      "grad_norm": 1.002320647239685,
      "learning_rate": 3.999587926078472e-05,
      "loss": 0.8459,
      "step": 168
    },
    {
      "epoch": 0.03652474605575967,
      "grad_norm": 0.9055720567703247,
      "learning_rate": 3.999559018268853e-05,
      "loss": 0.9717,
      "step": 169
    },
    {
      "epoch": 0.03674086881348606,
      "grad_norm": 1.0310685634613037,
      "learning_rate": 3.9995291306799374e-05,
      "loss": 1.0326,
      "step": 170
    },
    {
      "epoch": 0.03695699157121245,
      "grad_norm": 0.7893375158309937,
      "learning_rate": 3.9994982633263695e-05,
      "loss": 0.8942,
      "step": 171
    },
    {
      "epoch": 0.037173114328938835,
      "grad_norm": 1.0069329738616943,
      "learning_rate": 3.999466416223275e-05,
      "loss": 1.1418,
      "step": 172
    },
    {
      "epoch": 0.03738923708666522,
      "grad_norm": 0.7812685966491699,
      "learning_rate": 3.999433589386259e-05,
      "loss": 0.8637,
      "step": 173
    },
    {
      "epoch": 0.03760535984439162,
      "grad_norm": 1.1129683256149292,
      "learning_rate": 3.999399782831405e-05,
      "loss": 0.9179,
      "step": 174
    },
    {
      "epoch": 0.037821482602118005,
      "grad_norm": 0.9261072874069214,
      "learning_rate": 3.9993649965752804e-05,
      "loss": 1.0623,
      "step": 175
    },
    {
      "epoch": 0.03803760535984439,
      "grad_norm": 0.9303197860717773,
      "learning_rate": 3.999329230634929e-05,
      "loss": 0.9668,
      "step": 176
    },
    {
      "epoch": 0.03825372811757078,
      "grad_norm": 1.0003294944763184,
      "learning_rate": 3.9992924850278764e-05,
      "loss": 0.9787,
      "step": 177
    },
    {
      "epoch": 0.03846985087529717,
      "grad_norm": 0.9643764495849609,
      "learning_rate": 3.9992547597721283e-05,
      "loss": 1.2231,
      "step": 178
    },
    {
      "epoch": 0.038685973633023556,
      "grad_norm": 0.8814199566841125,
      "learning_rate": 3.9992160548861694e-05,
      "loss": 0.8795,
      "step": 179
    },
    {
      "epoch": 0.03890209639074994,
      "grad_norm": 1.132537603378296,
      "learning_rate": 3.999176370388965e-05,
      "loss": 0.9443,
      "step": 180
    },
    {
      "epoch": 0.03911821914847633,
      "grad_norm": 1.0179696083068848,
      "learning_rate": 3.99913570629996e-05,
      "loss": 1.0842,
      "step": 181
    },
    {
      "epoch": 0.039334341906202726,
      "grad_norm": 0.9068275690078735,
      "learning_rate": 3.999094062639081e-05,
      "loss": 0.9677,
      "step": 182
    },
    {
      "epoch": 0.039550464663929114,
      "grad_norm": 0.9726153612136841,
      "learning_rate": 3.999051439426732e-05,
      "loss": 1.1278,
      "step": 183
    },
    {
      "epoch": 0.0397665874216555,
      "grad_norm": 0.9714052677154541,
      "learning_rate": 3.999007836683799e-05,
      "loss": 1.1799,
      "step": 184
    },
    {
      "epoch": 0.03998271017938189,
      "grad_norm": 0.8894519209861755,
      "learning_rate": 3.998963254431647e-05,
      "loss": 1.1082,
      "step": 185
    },
    {
      "epoch": 0.04019883293710828,
      "grad_norm": 0.993949830532074,
      "learning_rate": 3.998917692692121e-05,
      "loss": 1.0954,
      "step": 186
    },
    {
      "epoch": 0.040414955694834664,
      "grad_norm": 0.993137538433075,
      "learning_rate": 3.998871151487548e-05,
      "loss": 1.1753,
      "step": 187
    },
    {
      "epoch": 0.04063107845256105,
      "grad_norm": 1.0292941331863403,
      "learning_rate": 3.99882363084073e-05,
      "loss": 1.0214,
      "step": 188
    },
    {
      "epoch": 0.04084720121028744,
      "grad_norm": 0.9065425395965576,
      "learning_rate": 3.9987751307749536e-05,
      "loss": 1.127,
      "step": 189
    },
    {
      "epoch": 0.041063323968013835,
      "grad_norm": 1.0782829523086548,
      "learning_rate": 3.998725651313984e-05,
      "loss": 0.9114,
      "step": 190
    },
    {
      "epoch": 0.04127944672574022,
      "grad_norm": 0.9183439016342163,
      "learning_rate": 3.998675192482065e-05,
      "loss": 1.0319,
      "step": 191
    },
    {
      "epoch": 0.04149556948346661,
      "grad_norm": 0.8721674084663391,
      "learning_rate": 3.998623754303923e-05,
      "loss": 0.8357,
      "step": 192
    },
    {
      "epoch": 0.041711692241193,
      "grad_norm": 0.9285215735435486,
      "learning_rate": 3.998571336804761e-05,
      "loss": 1.0677,
      "step": 193
    },
    {
      "epoch": 0.041927814998919385,
      "grad_norm": 0.972098708152771,
      "learning_rate": 3.9985179400102634e-05,
      "loss": 0.9023,
      "step": 194
    },
    {
      "epoch": 0.04214393775664577,
      "grad_norm": 0.9518387913703918,
      "learning_rate": 3.998463563946596e-05,
      "loss": 1.082,
      "step": 195
    },
    {
      "epoch": 0.04236006051437216,
      "grad_norm": 0.9610128998756409,
      "learning_rate": 3.9984082086404015e-05,
      "loss": 0.8819,
      "step": 196
    },
    {
      "epoch": 0.04257618327209855,
      "grad_norm": 0.9028835892677307,
      "learning_rate": 3.9983518741188046e-05,
      "loss": 1.0048,
      "step": 197
    },
    {
      "epoch": 0.04279230602982494,
      "grad_norm": 1.0402621030807495,
      "learning_rate": 3.998294560409409e-05,
      "loss": 1.1093,
      "step": 198
    },
    {
      "epoch": 0.04300842878755133,
      "grad_norm": 0.8937285542488098,
      "learning_rate": 3.998236267540298e-05,
      "loss": 0.8782,
      "step": 199
    },
    {
      "epoch": 0.04322455154527772,
      "grad_norm": 0.9310992360115051,
      "learning_rate": 3.998176995540035e-05,
      "loss": 0.8388,
      "step": 200
    },
    {
      "epoch": 0.043440674303004106,
      "grad_norm": 1.0354390144348145,
      "learning_rate": 3.998116744437664e-05,
      "loss": 1.0339,
      "step": 201
    },
    {
      "epoch": 0.043656797060730494,
      "grad_norm": 0.9959929585456848,
      "learning_rate": 3.9980555142627065e-05,
      "loss": 1.0143,
      "step": 202
    },
    {
      "epoch": 0.04387291981845688,
      "grad_norm": 1.0468930006027222,
      "learning_rate": 3.9979933050451664e-05,
      "loss": 1.1142,
      "step": 203
    },
    {
      "epoch": 0.04408904257618327,
      "grad_norm": 1.0852928161621094,
      "learning_rate": 3.997930116815525e-05,
      "loss": 1.0066,
      "step": 204
    },
    {
      "epoch": 0.04430516533390966,
      "grad_norm": 0.9408144354820251,
      "learning_rate": 3.9978659496047456e-05,
      "loss": 1.152,
      "step": 205
    },
    {
      "epoch": 0.04452128809163605,
      "grad_norm": 0.8921926617622375,
      "learning_rate": 3.997800803444269e-05,
      "loss": 1.005,
      "step": 206
    },
    {
      "epoch": 0.04473741084936244,
      "grad_norm": 0.9611052870750427,
      "learning_rate": 3.9977346783660165e-05,
      "loss": 1.1262,
      "step": 207
    },
    {
      "epoch": 0.04495353360708883,
      "grad_norm": 0.8504266142845154,
      "learning_rate": 3.99766757440239e-05,
      "loss": 1.05,
      "step": 208
    },
    {
      "epoch": 0.045169656364815215,
      "grad_norm": 0.8924556374549866,
      "learning_rate": 3.99759949158627e-05,
      "loss": 1.1095,
      "step": 209
    },
    {
      "epoch": 0.0453857791225416,
      "grad_norm": 1.0046991109848022,
      "learning_rate": 3.997530429951017e-05,
      "loss": 0.9707,
      "step": 210
    },
    {
      "epoch": 0.04560190188026799,
      "grad_norm": 0.9472872614860535,
      "learning_rate": 3.997460389530471e-05,
      "loss": 1.1529,
      "step": 211
    },
    {
      "epoch": 0.04581802463799438,
      "grad_norm": 0.958730161190033,
      "learning_rate": 3.997389370358951e-05,
      "loss": 1.0757,
      "step": 212
    },
    {
      "epoch": 0.046034147395720766,
      "grad_norm": 0.8812927007675171,
      "learning_rate": 3.997317372471257e-05,
      "loss": 1.0235,
      "step": 213
    },
    {
      "epoch": 0.04625027015344716,
      "grad_norm": 1.042219877243042,
      "learning_rate": 3.997244395902668e-05,
      "loss": 1.1085,
      "step": 214
    },
    {
      "epoch": 0.04646639291117355,
      "grad_norm": 0.850281834602356,
      "learning_rate": 3.997170440688942e-05,
      "loss": 0.8835,
      "step": 215
    },
    {
      "epoch": 0.046682515668899936,
      "grad_norm": 0.9123249053955078,
      "learning_rate": 3.9970955068663165e-05,
      "loss": 0.8998,
      "step": 216
    },
    {
      "epoch": 0.046898638426626324,
      "grad_norm": 1.029981017112732,
      "learning_rate": 3.9970195944715096e-05,
      "loss": 1.2,
      "step": 217
    },
    {
      "epoch": 0.04711476118435271,
      "grad_norm": 0.9120750427246094,
      "learning_rate": 3.9969427035417176e-05,
      "loss": 0.8322,
      "step": 218
    },
    {
      "epoch": 0.0473308839420791,
      "grad_norm": 0.9278477430343628,
      "learning_rate": 3.996864834114617e-05,
      "loss": 0.9291,
      "step": 219
    },
    {
      "epoch": 0.04754700669980549,
      "grad_norm": 0.9059831500053406,
      "learning_rate": 3.996785986228364e-05,
      "loss": 1.0506,
      "step": 220
    },
    {
      "epoch": 0.047763129457531875,
      "grad_norm": 0.9443290829658508,
      "learning_rate": 3.996706159921594e-05,
      "loss": 0.9087,
      "step": 221
    },
    {
      "epoch": 0.04797925221525827,
      "grad_norm": 0.9705933332443237,
      "learning_rate": 3.996625355233421e-05,
      "loss": 1.074,
      "step": 222
    },
    {
      "epoch": 0.04819537497298466,
      "grad_norm": 1.1599591970443726,
      "learning_rate": 3.9965435722034395e-05,
      "loss": 0.7866,
      "step": 223
    },
    {
      "epoch": 0.048411497730711045,
      "grad_norm": 1.036673903465271,
      "learning_rate": 3.996460810871723e-05,
      "loss": 1.0867,
      "step": 224
    },
    {
      "epoch": 0.04862762048843743,
      "grad_norm": 1.0065069198608398,
      "learning_rate": 3.9963770712788244e-05,
      "loss": 0.8558,
      "step": 225
    },
    {
      "epoch": 0.04884374324616382,
      "grad_norm": 0.9539085626602173,
      "learning_rate": 3.996292353465775e-05,
      "loss": 0.9596,
      "step": 226
    },
    {
      "epoch": 0.04905986600389021,
      "grad_norm": 1.0341073274612427,
      "learning_rate": 3.9962066574740886e-05,
      "loss": 0.9632,
      "step": 227
    },
    {
      "epoch": 0.049275988761616596,
      "grad_norm": 1.0347282886505127,
      "learning_rate": 3.996119983345754e-05,
      "loss": 0.9397,
      "step": 228
    },
    {
      "epoch": 0.049492111519342984,
      "grad_norm": 1.1618353128433228,
      "learning_rate": 3.996032331123242e-05,
      "loss": 0.967,
      "step": 229
    },
    {
      "epoch": 0.04970823427706938,
      "grad_norm": 1.0670056343078613,
      "learning_rate": 3.995943700849501e-05,
      "loss": 1.0497,
      "step": 230
    },
    {
      "epoch": 0.049924357034795766,
      "grad_norm": 0.999468207359314,
      "learning_rate": 3.9958540925679614e-05,
      "loss": 0.9668,
      "step": 231
    },
    {
      "epoch": 0.050140479792522154,
      "grad_norm": 0.9835057258605957,
      "learning_rate": 3.9957635063225305e-05,
      "loss": 0.9974,
      "step": 232
    },
    {
      "epoch": 0.05035660255024854,
      "grad_norm": 0.9463314414024353,
      "learning_rate": 3.995671942157594e-05,
      "loss": 1.0148,
      "step": 233
    },
    {
      "epoch": 0.05057272530797493,
      "grad_norm": 1.0669755935668945,
      "learning_rate": 3.99557940011802e-05,
      "loss": 1.0068,
      "step": 234
    },
    {
      "epoch": 0.05078884806570132,
      "grad_norm": 0.9666453003883362,
      "learning_rate": 3.995485880249153e-05,
      "loss": 0.9449,
      "step": 235
    },
    {
      "epoch": 0.051004970823427705,
      "grad_norm": 0.899185299873352,
      "learning_rate": 3.995391382596817e-05,
      "loss": 1.007,
      "step": 236
    },
    {
      "epoch": 0.05122109358115409,
      "grad_norm": 0.8701006770133972,
      "learning_rate": 3.995295907207317e-05,
      "loss": 0.9585,
      "step": 237
    },
    {
      "epoch": 0.05143721633888049,
      "grad_norm": 0.9041889309883118,
      "learning_rate": 3.9951994541274345e-05,
      "loss": 0.8978,
      "step": 238
    },
    {
      "epoch": 0.051653339096606875,
      "grad_norm": 1.0119632482528687,
      "learning_rate": 3.9951020234044316e-05,
      "loss": 1.0081,
      "step": 239
    },
    {
      "epoch": 0.05186946185433326,
      "grad_norm": 1.0799907445907593,
      "learning_rate": 3.995003615086049e-05,
      "loss": 0.9312,
      "step": 240
    },
    {
      "epoch": 0.05208558461205965,
      "grad_norm": 1.000360131263733,
      "learning_rate": 3.994904229220507e-05,
      "loss": 1.078,
      "step": 241
    },
    {
      "epoch": 0.05230170736978604,
      "grad_norm": 0.8915219902992249,
      "learning_rate": 3.994803865856505e-05,
      "loss": 1.031,
      "step": 242
    },
    {
      "epoch": 0.052517830127512426,
      "grad_norm": 0.9879536628723145,
      "learning_rate": 3.994702525043219e-05,
      "loss": 1.1415,
      "step": 243
    },
    {
      "epoch": 0.05273395288523881,
      "grad_norm": 0.8250242471694946,
      "learning_rate": 3.9946002068303076e-05,
      "loss": 0.983,
      "step": 244
    },
    {
      "epoch": 0.0529500756429652,
      "grad_norm": 0.9632031917572021,
      "learning_rate": 3.994496911267905e-05,
      "loss": 1.0133,
      "step": 245
    },
    {
      "epoch": 0.053166198400691596,
      "grad_norm": 0.9913688898086548,
      "learning_rate": 3.9943926384066266e-05,
      "loss": 0.8823,
      "step": 246
    },
    {
      "epoch": 0.053382321158417984,
      "grad_norm": 1.0919219255447388,
      "learning_rate": 3.9942873882975665e-05,
      "loss": 1.1,
      "step": 247
    },
    {
      "epoch": 0.05359844391614437,
      "grad_norm": 0.9994152784347534,
      "learning_rate": 3.9941811609922954e-05,
      "loss": 1.1517,
      "step": 248
    },
    {
      "epoch": 0.05381456667387076,
      "grad_norm": 1.039565086364746,
      "learning_rate": 3.994073956542866e-05,
      "loss": 1.0937,
      "step": 249
    },
    {
      "epoch": 0.05403068943159715,
      "grad_norm": 0.9709450006484985,
      "learning_rate": 3.993965775001807e-05,
      "loss": 1.016,
      "step": 250
    },
    {
      "epoch": 0.054246812189323534,
      "grad_norm": 1.1056290864944458,
      "learning_rate": 3.993856616422128e-05,
      "loss": 1.0423,
      "step": 251
    },
    {
      "epoch": 0.05446293494704992,
      "grad_norm": 0.8803907036781311,
      "learning_rate": 3.9937464808573155e-05,
      "loss": 1.0442,
      "step": 252
    },
    {
      "epoch": 0.05467905770477631,
      "grad_norm": 0.8767016530036926,
      "learning_rate": 3.9936353683613374e-05,
      "loss": 0.9632,
      "step": 253
    },
    {
      "epoch": 0.054895180462502705,
      "grad_norm": 0.771586537361145,
      "learning_rate": 3.993523278988637e-05,
      "loss": 0.868,
      "step": 254
    },
    {
      "epoch": 0.05511130322022909,
      "grad_norm": 0.9147107005119324,
      "learning_rate": 3.993410212794137e-05,
      "loss": 0.9849,
      "step": 255
    },
    {
      "epoch": 0.05532742597795548,
      "grad_norm": 0.8793245553970337,
      "learning_rate": 3.993296169833242e-05,
      "loss": 1.1097,
      "step": 256
    },
    {
      "epoch": 0.05554354873568187,
      "grad_norm": 0.9457629323005676,
      "learning_rate": 3.993181150161832e-05,
      "loss": 0.9583,
      "step": 257
    },
    {
      "epoch": 0.055759671493408255,
      "grad_norm": 0.946765661239624,
      "learning_rate": 3.993065153836265e-05,
      "loss": 1.0441,
      "step": 258
    },
    {
      "epoch": 0.05597579425113464,
      "grad_norm": 1.1300455331802368,
      "learning_rate": 3.9929481809133806e-05,
      "loss": 1.2199,
      "step": 259
    },
    {
      "epoch": 0.05619191700886103,
      "grad_norm": 0.9175975322723389,
      "learning_rate": 3.992830231450494e-05,
      "loss": 0.9304,
      "step": 260
    },
    {
      "epoch": 0.05640803976658742,
      "grad_norm": 1.046789526939392,
      "learning_rate": 3.9927113055054006e-05,
      "loss": 0.9935,
      "step": 261
    },
    {
      "epoch": 0.05662416252431381,
      "grad_norm": 0.8311507105827332,
      "learning_rate": 3.992591403136375e-05,
      "loss": 1.0011,
      "step": 262
    },
    {
      "epoch": 0.0568402852820402,
      "grad_norm": 0.9540581703186035,
      "learning_rate": 3.9924705244021675e-05,
      "loss": 1.0372,
      "step": 263
    },
    {
      "epoch": 0.05705640803976659,
      "grad_norm": 0.8871579766273499,
      "learning_rate": 3.992348669362009e-05,
      "loss": 0.8266,
      "step": 264
    },
    {
      "epoch": 0.057272530797492976,
      "grad_norm": 0.9213599562644958,
      "learning_rate": 3.9922258380756076e-05,
      "loss": 0.9786,
      "step": 265
    },
    {
      "epoch": 0.057488653555219364,
      "grad_norm": 0.8515825271606445,
      "learning_rate": 3.9921020306031514e-05,
      "loss": 0.9597,
      "step": 266
    },
    {
      "epoch": 0.05770477631294575,
      "grad_norm": 1.0192275047302246,
      "learning_rate": 3.991977247005306e-05,
      "loss": 1.0194,
      "step": 267
    },
    {
      "epoch": 0.05792089907067214,
      "grad_norm": 0.8933265805244446,
      "learning_rate": 3.991851487343213e-05,
      "loss": 0.9933,
      "step": 268
    },
    {
      "epoch": 0.05813702182839853,
      "grad_norm": 0.8800820112228394,
      "learning_rate": 3.9917247516784965e-05,
      "loss": 1.0745,
      "step": 269
    },
    {
      "epoch": 0.05835314458612492,
      "grad_norm": 1.1516456604003906,
      "learning_rate": 3.9915970400732554e-05,
      "loss": 1.2227,
      "step": 270
    },
    {
      "epoch": 0.05856926734385131,
      "grad_norm": 0.8964235782623291,
      "learning_rate": 3.991468352590069e-05,
      "loss": 1.1094,
      "step": 271
    },
    {
      "epoch": 0.0587853901015777,
      "grad_norm": 0.9254226684570312,
      "learning_rate": 3.991338689291993e-05,
      "loss": 1.0838,
      "step": 272
    },
    {
      "epoch": 0.059001512859304085,
      "grad_norm": 1.0113575458526611,
      "learning_rate": 3.9912080502425635e-05,
      "loss": 0.9458,
      "step": 273
    },
    {
      "epoch": 0.05921763561703047,
      "grad_norm": 0.8866898417472839,
      "learning_rate": 3.991076435505792e-05,
      "loss": 1.0424,
      "step": 274
    },
    {
      "epoch": 0.05943375837475686,
      "grad_norm": 0.8620572090148926,
      "learning_rate": 3.9909438451461695e-05,
      "loss": 1.1379,
      "step": 275
    },
    {
      "epoch": 0.05964988113248325,
      "grad_norm": 0.9906118512153625,
      "learning_rate": 3.990810279228665e-05,
      "loss": 1.0947,
      "step": 276
    },
    {
      "epoch": 0.059866003890209636,
      "grad_norm": 0.8600062727928162,
      "learning_rate": 3.9906757378187266e-05,
      "loss": 1.0202,
      "step": 277
    },
    {
      "epoch": 0.06008212664793603,
      "grad_norm": 0.977017343044281,
      "learning_rate": 3.990540220982278e-05,
      "loss": 0.9175,
      "step": 278
    },
    {
      "epoch": 0.06029824940566242,
      "grad_norm": 0.8460851907730103,
      "learning_rate": 3.9904037287857226e-05,
      "loss": 1.0349,
      "step": 279
    },
    {
      "epoch": 0.060514372163388806,
      "grad_norm": 1.07172429561615,
      "learning_rate": 3.9902662612959407e-05,
      "loss": 1.0305,
      "step": 280
    },
    {
      "epoch": 0.060730494921115194,
      "grad_norm": 0.9060229063034058,
      "learning_rate": 3.990127818580293e-05,
      "loss": 0.8089,
      "step": 281
    },
    {
      "epoch": 0.06094661767884158,
      "grad_norm": 1.1195108890533447,
      "learning_rate": 3.9899884007066135e-05,
      "loss": 1.1218,
      "step": 282
    },
    {
      "epoch": 0.06116274043656797,
      "grad_norm": 0.7674738764762878,
      "learning_rate": 3.9898480077432184e-05,
      "loss": 0.7514,
      "step": 283
    },
    {
      "epoch": 0.06137886319429436,
      "grad_norm": 0.8991888761520386,
      "learning_rate": 3.9897066397588986e-05,
      "loss": 0.8423,
      "step": 284
    },
    {
      "epoch": 0.061594985952020745,
      "grad_norm": 0.8716213703155518,
      "learning_rate": 3.989564296822925e-05,
      "loss": 0.7751,
      "step": 285
    },
    {
      "epoch": 0.06181110870974714,
      "grad_norm": 1.0875698328018188,
      "learning_rate": 3.9894209790050466e-05,
      "loss": 1.0288,
      "step": 286
    },
    {
      "epoch": 0.06202723146747353,
      "grad_norm": 0.9232035875320435,
      "learning_rate": 3.989276686375486e-05,
      "loss": 0.9869,
      "step": 287
    },
    {
      "epoch": 0.062243354225199915,
      "grad_norm": 0.8879022598266602,
      "learning_rate": 3.989131419004948e-05,
      "loss": 1.081,
      "step": 288
    },
    {
      "epoch": 0.0624594769829263,
      "grad_norm": 0.97151118516922,
      "learning_rate": 3.9889851769646125e-05,
      "loss": 0.9847,
      "step": 289
    },
    {
      "epoch": 0.06267559974065269,
      "grad_norm": 1.0698877573013306,
      "learning_rate": 3.988837960326139e-05,
      "loss": 1.2144,
      "step": 290
    },
    {
      "epoch": 0.06289172249837909,
      "grad_norm": 0.9475870132446289,
      "learning_rate": 3.9886897691616616e-05,
      "loss": 1.0191,
      "step": 291
    },
    {
      "epoch": 0.06310784525610547,
      "grad_norm": 0.9550348520278931,
      "learning_rate": 3.9885406035437953e-05,
      "loss": 0.9717,
      "step": 292
    },
    {
      "epoch": 0.06332396801383186,
      "grad_norm": 0.9410080313682556,
      "learning_rate": 3.98839046354563e-05,
      "loss": 1.0433,
      "step": 293
    },
    {
      "epoch": 0.06354009077155824,
      "grad_norm": 0.9392552375793457,
      "learning_rate": 3.988239349240733e-05,
      "loss": 1.0808,
      "step": 294
    },
    {
      "epoch": 0.06375621352928464,
      "grad_norm": 1.015593409538269,
      "learning_rate": 3.988087260703152e-05,
      "loss": 1.0099,
      "step": 295
    },
    {
      "epoch": 0.06397233628701102,
      "grad_norm": 1.0544767379760742,
      "learning_rate": 3.987934198007409e-05,
      "loss": 1.0538,
      "step": 296
    },
    {
      "epoch": 0.06418845904473741,
      "grad_norm": 1.0634684562683105,
      "learning_rate": 3.987780161228504e-05,
      "loss": 0.9461,
      "step": 297
    },
    {
      "epoch": 0.0644045818024638,
      "grad_norm": 0.8911001682281494,
      "learning_rate": 3.9876251504419154e-05,
      "loss": 1.0814,
      "step": 298
    },
    {
      "epoch": 0.06462070456019019,
      "grad_norm": 0.9938652515411377,
      "learning_rate": 3.987469165723598e-05,
      "loss": 1.0856,
      "step": 299
    },
    {
      "epoch": 0.06483682731791658,
      "grad_norm": 1.0032827854156494,
      "learning_rate": 3.987312207149983e-05,
      "loss": 1.1615,
      "step": 300
    },
    {
      "epoch": 0.06505295007564296,
      "grad_norm": 0.9022207856178284,
      "learning_rate": 3.987154274797981e-05,
      "loss": 0.9091,
      "step": 301
    },
    {
      "epoch": 0.06526907283336936,
      "grad_norm": 1.1873313188552856,
      "learning_rate": 3.986995368744978e-05,
      "loss": 1.0765,
      "step": 302
    },
    {
      "epoch": 0.06548519559109574,
      "grad_norm": 0.9208522439002991,
      "learning_rate": 3.9868354890688375e-05,
      "loss": 1.1512,
      "step": 303
    },
    {
      "epoch": 0.06570131834882213,
      "grad_norm": 1.0207360982894897,
      "learning_rate": 3.9866746358479e-05,
      "loss": 0.8993,
      "step": 304
    },
    {
      "epoch": 0.06591744110654851,
      "grad_norm": 0.9842541217803955,
      "learning_rate": 3.986512809160984e-05,
      "loss": 1.0306,
      "step": 305
    },
    {
      "epoch": 0.06613356386427491,
      "grad_norm": 1.0802537202835083,
      "learning_rate": 3.986350009087384e-05,
      "loss": 1.1707,
      "step": 306
    },
    {
      "epoch": 0.0663496866220013,
      "grad_norm": 0.945550799369812,
      "learning_rate": 3.9861862357068705e-05,
      "loss": 0.9111,
      "step": 307
    },
    {
      "epoch": 0.06656580937972768,
      "grad_norm": 0.9740046858787537,
      "learning_rate": 3.9860214890996925e-05,
      "loss": 0.9361,
      "step": 308
    },
    {
      "epoch": 0.06678193213745408,
      "grad_norm": 1.0904077291488647,
      "learning_rate": 3.9858557693465766e-05,
      "loss": 0.9004,
      "step": 309
    },
    {
      "epoch": 0.06699805489518046,
      "grad_norm": 0.935242772102356,
      "learning_rate": 3.985689076528725e-05,
      "loss": 1.1445,
      "step": 310
    },
    {
      "epoch": 0.06721417765290685,
      "grad_norm": 0.8713205456733704,
      "learning_rate": 3.985521410727815e-05,
      "loss": 0.794,
      "step": 311
    },
    {
      "epoch": 0.06743030041063323,
      "grad_norm": 0.9313145279884338,
      "learning_rate": 3.985352772026004e-05,
      "loss": 0.9409,
      "step": 312
    },
    {
      "epoch": 0.06764642316835963,
      "grad_norm": 0.9327392578125,
      "learning_rate": 3.9851831605059244e-05,
      "loss": 0.8954,
      "step": 313
    },
    {
      "epoch": 0.06786254592608602,
      "grad_norm": 1.0644276142120361,
      "learning_rate": 3.9850125762506853e-05,
      "loss": 0.9337,
      "step": 314
    },
    {
      "epoch": 0.0680786686838124,
      "grad_norm": 0.9526025652885437,
      "learning_rate": 3.984841019343872e-05,
      "loss": 0.9243,
      "step": 315
    },
    {
      "epoch": 0.0682947914415388,
      "grad_norm": 1.130817174911499,
      "learning_rate": 3.9846684898695474e-05,
      "loss": 0.9321,
      "step": 316
    },
    {
      "epoch": 0.06851091419926518,
      "grad_norm": 0.9395380616188049,
      "learning_rate": 3.9844949879122515e-05,
      "loss": 0.7396,
      "step": 317
    },
    {
      "epoch": 0.06872703695699157,
      "grad_norm": 0.9533188343048096,
      "learning_rate": 3.984320513556999e-05,
      "loss": 1.0038,
      "step": 318
    },
    {
      "epoch": 0.06894315971471796,
      "grad_norm": 0.9289395809173584,
      "learning_rate": 3.984145066889281e-05,
      "loss": 0.9629,
      "step": 319
    },
    {
      "epoch": 0.06915928247244435,
      "grad_norm": 1.1060194969177246,
      "learning_rate": 3.983968647995067e-05,
      "loss": 1.0178,
      "step": 320
    },
    {
      "epoch": 0.06937540523017073,
      "grad_norm": 0.9341673851013184,
      "learning_rate": 3.983791256960803e-05,
      "loss": 0.9548,
      "step": 321
    },
    {
      "epoch": 0.06959152798789713,
      "grad_norm": 0.9757445454597473,
      "learning_rate": 3.9836128938734075e-05,
      "loss": 0.9576,
      "step": 322
    },
    {
      "epoch": 0.06980765074562352,
      "grad_norm": 0.9519818425178528,
      "learning_rate": 3.98343355882028e-05,
      "loss": 1.0839,
      "step": 323
    },
    {
      "epoch": 0.0700237735033499,
      "grad_norm": 0.8457005620002747,
      "learning_rate": 3.983253251889294e-05,
      "loss": 0.9882,
      "step": 324
    },
    {
      "epoch": 0.0702398962610763,
      "grad_norm": 0.9045811891555786,
      "learning_rate": 3.983071973168799e-05,
      "loss": 1.1539,
      "step": 325
    },
    {
      "epoch": 0.07045601901880268,
      "grad_norm": 0.8618796467781067,
      "learning_rate": 3.982889722747621e-05,
      "loss": 0.8228,
      "step": 326
    },
    {
      "epoch": 0.07067214177652907,
      "grad_norm": 0.912308394908905,
      "learning_rate": 3.9827065007150626e-05,
      "loss": 0.9616,
      "step": 327
    },
    {
      "epoch": 0.07088826453425545,
      "grad_norm": 0.8848555684089661,
      "learning_rate": 3.982522307160903e-05,
      "loss": 0.948,
      "step": 328
    },
    {
      "epoch": 0.07110438729198185,
      "grad_norm": 0.839093029499054,
      "learning_rate": 3.982337142175396e-05,
      "loss": 0.9289,
      "step": 329
    },
    {
      "epoch": 0.07132051004970824,
      "grad_norm": 0.908827543258667,
      "learning_rate": 3.9821510058492706e-05,
      "loss": 1.0501,
      "step": 330
    },
    {
      "epoch": 0.07153663280743462,
      "grad_norm": 0.8893517255783081,
      "learning_rate": 3.9819638982737354e-05,
      "loss": 1.0266,
      "step": 331
    },
    {
      "epoch": 0.07175275556516102,
      "grad_norm": 0.8727594614028931,
      "learning_rate": 3.981775819540471e-05,
      "loss": 0.812,
      "step": 332
    },
    {
      "epoch": 0.0719688783228874,
      "grad_norm": 0.833471417427063,
      "learning_rate": 3.9815867697416364e-05,
      "loss": 1.0062,
      "step": 333
    },
    {
      "epoch": 0.07218500108061379,
      "grad_norm": 0.903219997882843,
      "learning_rate": 3.981396748969866e-05,
      "loss": 1.1231,
      "step": 334
    },
    {
      "epoch": 0.07240112383834017,
      "grad_norm": 0.9188300967216492,
      "learning_rate": 3.981205757318268e-05,
      "loss": 0.9167,
      "step": 335
    },
    {
      "epoch": 0.07261724659606657,
      "grad_norm": 0.9991043210029602,
      "learning_rate": 3.981013794880429e-05,
      "loss": 0.8541,
      "step": 336
    },
    {
      "epoch": 0.07283336935379295,
      "grad_norm": 0.8712428212165833,
      "learning_rate": 3.9808208617504106e-05,
      "loss": 0.8442,
      "step": 337
    },
    {
      "epoch": 0.07304949211151934,
      "grad_norm": 1.0942577123641968,
      "learning_rate": 3.980626958022748e-05,
      "loss": 1.0906,
      "step": 338
    },
    {
      "epoch": 0.07326561486924574,
      "grad_norm": 0.9288346171379089,
      "learning_rate": 3.9804320837924554e-05,
      "loss": 1.0523,
      "step": 339
    },
    {
      "epoch": 0.07348173762697212,
      "grad_norm": 1.0512988567352295,
      "learning_rate": 3.9802362391550195e-05,
      "loss": 1.0331,
      "step": 340
    },
    {
      "epoch": 0.07369786038469851,
      "grad_norm": 0.9598914384841919,
      "learning_rate": 3.980039424206404e-05,
      "loss": 1.0145,
      "step": 341
    },
    {
      "epoch": 0.0739139831424249,
      "grad_norm": 0.929222822189331,
      "learning_rate": 3.9798416390430485e-05,
      "loss": 0.9458,
      "step": 342
    },
    {
      "epoch": 0.07413010590015129,
      "grad_norm": 0.9733883738517761,
      "learning_rate": 3.979642883761866e-05,
      "loss": 1.0217,
      "step": 343
    },
    {
      "epoch": 0.07434622865787767,
      "grad_norm": 0.9186878204345703,
      "learning_rate": 3.9794431584602466e-05,
      "loss": 1.1528,
      "step": 344
    },
    {
      "epoch": 0.07456235141560406,
      "grad_norm": 0.8842979073524475,
      "learning_rate": 3.979242463236055e-05,
      "loss": 0.9351,
      "step": 345
    },
    {
      "epoch": 0.07477847417333044,
      "grad_norm": 0.9121730327606201,
      "learning_rate": 3.979040798187633e-05,
      "loss": 1.0886,
      "step": 346
    },
    {
      "epoch": 0.07499459693105684,
      "grad_norm": 0.8223870992660522,
      "learning_rate": 3.978838163413793e-05,
      "loss": 0.8619,
      "step": 347
    },
    {
      "epoch": 0.07521071968878323,
      "grad_norm": 1.1758759021759033,
      "learning_rate": 3.978634559013829e-05,
      "loss": 0.8168,
      "step": 348
    },
    {
      "epoch": 0.07542684244650961,
      "grad_norm": 0.948322057723999,
      "learning_rate": 3.978429985087504e-05,
      "loss": 0.8581,
      "step": 349
    },
    {
      "epoch": 0.07564296520423601,
      "grad_norm": 0.8527007102966309,
      "learning_rate": 3.97822444173506e-05,
      "loss": 1.0509,
      "step": 350
    },
    {
      "epoch": 0.07585908796196239,
      "grad_norm": 0.8772498965263367,
      "learning_rate": 3.978017929057213e-05,
      "loss": 0.8964,
      "step": 351
    },
    {
      "epoch": 0.07607521071968879,
      "grad_norm": 1.080377459526062,
      "learning_rate": 3.977810447155154e-05,
      "loss": 1.118,
      "step": 352
    },
    {
      "epoch": 0.07629133347741517,
      "grad_norm": 0.9243336319923401,
      "learning_rate": 3.977601996130546e-05,
      "loss": 1.0803,
      "step": 353
    },
    {
      "epoch": 0.07650745623514156,
      "grad_norm": 0.8424449563026428,
      "learning_rate": 3.9773925760855324e-05,
      "loss": 1.0326,
      "step": 354
    },
    {
      "epoch": 0.07672357899286796,
      "grad_norm": 0.9570095539093018,
      "learning_rate": 3.977182187122728e-05,
      "loss": 0.8873,
      "step": 355
    },
    {
      "epoch": 0.07693970175059434,
      "grad_norm": 0.7843940854072571,
      "learning_rate": 3.976970829345222e-05,
      "loss": 0.9296,
      "step": 356
    },
    {
      "epoch": 0.07715582450832073,
      "grad_norm": 0.8542441725730896,
      "learning_rate": 3.97675850285658e-05,
      "loss": 0.861,
      "step": 357
    },
    {
      "epoch": 0.07737194726604711,
      "grad_norm": 1.0213744640350342,
      "learning_rate": 3.9765452077608404e-05,
      "loss": 0.9373,
      "step": 358
    },
    {
      "epoch": 0.0775880700237735,
      "grad_norm": 0.9188033938407898,
      "learning_rate": 3.976330944162519e-05,
      "loss": 1.0499,
      "step": 359
    },
    {
      "epoch": 0.07780419278149989,
      "grad_norm": 0.911727249622345,
      "learning_rate": 3.9761157121666034e-05,
      "loss": 0.995,
      "step": 360
    },
    {
      "epoch": 0.07802031553922628,
      "grad_norm": 0.8088400363922119,
      "learning_rate": 3.9758995118785566e-05,
      "loss": 1.1295,
      "step": 361
    },
    {
      "epoch": 0.07823643829695266,
      "grad_norm": 0.8860215544700623,
      "learning_rate": 3.975682343404317e-05,
      "loss": 1.0503,
      "step": 362
    },
    {
      "epoch": 0.07845256105467906,
      "grad_norm": 0.8325342535972595,
      "learning_rate": 3.975464206850296e-05,
      "loss": 0.8244,
      "step": 363
    },
    {
      "epoch": 0.07866868381240545,
      "grad_norm": 0.9521036744117737,
      "learning_rate": 3.9752451023233804e-05,
      "loss": 1.0406,
      "step": 364
    },
    {
      "epoch": 0.07888480657013183,
      "grad_norm": 0.9645699858665466,
      "learning_rate": 3.975025029930931e-05,
      "loss": 0.9931,
      "step": 365
    },
    {
      "epoch": 0.07910092932785823,
      "grad_norm": 1.0489739179611206,
      "learning_rate": 3.974803989780782e-05,
      "loss": 0.8751,
      "step": 366
    },
    {
      "epoch": 0.07931705208558461,
      "grad_norm": 1.0184046030044556,
      "learning_rate": 3.974581981981243e-05,
      "loss": 1.1351,
      "step": 367
    },
    {
      "epoch": 0.079533174843311,
      "grad_norm": 0.84317547082901,
      "learning_rate": 3.9743590066410984e-05,
      "loss": 0.8186,
      "step": 368
    },
    {
      "epoch": 0.07974929760103738,
      "grad_norm": 0.9043307304382324,
      "learning_rate": 3.9741350638696034e-05,
      "loss": 0.903,
      "step": 369
    },
    {
      "epoch": 0.07996542035876378,
      "grad_norm": 0.843694806098938,
      "learning_rate": 3.973910153776492e-05,
      "loss": 1.0774,
      "step": 370
    },
    {
      "epoch": 0.08018154311649017,
      "grad_norm": 0.8882518410682678,
      "learning_rate": 3.973684276471967e-05,
      "loss": 0.9999,
      "step": 371
    },
    {
      "epoch": 0.08039766587421655,
      "grad_norm": 0.997040331363678,
      "learning_rate": 3.97345743206671e-05,
      "loss": 1.201,
      "step": 372
    },
    {
      "epoch": 0.08061378863194295,
      "grad_norm": 0.7990419268608093,
      "learning_rate": 3.9732296206718724e-05,
      "loss": 0.9738,
      "step": 373
    },
    {
      "epoch": 0.08082991138966933,
      "grad_norm": 0.9458608627319336,
      "learning_rate": 3.973000842399082e-05,
      "loss": 0.9205,
      "step": 374
    },
    {
      "epoch": 0.08104603414739572,
      "grad_norm": 0.9523917436599731,
      "learning_rate": 3.9727710973604406e-05,
      "loss": 0.9653,
      "step": 375
    },
    {
      "epoch": 0.0812621569051221,
      "grad_norm": 1.1069090366363525,
      "learning_rate": 3.972540385668522e-05,
      "loss": 0.907,
      "step": 376
    },
    {
      "epoch": 0.0814782796628485,
      "grad_norm": 1.0182411670684814,
      "learning_rate": 3.972308707436374e-05,
      "loss": 1.0174,
      "step": 377
    },
    {
      "epoch": 0.08169440242057488,
      "grad_norm": 0.9523218870162964,
      "learning_rate": 3.972076062777518e-05,
      "loss": 0.9735,
      "step": 378
    },
    {
      "epoch": 0.08191052517830127,
      "grad_norm": 0.9677371382713318,
      "learning_rate": 3.97184245180595e-05,
      "loss": 0.9037,
      "step": 379
    },
    {
      "epoch": 0.08212664793602767,
      "grad_norm": 0.9076384902000427,
      "learning_rate": 3.97160787463614e-05,
      "loss": 1.0213,
      "step": 380
    },
    {
      "epoch": 0.08234277069375405,
      "grad_norm": 0.8701322674751282,
      "learning_rate": 3.971372331383028e-05,
      "loss": 0.9872,
      "step": 381
    },
    {
      "epoch": 0.08255889345148044,
      "grad_norm": 0.841193437576294,
      "learning_rate": 3.9711358221620315e-05,
      "loss": 0.7858,
      "step": 382
    },
    {
      "epoch": 0.08277501620920683,
      "grad_norm": 0.9040749669075012,
      "learning_rate": 3.9708983470890385e-05,
      "loss": 0.9726,
      "step": 383
    },
    {
      "epoch": 0.08299113896693322,
      "grad_norm": 0.8368477821350098,
      "learning_rate": 3.970659906280411e-05,
      "loss": 1.0165,
      "step": 384
    },
    {
      "epoch": 0.0832072617246596,
      "grad_norm": 0.9723634123802185,
      "learning_rate": 3.970420499852986e-05,
      "loss": 1.004,
      "step": 385
    },
    {
      "epoch": 0.083423384482386,
      "grad_norm": 0.8730185031890869,
      "learning_rate": 3.9701801279240704e-05,
      "loss": 0.9924,
      "step": 386
    },
    {
      "epoch": 0.08363950724011239,
      "grad_norm": 0.869093120098114,
      "learning_rate": 3.969938790611447e-05,
      "loss": 0.9814,
      "step": 387
    },
    {
      "epoch": 0.08385562999783877,
      "grad_norm": 0.9956414103507996,
      "learning_rate": 3.969696488033369e-05,
      "loss": 1.0197,
      "step": 388
    },
    {
      "epoch": 0.08407175275556517,
      "grad_norm": 1.1042028665542603,
      "learning_rate": 3.969453220308566e-05,
      "loss": 1.0194,
      "step": 389
    },
    {
      "epoch": 0.08428787551329155,
      "grad_norm": 0.9331915378570557,
      "learning_rate": 3.9692089875562374e-05,
      "loss": 0.8877,
      "step": 390
    },
    {
      "epoch": 0.08450399827101794,
      "grad_norm": 0.9793432950973511,
      "learning_rate": 3.968963789896057e-05,
      "loss": 1.0923,
      "step": 391
    },
    {
      "epoch": 0.08472012102874432,
      "grad_norm": 0.9753433465957642,
      "learning_rate": 3.968717627448172e-05,
      "loss": 1.1249,
      "step": 392
    },
    {
      "epoch": 0.08493624378647072,
      "grad_norm": 1.0509642362594604,
      "learning_rate": 3.9684705003331994e-05,
      "loss": 0.9646,
      "step": 393
    },
    {
      "epoch": 0.0851523665441971,
      "grad_norm": 0.9101366400718689,
      "learning_rate": 3.968222408672232e-05,
      "loss": 1.0473,
      "step": 394
    },
    {
      "epoch": 0.08536848930192349,
      "grad_norm": 0.9615835547447205,
      "learning_rate": 3.967973352586835e-05,
      "loss": 0.9445,
      "step": 395
    },
    {
      "epoch": 0.08558461205964989,
      "grad_norm": 0.885998010635376,
      "learning_rate": 3.967723332199045e-05,
      "loss": 0.7974,
      "step": 396
    },
    {
      "epoch": 0.08580073481737627,
      "grad_norm": 0.9641144871711731,
      "learning_rate": 3.96747234763137e-05,
      "loss": 1.0299,
      "step": 397
    },
    {
      "epoch": 0.08601685757510266,
      "grad_norm": 1.0174018144607544,
      "learning_rate": 3.967220399006793e-05,
      "loss": 0.9633,
      "step": 398
    },
    {
      "epoch": 0.08623298033282904,
      "grad_norm": 0.8235479593276978,
      "learning_rate": 3.966967486448768e-05,
      "loss": 0.8539,
      "step": 399
    },
    {
      "epoch": 0.08644910309055544,
      "grad_norm": 0.9421714544296265,
      "learning_rate": 3.966713610081222e-05,
      "loss": 1.2542,
      "step": 400
    },
    {
      "epoch": 0.08666522584828182,
      "grad_norm": 0.9316366314888,
      "learning_rate": 3.966458770028552e-05,
      "loss": 1.106,
      "step": 401
    },
    {
      "epoch": 0.08688134860600821,
      "grad_norm": 0.9297628402709961,
      "learning_rate": 3.9662029664156325e-05,
      "loss": 1.0657,
      "step": 402
    },
    {
      "epoch": 0.08709747136373461,
      "grad_norm": 1.0702085494995117,
      "learning_rate": 3.965946199367804e-05,
      "loss": 0.9823,
      "step": 403
    },
    {
      "epoch": 0.08731359412146099,
      "grad_norm": 0.9327825903892517,
      "learning_rate": 3.9656884690108824e-05,
      "loss": 0.9744,
      "step": 404
    },
    {
      "epoch": 0.08752971687918738,
      "grad_norm": 0.9128959774971008,
      "learning_rate": 3.965429775471155e-05,
      "loss": 0.8884,
      "step": 405
    },
    {
      "epoch": 0.08774583963691376,
      "grad_norm": 0.8637864589691162,
      "learning_rate": 3.9651701188753806e-05,
      "loss": 0.9057,
      "step": 406
    },
    {
      "epoch": 0.08796196239464016,
      "grad_norm": 0.9656795263290405,
      "learning_rate": 3.9649094993507905e-05,
      "loss": 1.0883,
      "step": 407
    },
    {
      "epoch": 0.08817808515236654,
      "grad_norm": 0.8900482058525085,
      "learning_rate": 3.964647917025089e-05,
      "loss": 0.8608,
      "step": 408
    },
    {
      "epoch": 0.08839420791009293,
      "grad_norm": 0.907908022403717,
      "learning_rate": 3.964385372026449e-05,
      "loss": 0.8995,
      "step": 409
    },
    {
      "epoch": 0.08861033066781931,
      "grad_norm": 0.9215573072433472,
      "learning_rate": 3.964121864483518e-05,
      "loss": 0.9278,
      "step": 410
    },
    {
      "epoch": 0.08882645342554571,
      "grad_norm": 0.8984443545341492,
      "learning_rate": 3.963857394525413e-05,
      "loss": 0.9938,
      "step": 411
    },
    {
      "epoch": 0.0890425761832721,
      "grad_norm": 0.9193916320800781,
      "learning_rate": 3.963591962281726e-05,
      "loss": 1.0218,
      "step": 412
    },
    {
      "epoch": 0.08925869894099848,
      "grad_norm": 0.8618556261062622,
      "learning_rate": 3.9633255678825156e-05,
      "loss": 1.0636,
      "step": 413
    },
    {
      "epoch": 0.08947482169872488,
      "grad_norm": 1.1572033166885376,
      "learning_rate": 3.9630582114583165e-05,
      "loss": 0.9855,
      "step": 414
    },
    {
      "epoch": 0.08969094445645126,
      "grad_norm": 0.8636722564697266,
      "learning_rate": 3.9627898931401307e-05,
      "loss": 1.1296,
      "step": 415
    },
    {
      "epoch": 0.08990706721417766,
      "grad_norm": 0.9317108988761902,
      "learning_rate": 3.9625206130594345e-05,
      "loss": 1.076,
      "step": 416
    },
    {
      "epoch": 0.09012318997190404,
      "grad_norm": 0.9154568910598755,
      "learning_rate": 3.962250371348175e-05,
      "loss": 1.0057,
      "step": 417
    },
    {
      "epoch": 0.09033931272963043,
      "grad_norm": 1.0209555625915527,
      "learning_rate": 3.961979168138769e-05,
      "loss": 1.0806,
      "step": 418
    },
    {
      "epoch": 0.09055543548735683,
      "grad_norm": 0.9097704291343689,
      "learning_rate": 3.9617070035641075e-05,
      "loss": 0.8494,
      "step": 419
    },
    {
      "epoch": 0.0907715582450832,
      "grad_norm": 0.8296076655387878,
      "learning_rate": 3.961433877757548e-05,
      "loss": 1.0176,
      "step": 420
    },
    {
      "epoch": 0.0909876810028096,
      "grad_norm": 0.8262540698051453,
      "learning_rate": 3.9611597908529224e-05,
      "loss": 0.863,
      "step": 421
    },
    {
      "epoch": 0.09120380376053598,
      "grad_norm": 1.1728832721710205,
      "learning_rate": 3.9608847429845336e-05,
      "loss": 0.968,
      "step": 422
    },
    {
      "epoch": 0.09141992651826238,
      "grad_norm": 0.7996010780334473,
      "learning_rate": 3.960608734287153e-05,
      "loss": 0.8249,
      "step": 423
    },
    {
      "epoch": 0.09163604927598876,
      "grad_norm": 0.929660975933075,
      "learning_rate": 3.960331764896025e-05,
      "loss": 0.8146,
      "step": 424
    },
    {
      "epoch": 0.09185217203371515,
      "grad_norm": 0.8653681874275208,
      "learning_rate": 3.960053834946864e-05,
      "loss": 0.8004,
      "step": 425
    },
    {
      "epoch": 0.09206829479144153,
      "grad_norm": 0.8965323567390442,
      "learning_rate": 3.9597749445758545e-05,
      "loss": 0.9186,
      "step": 426
    },
    {
      "epoch": 0.09228441754916793,
      "grad_norm": 1.086283564567566,
      "learning_rate": 3.9594950939196535e-05,
      "loss": 0.947,
      "step": 427
    },
    {
      "epoch": 0.09250054030689432,
      "grad_norm": 0.8903186321258545,
      "learning_rate": 3.959214283115385e-05,
      "loss": 0.7717,
      "step": 428
    },
    {
      "epoch": 0.0927166630646207,
      "grad_norm": 0.9546589255332947,
      "learning_rate": 3.9589325123006476e-05,
      "loss": 1.2013,
      "step": 429
    },
    {
      "epoch": 0.0929327858223471,
      "grad_norm": 0.9602444171905518,
      "learning_rate": 3.958649781613507e-05,
      "loss": 1.0055,
      "step": 430
    },
    {
      "epoch": 0.09314890858007348,
      "grad_norm": 0.8137685060501099,
      "learning_rate": 3.958366091192502e-05,
      "loss": 0.8361,
      "step": 431
    },
    {
      "epoch": 0.09336503133779987,
      "grad_norm": 1.2224217653274536,
      "learning_rate": 3.958081441176639e-05,
      "loss": 1.0727,
      "step": 432
    },
    {
      "epoch": 0.09358115409552625,
      "grad_norm": 0.9968867301940918,
      "learning_rate": 3.957795831705396e-05,
      "loss": 0.947,
      "step": 433
    },
    {
      "epoch": 0.09379727685325265,
      "grad_norm": 0.954623281955719,
      "learning_rate": 3.957509262918721e-05,
      "loss": 0.9429,
      "step": 434
    },
    {
      "epoch": 0.09401339961097904,
      "grad_norm": 0.9548289179801941,
      "learning_rate": 3.957221734957032e-05,
      "loss": 1.0949,
      "step": 435
    },
    {
      "epoch": 0.09422952236870542,
      "grad_norm": 0.8827535510063171,
      "learning_rate": 3.956933247961218e-05,
      "loss": 0.9591,
      "step": 436
    },
    {
      "epoch": 0.09444564512643182,
      "grad_norm": 0.9374966025352478,
      "learning_rate": 3.9566438020726366e-05,
      "loss": 1.1719,
      "step": 437
    },
    {
      "epoch": 0.0946617678841582,
      "grad_norm": 1.0943764448165894,
      "learning_rate": 3.956353397433115e-05,
      "loss": 0.8931,
      "step": 438
    },
    {
      "epoch": 0.0948778906418846,
      "grad_norm": 0.9052419066429138,
      "learning_rate": 3.9560620341849504e-05,
      "loss": 0.9404,
      "step": 439
    },
    {
      "epoch": 0.09509401339961097,
      "grad_norm": 0.8661943674087524,
      "learning_rate": 3.955769712470912e-05,
      "loss": 0.8814,
      "step": 440
    },
    {
      "epoch": 0.09531013615733737,
      "grad_norm": 1.0524399280548096,
      "learning_rate": 3.9554764324342344e-05,
      "loss": 1.124,
      "step": 441
    },
    {
      "epoch": 0.09552625891506375,
      "grad_norm": 0.8547698259353638,
      "learning_rate": 3.9551821942186264e-05,
      "loss": 1.0171,
      "step": 442
    },
    {
      "epoch": 0.09574238167279014,
      "grad_norm": 0.7743398547172546,
      "learning_rate": 3.954886997968262e-05,
      "loss": 0.9985,
      "step": 443
    },
    {
      "epoch": 0.09595850443051654,
      "grad_norm": 0.8728423118591309,
      "learning_rate": 3.9545908438277885e-05,
      "loss": 0.9533,
      "step": 444
    },
    {
      "epoch": 0.09617462718824292,
      "grad_norm": 0.8962077498435974,
      "learning_rate": 3.954293731942319e-05,
      "loss": 0.936,
      "step": 445
    },
    {
      "epoch": 0.09639074994596931,
      "grad_norm": 0.7698949575424194,
      "learning_rate": 3.953995662457439e-05,
      "loss": 0.8866,
      "step": 446
    },
    {
      "epoch": 0.0966068727036957,
      "grad_norm": 0.9615756273269653,
      "learning_rate": 3.9536966355192016e-05,
      "loss": 0.8821,
      "step": 447
    },
    {
      "epoch": 0.09682299546142209,
      "grad_norm": 1.023740291595459,
      "learning_rate": 3.953396651274129e-05,
      "loss": 1.214,
      "step": 448
    },
    {
      "epoch": 0.09703911821914847,
      "grad_norm": 0.912865161895752,
      "learning_rate": 3.9530957098692126e-05,
      "loss": 1.0308,
      "step": 449
    },
    {
      "epoch": 0.09725524097687487,
      "grad_norm": 0.8043718338012695,
      "learning_rate": 3.9527938114519134e-05,
      "loss": 1.1519,
      "step": 450
    },
    {
      "epoch": 0.09747136373460126,
      "grad_norm": 0.7729007601737976,
      "learning_rate": 3.9524909561701615e-05,
      "loss": 0.9035,
      "step": 451
    },
    {
      "epoch": 0.09768748649232764,
      "grad_norm": 0.9104475378990173,
      "learning_rate": 3.9521871441723534e-05,
      "loss": 0.9637,
      "step": 452
    },
    {
      "epoch": 0.09790360925005404,
      "grad_norm": 0.9340957403182983,
      "learning_rate": 3.951882375607358e-05,
      "loss": 1.0346,
      "step": 453
    },
    {
      "epoch": 0.09811973200778042,
      "grad_norm": 0.8758137822151184,
      "learning_rate": 3.95157665062451e-05,
      "loss": 0.9313,
      "step": 454
    },
    {
      "epoch": 0.09833585476550681,
      "grad_norm": 1.1035685539245605,
      "learning_rate": 3.951269969373616e-05,
      "loss": 1.0212,
      "step": 455
    },
    {
      "epoch": 0.09855197752323319,
      "grad_norm": 0.9677191376686096,
      "learning_rate": 3.950962332004947e-05,
      "loss": 0.972,
      "step": 456
    },
    {
      "epoch": 0.09876810028095959,
      "grad_norm": 0.9146068096160889,
      "learning_rate": 3.9506537386692455e-05,
      "loss": 0.8206,
      "step": 457
    },
    {
      "epoch": 0.09898422303868597,
      "grad_norm": 1.0270830392837524,
      "learning_rate": 3.9503441895177206e-05,
      "loss": 0.9351,
      "step": 458
    },
    {
      "epoch": 0.09920034579641236,
      "grad_norm": 0.8653499484062195,
      "learning_rate": 3.950033684702051e-05,
      "loss": 0.864,
      "step": 459
    },
    {
      "epoch": 0.09941646855413876,
      "grad_norm": 0.8095022439956665,
      "learning_rate": 3.949722224374385e-05,
      "loss": 0.8414,
      "step": 460
    },
    {
      "epoch": 0.09963259131186514,
      "grad_norm": 1.0095607042312622,
      "learning_rate": 3.949409808687336e-05,
      "loss": 0.903,
      "step": 461
    },
    {
      "epoch": 0.09984871406959153,
      "grad_norm": 0.8256875872612,
      "learning_rate": 3.949096437793986e-05,
      "loss": 1.0661,
      "step": 462
    },
    {
      "epoch": 0.10006483682731791,
      "grad_norm": 0.8819296360015869,
      "learning_rate": 3.9487821118478885e-05,
      "loss": 1.1813,
      "step": 463
    },
    {
      "epoch": 0.10028095958504431,
      "grad_norm": 0.8339661955833435,
      "learning_rate": 3.9484668310030604e-05,
      "loss": 1.028,
      "step": 464
    },
    {
      "epoch": 0.10049708234277069,
      "grad_norm": 1.0233263969421387,
      "learning_rate": 3.9481505954139896e-05,
      "loss": 0.9629,
      "step": 465
    },
    {
      "epoch": 0.10071320510049708,
      "grad_norm": 1.049281358718872,
      "learning_rate": 3.94783340523563e-05,
      "loss": 0.9291,
      "step": 466
    },
    {
      "epoch": 0.10092932785822348,
      "grad_norm": 0.8857412934303284,
      "learning_rate": 3.947515260623405e-05,
      "loss": 0.9623,
      "step": 467
    },
    {
      "epoch": 0.10114545061594986,
      "grad_norm": 0.9321138858795166,
      "learning_rate": 3.947196161733205e-05,
      "loss": 1.0166,
      "step": 468
    },
    {
      "epoch": 0.10136157337367625,
      "grad_norm": 0.9237700700759888,
      "learning_rate": 3.9468761087213864e-05,
      "loss": 1.0322,
      "step": 469
    },
    {
      "epoch": 0.10157769613140263,
      "grad_norm": 1.0547393560409546,
      "learning_rate": 3.946555101744775e-05,
      "loss": 0.8755,
      "step": 470
    },
    {
      "epoch": 0.10179381888912903,
      "grad_norm": 0.9208168983459473,
      "learning_rate": 3.9462331409606636e-05,
      "loss": 0.9245,
      "step": 471
    },
    {
      "epoch": 0.10200994164685541,
      "grad_norm": 0.9916532039642334,
      "learning_rate": 3.9459102265268115e-05,
      "loss": 1.1441,
      "step": 472
    },
    {
      "epoch": 0.1022260644045818,
      "grad_norm": 0.9643540382385254,
      "learning_rate": 3.9455863586014474e-05,
      "loss": 1.1193,
      "step": 473
    },
    {
      "epoch": 0.10244218716230818,
      "grad_norm": 1.0929360389709473,
      "learning_rate": 3.9452615373432645e-05,
      "loss": 1.0725,
      "step": 474
    },
    {
      "epoch": 0.10265830992003458,
      "grad_norm": 0.8778156042098999,
      "learning_rate": 3.944935762911426e-05,
      "loss": 0.8897,
      "step": 475
    },
    {
      "epoch": 0.10287443267776097,
      "grad_norm": 0.9703811407089233,
      "learning_rate": 3.9446090354655596e-05,
      "loss": 0.9387,
      "step": 476
    },
    {
      "epoch": 0.10309055543548735,
      "grad_norm": 0.9691447615623474,
      "learning_rate": 3.944281355165761e-05,
      "loss": 0.9189,
      "step": 477
    },
    {
      "epoch": 0.10330667819321375,
      "grad_norm": 0.7944625020027161,
      "learning_rate": 3.943952722172592e-05,
      "loss": 0.9113,
      "step": 478
    },
    {
      "epoch": 0.10352280095094013,
      "grad_norm": 0.9441581964492798,
      "learning_rate": 3.9436231366470836e-05,
      "loss": 0.9868,
      "step": 479
    },
    {
      "epoch": 0.10373892370866653,
      "grad_norm": 0.9912706613540649,
      "learning_rate": 3.943292598750732e-05,
      "loss": 1.0999,
      "step": 480
    },
    {
      "epoch": 0.1039550464663929,
      "grad_norm": 0.9922301173210144,
      "learning_rate": 3.9429611086454975e-05,
      "loss": 0.882,
      "step": 481
    },
    {
      "epoch": 0.1041711692241193,
      "grad_norm": 0.9965765476226807,
      "learning_rate": 3.9426286664938123e-05,
      "loss": 1.0958,
      "step": 482
    },
    {
      "epoch": 0.1043872919818457,
      "grad_norm": 1.0244691371917725,
      "learning_rate": 3.942295272458571e-05,
      "loss": 1.0144,
      "step": 483
    },
    {
      "epoch": 0.10460341473957208,
      "grad_norm": 1.0300536155700684,
      "learning_rate": 3.941960926703137e-05,
      "loss": 0.912,
      "step": 484
    },
    {
      "epoch": 0.10481953749729847,
      "grad_norm": 1.0701106786727905,
      "learning_rate": 3.9416256293913376e-05,
      "loss": 0.838,
      "step": 485
    },
    {
      "epoch": 0.10503566025502485,
      "grad_norm": 0.9118524193763733,
      "learning_rate": 3.941289380687468e-05,
      "loss": 0.9516,
      "step": 486
    },
    {
      "epoch": 0.10525178301275125,
      "grad_norm": 0.8736467957496643,
      "learning_rate": 3.940952180756289e-05,
      "loss": 0.9668,
      "step": 487
    },
    {
      "epoch": 0.10546790577047763,
      "grad_norm": 0.9006360173225403,
      "learning_rate": 3.9406140297630286e-05,
      "loss": 0.7392,
      "step": 488
    },
    {
      "epoch": 0.10568402852820402,
      "grad_norm": 0.8101032376289368,
      "learning_rate": 3.94027492787338e-05,
      "loss": 0.881,
      "step": 489
    },
    {
      "epoch": 0.1059001512859304,
      "grad_norm": 0.9181492924690247,
      "learning_rate": 3.939934875253502e-05,
      "loss": 0.8168,
      "step": 490
    },
    {
      "epoch": 0.1061162740436568,
      "grad_norm": 0.9278901219367981,
      "learning_rate": 3.9395938720700196e-05,
      "loss": 0.8411,
      "step": 491
    },
    {
      "epoch": 0.10633239680138319,
      "grad_norm": 0.958052396774292,
      "learning_rate": 3.939251918490023e-05,
      "loss": 0.8052,
      "step": 492
    },
    {
      "epoch": 0.10654851955910957,
      "grad_norm": 0.934644341468811,
      "learning_rate": 3.9389090146810696e-05,
      "loss": 0.9906,
      "step": 493
    },
    {
      "epoch": 0.10676464231683597,
      "grad_norm": 0.888184130191803,
      "learning_rate": 3.938565160811181e-05,
      "loss": 1.0343,
      "step": 494
    },
    {
      "epoch": 0.10698076507456235,
      "grad_norm": 1.039035677909851,
      "learning_rate": 3.938220357048845e-05,
      "loss": 1.0156,
      "step": 495
    },
    {
      "epoch": 0.10719688783228874,
      "grad_norm": 0.9711042642593384,
      "learning_rate": 3.937874603563015e-05,
      "loss": 1.0355,
      "step": 496
    },
    {
      "epoch": 0.10741301059001512,
      "grad_norm": 0.8338973522186279,
      "learning_rate": 3.9375279005231084e-05,
      "loss": 0.8755,
      "step": 497
    },
    {
      "epoch": 0.10762913334774152,
      "grad_norm": 0.9922994375228882,
      "learning_rate": 3.93718024809901e-05,
      "loss": 1.0939,
      "step": 498
    },
    {
      "epoch": 0.10784525610546791,
      "grad_norm": 1.0171689987182617,
      "learning_rate": 3.936831646461068e-05,
      "loss": 1.0244,
      "step": 499
    },
    {
      "epoch": 0.1080613788631943,
      "grad_norm": 0.8213376402854919,
      "learning_rate": 3.936482095780096e-05,
      "loss": 1.12,
      "step": 500
    },
    {
      "epoch": 0.10827750162092069,
      "grad_norm": 1.0276782512664795,
      "learning_rate": 3.936131596227374e-05,
      "loss": 1.0105,
      "step": 501
    },
    {
      "epoch": 0.10849362437864707,
      "grad_norm": 0.9741190671920776,
      "learning_rate": 3.935780147974646e-05,
      "loss": 0.8942,
      "step": 502
    },
    {
      "epoch": 0.10870974713637346,
      "grad_norm": 0.9998877644538879,
      "learning_rate": 3.935427751194119e-05,
      "loss": 1.0869,
      "step": 503
    },
    {
      "epoch": 0.10892586989409984,
      "grad_norm": 0.8405811190605164,
      "learning_rate": 3.935074406058469e-05,
      "loss": 0.9447,
      "step": 504
    },
    {
      "epoch": 0.10914199265182624,
      "grad_norm": 0.9247153401374817,
      "learning_rate": 3.9347201127408335e-05,
      "loss": 0.9206,
      "step": 505
    },
    {
      "epoch": 0.10935811540955262,
      "grad_norm": 0.8571478128433228,
      "learning_rate": 3.934364871414815e-05,
      "loss": 0.9402,
      "step": 506
    },
    {
      "epoch": 0.10957423816727901,
      "grad_norm": 0.9175286293029785,
      "learning_rate": 3.9340086822544806e-05,
      "loss": 0.854,
      "step": 507
    },
    {
      "epoch": 0.10979036092500541,
      "grad_norm": 1.0318107604980469,
      "learning_rate": 3.9336515454343634e-05,
      "loss": 1.066,
      "step": 508
    },
    {
      "epoch": 0.11000648368273179,
      "grad_norm": 0.9785745143890381,
      "learning_rate": 3.9332934611294575e-05,
      "loss": 0.9719,
      "step": 509
    },
    {
      "epoch": 0.11022260644045818,
      "grad_norm": 0.9880504012107849,
      "learning_rate": 3.932934429515226e-05,
      "loss": 1.005,
      "step": 510
    },
    {
      "epoch": 0.11043872919818457,
      "grad_norm": 0.8954070806503296,
      "learning_rate": 3.932574450767592e-05,
      "loss": 0.9407,
      "step": 511
    },
    {
      "epoch": 0.11065485195591096,
      "grad_norm": 0.9446602463722229,
      "learning_rate": 3.932213525062945e-05,
      "loss": 1.1116,
      "step": 512
    },
    {
      "epoch": 0.11087097471363734,
      "grad_norm": 0.9656209349632263,
      "learning_rate": 3.931851652578137e-05,
      "loss": 0.9174,
      "step": 513
    },
    {
      "epoch": 0.11108709747136374,
      "grad_norm": 0.9980925917625427,
      "learning_rate": 3.9314888334904846e-05,
      "loss": 0.975,
      "step": 514
    },
    {
      "epoch": 0.11130322022909012,
      "grad_norm": 0.9334444403648376,
      "learning_rate": 3.9311250679777694e-05,
      "loss": 1.0594,
      "step": 515
    },
    {
      "epoch": 0.11151934298681651,
      "grad_norm": 1.04780912399292,
      "learning_rate": 3.930760356218235e-05,
      "loss": 0.9406,
      "step": 516
    },
    {
      "epoch": 0.1117354657445429,
      "grad_norm": 0.9021848440170288,
      "learning_rate": 3.930394698390589e-05,
      "loss": 0.9542,
      "step": 517
    },
    {
      "epoch": 0.11195158850226929,
      "grad_norm": 0.9023771286010742,
      "learning_rate": 3.9300280946740034e-05,
      "loss": 1.1127,
      "step": 518
    },
    {
      "epoch": 0.11216771125999568,
      "grad_norm": 0.8352866768836975,
      "learning_rate": 3.929660545248113e-05,
      "loss": 0.7962,
      "step": 519
    },
    {
      "epoch": 0.11238383401772206,
      "grad_norm": 0.8192297220230103,
      "learning_rate": 3.929292050293016e-05,
      "loss": 0.7481,
      "step": 520
    },
    {
      "epoch": 0.11259995677544846,
      "grad_norm": 0.8999636769294739,
      "learning_rate": 3.9289226099892745e-05,
      "loss": 0.975,
      "step": 521
    },
    {
      "epoch": 0.11281607953317484,
      "grad_norm": 1.027186393737793,
      "learning_rate": 3.928552224517913e-05,
      "loss": 0.9635,
      "step": 522
    },
    {
      "epoch": 0.11303220229090123,
      "grad_norm": 0.9493417739868164,
      "learning_rate": 3.92818089406042e-05,
      "loss": 0.8414,
      "step": 523
    },
    {
      "epoch": 0.11324832504862763,
      "grad_norm": 1.0801202058792114,
      "learning_rate": 3.927808618798746e-05,
      "loss": 0.8554,
      "step": 524
    },
    {
      "epoch": 0.11346444780635401,
      "grad_norm": 0.9390546679496765,
      "learning_rate": 3.9274353989153044e-05,
      "loss": 0.9667,
      "step": 525
    },
    {
      "epoch": 0.1136805705640804,
      "grad_norm": 0.9530695080757141,
      "learning_rate": 3.927061234592974e-05,
      "loss": 0.8776,
      "step": 526
    },
    {
      "epoch": 0.11389669332180678,
      "grad_norm": 0.860176682472229,
      "learning_rate": 3.926686126015093e-05,
      "loss": 1.0035,
      "step": 527
    },
    {
      "epoch": 0.11411281607953318,
      "grad_norm": 0.9042350649833679,
      "learning_rate": 3.926310073365464e-05,
      "loss": 1.0282,
      "step": 528
    },
    {
      "epoch": 0.11432893883725956,
      "grad_norm": 0.8940215110778809,
      "learning_rate": 3.9259330768283516e-05,
      "loss": 0.9375,
      "step": 529
    },
    {
      "epoch": 0.11454506159498595,
      "grad_norm": 0.9392261505126953,
      "learning_rate": 3.925555136588484e-05,
      "loss": 0.9435,
      "step": 530
    },
    {
      "epoch": 0.11476118435271233,
      "grad_norm": 0.8384535312652588,
      "learning_rate": 3.9251762528310514e-05,
      "loss": 0.709,
      "step": 531
    },
    {
      "epoch": 0.11497730711043873,
      "grad_norm": 0.9484767913818359,
      "learning_rate": 3.924796425741705e-05,
      "loss": 0.9147,
      "step": 532
    },
    {
      "epoch": 0.11519342986816512,
      "grad_norm": 0.9432885646820068,
      "learning_rate": 3.9244156555065595e-05,
      "loss": 1.1317,
      "step": 533
    },
    {
      "epoch": 0.1154095526258915,
      "grad_norm": 1.028935194015503,
      "learning_rate": 3.924033942312191e-05,
      "loss": 1.0019,
      "step": 534
    },
    {
      "epoch": 0.1156256753836179,
      "grad_norm": 0.8583811521530151,
      "learning_rate": 3.923651286345638e-05,
      "loss": 0.9307,
      "step": 535
    },
    {
      "epoch": 0.11584179814134428,
      "grad_norm": 1.0542842149734497,
      "learning_rate": 3.923267687794403e-05,
      "loss": 1.171,
      "step": 536
    },
    {
      "epoch": 0.11605792089907067,
      "grad_norm": 0.9055013656616211,
      "learning_rate": 3.9228831468464464e-05,
      "loss": 0.8571,
      "step": 537
    },
    {
      "epoch": 0.11627404365679705,
      "grad_norm": 1.1255946159362793,
      "learning_rate": 3.922497663690192e-05,
      "loss": 1.0053,
      "step": 538
    },
    {
      "epoch": 0.11649016641452345,
      "grad_norm": 0.9034440517425537,
      "learning_rate": 3.9221112385145274e-05,
      "loss": 0.9113,
      "step": 539
    },
    {
      "epoch": 0.11670628917224984,
      "grad_norm": 0.96932053565979,
      "learning_rate": 3.921723871508799e-05,
      "loss": 0.9796,
      "step": 540
    },
    {
      "epoch": 0.11692241192997622,
      "grad_norm": 0.88820481300354,
      "learning_rate": 3.921335562862816e-05,
      "loss": 0.9076,
      "step": 541
    },
    {
      "epoch": 0.11713853468770262,
      "grad_norm": 1.00152587890625,
      "learning_rate": 3.920946312766848e-05,
      "loss": 1.1389,
      "step": 542
    },
    {
      "epoch": 0.117354657445429,
      "grad_norm": 1.0847313404083252,
      "learning_rate": 3.920556121411628e-05,
      "loss": 1.1365,
      "step": 543
    },
    {
      "epoch": 0.1175707802031554,
      "grad_norm": 1.0765568017959595,
      "learning_rate": 3.9201649889883485e-05,
      "loss": 1.0681,
      "step": 544
    },
    {
      "epoch": 0.11778690296088178,
      "grad_norm": 1.0710378885269165,
      "learning_rate": 3.919772915688663e-05,
      "loss": 1.3251,
      "step": 545
    },
    {
      "epoch": 0.11800302571860817,
      "grad_norm": 0.9251645803451538,
      "learning_rate": 3.9193799017046865e-05,
      "loss": 0.9097,
      "step": 546
    },
    {
      "epoch": 0.11821914847633455,
      "grad_norm": 1.001984715461731,
      "learning_rate": 3.9189859472289956e-05,
      "loss": 0.8641,
      "step": 547
    },
    {
      "epoch": 0.11843527123406095,
      "grad_norm": 0.9508165121078491,
      "learning_rate": 3.918591052454626e-05,
      "loss": 0.9421,
      "step": 548
    },
    {
      "epoch": 0.11865139399178734,
      "grad_norm": 0.9724790453910828,
      "learning_rate": 3.918195217575075e-05,
      "loss": 1.0963,
      "step": 549
    },
    {
      "epoch": 0.11886751674951372,
      "grad_norm": 0.9443535208702087,
      "learning_rate": 3.917798442784303e-05,
      "loss": 0.9419,
      "step": 550
    },
    {
      "epoch": 0.11908363950724012,
      "grad_norm": 0.8758763074874878,
      "learning_rate": 3.917400728276727e-05,
      "loss": 0.8275,
      "step": 551
    },
    {
      "epoch": 0.1192997622649665,
      "grad_norm": 0.8813546895980835,
      "learning_rate": 3.917002074247227e-05,
      "loss": 0.8677,
      "step": 552
    },
    {
      "epoch": 0.11951588502269289,
      "grad_norm": 0.8212718367576599,
      "learning_rate": 3.916602480891141e-05,
      "loss": 0.8355,
      "step": 553
    },
    {
      "epoch": 0.11973200778041927,
      "grad_norm": 0.9864819645881653,
      "learning_rate": 3.916201948404271e-05,
      "loss": 1.0547,
      "step": 554
    },
    {
      "epoch": 0.11994813053814567,
      "grad_norm": 0.8727284073829651,
      "learning_rate": 3.915800476982875e-05,
      "loss": 0.8634,
      "step": 555
    },
    {
      "epoch": 0.12016425329587206,
      "grad_norm": 0.9952285885810852,
      "learning_rate": 3.9153980668236744e-05,
      "loss": 0.9807,
      "step": 556
    },
    {
      "epoch": 0.12038037605359844,
      "grad_norm": 0.824222207069397,
      "learning_rate": 3.9149947181238486e-05,
      "loss": 0.8683,
      "step": 557
    },
    {
      "epoch": 0.12059649881132484,
      "grad_norm": 0.9063690304756165,
      "learning_rate": 3.914590431081038e-05,
      "loss": 0.9414,
      "step": 558
    },
    {
      "epoch": 0.12081262156905122,
      "grad_norm": 0.9026046395301819,
      "learning_rate": 3.914185205893342e-05,
      "loss": 0.9657,
      "step": 559
    },
    {
      "epoch": 0.12102874432677761,
      "grad_norm": 0.8094519376754761,
      "learning_rate": 3.9137790427593205e-05,
      "loss": 0.9151,
      "step": 560
    },
    {
      "epoch": 0.121244867084504,
      "grad_norm": 0.8167213201522827,
      "learning_rate": 3.913371941877992e-05,
      "loss": 0.9311,
      "step": 561
    },
    {
      "epoch": 0.12146098984223039,
      "grad_norm": 0.9244396090507507,
      "learning_rate": 3.912963903448834e-05,
      "loss": 1.005,
      "step": 562
    },
    {
      "epoch": 0.12167711259995677,
      "grad_norm": 0.8598445653915405,
      "learning_rate": 3.912554927671786e-05,
      "loss": 0.9037,
      "step": 563
    },
    {
      "epoch": 0.12189323535768316,
      "grad_norm": 1.0223029851913452,
      "learning_rate": 3.912145014747245e-05,
      "loss": 1.044,
      "step": 564
    },
    {
      "epoch": 0.12210935811540956,
      "grad_norm": 1.0009716749191284,
      "learning_rate": 3.911734164876067e-05,
      "loss": 1.0534,
      "step": 565
    },
    {
      "epoch": 0.12232548087313594,
      "grad_norm": 1.0638169050216675,
      "learning_rate": 3.9113223782595674e-05,
      "loss": 1.0947,
      "step": 566
    },
    {
      "epoch": 0.12254160363086233,
      "grad_norm": 0.9844657778739929,
      "learning_rate": 3.910909655099521e-05,
      "loss": 1.0553,
      "step": 567
    },
    {
      "epoch": 0.12275772638858871,
      "grad_norm": 0.9277519583702087,
      "learning_rate": 3.9104959955981605e-05,
      "loss": 0.9108,
      "step": 568
    },
    {
      "epoch": 0.12297384914631511,
      "grad_norm": 0.8299845457077026,
      "learning_rate": 3.910081399958179e-05,
      "loss": 0.7876,
      "step": 569
    },
    {
      "epoch": 0.12318997190404149,
      "grad_norm": 0.8751687407493591,
      "learning_rate": 3.909665868382726e-05,
      "loss": 1.0615,
      "step": 570
    },
    {
      "epoch": 0.12340609466176788,
      "grad_norm": 0.8630756139755249,
      "learning_rate": 3.909249401075413e-05,
      "loss": 0.9572,
      "step": 571
    },
    {
      "epoch": 0.12362221741949428,
      "grad_norm": 0.839674711227417,
      "learning_rate": 3.908831998240307e-05,
      "loss": 1.0258,
      "step": 572
    },
    {
      "epoch": 0.12383834017722066,
      "grad_norm": 0.9267817139625549,
      "learning_rate": 3.908413660081934e-05,
      "loss": 0.7814,
      "step": 573
    },
    {
      "epoch": 0.12405446293494705,
      "grad_norm": 0.8045945167541504,
      "learning_rate": 3.907994386805279e-05,
      "loss": 0.8182,
      "step": 574
    },
    {
      "epoch": 0.12427058569267344,
      "grad_norm": 0.9595804214477539,
      "learning_rate": 3.9075741786157856e-05,
      "loss": 1.0572,
      "step": 575
    },
    {
      "epoch": 0.12448670845039983,
      "grad_norm": 0.9574965834617615,
      "learning_rate": 3.907153035719355e-05,
      "loss": 0.8036,
      "step": 576
    },
    {
      "epoch": 0.12470283120812621,
      "grad_norm": 0.9275492429733276,
      "learning_rate": 3.906730958322345e-05,
      "loss": 0.9117,
      "step": 577
    },
    {
      "epoch": 0.1249189539658526,
      "grad_norm": 0.9035372138023376,
      "learning_rate": 3.9063079466315725e-05,
      "loss": 1.1805,
      "step": 578
    },
    {
      "epoch": 0.125135076723579,
      "grad_norm": 0.951949954032898,
      "learning_rate": 3.9058840008543136e-05,
      "loss": 0.8268,
      "step": 579
    },
    {
      "epoch": 0.12535119948130538,
      "grad_norm": 0.8895303606987,
      "learning_rate": 3.9054591211983e-05,
      "loss": 0.859,
      "step": 580
    },
    {
      "epoch": 0.12556732223903178,
      "grad_norm": 0.8728366494178772,
      "learning_rate": 3.9050333078717216e-05,
      "loss": 0.7964,
      "step": 581
    },
    {
      "epoch": 0.12578344499675817,
      "grad_norm": 1.0275249481201172,
      "learning_rate": 3.9046065610832256e-05,
      "loss": 0.8628,
      "step": 582
    },
    {
      "epoch": 0.12599956775448454,
      "grad_norm": 0.8042998909950256,
      "learning_rate": 3.9041788810419186e-05,
      "loss": 0.8814,
      "step": 583
    },
    {
      "epoch": 0.12621569051221093,
      "grad_norm": 0.8455626964569092,
      "learning_rate": 3.903750267957361e-05,
      "loss": 0.8937,
      "step": 584
    },
    {
      "epoch": 0.12643181326993733,
      "grad_norm": 0.995812177658081,
      "learning_rate": 3.9033207220395733e-05,
      "loss": 0.7724,
      "step": 585
    },
    {
      "epoch": 0.12664793602766372,
      "grad_norm": 0.9180460572242737,
      "learning_rate": 3.9028902434990315e-05,
      "loss": 0.9788,
      "step": 586
    },
    {
      "epoch": 0.1268640587853901,
      "grad_norm": 1.0631048679351807,
      "learning_rate": 3.902458832546669e-05,
      "loss": 1.0315,
      "step": 587
    },
    {
      "epoch": 0.12708018154311648,
      "grad_norm": 0.7558464407920837,
      "learning_rate": 3.9020264893938763e-05,
      "loss": 0.9477,
      "step": 588
    },
    {
      "epoch": 0.12729630430084288,
      "grad_norm": 1.027151346206665,
      "learning_rate": 3.901593214252502e-05,
      "loss": 0.9978,
      "step": 589
    },
    {
      "epoch": 0.12751242705856927,
      "grad_norm": 1.023328185081482,
      "learning_rate": 3.901159007334847e-05,
      "loss": 1.08,
      "step": 590
    },
    {
      "epoch": 0.12772854981629567,
      "grad_norm": 1.150592565536499,
      "learning_rate": 3.900723868853674e-05,
      "loss": 0.9376,
      "step": 591
    },
    {
      "epoch": 0.12794467257402203,
      "grad_norm": 0.8630537390708923,
      "learning_rate": 3.9002877990221986e-05,
      "loss": 1.0298,
      "step": 592
    },
    {
      "epoch": 0.12816079533174843,
      "grad_norm": 0.7960755825042725,
      "learning_rate": 3.899850798054095e-05,
      "loss": 0.8426,
      "step": 593
    },
    {
      "epoch": 0.12837691808947482,
      "grad_norm": 0.7773906588554382,
      "learning_rate": 3.8994128661634914e-05,
      "loss": 0.8398,
      "step": 594
    },
    {
      "epoch": 0.12859304084720122,
      "grad_norm": 0.844587504863739,
      "learning_rate": 3.898974003564975e-05,
      "loss": 0.8978,
      "step": 595
    },
    {
      "epoch": 0.1288091636049276,
      "grad_norm": 0.8783379197120667,
      "learning_rate": 3.8985342104735864e-05,
      "loss": 0.9715,
      "step": 596
    },
    {
      "epoch": 0.12902528636265398,
      "grad_norm": 0.8174152374267578,
      "learning_rate": 3.898093487104824e-05,
      "loss": 0.9263,
      "step": 597
    },
    {
      "epoch": 0.12924140912038037,
      "grad_norm": 0.9347906708717346,
      "learning_rate": 3.8976518336746396e-05,
      "loss": 0.9257,
      "step": 598
    },
    {
      "epoch": 0.12945753187810677,
      "grad_norm": 0.9588862061500549,
      "learning_rate": 3.897209250399444e-05,
      "loss": 1.107,
      "step": 599
    },
    {
      "epoch": 0.12967365463583316,
      "grad_norm": 0.8485615253448486,
      "learning_rate": 3.896765737496101e-05,
      "loss": 0.8536,
      "step": 600
    },
    {
      "epoch": 0.12988977739355953,
      "grad_norm": 0.8176436424255371,
      "learning_rate": 3.896321295181932e-05,
      "loss": 0.7917,
      "step": 601
    },
    {
      "epoch": 0.13010590015128592,
      "grad_norm": 0.8480759859085083,
      "learning_rate": 3.8958759236747116e-05,
      "loss": 0.8708,
      "step": 602
    },
    {
      "epoch": 0.13032202290901232,
      "grad_norm": 1.072355031967163,
      "learning_rate": 3.895429623192672e-05,
      "loss": 1.0741,
      "step": 603
    },
    {
      "epoch": 0.13053814566673871,
      "grad_norm": 0.7905756831169128,
      "learning_rate": 3.894982393954498e-05,
      "loss": 0.8311,
      "step": 604
    },
    {
      "epoch": 0.1307542684244651,
      "grad_norm": 1.0046707391738892,
      "learning_rate": 3.894534236179331e-05,
      "loss": 0.8966,
      "step": 605
    },
    {
      "epoch": 0.13097039118219148,
      "grad_norm": 0.8906007409095764,
      "learning_rate": 3.894085150086769e-05,
      "loss": 0.9787,
      "step": 606
    },
    {
      "epoch": 0.13118651393991787,
      "grad_norm": 1.1165415048599243,
      "learning_rate": 3.893635135896861e-05,
      "loss": 0.9902,
      "step": 607
    },
    {
      "epoch": 0.13140263669764427,
      "grad_norm": 0.9424011707305908,
      "learning_rate": 3.893184193830114e-05,
      "loss": 0.8775,
      "step": 608
    },
    {
      "epoch": 0.13161875945537066,
      "grad_norm": 0.9391512274742126,
      "learning_rate": 3.8927323241074886e-05,
      "loss": 1.0867,
      "step": 609
    },
    {
      "epoch": 0.13183488221309703,
      "grad_norm": 1.1020722389221191,
      "learning_rate": 3.8922795269503996e-05,
      "loss": 1.1466,
      "step": 610
    },
    {
      "epoch": 0.13205100497082342,
      "grad_norm": 0.955644428730011,
      "learning_rate": 3.8918258025807164e-05,
      "loss": 1.0072,
      "step": 611
    },
    {
      "epoch": 0.13226712772854982,
      "grad_norm": 1.0140981674194336,
      "learning_rate": 3.891371151220764e-05,
      "loss": 1.0182,
      "step": 612
    },
    {
      "epoch": 0.1324832504862762,
      "grad_norm": 0.8920745849609375,
      "learning_rate": 3.89091557309332e-05,
      "loss": 0.6105,
      "step": 613
    },
    {
      "epoch": 0.1326993732440026,
      "grad_norm": 0.891534686088562,
      "learning_rate": 3.890459068421615e-05,
      "loss": 0.8945,
      "step": 614
    },
    {
      "epoch": 0.13291549600172897,
      "grad_norm": 0.9055238962173462,
      "learning_rate": 3.890001637429337e-05,
      "loss": 0.9382,
      "step": 615
    },
    {
      "epoch": 0.13313161875945537,
      "grad_norm": 0.9690259695053101,
      "learning_rate": 3.8895432803406266e-05,
      "loss": 0.9672,
      "step": 616
    },
    {
      "epoch": 0.13334774151718176,
      "grad_norm": 0.8873419761657715,
      "learning_rate": 3.889083997380076e-05,
      "loss": 0.8831,
      "step": 617
    },
    {
      "epoch": 0.13356386427490816,
      "grad_norm": 0.9011080861091614,
      "learning_rate": 3.888623788772734e-05,
      "loss": 0.9901,
      "step": 618
    },
    {
      "epoch": 0.13377998703263452,
      "grad_norm": 0.7810534238815308,
      "learning_rate": 3.888162654744101e-05,
      "loss": 0.7921,
      "step": 619
    },
    {
      "epoch": 0.13399610979036092,
      "grad_norm": 0.9624156355857849,
      "learning_rate": 3.887700595520132e-05,
      "loss": 0.9847,
      "step": 620
    },
    {
      "epoch": 0.1342122325480873,
      "grad_norm": 1.021198034286499,
      "learning_rate": 3.887237611327235e-05,
      "loss": 1.0239,
      "step": 621
    },
    {
      "epoch": 0.1344283553058137,
      "grad_norm": 0.8436684012413025,
      "learning_rate": 3.886773702392271e-05,
      "loss": 0.7515,
      "step": 622
    },
    {
      "epoch": 0.1346444780635401,
      "grad_norm": 1.0054851770401,
      "learning_rate": 3.886308868942555e-05,
      "loss": 0.7851,
      "step": 623
    },
    {
      "epoch": 0.13486060082126647,
      "grad_norm": 0.8490008115768433,
      "learning_rate": 3.8858431112058534e-05,
      "loss": 0.7387,
      "step": 624
    },
    {
      "epoch": 0.13507672357899286,
      "grad_norm": 0.9846950173377991,
      "learning_rate": 3.885376429410387e-05,
      "loss": 0.9461,
      "step": 625
    },
    {
      "epoch": 0.13529284633671926,
      "grad_norm": 0.9749772548675537,
      "learning_rate": 3.884908823784828e-05,
      "loss": 0.9217,
      "step": 626
    },
    {
      "epoch": 0.13550896909444565,
      "grad_norm": 0.8481361269950867,
      "learning_rate": 3.884440294558303e-05,
      "loss": 0.8687,
      "step": 627
    },
    {
      "epoch": 0.13572509185217205,
      "grad_norm": 0.9036352634429932,
      "learning_rate": 3.88397084196039e-05,
      "loss": 0.7601,
      "step": 628
    },
    {
      "epoch": 0.13594121460989841,
      "grad_norm": 0.9833604693412781,
      "learning_rate": 3.883500466221119e-05,
      "loss": 0.8995,
      "step": 629
    },
    {
      "epoch": 0.1361573373676248,
      "grad_norm": 0.8426030874252319,
      "learning_rate": 3.883029167570974e-05,
      "loss": 1.1427,
      "step": 630
    },
    {
      "epoch": 0.1363734601253512,
      "grad_norm": 0.961252748966217,
      "learning_rate": 3.88255694624089e-05,
      "loss": 0.9613,
      "step": 631
    },
    {
      "epoch": 0.1365895828830776,
      "grad_norm": 0.8939110040664673,
      "learning_rate": 3.882083802462254e-05,
      "loss": 1.1321,
      "step": 632
    },
    {
      "epoch": 0.13680570564080397,
      "grad_norm": 1.0109326839447021,
      "learning_rate": 3.881609736466906e-05,
      "loss": 0.8538,
      "step": 633
    },
    {
      "epoch": 0.13702182839853036,
      "grad_norm": 0.9047037363052368,
      "learning_rate": 3.8811347484871353e-05,
      "loss": 1.0148,
      "step": 634
    },
    {
      "epoch": 0.13723795115625675,
      "grad_norm": 0.9888780117034912,
      "learning_rate": 3.880658838755688e-05,
      "loss": 1.035,
      "step": 635
    },
    {
      "epoch": 0.13745407391398315,
      "grad_norm": 0.9979327321052551,
      "learning_rate": 3.880182007505756e-05,
      "loss": 1.0421,
      "step": 636
    },
    {
      "epoch": 0.13767019667170954,
      "grad_norm": 0.8484611511230469,
      "learning_rate": 3.879704254970987e-05,
      "loss": 0.7986,
      "step": 637
    },
    {
      "epoch": 0.1378863194294359,
      "grad_norm": 0.8858148455619812,
      "learning_rate": 3.8792255813854783e-05,
      "loss": 0.9573,
      "step": 638
    },
    {
      "epoch": 0.1381024421871623,
      "grad_norm": 0.9977371692657471,
      "learning_rate": 3.87874598698378e-05,
      "loss": 1.0117,
      "step": 639
    },
    {
      "epoch": 0.1383185649448887,
      "grad_norm": 0.8719163537025452,
      "learning_rate": 3.87826547200089e-05,
      "loss": 0.9715,
      "step": 640
    },
    {
      "epoch": 0.1385346877026151,
      "grad_norm": 0.9166508316993713,
      "learning_rate": 3.8777840366722606e-05,
      "loss": 1.0143,
      "step": 641
    },
    {
      "epoch": 0.13875081046034146,
      "grad_norm": 0.9907276034355164,
      "learning_rate": 3.877301681233794e-05,
      "loss": 1.0263,
      "step": 642
    },
    {
      "epoch": 0.13896693321806786,
      "grad_norm": 0.8705201148986816,
      "learning_rate": 3.876818405921844e-05,
      "loss": 0.7806,
      "step": 643
    },
    {
      "epoch": 0.13918305597579425,
      "grad_norm": 0.8763052225112915,
      "learning_rate": 3.876334210973213e-05,
      "loss": 1.0054,
      "step": 644
    },
    {
      "epoch": 0.13939917873352065,
      "grad_norm": 0.8491306900978088,
      "learning_rate": 3.8758490966251565e-05,
      "loss": 0.9252,
      "step": 645
    },
    {
      "epoch": 0.13961530149124704,
      "grad_norm": 0.9202139377593994,
      "learning_rate": 3.875363063115379e-05,
      "loss": 0.9219,
      "step": 646
    },
    {
      "epoch": 0.1398314242489734,
      "grad_norm": 0.9065908789634705,
      "learning_rate": 3.874876110682035e-05,
      "loss": 0.9117,
      "step": 647
    },
    {
      "epoch": 0.1400475470066998,
      "grad_norm": 0.8544798493385315,
      "learning_rate": 3.874388239563732e-05,
      "loss": 0.9109,
      "step": 648
    },
    {
      "epoch": 0.1402636697644262,
      "grad_norm": 0.8583292961120605,
      "learning_rate": 3.873899449999524e-05,
      "loss": 0.9353,
      "step": 649
    },
    {
      "epoch": 0.1404797925221526,
      "grad_norm": 1.047090768814087,
      "learning_rate": 3.8734097422289175e-05,
      "loss": 0.9254,
      "step": 650
    },
    {
      "epoch": 0.14069591527987896,
      "grad_norm": 0.8735350966453552,
      "learning_rate": 3.8729191164918674e-05,
      "loss": 0.9183,
      "step": 651
    },
    {
      "epoch": 0.14091203803760535,
      "grad_norm": 0.8952236175537109,
      "learning_rate": 3.87242757302878e-05,
      "loss": 0.8292,
      "step": 652
    },
    {
      "epoch": 0.14112816079533175,
      "grad_norm": 0.8877466320991516,
      "learning_rate": 3.871935112080511e-05,
      "loss": 0.8552,
      "step": 653
    },
    {
      "epoch": 0.14134428355305814,
      "grad_norm": 0.9474995136260986,
      "learning_rate": 3.8714417338883635e-05,
      "loss": 1.0136,
      "step": 654
    },
    {
      "epoch": 0.14156040631078454,
      "grad_norm": 0.8943508863449097,
      "learning_rate": 3.870947438694093e-05,
      "loss": 0.9082,
      "step": 655
    },
    {
      "epoch": 0.1417765290685109,
      "grad_norm": 0.9678506255149841,
      "learning_rate": 3.870452226739903e-05,
      "loss": 0.8696,
      "step": 656
    },
    {
      "epoch": 0.1419926518262373,
      "grad_norm": 1.0479648113250732,
      "learning_rate": 3.869956098268444e-05,
      "loss": 0.9327,
      "step": 657
    },
    {
      "epoch": 0.1422087745839637,
      "grad_norm": 0.8884786367416382,
      "learning_rate": 3.869459053522821e-05,
      "loss": 1.0548,
      "step": 658
    },
    {
      "epoch": 0.1424248973416901,
      "grad_norm": 0.9204626679420471,
      "learning_rate": 3.868961092746584e-05,
      "loss": 0.9182,
      "step": 659
    },
    {
      "epoch": 0.14264102009941648,
      "grad_norm": 0.7968615889549255,
      "learning_rate": 3.8684622161837306e-05,
      "loss": 0.924,
      "step": 660
    },
    {
      "epoch": 0.14285714285714285,
      "grad_norm": 1.0329526662826538,
      "learning_rate": 3.8679624240787113e-05,
      "loss": 0.9867,
      "step": 661
    },
    {
      "epoch": 0.14307326561486924,
      "grad_norm": 0.9560927748680115,
      "learning_rate": 3.8674617166764216e-05,
      "loss": 1.07,
      "step": 662
    },
    {
      "epoch": 0.14328938837259564,
      "grad_norm": 0.8470692038536072,
      "learning_rate": 3.866960094222208e-05,
      "loss": 0.9925,
      "step": 663
    },
    {
      "epoch": 0.14350551113032203,
      "grad_norm": 0.8122526407241821,
      "learning_rate": 3.866457556961864e-05,
      "loss": 0.7895,
      "step": 664
    },
    {
      "epoch": 0.1437216338880484,
      "grad_norm": 0.9864859580993652,
      "learning_rate": 3.865954105141632e-05,
      "loss": 0.9597,
      "step": 665
    },
    {
      "epoch": 0.1439377566457748,
      "grad_norm": 0.8763389587402344,
      "learning_rate": 3.865449739008202e-05,
      "loss": 0.8651,
      "step": 666
    },
    {
      "epoch": 0.1441538794035012,
      "grad_norm": 0.8942314386367798,
      "learning_rate": 3.864944458808712e-05,
      "loss": 0.8472,
      "step": 667
    },
    {
      "epoch": 0.14437000216122758,
      "grad_norm": 0.8911261558532715,
      "learning_rate": 3.864438264790748e-05,
      "loss": 0.9874,
      "step": 668
    },
    {
      "epoch": 0.14458612491895398,
      "grad_norm": 0.9165135622024536,
      "learning_rate": 3.8639311572023445e-05,
      "loss": 0.8398,
      "step": 669
    },
    {
      "epoch": 0.14480224767668035,
      "grad_norm": 0.8999388813972473,
      "learning_rate": 3.8634231362919826e-05,
      "loss": 0.8984,
      "step": 670
    },
    {
      "epoch": 0.14501837043440674,
      "grad_norm": 0.9407719969749451,
      "learning_rate": 3.862914202308592e-05,
      "loss": 1.0439,
      "step": 671
    },
    {
      "epoch": 0.14523449319213314,
      "grad_norm": 1.0014902353286743,
      "learning_rate": 3.8624043555015485e-05,
      "loss": 0.8871,
      "step": 672
    },
    {
      "epoch": 0.14545061594985953,
      "grad_norm": 0.9063896536827087,
      "learning_rate": 3.861893596120676e-05,
      "loss": 1.0437,
      "step": 673
    },
    {
      "epoch": 0.1456667387075859,
      "grad_norm": 0.9328497052192688,
      "learning_rate": 3.861381924416245e-05,
      "loss": 0.8635,
      "step": 674
    },
    {
      "epoch": 0.1458828614653123,
      "grad_norm": 0.9634287357330322,
      "learning_rate": 3.860869340638974e-05,
      "loss": 0.88,
      "step": 675
    },
    {
      "epoch": 0.14609898422303869,
      "grad_norm": 0.8020275235176086,
      "learning_rate": 3.8603558450400286e-05,
      "loss": 0.8758,
      "step": 676
    },
    {
      "epoch": 0.14631510698076508,
      "grad_norm": 0.8185033798217773,
      "learning_rate": 3.859841437871019e-05,
      "loss": 0.8325,
      "step": 677
    },
    {
      "epoch": 0.14653122973849148,
      "grad_norm": 0.9865819215774536,
      "learning_rate": 3.859326119384004e-05,
      "loss": 0.9134,
      "step": 678
    },
    {
      "epoch": 0.14674735249621784,
      "grad_norm": 0.7986255884170532,
      "learning_rate": 3.8588098898314895e-05,
      "loss": 0.9649,
      "step": 679
    },
    {
      "epoch": 0.14696347525394424,
      "grad_norm": 1.0155187845230103,
      "learning_rate": 3.858292749466426e-05,
      "loss": 1.039,
      "step": 680
    },
    {
      "epoch": 0.14717959801167063,
      "grad_norm": 0.8765354156494141,
      "learning_rate": 3.85777469854221e-05,
      "loss": 1.0504,
      "step": 681
    },
    {
      "epoch": 0.14739572076939703,
      "grad_norm": 0.9732946753501892,
      "learning_rate": 3.857255737312687e-05,
      "loss": 1.0145,
      "step": 682
    },
    {
      "epoch": 0.1476118435271234,
      "grad_norm": 1.0179862976074219,
      "learning_rate": 3.856735866032145e-05,
      "loss": 0.8695,
      "step": 683
    },
    {
      "epoch": 0.1478279662848498,
      "grad_norm": 0.9551127552986145,
      "learning_rate": 3.856215084955322e-05,
      "loss": 1.0414,
      "step": 684
    },
    {
      "epoch": 0.14804408904257618,
      "grad_norm": 0.8766571283340454,
      "learning_rate": 3.855693394337398e-05,
      "loss": 0.978,
      "step": 685
    },
    {
      "epoch": 0.14826021180030258,
      "grad_norm": 0.9660585522651672,
      "learning_rate": 3.855170794434e-05,
      "loss": 0.9748,
      "step": 686
    },
    {
      "epoch": 0.14847633455802897,
      "grad_norm": 0.8801333904266357,
      "learning_rate": 3.854647285501202e-05,
      "loss": 0.9898,
      "step": 687
    },
    {
      "epoch": 0.14869245731575534,
      "grad_norm": 0.8716098666191101,
      "learning_rate": 3.8541228677955216e-05,
      "loss": 0.8926,
      "step": 688
    },
    {
      "epoch": 0.14890858007348173,
      "grad_norm": 0.9434691667556763,
      "learning_rate": 3.853597541573921e-05,
      "loss": 1.1135,
      "step": 689
    },
    {
      "epoch": 0.14912470283120813,
      "grad_norm": 1.0055344104766846,
      "learning_rate": 3.853071307093811e-05,
      "loss": 1.0053,
      "step": 690
    },
    {
      "epoch": 0.14934082558893452,
      "grad_norm": 1.0277626514434814,
      "learning_rate": 3.8525441646130435e-05,
      "loss": 0.9823,
      "step": 691
    },
    {
      "epoch": 0.1495569483466609,
      "grad_norm": 0.8405638933181763,
      "learning_rate": 3.852016114389918e-05,
      "loss": 0.9192,
      "step": 692
    },
    {
      "epoch": 0.14977307110438728,
      "grad_norm": 0.7715931534767151,
      "learning_rate": 3.851487156683178e-05,
      "loss": 0.9255,
      "step": 693
    },
    {
      "epoch": 0.14998919386211368,
      "grad_norm": 1.054321050643921,
      "learning_rate": 3.8509572917520113e-05,
      "loss": 1.1794,
      "step": 694
    },
    {
      "epoch": 0.15020531661984007,
      "grad_norm": 0.8760213255882263,
      "learning_rate": 3.8504265198560495e-05,
      "loss": 0.9638,
      "step": 695
    },
    {
      "epoch": 0.15042143937756647,
      "grad_norm": 0.8417812585830688,
      "learning_rate": 3.8498948412553715e-05,
      "loss": 0.8056,
      "step": 696
    },
    {
      "epoch": 0.15063756213529284,
      "grad_norm": 0.8356149196624756,
      "learning_rate": 3.8493622562104976e-05,
      "loss": 0.9349,
      "step": 697
    },
    {
      "epoch": 0.15085368489301923,
      "grad_norm": 0.8915977478027344,
      "learning_rate": 3.8488287649823924e-05,
      "loss": 0.9902,
      "step": 698
    },
    {
      "epoch": 0.15106980765074562,
      "grad_norm": 0.810424268245697,
      "learning_rate": 3.8482943678324674e-05,
      "loss": 0.8976,
      "step": 699
    },
    {
      "epoch": 0.15128593040847202,
      "grad_norm": 0.9278942346572876,
      "learning_rate": 3.8477590650225735e-05,
      "loss": 0.956,
      "step": 700
    },
    {
      "epoch": 0.1515020531661984,
      "grad_norm": 0.8655145168304443,
      "learning_rate": 3.8472228568150105e-05,
      "loss": 0.7879,
      "step": 701
    },
    {
      "epoch": 0.15171817592392478,
      "grad_norm": 1.128374695777893,
      "learning_rate": 3.8466857434725164e-05,
      "loss": 1.0564,
      "step": 702
    },
    {
      "epoch": 0.15193429868165118,
      "grad_norm": 0.874324381351471,
      "learning_rate": 3.846147725258278e-05,
      "loss": 0.9016,
      "step": 703
    },
    {
      "epoch": 0.15215042143937757,
      "grad_norm": 0.9844251275062561,
      "learning_rate": 3.845608802435922e-05,
      "loss": 0.7689,
      "step": 704
    },
    {
      "epoch": 0.15236654419710396,
      "grad_norm": 0.9193190336227417,
      "learning_rate": 3.8450689752695186e-05,
      "loss": 0.9483,
      "step": 705
    },
    {
      "epoch": 0.15258266695483033,
      "grad_norm": 0.9375171661376953,
      "learning_rate": 3.844528244023583e-05,
      "loss": 0.8956,
      "step": 706
    },
    {
      "epoch": 0.15279878971255673,
      "grad_norm": 0.8980957269668579,
      "learning_rate": 3.8439866089630714e-05,
      "loss": 1.0145,
      "step": 707
    },
    {
      "epoch": 0.15301491247028312,
      "grad_norm": 0.9306039214134216,
      "learning_rate": 3.843444070353384e-05,
      "loss": 0.8307,
      "step": 708
    },
    {
      "epoch": 0.15323103522800952,
      "grad_norm": 0.8327880501747131,
      "learning_rate": 3.842900628460364e-05,
      "loss": 0.853,
      "step": 709
    },
    {
      "epoch": 0.1534471579857359,
      "grad_norm": 0.9295020699501038,
      "learning_rate": 3.842356283550296e-05,
      "loss": 1.0683,
      "step": 710
    },
    {
      "epoch": 0.15366328074346228,
      "grad_norm": 0.9068468809127808,
      "learning_rate": 3.841811035889908e-05,
      "loss": 1.1291,
      "step": 711
    },
    {
      "epoch": 0.15387940350118867,
      "grad_norm": 0.8090900778770447,
      "learning_rate": 3.8412648857463694e-05,
      "loss": 0.9488,
      "step": 712
    },
    {
      "epoch": 0.15409552625891507,
      "grad_norm": 0.9061992168426514,
      "learning_rate": 3.840717833387294e-05,
      "loss": 0.9388,
      "step": 713
    },
    {
      "epoch": 0.15431164901664146,
      "grad_norm": 0.8311301469802856,
      "learning_rate": 3.840169879080735e-05,
      "loss": 1.0176,
      "step": 714
    },
    {
      "epoch": 0.15452777177436783,
      "grad_norm": 0.8473559021949768,
      "learning_rate": 3.839621023095189e-05,
      "loss": 0.9885,
      "step": 715
    },
    {
      "epoch": 0.15474389453209422,
      "grad_norm": 0.9512867331504822,
      "learning_rate": 3.8390712656995946e-05,
      "loss": 0.8733,
      "step": 716
    },
    {
      "epoch": 0.15496001728982062,
      "grad_norm": 1.2607604265213013,
      "learning_rate": 3.8385206071633315e-05,
      "loss": 0.8414,
      "step": 717
    },
    {
      "epoch": 0.155176140047547,
      "grad_norm": 0.8926445841789246,
      "learning_rate": 3.837969047756221e-05,
      "loss": 0.8794,
      "step": 718
    },
    {
      "epoch": 0.1553922628052734,
      "grad_norm": 0.841687798500061,
      "learning_rate": 3.837416587748525e-05,
      "loss": 0.8882,
      "step": 719
    },
    {
      "epoch": 0.15560838556299977,
      "grad_norm": 0.9890703558921814,
      "learning_rate": 3.836863227410949e-05,
      "loss": 1.0339,
      "step": 720
    },
    {
      "epoch": 0.15582450832072617,
      "grad_norm": 1.026080846786499,
      "learning_rate": 3.836308967014638e-05,
      "loss": 1.102,
      "step": 721
    },
    {
      "epoch": 0.15604063107845256,
      "grad_norm": 0.9443207383155823,
      "learning_rate": 3.835753806831178e-05,
      "loss": 0.9666,
      "step": 722
    },
    {
      "epoch": 0.15625675383617896,
      "grad_norm": 0.8445281982421875,
      "learning_rate": 3.835197747132596e-05,
      "loss": 0.9106,
      "step": 723
    },
    {
      "epoch": 0.15647287659390532,
      "grad_norm": 0.983044445514679,
      "learning_rate": 3.834640788191361e-05,
      "loss": 0.8976,
      "step": 724
    },
    {
      "epoch": 0.15668899935163172,
      "grad_norm": 0.9355096817016602,
      "learning_rate": 3.83408293028038e-05,
      "loss": 1.0032,
      "step": 725
    },
    {
      "epoch": 0.15690512210935811,
      "grad_norm": 0.8715406060218811,
      "learning_rate": 3.833524173673004e-05,
      "loss": 0.8908,
      "step": 726
    },
    {
      "epoch": 0.1571212448670845,
      "grad_norm": 0.9600350260734558,
      "learning_rate": 3.83296451864302e-05,
      "loss": 0.9133,
      "step": 727
    },
    {
      "epoch": 0.1573373676248109,
      "grad_norm": 0.8724390268325806,
      "learning_rate": 3.83240396546466e-05,
      "loss": 1.0321,
      "step": 728
    },
    {
      "epoch": 0.15755349038253727,
      "grad_norm": 0.9737085700035095,
      "learning_rate": 3.8318425144125926e-05,
      "loss": 0.9593,
      "step": 729
    },
    {
      "epoch": 0.15776961314026366,
      "grad_norm": 0.8641157150268555,
      "learning_rate": 3.8312801657619277e-05,
      "loss": 0.8612,
      "step": 730
    },
    {
      "epoch": 0.15798573589799006,
      "grad_norm": 0.9206441640853882,
      "learning_rate": 3.830716919788215e-05,
      "loss": 0.9566,
      "step": 731
    },
    {
      "epoch": 0.15820185865571645,
      "grad_norm": 1.0136420726776123,
      "learning_rate": 3.830152776767444e-05,
      "loss": 0.9764,
      "step": 732
    },
    {
      "epoch": 0.15841798141344285,
      "grad_norm": 0.916991651058197,
      "learning_rate": 3.8295877369760426e-05,
      "loss": 1.0438,
      "step": 733
    },
    {
      "epoch": 0.15863410417116922,
      "grad_norm": 0.8354426622390747,
      "learning_rate": 3.829021800690879e-05,
      "loss": 0.8525,
      "step": 734
    },
    {
      "epoch": 0.1588502269288956,
      "grad_norm": 0.9984889030456543,
      "learning_rate": 3.8284549681892615e-05,
      "loss": 1.1238,
      "step": 735
    },
    {
      "epoch": 0.159066349686622,
      "grad_norm": 0.9616902470588684,
      "learning_rate": 3.827887239748937e-05,
      "loss": 0.8615,
      "step": 736
    },
    {
      "epoch": 0.1592824724443484,
      "grad_norm": 0.9104542136192322,
      "learning_rate": 3.82731861564809e-05,
      "loss": 1.0627,
      "step": 737
    },
    {
      "epoch": 0.15949859520207477,
      "grad_norm": 0.9917435646057129,
      "learning_rate": 3.826749096165346e-05,
      "loss": 0.9407,
      "step": 738
    },
    {
      "epoch": 0.15971471795980116,
      "grad_norm": 1.0158805847167969,
      "learning_rate": 3.826178681579767e-05,
      "loss": 0.8319,
      "step": 739
    },
    {
      "epoch": 0.15993084071752756,
      "grad_norm": 0.8807531595230103,
      "learning_rate": 3.825607372170855e-05,
      "loss": 1.025,
      "step": 740
    },
    {
      "epoch": 0.16014696347525395,
      "grad_norm": 0.8851889967918396,
      "learning_rate": 3.825035168218552e-05,
      "loss": 1.0129,
      "step": 741
    },
    {
      "epoch": 0.16036308623298035,
      "grad_norm": 0.9945665001869202,
      "learning_rate": 3.824462070003235e-05,
      "loss": 1.0437,
      "step": 742
    },
    {
      "epoch": 0.1605792089907067,
      "grad_norm": 0.949619710445404,
      "learning_rate": 3.8238880778057214e-05,
      "loss": 0.9602,
      "step": 743
    },
    {
      "epoch": 0.1607953317484331,
      "grad_norm": 0.9423522353172302,
      "learning_rate": 3.8233131919072655e-05,
      "loss": 0.8515,
      "step": 744
    },
    {
      "epoch": 0.1610114545061595,
      "grad_norm": 0.9392579197883606,
      "learning_rate": 3.822737412589561e-05,
      "loss": 1.1029,
      "step": 745
    },
    {
      "epoch": 0.1612275772638859,
      "grad_norm": 1.0344432592391968,
      "learning_rate": 3.822160740134738e-05,
      "loss": 0.9482,
      "step": 746
    },
    {
      "epoch": 0.16144370002161226,
      "grad_norm": 0.9603621959686279,
      "learning_rate": 3.821583174825364e-05,
      "loss": 1.0275,
      "step": 747
    },
    {
      "epoch": 0.16165982277933866,
      "grad_norm": 1.0137100219726562,
      "learning_rate": 3.821004716944445e-05,
      "loss": 0.9796,
      "step": 748
    },
    {
      "epoch": 0.16187594553706505,
      "grad_norm": 0.9770598411560059,
      "learning_rate": 3.820425366775425e-05,
      "loss": 0.9573,
      "step": 749
    },
    {
      "epoch": 0.16209206829479145,
      "grad_norm": 0.8415801525115967,
      "learning_rate": 3.8198451246021834e-05,
      "loss": 0.7984,
      "step": 750
    },
    {
      "epoch": 0.16230819105251784,
      "grad_norm": 0.912669837474823,
      "learning_rate": 3.819263990709037e-05,
      "loss": 0.9408,
      "step": 751
    },
    {
      "epoch": 0.1625243138102442,
      "grad_norm": 1.046235203742981,
      "learning_rate": 3.818681965380741e-05,
      "loss": 1.0403,
      "step": 752
    },
    {
      "epoch": 0.1627404365679706,
      "grad_norm": 0.7493875622749329,
      "learning_rate": 3.818099048902486e-05,
      "loss": 0.9189,
      "step": 753
    },
    {
      "epoch": 0.162956559325697,
      "grad_norm": 0.9934805035591125,
      "learning_rate": 3.817515241559901e-05,
      "loss": 0.8216,
      "step": 754
    },
    {
      "epoch": 0.1631726820834234,
      "grad_norm": 0.9069898724555969,
      "learning_rate": 3.8169305436390474e-05,
      "loss": 0.9603,
      "step": 755
    },
    {
      "epoch": 0.16338880484114976,
      "grad_norm": 0.9589218497276306,
      "learning_rate": 3.816344955426429e-05,
      "loss": 0.9931,
      "step": 756
    },
    {
      "epoch": 0.16360492759887615,
      "grad_norm": 0.8494526743888855,
      "learning_rate": 3.81575847720898e-05,
      "loss": 1.0661,
      "step": 757
    },
    {
      "epoch": 0.16382105035660255,
      "grad_norm": 0.9042608141899109,
      "learning_rate": 3.8151711092740756e-05,
      "loss": 0.8215,
      "step": 758
    },
    {
      "epoch": 0.16403717311432894,
      "grad_norm": 0.8558818101882935,
      "learning_rate": 3.814582851909523e-05,
      "loss": 0.9715,
      "step": 759
    },
    {
      "epoch": 0.16425329587205534,
      "grad_norm": 0.9446301460266113,
      "learning_rate": 3.8139937054035686e-05,
      "loss": 0.8714,
      "step": 760
    },
    {
      "epoch": 0.1644694186297817,
      "grad_norm": 0.9493723511695862,
      "learning_rate": 3.813403670044891e-05,
      "loss": 1.0183,
      "step": 761
    },
    {
      "epoch": 0.1646855413875081,
      "grad_norm": 0.8754888772964478,
      "learning_rate": 3.8128127461226074e-05,
      "loss": 1.0799,
      "step": 762
    },
    {
      "epoch": 0.1649016641452345,
      "grad_norm": 0.9679006338119507,
      "learning_rate": 3.8122209339262686e-05,
      "loss": 1.0678,
      "step": 763
    },
    {
      "epoch": 0.1651177869029609,
      "grad_norm": 1.0180104970932007,
      "learning_rate": 3.811628233745862e-05,
      "loss": 0.9036,
      "step": 764
    },
    {
      "epoch": 0.16533390966068728,
      "grad_norm": 0.7865733504295349,
      "learning_rate": 3.8110346458718085e-05,
      "loss": 0.8595,
      "step": 765
    },
    {
      "epoch": 0.16555003241841365,
      "grad_norm": 0.877221405506134,
      "learning_rate": 3.810440170594964e-05,
      "loss": 0.9345,
      "step": 766
    },
    {
      "epoch": 0.16576615517614005,
      "grad_norm": 1.0369511842727661,
      "learning_rate": 3.809844808206622e-05,
      "loss": 1.0009,
      "step": 767
    },
    {
      "epoch": 0.16598227793386644,
      "grad_norm": 0.9274987578392029,
      "learning_rate": 3.809248558998508e-05,
      "loss": 0.9221,
      "step": 768
    },
    {
      "epoch": 0.16619840069159283,
      "grad_norm": 0.9995907545089722,
      "learning_rate": 3.808651423262782e-05,
      "loss": 0.9444,
      "step": 769
    },
    {
      "epoch": 0.1664145234493192,
      "grad_norm": 0.8567201495170593,
      "learning_rate": 3.80805340129204e-05,
      "loss": 1.0448,
      "step": 770
    },
    {
      "epoch": 0.1666306462070456,
      "grad_norm": 0.8255602717399597,
      "learning_rate": 3.8074544933793105e-05,
      "loss": 0.8142,
      "step": 771
    },
    {
      "epoch": 0.166846768964772,
      "grad_norm": 0.8213574290275574,
      "learning_rate": 3.806854699818058e-05,
      "loss": 1.0253,
      "step": 772
    },
    {
      "epoch": 0.16706289172249839,
      "grad_norm": 0.8631877303123474,
      "learning_rate": 3.806254020902179e-05,
      "loss": 1.0511,
      "step": 773
    },
    {
      "epoch": 0.16727901448022478,
      "grad_norm": 0.8674742579460144,
      "learning_rate": 3.805652456926005e-05,
      "loss": 0.8251,
      "step": 774
    },
    {
      "epoch": 0.16749513723795115,
      "grad_norm": 1.031251072883606,
      "learning_rate": 3.805050008184302e-05,
      "loss": 0.8754,
      "step": 775
    },
    {
      "epoch": 0.16771125999567754,
      "grad_norm": 0.9958481788635254,
      "learning_rate": 3.804446674972267e-05,
      "loss": 0.9438,
      "step": 776
    },
    {
      "epoch": 0.16792738275340394,
      "grad_norm": 0.7598473429679871,
      "learning_rate": 3.8038424575855326e-05,
      "loss": 0.6512,
      "step": 777
    },
    {
      "epoch": 0.16814350551113033,
      "grad_norm": 0.8823094367980957,
      "learning_rate": 3.8032373563201644e-05,
      "loss": 0.9703,
      "step": 778
    },
    {
      "epoch": 0.1683596282688567,
      "grad_norm": 0.94339519739151,
      "learning_rate": 3.802631371472659e-05,
      "loss": 0.9723,
      "step": 779
    },
    {
      "epoch": 0.1685757510265831,
      "grad_norm": 0.9543091058731079,
      "learning_rate": 3.80202450333995e-05,
      "loss": 0.8788,
      "step": 780
    },
    {
      "epoch": 0.1687918737843095,
      "grad_norm": 0.9106163382530212,
      "learning_rate": 3.8014167522193985e-05,
      "loss": 0.9754,
      "step": 781
    },
    {
      "epoch": 0.16900799654203588,
      "grad_norm": 0.9123164415359497,
      "learning_rate": 3.8008081184088036e-05,
      "loss": 0.8364,
      "step": 782
    },
    {
      "epoch": 0.16922411929976228,
      "grad_norm": 0.9945230484008789,
      "learning_rate": 3.800198602206394e-05,
      "loss": 1.0841,
      "step": 783
    },
    {
      "epoch": 0.16944024205748864,
      "grad_norm": 0.9595274329185486,
      "learning_rate": 3.79958820391083e-05,
      "loss": 0.9674,
      "step": 784
    },
    {
      "epoch": 0.16965636481521504,
      "grad_norm": 0.9454584121704102,
      "learning_rate": 3.798976923821207e-05,
      "loss": 0.8179,
      "step": 785
    },
    {
      "epoch": 0.16987248757294143,
      "grad_norm": 0.9144061207771301,
      "learning_rate": 3.798364762237049e-05,
      "loss": 0.8421,
      "step": 786
    },
    {
      "epoch": 0.17008861033066783,
      "grad_norm": 0.8717312216758728,
      "learning_rate": 3.797751719458315e-05,
      "loss": 0.884,
      "step": 787
    },
    {
      "epoch": 0.1703047330883942,
      "grad_norm": 0.8118639588356018,
      "learning_rate": 3.7971377957853945e-05,
      "loss": 0.9057,
      "step": 788
    },
    {
      "epoch": 0.1705208558461206,
      "grad_norm": 1.0112063884735107,
      "learning_rate": 3.7965229915191086e-05,
      "loss": 0.8148,
      "step": 789
    },
    {
      "epoch": 0.17073697860384698,
      "grad_norm": 0.9880518913269043,
      "learning_rate": 3.7959073069607094e-05,
      "loss": 0.9605,
      "step": 790
    },
    {
      "epoch": 0.17095310136157338,
      "grad_norm": 1.0393980741500854,
      "learning_rate": 3.7952907424118815e-05,
      "loss": 1.0229,
      "step": 791
    },
    {
      "epoch": 0.17116922411929977,
      "grad_norm": 0.8649792671203613,
      "learning_rate": 3.79467329817474e-05,
      "loss": 1.0278,
      "step": 792
    },
    {
      "epoch": 0.17138534687702614,
      "grad_norm": 0.84566730260849,
      "learning_rate": 3.7940549745518306e-05,
      "loss": 1.0526,
      "step": 793
    },
    {
      "epoch": 0.17160146963475253,
      "grad_norm": 0.9151575565338135,
      "learning_rate": 3.793435771846131e-05,
      "loss": 0.9885,
      "step": 794
    },
    {
      "epoch": 0.17181759239247893,
      "grad_norm": 1.1504076719284058,
      "learning_rate": 3.792815690361049e-05,
      "loss": 1.0227,
      "step": 795
    },
    {
      "epoch": 0.17203371515020532,
      "grad_norm": 0.854996383190155,
      "learning_rate": 3.792194730400424e-05,
      "loss": 0.9017,
      "step": 796
    },
    {
      "epoch": 0.17224983790793172,
      "grad_norm": 0.9883208274841309,
      "learning_rate": 3.791572892268524e-05,
      "loss": 0.943,
      "step": 797
    },
    {
      "epoch": 0.17246596066565809,
      "grad_norm": 0.941680908203125,
      "learning_rate": 3.790950176270047e-05,
      "loss": 1.1355,
      "step": 798
    },
    {
      "epoch": 0.17268208342338448,
      "grad_norm": 1.0921597480773926,
      "learning_rate": 3.790326582710125e-05,
      "loss": 1.0459,
      "step": 799
    },
    {
      "epoch": 0.17289820618111088,
      "grad_norm": 0.9764776229858398,
      "learning_rate": 3.7897021118943156e-05,
      "loss": 1.1244,
      "step": 800
    },
    {
      "epoch": 0.17311432893883727,
      "grad_norm": 0.8465645909309387,
      "learning_rate": 3.7890767641286086e-05,
      "loss": 0.8692,
      "step": 801
    },
    {
      "epoch": 0.17333045169656364,
      "grad_norm": 0.8207517862319946,
      "learning_rate": 3.788450539719423e-05,
      "loss": 0.7815,
      "step": 802
    },
    {
      "epoch": 0.17354657445429003,
      "grad_norm": 1.0459989309310913,
      "learning_rate": 3.7878234389736074e-05,
      "loss": 0.9891,
      "step": 803
    },
    {
      "epoch": 0.17376269721201643,
      "grad_norm": 0.9639811515808105,
      "learning_rate": 3.787195462198439e-05,
      "loss": 0.861,
      "step": 804
    },
    {
      "epoch": 0.17397881996974282,
      "grad_norm": 0.7766333222389221,
      "learning_rate": 3.786566609701626e-05,
      "loss": 0.731,
      "step": 805
    },
    {
      "epoch": 0.17419494272746922,
      "grad_norm": 0.7881793975830078,
      "learning_rate": 3.7859368817913037e-05,
      "loss": 0.9065,
      "step": 806
    },
    {
      "epoch": 0.17441106548519558,
      "grad_norm": 0.8844035267829895,
      "learning_rate": 3.785306278776038e-05,
      "loss": 0.9453,
      "step": 807
    },
    {
      "epoch": 0.17462718824292198,
      "grad_norm": 0.9553999304771423,
      "learning_rate": 3.784674800964823e-05,
      "loss": 0.7729,
      "step": 808
    },
    {
      "epoch": 0.17484331100064837,
      "grad_norm": 0.8754869699478149,
      "learning_rate": 3.784042448667081e-05,
      "loss": 1.0213,
      "step": 809
    },
    {
      "epoch": 0.17505943375837477,
      "grad_norm": 0.9966672658920288,
      "learning_rate": 3.783409222192663e-05,
      "loss": 0.8817,
      "step": 810
    },
    {
      "epoch": 0.17527555651610113,
      "grad_norm": 0.953411877155304,
      "learning_rate": 3.782775121851849e-05,
      "loss": 0.8634,
      "step": 811
    },
    {
      "epoch": 0.17549167927382753,
      "grad_norm": 1.004577398300171,
      "learning_rate": 3.782140147955347e-05,
      "loss": 0.8899,
      "step": 812
    },
    {
      "epoch": 0.17570780203155392,
      "grad_norm": 0.8689178228378296,
      "learning_rate": 3.7815043008142915e-05,
      "loss": 0.9426,
      "step": 813
    },
    {
      "epoch": 0.17592392478928032,
      "grad_norm": 0.8731287717819214,
      "learning_rate": 3.780867580740247e-05,
      "loss": 1.0252,
      "step": 814
    },
    {
      "epoch": 0.1761400475470067,
      "grad_norm": 0.9560363292694092,
      "learning_rate": 3.780229988045204e-05,
      "loss": 0.8715,
      "step": 815
    },
    {
      "epoch": 0.17635617030473308,
      "grad_norm": 0.9369506239891052,
      "learning_rate": 3.7795915230415834e-05,
      "loss": 0.8979,
      "step": 816
    },
    {
      "epoch": 0.17657229306245947,
      "grad_norm": 0.9733330011367798,
      "learning_rate": 3.77895218604223e-05,
      "loss": 0.8355,
      "step": 817
    },
    {
      "epoch": 0.17678841582018587,
      "grad_norm": 0.7921898365020752,
      "learning_rate": 3.7783119773604176e-05,
      "loss": 0.9214,
      "step": 818
    },
    {
      "epoch": 0.17700453857791226,
      "grad_norm": 0.8752906918525696,
      "learning_rate": 3.7776708973098476e-05,
      "loss": 1.0074,
      "step": 819
    },
    {
      "epoch": 0.17722066133563863,
      "grad_norm": 1.0164240598678589,
      "learning_rate": 3.777028946204647e-05,
      "loss": 1.0186,
      "step": 820
    },
    {
      "epoch": 0.17743678409336502,
      "grad_norm": 0.8949374556541443,
      "learning_rate": 3.77638612435937e-05,
      "loss": 0.9594,
      "step": 821
    },
    {
      "epoch": 0.17765290685109142,
      "grad_norm": 1.0692867040634155,
      "learning_rate": 3.7757424320889987e-05,
      "loss": 0.9626,
      "step": 822
    },
    {
      "epoch": 0.1778690296088178,
      "grad_norm": 0.8554781675338745,
      "learning_rate": 3.775097869708941e-05,
      "loss": 0.8944,
      "step": 823
    },
    {
      "epoch": 0.1780851523665442,
      "grad_norm": 0.979682207107544,
      "learning_rate": 3.774452437535031e-05,
      "loss": 0.9192,
      "step": 824
    },
    {
      "epoch": 0.17830127512427058,
      "grad_norm": 0.9625377655029297,
      "learning_rate": 3.773806135883528e-05,
      "loss": 0.8022,
      "step": 825
    },
    {
      "epoch": 0.17851739788199697,
      "grad_norm": 0.9088266491889954,
      "learning_rate": 3.773158965071119e-05,
      "loss": 0.9798,
      "step": 826
    },
    {
      "epoch": 0.17873352063972336,
      "grad_norm": 0.9747220277786255,
      "learning_rate": 3.772510925414916e-05,
      "loss": 0.879,
      "step": 827
    },
    {
      "epoch": 0.17894964339744976,
      "grad_norm": 0.9348064661026001,
      "learning_rate": 3.771862017232456e-05,
      "loss": 1.0974,
      "step": 828
    },
    {
      "epoch": 0.17916576615517615,
      "grad_norm": 0.8869150876998901,
      "learning_rate": 3.7712122408417055e-05,
      "loss": 0.8494,
      "step": 829
    },
    {
      "epoch": 0.17938188891290252,
      "grad_norm": 0.9823442101478577,
      "learning_rate": 3.77056159656105e-05,
      "loss": 1.0264,
      "step": 830
    },
    {
      "epoch": 0.17959801167062892,
      "grad_norm": 0.8608393669128418,
      "learning_rate": 3.769910084709305e-05,
      "loss": 0.8466,
      "step": 831
    },
    {
      "epoch": 0.1798141344283553,
      "grad_norm": 0.7502967119216919,
      "learning_rate": 3.769257705605711e-05,
      "loss": 0.9066,
      "step": 832
    },
    {
      "epoch": 0.1800302571860817,
      "grad_norm": 1.046044945716858,
      "learning_rate": 3.768604459569931e-05,
      "loss": 1.1937,
      "step": 833
    },
    {
      "epoch": 0.18024637994380807,
      "grad_norm": 0.8878052830696106,
      "learning_rate": 3.767950346922054e-05,
      "loss": 0.8745,
      "step": 834
    },
    {
      "epoch": 0.18046250270153447,
      "grad_norm": 0.803325891494751,
      "learning_rate": 3.7672953679825934e-05,
      "loss": 0.8249,
      "step": 835
    },
    {
      "epoch": 0.18067862545926086,
      "grad_norm": 0.9014940857887268,
      "learning_rate": 3.7666395230724885e-05,
      "loss": 0.8478,
      "step": 836
    },
    {
      "epoch": 0.18089474821698726,
      "grad_norm": 0.8608657717704773,
      "learning_rate": 3.765982812513101e-05,
      "loss": 0.9171,
      "step": 837
    },
    {
      "epoch": 0.18111087097471365,
      "grad_norm": 0.8538623452186584,
      "learning_rate": 3.765325236626217e-05,
      "loss": 0.7362,
      "step": 838
    },
    {
      "epoch": 0.18132699373244002,
      "grad_norm": 0.9900898337364197,
      "learning_rate": 3.764666795734049e-05,
      "loss": 1.0384,
      "step": 839
    },
    {
      "epoch": 0.1815431164901664,
      "grad_norm": 0.9848501682281494,
      "learning_rate": 3.7640074901592306e-05,
      "loss": 0.8081,
      "step": 840
    },
    {
      "epoch": 0.1817592392478928,
      "grad_norm": 0.8468287587165833,
      "learning_rate": 3.763347320224819e-05,
      "loss": 0.8982,
      "step": 841
    },
    {
      "epoch": 0.1819753620056192,
      "grad_norm": 1.0634558200836182,
      "learning_rate": 3.762686286254297e-05,
      "loss": 1.0065,
      "step": 842
    },
    {
      "epoch": 0.18219148476334557,
      "grad_norm": 0.8285744190216064,
      "learning_rate": 3.7620243885715695e-05,
      "loss": 0.8926,
      "step": 843
    },
    {
      "epoch": 0.18240760752107196,
      "grad_norm": 0.7932522892951965,
      "learning_rate": 3.761361627500964e-05,
      "loss": 0.9055,
      "step": 844
    },
    {
      "epoch": 0.18262373027879836,
      "grad_norm": 1.0279165506362915,
      "learning_rate": 3.7606980033672344e-05,
      "loss": 0.9415,
      "step": 845
    },
    {
      "epoch": 0.18283985303652475,
      "grad_norm": 0.8211097121238708,
      "learning_rate": 3.760033516495552e-05,
      "loss": 0.7362,
      "step": 846
    },
    {
      "epoch": 0.18305597579425115,
      "grad_norm": 1.0028184652328491,
      "learning_rate": 3.7593681672115155e-05,
      "loss": 1.0083,
      "step": 847
    },
    {
      "epoch": 0.1832720985519775,
      "grad_norm": 1.0128577947616577,
      "learning_rate": 3.758701955841144e-05,
      "loss": 0.8788,
      "step": 848
    },
    {
      "epoch": 0.1834882213097039,
      "grad_norm": 0.9472517967224121,
      "learning_rate": 3.75803488271088e-05,
      "loss": 0.9193,
      "step": 849
    },
    {
      "epoch": 0.1837043440674303,
      "grad_norm": 0.9328030943870544,
      "learning_rate": 3.757366948147587e-05,
      "loss": 0.8707,
      "step": 850
    },
    {
      "epoch": 0.1839204668251567,
      "grad_norm": 0.9275659322738647,
      "learning_rate": 3.7566981524785526e-05,
      "loss": 0.8106,
      "step": 851
    },
    {
      "epoch": 0.18413658958288306,
      "grad_norm": 0.8463131189346313,
      "learning_rate": 3.756028496031484e-05,
      "loss": 0.8724,
      "step": 852
    },
    {
      "epoch": 0.18435271234060946,
      "grad_norm": 0.9580005407333374,
      "learning_rate": 3.755357979134511e-05,
      "loss": 0.9289,
      "step": 853
    },
    {
      "epoch": 0.18456883509833585,
      "grad_norm": 1.036739706993103,
      "learning_rate": 3.754686602116187e-05,
      "loss": 0.9357,
      "step": 854
    },
    {
      "epoch": 0.18478495785606225,
      "grad_norm": 0.8900768756866455,
      "learning_rate": 3.754014365305484e-05,
      "loss": 0.9699,
      "step": 855
    },
    {
      "epoch": 0.18500108061378864,
      "grad_norm": 0.8311254978179932,
      "learning_rate": 3.753341269031797e-05,
      "loss": 0.902,
      "step": 856
    },
    {
      "epoch": 0.185217203371515,
      "grad_norm": 0.8818534016609192,
      "learning_rate": 3.7526673136249404e-05,
      "loss": 0.8418,
      "step": 857
    },
    {
      "epoch": 0.1854333261292414,
      "grad_norm": 0.8500455021858215,
      "learning_rate": 3.7519924994151524e-05,
      "loss": 0.9488,
      "step": 858
    },
    {
      "epoch": 0.1856494488869678,
      "grad_norm": 0.984480619430542,
      "learning_rate": 3.7513168267330894e-05,
      "loss": 1.0126,
      "step": 859
    },
    {
      "epoch": 0.1858655716446942,
      "grad_norm": 0.9773917198181152,
      "learning_rate": 3.75064029590983e-05,
      "loss": 0.9344,
      "step": 860
    },
    {
      "epoch": 0.18608169440242056,
      "grad_norm": 0.8242107033729553,
      "learning_rate": 3.7499629072768724e-05,
      "loss": 0.927,
      "step": 861
    },
    {
      "epoch": 0.18629781716014696,
      "grad_norm": 0.830271303653717,
      "learning_rate": 3.749284661166135e-05,
      "loss": 0.9614,
      "step": 862
    },
    {
      "epoch": 0.18651393991787335,
      "grad_norm": 0.8726242184638977,
      "learning_rate": 3.748605557909958e-05,
      "loss": 0.9402,
      "step": 863
    },
    {
      "epoch": 0.18673006267559975,
      "grad_norm": 0.9918140769004822,
      "learning_rate": 3.7479255978411e-05,
      "loss": 1.0401,
      "step": 864
    },
    {
      "epoch": 0.18694618543332614,
      "grad_norm": 0.8276970386505127,
      "learning_rate": 3.7472447812927395e-05,
      "loss": 0.8476,
      "step": 865
    },
    {
      "epoch": 0.1871623081910525,
      "grad_norm": 0.9273027777671814,
      "learning_rate": 3.746563108598475e-05,
      "loss": 0.971,
      "step": 866
    },
    {
      "epoch": 0.1873784309487789,
      "grad_norm": 0.926529049873352,
      "learning_rate": 3.7458805800923253e-05,
      "loss": 0.8091,
      "step": 867
    },
    {
      "epoch": 0.1875945537065053,
      "grad_norm": 0.9758947491645813,
      "learning_rate": 3.745197196108726e-05,
      "loss": 0.9299,
      "step": 868
    },
    {
      "epoch": 0.1878106764642317,
      "grad_norm": 0.9522454738616943,
      "learning_rate": 3.744512956982537e-05,
      "loss": 0.9481,
      "step": 869
    },
    {
      "epoch": 0.18802679922195809,
      "grad_norm": 0.952519953250885,
      "learning_rate": 3.743827863049029e-05,
      "loss": 0.973,
      "step": 870
    },
    {
      "epoch": 0.18824292197968445,
      "grad_norm": 0.8867212533950806,
      "learning_rate": 3.7431419146439014e-05,
      "loss": 0.9154,
      "step": 871
    },
    {
      "epoch": 0.18845904473741085,
      "grad_norm": 0.9702788591384888,
      "learning_rate": 3.7424551121032646e-05,
      "loss": 0.9125,
      "step": 872
    },
    {
      "epoch": 0.18867516749513724,
      "grad_norm": 0.8474773168563843,
      "learning_rate": 3.741767455763651e-05,
      "loss": 0.9884,
      "step": 873
    },
    {
      "epoch": 0.18889129025286364,
      "grad_norm": 0.7889257073402405,
      "learning_rate": 3.74107894596201e-05,
      "loss": 0.6314,
      "step": 874
    },
    {
      "epoch": 0.18910741301059,
      "grad_norm": 1.02834153175354,
      "learning_rate": 3.7403895830357114e-05,
      "loss": 1.0108,
      "step": 875
    },
    {
      "epoch": 0.1893235357683164,
      "grad_norm": 0.942200243473053,
      "learning_rate": 3.739699367322539e-05,
      "loss": 0.8859,
      "step": 876
    },
    {
      "epoch": 0.1895396585260428,
      "grad_norm": 1.0845520496368408,
      "learning_rate": 3.739008299160698e-05,
      "loss": 0.982,
      "step": 877
    },
    {
      "epoch": 0.1897557812837692,
      "grad_norm": 1.137697696685791,
      "learning_rate": 3.738316378888812e-05,
      "loss": 0.933,
      "step": 878
    },
    {
      "epoch": 0.18997190404149558,
      "grad_norm": 1.0095382928848267,
      "learning_rate": 3.737623606845918e-05,
      "loss": 1.1746,
      "step": 879
    },
    {
      "epoch": 0.19018802679922195,
      "grad_norm": 0.9245660901069641,
      "learning_rate": 3.736929983371473e-05,
      "loss": 1.0572,
      "step": 880
    },
    {
      "epoch": 0.19040414955694834,
      "grad_norm": 0.9055622816085815,
      "learning_rate": 3.736235508805352e-05,
      "loss": 1.0532,
      "step": 881
    },
    {
      "epoch": 0.19062027231467474,
      "grad_norm": 1.05194091796875,
      "learning_rate": 3.7355401834878446e-05,
      "loss": 1.0278,
      "step": 882
    },
    {
      "epoch": 0.19083639507240113,
      "grad_norm": 0.7964370846748352,
      "learning_rate": 3.73484400775966e-05,
      "loss": 0.8587,
      "step": 883
    },
    {
      "epoch": 0.1910525178301275,
      "grad_norm": 0.8697637319564819,
      "learning_rate": 3.734146981961921e-05,
      "loss": 0.9233,
      "step": 884
    },
    {
      "epoch": 0.1912686405878539,
      "grad_norm": 0.9340189695358276,
      "learning_rate": 3.7334491064361705e-05,
      "loss": 0.7572,
      "step": 885
    },
    {
      "epoch": 0.1914847633455803,
      "grad_norm": 0.909929633140564,
      "learning_rate": 3.7327503815243644e-05,
      "loss": 0.955,
      "step": 886
    },
    {
      "epoch": 0.19170088610330668,
      "grad_norm": 0.8539615869522095,
      "learning_rate": 3.732050807568878e-05,
      "loss": 0.836,
      "step": 887
    },
    {
      "epoch": 0.19191700886103308,
      "grad_norm": 0.9616323709487915,
      "learning_rate": 3.7313503849124986e-05,
      "loss": 1.0609,
      "step": 888
    },
    {
      "epoch": 0.19213313161875945,
      "grad_norm": 0.9650198221206665,
      "learning_rate": 3.730649113898433e-05,
      "loss": 0.8262,
      "step": 889
    },
    {
      "epoch": 0.19234925437648584,
      "grad_norm": 0.8615740537643433,
      "learning_rate": 3.729946994870303e-05,
      "loss": 0.7607,
      "step": 890
    },
    {
      "epoch": 0.19256537713421223,
      "grad_norm": 0.8472082614898682,
      "learning_rate": 3.729244028172144e-05,
      "loss": 0.9098,
      "step": 891
    },
    {
      "epoch": 0.19278149989193863,
      "grad_norm": 0.9803862571716309,
      "learning_rate": 3.7285402141484093e-05,
      "loss": 0.8824,
      "step": 892
    },
    {
      "epoch": 0.192997622649665,
      "grad_norm": 1.0066810846328735,
      "learning_rate": 3.727835553143965e-05,
      "loss": 1.0202,
      "step": 893
    },
    {
      "epoch": 0.1932137454073914,
      "grad_norm": 1.0869866609573364,
      "learning_rate": 3.7271300455040936e-05,
      "loss": 0.8883,
      "step": 894
    },
    {
      "epoch": 0.19342986816511779,
      "grad_norm": 0.9176041483879089,
      "learning_rate": 3.7264236915744924e-05,
      "loss": 0.857,
      "step": 895
    },
    {
      "epoch": 0.19364599092284418,
      "grad_norm": 0.8855222463607788,
      "learning_rate": 3.725716491701274e-05,
      "loss": 1.0702,
      "step": 896
    },
    {
      "epoch": 0.19386211368057057,
      "grad_norm": 1.1113038063049316,
      "learning_rate": 3.7250084462309635e-05,
      "loss": 0.9795,
      "step": 897
    },
    {
      "epoch": 0.19407823643829694,
      "grad_norm": 1.0217797756195068,
      "learning_rate": 3.7242995555105016e-05,
      "loss": 0.971,
      "step": 898
    },
    {
      "epoch": 0.19429435919602334,
      "grad_norm": 1.0123941898345947,
      "learning_rate": 3.723589819887244e-05,
      "loss": 1.0251,
      "step": 899
    },
    {
      "epoch": 0.19451048195374973,
      "grad_norm": 0.7982286810874939,
      "learning_rate": 3.7228792397089584e-05,
      "loss": 0.8106,
      "step": 900
    },
    {
      "epoch": 0.19472660471147613,
      "grad_norm": 0.9076669812202454,
      "learning_rate": 3.722167815323829e-05,
      "loss": 0.9463,
      "step": 901
    },
    {
      "epoch": 0.19494272746920252,
      "grad_norm": 0.9631551504135132,
      "learning_rate": 3.7214555470804505e-05,
      "loss": 0.9932,
      "step": 902
    },
    {
      "epoch": 0.1951588502269289,
      "grad_norm": 0.9473233819007874,
      "learning_rate": 3.720742435327834e-05,
      "loss": 0.8753,
      "step": 903
    },
    {
      "epoch": 0.19537497298465528,
      "grad_norm": 0.905228853225708,
      "learning_rate": 3.720028480415401e-05,
      "loss": 0.8958,
      "step": 904
    },
    {
      "epoch": 0.19559109574238168,
      "grad_norm": 0.8304605484008789,
      "learning_rate": 3.7193136826929894e-05,
      "loss": 0.8962,
      "step": 905
    },
    {
      "epoch": 0.19580721850010807,
      "grad_norm": 0.8458021879196167,
      "learning_rate": 3.7185980425108473e-05,
      "loss": 1.0285,
      "step": 906
    },
    {
      "epoch": 0.19602334125783444,
      "grad_norm": 0.8611153960227966,
      "learning_rate": 3.7178815602196385e-05,
      "loss": 0.8238,
      "step": 907
    },
    {
      "epoch": 0.19623946401556083,
      "grad_norm": 1.0125149488449097,
      "learning_rate": 3.717164236170435e-05,
      "loss": 0.9488,
      "step": 908
    },
    {
      "epoch": 0.19645558677328723,
      "grad_norm": 0.872134268283844,
      "learning_rate": 3.7164460707147255e-05,
      "loss": 0.8408,
      "step": 909
    },
    {
      "epoch": 0.19667170953101362,
      "grad_norm": 0.9392416477203369,
      "learning_rate": 3.715727064204409e-05,
      "loss": 0.992,
      "step": 910
    },
    {
      "epoch": 0.19688783228874002,
      "grad_norm": 1.0227292776107788,
      "learning_rate": 3.715007216991798e-05,
      "loss": 0.9461,
      "step": 911
    },
    {
      "epoch": 0.19710395504646638,
      "grad_norm": 1.30838942527771,
      "learning_rate": 3.7142865294296153e-05,
      "loss": 0.9096,
      "step": 912
    },
    {
      "epoch": 0.19732007780419278,
      "grad_norm": 0.854081392288208,
      "learning_rate": 3.713565001870995e-05,
      "loss": 0.9528,
      "step": 913
    },
    {
      "epoch": 0.19753620056191917,
      "grad_norm": 0.9335965514183044,
      "learning_rate": 3.712842634669486e-05,
      "loss": 0.9381,
      "step": 914
    },
    {
      "epoch": 0.19775232331964557,
      "grad_norm": 0.8086388111114502,
      "learning_rate": 3.712119428179045e-05,
      "loss": 0.9615,
      "step": 915
    },
    {
      "epoch": 0.19796844607737193,
      "grad_norm": 0.9744918346405029,
      "learning_rate": 3.7113953827540424e-05,
      "loss": 1.0383,
      "step": 916
    },
    {
      "epoch": 0.19818456883509833,
      "grad_norm": 0.9365674257278442,
      "learning_rate": 3.710670498749258e-05,
      "loss": 0.9333,
      "step": 917
    },
    {
      "epoch": 0.19840069159282472,
      "grad_norm": 0.9845136404037476,
      "learning_rate": 3.709944776519883e-05,
      "loss": 1.1053,
      "step": 918
    },
    {
      "epoch": 0.19861681435055112,
      "grad_norm": 0.9566503763198853,
      "learning_rate": 3.709218216421522e-05,
      "loss": 0.7985,
      "step": 919
    },
    {
      "epoch": 0.1988329371082775,
      "grad_norm": 0.9244258403778076,
      "learning_rate": 3.708490818810185e-05,
      "loss": 0.8061,
      "step": 920
    },
    {
      "epoch": 0.19904905986600388,
      "grad_norm": 0.8147101998329163,
      "learning_rate": 3.707762584042297e-05,
      "loss": 0.8016,
      "step": 921
    },
    {
      "epoch": 0.19926518262373027,
      "grad_norm": 0.8522989153862,
      "learning_rate": 3.70703351247469e-05,
      "loss": 0.8835,
      "step": 922
    },
    {
      "epoch": 0.19948130538145667,
      "grad_norm": 1.0560715198516846,
      "learning_rate": 3.7063036044646076e-05,
      "loss": 1.2435,
      "step": 923
    },
    {
      "epoch": 0.19969742813918306,
      "grad_norm": 0.8863456845283508,
      "learning_rate": 3.705572860369704e-05,
      "loss": 0.9555,
      "step": 924
    },
    {
      "epoch": 0.19991355089690943,
      "grad_norm": 0.7883124351501465,
      "learning_rate": 3.704841280548041e-05,
      "loss": 0.728,
      "step": 925
    },
    {
      "epoch": 0.20012967365463583,
      "grad_norm": 0.8467419147491455,
      "learning_rate": 3.704108865358093e-05,
      "loss": 0.7594,
      "step": 926
    },
    {
      "epoch": 0.20034579641236222,
      "grad_norm": 1.0494290590286255,
      "learning_rate": 3.7033756151587385e-05,
      "loss": 0.9632,
      "step": 927
    },
    {
      "epoch": 0.20056191917008862,
      "grad_norm": 0.8138583302497864,
      "learning_rate": 3.702641530309271e-05,
      "loss": 0.8598,
      "step": 928
    },
    {
      "epoch": 0.200778041927815,
      "grad_norm": 0.8637174367904663,
      "learning_rate": 3.70190661116939e-05,
      "loss": 0.9428,
      "step": 929
    },
    {
      "epoch": 0.20099416468554138,
      "grad_norm": 0.8784462213516235,
      "learning_rate": 3.701170858099203e-05,
      "loss": 0.8025,
      "step": 930
    },
    {
      "epoch": 0.20121028744326777,
      "grad_norm": 0.9863048195838928,
      "learning_rate": 3.700434271459229e-05,
      "loss": 1.1744,
      "step": 931
    },
    {
      "epoch": 0.20142641020099417,
      "grad_norm": 0.960402250289917,
      "learning_rate": 3.6996968516103915e-05,
      "loss": 1.1154,
      "step": 932
    },
    {
      "epoch": 0.20164253295872056,
      "grad_norm": 0.9031201601028442,
      "learning_rate": 3.6989585989140276e-05,
      "loss": 1.0328,
      "step": 933
    },
    {
      "epoch": 0.20185865571644696,
      "grad_norm": 0.8621350526809692,
      "learning_rate": 3.698219513731876e-05,
      "loss": 0.9169,
      "step": 934
    },
    {
      "epoch": 0.20207477847417332,
      "grad_norm": 0.8853005766868591,
      "learning_rate": 3.697479596426089e-05,
      "loss": 1.019,
      "step": 935
    },
    {
      "epoch": 0.20229090123189972,
      "grad_norm": 0.8601515293121338,
      "learning_rate": 3.6967388473592236e-05,
      "loss": 0.8714,
      "step": 936
    },
    {
      "epoch": 0.2025070239896261,
      "grad_norm": 0.9690818786621094,
      "learning_rate": 3.6959972668942456e-05,
      "loss": 1.1918,
      "step": 937
    },
    {
      "epoch": 0.2027231467473525,
      "grad_norm": 0.8859080672264099,
      "learning_rate": 3.695254855394527e-05,
      "loss": 0.6843,
      "step": 938
    },
    {
      "epoch": 0.20293926950507887,
      "grad_norm": 0.819726288318634,
      "learning_rate": 3.694511613223849e-05,
      "loss": 0.8831,
      "step": 939
    },
    {
      "epoch": 0.20315539226280527,
      "grad_norm": 0.8103852868080139,
      "learning_rate": 3.693767540746397e-05,
      "loss": 0.7011,
      "step": 940
    },
    {
      "epoch": 0.20337151502053166,
      "grad_norm": 0.8728498220443726,
      "learning_rate": 3.6930226383267655e-05,
      "loss": 0.8609,
      "step": 941
    },
    {
      "epoch": 0.20358763777825806,
      "grad_norm": 0.8289908170700073,
      "learning_rate": 3.692276906329955e-05,
      "loss": 0.9882,
      "step": 942
    },
    {
      "epoch": 0.20380376053598445,
      "grad_norm": 0.9996088743209839,
      "learning_rate": 3.691530345121372e-05,
      "loss": 0.9028,
      "step": 943
    },
    {
      "epoch": 0.20401988329371082,
      "grad_norm": 0.871531069278717,
      "learning_rate": 3.690782955066831e-05,
      "loss": 0.8812,
      "step": 944
    },
    {
      "epoch": 0.2042360060514372,
      "grad_norm": 0.9398096799850464,
      "learning_rate": 3.69003473653255e-05,
      "loss": 1.0003,
      "step": 945
    },
    {
      "epoch": 0.2044521288091636,
      "grad_norm": 0.909731924533844,
      "learning_rate": 3.689285689885154e-05,
      "loss": 0.9512,
      "step": 946
    },
    {
      "epoch": 0.20466825156689,
      "grad_norm": 0.8529198169708252,
      "learning_rate": 3.6885358154916765e-05,
      "loss": 0.7834,
      "step": 947
    },
    {
      "epoch": 0.20488437432461637,
      "grad_norm": 0.8345663547515869,
      "learning_rate": 3.687785113719552e-05,
      "loss": 0.9069,
      "step": 948
    },
    {
      "epoch": 0.20510049708234276,
      "grad_norm": 0.9323077201843262,
      "learning_rate": 3.687033584936624e-05,
      "loss": 0.8803,
      "step": 949
    },
    {
      "epoch": 0.20531661984006916,
      "grad_norm": 0.8528758883476257,
      "learning_rate": 3.686281229511138e-05,
      "loss": 0.9107,
      "step": 950
    },
    {
      "epoch": 0.20553274259779555,
      "grad_norm": 0.8408779501914978,
      "learning_rate": 3.685528047811749e-05,
      "loss": 0.8124,
      "step": 951
    },
    {
      "epoch": 0.20574886535552195,
      "grad_norm": 1.095395565032959,
      "learning_rate": 3.684774040207512e-05,
      "loss": 0.8935,
      "step": 952
    },
    {
      "epoch": 0.20596498811324832,
      "grad_norm": 1.0189132690429688,
      "learning_rate": 3.684019207067891e-05,
      "loss": 1.014,
      "step": 953
    },
    {
      "epoch": 0.2061811108709747,
      "grad_norm": 0.9494995474815369,
      "learning_rate": 3.68326354876275e-05,
      "loss": 0.8565,
      "step": 954
    },
    {
      "epoch": 0.2063972336287011,
      "grad_norm": 0.9605530500411987,
      "learning_rate": 3.6825070656623626e-05,
      "loss": 1.0225,
      "step": 955
    },
    {
      "epoch": 0.2066133563864275,
      "grad_norm": 0.8805063962936401,
      "learning_rate": 3.681749758137402e-05,
      "loss": 0.9296,
      "step": 956
    },
    {
      "epoch": 0.20682947914415387,
      "grad_norm": 1.0285567045211792,
      "learning_rate": 3.6809916265589476e-05,
      "loss": 1.1333,
      "step": 957
    },
    {
      "epoch": 0.20704560190188026,
      "grad_norm": 0.9766727089881897,
      "learning_rate": 3.6802326712984816e-05,
      "loss": 0.9373,
      "step": 958
    },
    {
      "epoch": 0.20726172465960666,
      "grad_norm": 1.1031501293182373,
      "learning_rate": 3.679472892727891e-05,
      "loss": 1.0861,
      "step": 959
    },
    {
      "epoch": 0.20747784741733305,
      "grad_norm": 0.7932749390602112,
      "learning_rate": 3.6787122912194656e-05,
      "loss": 0.8303,
      "step": 960
    },
    {
      "epoch": 0.20769397017505944,
      "grad_norm": 1.0850369930267334,
      "learning_rate": 3.677950867145898e-05,
      "loss": 0.831,
      "step": 961
    },
    {
      "epoch": 0.2079100929327858,
      "grad_norm": 0.943305253982544,
      "learning_rate": 3.677188620880284e-05,
      "loss": 0.9211,
      "step": 962
    },
    {
      "epoch": 0.2081262156905122,
      "grad_norm": 0.9907835125923157,
      "learning_rate": 3.676425552796123e-05,
      "loss": 0.9135,
      "step": 963
    },
    {
      "epoch": 0.2083423384482386,
      "grad_norm": 0.8394286632537842,
      "learning_rate": 3.675661663267317e-05,
      "loss": 0.8862,
      "step": 964
    },
    {
      "epoch": 0.208558461205965,
      "grad_norm": 1.1331239938735962,
      "learning_rate": 3.674896952668169e-05,
      "loss": 1.1158,
      "step": 965
    },
    {
      "epoch": 0.2087745839636914,
      "grad_norm": 0.8609957098960876,
      "learning_rate": 3.674131421373386e-05,
      "loss": 0.8496,
      "step": 966
    },
    {
      "epoch": 0.20899070672141776,
      "grad_norm": 0.8272537589073181,
      "learning_rate": 3.6733650697580774e-05,
      "loss": 0.8221,
      "step": 967
    },
    {
      "epoch": 0.20920682947914415,
      "grad_norm": 0.9432743787765503,
      "learning_rate": 3.672597898197752e-05,
      "loss": 1.0988,
      "step": 968
    },
    {
      "epoch": 0.20942295223687055,
      "grad_norm": 0.9202385544776917,
      "learning_rate": 3.6718299070683234e-05,
      "loss": 0.8537,
      "step": 969
    },
    {
      "epoch": 0.20963907499459694,
      "grad_norm": 0.8289174437522888,
      "learning_rate": 3.6710610967461044e-05,
      "loss": 0.9086,
      "step": 970
    },
    {
      "epoch": 0.2098551977523233,
      "grad_norm": 0.9070008397102356,
      "learning_rate": 3.670291467607812e-05,
      "loss": 0.809,
      "step": 971
    },
    {
      "epoch": 0.2100713205100497,
      "grad_norm": 1.0298635959625244,
      "learning_rate": 3.669521020030561e-05,
      "loss": 0.9918,
      "step": 972
    },
    {
      "epoch": 0.2102874432677761,
      "grad_norm": 0.9390925168991089,
      "learning_rate": 3.668749754391869e-05,
      "loss": 0.7554,
      "step": 973
    },
    {
      "epoch": 0.2105035660255025,
      "grad_norm": 0.8981751203536987,
      "learning_rate": 3.667977671069656e-05,
      "loss": 0.9524,
      "step": 974
    },
    {
      "epoch": 0.2107196887832289,
      "grad_norm": 0.9763649702072144,
      "learning_rate": 3.667204770442239e-05,
      "loss": 0.8765,
      "step": 975
    },
    {
      "epoch": 0.21093581154095525,
      "grad_norm": 0.8939294815063477,
      "learning_rate": 3.6664310528883396e-05,
      "loss": 1.0306,
      "step": 976
    },
    {
      "epoch": 0.21115193429868165,
      "grad_norm": 0.9194285273551941,
      "learning_rate": 3.665656518787076e-05,
      "loss": 0.8041,
      "step": 977
    },
    {
      "epoch": 0.21136805705640804,
      "grad_norm": 0.8309286832809448,
      "learning_rate": 3.664881168517968e-05,
      "loss": 1.0146,
      "step": 978
    },
    {
      "epoch": 0.21158417981413444,
      "grad_norm": 0.9171546101570129,
      "learning_rate": 3.664105002460937e-05,
      "loss": 1.1928,
      "step": 979
    },
    {
      "epoch": 0.2118003025718608,
      "grad_norm": 0.8434054255485535,
      "learning_rate": 3.663328020996301e-05,
      "loss": 0.9317,
      "step": 980
    },
    {
      "epoch": 0.2120164253295872,
      "grad_norm": 0.9480212330818176,
      "learning_rate": 3.66255022450478e-05,
      "loss": 0.9464,
      "step": 981
    },
    {
      "epoch": 0.2122325480873136,
      "grad_norm": 0.8842633366584778,
      "learning_rate": 3.661771613367493e-05,
      "loss": 0.8515,
      "step": 982
    },
    {
      "epoch": 0.21244867084504,
      "grad_norm": 0.8174552321434021,
      "learning_rate": 3.660992187965956e-05,
      "loss": 0.9086,
      "step": 983
    },
    {
      "epoch": 0.21266479360276638,
      "grad_norm": 0.9173724055290222,
      "learning_rate": 3.660211948682086e-05,
      "loss": 0.9264,
      "step": 984
    },
    {
      "epoch": 0.21288091636049275,
      "grad_norm": 0.9887210130691528,
      "learning_rate": 3.6594308958982004e-05,
      "loss": 1.0338,
      "step": 985
    },
    {
      "epoch": 0.21309703911821914,
      "grad_norm": 0.9939167499542236,
      "learning_rate": 3.658649029997011e-05,
      "loss": 0.9151,
      "step": 986
    },
    {
      "epoch": 0.21331316187594554,
      "grad_norm": 0.9670457243919373,
      "learning_rate": 3.657866351361632e-05,
      "loss": 0.9914,
      "step": 987
    },
    {
      "epoch": 0.21352928463367193,
      "grad_norm": 0.8448925614356995,
      "learning_rate": 3.657082860375572e-05,
      "loss": 0.9263,
      "step": 988
    },
    {
      "epoch": 0.2137454073913983,
      "grad_norm": 0.9376818537712097,
      "learning_rate": 3.6562985574227414e-05,
      "loss": 1.036,
      "step": 989
    },
    {
      "epoch": 0.2139615301491247,
      "grad_norm": 0.9312554001808167,
      "learning_rate": 3.655513442887447e-05,
      "loss": 0.9952,
      "step": 990
    },
    {
      "epoch": 0.2141776529068511,
      "grad_norm": 0.9356735944747925,
      "learning_rate": 3.6547275171543924e-05,
      "loss": 1.3891,
      "step": 991
    },
    {
      "epoch": 0.21439377566457749,
      "grad_norm": 0.9153226613998413,
      "learning_rate": 3.6539407806086796e-05,
      "loss": 0.9982,
      "step": 992
    },
    {
      "epoch": 0.21460989842230388,
      "grad_norm": 1.0015946626663208,
      "learning_rate": 3.653153233635808e-05,
      "loss": 0.9758,
      "step": 993
    },
    {
      "epoch": 0.21482602118003025,
      "grad_norm": 0.928017795085907,
      "learning_rate": 3.652364876621673e-05,
      "loss": 0.9092,
      "step": 994
    },
    {
      "epoch": 0.21504214393775664,
      "grad_norm": 0.8220463991165161,
      "learning_rate": 3.651575709952568e-05,
      "loss": 0.9433,
      "step": 995
    },
    {
      "epoch": 0.21525826669548304,
      "grad_norm": 0.9616919755935669,
      "learning_rate": 3.650785734015183e-05,
      "loss": 0.8842,
      "step": 996
    },
    {
      "epoch": 0.21547438945320943,
      "grad_norm": 0.9724686741828918,
      "learning_rate": 3.6499949491966046e-05,
      "loss": 0.9112,
      "step": 997
    },
    {
      "epoch": 0.21569051221093583,
      "grad_norm": 0.8349328637123108,
      "learning_rate": 3.649203355884316e-05,
      "loss": 0.9399,
      "step": 998
    },
    {
      "epoch": 0.2159066349686622,
      "grad_norm": 0.8705836534500122,
      "learning_rate": 3.648410954466195e-05,
      "loss": 0.9218,
      "step": 999
    },
    {
      "epoch": 0.2161227577263886,
      "grad_norm": 0.8451696634292603,
      "learning_rate": 3.6476177453305164e-05,
      "loss": 0.9779,
      "step": 1000
    },
    {
      "epoch": 0.21633888048411498,
      "grad_norm": 0.9489712715148926,
      "learning_rate": 3.646823728865952e-05,
      "loss": 1.0642,
      "step": 1001
    },
    {
      "epoch": 0.21655500324184138,
      "grad_norm": 0.8946622610092163,
      "learning_rate": 3.6460289054615665e-05,
      "loss": 0.9093,
      "step": 1002
    },
    {
      "epoch": 0.21677112599956774,
      "grad_norm": 0.9775545001029968,
      "learning_rate": 3.6452332755068226e-05,
      "loss": 0.874,
      "step": 1003
    },
    {
      "epoch": 0.21698724875729414,
      "grad_norm": 0.7812718152999878,
      "learning_rate": 3.6444368393915774e-05,
      "loss": 0.7704,
      "step": 1004
    },
    {
      "epoch": 0.21720337151502053,
      "grad_norm": 0.995879590511322,
      "learning_rate": 3.643639597506082e-05,
      "loss": 1.0089,
      "step": 1005
    },
    {
      "epoch": 0.21741949427274693,
      "grad_norm": 1.089314341545105,
      "learning_rate": 3.642841550240983e-05,
      "loss": 0.9043,
      "step": 1006
    },
    {
      "epoch": 0.21763561703047332,
      "grad_norm": 0.9540629982948303,
      "learning_rate": 3.642042697987323e-05,
      "loss": 0.9521,
      "step": 1007
    },
    {
      "epoch": 0.2178517397881997,
      "grad_norm": 0.908274233341217,
      "learning_rate": 3.6412430411365365e-05,
      "loss": 0.9355,
      "step": 1008
    },
    {
      "epoch": 0.21806786254592608,
      "grad_norm": 0.8804648518562317,
      "learning_rate": 3.6404425800804533e-05,
      "loss": 1.1751,
      "step": 1009
    },
    {
      "epoch": 0.21828398530365248,
      "grad_norm": 0.9607532024383545,
      "learning_rate": 3.639641315211299e-05,
      "loss": 0.8435,
      "step": 1010
    },
    {
      "epoch": 0.21850010806137887,
      "grad_norm": 0.9816673994064331,
      "learning_rate": 3.638839246921689e-05,
      "loss": 1.0648,
      "step": 1011
    },
    {
      "epoch": 0.21871623081910524,
      "grad_norm": 0.8674247860908508,
      "learning_rate": 3.638036375604638e-05,
      "loss": 1.0623,
      "step": 1012
    },
    {
      "epoch": 0.21893235357683163,
      "grad_norm": 1.0201153755187988,
      "learning_rate": 3.637232701653549e-05,
      "loss": 1.1882,
      "step": 1013
    },
    {
      "epoch": 0.21914847633455803,
      "grad_norm": 0.870021641254425,
      "learning_rate": 3.6364282254622215e-05,
      "loss": 0.8389,
      "step": 1014
    },
    {
      "epoch": 0.21936459909228442,
      "grad_norm": 0.9027089476585388,
      "learning_rate": 3.635622947424845e-05,
      "loss": 1.0713,
      "step": 1015
    },
    {
      "epoch": 0.21958072185001082,
      "grad_norm": 1.028327465057373,
      "learning_rate": 3.634816867936007e-05,
      "loss": 1.1063,
      "step": 1016
    },
    {
      "epoch": 0.21979684460773719,
      "grad_norm": 0.9232480525970459,
      "learning_rate": 3.6340099873906814e-05,
      "loss": 0.9094,
      "step": 1017
    },
    {
      "epoch": 0.22001296736546358,
      "grad_norm": 0.879548192024231,
      "learning_rate": 3.63320230618424e-05,
      "loss": 0.9462,
      "step": 1018
    },
    {
      "epoch": 0.22022909012318997,
      "grad_norm": 0.9714813232421875,
      "learning_rate": 3.632393824712444e-05,
      "loss": 0.95,
      "step": 1019
    },
    {
      "epoch": 0.22044521288091637,
      "grad_norm": 0.9346253871917725,
      "learning_rate": 3.631584543371447e-05,
      "loss": 0.9187,
      "step": 1020
    },
    {
      "epoch": 0.22066133563864274,
      "grad_norm": 0.975001335144043,
      "learning_rate": 3.630774462557796e-05,
      "loss": 0.9681,
      "step": 1021
    },
    {
      "epoch": 0.22087745839636913,
      "grad_norm": 0.905458390712738,
      "learning_rate": 3.629963582668428e-05,
      "loss": 1.0308,
      "step": 1022
    },
    {
      "epoch": 0.22109358115409553,
      "grad_norm": 0.9808592796325684,
      "learning_rate": 3.629151904100672e-05,
      "loss": 0.7074,
      "step": 1023
    },
    {
      "epoch": 0.22130970391182192,
      "grad_norm": 0.9215885400772095,
      "learning_rate": 3.628339427252249e-05,
      "loss": 0.9571,
      "step": 1024
    },
    {
      "epoch": 0.22152582666954831,
      "grad_norm": 0.9385959506034851,
      "learning_rate": 3.627526152521271e-05,
      "loss": 0.8931,
      "step": 1025
    },
    {
      "epoch": 0.22174194942727468,
      "grad_norm": 0.8258581757545471,
      "learning_rate": 3.626712080306241e-05,
      "loss": 0.8248,
      "step": 1026
    },
    {
      "epoch": 0.22195807218500108,
      "grad_norm": 0.9462264776229858,
      "learning_rate": 3.625897211006051e-05,
      "loss": 0.9672,
      "step": 1027
    },
    {
      "epoch": 0.22217419494272747,
      "grad_norm": 0.9047693610191345,
      "learning_rate": 3.625081545019987e-05,
      "loss": 0.9575,
      "step": 1028
    },
    {
      "epoch": 0.22239031770045387,
      "grad_norm": 0.7842395901679993,
      "learning_rate": 3.624265082747723e-05,
      "loss": 0.8527,
      "step": 1029
    },
    {
      "epoch": 0.22260644045818023,
      "grad_norm": 0.8913676142692566,
      "learning_rate": 3.623447824589323e-05,
      "loss": 0.9988,
      "step": 1030
    },
    {
      "epoch": 0.22282256321590663,
      "grad_norm": 0.9162867665290833,
      "learning_rate": 3.622629770945241e-05,
      "loss": 0.817,
      "step": 1031
    },
    {
      "epoch": 0.22303868597363302,
      "grad_norm": 0.9730992317199707,
      "learning_rate": 3.621810922216323e-05,
      "loss": 0.8078,
      "step": 1032
    },
    {
      "epoch": 0.22325480873135942,
      "grad_norm": 0.951578676700592,
      "learning_rate": 3.620991278803802e-05,
      "loss": 0.9413,
      "step": 1033
    },
    {
      "epoch": 0.2234709314890858,
      "grad_norm": 0.9462690949440002,
      "learning_rate": 3.620170841109301e-05,
      "loss": 1.0054,
      "step": 1034
    },
    {
      "epoch": 0.22368705424681218,
      "grad_norm": 0.9456660747528076,
      "learning_rate": 3.6193496095348345e-05,
      "loss": 0.879,
      "step": 1035
    },
    {
      "epoch": 0.22390317700453857,
      "grad_norm": 0.9455549120903015,
      "learning_rate": 3.618527584482801e-05,
      "loss": 1.0426,
      "step": 1036
    },
    {
      "epoch": 0.22411929976226497,
      "grad_norm": 0.8765550255775452,
      "learning_rate": 3.617704766355994e-05,
      "loss": 0.9799,
      "step": 1037
    },
    {
      "epoch": 0.22433542251999136,
      "grad_norm": 0.9239512085914612,
      "learning_rate": 3.6168811555575905e-05,
      "loss": 0.9659,
      "step": 1038
    },
    {
      "epoch": 0.22455154527771776,
      "grad_norm": 0.8369002342224121,
      "learning_rate": 3.6160567524911586e-05,
      "loss": 0.8801,
      "step": 1039
    },
    {
      "epoch": 0.22476766803544412,
      "grad_norm": 0.9263517260551453,
      "learning_rate": 3.6152315575606535e-05,
      "loss": 0.9159,
      "step": 1040
    },
    {
      "epoch": 0.22498379079317052,
      "grad_norm": 0.8030198216438293,
      "learning_rate": 3.6144055711704196e-05,
      "loss": 0.7952,
      "step": 1041
    },
    {
      "epoch": 0.2251999135508969,
      "grad_norm": 0.9598090052604675,
      "learning_rate": 3.6135787937251875e-05,
      "loss": 0.9139,
      "step": 1042
    },
    {
      "epoch": 0.2254160363086233,
      "grad_norm": 0.8471052050590515,
      "learning_rate": 3.612751225630076e-05,
      "loss": 0.9855,
      "step": 1043
    },
    {
      "epoch": 0.22563215906634967,
      "grad_norm": 0.8563123941421509,
      "learning_rate": 3.6119228672905926e-05,
      "loss": 0.9401,
      "step": 1044
    },
    {
      "epoch": 0.22584828182407607,
      "grad_norm": 0.9090791344642639,
      "learning_rate": 3.6110937191126305e-05,
      "loss": 0.9147,
      "step": 1045
    },
    {
      "epoch": 0.22606440458180246,
      "grad_norm": 0.8402764201164246,
      "learning_rate": 3.61026378150247e-05,
      "loss": 0.9035,
      "step": 1046
    },
    {
      "epoch": 0.22628052733952886,
      "grad_norm": 0.8264151215553284,
      "learning_rate": 3.6094330548667794e-05,
      "loss": 0.8666,
      "step": 1047
    },
    {
      "epoch": 0.22649665009725525,
      "grad_norm": 0.9080968499183655,
      "learning_rate": 3.6086015396126126e-05,
      "loss": 0.8237,
      "step": 1048
    },
    {
      "epoch": 0.22671277285498162,
      "grad_norm": 0.8279055953025818,
      "learning_rate": 3.6077692361474095e-05,
      "loss": 0.8232,
      "step": 1049
    },
    {
      "epoch": 0.22692889561270801,
      "grad_norm": 1.0176692008972168,
      "learning_rate": 3.6069361448789976e-05,
      "loss": 0.7964,
      "step": 1050
    },
    {
      "epoch": 0.2271450183704344,
      "grad_norm": 0.803254246711731,
      "learning_rate": 3.606102266215589e-05,
      "loss": 0.7906,
      "step": 1051
    },
    {
      "epoch": 0.2273611411281608,
      "grad_norm": 0.908789873123169,
      "learning_rate": 3.605267600565783e-05,
      "loss": 0.9803,
      "step": 1052
    },
    {
      "epoch": 0.22757726388588717,
      "grad_norm": 0.9088970422744751,
      "learning_rate": 3.604432148338563e-05,
      "loss": 0.954,
      "step": 1053
    },
    {
      "epoch": 0.22779338664361357,
      "grad_norm": 0.9667347073554993,
      "learning_rate": 3.6035959099433e-05,
      "loss": 1.0223,
      "step": 1054
    },
    {
      "epoch": 0.22800950940133996,
      "grad_norm": 0.8741658926010132,
      "learning_rate": 3.602758885789748e-05,
      "loss": 0.99,
      "step": 1055
    },
    {
      "epoch": 0.22822563215906636,
      "grad_norm": 0.8842224478721619,
      "learning_rate": 3.6019210762880464e-05,
      "loss": 0.8577,
      "step": 1056
    },
    {
      "epoch": 0.22844175491679275,
      "grad_norm": 0.8977473378181458,
      "learning_rate": 3.601082481848721e-05,
      "loss": 1.0656,
      "step": 1057
    },
    {
      "epoch": 0.22865787767451912,
      "grad_norm": 1.0967479944229126,
      "learning_rate": 3.6002431028826806e-05,
      "loss": 0.8729,
      "step": 1058
    },
    {
      "epoch": 0.2288740004322455,
      "grad_norm": 0.8644981384277344,
      "learning_rate": 3.599402939801219e-05,
      "loss": 0.9116,
      "step": 1059
    },
    {
      "epoch": 0.2290901231899719,
      "grad_norm": 1.03041672706604,
      "learning_rate": 3.5985619930160146e-05,
      "loss": 0.9206,
      "step": 1060
    },
    {
      "epoch": 0.2293062459476983,
      "grad_norm": 1.0125470161437988,
      "learning_rate": 3.5977202629391284e-05,
      "loss": 0.9103,
      "step": 1061
    },
    {
      "epoch": 0.22952236870542467,
      "grad_norm": 0.8819757103919983,
      "learning_rate": 3.596877749983007e-05,
      "loss": 0.9736,
      "step": 1062
    },
    {
      "epoch": 0.22973849146315106,
      "grad_norm": 0.8741188645362854,
      "learning_rate": 3.5960344545604796e-05,
      "loss": 0.977,
      "step": 1063
    },
    {
      "epoch": 0.22995461422087746,
      "grad_norm": 0.8714580535888672,
      "learning_rate": 3.5951903770847584e-05,
      "loss": 0.977,
      "step": 1064
    },
    {
      "epoch": 0.23017073697860385,
      "grad_norm": 0.8280560374259949,
      "learning_rate": 3.5943455179694404e-05,
      "loss": 0.9297,
      "step": 1065
    },
    {
      "epoch": 0.23038685973633025,
      "grad_norm": 0.8424382209777832,
      "learning_rate": 3.5934998776285044e-05,
      "loss": 1.046,
      "step": 1066
    },
    {
      "epoch": 0.2306029824940566,
      "grad_norm": 0.9405550956726074,
      "learning_rate": 3.5926534564763116e-05,
      "loss": 1.0598,
      "step": 1067
    },
    {
      "epoch": 0.230819105251783,
      "grad_norm": 1.014173984527588,
      "learning_rate": 3.591806254927607e-05,
      "loss": 0.8757,
      "step": 1068
    },
    {
      "epoch": 0.2310352280095094,
      "grad_norm": 1.0071674585342407,
      "learning_rate": 3.5909582733975174e-05,
      "loss": 0.9478,
      "step": 1069
    },
    {
      "epoch": 0.2312513507672358,
      "grad_norm": 1.0263164043426514,
      "learning_rate": 3.590109512301552e-05,
      "loss": 0.9838,
      "step": 1070
    },
    {
      "epoch": 0.2314674735249622,
      "grad_norm": 0.873468816280365,
      "learning_rate": 3.5892599720556e-05,
      "loss": 0.8863,
      "step": 1071
    },
    {
      "epoch": 0.23168359628268856,
      "grad_norm": 0.8657466769218445,
      "learning_rate": 3.588409653075937e-05,
      "loss": 0.9229,
      "step": 1072
    },
    {
      "epoch": 0.23189971904041495,
      "grad_norm": 0.993262767791748,
      "learning_rate": 3.587558555779215e-05,
      "loss": 0.7674,
      "step": 1073
    },
    {
      "epoch": 0.23211584179814135,
      "grad_norm": 0.7991393804550171,
      "learning_rate": 3.586706680582471e-05,
      "loss": 0.7787,
      "step": 1074
    },
    {
      "epoch": 0.23233196455586774,
      "grad_norm": 0.8678078651428223,
      "learning_rate": 3.585854027903121e-05,
      "loss": 0.774,
      "step": 1075
    },
    {
      "epoch": 0.2325480873135941,
      "grad_norm": 0.9204895496368408,
      "learning_rate": 3.585000598158964e-05,
      "loss": 0.9951,
      "step": 1076
    },
    {
      "epoch": 0.2327642100713205,
      "grad_norm": 0.9125127196311951,
      "learning_rate": 3.584146391768177e-05,
      "loss": 0.9557,
      "step": 1077
    },
    {
      "epoch": 0.2329803328290469,
      "grad_norm": 0.856071949005127,
      "learning_rate": 3.58329140914932e-05,
      "loss": 1.0097,
      "step": 1078
    },
    {
      "epoch": 0.2331964555867733,
      "grad_norm": 0.9988387227058411,
      "learning_rate": 3.582435650721333e-05,
      "loss": 1.1671,
      "step": 1079
    },
    {
      "epoch": 0.2334125783444997,
      "grad_norm": 0.9570616483688354,
      "learning_rate": 3.581579116903536e-05,
      "loss": 0.9732,
      "step": 1080
    },
    {
      "epoch": 0.23362870110222606,
      "grad_norm": 0.926640510559082,
      "learning_rate": 3.580721808115627e-05,
      "loss": 0.7473,
      "step": 1081
    },
    {
      "epoch": 0.23384482385995245,
      "grad_norm": 0.8650385737419128,
      "learning_rate": 3.579863724777686e-05,
      "loss": 0.8781,
      "step": 1082
    },
    {
      "epoch": 0.23406094661767884,
      "grad_norm": 0.8797435760498047,
      "learning_rate": 3.579004867310172e-05,
      "loss": 0.9566,
      "step": 1083
    },
    {
      "epoch": 0.23427706937540524,
      "grad_norm": 0.8567848205566406,
      "learning_rate": 3.578145236133923e-05,
      "loss": 0.8627,
      "step": 1084
    },
    {
      "epoch": 0.2344931921331316,
      "grad_norm": 0.9849474430084229,
      "learning_rate": 3.577284831670157e-05,
      "loss": 1.0674,
      "step": 1085
    },
    {
      "epoch": 0.234709314890858,
      "grad_norm": 0.8508183360099792,
      "learning_rate": 3.5764236543404694e-05,
      "loss": 0.8411,
      "step": 1086
    },
    {
      "epoch": 0.2349254376485844,
      "grad_norm": 0.8463327884674072,
      "learning_rate": 3.575561704566835e-05,
      "loss": 0.9291,
      "step": 1087
    },
    {
      "epoch": 0.2351415604063108,
      "grad_norm": 0.8836262822151184,
      "learning_rate": 3.574698982771608e-05,
      "loss": 0.8609,
      "step": 1088
    },
    {
      "epoch": 0.23535768316403718,
      "grad_norm": 1.0963822603225708,
      "learning_rate": 3.57383548937752e-05,
      "loss": 0.984,
      "step": 1089
    },
    {
      "epoch": 0.23557380592176355,
      "grad_norm": 0.9397730231285095,
      "learning_rate": 3.572971224807679e-05,
      "loss": 0.9046,
      "step": 1090
    },
    {
      "epoch": 0.23578992867948995,
      "grad_norm": 0.9169086217880249,
      "learning_rate": 3.5721061894855756e-05,
      "loss": 1.0361,
      "step": 1091
    },
    {
      "epoch": 0.23600605143721634,
      "grad_norm": 1.0400639772415161,
      "learning_rate": 3.5712403838350726e-05,
      "loss": 1.0632,
      "step": 1092
    },
    {
      "epoch": 0.23622217419494274,
      "grad_norm": 0.8737825155258179,
      "learning_rate": 3.570373808280414e-05,
      "loss": 0.9283,
      "step": 1093
    },
    {
      "epoch": 0.2364382969526691,
      "grad_norm": 0.9475460648536682,
      "learning_rate": 3.569506463246219e-05,
      "loss": 0.9713,
      "step": 1094
    },
    {
      "epoch": 0.2366544197103955,
      "grad_norm": 0.8910499215126038,
      "learning_rate": 3.568638349157486e-05,
      "loss": 1.0578,
      "step": 1095
    },
    {
      "epoch": 0.2368705424681219,
      "grad_norm": 0.933046281337738,
      "learning_rate": 3.567769466439588e-05,
      "loss": 0.7465,
      "step": 1096
    },
    {
      "epoch": 0.2370866652258483,
      "grad_norm": 0.8022202253341675,
      "learning_rate": 3.5668998155182746e-05,
      "loss": 0.7768,
      "step": 1097
    },
    {
      "epoch": 0.23730278798357468,
      "grad_norm": 0.9533029198646545,
      "learning_rate": 3.5660293968196744e-05,
      "loss": 0.9189,
      "step": 1098
    },
    {
      "epoch": 0.23751891074130105,
      "grad_norm": 0.9283272624015808,
      "learning_rate": 3.56515821077029e-05,
      "loss": 0.975,
      "step": 1099
    },
    {
      "epoch": 0.23773503349902744,
      "grad_norm": 0.933311402797699,
      "learning_rate": 3.564286257797001e-05,
      "loss": 0.9122,
      "step": 1100
    },
    {
      "epoch": 0.23795115625675384,
      "grad_norm": 1.4455485343933105,
      "learning_rate": 3.563413538327061e-05,
      "loss": 1.0093,
      "step": 1101
    },
    {
      "epoch": 0.23816727901448023,
      "grad_norm": 0.8379711508750916,
      "learning_rate": 3.5625400527881015e-05,
      "loss": 0.9233,
      "step": 1102
    },
    {
      "epoch": 0.23838340177220663,
      "grad_norm": 0.7099660038948059,
      "learning_rate": 3.5616658016081286e-05,
      "loss": 0.825,
      "step": 1103
    },
    {
      "epoch": 0.238599524529933,
      "grad_norm": 0.9785086512565613,
      "learning_rate": 3.560790785215522e-05,
      "loss": 0.9785,
      "step": 1104
    },
    {
      "epoch": 0.2388156472876594,
      "grad_norm": 0.9324660301208496,
      "learning_rate": 3.559915004039039e-05,
      "loss": 1.1149,
      "step": 1105
    },
    {
      "epoch": 0.23903177004538578,
      "grad_norm": 0.9618993401527405,
      "learning_rate": 3.559038458507811e-05,
      "loss": 0.972,
      "step": 1106
    },
    {
      "epoch": 0.23924789280311218,
      "grad_norm": 0.8113493323326111,
      "learning_rate": 3.558161149051341e-05,
      "loss": 0.8161,
      "step": 1107
    },
    {
      "epoch": 0.23946401556083854,
      "grad_norm": 0.8987981081008911,
      "learning_rate": 3.55728307609951e-05,
      "loss": 0.8535,
      "step": 1108
    },
    {
      "epoch": 0.23968013831856494,
      "grad_norm": 0.7710052728652954,
      "learning_rate": 3.556404240082573e-05,
      "loss": 0.8278,
      "step": 1109
    },
    {
      "epoch": 0.23989626107629133,
      "grad_norm": 0.8242369890213013,
      "learning_rate": 3.555524641431155e-05,
      "loss": 1.0152,
      "step": 1110
    },
    {
      "epoch": 0.24011238383401773,
      "grad_norm": 0.8900905251502991,
      "learning_rate": 3.554644280576259e-05,
      "loss": 0.8891,
      "step": 1111
    },
    {
      "epoch": 0.24032850659174412,
      "grad_norm": 0.8917087316513062,
      "learning_rate": 3.553763157949259e-05,
      "loss": 1.0774,
      "step": 1112
    },
    {
      "epoch": 0.2405446293494705,
      "grad_norm": 0.8916319012641907,
      "learning_rate": 3.5528812739819044e-05,
      "loss": 0.9739,
      "step": 1113
    },
    {
      "epoch": 0.24076075210719688,
      "grad_norm": 0.9248983860015869,
      "learning_rate": 3.551998629106314e-05,
      "loss": 0.9,
      "step": 1114
    },
    {
      "epoch": 0.24097687486492328,
      "grad_norm": 0.9821251034736633,
      "learning_rate": 3.551115223754984e-05,
      "loss": 1.0232,
      "step": 1115
    },
    {
      "epoch": 0.24119299762264967,
      "grad_norm": 0.868484377861023,
      "learning_rate": 3.5502310583607796e-05,
      "loss": 0.8949,
      "step": 1116
    },
    {
      "epoch": 0.24140912038037604,
      "grad_norm": 0.9557080864906311,
      "learning_rate": 3.5493461333569396e-05,
      "loss": 0.897,
      "step": 1117
    },
    {
      "epoch": 0.24162524313810244,
      "grad_norm": 0.9541976451873779,
      "learning_rate": 3.548460449177077e-05,
      "loss": 1.0214,
      "step": 1118
    },
    {
      "epoch": 0.24184136589582883,
      "grad_norm": 0.8650433421134949,
      "learning_rate": 3.547574006255173e-05,
      "loss": 1.0235,
      "step": 1119
    },
    {
      "epoch": 0.24205748865355523,
      "grad_norm": 0.9833268523216248,
      "learning_rate": 3.546686805025584e-05,
      "loss": 0.913,
      "step": 1120
    },
    {
      "epoch": 0.24227361141128162,
      "grad_norm": 0.940438985824585,
      "learning_rate": 3.5457988459230356e-05,
      "loss": 0.9043,
      "step": 1121
    },
    {
      "epoch": 0.242489734169008,
      "grad_norm": 0.774019718170166,
      "learning_rate": 3.544910129382626e-05,
      "loss": 0.8052,
      "step": 1122
    },
    {
      "epoch": 0.24270585692673438,
      "grad_norm": 0.7606857419013977,
      "learning_rate": 3.544020655839824e-05,
      "loss": 0.8237,
      "step": 1123
    },
    {
      "epoch": 0.24292197968446078,
      "grad_norm": 0.9330726861953735,
      "learning_rate": 3.54313042573047e-05,
      "loss": 0.8704,
      "step": 1124
    },
    {
      "epoch": 0.24313810244218717,
      "grad_norm": 0.8465023636817932,
      "learning_rate": 3.542239439490776e-05,
      "loss": 0.9687,
      "step": 1125
    },
    {
      "epoch": 0.24335422519991354,
      "grad_norm": 0.9619770646095276,
      "learning_rate": 3.54134769755732e-05,
      "loss": 0.9585,
      "step": 1126
    },
    {
      "epoch": 0.24357034795763993,
      "grad_norm": 0.7957881689071655,
      "learning_rate": 3.5404552003670565e-05,
      "loss": 0.783,
      "step": 1127
    },
    {
      "epoch": 0.24378647071536633,
      "grad_norm": 0.8766531944274902,
      "learning_rate": 3.539561948357305e-05,
      "loss": 0.9029,
      "step": 1128
    },
    {
      "epoch": 0.24400259347309272,
      "grad_norm": 0.9924162030220032,
      "learning_rate": 3.538667941965758e-05,
      "loss": 0.8902,
      "step": 1129
    },
    {
      "epoch": 0.24421871623081912,
      "grad_norm": 0.7866281270980835,
      "learning_rate": 3.537773181630477e-05,
      "loss": 0.7439,
      "step": 1130
    },
    {
      "epoch": 0.24443483898854548,
      "grad_norm": 0.8730312585830688,
      "learning_rate": 3.5368776677898906e-05,
      "loss": 0.9673,
      "step": 1131
    },
    {
      "epoch": 0.24465096174627188,
      "grad_norm": 0.9427263140678406,
      "learning_rate": 3.5359814008828006e-05,
      "loss": 1.077,
      "step": 1132
    },
    {
      "epoch": 0.24486708450399827,
      "grad_norm": 0.9608944058418274,
      "learning_rate": 3.5350843813483746e-05,
      "loss": 1.0296,
      "step": 1133
    },
    {
      "epoch": 0.24508320726172467,
      "grad_norm": 0.7508792281150818,
      "learning_rate": 3.53418660962615e-05,
      "loss": 0.937,
      "step": 1134
    },
    {
      "epoch": 0.24529933001945106,
      "grad_norm": 0.8189660906791687,
      "learning_rate": 3.533288086156034e-05,
      "loss": 0.8369,
      "step": 1135
    },
    {
      "epoch": 0.24551545277717743,
      "grad_norm": 0.8507649302482605,
      "learning_rate": 3.5323888113782996e-05,
      "loss": 0.847,
      "step": 1136
    },
    {
      "epoch": 0.24573157553490382,
      "grad_norm": 1.0197499990463257,
      "learning_rate": 3.53148878573359e-05,
      "loss": 0.8865,
      "step": 1137
    },
    {
      "epoch": 0.24594769829263022,
      "grad_norm": 0.8537721633911133,
      "learning_rate": 3.530588009662916e-05,
      "loss": 1.0585,
      "step": 1138
    },
    {
      "epoch": 0.2461638210503566,
      "grad_norm": 0.8288100361824036,
      "learning_rate": 3.5296864836076555e-05,
      "loss": 0.9157,
      "step": 1139
    },
    {
      "epoch": 0.24637994380808298,
      "grad_norm": 1.0532606840133667,
      "learning_rate": 3.5287842080095537e-05,
      "loss": 1.039,
      "step": 1140
    },
    {
      "epoch": 0.24659606656580937,
      "grad_norm": 0.9588279128074646,
      "learning_rate": 3.5278811833107246e-05,
      "loss": 0.9791,
      "step": 1141
    },
    {
      "epoch": 0.24681218932353577,
      "grad_norm": 0.9191341996192932,
      "learning_rate": 3.5269774099536476e-05,
      "loss": 1.0564,
      "step": 1142
    },
    {
      "epoch": 0.24702831208126216,
      "grad_norm": 0.7049510478973389,
      "learning_rate": 3.526072888381169e-05,
      "loss": 0.6928,
      "step": 1143
    },
    {
      "epoch": 0.24724443483898856,
      "grad_norm": 1.0857255458831787,
      "learning_rate": 3.525167619036503e-05,
      "loss": 1.0761,
      "step": 1144
    },
    {
      "epoch": 0.24746055759671493,
      "grad_norm": 0.8708821535110474,
      "learning_rate": 3.52426160236323e-05,
      "loss": 0.7623,
      "step": 1145
    },
    {
      "epoch": 0.24767668035444132,
      "grad_norm": 0.7799321413040161,
      "learning_rate": 3.523354838805295e-05,
      "loss": 0.8589,
      "step": 1146
    },
    {
      "epoch": 0.24789280311216771,
      "grad_norm": 0.9419688582420349,
      "learning_rate": 3.52244732880701e-05,
      "loss": 1.1236,
      "step": 1147
    },
    {
      "epoch": 0.2481089258698941,
      "grad_norm": 0.9029631018638611,
      "learning_rate": 3.521539072813054e-05,
      "loss": 1.0226,
      "step": 1148
    },
    {
      "epoch": 0.24832504862762048,
      "grad_norm": 1.017147421836853,
      "learning_rate": 3.5206300712684694e-05,
      "loss": 0.9981,
      "step": 1149
    },
    {
      "epoch": 0.24854117138534687,
      "grad_norm": 0.8713418841362,
      "learning_rate": 3.5197203246186654e-05,
      "loss": 0.8987,
      "step": 1150
    },
    {
      "epoch": 0.24875729414307327,
      "grad_norm": 0.8041524291038513,
      "learning_rate": 3.5188098333094145e-05,
      "loss": 0.8464,
      "step": 1151
    },
    {
      "epoch": 0.24897341690079966,
      "grad_norm": 0.9536299705505371,
      "learning_rate": 3.517898597786857e-05,
      "loss": 0.9542,
      "step": 1152
    },
    {
      "epoch": 0.24918953965852605,
      "grad_norm": 0.899425745010376,
      "learning_rate": 3.516986618497496e-05,
      "loss": 1.0598,
      "step": 1153
    },
    {
      "epoch": 0.24940566241625242,
      "grad_norm": 0.8632761240005493,
      "learning_rate": 3.5160738958881975e-05,
      "loss": 0.924,
      "step": 1154
    },
    {
      "epoch": 0.24962178517397882,
      "grad_norm": 0.85684734582901,
      "learning_rate": 3.5151604304061946e-05,
      "loss": 0.9598,
      "step": 1155
    },
    {
      "epoch": 0.2498379079317052,
      "grad_norm": 0.9506204128265381,
      "learning_rate": 3.514246222499084e-05,
      "loss": 0.9236,
      "step": 1156
    },
    {
      "epoch": 0.2500540306894316,
      "grad_norm": 0.8864320516586304,
      "learning_rate": 3.5133312726148244e-05,
      "loss": 1.0351,
      "step": 1157
    },
    {
      "epoch": 0.250270153447158,
      "grad_norm": 1.1389678716659546,
      "learning_rate": 3.51241558120174e-05,
      "loss": 1.0681,
      "step": 1158
    },
    {
      "epoch": 0.2504862762048844,
      "grad_norm": 0.9330379366874695,
      "learning_rate": 3.511499148708517e-05,
      "loss": 1.0575,
      "step": 1159
    },
    {
      "epoch": 0.25070239896261076,
      "grad_norm": 0.7407273054122925,
      "learning_rate": 3.510581975584205e-05,
      "loss": 0.7253,
      "step": 1160
    },
    {
      "epoch": 0.25091852172033713,
      "grad_norm": 0.8479929566383362,
      "learning_rate": 3.509664062278217e-05,
      "loss": 0.8569,
      "step": 1161
    },
    {
      "epoch": 0.25113464447806355,
      "grad_norm": 0.9289430975914001,
      "learning_rate": 3.5087454092403285e-05,
      "loss": 0.9247,
      "step": 1162
    },
    {
      "epoch": 0.2513507672357899,
      "grad_norm": 0.9285940527915955,
      "learning_rate": 3.507826016920677e-05,
      "loss": 0.9408,
      "step": 1163
    },
    {
      "epoch": 0.25156688999351634,
      "grad_norm": 0.8576862812042236,
      "learning_rate": 3.5069058857697625e-05,
      "loss": 0.9638,
      "step": 1164
    },
    {
      "epoch": 0.2517830127512427,
      "grad_norm": 1.0147756338119507,
      "learning_rate": 3.505985016238448e-05,
      "loss": 0.9354,
      "step": 1165
    },
    {
      "epoch": 0.2519991355089691,
      "grad_norm": 0.9034207463264465,
      "learning_rate": 3.505063408777956e-05,
      "loss": 1.0437,
      "step": 1166
    },
    {
      "epoch": 0.2522152582666955,
      "grad_norm": 0.9311251640319824,
      "learning_rate": 3.504141063839874e-05,
      "loss": 0.8789,
      "step": 1167
    },
    {
      "epoch": 0.25243138102442186,
      "grad_norm": 0.9327875971794128,
      "learning_rate": 3.503217981876147e-05,
      "loss": 1.0022,
      "step": 1168
    },
    {
      "epoch": 0.2526475037821483,
      "grad_norm": 0.9139388799667358,
      "learning_rate": 3.502294163339084e-05,
      "loss": 0.845,
      "step": 1169
    },
    {
      "epoch": 0.25286362653987465,
      "grad_norm": 0.9231714010238647,
      "learning_rate": 3.501369608681354e-05,
      "loss": 0.887,
      "step": 1170
    },
    {
      "epoch": 0.253079749297601,
      "grad_norm": 0.9506759643554688,
      "learning_rate": 3.5004443183559864e-05,
      "loss": 0.7532,
      "step": 1171
    },
    {
      "epoch": 0.25329587205532744,
      "grad_norm": 0.9717447757720947,
      "learning_rate": 3.499518292816371e-05,
      "loss": 0.9421,
      "step": 1172
    },
    {
      "epoch": 0.2535119948130538,
      "grad_norm": 1.1096994876861572,
      "learning_rate": 3.498591532516259e-05,
      "loss": 1.0407,
      "step": 1173
    },
    {
      "epoch": 0.2537281175707802,
      "grad_norm": 0.8934118151664734,
      "learning_rate": 3.4976640379097594e-05,
      "loss": 0.9223,
      "step": 1174
    },
    {
      "epoch": 0.2539442403285066,
      "grad_norm": 0.96073317527771,
      "learning_rate": 3.4967358094513446e-05,
      "loss": 0.9524,
      "step": 1175
    },
    {
      "epoch": 0.25416036308623297,
      "grad_norm": 0.8692421317100525,
      "learning_rate": 3.4958068475958424e-05,
      "loss": 0.7931,
      "step": 1176
    },
    {
      "epoch": 0.2543764858439594,
      "grad_norm": 1.026824712753296,
      "learning_rate": 3.494877152798442e-05,
      "loss": 0.9667,
      "step": 1177
    },
    {
      "epoch": 0.25459260860168575,
      "grad_norm": 0.9396779537200928,
      "learning_rate": 3.493946725514694e-05,
      "loss": 0.8441,
      "step": 1178
    },
    {
      "epoch": 0.2548087313594121,
      "grad_norm": 0.9980692267417908,
      "learning_rate": 3.493015566200503e-05,
      "loss": 0.9181,
      "step": 1179
    },
    {
      "epoch": 0.25502485411713854,
      "grad_norm": 0.8568054437637329,
      "learning_rate": 3.4920836753121366e-05,
      "loss": 0.911,
      "step": 1180
    },
    {
      "epoch": 0.2552409768748649,
      "grad_norm": 0.8480731844902039,
      "learning_rate": 3.4911510533062174e-05,
      "loss": 1.0297,
      "step": 1181
    },
    {
      "epoch": 0.25545709963259133,
      "grad_norm": 0.9319406151771545,
      "learning_rate": 3.49021770063973e-05,
      "loss": 0.8339,
      "step": 1182
    },
    {
      "epoch": 0.2556732223903177,
      "grad_norm": 0.8354699611663818,
      "learning_rate": 3.489283617770014e-05,
      "loss": 0.891,
      "step": 1183
    },
    {
      "epoch": 0.25588934514804407,
      "grad_norm": 0.9215112924575806,
      "learning_rate": 3.4883488051547674e-05,
      "loss": 0.8315,
      "step": 1184
    },
    {
      "epoch": 0.2561054679057705,
      "grad_norm": 0.9236769676208496,
      "learning_rate": 3.487413263252047e-05,
      "loss": 0.9985,
      "step": 1185
    },
    {
      "epoch": 0.25632159066349686,
      "grad_norm": 0.8746913075447083,
      "learning_rate": 3.486476992520267e-05,
      "loss": 0.7641,
      "step": 1186
    },
    {
      "epoch": 0.2565377134212233,
      "grad_norm": 0.9822189211845398,
      "learning_rate": 3.485539993418196e-05,
      "loss": 1.0482,
      "step": 1187
    },
    {
      "epoch": 0.25675383617894965,
      "grad_norm": 0.9604916572570801,
      "learning_rate": 3.4846022664049624e-05,
      "loss": 1.032,
      "step": 1188
    },
    {
      "epoch": 0.256969958936676,
      "grad_norm": 0.9414983987808228,
      "learning_rate": 3.4836638119400504e-05,
      "loss": 1.0286,
      "step": 1189
    },
    {
      "epoch": 0.25718608169440244,
      "grad_norm": 0.831779420375824,
      "learning_rate": 3.4827246304833e-05,
      "loss": 0.9321,
      "step": 1190
    },
    {
      "epoch": 0.2574022044521288,
      "grad_norm": 0.8739473223686218,
      "learning_rate": 3.481784722494909e-05,
      "loss": 0.8186,
      "step": 1191
    },
    {
      "epoch": 0.2576183272098552,
      "grad_norm": 0.8328155279159546,
      "learning_rate": 3.480844088435428e-05,
      "loss": 1.0132,
      "step": 1192
    },
    {
      "epoch": 0.2578344499675816,
      "grad_norm": 0.8653275966644287,
      "learning_rate": 3.479902728765768e-05,
      "loss": 0.9433,
      "step": 1193
    },
    {
      "epoch": 0.25805057272530796,
      "grad_norm": 0.7569735646247864,
      "learning_rate": 3.478960643947192e-05,
      "loss": 0.7814,
      "step": 1194
    },
    {
      "epoch": 0.2582666954830344,
      "grad_norm": 0.751507043838501,
      "learning_rate": 3.478017834441319e-05,
      "loss": 0.8006,
      "step": 1195
    },
    {
      "epoch": 0.25848281824076075,
      "grad_norm": 0.9412997961044312,
      "learning_rate": 3.477074300710123e-05,
      "loss": 0.9028,
      "step": 1196
    },
    {
      "epoch": 0.2586989409984871,
      "grad_norm": 0.8218026757240295,
      "learning_rate": 3.4761300432159356e-05,
      "loss": 1.0349,
      "step": 1197
    },
    {
      "epoch": 0.25891506375621354,
      "grad_norm": 0.8984594345092773,
      "learning_rate": 3.475185062421438e-05,
      "loss": 0.8372,
      "step": 1198
    },
    {
      "epoch": 0.2591311865139399,
      "grad_norm": 0.9882254600524902,
      "learning_rate": 3.474239358789671e-05,
      "loss": 1.1724,
      "step": 1199
    },
    {
      "epoch": 0.2593473092716663,
      "grad_norm": 0.9558103084564209,
      "learning_rate": 3.473292932784026e-05,
      "loss": 0.9732,
      "step": 1200
    },
    {
      "epoch": 0.2595634320293927,
      "grad_norm": 0.9504913091659546,
      "learning_rate": 3.472345784868249e-05,
      "loss": 0.8753,
      "step": 1201
    },
    {
      "epoch": 0.25977955478711906,
      "grad_norm": 0.9989475607872009,
      "learning_rate": 3.471397915506441e-05,
      "loss": 0.8223,
      "step": 1202
    },
    {
      "epoch": 0.2599956775448455,
      "grad_norm": 0.9614273309707642,
      "learning_rate": 3.4704493251630565e-05,
      "loss": 1.0081,
      "step": 1203
    },
    {
      "epoch": 0.26021180030257185,
      "grad_norm": 1.006683349609375,
      "learning_rate": 3.4695000143029013e-05,
      "loss": 0.8342,
      "step": 1204
    },
    {
      "epoch": 0.26042792306029827,
      "grad_norm": 1.0121324062347412,
      "learning_rate": 3.4685499833911366e-05,
      "loss": 0.9228,
      "step": 1205
    },
    {
      "epoch": 0.26064404581802464,
      "grad_norm": 0.9092336297035217,
      "learning_rate": 3.4675992328932746e-05,
      "loss": 0.8474,
      "step": 1206
    },
    {
      "epoch": 0.260860168575751,
      "grad_norm": 0.9401735067367554,
      "learning_rate": 3.4666477632751814e-05,
      "loss": 0.8988,
      "step": 1207
    },
    {
      "epoch": 0.26107629133347743,
      "grad_norm": 0.7813588976860046,
      "learning_rate": 3.465695575003074e-05,
      "loss": 0.8302,
      "step": 1208
    },
    {
      "epoch": 0.2612924140912038,
      "grad_norm": 0.7777760028839111,
      "learning_rate": 3.464742668543523e-05,
      "loss": 0.773,
      "step": 1209
    },
    {
      "epoch": 0.2615085368489302,
      "grad_norm": 1.030483365058899,
      "learning_rate": 3.463789044363451e-05,
      "loss": 0.9962,
      "step": 1210
    },
    {
      "epoch": 0.2617246596066566,
      "grad_norm": 0.7267763018608093,
      "learning_rate": 3.462834702930131e-05,
      "loss": 0.7075,
      "step": 1211
    },
    {
      "epoch": 0.26194078236438295,
      "grad_norm": 0.9555537700653076,
      "learning_rate": 3.461879644711188e-05,
      "loss": 1.1664,
      "step": 1212
    },
    {
      "epoch": 0.2621569051221094,
      "grad_norm": 0.9532811045646667,
      "learning_rate": 3.4609238701745985e-05,
      "loss": 0.9267,
      "step": 1213
    },
    {
      "epoch": 0.26237302787983574,
      "grad_norm": 0.9747017621994019,
      "learning_rate": 3.4599673797886896e-05,
      "loss": 0.972,
      "step": 1214
    },
    {
      "epoch": 0.2625891506375621,
      "grad_norm": 0.838711678981781,
      "learning_rate": 3.45901017402214e-05,
      "loss": 0.9826,
      "step": 1215
    },
    {
      "epoch": 0.26280527339528853,
      "grad_norm": 1.041108250617981,
      "learning_rate": 3.4580522533439773e-05,
      "loss": 0.9327,
      "step": 1216
    },
    {
      "epoch": 0.2630213961530149,
      "grad_norm": 0.7605981826782227,
      "learning_rate": 3.457093618223581e-05,
      "loss": 0.9068,
      "step": 1217
    },
    {
      "epoch": 0.2632375189107413,
      "grad_norm": 0.8309284448623657,
      "learning_rate": 3.4561342691306806e-05,
      "loss": 0.9885,
      "step": 1218
    },
    {
      "epoch": 0.2634536416684677,
      "grad_norm": 0.9520570039749146,
      "learning_rate": 3.455174206535354e-05,
      "loss": 0.7893,
      "step": 1219
    },
    {
      "epoch": 0.26366976442619405,
      "grad_norm": 0.7623745799064636,
      "learning_rate": 3.45421343090803e-05,
      "loss": 0.7545,
      "step": 1220
    },
    {
      "epoch": 0.2638858871839205,
      "grad_norm": 0.9425437450408936,
      "learning_rate": 3.453251942719487e-05,
      "loss": 0.8535,
      "step": 1221
    },
    {
      "epoch": 0.26410200994164684,
      "grad_norm": 0.8759909868240356,
      "learning_rate": 3.452289742440851e-05,
      "loss": 0.9291,
      "step": 1222
    },
    {
      "epoch": 0.26431813269937326,
      "grad_norm": 0.8797547817230225,
      "learning_rate": 3.451326830543599e-05,
      "loss": 0.9388,
      "step": 1223
    },
    {
      "epoch": 0.26453425545709963,
      "grad_norm": 0.8488011956214905,
      "learning_rate": 3.450363207499554e-05,
      "loss": 0.9036,
      "step": 1224
    },
    {
      "epoch": 0.264750378214826,
      "grad_norm": 0.9757619500160217,
      "learning_rate": 3.449398873780891e-05,
      "loss": 0.9631,
      "step": 1225
    },
    {
      "epoch": 0.2649665009725524,
      "grad_norm": 1.0438483953475952,
      "learning_rate": 3.44843382986013e-05,
      "loss": 1.0235,
      "step": 1226
    },
    {
      "epoch": 0.2651826237302788,
      "grad_norm": 0.9465405941009521,
      "learning_rate": 3.44746807621014e-05,
      "loss": 0.8919,
      "step": 1227
    },
    {
      "epoch": 0.2653987464880052,
      "grad_norm": 0.9920943379402161,
      "learning_rate": 3.4465016133041405e-05,
      "loss": 0.9785,
      "step": 1228
    },
    {
      "epoch": 0.2656148692457316,
      "grad_norm": 0.9541953206062317,
      "learning_rate": 3.445534441615693e-05,
      "loss": 0.9799,
      "step": 1229
    },
    {
      "epoch": 0.26583099200345794,
      "grad_norm": 1.056687831878662,
      "learning_rate": 3.4445665616187106e-05,
      "loss": 0.9835,
      "step": 1230
    },
    {
      "epoch": 0.26604711476118437,
      "grad_norm": 0.8478302955627441,
      "learning_rate": 3.4435979737874534e-05,
      "loss": 0.8815,
      "step": 1231
    },
    {
      "epoch": 0.26626323751891073,
      "grad_norm": 1.0264782905578613,
      "learning_rate": 3.442628678596525e-05,
      "loss": 0.9199,
      "step": 1232
    },
    {
      "epoch": 0.26647936027663716,
      "grad_norm": 0.8958058953285217,
      "learning_rate": 3.441658676520879e-05,
      "loss": 1.0462,
      "step": 1233
    },
    {
      "epoch": 0.2666954830343635,
      "grad_norm": 0.9210952520370483,
      "learning_rate": 3.440687968035815e-05,
      "loss": 0.9968,
      "step": 1234
    },
    {
      "epoch": 0.2669116057920899,
      "grad_norm": 0.8968856930732727,
      "learning_rate": 3.439716553616977e-05,
      "loss": 0.8992,
      "step": 1235
    },
    {
      "epoch": 0.2671277285498163,
      "grad_norm": 0.8895953297615051,
      "learning_rate": 3.4387444337403556e-05,
      "loss": 0.9828,
      "step": 1236
    },
    {
      "epoch": 0.2673438513075427,
      "grad_norm": 0.9425379633903503,
      "learning_rate": 3.4377716088822875e-05,
      "loss": 0.862,
      "step": 1237
    },
    {
      "epoch": 0.26755997406526905,
      "grad_norm": 0.8021935820579529,
      "learning_rate": 3.436798079519455e-05,
      "loss": 0.799,
      "step": 1238
    },
    {
      "epoch": 0.26777609682299547,
      "grad_norm": 0.9322415590286255,
      "learning_rate": 3.435823846128884e-05,
      "loss": 1.0036,
      "step": 1239
    },
    {
      "epoch": 0.26799221958072184,
      "grad_norm": 0.8307620286941528,
      "learning_rate": 3.434848909187948e-05,
      "loss": 0.77,
      "step": 1240
    },
    {
      "epoch": 0.26820834233844826,
      "grad_norm": 1.0585285425186157,
      "learning_rate": 3.4338732691743644e-05,
      "loss": 1.1941,
      "step": 1241
    },
    {
      "epoch": 0.2684244650961746,
      "grad_norm": 1.032701849937439,
      "learning_rate": 3.432896926566193e-05,
      "loss": 0.9371,
      "step": 1242
    },
    {
      "epoch": 0.268640587853901,
      "grad_norm": 0.8700973391532898,
      "learning_rate": 3.431919881841839e-05,
      "loss": 0.7914,
      "step": 1243
    },
    {
      "epoch": 0.2688567106116274,
      "grad_norm": 0.8723090291023254,
      "learning_rate": 3.430942135480053e-05,
      "loss": 0.914,
      "step": 1244
    },
    {
      "epoch": 0.2690728333693538,
      "grad_norm": 0.9654848575592041,
      "learning_rate": 3.4299636879599295e-05,
      "loss": 0.8412,
      "step": 1245
    },
    {
      "epoch": 0.2692889561270802,
      "grad_norm": 0.8331428170204163,
      "learning_rate": 3.4289845397609044e-05,
      "loss": 0.8932,
      "step": 1246
    },
    {
      "epoch": 0.26950507888480657,
      "grad_norm": 0.9070435166358948,
      "learning_rate": 3.428004691362758e-05,
      "loss": 0.8609,
      "step": 1247
    },
    {
      "epoch": 0.26972120164253294,
      "grad_norm": 0.8193120956420898,
      "learning_rate": 3.4270241432456135e-05,
      "loss": 0.7546,
      "step": 1248
    },
    {
      "epoch": 0.26993732440025936,
      "grad_norm": 0.9638481736183167,
      "learning_rate": 3.426042895889938e-05,
      "loss": 1.0327,
      "step": 1249
    },
    {
      "epoch": 0.2701534471579857,
      "grad_norm": 1.1214122772216797,
      "learning_rate": 3.42506094977654e-05,
      "loss": 1.1299,
      "step": 1250
    },
    {
      "epoch": 0.27036956991571215,
      "grad_norm": 0.8765868544578552,
      "learning_rate": 3.424078305386571e-05,
      "loss": 0.8192,
      "step": 1251
    },
    {
      "epoch": 0.2705856926734385,
      "grad_norm": 0.8048397302627563,
      "learning_rate": 3.423094963201524e-05,
      "loss": 0.7947,
      "step": 1252
    },
    {
      "epoch": 0.2708018154311649,
      "grad_norm": 0.8385607004165649,
      "learning_rate": 3.422110923703235e-05,
      "loss": 0.7999,
      "step": 1253
    },
    {
      "epoch": 0.2710179381888913,
      "grad_norm": 0.7560341954231262,
      "learning_rate": 3.421126187373881e-05,
      "loss": 0.7791,
      "step": 1254
    },
    {
      "epoch": 0.27123406094661767,
      "grad_norm": 1.1296929121017456,
      "learning_rate": 3.4201407546959796e-05,
      "loss": 1.0208,
      "step": 1255
    },
    {
      "epoch": 0.2714501837043441,
      "grad_norm": 0.9315122365951538,
      "learning_rate": 3.419154626152392e-05,
      "loss": 0.7966,
      "step": 1256
    },
    {
      "epoch": 0.27166630646207046,
      "grad_norm": 0.9457100033760071,
      "learning_rate": 3.418167802226318e-05,
      "loss": 1.1376,
      "step": 1257
    },
    {
      "epoch": 0.27188242921979683,
      "grad_norm": 0.8627362251281738,
      "learning_rate": 3.4171802834012996e-05,
      "loss": 0.9037,
      "step": 1258
    },
    {
      "epoch": 0.27209855197752325,
      "grad_norm": 0.8868746161460876,
      "learning_rate": 3.416192070161218e-05,
      "loss": 0.8328,
      "step": 1259
    },
    {
      "epoch": 0.2723146747352496,
      "grad_norm": 0.8933835029602051,
      "learning_rate": 3.415203162990296e-05,
      "loss": 0.9073,
      "step": 1260
    },
    {
      "epoch": 0.272530797492976,
      "grad_norm": 0.8237617015838623,
      "learning_rate": 3.4142135623730954e-05,
      "loss": 0.987,
      "step": 1261
    },
    {
      "epoch": 0.2727469202507024,
      "grad_norm": 0.8819873929023743,
      "learning_rate": 3.413223268794518e-05,
      "loss": 0.9322,
      "step": 1262
    },
    {
      "epoch": 0.2729630430084288,
      "grad_norm": 0.8413676619529724,
      "learning_rate": 3.412232282739807e-05,
      "loss": 0.801,
      "step": 1263
    },
    {
      "epoch": 0.2731791657661552,
      "grad_norm": 0.9011121392250061,
      "learning_rate": 3.411240604694541e-05,
      "loss": 1.0719,
      "step": 1264
    },
    {
      "epoch": 0.27339528852388156,
      "grad_norm": 1.065183162689209,
      "learning_rate": 3.410248235144641e-05,
      "loss": 1.1748,
      "step": 1265
    },
    {
      "epoch": 0.27361141128160793,
      "grad_norm": 0.8870022892951965,
      "learning_rate": 3.409255174576367e-05,
      "loss": 0.9793,
      "step": 1266
    },
    {
      "epoch": 0.27382753403933435,
      "grad_norm": 0.9056565165519714,
      "learning_rate": 3.408261423476314e-05,
      "loss": 0.9582,
      "step": 1267
    },
    {
      "epoch": 0.2740436567970607,
      "grad_norm": 0.9530169367790222,
      "learning_rate": 3.4072669823314194e-05,
      "loss": 1.0038,
      "step": 1268
    },
    {
      "epoch": 0.27425977955478714,
      "grad_norm": 0.961306095123291,
      "learning_rate": 3.4062718516289564e-05,
      "loss": 0.8197,
      "step": 1269
    },
    {
      "epoch": 0.2744759023125135,
      "grad_norm": 0.8169891834259033,
      "learning_rate": 3.405276031856537e-05,
      "loss": 0.8325,
      "step": 1270
    },
    {
      "epoch": 0.2746920250702399,
      "grad_norm": 0.8405026793479919,
      "learning_rate": 3.40427952350211e-05,
      "loss": 0.8495,
      "step": 1271
    },
    {
      "epoch": 0.2749081478279663,
      "grad_norm": 0.7815272212028503,
      "learning_rate": 3.403282327053963e-05,
      "loss": 0.9514,
      "step": 1272
    },
    {
      "epoch": 0.27512427058569267,
      "grad_norm": 0.9990447759628296,
      "learning_rate": 3.402284443000721e-05,
      "loss": 0.8438,
      "step": 1273
    },
    {
      "epoch": 0.2753403933434191,
      "grad_norm": 0.8385444283485413,
      "learning_rate": 3.401285871831342e-05,
      "loss": 0.8427,
      "step": 1274
    },
    {
      "epoch": 0.27555651610114545,
      "grad_norm": 0.9013781547546387,
      "learning_rate": 3.400286614035126e-05,
      "loss": 0.9588,
      "step": 1275
    },
    {
      "epoch": 0.2757726388588718,
      "grad_norm": 0.8931231498718262,
      "learning_rate": 3.399286670101705e-05,
      "loss": 1.0243,
      "step": 1276
    },
    {
      "epoch": 0.27598876161659824,
      "grad_norm": 0.9418546557426453,
      "learning_rate": 3.398286040521051e-05,
      "loss": 0.8088,
      "step": 1277
    },
    {
      "epoch": 0.2762048843743246,
      "grad_norm": 0.8482999801635742,
      "learning_rate": 3.39728472578347e-05,
      "loss": 0.955,
      "step": 1278
    },
    {
      "epoch": 0.276421007132051,
      "grad_norm": 0.9833201169967651,
      "learning_rate": 3.3962827263796024e-05,
      "loss": 1.0117,
      "step": 1279
    },
    {
      "epoch": 0.2766371298897774,
      "grad_norm": 0.803976833820343,
      "learning_rate": 3.395280042800427e-05,
      "loss": 0.9786,
      "step": 1280
    },
    {
      "epoch": 0.27685325264750377,
      "grad_norm": 0.8341302275657654,
      "learning_rate": 3.394276675537256e-05,
      "loss": 0.884,
      "step": 1281
    },
    {
      "epoch": 0.2770693754052302,
      "grad_norm": 0.8698337078094482,
      "learning_rate": 3.393272625081737e-05,
      "loss": 0.9135,
      "step": 1282
    },
    {
      "epoch": 0.27728549816295656,
      "grad_norm": 1.0237162113189697,
      "learning_rate": 3.392267891925854e-05,
      "loss": 0.7524,
      "step": 1283
    },
    {
      "epoch": 0.2775016209206829,
      "grad_norm": 0.9259644746780396,
      "learning_rate": 3.391262476561921e-05,
      "loss": 1.0063,
      "step": 1284
    },
    {
      "epoch": 0.27771774367840935,
      "grad_norm": 0.9323410987854004,
      "learning_rate": 3.3902563794825904e-05,
      "loss": 1.021,
      "step": 1285
    },
    {
      "epoch": 0.2779338664361357,
      "grad_norm": 0.841606616973877,
      "learning_rate": 3.389249601180848e-05,
      "loss": 0.9984,
      "step": 1286
    },
    {
      "epoch": 0.27814998919386213,
      "grad_norm": 0.844332754611969,
      "learning_rate": 3.388242142150013e-05,
      "loss": 0.8519,
      "step": 1287
    },
    {
      "epoch": 0.2783661119515885,
      "grad_norm": 0.9326592087745667,
      "learning_rate": 3.3872340028837366e-05,
      "loss": 0.9249,
      "step": 1288
    },
    {
      "epoch": 0.27858223470931487,
      "grad_norm": 0.9754464626312256,
      "learning_rate": 3.3862251838760067e-05,
      "loss": 1.016,
      "step": 1289
    },
    {
      "epoch": 0.2787983574670413,
      "grad_norm": 0.8150403499603271,
      "learning_rate": 3.3852156856211404e-05,
      "loss": 0.9499,
      "step": 1290
    },
    {
      "epoch": 0.27901448022476766,
      "grad_norm": 0.8909110426902771,
      "learning_rate": 3.38420550861379e-05,
      "loss": 0.9758,
      "step": 1291
    },
    {
      "epoch": 0.2792306029824941,
      "grad_norm": 0.9303125739097595,
      "learning_rate": 3.3831946533489414e-05,
      "loss": 0.9312,
      "step": 1292
    },
    {
      "epoch": 0.27944672574022045,
      "grad_norm": 0.9276483058929443,
      "learning_rate": 3.382183120321909e-05,
      "loss": 0.8911,
      "step": 1293
    },
    {
      "epoch": 0.2796628484979468,
      "grad_norm": 0.892886757850647,
      "learning_rate": 3.3811709100283434e-05,
      "loss": 1.0487,
      "step": 1294
    },
    {
      "epoch": 0.27987897125567324,
      "grad_norm": 0.8534461855888367,
      "learning_rate": 3.3801580229642243e-05,
      "loss": 0.8842,
      "step": 1295
    },
    {
      "epoch": 0.2800950940133996,
      "grad_norm": 0.7508280277252197,
      "learning_rate": 3.379144459625865e-05,
      "loss": 0.7597,
      "step": 1296
    },
    {
      "epoch": 0.280311216771126,
      "grad_norm": 0.8750606179237366,
      "learning_rate": 3.378130220509908e-05,
      "loss": 0.8795,
      "step": 1297
    },
    {
      "epoch": 0.2805273395288524,
      "grad_norm": 0.88175368309021,
      "learning_rate": 3.3771153061133286e-05,
      "loss": 1.1336,
      "step": 1298
    },
    {
      "epoch": 0.28074346228657876,
      "grad_norm": 0.9986168742179871,
      "learning_rate": 3.376099716933433e-05,
      "loss": 1.225,
      "step": 1299
    },
    {
      "epoch": 0.2809595850443052,
      "grad_norm": 0.7794172763824463,
      "learning_rate": 3.375083453467857e-05,
      "loss": 0.941,
      "step": 1300
    },
    {
      "epoch": 0.28117570780203155,
      "grad_norm": 0.9246460795402527,
      "learning_rate": 3.3740665162145685e-05,
      "loss": 0.9086,
      "step": 1301
    },
    {
      "epoch": 0.2813918305597579,
      "grad_norm": 0.8370898962020874,
      "learning_rate": 3.373048905671862e-05,
      "loss": 0.8536,
      "step": 1302
    },
    {
      "epoch": 0.28160795331748434,
      "grad_norm": 0.8827524185180664,
      "learning_rate": 3.372030622338367e-05,
      "loss": 1.0139,
      "step": 1303
    },
    {
      "epoch": 0.2818240760752107,
      "grad_norm": 0.8502309918403625,
      "learning_rate": 3.371011666713038e-05,
      "loss": 0.9512,
      "step": 1304
    },
    {
      "epoch": 0.28204019883293713,
      "grad_norm": 0.777923583984375,
      "learning_rate": 3.3699920392951615e-05,
      "loss": 0.9032,
      "step": 1305
    },
    {
      "epoch": 0.2822563215906635,
      "grad_norm": 1.035717487335205,
      "learning_rate": 3.368971740584353e-05,
      "loss": 1.1064,
      "step": 1306
    },
    {
      "epoch": 0.28247244434838986,
      "grad_norm": 0.8214704990386963,
      "learning_rate": 3.367950771080556e-05,
      "loss": 1.0441,
      "step": 1307
    },
    {
      "epoch": 0.2826885671061163,
      "grad_norm": 0.9529228210449219,
      "learning_rate": 3.366929131284043e-05,
      "loss": 0.9373,
      "step": 1308
    },
    {
      "epoch": 0.28290468986384265,
      "grad_norm": 0.8761690258979797,
      "learning_rate": 3.3659068216954164e-05,
      "loss": 0.8627,
      "step": 1309
    },
    {
      "epoch": 0.2831208126215691,
      "grad_norm": 0.8952942490577698,
      "learning_rate": 3.364883842815605e-05,
      "loss": 0.9455,
      "step": 1310
    },
    {
      "epoch": 0.28333693537929544,
      "grad_norm": 0.864251971244812,
      "learning_rate": 3.363860195145865e-05,
      "loss": 0.8178,
      "step": 1311
    },
    {
      "epoch": 0.2835530581370218,
      "grad_norm": 0.9414435029029846,
      "learning_rate": 3.362835879187783e-05,
      "loss": 1.1242,
      "step": 1312
    },
    {
      "epoch": 0.28376918089474823,
      "grad_norm": 0.8933331370353699,
      "learning_rate": 3.361810895443269e-05,
      "loss": 0.7215,
      "step": 1313
    },
    {
      "epoch": 0.2839853036524746,
      "grad_norm": 0.9027130603790283,
      "learning_rate": 3.360785244414566e-05,
      "loss": 1.0014,
      "step": 1314
    },
    {
      "epoch": 0.284201426410201,
      "grad_norm": 0.9086551666259766,
      "learning_rate": 3.3597589266042384e-05,
      "loss": 0.769,
      "step": 1315
    },
    {
      "epoch": 0.2844175491679274,
      "grad_norm": 0.9505411982536316,
      "learning_rate": 3.35873194251518e-05,
      "loss": 0.8302,
      "step": 1316
    },
    {
      "epoch": 0.28463367192565375,
      "grad_norm": 0.7406628131866455,
      "learning_rate": 3.35770429265061e-05,
      "loss": 0.8559,
      "step": 1317
    },
    {
      "epoch": 0.2848497946833802,
      "grad_norm": 0.9455854892730713,
      "learning_rate": 3.356675977514076e-05,
      "loss": 0.9963,
      "step": 1318
    },
    {
      "epoch": 0.28506591744110654,
      "grad_norm": 0.915973961353302,
      "learning_rate": 3.355646997609449e-05,
      "loss": 1.0081,
      "step": 1319
    },
    {
      "epoch": 0.28528204019883296,
      "grad_norm": 0.8263589143753052,
      "learning_rate": 3.354617353440927e-05,
      "loss": 0.9118,
      "step": 1320
    },
    {
      "epoch": 0.28549816295655933,
      "grad_norm": 0.8953337669372559,
      "learning_rate": 3.353587045513033e-05,
      "loss": 1.052,
      "step": 1321
    },
    {
      "epoch": 0.2857142857142857,
      "grad_norm": 0.9771729111671448,
      "learning_rate": 3.352556074330615e-05,
      "loss": 0.9796,
      "step": 1322
    },
    {
      "epoch": 0.2859304084720121,
      "grad_norm": 1.0700210332870483,
      "learning_rate": 3.351524440398849e-05,
      "loss": 1.0475,
      "step": 1323
    },
    {
      "epoch": 0.2861465312297385,
      "grad_norm": 0.8895292282104492,
      "learning_rate": 3.3504921442232306e-05,
      "loss": 1.0354,
      "step": 1324
    },
    {
      "epoch": 0.28636265398746485,
      "grad_norm": 0.8481094837188721,
      "learning_rate": 3.3494591863095834e-05,
      "loss": 0.988,
      "step": 1325
    },
    {
      "epoch": 0.2865787767451913,
      "grad_norm": 1.0255558490753174,
      "learning_rate": 3.348425567164054e-05,
      "loss": 0.8495,
      "step": 1326
    },
    {
      "epoch": 0.28679489950291764,
      "grad_norm": 0.8727287650108337,
      "learning_rate": 3.347391287293115e-05,
      "loss": 0.8897,
      "step": 1327
    },
    {
      "epoch": 0.28701102226064407,
      "grad_norm": 0.8275237679481506,
      "learning_rate": 3.3463563472035586e-05,
      "loss": 0.8135,
      "step": 1328
    },
    {
      "epoch": 0.28722714501837043,
      "grad_norm": 0.8719654679298401,
      "learning_rate": 3.3453207474025054e-05,
      "loss": 0.9233,
      "step": 1329
    },
    {
      "epoch": 0.2874432677760968,
      "grad_norm": 0.9389613270759583,
      "learning_rate": 3.344284488397395e-05,
      "loss": 0.9438,
      "step": 1330
    },
    {
      "epoch": 0.2876593905338232,
      "grad_norm": 0.8721885085105896,
      "learning_rate": 3.3432475706959936e-05,
      "loss": 0.8661,
      "step": 1331
    },
    {
      "epoch": 0.2878755132915496,
      "grad_norm": 0.9125036001205444,
      "learning_rate": 3.3422099948063876e-05,
      "loss": 0.9638,
      "step": 1332
    },
    {
      "epoch": 0.288091636049276,
      "grad_norm": 0.9462102651596069,
      "learning_rate": 3.3411717612369866e-05,
      "loss": 0.8471,
      "step": 1333
    },
    {
      "epoch": 0.2883077588070024,
      "grad_norm": 0.9385507702827454,
      "learning_rate": 3.340132870496523e-05,
      "loss": 0.9694,
      "step": 1334
    },
    {
      "epoch": 0.28852388156472875,
      "grad_norm": 0.9072837233543396,
      "learning_rate": 3.339093323094051e-05,
      "loss": 0.8278,
      "step": 1335
    },
    {
      "epoch": 0.28874000432245517,
      "grad_norm": 0.7959422469139099,
      "learning_rate": 3.338053119538946e-05,
      "loss": 0.9622,
      "step": 1336
    },
    {
      "epoch": 0.28895612708018154,
      "grad_norm": 0.8302894234657288,
      "learning_rate": 3.337012260340906e-05,
      "loss": 0.9179,
      "step": 1337
    },
    {
      "epoch": 0.28917224983790796,
      "grad_norm": 0.8141044974327087,
      "learning_rate": 3.3359707460099485e-05,
      "loss": 0.872,
      "step": 1338
    },
    {
      "epoch": 0.2893883725956343,
      "grad_norm": 1.1808451414108276,
      "learning_rate": 3.334928577056414e-05,
      "loss": 1.1128,
      "step": 1339
    },
    {
      "epoch": 0.2896044953533607,
      "grad_norm": 0.8286523818969727,
      "learning_rate": 3.3338857539909625e-05,
      "loss": 1.0352,
      "step": 1340
    },
    {
      "epoch": 0.2898206181110871,
      "grad_norm": 0.9007647037506104,
      "learning_rate": 3.332842277324576e-05,
      "loss": 0.8617,
      "step": 1341
    },
    {
      "epoch": 0.2900367408688135,
      "grad_norm": 0.9239495992660522,
      "learning_rate": 3.3317981475685534e-05,
      "loss": 1.0,
      "step": 1342
    },
    {
      "epoch": 0.29025286362653985,
      "grad_norm": 0.986542284488678,
      "learning_rate": 3.330753365234518e-05,
      "loss": 0.9027,
      "step": 1343
    },
    {
      "epoch": 0.29046898638426627,
      "grad_norm": 0.96485835313797,
      "learning_rate": 3.329707930834409e-05,
      "loss": 0.9289,
      "step": 1344
    },
    {
      "epoch": 0.29068510914199264,
      "grad_norm": 0.8924793004989624,
      "learning_rate": 3.328661844880489e-05,
      "loss": 1.0875,
      "step": 1345
    },
    {
      "epoch": 0.29090123189971906,
      "grad_norm": 0.9833986163139343,
      "learning_rate": 3.327615107885335e-05,
      "loss": 0.9299,
      "step": 1346
    },
    {
      "epoch": 0.2911173546574454,
      "grad_norm": 0.8929439187049866,
      "learning_rate": 3.326567720361849e-05,
      "loss": 0.9629,
      "step": 1347
    },
    {
      "epoch": 0.2913334774151718,
      "grad_norm": 0.8560901880264282,
      "learning_rate": 3.325519682823244e-05,
      "loss": 1.1753,
      "step": 1348
    },
    {
      "epoch": 0.2915496001728982,
      "grad_norm": 0.9288893938064575,
      "learning_rate": 3.324470995783061e-05,
      "loss": 0.994,
      "step": 1349
    },
    {
      "epoch": 0.2917657229306246,
      "grad_norm": 1.0577545166015625,
      "learning_rate": 3.323421659755151e-05,
      "loss": 0.9899,
      "step": 1350
    },
    {
      "epoch": 0.291981845688351,
      "grad_norm": 0.8964827060699463,
      "learning_rate": 3.322371675253686e-05,
      "loss": 0.898,
      "step": 1351
    },
    {
      "epoch": 0.29219796844607737,
      "grad_norm": 0.856554388999939,
      "learning_rate": 3.3213210427931585e-05,
      "loss": 0.8849,
      "step": 1352
    },
    {
      "epoch": 0.29241409120380374,
      "grad_norm": 1.0463320016860962,
      "learning_rate": 3.320269762888374e-05,
      "loss": 0.98,
      "step": 1353
    },
    {
      "epoch": 0.29263021396153016,
      "grad_norm": 0.9561208486557007,
      "learning_rate": 3.319217836054457e-05,
      "loss": 0.8383,
      "step": 1354
    },
    {
      "epoch": 0.29284633671925653,
      "grad_norm": 0.8747385740280151,
      "learning_rate": 3.318165262806851e-05,
      "loss": 0.9091,
      "step": 1355
    },
    {
      "epoch": 0.29306245947698295,
      "grad_norm": 0.8965030908584595,
      "learning_rate": 3.317112043661313e-05,
      "loss": 0.999,
      "step": 1356
    },
    {
      "epoch": 0.2932785822347093,
      "grad_norm": 0.9199790954589844,
      "learning_rate": 3.3160581791339186e-05,
      "loss": 0.8623,
      "step": 1357
    },
    {
      "epoch": 0.2934947049924357,
      "grad_norm": 0.7929543256759644,
      "learning_rate": 3.3150036697410586e-05,
      "loss": 0.9401,
      "step": 1358
    },
    {
      "epoch": 0.2937108277501621,
      "grad_norm": 0.8746668100357056,
      "learning_rate": 3.3139485159994396e-05,
      "loss": 0.9932,
      "step": 1359
    },
    {
      "epoch": 0.2939269505078885,
      "grad_norm": 0.8712968826293945,
      "learning_rate": 3.312892718426086e-05,
      "loss": 0.9287,
      "step": 1360
    },
    {
      "epoch": 0.2941430732656149,
      "grad_norm": 0.8132144212722778,
      "learning_rate": 3.3118362775383346e-05,
      "loss": 0.9417,
      "step": 1361
    },
    {
      "epoch": 0.29435919602334126,
      "grad_norm": 0.9515878558158875,
      "learning_rate": 3.31077919385384e-05,
      "loss": 0.8063,
      "step": 1362
    },
    {
      "epoch": 0.29457531878106763,
      "grad_norm": 0.8533822298049927,
      "learning_rate": 3.309721467890571e-05,
      "loss": 0.9044,
      "step": 1363
    },
    {
      "epoch": 0.29479144153879405,
      "grad_norm": 0.8742635846138,
      "learning_rate": 3.308663100166809e-05,
      "loss": 0.9775,
      "step": 1364
    },
    {
      "epoch": 0.2950075642965204,
      "grad_norm": 0.8867061734199524,
      "learning_rate": 3.3076040912011544e-05,
      "loss": 0.8029,
      "step": 1365
    },
    {
      "epoch": 0.2952236870542468,
      "grad_norm": 0.9431737661361694,
      "learning_rate": 3.306544441512518e-05,
      "loss": 1.0518,
      "step": 1366
    },
    {
      "epoch": 0.2954398098119732,
      "grad_norm": 0.8994881510734558,
      "learning_rate": 3.3054841516201244e-05,
      "loss": 0.9312,
      "step": 1367
    },
    {
      "epoch": 0.2956559325696996,
      "grad_norm": 0.8521738648414612,
      "learning_rate": 3.304423222043515e-05,
      "loss": 1.0674,
      "step": 1368
    },
    {
      "epoch": 0.295872055327426,
      "grad_norm": 0.8029457330703735,
      "learning_rate": 3.3033616533025425e-05,
      "loss": 0.9699,
      "step": 1369
    },
    {
      "epoch": 0.29608817808515236,
      "grad_norm": 0.8967093229293823,
      "learning_rate": 3.3022994459173724e-05,
      "loss": 0.8124,
      "step": 1370
    },
    {
      "epoch": 0.29630430084287873,
      "grad_norm": 1.0194793939590454,
      "learning_rate": 3.301236600408484e-05,
      "loss": 0.8773,
      "step": 1371
    },
    {
      "epoch": 0.29652042360060515,
      "grad_norm": 0.8610829710960388,
      "learning_rate": 3.300173117296671e-05,
      "loss": 0.9437,
      "step": 1372
    },
    {
      "epoch": 0.2967365463583315,
      "grad_norm": 0.9379924535751343,
      "learning_rate": 3.2991089971030344e-05,
      "loss": 1.0667,
      "step": 1373
    },
    {
      "epoch": 0.29695266911605794,
      "grad_norm": 1.0194292068481445,
      "learning_rate": 3.2980442403489926e-05,
      "loss": 1.1446,
      "step": 1374
    },
    {
      "epoch": 0.2971687918737843,
      "grad_norm": 0.8629703521728516,
      "learning_rate": 3.296978847556274e-05,
      "loss": 0.8923,
      "step": 1375
    },
    {
      "epoch": 0.2973849146315107,
      "grad_norm": 0.817665159702301,
      "learning_rate": 3.295912819246918e-05,
      "loss": 0.7325,
      "step": 1376
    },
    {
      "epoch": 0.2976010373892371,
      "grad_norm": 0.9250444173812866,
      "learning_rate": 3.294846155943277e-05,
      "loss": 1.0255,
      "step": 1377
    },
    {
      "epoch": 0.29781716014696347,
      "grad_norm": 0.9583104848861694,
      "learning_rate": 3.293778858168012e-05,
      "loss": 1.0777,
      "step": 1378
    },
    {
      "epoch": 0.2980332829046899,
      "grad_norm": 0.8659541606903076,
      "learning_rate": 3.292710926444098e-05,
      "loss": 0.9546,
      "step": 1379
    },
    {
      "epoch": 0.29824940566241626,
      "grad_norm": 0.819677472114563,
      "learning_rate": 3.291642361294818e-05,
      "loss": 0.8741,
      "step": 1380
    },
    {
      "epoch": 0.2984655284201426,
      "grad_norm": 0.9010489583015442,
      "learning_rate": 3.290573163243766e-05,
      "loss": 0.9506,
      "step": 1381
    },
    {
      "epoch": 0.29868165117786905,
      "grad_norm": 0.8347435593605042,
      "learning_rate": 3.2895033328148485e-05,
      "loss": 0.7022,
      "step": 1382
    },
    {
      "epoch": 0.2988977739355954,
      "grad_norm": 1.0209494829177856,
      "learning_rate": 3.2884328705322786e-05,
      "loss": 1.0069,
      "step": 1383
    },
    {
      "epoch": 0.2991138966933218,
      "grad_norm": 0.7825828790664673,
      "learning_rate": 3.28736177692058e-05,
      "loss": 0.7867,
      "step": 1384
    },
    {
      "epoch": 0.2993300194510482,
      "grad_norm": 0.9509403109550476,
      "learning_rate": 3.2862900525045875e-05,
      "loss": 1.0487,
      "step": 1385
    },
    {
      "epoch": 0.29954614220877457,
      "grad_norm": 0.8550942540168762,
      "learning_rate": 3.285217697809443e-05,
      "loss": 0.8047,
      "step": 1386
    },
    {
      "epoch": 0.299762264966501,
      "grad_norm": 0.7809147834777832,
      "learning_rate": 3.2841447133605965e-05,
      "loss": 0.7594,
      "step": 1387
    },
    {
      "epoch": 0.29997838772422736,
      "grad_norm": 1.0176464319229126,
      "learning_rate": 3.2830710996838094e-05,
      "loss": 1.0076,
      "step": 1388
    },
    {
      "epoch": 0.3001945104819537,
      "grad_norm": 0.7915776371955872,
      "learning_rate": 3.2819968573051494e-05,
      "loss": 0.9234,
      "step": 1389
    },
    {
      "epoch": 0.30041063323968015,
      "grad_norm": 0.9604405760765076,
      "learning_rate": 3.280921986750993e-05,
      "loss": 1.0885,
      "step": 1390
    },
    {
      "epoch": 0.3006267559974065,
      "grad_norm": 0.8955967426300049,
      "learning_rate": 3.279846488548024e-05,
      "loss": 0.8431,
      "step": 1391
    },
    {
      "epoch": 0.30084287875513294,
      "grad_norm": 0.936397910118103,
      "learning_rate": 3.2787703632232344e-05,
      "loss": 0.8925,
      "step": 1392
    },
    {
      "epoch": 0.3010590015128593,
      "grad_norm": 0.7613109946250916,
      "learning_rate": 3.277693611303922e-05,
      "loss": 0.8787,
      "step": 1393
    },
    {
      "epoch": 0.30127512427058567,
      "grad_norm": 0.9893035292625427,
      "learning_rate": 3.2766162333176955e-05,
      "loss": 0.9203,
      "step": 1394
    },
    {
      "epoch": 0.3014912470283121,
      "grad_norm": 0.800090491771698,
      "learning_rate": 3.275538229792465e-05,
      "loss": 0.8575,
      "step": 1395
    },
    {
      "epoch": 0.30170736978603846,
      "grad_norm": 0.9871878623962402,
      "learning_rate": 3.27445960125645e-05,
      "loss": 0.8238,
      "step": 1396
    },
    {
      "epoch": 0.3019234925437649,
      "grad_norm": 0.9994919300079346,
      "learning_rate": 3.273380348238177e-05,
      "loss": 0.927,
      "step": 1397
    },
    {
      "epoch": 0.30213961530149125,
      "grad_norm": 0.9238467216491699,
      "learning_rate": 3.2723004712664766e-05,
      "loss": 0.8515,
      "step": 1398
    },
    {
      "epoch": 0.3023557380592176,
      "grad_norm": 0.9151886105537415,
      "learning_rate": 3.271219970870487e-05,
      "loss": 0.9554,
      "step": 1399
    },
    {
      "epoch": 0.30257186081694404,
      "grad_norm": 0.9553155899047852,
      "learning_rate": 3.27013884757965e-05,
      "loss": 1.0217,
      "step": 1400
    },
    {
      "epoch": 0.3027879835746704,
      "grad_norm": 0.8802904486656189,
      "learning_rate": 3.269057101923714e-05,
      "loss": 0.9886,
      "step": 1401
    },
    {
      "epoch": 0.3030041063323968,
      "grad_norm": 1.0054231882095337,
      "learning_rate": 3.267974734432731e-05,
      "loss": 1.1334,
      "step": 1402
    },
    {
      "epoch": 0.3032202290901232,
      "grad_norm": 0.8831580281257629,
      "learning_rate": 3.266891745637059e-05,
      "loss": 1.0838,
      "step": 1403
    },
    {
      "epoch": 0.30343635184784956,
      "grad_norm": 0.8652462959289551,
      "learning_rate": 3.2658081360673615e-05,
      "loss": 0.9097,
      "step": 1404
    },
    {
      "epoch": 0.303652474605576,
      "grad_norm": 0.8898692727088928,
      "learning_rate": 3.2647239062546026e-05,
      "loss": 0.9986,
      "step": 1405
    },
    {
      "epoch": 0.30386859736330235,
      "grad_norm": 0.8116910457611084,
      "learning_rate": 3.2636390567300544e-05,
      "loss": 0.883,
      "step": 1406
    },
    {
      "epoch": 0.3040847201210287,
      "grad_norm": 0.8537965416908264,
      "learning_rate": 3.262553588025288e-05,
      "loss": 1.0509,
      "step": 1407
    },
    {
      "epoch": 0.30430084287875514,
      "grad_norm": 0.9292021989822388,
      "learning_rate": 3.2614675006721826e-05,
      "loss": 0.96,
      "step": 1408
    },
    {
      "epoch": 0.3045169656364815,
      "grad_norm": 0.8599814176559448,
      "learning_rate": 3.2603807952029187e-05,
      "loss": 0.9549,
      "step": 1409
    },
    {
      "epoch": 0.30473308839420793,
      "grad_norm": 0.7698879837989807,
      "learning_rate": 3.2592934721499775e-05,
      "loss": 0.8736,
      "step": 1410
    },
    {
      "epoch": 0.3049492111519343,
      "grad_norm": 0.9960350394248962,
      "learning_rate": 3.258205532046147e-05,
      "loss": 0.8644,
      "step": 1411
    },
    {
      "epoch": 0.30516533390966066,
      "grad_norm": 0.8982427716255188,
      "learning_rate": 3.2571169754245136e-05,
      "loss": 1.0009,
      "step": 1412
    },
    {
      "epoch": 0.3053814566673871,
      "grad_norm": 0.9564204812049866,
      "learning_rate": 3.256027802818469e-05,
      "loss": 0.8045,
      "step": 1413
    },
    {
      "epoch": 0.30559757942511345,
      "grad_norm": 0.8544869422912598,
      "learning_rate": 3.254938014761704e-05,
      "loss": 0.8334,
      "step": 1414
    },
    {
      "epoch": 0.3058137021828399,
      "grad_norm": 1.0413247346878052,
      "learning_rate": 3.253847611788214e-05,
      "loss": 1.0471,
      "step": 1415
    },
    {
      "epoch": 0.30602982494056624,
      "grad_norm": 0.9799613356590271,
      "learning_rate": 3.252756594432291e-05,
      "loss": 1.1929,
      "step": 1416
    },
    {
      "epoch": 0.3062459476982926,
      "grad_norm": 0.8312803506851196,
      "learning_rate": 3.2516649632285335e-05,
      "loss": 0.9199,
      "step": 1417
    },
    {
      "epoch": 0.30646207045601903,
      "grad_norm": 1.087661862373352,
      "learning_rate": 3.250572718711837e-05,
      "loss": 1.019,
      "step": 1418
    },
    {
      "epoch": 0.3066781932137454,
      "grad_norm": 0.8894320726394653,
      "learning_rate": 3.2494798614174e-05,
      "loss": 0.8547,
      "step": 1419
    },
    {
      "epoch": 0.3068943159714718,
      "grad_norm": 1.049173355102539,
      "learning_rate": 3.2483863918807187e-05,
      "loss": 0.8446,
      "step": 1420
    },
    {
      "epoch": 0.3071104387291982,
      "grad_norm": 0.8932068347930908,
      "learning_rate": 3.2472923106375915e-05,
      "loss": 0.7526,
      "step": 1421
    },
    {
      "epoch": 0.30732656148692455,
      "grad_norm": 0.9055934548377991,
      "learning_rate": 3.246197618224115e-05,
      "loss": 1.0318,
      "step": 1422
    },
    {
      "epoch": 0.307542684244651,
      "grad_norm": 0.9189789295196533,
      "learning_rate": 3.245102315176687e-05,
      "loss": 0.9418,
      "step": 1423
    },
    {
      "epoch": 0.30775880700237734,
      "grad_norm": 0.8460312485694885,
      "learning_rate": 3.2440064020320025e-05,
      "loss": 0.8354,
      "step": 1424
    },
    {
      "epoch": 0.30797492976010377,
      "grad_norm": 0.9116464257240295,
      "learning_rate": 3.2429098793270565e-05,
      "loss": 0.8884,
      "step": 1425
    },
    {
      "epoch": 0.30819105251783013,
      "grad_norm": 0.9721053242683411,
      "learning_rate": 3.241812747599143e-05,
      "loss": 1.0997,
      "step": 1426
    },
    {
      "epoch": 0.3084071752755565,
      "grad_norm": 0.8986929655075073,
      "learning_rate": 3.2407150073858536e-05,
      "loss": 0.6901,
      "step": 1427
    },
    {
      "epoch": 0.3086232980332829,
      "grad_norm": 0.8689582347869873,
      "learning_rate": 3.239616659225079e-05,
      "loss": 0.9803,
      "step": 1428
    },
    {
      "epoch": 0.3088394207910093,
      "grad_norm": 0.8031696677207947,
      "learning_rate": 3.2385177036550075e-05,
      "loss": 1.196,
      "step": 1429
    },
    {
      "epoch": 0.30905554354873566,
      "grad_norm": 0.8193029761314392,
      "learning_rate": 3.2374181412141235e-05,
      "loss": 0.9293,
      "step": 1430
    },
    {
      "epoch": 0.3092716663064621,
      "grad_norm": 0.9366302490234375,
      "learning_rate": 3.2363179724412105e-05,
      "loss": 1.0836,
      "step": 1431
    },
    {
      "epoch": 0.30948778906418845,
      "grad_norm": 1.066731572151184,
      "learning_rate": 3.2352171978753495e-05,
      "loss": 0.9343,
      "step": 1432
    },
    {
      "epoch": 0.30970391182191487,
      "grad_norm": 0.9972415566444397,
      "learning_rate": 3.2341158180559174e-05,
      "loss": 1.0282,
      "step": 1433
    },
    {
      "epoch": 0.30992003457964123,
      "grad_norm": 1.0614042282104492,
      "learning_rate": 3.233013833522587e-05,
      "loss": 1.0944,
      "step": 1434
    },
    {
      "epoch": 0.3101361573373676,
      "grad_norm": 0.9234687089920044,
      "learning_rate": 3.2319112448153274e-05,
      "loss": 0.953,
      "step": 1435
    },
    {
      "epoch": 0.310352280095094,
      "grad_norm": 0.8281671404838562,
      "learning_rate": 3.230808052474407e-05,
      "loss": 0.9637,
      "step": 1436
    },
    {
      "epoch": 0.3105684028528204,
      "grad_norm": 1.0255532264709473,
      "learning_rate": 3.229704257040385e-05,
      "loss": 0.9303,
      "step": 1437
    },
    {
      "epoch": 0.3107845256105468,
      "grad_norm": 0.8764996528625488,
      "learning_rate": 3.228599859054121e-05,
      "loss": 1.0025,
      "step": 1438
    },
    {
      "epoch": 0.3110006483682732,
      "grad_norm": 1.0559879541397095,
      "learning_rate": 3.227494859056765e-05,
      "loss": 1.0939,
      "step": 1439
    },
    {
      "epoch": 0.31121677112599955,
      "grad_norm": 0.8221381306648254,
      "learning_rate": 3.2263892575897666e-05,
      "loss": 0.9899,
      "step": 1440
    },
    {
      "epoch": 0.31143289388372597,
      "grad_norm": 0.782566249370575,
      "learning_rate": 3.2252830551948663e-05,
      "loss": 0.9341,
      "step": 1441
    },
    {
      "epoch": 0.31164901664145234,
      "grad_norm": 0.9282163381576538,
      "learning_rate": 3.2241762524141016e-05,
      "loss": 0.923,
      "step": 1442
    },
    {
      "epoch": 0.31186513939917876,
      "grad_norm": 0.9249364733695984,
      "learning_rate": 3.223068849789803e-05,
      "loss": 0.8954,
      "step": 1443
    },
    {
      "epoch": 0.3120812621569051,
      "grad_norm": 0.9271489381790161,
      "learning_rate": 3.221960847864596e-05,
      "loss": 0.8951,
      "step": 1444
    },
    {
      "epoch": 0.3122973849146315,
      "grad_norm": 0.7943389415740967,
      "learning_rate": 3.220852247181397e-05,
      "loss": 0.8719,
      "step": 1445
    },
    {
      "epoch": 0.3125135076723579,
      "grad_norm": 0.8858696222305298,
      "learning_rate": 3.2197430482834204e-05,
      "loss": 0.9753,
      "step": 1446
    },
    {
      "epoch": 0.3127296304300843,
      "grad_norm": 0.9106383919715881,
      "learning_rate": 3.218633251714169e-05,
      "loss": 0.9076,
      "step": 1447
    },
    {
      "epoch": 0.31294575318781065,
      "grad_norm": 1.0993046760559082,
      "learning_rate": 3.217522858017442e-05,
      "loss": 0.8854,
      "step": 1448
    },
    {
      "epoch": 0.31316187594553707,
      "grad_norm": 0.9718704223632812,
      "learning_rate": 3.2164118677373287e-05,
      "loss": 1.0509,
      "step": 1449
    },
    {
      "epoch": 0.31337799870326344,
      "grad_norm": 0.9736214876174927,
      "learning_rate": 3.215300281418212e-05,
      "loss": 0.8624,
      "step": 1450
    },
    {
      "epoch": 0.31359412146098986,
      "grad_norm": 0.8958658576011658,
      "learning_rate": 3.214188099604766e-05,
      "loss": 0.9222,
      "step": 1451
    },
    {
      "epoch": 0.31381024421871623,
      "grad_norm": 0.9576481580734253,
      "learning_rate": 3.21307532284196e-05,
      "loss": 0.8332,
      "step": 1452
    },
    {
      "epoch": 0.3140263669764426,
      "grad_norm": 0.8226738572120667,
      "learning_rate": 3.211961951675048e-05,
      "loss": 0.7807,
      "step": 1453
    },
    {
      "epoch": 0.314242489734169,
      "grad_norm": 0.877682089805603,
      "learning_rate": 3.210847986649582e-05,
      "loss": 0.7873,
      "step": 1454
    },
    {
      "epoch": 0.3144586124918954,
      "grad_norm": 0.8779473304748535,
      "learning_rate": 3.209733428311403e-05,
      "loss": 0.8722,
      "step": 1455
    },
    {
      "epoch": 0.3146747352496218,
      "grad_norm": 0.9517311453819275,
      "learning_rate": 3.2086182772066386e-05,
      "loss": 0.9974,
      "step": 1456
    },
    {
      "epoch": 0.3148908580073482,
      "grad_norm": 0.8762261867523193,
      "learning_rate": 3.207502533881713e-05,
      "loss": 0.9426,
      "step": 1457
    },
    {
      "epoch": 0.31510698076507454,
      "grad_norm": 0.9212803244590759,
      "learning_rate": 3.206386198883338e-05,
      "loss": 0.9103,
      "step": 1458
    },
    {
      "epoch": 0.31532310352280096,
      "grad_norm": 0.85163813829422,
      "learning_rate": 3.205269272758513e-05,
      "loss": 0.9321,
      "step": 1459
    },
    {
      "epoch": 0.31553922628052733,
      "grad_norm": 0.9100742340087891,
      "learning_rate": 3.204151756054532e-05,
      "loss": 0.9176,
      "step": 1460
    },
    {
      "epoch": 0.31575534903825375,
      "grad_norm": 0.9264156818389893,
      "learning_rate": 3.203033649318973e-05,
      "loss": 0.9382,
      "step": 1461
    },
    {
      "epoch": 0.3159714717959801,
      "grad_norm": 0.8556106090545654,
      "learning_rate": 3.201914953099707e-05,
      "loss": 0.9871,
      "step": 1462
    },
    {
      "epoch": 0.3161875945537065,
      "grad_norm": 0.9584073424339294,
      "learning_rate": 3.200795667944892e-05,
      "loss": 1.0367,
      "step": 1463
    },
    {
      "epoch": 0.3164037173114329,
      "grad_norm": 0.78240966796875,
      "learning_rate": 3.199675794402976e-05,
      "loss": 0.7538,
      "step": 1464
    },
    {
      "epoch": 0.3166198400691593,
      "grad_norm": 0.881813645362854,
      "learning_rate": 3.198555333022694e-05,
      "loss": 1.0155,
      "step": 1465
    },
    {
      "epoch": 0.3168359628268857,
      "grad_norm": 1.0268313884735107,
      "learning_rate": 3.1974342843530694e-05,
      "loss": 1.0565,
      "step": 1466
    },
    {
      "epoch": 0.31705208558461206,
      "grad_norm": 0.8890941143035889,
      "learning_rate": 3.196312648943414e-05,
      "loss": 1.0004,
      "step": 1467
    },
    {
      "epoch": 0.31726820834233843,
      "grad_norm": 0.8016995191574097,
      "learning_rate": 3.195190427343326e-05,
      "loss": 0.7452,
      "step": 1468
    },
    {
      "epoch": 0.31748433110006485,
      "grad_norm": 0.8824421167373657,
      "learning_rate": 3.194067620102691e-05,
      "loss": 0.9135,
      "step": 1469
    },
    {
      "epoch": 0.3177004538577912,
      "grad_norm": 1.0067412853240967,
      "learning_rate": 3.192944227771682e-05,
      "loss": 0.9748,
      "step": 1470
    },
    {
      "epoch": 0.3179165766155176,
      "grad_norm": 0.9073596596717834,
      "learning_rate": 3.1918202509007605e-05,
      "loss": 1.0502,
      "step": 1471
    },
    {
      "epoch": 0.318132699373244,
      "grad_norm": 1.0185718536376953,
      "learning_rate": 3.19069569004067e-05,
      "loss": 0.8564,
      "step": 1472
    },
    {
      "epoch": 0.3183488221309704,
      "grad_norm": 0.9078257083892822,
      "learning_rate": 3.189570545742444e-05,
      "loss": 0.7839,
      "step": 1473
    },
    {
      "epoch": 0.3185649448886968,
      "grad_norm": 0.8796834945678711,
      "learning_rate": 3.1884448185574016e-05,
      "loss": 0.7607,
      "step": 1474
    },
    {
      "epoch": 0.31878106764642317,
      "grad_norm": 0.9579322934150696,
      "learning_rate": 3.187318509037145e-05,
      "loss": 1.1325,
      "step": 1475
    },
    {
      "epoch": 0.31899719040414953,
      "grad_norm": 0.834815502166748,
      "learning_rate": 3.186191617733562e-05,
      "loss": 0.8135,
      "step": 1476
    },
    {
      "epoch": 0.31921331316187596,
      "grad_norm": 0.9586957693099976,
      "learning_rate": 3.1850641451988295e-05,
      "loss": 0.9507,
      "step": 1477
    },
    {
      "epoch": 0.3194294359196023,
      "grad_norm": 0.824196994304657,
      "learning_rate": 3.1839360919854067e-05,
      "loss": 0.9114,
      "step": 1478
    },
    {
      "epoch": 0.31964555867732874,
      "grad_norm": 0.7384624481201172,
      "learning_rate": 3.182807458646034e-05,
      "loss": 0.6911,
      "step": 1479
    },
    {
      "epoch": 0.3198616814350551,
      "grad_norm": 0.9443362951278687,
      "learning_rate": 3.181678245733741e-05,
      "loss": 0.9006,
      "step": 1480
    },
    {
      "epoch": 0.3200778041927815,
      "grad_norm": 1.0642915964126587,
      "learning_rate": 3.1805484538018396e-05,
      "loss": 0.9546,
      "step": 1481
    },
    {
      "epoch": 0.3202939269505079,
      "grad_norm": 0.9522401690483093,
      "learning_rate": 3.1794180834039245e-05,
      "loss": 0.9805,
      "step": 1482
    },
    {
      "epoch": 0.32051004970823427,
      "grad_norm": 0.9213443398475647,
      "learning_rate": 3.178287135093875e-05,
      "loss": 1.0474,
      "step": 1483
    },
    {
      "epoch": 0.3207261724659607,
      "grad_norm": 0.8785810470581055,
      "learning_rate": 3.177155609425854e-05,
      "loss": 0.8559,
      "step": 1484
    },
    {
      "epoch": 0.32094229522368706,
      "grad_norm": 0.8620195984840393,
      "learning_rate": 3.176023506954304e-05,
      "loss": 0.8553,
      "step": 1485
    },
    {
      "epoch": 0.3211584179814134,
      "grad_norm": 0.9921575784683228,
      "learning_rate": 3.174890828233956e-05,
      "loss": 0.9996,
      "step": 1486
    },
    {
      "epoch": 0.32137454073913985,
      "grad_norm": 0.8634210228919983,
      "learning_rate": 3.173757573819817e-05,
      "loss": 0.8852,
      "step": 1487
    },
    {
      "epoch": 0.3215906634968662,
      "grad_norm": 1.0673092603683472,
      "learning_rate": 3.17262374426718e-05,
      "loss": 0.9562,
      "step": 1488
    },
    {
      "epoch": 0.32180678625459264,
      "grad_norm": 0.9692311882972717,
      "learning_rate": 3.171489340131619e-05,
      "loss": 0.8759,
      "step": 1489
    },
    {
      "epoch": 0.322022909012319,
      "grad_norm": 0.9291105270385742,
      "learning_rate": 3.17035436196899e-05,
      "loss": 0.8828,
      "step": 1490
    },
    {
      "epoch": 0.32223903177004537,
      "grad_norm": 0.9369811415672302,
      "learning_rate": 3.169218810335429e-05,
      "loss": 0.8377,
      "step": 1491
    },
    {
      "epoch": 0.3224551545277718,
      "grad_norm": 1.0407109260559082,
      "learning_rate": 3.1680826857873534e-05,
      "loss": 0.7897,
      "step": 1492
    },
    {
      "epoch": 0.32267127728549816,
      "grad_norm": 0.8820456266403198,
      "learning_rate": 3.166945988881462e-05,
      "loss": 0.9196,
      "step": 1493
    },
    {
      "epoch": 0.3228874000432245,
      "grad_norm": 0.9677581191062927,
      "learning_rate": 3.165808720174734e-05,
      "loss": 0.9512,
      "step": 1494
    },
    {
      "epoch": 0.32310352280095095,
      "grad_norm": 1.0615472793579102,
      "learning_rate": 3.164670880224428e-05,
      "loss": 0.9821,
      "step": 1495
    },
    {
      "epoch": 0.3233196455586773,
      "grad_norm": 0.908225953578949,
      "learning_rate": 3.163532469588084e-05,
      "loss": 0.9871,
      "step": 1496
    },
    {
      "epoch": 0.32353576831640374,
      "grad_norm": 0.9291085004806519,
      "learning_rate": 3.162393488823518e-05,
      "loss": 0.8227,
      "step": 1497
    },
    {
      "epoch": 0.3237518910741301,
      "grad_norm": 1.0241197347640991,
      "learning_rate": 3.1612539384888304e-05,
      "loss": 0.9324,
      "step": 1498
    },
    {
      "epoch": 0.32396801383185647,
      "grad_norm": 0.9944846630096436,
      "learning_rate": 3.1601138191423966e-05,
      "loss": 1.0057,
      "step": 1499
    },
    {
      "epoch": 0.3241841365895829,
      "grad_norm": 0.872500479221344,
      "learning_rate": 3.1589731313428745e-05,
      "loss": 1.0265,
      "step": 1500
    },
    {
      "epoch": 0.32440025934730926,
      "grad_norm": 0.9759976267814636,
      "learning_rate": 3.157831875649196e-05,
      "loss": 0.9748,
      "step": 1501
    },
    {
      "epoch": 0.3246163821050357,
      "grad_norm": 0.9262040257453918,
      "learning_rate": 3.156690052620575e-05,
      "loss": 0.8156,
      "step": 1502
    },
    {
      "epoch": 0.32483250486276205,
      "grad_norm": 0.9541052579879761,
      "learning_rate": 3.155547662816503e-05,
      "loss": 0.9731,
      "step": 1503
    },
    {
      "epoch": 0.3250486276204884,
      "grad_norm": 0.996863067150116,
      "learning_rate": 3.1544047067967465e-05,
      "loss": 0.9828,
      "step": 1504
    },
    {
      "epoch": 0.32526475037821484,
      "grad_norm": 0.9251271486282349,
      "learning_rate": 3.153261185121353e-05,
      "loss": 0.9075,
      "step": 1505
    },
    {
      "epoch": 0.3254808731359412,
      "grad_norm": 0.9439906477928162,
      "learning_rate": 3.152117098350644e-05,
      "loss": 0.9737,
      "step": 1506
    },
    {
      "epoch": 0.32569699589366763,
      "grad_norm": 0.9262637495994568,
      "learning_rate": 3.15097244704522e-05,
      "loss": 0.9972,
      "step": 1507
    },
    {
      "epoch": 0.325913118651394,
      "grad_norm": 0.8676000237464905,
      "learning_rate": 3.149827231765959e-05,
      "loss": 1.0146,
      "step": 1508
    },
    {
      "epoch": 0.32612924140912036,
      "grad_norm": 0.894512414932251,
      "learning_rate": 3.1486814530740114e-05,
      "loss": 1.0719,
      "step": 1509
    },
    {
      "epoch": 0.3263453641668468,
      "grad_norm": 0.9048900008201599,
      "learning_rate": 3.147535111530807e-05,
      "loss": 0.7671,
      "step": 1510
    },
    {
      "epoch": 0.32656148692457315,
      "grad_norm": 0.9422193169593811,
      "learning_rate": 3.1463882076980515e-05,
      "loss": 1.0002,
      "step": 1511
    },
    {
      "epoch": 0.3267776096822995,
      "grad_norm": 0.9564182162284851,
      "learning_rate": 3.145240742137724e-05,
      "loss": 0.9639,
      "step": 1512
    },
    {
      "epoch": 0.32699373244002594,
      "grad_norm": 0.9609580039978027,
      "learning_rate": 3.1440927154120795e-05,
      "loss": 0.9638,
      "step": 1513
    },
    {
      "epoch": 0.3272098551977523,
      "grad_norm": 0.9527109265327454,
      "learning_rate": 3.1429441280836504e-05,
      "loss": 0.9075,
      "step": 1514
    },
    {
      "epoch": 0.32742597795547873,
      "grad_norm": 0.9161962866783142,
      "learning_rate": 3.14179498071524e-05,
      "loss": 0.8975,
      "step": 1515
    },
    {
      "epoch": 0.3276421007132051,
      "grad_norm": 0.9064362645149231,
      "learning_rate": 3.1406452738699284e-05,
      "loss": 0.9487,
      "step": 1516
    },
    {
      "epoch": 0.32785822347093146,
      "grad_norm": 0.9479205012321472,
      "learning_rate": 3.139495008111069e-05,
      "loss": 0.9245,
      "step": 1517
    },
    {
      "epoch": 0.3280743462286579,
      "grad_norm": 0.9338969588279724,
      "learning_rate": 3.138344184002291e-05,
      "loss": 0.8734,
      "step": 1518
    },
    {
      "epoch": 0.32829046898638425,
      "grad_norm": 0.9291052222251892,
      "learning_rate": 3.137192802107493e-05,
      "loss": 0.9992,
      "step": 1519
    },
    {
      "epoch": 0.3285065917441107,
      "grad_norm": 0.9882834553718567,
      "learning_rate": 3.136040862990852e-05,
      "loss": 1.01,
      "step": 1520
    },
    {
      "epoch": 0.32872271450183704,
      "grad_norm": 0.8983349800109863,
      "learning_rate": 3.134888367216814e-05,
      "loss": 0.9383,
      "step": 1521
    },
    {
      "epoch": 0.3289388372595634,
      "grad_norm": 1.0109102725982666,
      "learning_rate": 3.133735315350099e-05,
      "loss": 0.9699,
      "step": 1522
    },
    {
      "epoch": 0.32915496001728983,
      "grad_norm": 1.0953506231307983,
      "learning_rate": 3.1325817079557004e-05,
      "loss": 1.0418,
      "step": 1523
    },
    {
      "epoch": 0.3293710827750162,
      "grad_norm": 0.9270989894866943,
      "learning_rate": 3.1314275455988835e-05,
      "loss": 0.9192,
      "step": 1524
    },
    {
      "epoch": 0.3295872055327426,
      "grad_norm": 0.8695876598358154,
      "learning_rate": 3.130272828845184e-05,
      "loss": 0.8122,
      "step": 1525
    },
    {
      "epoch": 0.329803328290469,
      "grad_norm": 0.8579394817352295,
      "learning_rate": 3.1291175582604116e-05,
      "loss": 0.9604,
      "step": 1526
    },
    {
      "epoch": 0.33001945104819536,
      "grad_norm": 0.8832108378410339,
      "learning_rate": 3.127961734410646e-05,
      "loss": 0.9691,
      "step": 1527
    },
    {
      "epoch": 0.3302355738059218,
      "grad_norm": 0.8641235828399658,
      "learning_rate": 3.126805357862237e-05,
      "loss": 0.9122,
      "step": 1528
    },
    {
      "epoch": 0.33045169656364815,
      "grad_norm": 0.8991526961326599,
      "learning_rate": 3.125648429181809e-05,
      "loss": 0.8178,
      "step": 1529
    },
    {
      "epoch": 0.33066781932137457,
      "grad_norm": 1.029059648513794,
      "learning_rate": 3.1244909489362526e-05,
      "loss": 0.97,
      "step": 1530
    },
    {
      "epoch": 0.33088394207910093,
      "grad_norm": 0.9144630432128906,
      "learning_rate": 3.12333291769273e-05,
      "loss": 0.9817,
      "step": 1531
    },
    {
      "epoch": 0.3311000648368273,
      "grad_norm": 1.0621875524520874,
      "learning_rate": 3.1221743360186745e-05,
      "loss": 0.9882,
      "step": 1532
    },
    {
      "epoch": 0.3313161875945537,
      "grad_norm": 0.8354048728942871,
      "learning_rate": 3.121015204481788e-05,
      "loss": 0.8407,
      "step": 1533
    },
    {
      "epoch": 0.3315323103522801,
      "grad_norm": 0.8807803988456726,
      "learning_rate": 3.1198555236500435e-05,
      "loss": 0.814,
      "step": 1534
    },
    {
      "epoch": 0.33174843311000646,
      "grad_norm": 1.0710721015930176,
      "learning_rate": 3.118695294091681e-05,
      "loss": 0.9749,
      "step": 1535
    },
    {
      "epoch": 0.3319645558677329,
      "grad_norm": 0.9333633780479431,
      "learning_rate": 3.1175345163752105e-05,
      "loss": 0.8591,
      "step": 1536
    },
    {
      "epoch": 0.33218067862545925,
      "grad_norm": 0.986922562122345,
      "learning_rate": 3.11637319106941e-05,
      "loss": 1.0004,
      "step": 1537
    },
    {
      "epoch": 0.33239680138318567,
      "grad_norm": 0.7981417179107666,
      "learning_rate": 3.115211318743327e-05,
      "loss": 0.8796,
      "step": 1538
    },
    {
      "epoch": 0.33261292414091204,
      "grad_norm": 0.914089024066925,
      "learning_rate": 3.114048899966275e-05,
      "loss": 0.9013,
      "step": 1539
    },
    {
      "epoch": 0.3328290468986384,
      "grad_norm": 0.942493736743927,
      "learning_rate": 3.112885935307839e-05,
      "loss": 0.9443,
      "step": 1540
    },
    {
      "epoch": 0.3330451696563648,
      "grad_norm": 0.8391870260238647,
      "learning_rate": 3.111722425337866e-05,
      "loss": 0.9616,
      "step": 1541
    },
    {
      "epoch": 0.3332612924140912,
      "grad_norm": 0.8648127317428589,
      "learning_rate": 3.110558370626475e-05,
      "loss": 0.9745,
      "step": 1542
    },
    {
      "epoch": 0.3334774151718176,
      "grad_norm": 1.1335663795471191,
      "learning_rate": 3.109393771744049e-05,
      "loss": 0.9485,
      "step": 1543
    },
    {
      "epoch": 0.333693537929544,
      "grad_norm": 0.8218050003051758,
      "learning_rate": 3.10822862926124e-05,
      "loss": 0.87,
      "step": 1544
    },
    {
      "epoch": 0.33390966068727035,
      "grad_norm": 0.8941485285758972,
      "learning_rate": 3.1070629437489644e-05,
      "loss": 1.0029,
      "step": 1545
    },
    {
      "epoch": 0.33412578344499677,
      "grad_norm": 0.8925780057907104,
      "learning_rate": 3.1058967157784056e-05,
      "loss": 0.8107,
      "step": 1546
    },
    {
      "epoch": 0.33434190620272314,
      "grad_norm": 0.780466616153717,
      "learning_rate": 3.104729945921012e-05,
      "loss": 0.8016,
      "step": 1547
    },
    {
      "epoch": 0.33455802896044956,
      "grad_norm": 1.005927562713623,
      "learning_rate": 3.103562634748498e-05,
      "loss": 0.9431,
      "step": 1548
    },
    {
      "epoch": 0.3347741517181759,
      "grad_norm": 0.9308692216873169,
      "learning_rate": 3.102394782832846e-05,
      "loss": 0.8973,
      "step": 1549
    },
    {
      "epoch": 0.3349902744759023,
      "grad_norm": 0.9113888740539551,
      "learning_rate": 3.101226390746298e-05,
      "loss": 0.8785,
      "step": 1550
    },
    {
      "epoch": 0.3352063972336287,
      "grad_norm": 0.9140170216560364,
      "learning_rate": 3.1000574590613636e-05,
      "loss": 0.8812,
      "step": 1551
    },
    {
      "epoch": 0.3354225199913551,
      "grad_norm": 0.9670307636260986,
      "learning_rate": 3.098887988350818e-05,
      "loss": 0.7821,
      "step": 1552
    },
    {
      "epoch": 0.33563864274908145,
      "grad_norm": 0.9144891500473022,
      "learning_rate": 3.097717979187698e-05,
      "loss": 0.9738,
      "step": 1553
    },
    {
      "epoch": 0.3358547655068079,
      "grad_norm": 1.0251222848892212,
      "learning_rate": 3.096547432145306e-05,
      "loss": 0.8484,
      "step": 1554
    },
    {
      "epoch": 0.33607088826453424,
      "grad_norm": 0.9502887725830078,
      "learning_rate": 3.095376347797207e-05,
      "loss": 0.8715,
      "step": 1555
    },
    {
      "epoch": 0.33628701102226066,
      "grad_norm": 0.8589299321174622,
      "learning_rate": 3.0942047267172296e-05,
      "loss": 0.8837,
      "step": 1556
    },
    {
      "epoch": 0.33650313377998703,
      "grad_norm": 0.9323639273643494,
      "learning_rate": 3.093032569479466e-05,
      "loss": 0.8995,
      "step": 1557
    },
    {
      "epoch": 0.3367192565377134,
      "grad_norm": 1.05650794506073,
      "learning_rate": 3.091859876658269e-05,
      "loss": 0.9877,
      "step": 1558
    },
    {
      "epoch": 0.3369353792954398,
      "grad_norm": 1.1186269521713257,
      "learning_rate": 3.090686648828257e-05,
      "loss": 1.0473,
      "step": 1559
    },
    {
      "epoch": 0.3371515020531662,
      "grad_norm": 1.087912917137146,
      "learning_rate": 3.0895128865643086e-05,
      "loss": 1.1434,
      "step": 1560
    },
    {
      "epoch": 0.3373676248108926,
      "grad_norm": 0.8387062549591064,
      "learning_rate": 3.0883385904415644e-05,
      "loss": 0.9497,
      "step": 1561
    },
    {
      "epoch": 0.337583747568619,
      "grad_norm": 0.9030735492706299,
      "learning_rate": 3.087163761035427e-05,
      "loss": 0.9586,
      "step": 1562
    },
    {
      "epoch": 0.33779987032634534,
      "grad_norm": 0.8736393451690674,
      "learning_rate": 3.085988398921559e-05,
      "loss": 0.9666,
      "step": 1563
    },
    {
      "epoch": 0.33801599308407176,
      "grad_norm": 0.9995821118354797,
      "learning_rate": 3.0848125046758863e-05,
      "loss": 0.9769,
      "step": 1564
    },
    {
      "epoch": 0.33823211584179813,
      "grad_norm": 1.046533465385437,
      "learning_rate": 3.0836360788745946e-05,
      "loss": 0.9898,
      "step": 1565
    },
    {
      "epoch": 0.33844823859952455,
      "grad_norm": 0.9657412767410278,
      "learning_rate": 3.082459122094129e-05,
      "loss": 1.049,
      "step": 1566
    },
    {
      "epoch": 0.3386643613572509,
      "grad_norm": 0.9767979383468628,
      "learning_rate": 3.0812816349111956e-05,
      "loss": 0.9881,
      "step": 1567
    },
    {
      "epoch": 0.3388804841149773,
      "grad_norm": 0.9699817895889282,
      "learning_rate": 3.080103617902761e-05,
      "loss": 1.1529,
      "step": 1568
    },
    {
      "epoch": 0.3390966068727037,
      "grad_norm": 0.9518383145332336,
      "learning_rate": 3.0789250716460504e-05,
      "loss": 1.0584,
      "step": 1569
    },
    {
      "epoch": 0.3393127296304301,
      "grad_norm": 0.9244017601013184,
      "learning_rate": 3.077745996718548e-05,
      "loss": 0.8928,
      "step": 1570
    },
    {
      "epoch": 0.3395288523881565,
      "grad_norm": 1.14718759059906,
      "learning_rate": 3.0765663936979994e-05,
      "loss": 0.8307,
      "step": 1571
    },
    {
      "epoch": 0.33974497514588287,
      "grad_norm": 1.0088956356048584,
      "learning_rate": 3.0753862631624066e-05,
      "loss": 1.0454,
      "step": 1572
    },
    {
      "epoch": 0.33996109790360923,
      "grad_norm": 0.8901629447937012,
      "learning_rate": 3.0742056056900304e-05,
      "loss": 0.9993,
      "step": 1573
    },
    {
      "epoch": 0.34017722066133566,
      "grad_norm": 0.8994284868240356,
      "learning_rate": 3.073024421859391e-05,
      "loss": 0.8864,
      "step": 1574
    },
    {
      "epoch": 0.340393343419062,
      "grad_norm": 0.9857457876205444,
      "learning_rate": 3.071842712249265e-05,
      "loss": 0.9714,
      "step": 1575
    },
    {
      "epoch": 0.3406094661767884,
      "grad_norm": 1.186560034751892,
      "learning_rate": 3.070660477438688e-05,
      "loss": 1.0122,
      "step": 1576
    },
    {
      "epoch": 0.3408255889345148,
      "grad_norm": 0.9748552441596985,
      "learning_rate": 3.0694777180069515e-05,
      "loss": 1.0159,
      "step": 1577
    },
    {
      "epoch": 0.3410417116922412,
      "grad_norm": 1.0009037256240845,
      "learning_rate": 3.068294434533606e-05,
      "loss": 1.112,
      "step": 1578
    },
    {
      "epoch": 0.3412578344499676,
      "grad_norm": 0.8519662618637085,
      "learning_rate": 3.067110627598457e-05,
      "loss": 0.7989,
      "step": 1579
    },
    {
      "epoch": 0.34147395720769397,
      "grad_norm": 0.9448208808898926,
      "learning_rate": 3.065926297781567e-05,
      "loss": 0.7649,
      "step": 1580
    },
    {
      "epoch": 0.34169007996542033,
      "grad_norm": 0.8865207433700562,
      "learning_rate": 3.0647414456632554e-05,
      "loss": 1.0816,
      "step": 1581
    },
    {
      "epoch": 0.34190620272314676,
      "grad_norm": 1.0456334352493286,
      "learning_rate": 3.063556071824097e-05,
      "loss": 0.6623,
      "step": 1582
    },
    {
      "epoch": 0.3421223254808731,
      "grad_norm": 1.0350103378295898,
      "learning_rate": 3.0623701768449214e-05,
      "loss": 1.1929,
      "step": 1583
    },
    {
      "epoch": 0.34233844823859955,
      "grad_norm": 0.8997272849082947,
      "learning_rate": 3.061183761306816e-05,
      "loss": 0.9787,
      "step": 1584
    },
    {
      "epoch": 0.3425545709963259,
      "grad_norm": 1.0938043594360352,
      "learning_rate": 3.05999682579112e-05,
      "loss": 1.0633,
      "step": 1585
    },
    {
      "epoch": 0.3427706937540523,
      "grad_norm": 0.8825398087501526,
      "learning_rate": 3.05880937087943e-05,
      "loss": 0.7721,
      "step": 1586
    },
    {
      "epoch": 0.3429868165117787,
      "grad_norm": 0.8789626955986023,
      "learning_rate": 3.057621397153596e-05,
      "loss": 0.9942,
      "step": 1587
    },
    {
      "epoch": 0.34320293926950507,
      "grad_norm": 0.8417037725448608,
      "learning_rate": 3.056432905195721e-05,
      "loss": 0.977,
      "step": 1588
    },
    {
      "epoch": 0.3434190620272315,
      "grad_norm": 0.8185150623321533,
      "learning_rate": 3.055243895588167e-05,
      "loss": 0.7804,
      "step": 1589
    },
    {
      "epoch": 0.34363518478495786,
      "grad_norm": 0.8963676691055298,
      "learning_rate": 3.054054368913541e-05,
      "loss": 0.9526,
      "step": 1590
    },
    {
      "epoch": 0.3438513075426842,
      "grad_norm": 0.8924521803855896,
      "learning_rate": 3.052864325754712e-05,
      "loss": 0.9355,
      "step": 1591
    },
    {
      "epoch": 0.34406743030041065,
      "grad_norm": 0.8117523193359375,
      "learning_rate": 3.051673766694797e-05,
      "loss": 0.8333,
      "step": 1592
    },
    {
      "epoch": 0.344283553058137,
      "grad_norm": 0.8220072984695435,
      "learning_rate": 3.050482692317168e-05,
      "loss": 0.83,
      "step": 1593
    },
    {
      "epoch": 0.34449967581586344,
      "grad_norm": 0.9793494343757629,
      "learning_rate": 3.0492911032054472e-05,
      "loss": 1.0176,
      "step": 1594
    },
    {
      "epoch": 0.3447157985735898,
      "grad_norm": 0.8228978514671326,
      "learning_rate": 3.048098999943512e-05,
      "loss": 0.8449,
      "step": 1595
    },
    {
      "epoch": 0.34493192133131617,
      "grad_norm": 0.9178285598754883,
      "learning_rate": 3.0469063831154896e-05,
      "loss": 0.9921,
      "step": 1596
    },
    {
      "epoch": 0.3451480440890426,
      "grad_norm": 0.8306567668914795,
      "learning_rate": 3.0457132533057598e-05,
      "loss": 0.957,
      "step": 1597
    },
    {
      "epoch": 0.34536416684676896,
      "grad_norm": 0.9351313710212708,
      "learning_rate": 3.0445196110989533e-05,
      "loss": 1.1073,
      "step": 1598
    },
    {
      "epoch": 0.3455802896044953,
      "grad_norm": 0.8027867078781128,
      "learning_rate": 3.0433254570799514e-05,
      "loss": 1.0925,
      "step": 1599
    },
    {
      "epoch": 0.34579641236222175,
      "grad_norm": 0.9075708985328674,
      "learning_rate": 3.0421307918338873e-05,
      "loss": 0.9232,
      "step": 1600
    },
    {
      "epoch": 0.3460125351199481,
      "grad_norm": 0.994594156742096,
      "learning_rate": 3.0409356159461447e-05,
      "loss": 1.0689,
      "step": 1601
    },
    {
      "epoch": 0.34622865787767454,
      "grad_norm": 0.9905773997306824,
      "learning_rate": 3.0397399300023568e-05,
      "loss": 0.9695,
      "step": 1602
    },
    {
      "epoch": 0.3464447806354009,
      "grad_norm": 0.9328803420066833,
      "learning_rate": 3.038543734588406e-05,
      "loss": 0.8649,
      "step": 1603
    },
    {
      "epoch": 0.3466609033931273,
      "grad_norm": 1.0163012742996216,
      "learning_rate": 3.037347030290427e-05,
      "loss": 0.9543,
      "step": 1604
    },
    {
      "epoch": 0.3468770261508537,
      "grad_norm": 0.9779214859008789,
      "learning_rate": 3.0361498176948e-05,
      "loss": 0.8091,
      "step": 1605
    },
    {
      "epoch": 0.34709314890858006,
      "grad_norm": 0.8842347264289856,
      "learning_rate": 3.0349520973881582e-05,
      "loss": 0.849,
      "step": 1606
    },
    {
      "epoch": 0.3473092716663065,
      "grad_norm": 0.9716510772705078,
      "learning_rate": 3.0337538699573814e-05,
      "loss": 1.102,
      "step": 1607
    },
    {
      "epoch": 0.34752539442403285,
      "grad_norm": 0.9485663175582886,
      "learning_rate": 3.032555135989597e-05,
      "loss": 0.8349,
      "step": 1608
    },
    {
      "epoch": 0.3477415171817592,
      "grad_norm": 1.0341752767562866,
      "learning_rate": 3.0313558960721844e-05,
      "loss": 0.8645,
      "step": 1609
    },
    {
      "epoch": 0.34795763993948564,
      "grad_norm": 0.9334589242935181,
      "learning_rate": 3.0301561507927655e-05,
      "loss": 1.0647,
      "step": 1610
    },
    {
      "epoch": 0.348173762697212,
      "grad_norm": 0.9249916672706604,
      "learning_rate": 3.0289559007392148e-05,
      "loss": 0.8871,
      "step": 1611
    },
    {
      "epoch": 0.34838988545493843,
      "grad_norm": 0.8663226366043091,
      "learning_rate": 3.027755146499651e-05,
      "loss": 0.8528,
      "step": 1612
    },
    {
      "epoch": 0.3486060082126648,
      "grad_norm": 0.8412871956825256,
      "learning_rate": 3.0265538886624413e-05,
      "loss": 0.8306,
      "step": 1613
    },
    {
      "epoch": 0.34882213097039116,
      "grad_norm": 0.9474676847457886,
      "learning_rate": 3.0253521278161996e-05,
      "loss": 0.8494,
      "step": 1614
    },
    {
      "epoch": 0.3490382537281176,
      "grad_norm": 1.1441682577133179,
      "learning_rate": 3.0241498645497852e-05,
      "loss": 1.1878,
      "step": 1615
    },
    {
      "epoch": 0.34925437648584395,
      "grad_norm": 0.8670212626457214,
      "learning_rate": 3.0229470994523048e-05,
      "loss": 0.8506,
      "step": 1616
    },
    {
      "epoch": 0.3494704992435703,
      "grad_norm": 0.801177978515625,
      "learning_rate": 3.0217438331131102e-05,
      "loss": 0.7948,
      "step": 1617
    },
    {
      "epoch": 0.34968662200129674,
      "grad_norm": 1.0146235227584839,
      "learning_rate": 3.0205400661218e-05,
      "loss": 0.8659,
      "step": 1618
    },
    {
      "epoch": 0.3499027447590231,
      "grad_norm": 1.0296528339385986,
      "learning_rate": 3.0193357990682153e-05,
      "loss": 1.0093,
      "step": 1619
    },
    {
      "epoch": 0.35011886751674953,
      "grad_norm": 1.0370322465896606,
      "learning_rate": 3.0181310325424464e-05,
      "loss": 0.9417,
      "step": 1620
    },
    {
      "epoch": 0.3503349902744759,
      "grad_norm": 0.912803053855896,
      "learning_rate": 3.016925767134825e-05,
      "loss": 1.0228,
      "step": 1621
    },
    {
      "epoch": 0.35055111303220227,
      "grad_norm": 0.9205127358436584,
      "learning_rate": 3.015720003435928e-05,
      "loss": 0.8741,
      "step": 1622
    },
    {
      "epoch": 0.3507672357899287,
      "grad_norm": 0.8457703590393066,
      "learning_rate": 3.0145137420365774e-05,
      "loss": 0.9972,
      "step": 1623
    },
    {
      "epoch": 0.35098335854765506,
      "grad_norm": 0.9067233204841614,
      "learning_rate": 3.013306983527839e-05,
      "loss": 0.8588,
      "step": 1624
    },
    {
      "epoch": 0.3511994813053815,
      "grad_norm": 1.0689157247543335,
      "learning_rate": 3.012099728501021e-05,
      "loss": 0.9996,
      "step": 1625
    },
    {
      "epoch": 0.35141560406310784,
      "grad_norm": 0.752790629863739,
      "learning_rate": 3.010891977547675e-05,
      "loss": 0.9512,
      "step": 1626
    },
    {
      "epoch": 0.3516317268208342,
      "grad_norm": 0.905785322189331,
      "learning_rate": 3.009683731259598e-05,
      "loss": 1.0775,
      "step": 1627
    },
    {
      "epoch": 0.35184784957856063,
      "grad_norm": 1.0364049673080444,
      "learning_rate": 3.008474990228825e-05,
      "loss": 0.9045,
      "step": 1628
    },
    {
      "epoch": 0.352063972336287,
      "grad_norm": 0.9609606266021729,
      "learning_rate": 3.0072657550476402e-05,
      "loss": 0.896,
      "step": 1629
    },
    {
      "epoch": 0.3522800950940134,
      "grad_norm": 0.916305422782898,
      "learning_rate": 3.006056026308562e-05,
      "loss": 1.0727,
      "step": 1630
    },
    {
      "epoch": 0.3524962178517398,
      "grad_norm": 0.8478757739067078,
      "learning_rate": 3.0048458046043573e-05,
      "loss": 0.9927,
      "step": 1631
    },
    {
      "epoch": 0.35271234060946616,
      "grad_norm": 0.8042452335357666,
      "learning_rate": 3.0036350905280316e-05,
      "loss": 0.7312,
      "step": 1632
    },
    {
      "epoch": 0.3529284633671926,
      "grad_norm": 0.9166516065597534,
      "learning_rate": 3.002423884672831e-05,
      "loss": 0.8677,
      "step": 1633
    },
    {
      "epoch": 0.35314458612491895,
      "grad_norm": 0.9375431537628174,
      "learning_rate": 3.0012121876322443e-05,
      "loss": 0.8155,
      "step": 1634
    },
    {
      "epoch": 0.35336070888264537,
      "grad_norm": 1.0676640272140503,
      "learning_rate": 3.0000000000000004e-05,
      "loss": 0.894,
      "step": 1635
    },
    {
      "epoch": 0.35357683164037174,
      "grad_norm": 0.9676708579063416,
      "learning_rate": 2.9987873223700677e-05,
      "loss": 0.9801,
      "step": 1636
    },
    {
      "epoch": 0.3537929543980981,
      "grad_norm": 0.9622818231582642,
      "learning_rate": 2.9975741553366564e-05,
      "loss": 0.8829,
      "step": 1637
    },
    {
      "epoch": 0.3540090771558245,
      "grad_norm": 0.7891084551811218,
      "learning_rate": 2.9963604994942152e-05,
      "loss": 0.8298,
      "step": 1638
    },
    {
      "epoch": 0.3542251999135509,
      "grad_norm": 0.8852497339248657,
      "learning_rate": 2.9951463554374317e-05,
      "loss": 0.9008,
      "step": 1639
    },
    {
      "epoch": 0.35444132267127726,
      "grad_norm": 0.9709888696670532,
      "learning_rate": 2.9939317237612352e-05,
      "loss": 1.0987,
      "step": 1640
    },
    {
      "epoch": 0.3546574454290037,
      "grad_norm": 0.9508516788482666,
      "learning_rate": 2.9927166050607912e-05,
      "loss": 0.8624,
      "step": 1641
    },
    {
      "epoch": 0.35487356818673005,
      "grad_norm": 0.9188629984855652,
      "learning_rate": 2.991500999931506e-05,
      "loss": 1.0215,
      "step": 1642
    },
    {
      "epoch": 0.35508969094445647,
      "grad_norm": 0.900020182132721,
      "learning_rate": 2.990284908969023e-05,
      "loss": 0.8522,
      "step": 1643
    },
    {
      "epoch": 0.35530581370218284,
      "grad_norm": 0.8772697448730469,
      "learning_rate": 2.989068332769223e-05,
      "loss": 0.9635,
      "step": 1644
    },
    {
      "epoch": 0.3555219364599092,
      "grad_norm": 0.8928239941596985,
      "learning_rate": 2.9878512719282267e-05,
      "loss": 0.9649,
      "step": 1645
    },
    {
      "epoch": 0.3557380592176356,
      "grad_norm": 0.9713953137397766,
      "learning_rate": 2.9866337270423912e-05,
      "loss": 0.9589,
      "step": 1646
    },
    {
      "epoch": 0.355954181975362,
      "grad_norm": 0.9290111660957336,
      "learning_rate": 2.98541569870831e-05,
      "loss": 0.9241,
      "step": 1647
    },
    {
      "epoch": 0.3561703047330884,
      "grad_norm": 0.8772423267364502,
      "learning_rate": 2.9841971875228134e-05,
      "loss": 1.0103,
      "step": 1648
    },
    {
      "epoch": 0.3563864274908148,
      "grad_norm": 1.002431035041809,
      "learning_rate": 2.9829781940829718e-05,
      "loss": 0.9588,
      "step": 1649
    },
    {
      "epoch": 0.35660255024854115,
      "grad_norm": 0.8712643384933472,
      "learning_rate": 2.981758718986086e-05,
      "loss": 0.8737,
      "step": 1650
    },
    {
      "epoch": 0.3568186730062676,
      "grad_norm": 0.9159672260284424,
      "learning_rate": 2.980538762829698e-05,
      "loss": 1.0155,
      "step": 1651
    },
    {
      "epoch": 0.35703479576399394,
      "grad_norm": 0.9155294895172119,
      "learning_rate": 2.9793183262115824e-05,
      "loss": 0.8415,
      "step": 1652
    },
    {
      "epoch": 0.35725091852172036,
      "grad_norm": 1.0039186477661133,
      "learning_rate": 2.9780974097297516e-05,
      "loss": 0.9856,
      "step": 1653
    },
    {
      "epoch": 0.35746704127944673,
      "grad_norm": 0.903708815574646,
      "learning_rate": 2.976876013982451e-05,
      "loss": 0.9116,
      "step": 1654
    },
    {
      "epoch": 0.3576831640371731,
      "grad_norm": 0.9548552632331848,
      "learning_rate": 2.9756541395681613e-05,
      "loss": 1.2098,
      "step": 1655
    },
    {
      "epoch": 0.3578992867948995,
      "grad_norm": 0.912732720375061,
      "learning_rate": 2.974431787085599e-05,
      "loss": 0.9992,
      "step": 1656
    },
    {
      "epoch": 0.3581154095526259,
      "grad_norm": 0.8119702339172363,
      "learning_rate": 2.9732089571337126e-05,
      "loss": 0.9309,
      "step": 1657
    },
    {
      "epoch": 0.3583315323103523,
      "grad_norm": 0.9073593616485596,
      "learning_rate": 2.9719856503116886e-05,
      "loss": 1.0523,
      "step": 1658
    },
    {
      "epoch": 0.3585476550680787,
      "grad_norm": 0.8632818460464478,
      "learning_rate": 2.970761867218941e-05,
      "loss": 1.0282,
      "step": 1659
    },
    {
      "epoch": 0.35876377782580504,
      "grad_norm": 0.9574206471443176,
      "learning_rate": 2.9695376084551237e-05,
      "loss": 1.0417,
      "step": 1660
    },
    {
      "epoch": 0.35897990058353146,
      "grad_norm": 0.8399937748908997,
      "learning_rate": 2.9683128746201194e-05,
      "loss": 0.8353,
      "step": 1661
    },
    {
      "epoch": 0.35919602334125783,
      "grad_norm": 0.852632999420166,
      "learning_rate": 2.9670876663140443e-05,
      "loss": 0.9927,
      "step": 1662
    },
    {
      "epoch": 0.3594121460989842,
      "grad_norm": 0.9457361698150635,
      "learning_rate": 2.965861984137249e-05,
      "loss": 1.0071,
      "step": 1663
    },
    {
      "epoch": 0.3596282688567106,
      "grad_norm": 0.6972500681877136,
      "learning_rate": 2.9646358286903144e-05,
      "loss": 0.7935,
      "step": 1664
    },
    {
      "epoch": 0.359844391614437,
      "grad_norm": 0.9250654578208923,
      "learning_rate": 2.963409200574053e-05,
      "loss": 0.8254,
      "step": 1665
    },
    {
      "epoch": 0.3600605143721634,
      "grad_norm": 0.8242780566215515,
      "learning_rate": 2.9621821003895107e-05,
      "loss": 0.8502,
      "step": 1666
    },
    {
      "epoch": 0.3602766371298898,
      "grad_norm": 0.8958160877227783,
      "learning_rate": 2.960954528737964e-05,
      "loss": 0.9306,
      "step": 1667
    },
    {
      "epoch": 0.36049275988761614,
      "grad_norm": 0.9546581506729126,
      "learning_rate": 2.9597264862209183e-05,
      "loss": 0.8418,
      "step": 1668
    },
    {
      "epoch": 0.36070888264534257,
      "grad_norm": 1.0000452995300293,
      "learning_rate": 2.958497973440114e-05,
      "loss": 0.8989,
      "step": 1669
    },
    {
      "epoch": 0.36092500540306893,
      "grad_norm": 0.8637808561325073,
      "learning_rate": 2.9572689909975182e-05,
      "loss": 0.9931,
      "step": 1670
    },
    {
      "epoch": 0.36114112816079535,
      "grad_norm": 0.9332252740859985,
      "learning_rate": 2.9560395394953295e-05,
      "loss": 0.891,
      "step": 1671
    },
    {
      "epoch": 0.3613572509185217,
      "grad_norm": 0.8956640362739563,
      "learning_rate": 2.9548096195359765e-05,
      "loss": 0.8972,
      "step": 1672
    },
    {
      "epoch": 0.3615733736762481,
      "grad_norm": 0.8492765426635742,
      "learning_rate": 2.9535792317221178e-05,
      "loss": 0.9525,
      "step": 1673
    },
    {
      "epoch": 0.3617894964339745,
      "grad_norm": 0.8705052733421326,
      "learning_rate": 2.9523483766566392e-05,
      "loss": 0.9671,
      "step": 1674
    },
    {
      "epoch": 0.3620056191917009,
      "grad_norm": 0.9933458566665649,
      "learning_rate": 2.9511170549426577e-05,
      "loss": 1.1338,
      "step": 1675
    },
    {
      "epoch": 0.3622217419494273,
      "grad_norm": 0.8857420086860657,
      "learning_rate": 2.949885267183518e-05,
      "loss": 0.9703,
      "step": 1676
    },
    {
      "epoch": 0.36243786470715367,
      "grad_norm": 0.926338791847229,
      "learning_rate": 2.9486530139827926e-05,
      "loss": 1.1379,
      "step": 1677
    },
    {
      "epoch": 0.36265398746488003,
      "grad_norm": 0.9619150161743164,
      "learning_rate": 2.9474202959442848e-05,
      "loss": 0.9449,
      "step": 1678
    },
    {
      "epoch": 0.36287011022260646,
      "grad_norm": 0.882526695728302,
      "learning_rate": 2.9461871136720205e-05,
      "loss": 0.9521,
      "step": 1679
    },
    {
      "epoch": 0.3630862329803328,
      "grad_norm": 0.8300138711929321,
      "learning_rate": 2.9449534677702584e-05,
      "loss": 1.04,
      "step": 1680
    },
    {
      "epoch": 0.3633023557380592,
      "grad_norm": 0.9559792280197144,
      "learning_rate": 2.9437193588434816e-05,
      "loss": 1.0917,
      "step": 1681
    },
    {
      "epoch": 0.3635184784957856,
      "grad_norm": 0.9795278310775757,
      "learning_rate": 2.9424847874964003e-05,
      "loss": 0.8666,
      "step": 1682
    },
    {
      "epoch": 0.363734601253512,
      "grad_norm": 0.8716777563095093,
      "learning_rate": 2.941249754333952e-05,
      "loss": 0.8962,
      "step": 1683
    },
    {
      "epoch": 0.3639507240112384,
      "grad_norm": 0.867805540561676,
      "learning_rate": 2.9400142599613005e-05,
      "loss": 0.7833,
      "step": 1684
    },
    {
      "epoch": 0.36416684676896477,
      "grad_norm": 0.8331664800643921,
      "learning_rate": 2.9387783049838338e-05,
      "loss": 1.0517,
      "step": 1685
    },
    {
      "epoch": 0.36438296952669114,
      "grad_norm": 0.9284753203392029,
      "learning_rate": 2.9375418900071676e-05,
      "loss": 0.9471,
      "step": 1686
    },
    {
      "epoch": 0.36459909228441756,
      "grad_norm": 0.8487952351570129,
      "learning_rate": 2.9363050156371433e-05,
      "loss": 0.9644,
      "step": 1687
    },
    {
      "epoch": 0.3648152150421439,
      "grad_norm": 0.8749293684959412,
      "learning_rate": 2.9350676824798256e-05,
      "loss": 1.0773,
      "step": 1688
    },
    {
      "epoch": 0.36503133779987035,
      "grad_norm": 0.8109781742095947,
      "learning_rate": 2.9338298911415052e-05,
      "loss": 0.7734,
      "step": 1689
    },
    {
      "epoch": 0.3652474605575967,
      "grad_norm": 0.8879095315933228,
      "learning_rate": 2.932591642228696e-05,
      "loss": 0.7161,
      "step": 1690
    },
    {
      "epoch": 0.3654635833153231,
      "grad_norm": 0.9725330471992493,
      "learning_rate": 2.9313529363481386e-05,
      "loss": 1.0409,
      "step": 1691
    },
    {
      "epoch": 0.3656797060730495,
      "grad_norm": 1.1101995706558228,
      "learning_rate": 2.9301137741067958e-05,
      "loss": 1.1728,
      "step": 1692
    },
    {
      "epoch": 0.36589582883077587,
      "grad_norm": 0.8863847255706787,
      "learning_rate": 2.928874156111853e-05,
      "loss": 0.8595,
      "step": 1693
    },
    {
      "epoch": 0.3661119515885023,
      "grad_norm": 1.0283147096633911,
      "learning_rate": 2.9276340829707216e-05,
      "loss": 0.8235,
      "step": 1694
    },
    {
      "epoch": 0.36632807434622866,
      "grad_norm": 0.8551615476608276,
      "learning_rate": 2.9263935552910333e-05,
      "loss": 0.7459,
      "step": 1695
    },
    {
      "epoch": 0.366544197103955,
      "grad_norm": 0.8963771462440491,
      "learning_rate": 2.9251525736806454e-05,
      "loss": 0.989,
      "step": 1696
    },
    {
      "epoch": 0.36676031986168145,
      "grad_norm": 0.8559364080429077,
      "learning_rate": 2.9239111387476336e-05,
      "loss": 0.9415,
      "step": 1697
    },
    {
      "epoch": 0.3669764426194078,
      "grad_norm": 1.028477668762207,
      "learning_rate": 2.9226692511003007e-05,
      "loss": 0.9121,
      "step": 1698
    },
    {
      "epoch": 0.36719256537713424,
      "grad_norm": 0.8393043279647827,
      "learning_rate": 2.9214269113471672e-05,
      "loss": 0.9051,
      "step": 1699
    },
    {
      "epoch": 0.3674086881348606,
      "grad_norm": 0.959419310092926,
      "learning_rate": 2.920184120096977e-05,
      "loss": 1.0464,
      "step": 1700
    },
    {
      "epoch": 0.367624810892587,
      "grad_norm": 0.9398619532585144,
      "learning_rate": 2.9189408779586953e-05,
      "loss": 0.9417,
      "step": 1701
    },
    {
      "epoch": 0.3678409336503134,
      "grad_norm": 0.9295390248298645,
      "learning_rate": 2.9176971855415083e-05,
      "loss": 1.106,
      "step": 1702
    },
    {
      "epoch": 0.36805705640803976,
      "grad_norm": 1.0452978610992432,
      "learning_rate": 2.916453043454821e-05,
      "loss": 1.1577,
      "step": 1703
    },
    {
      "epoch": 0.36827317916576613,
      "grad_norm": 0.9328250885009766,
      "learning_rate": 2.9152084523082616e-05,
      "loss": 0.843,
      "step": 1704
    },
    {
      "epoch": 0.36848930192349255,
      "grad_norm": 0.9288171529769897,
      "learning_rate": 2.9139634127116756e-05,
      "loss": 0.9252,
      "step": 1705
    },
    {
      "epoch": 0.3687054246812189,
      "grad_norm": 0.9420575499534607,
      "learning_rate": 2.9127179252751305e-05,
      "loss": 1.1016,
      "step": 1706
    },
    {
      "epoch": 0.36892154743894534,
      "grad_norm": 0.8303332924842834,
      "learning_rate": 2.9114719906089117e-05,
      "loss": 0.8496,
      "step": 1707
    },
    {
      "epoch": 0.3691376701966717,
      "grad_norm": 0.9922606945037842,
      "learning_rate": 2.9102256093235245e-05,
      "loss": 0.9668,
      "step": 1708
    },
    {
      "epoch": 0.3693537929543981,
      "grad_norm": 0.8178890347480774,
      "learning_rate": 2.908978782029693e-05,
      "loss": 0.8172,
      "step": 1709
    },
    {
      "epoch": 0.3695699157121245,
      "grad_norm": 0.8483377695083618,
      "learning_rate": 2.9077315093383595e-05,
      "loss": 0.8602,
      "step": 1710
    },
    {
      "epoch": 0.36978603846985086,
      "grad_norm": 0.7788403630256653,
      "learning_rate": 2.906483791860685e-05,
      "loss": 0.8256,
      "step": 1711
    },
    {
      "epoch": 0.3700021612275773,
      "grad_norm": 0.8779165744781494,
      "learning_rate": 2.905235630208048e-05,
      "loss": 0.8665,
      "step": 1712
    },
    {
      "epoch": 0.37021828398530365,
      "grad_norm": 0.8570807576179504,
      "learning_rate": 2.9039870249920447e-05,
      "loss": 1.023,
      "step": 1713
    },
    {
      "epoch": 0.37043440674303,
      "grad_norm": 0.8159751296043396,
      "learning_rate": 2.902737976824489e-05,
      "loss": 0.8657,
      "step": 1714
    },
    {
      "epoch": 0.37065052950075644,
      "grad_norm": 0.9290934801101685,
      "learning_rate": 2.901488486317411e-05,
      "loss": 0.8364,
      "step": 1715
    },
    {
      "epoch": 0.3708666522584828,
      "grad_norm": 0.9076923727989197,
      "learning_rate": 2.9002385540830585e-05,
      "loss": 0.9955,
      "step": 1716
    },
    {
      "epoch": 0.37108277501620923,
      "grad_norm": 0.8310691118240356,
      "learning_rate": 2.8989881807338964e-05,
      "loss": 0.7522,
      "step": 1717
    },
    {
      "epoch": 0.3712988977739356,
      "grad_norm": 1.0004169940948486,
      "learning_rate": 2.8977373668826035e-05,
      "loss": 1.0444,
      "step": 1718
    },
    {
      "epoch": 0.37151502053166197,
      "grad_norm": 0.8425519466400146,
      "learning_rate": 2.8964861131420758e-05,
      "loss": 0.8879,
      "step": 1719
    },
    {
      "epoch": 0.3717311432893884,
      "grad_norm": 0.9142181873321533,
      "learning_rate": 2.8952344201254253e-05,
      "loss": 0.9851,
      "step": 1720
    },
    {
      "epoch": 0.37194726604711476,
      "grad_norm": 0.9255647659301758,
      "learning_rate": 2.8939822884459786e-05,
      "loss": 0.8724,
      "step": 1721
    },
    {
      "epoch": 0.3721633888048411,
      "grad_norm": 0.8818854689598083,
      "learning_rate": 2.8927297187172772e-05,
      "loss": 0.9729,
      "step": 1722
    },
    {
      "epoch": 0.37237951156256754,
      "grad_norm": 0.768568217754364,
      "learning_rate": 2.891476711553077e-05,
      "loss": 0.8083,
      "step": 1723
    },
    {
      "epoch": 0.3725956343202939,
      "grad_norm": 0.8540796041488647,
      "learning_rate": 2.8902232675673486e-05,
      "loss": 0.9899,
      "step": 1724
    },
    {
      "epoch": 0.37281175707802033,
      "grad_norm": 0.9795287251472473,
      "learning_rate": 2.8889693873742783e-05,
      "loss": 0.9475,
      "step": 1725
    },
    {
      "epoch": 0.3730278798357467,
      "grad_norm": 0.7936578989028931,
      "learning_rate": 2.8877150715882613e-05,
      "loss": 0.766,
      "step": 1726
    },
    {
      "epoch": 0.37324400259347307,
      "grad_norm": 0.8888428211212158,
      "learning_rate": 2.886460320823913e-05,
      "loss": 1.121,
      "step": 1727
    },
    {
      "epoch": 0.3734601253511995,
      "grad_norm": 0.8631256222724915,
      "learning_rate": 2.8852051356960555e-05,
      "loss": 0.827,
      "step": 1728
    },
    {
      "epoch": 0.37367624810892586,
      "grad_norm": 1.0968459844589233,
      "learning_rate": 2.8839495168197288e-05,
      "loss": 0.8127,
      "step": 1729
    },
    {
      "epoch": 0.3738923708666523,
      "grad_norm": 0.9336002469062805,
      "learning_rate": 2.8826934648101815e-05,
      "loss": 0.8939,
      "step": 1730
    },
    {
      "epoch": 0.37410849362437865,
      "grad_norm": 0.8790167570114136,
      "learning_rate": 2.8814369802828773e-05,
      "loss": 0.8584,
      "step": 1731
    },
    {
      "epoch": 0.374324616382105,
      "grad_norm": 0.8758849501609802,
      "learning_rate": 2.8801800638534906e-05,
      "loss": 0.8605,
      "step": 1732
    },
    {
      "epoch": 0.37454073913983144,
      "grad_norm": 0.7395503520965576,
      "learning_rate": 2.8789227161379068e-05,
      "loss": 0.8336,
      "step": 1733
    },
    {
      "epoch": 0.3747568618975578,
      "grad_norm": 0.9526098966598511,
      "learning_rate": 2.8776649377522245e-05,
      "loss": 1.0668,
      "step": 1734
    },
    {
      "epoch": 0.3749729846552842,
      "grad_norm": 0.8846078515052795,
      "learning_rate": 2.8764067293127506e-05,
      "loss": 0.8983,
      "step": 1735
    },
    {
      "epoch": 0.3751891074130106,
      "grad_norm": 0.9965586066246033,
      "learning_rate": 2.875148091436006e-05,
      "loss": 0.9229,
      "step": 1736
    },
    {
      "epoch": 0.37540523017073696,
      "grad_norm": 0.8903135061264038,
      "learning_rate": 2.873889024738719e-05,
      "loss": 1.0622,
      "step": 1737
    },
    {
      "epoch": 0.3756213529284634,
      "grad_norm": 0.9423289895057678,
      "learning_rate": 2.872629529837831e-05,
      "loss": 0.9667,
      "step": 1738
    },
    {
      "epoch": 0.37583747568618975,
      "grad_norm": 0.8821099400520325,
      "learning_rate": 2.8713696073504897e-05,
      "loss": 0.8798,
      "step": 1739
    },
    {
      "epoch": 0.37605359844391617,
      "grad_norm": 0.8812253475189209,
      "learning_rate": 2.870109257894056e-05,
      "loss": 0.8199,
      "step": 1740
    },
    {
      "epoch": 0.37626972120164254,
      "grad_norm": 0.8192459344863892,
      "learning_rate": 2.8688484820860965e-05,
      "loss": 0.8676,
      "step": 1741
    },
    {
      "epoch": 0.3764858439593689,
      "grad_norm": 0.8847028613090515,
      "learning_rate": 2.8675872805443895e-05,
      "loss": 1.0504,
      "step": 1742
    },
    {
      "epoch": 0.3767019667170953,
      "grad_norm": 1.005643606185913,
      "learning_rate": 2.8663256538869212e-05,
      "loss": 1.2306,
      "step": 1743
    },
    {
      "epoch": 0.3769180894748217,
      "grad_norm": 0.779718279838562,
      "learning_rate": 2.8650636027318844e-05,
      "loss": 0.7796,
      "step": 1744
    },
    {
      "epoch": 0.37713421223254806,
      "grad_norm": 1.0670115947723389,
      "learning_rate": 2.8638011276976825e-05,
      "loss": 0.8797,
      "step": 1745
    },
    {
      "epoch": 0.3773503349902745,
      "grad_norm": 0.8027476668357849,
      "learning_rate": 2.8625382294029242e-05,
      "loss": 0.7303,
      "step": 1746
    },
    {
      "epoch": 0.37756645774800085,
      "grad_norm": 0.8964573740959167,
      "learning_rate": 2.8612749084664282e-05,
      "loss": 0.8716,
      "step": 1747
    },
    {
      "epoch": 0.3777825805057273,
      "grad_norm": 0.9428089261054993,
      "learning_rate": 2.8600111655072172e-05,
      "loss": 1.0786,
      "step": 1748
    },
    {
      "epoch": 0.37799870326345364,
      "grad_norm": 0.8974640965461731,
      "learning_rate": 2.858747001144524e-05,
      "loss": 0.9482,
      "step": 1749
    },
    {
      "epoch": 0.37821482602118,
      "grad_norm": 0.9904823303222656,
      "learning_rate": 2.857482415997785e-05,
      "loss": 0.9166,
      "step": 1750
    },
    {
      "epoch": 0.37843094877890643,
      "grad_norm": 0.9504700899124146,
      "learning_rate": 2.856217410686644e-05,
      "loss": 0.9677,
      "step": 1751
    },
    {
      "epoch": 0.3786470715366328,
      "grad_norm": 0.9497498273849487,
      "learning_rate": 2.8549519858309514e-05,
      "loss": 0.9889,
      "step": 1752
    },
    {
      "epoch": 0.3788631942943592,
      "grad_norm": 0.9732851982116699,
      "learning_rate": 2.853686142050762e-05,
      "loss": 0.9731,
      "step": 1753
    },
    {
      "epoch": 0.3790793170520856,
      "grad_norm": 0.7974346876144409,
      "learning_rate": 2.8524198799663367e-05,
      "loss": 0.7792,
      "step": 1754
    },
    {
      "epoch": 0.37929543980981195,
      "grad_norm": 0.7844465374946594,
      "learning_rate": 2.851153200198141e-05,
      "loss": 0.8316,
      "step": 1755
    },
    {
      "epoch": 0.3795115625675384,
      "grad_norm": 0.8197035789489746,
      "learning_rate": 2.8498861033668444e-05,
      "loss": 0.718,
      "step": 1756
    },
    {
      "epoch": 0.37972768532526474,
      "grad_norm": 1.013469934463501,
      "learning_rate": 2.8486185900933212e-05,
      "loss": 0.962,
      "step": 1757
    },
    {
      "epoch": 0.37994380808299116,
      "grad_norm": 0.7966868877410889,
      "learning_rate": 2.8473506609986515e-05,
      "loss": 0.8272,
      "step": 1758
    },
    {
      "epoch": 0.38015993084071753,
      "grad_norm": 0.9745547771453857,
      "learning_rate": 2.8460823167041162e-05,
      "loss": 1.0775,
      "step": 1759
    },
    {
      "epoch": 0.3803760535984439,
      "grad_norm": 1.1553611755371094,
      "learning_rate": 2.8448135578312018e-05,
      "loss": 0.9355,
      "step": 1760
    },
    {
      "epoch": 0.3805921763561703,
      "grad_norm": 0.9141063094139099,
      "learning_rate": 2.8435443850015967e-05,
      "loss": 0.8721,
      "step": 1761
    },
    {
      "epoch": 0.3808082991138967,
      "grad_norm": 0.8130009770393372,
      "learning_rate": 2.842274798837193e-05,
      "loss": 0.9071,
      "step": 1762
    },
    {
      "epoch": 0.3810244218716231,
      "grad_norm": 0.8336833119392395,
      "learning_rate": 2.8410047999600853e-05,
      "loss": 0.9596,
      "step": 1763
    },
    {
      "epoch": 0.3812405446293495,
      "grad_norm": 1.0067347288131714,
      "learning_rate": 2.8397343889925694e-05,
      "loss": 1.0109,
      "step": 1764
    },
    {
      "epoch": 0.38145666738707584,
      "grad_norm": 0.9170424342155457,
      "learning_rate": 2.8384635665571444e-05,
      "loss": 0.9603,
      "step": 1765
    },
    {
      "epoch": 0.38167279014480227,
      "grad_norm": 1.063937783241272,
      "learning_rate": 2.8371923332765097e-05,
      "loss": 1.0738,
      "step": 1766
    },
    {
      "epoch": 0.38188891290252863,
      "grad_norm": 0.9675772786140442,
      "learning_rate": 2.8359206897735673e-05,
      "loss": 0.9906,
      "step": 1767
    },
    {
      "epoch": 0.382105035660255,
      "grad_norm": 0.8671706318855286,
      "learning_rate": 2.8346486366714185e-05,
      "loss": 0.971,
      "step": 1768
    },
    {
      "epoch": 0.3823211584179814,
      "grad_norm": 0.8462857007980347,
      "learning_rate": 2.833376174593368e-05,
      "loss": 0.9277,
      "step": 1769
    },
    {
      "epoch": 0.3825372811757078,
      "grad_norm": 0.8690409660339355,
      "learning_rate": 2.832103304162918e-05,
      "loss": 0.868,
      "step": 1770
    },
    {
      "epoch": 0.3827534039334342,
      "grad_norm": 0.8862578868865967,
      "learning_rate": 2.8308300260037734e-05,
      "loss": 0.7762,
      "step": 1771
    },
    {
      "epoch": 0.3829695266911606,
      "grad_norm": 0.9211979508399963,
      "learning_rate": 2.8295563407398358e-05,
      "loss": 0.8859,
      "step": 1772
    },
    {
      "epoch": 0.38318564944888694,
      "grad_norm": 0.8833542466163635,
      "learning_rate": 2.8282822489952093e-05,
      "loss": 0.8993,
      "step": 1773
    },
    {
      "epoch": 0.38340177220661337,
      "grad_norm": 0.9462282657623291,
      "learning_rate": 2.8270077513941953e-05,
      "loss": 0.9645,
      "step": 1774
    },
    {
      "epoch": 0.38361789496433973,
      "grad_norm": 0.8705824017524719,
      "learning_rate": 2.8257328485612948e-05,
      "loss": 0.8253,
      "step": 1775
    },
    {
      "epoch": 0.38383401772206616,
      "grad_norm": 0.8962376713752747,
      "learning_rate": 2.8244575411212075e-05,
      "loss": 0.8689,
      "step": 1776
    },
    {
      "epoch": 0.3840501404797925,
      "grad_norm": 0.9097771644592285,
      "learning_rate": 2.8231818296988304e-05,
      "loss": 0.8516,
      "step": 1777
    },
    {
      "epoch": 0.3842662632375189,
      "grad_norm": 0.8236470222473145,
      "learning_rate": 2.82190571491926e-05,
      "loss": 0.8598,
      "step": 1778
    },
    {
      "epoch": 0.3844823859952453,
      "grad_norm": 0.9493615031242371,
      "learning_rate": 2.8206291974077894e-05,
      "loss": 0.976,
      "step": 1779
    },
    {
      "epoch": 0.3846985087529717,
      "grad_norm": 0.9372259378433228,
      "learning_rate": 2.819352277789909e-05,
      "loss": 0.9265,
      "step": 1780
    },
    {
      "epoch": 0.3849146315106981,
      "grad_norm": 0.9209122061729431,
      "learning_rate": 2.8180749566913067e-05,
      "loss": 0.8781,
      "step": 1781
    },
    {
      "epoch": 0.38513075426842447,
      "grad_norm": 1.0795836448669434,
      "learning_rate": 2.8167972347378666e-05,
      "loss": 1.0435,
      "step": 1782
    },
    {
      "epoch": 0.38534687702615084,
      "grad_norm": 0.9621537923812866,
      "learning_rate": 2.8155191125556696e-05,
      "loss": 0.9987,
      "step": 1783
    },
    {
      "epoch": 0.38556299978387726,
      "grad_norm": 0.9384992122650146,
      "learning_rate": 2.8142405907709926e-05,
      "loss": 0.9227,
      "step": 1784
    },
    {
      "epoch": 0.3857791225416036,
      "grad_norm": 0.93331378698349,
      "learning_rate": 2.8129616700103088e-05,
      "loss": 1.0711,
      "step": 1785
    },
    {
      "epoch": 0.38599524529933,
      "grad_norm": 0.9580069780349731,
      "learning_rate": 2.811682350900285e-05,
      "loss": 0.9268,
      "step": 1786
    },
    {
      "epoch": 0.3862113680570564,
      "grad_norm": 0.9399328827857971,
      "learning_rate": 2.810402634067787e-05,
      "loss": 1.1607,
      "step": 1787
    },
    {
      "epoch": 0.3864274908147828,
      "grad_norm": 1.0003597736358643,
      "learning_rate": 2.8091225201398703e-05,
      "loss": 1.0215,
      "step": 1788
    },
    {
      "epoch": 0.3866436135725092,
      "grad_norm": 1.009648084640503,
      "learning_rate": 2.80784200974379e-05,
      "loss": 1.0312,
      "step": 1789
    },
    {
      "epoch": 0.38685973633023557,
      "grad_norm": 0.9388926029205322,
      "learning_rate": 2.8065611035069923e-05,
      "loss": 0.9376,
      "step": 1790
    },
    {
      "epoch": 0.38707585908796194,
      "grad_norm": 0.7759811282157898,
      "learning_rate": 2.8052798020571182e-05,
      "loss": 0.914,
      "step": 1791
    },
    {
      "epoch": 0.38729198184568836,
      "grad_norm": 0.976290762424469,
      "learning_rate": 2.8039981060220028e-05,
      "loss": 0.759,
      "step": 1792
    },
    {
      "epoch": 0.3875081046034147,
      "grad_norm": 0.8384339213371277,
      "learning_rate": 2.802716016029674e-05,
      "loss": 0.8724,
      "step": 1793
    },
    {
      "epoch": 0.38772422736114115,
      "grad_norm": 0.9807355999946594,
      "learning_rate": 2.801433532708353e-05,
      "loss": 0.8927,
      "step": 1794
    },
    {
      "epoch": 0.3879403501188675,
      "grad_norm": 0.9871201515197754,
      "learning_rate": 2.8001506566864534e-05,
      "loss": 0.9795,
      "step": 1795
    },
    {
      "epoch": 0.3881564728765939,
      "grad_norm": 1.03062105178833,
      "learning_rate": 2.7988673885925818e-05,
      "loss": 0.7453,
      "step": 1796
    },
    {
      "epoch": 0.3883725956343203,
      "grad_norm": 0.9937511086463928,
      "learning_rate": 2.7975837290555366e-05,
      "loss": 0.8696,
      "step": 1797
    },
    {
      "epoch": 0.3885887183920467,
      "grad_norm": 0.8401237726211548,
      "learning_rate": 2.796299678704308e-05,
      "loss": 0.941,
      "step": 1798
    },
    {
      "epoch": 0.3888048411497731,
      "grad_norm": 0.9794442057609558,
      "learning_rate": 2.7950152381680772e-05,
      "loss": 0.8264,
      "step": 1799
    },
    {
      "epoch": 0.38902096390749946,
      "grad_norm": 0.8898727297782898,
      "learning_rate": 2.7937304080762174e-05,
      "loss": 0.7814,
      "step": 1800
    },
    {
      "epoch": 0.38923708666522583,
      "grad_norm": 0.9887492656707764,
      "learning_rate": 2.7924451890582936e-05,
      "loss": 0.6829,
      "step": 1801
    },
    {
      "epoch": 0.38945320942295225,
      "grad_norm": 0.8383352756500244,
      "learning_rate": 2.791159581744058e-05,
      "loss": 0.8101,
      "step": 1802
    },
    {
      "epoch": 0.3896693321806786,
      "grad_norm": 0.9079082608222961,
      "learning_rate": 2.7898735867634567e-05,
      "loss": 0.9837,
      "step": 1803
    },
    {
      "epoch": 0.38988545493840504,
      "grad_norm": 0.8341466188430786,
      "learning_rate": 2.7885872047466236e-05,
      "loss": 0.8203,
      "step": 1804
    },
    {
      "epoch": 0.3901015776961314,
      "grad_norm": 0.8649726510047913,
      "learning_rate": 2.787300436323883e-05,
      "loss": 0.7397,
      "step": 1805
    },
    {
      "epoch": 0.3903177004538578,
      "grad_norm": 0.9751591682434082,
      "learning_rate": 2.7860132821257483e-05,
      "loss": 0.8808,
      "step": 1806
    },
    {
      "epoch": 0.3905338232115842,
      "grad_norm": 0.8929107785224915,
      "learning_rate": 2.7847257427829233e-05,
      "loss": 0.9881,
      "step": 1807
    },
    {
      "epoch": 0.39074994596931056,
      "grad_norm": 1.0322059392929077,
      "learning_rate": 2.783437818926298e-05,
      "loss": 1.0004,
      "step": 1808
    },
    {
      "epoch": 0.39096606872703693,
      "grad_norm": 0.8034584522247314,
      "learning_rate": 2.782149511186952e-05,
      "loss": 0.6872,
      "step": 1809
    },
    {
      "epoch": 0.39118219148476335,
      "grad_norm": 0.9873882532119751,
      "learning_rate": 2.780860820196154e-05,
      "loss": 0.9788,
      "step": 1810
    },
    {
      "epoch": 0.3913983142424897,
      "grad_norm": 0.8640844225883484,
      "learning_rate": 2.7795717465853588e-05,
      "loss": 0.8948,
      "step": 1811
    },
    {
      "epoch": 0.39161443700021614,
      "grad_norm": 1.0133180618286133,
      "learning_rate": 2.7782822909862105e-05,
      "loss": 1.009,
      "step": 1812
    },
    {
      "epoch": 0.3918305597579425,
      "grad_norm": 0.8707285523414612,
      "learning_rate": 2.7769924540305385e-05,
      "loss": 0.7323,
      "step": 1813
    },
    {
      "epoch": 0.3920466825156689,
      "grad_norm": 1.0021897554397583,
      "learning_rate": 2.77570223635036e-05,
      "loss": 1.042,
      "step": 1814
    },
    {
      "epoch": 0.3922628052733953,
      "grad_norm": 0.926548182964325,
      "learning_rate": 2.774411638577879e-05,
      "loss": 0.9995,
      "step": 1815
    },
    {
      "epoch": 0.39247892803112167,
      "grad_norm": 0.9106104373931885,
      "learning_rate": 2.7731206613454853e-05,
      "loss": 0.8422,
      "step": 1816
    },
    {
      "epoch": 0.3926950507888481,
      "grad_norm": 0.9673689007759094,
      "learning_rate": 2.7718293052857545e-05,
      "loss": 0.919,
      "step": 1817
    },
    {
      "epoch": 0.39291117354657445,
      "grad_norm": 0.9710796475410461,
      "learning_rate": 2.7705375710314486e-05,
      "loss": 0.9097,
      "step": 1818
    },
    {
      "epoch": 0.3931272963043008,
      "grad_norm": 0.9857740998268127,
      "learning_rate": 2.7692454592155137e-05,
      "loss": 1.0333,
      "step": 1819
    },
    {
      "epoch": 0.39334341906202724,
      "grad_norm": 1.0093835592269897,
      "learning_rate": 2.7679529704710827e-05,
      "loss": 0.8549,
      "step": 1820
    },
    {
      "epoch": 0.3935595418197536,
      "grad_norm": 0.9259928464889526,
      "learning_rate": 2.7666601054314707e-05,
      "loss": 1.0608,
      "step": 1821
    },
    {
      "epoch": 0.39377566457748003,
      "grad_norm": 0.9289209842681885,
      "learning_rate": 2.7653668647301797e-05,
      "loss": 0.9653,
      "step": 1822
    },
    {
      "epoch": 0.3939917873352064,
      "grad_norm": 0.8605603575706482,
      "learning_rate": 2.7640732490008945e-05,
      "loss": 1.0109,
      "step": 1823
    },
    {
      "epoch": 0.39420791009293277,
      "grad_norm": 0.8922584056854248,
      "learning_rate": 2.7627792588774832e-05,
      "loss": 0.9341,
      "step": 1824
    },
    {
      "epoch": 0.3944240328506592,
      "grad_norm": 0.9215991497039795,
      "learning_rate": 2.7614848949939986e-05,
      "loss": 0.9281,
      "step": 1825
    },
    {
      "epoch": 0.39464015560838556,
      "grad_norm": 0.7918894290924072,
      "learning_rate": 2.760190157984675e-05,
      "loss": 0.9201,
      "step": 1826
    },
    {
      "epoch": 0.394856278366112,
      "grad_norm": 0.9827929139137268,
      "learning_rate": 2.7588950484839324e-05,
      "loss": 0.9999,
      "step": 1827
    },
    {
      "epoch": 0.39507240112383835,
      "grad_norm": 1.0141046047210693,
      "learning_rate": 2.7575995671263695e-05,
      "loss": 0.9462,
      "step": 1828
    },
    {
      "epoch": 0.3952885238815647,
      "grad_norm": 0.8736757040023804,
      "learning_rate": 2.7563037145467705e-05,
      "loss": 1.0194,
      "step": 1829
    },
    {
      "epoch": 0.39550464663929114,
      "grad_norm": 0.8712459206581116,
      "learning_rate": 2.7550074913800995e-05,
      "loss": 0.6799,
      "step": 1830
    },
    {
      "epoch": 0.3957207693970175,
      "grad_norm": 0.9101915955543518,
      "learning_rate": 2.753710898261503e-05,
      "loss": 0.9883,
      "step": 1831
    },
    {
      "epoch": 0.39593689215474387,
      "grad_norm": 0.9826453328132629,
      "learning_rate": 2.7524139358263088e-05,
      "loss": 1.0092,
      "step": 1832
    },
    {
      "epoch": 0.3961530149124703,
      "grad_norm": 0.8820707201957703,
      "learning_rate": 2.7511166047100255e-05,
      "loss": 1.0001,
      "step": 1833
    },
    {
      "epoch": 0.39636913767019666,
      "grad_norm": 0.8937494158744812,
      "learning_rate": 2.7498189055483423e-05,
      "loss": 0.8805,
      "step": 1834
    },
    {
      "epoch": 0.3965852604279231,
      "grad_norm": 0.9945870637893677,
      "learning_rate": 2.748520838977128e-05,
      "loss": 0.9133,
      "step": 1835
    },
    {
      "epoch": 0.39680138318564945,
      "grad_norm": 0.8278480768203735,
      "learning_rate": 2.747222405632434e-05,
      "loss": 1.0361,
      "step": 1836
    },
    {
      "epoch": 0.3970175059433758,
      "grad_norm": 0.8340871930122375,
      "learning_rate": 2.7459236061504873e-05,
      "loss": 0.8497,
      "step": 1837
    },
    {
      "epoch": 0.39723362870110224,
      "grad_norm": 0.8968183398246765,
      "learning_rate": 2.744624441167699e-05,
      "loss": 0.8952,
      "step": 1838
    },
    {
      "epoch": 0.3974497514588286,
      "grad_norm": 1.0628737211227417,
      "learning_rate": 2.743324911320655e-05,
      "loss": 0.8561,
      "step": 1839
    },
    {
      "epoch": 0.397665874216555,
      "grad_norm": 0.9170161485671997,
      "learning_rate": 2.7420250172461233e-05,
      "loss": 1.0833,
      "step": 1840
    },
    {
      "epoch": 0.3978819969742814,
      "grad_norm": 0.8254498839378357,
      "learning_rate": 2.7407247595810486e-05,
      "loss": 0.8913,
      "step": 1841
    },
    {
      "epoch": 0.39809811973200776,
      "grad_norm": 0.8792933821678162,
      "learning_rate": 2.739424138962554e-05,
      "loss": 0.8642,
      "step": 1842
    },
    {
      "epoch": 0.3983142424897342,
      "grad_norm": 0.8736568093299866,
      "learning_rate": 2.7381231560279405e-05,
      "loss": 0.8703,
      "step": 1843
    },
    {
      "epoch": 0.39853036524746055,
      "grad_norm": 1.0543543100357056,
      "learning_rate": 2.7368218114146867e-05,
      "loss": 1.0008,
      "step": 1844
    },
    {
      "epoch": 0.39874648800518697,
      "grad_norm": 0.9799116849899292,
      "learning_rate": 2.735520105760449e-05,
      "loss": 0.8504,
      "step": 1845
    },
    {
      "epoch": 0.39896261076291334,
      "grad_norm": 0.8448163866996765,
      "learning_rate": 2.7342180397030586e-05,
      "loss": 0.8936,
      "step": 1846
    },
    {
      "epoch": 0.3991787335206397,
      "grad_norm": 1.048321008682251,
      "learning_rate": 2.7329156138805273e-05,
      "loss": 1.1015,
      "step": 1847
    },
    {
      "epoch": 0.39939485627836613,
      "grad_norm": 0.8891046643257141,
      "learning_rate": 2.7316128289310386e-05,
      "loss": 0.8276,
      "step": 1848
    },
    {
      "epoch": 0.3996109790360925,
      "grad_norm": 0.8857054114341736,
      "learning_rate": 2.7303096854929553e-05,
      "loss": 0.9379,
      "step": 1849
    },
    {
      "epoch": 0.39982710179381886,
      "grad_norm": 1.0203007459640503,
      "learning_rate": 2.7290061842048143e-05,
      "loss": 0.9498,
      "step": 1850
    },
    {
      "epoch": 0.4000432245515453,
      "grad_norm": 0.865835964679718,
      "learning_rate": 2.7277023257053286e-05,
      "loss": 0.8314,
      "step": 1851
    },
    {
      "epoch": 0.40025934730927165,
      "grad_norm": 0.9435209631919861,
      "learning_rate": 2.726398110633385e-05,
      "loss": 0.9581,
      "step": 1852
    },
    {
      "epoch": 0.4004754700669981,
      "grad_norm": 0.9913296699523926,
      "learning_rate": 2.7250935396280467e-05,
      "loss": 0.9327,
      "step": 1853
    },
    {
      "epoch": 0.40069159282472444,
      "grad_norm": 0.885454535484314,
      "learning_rate": 2.7237886133285504e-05,
      "loss": 0.7667,
      "step": 1854
    },
    {
      "epoch": 0.4009077155824508,
      "grad_norm": 0.8463343977928162,
      "learning_rate": 2.7224833323743064e-05,
      "loss": 0.8656,
      "step": 1855
    },
    {
      "epoch": 0.40112383834017723,
      "grad_norm": 1.0554388761520386,
      "learning_rate": 2.7211776974048997e-05,
      "loss": 1.0108,
      "step": 1856
    },
    {
      "epoch": 0.4013399610979036,
      "grad_norm": 1.000020980834961,
      "learning_rate": 2.7198717090600878e-05,
      "loss": 0.9093,
      "step": 1857
    },
    {
      "epoch": 0.40155608385563,
      "grad_norm": 0.7798357009887695,
      "learning_rate": 2.7185653679798036e-05,
      "loss": 0.7673,
      "step": 1858
    },
    {
      "epoch": 0.4017722066133564,
      "grad_norm": 0.959668755531311,
      "learning_rate": 2.7172586748041494e-05,
      "loss": 1.021,
      "step": 1859
    },
    {
      "epoch": 0.40198832937108275,
      "grad_norm": 0.8943659663200378,
      "learning_rate": 2.715951630173403e-05,
      "loss": 0.86,
      "step": 1860
    },
    {
      "epoch": 0.4022044521288092,
      "grad_norm": 0.9070212841033936,
      "learning_rate": 2.714644234728013e-05,
      "loss": 0.8823,
      "step": 1861
    },
    {
      "epoch": 0.40242057488653554,
      "grad_norm": 0.99940425157547,
      "learning_rate": 2.7133364891085997e-05,
      "loss": 0.9913,
      "step": 1862
    },
    {
      "epoch": 0.40263669764426196,
      "grad_norm": 0.9460877776145935,
      "learning_rate": 2.712028393955956e-05,
      "loss": 0.9143,
      "step": 1863
    },
    {
      "epoch": 0.40285282040198833,
      "grad_norm": 0.9539337754249573,
      "learning_rate": 2.7107199499110448e-05,
      "loss": 0.9687,
      "step": 1864
    },
    {
      "epoch": 0.4030689431597147,
      "grad_norm": 0.8600987792015076,
      "learning_rate": 2.7094111576150014e-05,
      "loss": 0.8365,
      "step": 1865
    },
    {
      "epoch": 0.4032850659174411,
      "grad_norm": 1.0653421878814697,
      "learning_rate": 2.7081020177091304e-05,
      "loss": 1.046,
      "step": 1866
    },
    {
      "epoch": 0.4035011886751675,
      "grad_norm": 0.8216643929481506,
      "learning_rate": 2.7067925308349084e-05,
      "loss": 0.7576,
      "step": 1867
    },
    {
      "epoch": 0.4037173114328939,
      "grad_norm": 0.9096048474311829,
      "learning_rate": 2.7054826976339793e-05,
      "loss": 0.9487,
      "step": 1868
    },
    {
      "epoch": 0.4039334341906203,
      "grad_norm": 0.8275030851364136,
      "learning_rate": 2.7041725187481592e-05,
      "loss": 0.9392,
      "step": 1869
    },
    {
      "epoch": 0.40414955694834664,
      "grad_norm": 0.9473551511764526,
      "learning_rate": 2.7028619948194332e-05,
      "loss": 1.0163,
      "step": 1870
    },
    {
      "epoch": 0.40436567970607307,
      "grad_norm": 1.0237468481063843,
      "learning_rate": 2.7015511264899545e-05,
      "loss": 0.9627,
      "step": 1871
    },
    {
      "epoch": 0.40458180246379943,
      "grad_norm": 0.910361111164093,
      "learning_rate": 2.700239914402045e-05,
      "loss": 1.0283,
      "step": 1872
    },
    {
      "epoch": 0.4047979252215258,
      "grad_norm": 0.9893791675567627,
      "learning_rate": 2.698928359198197e-05,
      "loss": 0.8604,
      "step": 1873
    },
    {
      "epoch": 0.4050140479792522,
      "grad_norm": 0.91969233751297,
      "learning_rate": 2.697616461521068e-05,
      "loss": 0.9786,
      "step": 1874
    },
    {
      "epoch": 0.4052301707369786,
      "grad_norm": 0.8568896055221558,
      "learning_rate": 2.696304222013486e-05,
      "loss": 1.0031,
      "step": 1875
    },
    {
      "epoch": 0.405446293494705,
      "grad_norm": 0.8218428492546082,
      "learning_rate": 2.694991641318445e-05,
      "loss": 0.9537,
      "step": 1876
    },
    {
      "epoch": 0.4056624162524314,
      "grad_norm": 0.833819568157196,
      "learning_rate": 2.693678720079105e-05,
      "loss": 0.697,
      "step": 1877
    },
    {
      "epoch": 0.40587853901015775,
      "grad_norm": 0.9879897832870483,
      "learning_rate": 2.6923654589387976e-05,
      "loss": 0.9709,
      "step": 1878
    },
    {
      "epoch": 0.40609466176788417,
      "grad_norm": 1.0249334573745728,
      "learning_rate": 2.6910518585410144e-05,
      "loss": 0.7457,
      "step": 1879
    },
    {
      "epoch": 0.40631078452561054,
      "grad_norm": 0.8635609745979309,
      "learning_rate": 2.6897379195294187e-05,
      "loss": 0.8512,
      "step": 1880
    },
    {
      "epoch": 0.40652690728333696,
      "grad_norm": 1.0228142738342285,
      "learning_rate": 2.6884236425478366e-05,
      "loss": 0.9787,
      "step": 1881
    },
    {
      "epoch": 0.4067430300410633,
      "grad_norm": 1.288407325744629,
      "learning_rate": 2.6871090282402614e-05,
      "loss": 0.9159,
      "step": 1882
    },
    {
      "epoch": 0.4069591527987897,
      "grad_norm": 0.9210023283958435,
      "learning_rate": 2.6857940772508504e-05,
      "loss": 1.0713,
      "step": 1883
    },
    {
      "epoch": 0.4071752755565161,
      "grad_norm": 0.9451961517333984,
      "learning_rate": 2.684478790223927e-05,
      "loss": 1.0474,
      "step": 1884
    },
    {
      "epoch": 0.4073913983142425,
      "grad_norm": 0.9673775434494019,
      "learning_rate": 2.6831631678039785e-05,
      "loss": 0.8909,
      "step": 1885
    },
    {
      "epoch": 0.4076075210719689,
      "grad_norm": 1.0196783542633057,
      "learning_rate": 2.681847210635657e-05,
      "loss": 0.988,
      "step": 1886
    },
    {
      "epoch": 0.40782364382969527,
      "grad_norm": 0.864243745803833,
      "learning_rate": 2.6805309193637793e-05,
      "loss": 0.94,
      "step": 1887
    },
    {
      "epoch": 0.40803976658742164,
      "grad_norm": 0.9797789454460144,
      "learning_rate": 2.6792142946333227e-05,
      "loss": 0.8806,
      "step": 1888
    },
    {
      "epoch": 0.40825588934514806,
      "grad_norm": 0.9118221402168274,
      "learning_rate": 2.6778973370894327e-05,
      "loss": 0.8709,
      "step": 1889
    },
    {
      "epoch": 0.4084720121028744,
      "grad_norm": 0.928864598274231,
      "learning_rate": 2.676580047377415e-05,
      "loss": 0.8471,
      "step": 1890
    },
    {
      "epoch": 0.4086881348606008,
      "grad_norm": 0.9791908860206604,
      "learning_rate": 2.675262426142738e-05,
      "loss": 1.0364,
      "step": 1891
    },
    {
      "epoch": 0.4089042576183272,
      "grad_norm": 1.1222354173660278,
      "learning_rate": 2.673944474031033e-05,
      "loss": 1.0092,
      "step": 1892
    },
    {
      "epoch": 0.4091203803760536,
      "grad_norm": 0.8850167989730835,
      "learning_rate": 2.6726261916880933e-05,
      "loss": 0.8162,
      "step": 1893
    },
    {
      "epoch": 0.40933650313378,
      "grad_norm": 0.9502853751182556,
      "learning_rate": 2.671307579759875e-05,
      "loss": 0.9675,
      "step": 1894
    },
    {
      "epoch": 0.40955262589150637,
      "grad_norm": 0.8617500066757202,
      "learning_rate": 2.6699886388924942e-05,
      "loss": 0.7554,
      "step": 1895
    },
    {
      "epoch": 0.40976874864923274,
      "grad_norm": 0.9188858270645142,
      "learning_rate": 2.6686693697322294e-05,
      "loss": 0.7892,
      "step": 1896
    },
    {
      "epoch": 0.40998487140695916,
      "grad_norm": 1.0231481790542603,
      "learning_rate": 2.6673497729255188e-05,
      "loss": 0.9294,
      "step": 1897
    },
    {
      "epoch": 0.41020099416468553,
      "grad_norm": 0.9644643068313599,
      "learning_rate": 2.666029849118963e-05,
      "loss": 1.0355,
      "step": 1898
    },
    {
      "epoch": 0.41041711692241195,
      "grad_norm": 0.8928436040878296,
      "learning_rate": 2.6647095989593194e-05,
      "loss": 0.8279,
      "step": 1899
    },
    {
      "epoch": 0.4106332396801383,
      "grad_norm": 0.9390683770179749,
      "learning_rate": 2.66338902309351e-05,
      "loss": 0.8059,
      "step": 1900
    },
    {
      "epoch": 0.4108493624378647,
      "grad_norm": 0.8569971919059753,
      "learning_rate": 2.6620681221686126e-05,
      "loss": 0.9673,
      "step": 1901
    },
    {
      "epoch": 0.4110654851955911,
      "grad_norm": 0.8158060312271118,
      "learning_rate": 2.6607468968318655e-05,
      "loss": 0.9587,
      "step": 1902
    },
    {
      "epoch": 0.4112816079533175,
      "grad_norm": 0.9675447344779968,
      "learning_rate": 2.6594253477306663e-05,
      "loss": 0.7087,
      "step": 1903
    },
    {
      "epoch": 0.4114977307110439,
      "grad_norm": 0.87473464012146,
      "learning_rate": 2.6581034755125713e-05,
      "loss": 1.0256,
      "step": 1904
    },
    {
      "epoch": 0.41171385346877026,
      "grad_norm": 1.0762571096420288,
      "learning_rate": 2.656781280825295e-05,
      "loss": 0.9579,
      "step": 1905
    },
    {
      "epoch": 0.41192997622649663,
      "grad_norm": 1.1155767440795898,
      "learning_rate": 2.6554587643167088e-05,
      "loss": 0.8231,
      "step": 1906
    },
    {
      "epoch": 0.41214609898422305,
      "grad_norm": 0.9181614518165588,
      "learning_rate": 2.6541359266348437e-05,
      "loss": 0.9439,
      "step": 1907
    },
    {
      "epoch": 0.4123622217419494,
      "grad_norm": 1.0097953081130981,
      "learning_rate": 2.6528127684278858e-05,
      "loss": 1.0552,
      "step": 1908
    },
    {
      "epoch": 0.41257834449967584,
      "grad_norm": 0.8957809805870056,
      "learning_rate": 2.6514892903441815e-05,
      "loss": 0.963,
      "step": 1909
    },
    {
      "epoch": 0.4127944672574022,
      "grad_norm": 0.9842140674591064,
      "learning_rate": 2.650165493032231e-05,
      "loss": 0.9449,
      "step": 1910
    },
    {
      "epoch": 0.4130105900151286,
      "grad_norm": 0.9267460703849792,
      "learning_rate": 2.6488413771406913e-05,
      "loss": 0.9907,
      "step": 1911
    },
    {
      "epoch": 0.413226712772855,
      "grad_norm": 1.0374408960342407,
      "learning_rate": 2.6475169433183768e-05,
      "loss": 1.0192,
      "step": 1912
    },
    {
      "epoch": 0.41344283553058137,
      "grad_norm": 0.7625603079795837,
      "learning_rate": 2.6461921922142573e-05,
      "loss": 0.7252,
      "step": 1913
    },
    {
      "epoch": 0.41365895828830773,
      "grad_norm": 0.9079304337501526,
      "learning_rate": 2.6448671244774572e-05,
      "loss": 0.8406,
      "step": 1914
    },
    {
      "epoch": 0.41387508104603415,
      "grad_norm": 0.9198092222213745,
      "learning_rate": 2.643541740757256e-05,
      "loss": 0.852,
      "step": 1915
    },
    {
      "epoch": 0.4140912038037605,
      "grad_norm": 0.8380274176597595,
      "learning_rate": 2.6422160417030908e-05,
      "loss": 0.9572,
      "step": 1916
    },
    {
      "epoch": 0.41430732656148694,
      "grad_norm": 0.9923501014709473,
      "learning_rate": 2.640890027964549e-05,
      "loss": 1.0269,
      "step": 1917
    },
    {
      "epoch": 0.4145234493192133,
      "grad_norm": 0.9316266775131226,
      "learning_rate": 2.6395637001913752e-05,
      "loss": 0.9294,
      "step": 1918
    },
    {
      "epoch": 0.4147395720769397,
      "grad_norm": 0.8916770219802856,
      "learning_rate": 2.6382370590334664e-05,
      "loss": 0.876,
      "step": 1919
    },
    {
      "epoch": 0.4149556948346661,
      "grad_norm": 0.9091696739196777,
      "learning_rate": 2.6369101051408748e-05,
      "loss": 0.8264,
      "step": 1920
    },
    {
      "epoch": 0.41517181759239247,
      "grad_norm": 0.9155973792076111,
      "learning_rate": 2.6355828391638036e-05,
      "loss": 0.9626,
      "step": 1921
    },
    {
      "epoch": 0.4153879403501189,
      "grad_norm": 0.9353761672973633,
      "learning_rate": 2.6342552617526104e-05,
      "loss": 1.0452,
      "step": 1922
    },
    {
      "epoch": 0.41560406310784526,
      "grad_norm": 0.8498687148094177,
      "learning_rate": 2.6329273735578053e-05,
      "loss": 0.8687,
      "step": 1923
    },
    {
      "epoch": 0.4158201858655716,
      "grad_norm": 0.895742654800415,
      "learning_rate": 2.6315991752300503e-05,
      "loss": 0.9839,
      "step": 1924
    },
    {
      "epoch": 0.41603630862329805,
      "grad_norm": 0.9112388491630554,
      "learning_rate": 2.630270667420159e-05,
      "loss": 0.8423,
      "step": 1925
    },
    {
      "epoch": 0.4162524313810244,
      "grad_norm": 0.8951870203018188,
      "learning_rate": 2.628941850779098e-05,
      "loss": 0.8704,
      "step": 1926
    },
    {
      "epoch": 0.41646855413875083,
      "grad_norm": 0.9525790214538574,
      "learning_rate": 2.6276127259579848e-05,
      "loss": 0.9369,
      "step": 1927
    },
    {
      "epoch": 0.4166846768964772,
      "grad_norm": 0.8421275615692139,
      "learning_rate": 2.626283293608085e-05,
      "loss": 1.0124,
      "step": 1928
    },
    {
      "epoch": 0.41690079965420357,
      "grad_norm": 0.9734429717063904,
      "learning_rate": 2.6249535543808202e-05,
      "loss": 0.9653,
      "step": 1929
    },
    {
      "epoch": 0.41711692241193,
      "grad_norm": 0.9232828617095947,
      "learning_rate": 2.623623508927758e-05,
      "loss": 0.9472,
      "step": 1930
    },
    {
      "epoch": 0.41733304516965636,
      "grad_norm": 0.9370399117469788,
      "learning_rate": 2.622293157900619e-05,
      "loss": 0.885,
      "step": 1931
    },
    {
      "epoch": 0.4175491679273828,
      "grad_norm": 1.0074236392974854,
      "learning_rate": 2.62096250195127e-05,
      "loss": 1.1915,
      "step": 1932
    },
    {
      "epoch": 0.41776529068510915,
      "grad_norm": 0.884238600730896,
      "learning_rate": 2.6196315417317314e-05,
      "loss": 0.8891,
      "step": 1933
    },
    {
      "epoch": 0.4179814134428355,
      "grad_norm": 0.8914430737495422,
      "learning_rate": 2.6183002778941692e-05,
      "loss": 0.743,
      "step": 1934
    },
    {
      "epoch": 0.41819753620056194,
      "grad_norm": 0.7857561707496643,
      "learning_rate": 2.6169687110909002e-05,
      "loss": 0.8604,
      "step": 1935
    },
    {
      "epoch": 0.4184136589582883,
      "grad_norm": 1.0400208234786987,
      "learning_rate": 2.6156368419743892e-05,
      "loss": 0.7448,
      "step": 1936
    },
    {
      "epoch": 0.41862978171601467,
      "grad_norm": 0.8496856093406677,
      "learning_rate": 2.614304671197248e-05,
      "loss": 0.8913,
      "step": 1937
    },
    {
      "epoch": 0.4188459044737411,
      "grad_norm": 0.9101283550262451,
      "learning_rate": 2.612972199412239e-05,
      "loss": 0.7999,
      "step": 1938
    },
    {
      "epoch": 0.41906202723146746,
      "grad_norm": 0.974231481552124,
      "learning_rate": 2.6116394272722688e-05,
      "loss": 1.0992,
      "step": 1939
    },
    {
      "epoch": 0.4192781499891939,
      "grad_norm": 0.9200551509857178,
      "learning_rate": 2.6103063554303934e-05,
      "loss": 0.8596,
      "step": 1940
    },
    {
      "epoch": 0.41949427274692025,
      "grad_norm": 0.7142768502235413,
      "learning_rate": 2.6089729845398144e-05,
      "loss": 0.854,
      "step": 1941
    },
    {
      "epoch": 0.4197103955046466,
      "grad_norm": 1.0593105554580688,
      "learning_rate": 2.6076393152538807e-05,
      "loss": 1.0074,
      "step": 1942
    },
    {
      "epoch": 0.41992651826237304,
      "grad_norm": 0.9730693101882935,
      "learning_rate": 2.606305348226087e-05,
      "loss": 0.8169,
      "step": 1943
    },
    {
      "epoch": 0.4201426410200994,
      "grad_norm": 1.0784400701522827,
      "learning_rate": 2.604971084110075e-05,
      "loss": 0.9604,
      "step": 1944
    },
    {
      "epoch": 0.42035876377782583,
      "grad_norm": 0.7956601977348328,
      "learning_rate": 2.6036365235596296e-05,
      "loss": 0.8099,
      "step": 1945
    },
    {
      "epoch": 0.4205748865355522,
      "grad_norm": 0.8289860486984253,
      "learning_rate": 2.602301667228683e-05,
      "loss": 0.7475,
      "step": 1946
    },
    {
      "epoch": 0.42079100929327856,
      "grad_norm": 0.9040570259094238,
      "learning_rate": 2.6009665157713127e-05,
      "loss": 0.885,
      "step": 1947
    },
    {
      "epoch": 0.421007132051005,
      "grad_norm": 0.9660112857818604,
      "learning_rate": 2.5996310698417376e-05,
      "loss": 0.8363,
      "step": 1948
    },
    {
      "epoch": 0.42122325480873135,
      "grad_norm": 0.9220053553581238,
      "learning_rate": 2.5982953300943254e-05,
      "loss": 0.7915,
      "step": 1949
    },
    {
      "epoch": 0.4214393775664578,
      "grad_norm": 0.9023492336273193,
      "learning_rate": 2.5969592971835836e-05,
      "loss": 0.9589,
      "step": 1950
    },
    {
      "epoch": 0.42165550032418414,
      "grad_norm": 0.8304703831672668,
      "learning_rate": 2.595622971764167e-05,
      "loss": 0.9593,
      "step": 1951
    },
    {
      "epoch": 0.4218716230819105,
      "grad_norm": 0.9802231788635254,
      "learning_rate": 2.594286354490871e-05,
      "loss": 1.0522,
      "step": 1952
    },
    {
      "epoch": 0.42208774583963693,
      "grad_norm": 1.0685917139053345,
      "learning_rate": 2.592949446018635e-05,
      "loss": 0.7953,
      "step": 1953
    },
    {
      "epoch": 0.4223038685973633,
      "grad_norm": 0.9584881067276001,
      "learning_rate": 2.5916122470025414e-05,
      "loss": 1.0165,
      "step": 1954
    },
    {
      "epoch": 0.42251999135508966,
      "grad_norm": 0.8616102933883667,
      "learning_rate": 2.5902747580978137e-05,
      "loss": 0.7753,
      "step": 1955
    },
    {
      "epoch": 0.4227361141128161,
      "grad_norm": 0.885311484336853,
      "learning_rate": 2.5889369799598196e-05,
      "loss": 0.8329,
      "step": 1956
    },
    {
      "epoch": 0.42295223687054245,
      "grad_norm": 0.8231478929519653,
      "learning_rate": 2.5875989132440663e-05,
      "loss": 0.96,
      "step": 1957
    },
    {
      "epoch": 0.4231683596282689,
      "grad_norm": 1.279319405555725,
      "learning_rate": 2.5862605586062044e-05,
      "loss": 1.0165,
      "step": 1958
    },
    {
      "epoch": 0.42338448238599524,
      "grad_norm": 0.7875201106071472,
      "learning_rate": 2.5849219167020235e-05,
      "loss": 1.0049,
      "step": 1959
    },
    {
      "epoch": 0.4236006051437216,
      "grad_norm": 0.9663813710212708,
      "learning_rate": 2.583582988187456e-05,
      "loss": 0.8889,
      "step": 1960
    },
    {
      "epoch": 0.42381672790144803,
      "grad_norm": 0.9025231599807739,
      "learning_rate": 2.582243773718573e-05,
      "loss": 0.9526,
      "step": 1961
    },
    {
      "epoch": 0.4240328506591744,
      "grad_norm": 0.9170035719871521,
      "learning_rate": 2.5809042739515872e-05,
      "loss": 0.8904,
      "step": 1962
    },
    {
      "epoch": 0.4242489734169008,
      "grad_norm": 0.9558761715888977,
      "learning_rate": 2.5795644895428494e-05,
      "loss": 0.829,
      "step": 1963
    },
    {
      "epoch": 0.4244650961746272,
      "grad_norm": 1.0170693397521973,
      "learning_rate": 2.578224421148852e-05,
      "loss": 1.1339,
      "step": 1964
    },
    {
      "epoch": 0.42468121893235355,
      "grad_norm": 0.9244565963745117,
      "learning_rate": 2.5768840694262247e-05,
      "loss": 1.1144,
      "step": 1965
    },
    {
      "epoch": 0.42489734169008,
      "grad_norm": 0.8529707193374634,
      "learning_rate": 2.5755434350317367e-05,
      "loss": 0.9846,
      "step": 1966
    },
    {
      "epoch": 0.42511346444780634,
      "grad_norm": 0.8597925901412964,
      "learning_rate": 2.574202518622297e-05,
      "loss": 0.9885,
      "step": 1967
    },
    {
      "epoch": 0.42532958720553277,
      "grad_norm": 0.9960910081863403,
      "learning_rate": 2.5728613208549495e-05,
      "loss": 0.8757,
      "step": 1968
    },
    {
      "epoch": 0.42554570996325913,
      "grad_norm": 0.9559326767921448,
      "learning_rate": 2.5715198423868794e-05,
      "loss": 0.9808,
      "step": 1969
    },
    {
      "epoch": 0.4257618327209855,
      "grad_norm": 1.0519607067108154,
      "learning_rate": 2.5701780838754075e-05,
      "loss": 1.0984,
      "step": 1970
    },
    {
      "epoch": 0.4259779554787119,
      "grad_norm": 0.9505062103271484,
      "learning_rate": 2.568836045977993e-05,
      "loss": 0.9912,
      "step": 1971
    },
    {
      "epoch": 0.4261940782364383,
      "grad_norm": 0.9739280939102173,
      "learning_rate": 2.5674937293522305e-05,
      "loss": 1.039,
      "step": 1972
    },
    {
      "epoch": 0.4264102009941647,
      "grad_norm": 0.9407761096954346,
      "learning_rate": 2.566151134655853e-05,
      "loss": 0.8215,
      "step": 1973
    },
    {
      "epoch": 0.4266263237518911,
      "grad_norm": 0.9971686005592346,
      "learning_rate": 2.564808262546728e-05,
      "loss": 1.0354,
      "step": 1974
    },
    {
      "epoch": 0.42684244650961745,
      "grad_norm": 0.8059692978858948,
      "learning_rate": 2.5634651136828597e-05,
      "loss": 0.9137,
      "step": 1975
    },
    {
      "epoch": 0.42705856926734387,
      "grad_norm": 0.9364917278289795,
      "learning_rate": 2.5621216887223886e-05,
      "loss": 1.1067,
      "step": 1976
    },
    {
      "epoch": 0.42727469202507024,
      "grad_norm": 0.8833372592926025,
      "learning_rate": 2.560777988323589e-05,
      "loss": 1.0041,
      "step": 1977
    },
    {
      "epoch": 0.4274908147827966,
      "grad_norm": 0.9525211453437805,
      "learning_rate": 2.559434013144872e-05,
      "loss": 1.0978,
      "step": 1978
    },
    {
      "epoch": 0.427706937540523,
      "grad_norm": 1.0457853078842163,
      "learning_rate": 2.5580897638447814e-05,
      "loss": 0.881,
      "step": 1979
    },
    {
      "epoch": 0.4279230602982494,
      "grad_norm": 0.9005357027053833,
      "learning_rate": 2.5567452410819966e-05,
      "loss": 0.9099,
      "step": 1980
    },
    {
      "epoch": 0.4281391830559758,
      "grad_norm": 0.9874290227890015,
      "learning_rate": 2.555400445515331e-05,
      "loss": 0.9065,
      "step": 1981
    },
    {
      "epoch": 0.4283553058137022,
      "grad_norm": 0.8200092911720276,
      "learning_rate": 2.554055377803731e-05,
      "loss": 0.6178,
      "step": 1982
    },
    {
      "epoch": 0.42857142857142855,
      "grad_norm": 1.066659688949585,
      "learning_rate": 2.552710038606277e-05,
      "loss": 0.9835,
      "step": 1983
    },
    {
      "epoch": 0.42878755132915497,
      "grad_norm": 0.9795448780059814,
      "learning_rate": 2.551364428582181e-05,
      "loss": 0.9276,
      "step": 1984
    },
    {
      "epoch": 0.42900367408688134,
      "grad_norm": 0.9300527572631836,
      "learning_rate": 2.55001854839079e-05,
      "loss": 0.9971,
      "step": 1985
    },
    {
      "epoch": 0.42921979684460776,
      "grad_norm": 0.771075963973999,
      "learning_rate": 2.548672398691581e-05,
      "loss": 0.7037,
      "step": 1986
    },
    {
      "epoch": 0.4294359196023341,
      "grad_norm": 0.8233857750892639,
      "learning_rate": 2.5473259801441663e-05,
      "loss": 0.9341,
      "step": 1987
    },
    {
      "epoch": 0.4296520423600605,
      "grad_norm": 1.0577247142791748,
      "learning_rate": 2.5459792934082853e-05,
      "loss": 1.0113,
      "step": 1988
    },
    {
      "epoch": 0.4298681651177869,
      "grad_norm": 0.9196573495864868,
      "learning_rate": 2.5446323391438133e-05,
      "loss": 0.822,
      "step": 1989
    },
    {
      "epoch": 0.4300842878755133,
      "grad_norm": 0.8457186818122864,
      "learning_rate": 2.5432851180107544e-05,
      "loss": 0.8506,
      "step": 1990
    },
    {
      "epoch": 0.4303004106332397,
      "grad_norm": 0.8600859045982361,
      "learning_rate": 2.5419376306692433e-05,
      "loss": 0.9624,
      "step": 1991
    },
    {
      "epoch": 0.43051653339096607,
      "grad_norm": 0.9862504601478577,
      "learning_rate": 2.540589877779546e-05,
      "loss": 0.9211,
      "step": 1992
    },
    {
      "epoch": 0.43073265614869244,
      "grad_norm": 0.8851495385169983,
      "learning_rate": 2.539241860002058e-05,
      "loss": 0.9876,
      "step": 1993
    },
    {
      "epoch": 0.43094877890641886,
      "grad_norm": 0.9032397270202637,
      "learning_rate": 2.537893577997305e-05,
      "loss": 0.9057,
      "step": 1994
    },
    {
      "epoch": 0.43116490166414523,
      "grad_norm": 0.8996214270591736,
      "learning_rate": 2.5365450324259424e-05,
      "loss": 1.0677,
      "step": 1995
    },
    {
      "epoch": 0.43138102442187165,
      "grad_norm": 0.9084228277206421,
      "learning_rate": 2.5351962239487548e-05,
      "loss": 0.9358,
      "step": 1996
    },
    {
      "epoch": 0.431597147179598,
      "grad_norm": 1.0036503076553345,
      "learning_rate": 2.5338471532266534e-05,
      "loss": 0.9952,
      "step": 1997
    },
    {
      "epoch": 0.4318132699373244,
      "grad_norm": 0.893693208694458,
      "learning_rate": 2.532497820920682e-05,
      "loss": 0.9576,
      "step": 1998
    },
    {
      "epoch": 0.4320293926950508,
      "grad_norm": 0.9255844354629517,
      "learning_rate": 2.531148227692009e-05,
      "loss": 0.9368,
      "step": 1999
    },
    {
      "epoch": 0.4322455154527772,
      "grad_norm": 0.9848679304122925,
      "learning_rate": 2.5297983742019325e-05,
      "loss": 1.0371,
      "step": 2000
    },
    {
      "epoch": 0.43246163821050354,
      "grad_norm": 0.9268440008163452,
      "learning_rate": 2.5284482611118773e-05,
      "loss": 0.8615,
      "step": 2001
    },
    {
      "epoch": 0.43267776096822996,
      "grad_norm": 0.8308999538421631,
      "learning_rate": 2.5270978890833955e-05,
      "loss": 0.886,
      "step": 2002
    },
    {
      "epoch": 0.43289388372595633,
      "grad_norm": 0.9142823815345764,
      "learning_rate": 2.525747258778167e-05,
      "loss": 0.9298,
      "step": 2003
    },
    {
      "epoch": 0.43311000648368275,
      "grad_norm": 0.9102539420127869,
      "learning_rate": 2.5243963708579964e-05,
      "loss": 0.9284,
      "step": 2004
    },
    {
      "epoch": 0.4333261292414091,
      "grad_norm": 0.8535233736038208,
      "learning_rate": 2.5230452259848167e-05,
      "loss": 0.8829,
      "step": 2005
    },
    {
      "epoch": 0.4335422519991355,
      "grad_norm": 0.9500638246536255,
      "learning_rate": 2.5216938248206847e-05,
      "loss": 0.9293,
      "step": 2006
    },
    {
      "epoch": 0.4337583747568619,
      "grad_norm": 0.9432377815246582,
      "learning_rate": 2.520342168027786e-05,
      "loss": 0.8627,
      "step": 2007
    },
    {
      "epoch": 0.4339744975145883,
      "grad_norm": 0.8626837134361267,
      "learning_rate": 2.5189902562684268e-05,
      "loss": 0.9437,
      "step": 2008
    },
    {
      "epoch": 0.4341906202723147,
      "grad_norm": 0.9086456894874573,
      "learning_rate": 2.5176380902050418e-05,
      "loss": 0.9182,
      "step": 2009
    },
    {
      "epoch": 0.43440674303004106,
      "grad_norm": 0.9128880500793457,
      "learning_rate": 2.5162856705001892e-05,
      "loss": 0.8335,
      "step": 2010
    },
    {
      "epoch": 0.43462286578776743,
      "grad_norm": 0.933714747428894,
      "learning_rate": 2.514932997816552e-05,
      "loss": 1.0162,
      "step": 2011
    },
    {
      "epoch": 0.43483898854549385,
      "grad_norm": 0.9576535820960999,
      "learning_rate": 2.5135800728169357e-05,
      "loss": 1.0565,
      "step": 2012
    },
    {
      "epoch": 0.4350551113032202,
      "grad_norm": 1.1083024740219116,
      "learning_rate": 2.512226896164271e-05,
      "loss": 0.9044,
      "step": 2013
    },
    {
      "epoch": 0.43527123406094664,
      "grad_norm": 0.7745140790939331,
      "learning_rate": 2.5108734685216117e-05,
      "loss": 0.7354,
      "step": 2014
    },
    {
      "epoch": 0.435487356818673,
      "grad_norm": 1.1194214820861816,
      "learning_rate": 2.509519790552133e-05,
      "loss": 0.923,
      "step": 2015
    },
    {
      "epoch": 0.4357034795763994,
      "grad_norm": 0.9793070554733276,
      "learning_rate": 2.5081658629191353e-05,
      "loss": 1.0025,
      "step": 2016
    },
    {
      "epoch": 0.4359196023341258,
      "grad_norm": 1.0025380849838257,
      "learning_rate": 2.5068116862860397e-05,
      "loss": 1.009,
      "step": 2017
    },
    {
      "epoch": 0.43613572509185217,
      "grad_norm": 0.9246045351028442,
      "learning_rate": 2.505457261316389e-05,
      "loss": 1.0137,
      "step": 2018
    },
    {
      "epoch": 0.43635184784957853,
      "grad_norm": 0.8660938143730164,
      "learning_rate": 2.504102588673849e-05,
      "loss": 0.9983,
      "step": 2019
    },
    {
      "epoch": 0.43656797060730496,
      "grad_norm": 0.9384306073188782,
      "learning_rate": 2.5027476690222058e-05,
      "loss": 0.9404,
      "step": 2020
    },
    {
      "epoch": 0.4367840933650313,
      "grad_norm": 1.1287697553634644,
      "learning_rate": 2.501392503025367e-05,
      "loss": 0.9825,
      "step": 2021
    },
    {
      "epoch": 0.43700021612275775,
      "grad_norm": 0.9511003494262695,
      "learning_rate": 2.5000370913473605e-05,
      "loss": 0.839,
      "step": 2022
    },
    {
      "epoch": 0.4372163388804841,
      "grad_norm": 0.9103716015815735,
      "learning_rate": 2.498681434652335e-05,
      "loss": 0.9403,
      "step": 2023
    },
    {
      "epoch": 0.4374324616382105,
      "grad_norm": 0.9426201581954956,
      "learning_rate": 2.4973255336045597e-05,
      "loss": 1.0598,
      "step": 2024
    },
    {
      "epoch": 0.4376485843959369,
      "grad_norm": 0.8501827120780945,
      "learning_rate": 2.4959693888684226e-05,
      "loss": 0.9278,
      "step": 2025
    },
    {
      "epoch": 0.43786470715366327,
      "grad_norm": 0.9373735785484314,
      "learning_rate": 2.494613001108431e-05,
      "loss": 0.9727,
      "step": 2026
    },
    {
      "epoch": 0.4380808299113897,
      "grad_norm": 1.0211824178695679,
      "learning_rate": 2.493256370989213e-05,
      "loss": 1.0724,
      "step": 2027
    },
    {
      "epoch": 0.43829695266911606,
      "grad_norm": 0.9939093589782715,
      "learning_rate": 2.4918994991755126e-05,
      "loss": 0.9271,
      "step": 2028
    },
    {
      "epoch": 0.4385130754268424,
      "grad_norm": 0.9821503162384033,
      "learning_rate": 2.4905423863321953e-05,
      "loss": 0.8327,
      "step": 2029
    },
    {
      "epoch": 0.43872919818456885,
      "grad_norm": 0.7790869474411011,
      "learning_rate": 2.4891850331242427e-05,
      "loss": 0.8875,
      "step": 2030
    },
    {
      "epoch": 0.4389453209422952,
      "grad_norm": 0.971564531326294,
      "learning_rate": 2.4878274402167544e-05,
      "loss": 0.9534,
      "step": 2031
    },
    {
      "epoch": 0.43916144370002164,
      "grad_norm": 0.8810921311378479,
      "learning_rate": 2.4864696082749483e-05,
      "loss": 0.8962,
      "step": 2032
    },
    {
      "epoch": 0.439377566457748,
      "grad_norm": 1.0457826852798462,
      "learning_rate": 2.4851115379641588e-05,
      "loss": 1.0562,
      "step": 2033
    },
    {
      "epoch": 0.43959368921547437,
      "grad_norm": 0.8541572093963623,
      "learning_rate": 2.4837532299498366e-05,
      "loss": 0.7529,
      "step": 2034
    },
    {
      "epoch": 0.4398098119732008,
      "grad_norm": 1.006701111793518,
      "learning_rate": 2.4823946848975503e-05,
      "loss": 1.0355,
      "step": 2035
    },
    {
      "epoch": 0.44002593473092716,
      "grad_norm": 0.8878010511398315,
      "learning_rate": 2.481035903472984e-05,
      "loss": 0.8281,
      "step": 2036
    },
    {
      "epoch": 0.4402420574886536,
      "grad_norm": 1.10139000415802,
      "learning_rate": 2.4796768863419364e-05,
      "loss": 0.9031,
      "step": 2037
    },
    {
      "epoch": 0.44045818024637995,
      "grad_norm": 0.9112840294837952,
      "learning_rate": 2.4783176341703233e-05,
      "loss": 0.8575,
      "step": 2038
    },
    {
      "epoch": 0.4406743030041063,
      "grad_norm": 0.9776943922042847,
      "learning_rate": 2.4769581476241756e-05,
      "loss": 0.8534,
      "step": 2039
    },
    {
      "epoch": 0.44089042576183274,
      "grad_norm": 0.8622772693634033,
      "learning_rate": 2.4755984273696377e-05,
      "loss": 0.8593,
      "step": 2040
    },
    {
      "epoch": 0.4411065485195591,
      "grad_norm": 0.7838684320449829,
      "learning_rate": 2.4742384740729704e-05,
      "loss": 0.952,
      "step": 2041
    },
    {
      "epoch": 0.44132267127728547,
      "grad_norm": 0.982847273349762,
      "learning_rate": 2.4728782884005465e-05,
      "loss": 0.8801,
      "step": 2042
    },
    {
      "epoch": 0.4415387940350119,
      "grad_norm": 0.8614881038665771,
      "learning_rate": 2.471517871018855e-05,
      "loss": 0.8426,
      "step": 2043
    },
    {
      "epoch": 0.44175491679273826,
      "grad_norm": 0.9391106367111206,
      "learning_rate": 2.4701572225944962e-05,
      "loss": 0.9851,
      "step": 2044
    },
    {
      "epoch": 0.4419710395504647,
      "grad_norm": 1.0585438013076782,
      "learning_rate": 2.4687963437941855e-05,
      "loss": 1.0897,
      "step": 2045
    },
    {
      "epoch": 0.44218716230819105,
      "grad_norm": 0.9564892649650574,
      "learning_rate": 2.4674352352847492e-05,
      "loss": 0.9417,
      "step": 2046
    },
    {
      "epoch": 0.4424032850659174,
      "grad_norm": 0.9705290198326111,
      "learning_rate": 2.4660738977331297e-05,
      "loss": 0.9973,
      "step": 2047
    },
    {
      "epoch": 0.44261940782364384,
      "grad_norm": 0.9146240949630737,
      "learning_rate": 2.4647123318063758e-05,
      "loss": 0.8612,
      "step": 2048
    },
    {
      "epoch": 0.4428355305813702,
      "grad_norm": 0.8360450863838196,
      "learning_rate": 2.463350538171655e-05,
      "loss": 0.7957,
      "step": 2049
    },
    {
      "epoch": 0.44305165333909663,
      "grad_norm": 0.8925408720970154,
      "learning_rate": 2.4619885174962414e-05,
      "loss": 0.9586,
      "step": 2050
    },
    {
      "epoch": 0.443267776096823,
      "grad_norm": 1.1791043281555176,
      "learning_rate": 2.460626270447522e-05,
      "loss": 1.1034,
      "step": 2051
    },
    {
      "epoch": 0.44348389885454936,
      "grad_norm": 0.8828898668289185,
      "learning_rate": 2.4592637976929946e-05,
      "loss": 0.8665,
      "step": 2052
    },
    {
      "epoch": 0.4437000216122758,
      "grad_norm": 0.9726607799530029,
      "learning_rate": 2.4579010999002683e-05,
      "loss": 1.0096,
      "step": 2053
    },
    {
      "epoch": 0.44391614437000215,
      "grad_norm": 0.9190080761909485,
      "learning_rate": 2.4565381777370618e-05,
      "loss": 0.7618,
      "step": 2054
    },
    {
      "epoch": 0.4441322671277286,
      "grad_norm": 0.9725461006164551,
      "learning_rate": 2.4551750318712027e-05,
      "loss": 0.8907,
      "step": 2055
    },
    {
      "epoch": 0.44434838988545494,
      "grad_norm": 0.8990496397018433,
      "learning_rate": 2.4538116629706314e-05,
      "loss": 0.8768,
      "step": 2056
    },
    {
      "epoch": 0.4445645126431813,
      "grad_norm": 1.0470227003097534,
      "learning_rate": 2.4524480717033936e-05,
      "loss": 0.8767,
      "step": 2057
    },
    {
      "epoch": 0.44478063540090773,
      "grad_norm": 0.9046476483345032,
      "learning_rate": 2.4510842587376465e-05,
      "loss": 0.9632,
      "step": 2058
    },
    {
      "epoch": 0.4449967581586341,
      "grad_norm": 0.8195945620536804,
      "learning_rate": 2.4497202247416557e-05,
      "loss": 0.8968,
      "step": 2059
    },
    {
      "epoch": 0.44521288091636047,
      "grad_norm": 0.9423304796218872,
      "learning_rate": 2.4483559703837943e-05,
      "loss": 0.8321,
      "step": 2060
    },
    {
      "epoch": 0.4454290036740869,
      "grad_norm": 0.9971717000007629,
      "learning_rate": 2.4469914963325444e-05,
      "loss": 0.7434,
      "step": 2061
    },
    {
      "epoch": 0.44564512643181325,
      "grad_norm": 0.8343095183372498,
      "learning_rate": 2.4456268032564935e-05,
      "loss": 0.7801,
      "step": 2062
    },
    {
      "epoch": 0.4458612491895397,
      "grad_norm": 0.9570486545562744,
      "learning_rate": 2.4442618918243398e-05,
      "loss": 1.0419,
      "step": 2063
    },
    {
      "epoch": 0.44607737194726604,
      "grad_norm": 0.9690583944320679,
      "learning_rate": 2.4428967627048857e-05,
      "loss": 0.8199,
      "step": 2064
    },
    {
      "epoch": 0.4462934947049924,
      "grad_norm": 0.8820160031318665,
      "learning_rate": 2.4415314165670423e-05,
      "loss": 0.8887,
      "step": 2065
    },
    {
      "epoch": 0.44650961746271883,
      "grad_norm": 0.9160499572753906,
      "learning_rate": 2.4401658540798247e-05,
      "loss": 0.7878,
      "step": 2066
    },
    {
      "epoch": 0.4467257402204452,
      "grad_norm": 0.9459227919578552,
      "learning_rate": 2.4388000759123573e-05,
      "loss": 0.8063,
      "step": 2067
    },
    {
      "epoch": 0.4469418629781716,
      "grad_norm": 0.8146785497665405,
      "learning_rate": 2.4374340827338653e-05,
      "loss": 0.7571,
      "step": 2068
    },
    {
      "epoch": 0.447157985735898,
      "grad_norm": 0.9509387612342834,
      "learning_rate": 2.4360678752136853e-05,
      "loss": 1.1469,
      "step": 2069
    },
    {
      "epoch": 0.44737410849362436,
      "grad_norm": 0.8361994028091431,
      "learning_rate": 2.4347014540212546e-05,
      "loss": 0.8913,
      "step": 2070
    },
    {
      "epoch": 0.4475902312513508,
      "grad_norm": 0.8761122822761536,
      "learning_rate": 2.4333348198261154e-05,
      "loss": 0.9557,
      "step": 2071
    },
    {
      "epoch": 0.44780635400907715,
      "grad_norm": 0.8106732964515686,
      "learning_rate": 2.4319679732979164e-05,
      "loss": 0.8319,
      "step": 2072
    },
    {
      "epoch": 0.44802247676680357,
      "grad_norm": 0.9187236428260803,
      "learning_rate": 2.4306009151064096e-05,
      "loss": 0.7749,
      "step": 2073
    },
    {
      "epoch": 0.44823859952452993,
      "grad_norm": 0.8334870934486389,
      "learning_rate": 2.4292336459214497e-05,
      "loss": 0.8025,
      "step": 2074
    },
    {
      "epoch": 0.4484547222822563,
      "grad_norm": 0.8454905152320862,
      "learning_rate": 2.427866166412995e-05,
      "loss": 0.8934,
      "step": 2075
    },
    {
      "epoch": 0.4486708450399827,
      "grad_norm": 0.9709123969078064,
      "learning_rate": 2.4264984772511085e-05,
      "loss": 0.958,
      "step": 2076
    },
    {
      "epoch": 0.4488869677977091,
      "grad_norm": 0.89371258020401,
      "learning_rate": 2.4251305791059533e-05,
      "loss": 0.8512,
      "step": 2077
    },
    {
      "epoch": 0.4491030905554355,
      "grad_norm": 0.8887743949890137,
      "learning_rate": 2.4237624726477976e-05,
      "loss": 0.8881,
      "step": 2078
    },
    {
      "epoch": 0.4493192133131619,
      "grad_norm": 0.808680534362793,
      "learning_rate": 2.4223941585470104e-05,
      "loss": 0.9046,
      "step": 2079
    },
    {
      "epoch": 0.44953533607088825,
      "grad_norm": 0.913142204284668,
      "learning_rate": 2.421025637474061e-05,
      "loss": 1.0584,
      "step": 2080
    },
    {
      "epoch": 0.44975145882861467,
      "grad_norm": 0.9678011536598206,
      "learning_rate": 2.4196569100995228e-05,
      "loss": 0.9738,
      "step": 2081
    },
    {
      "epoch": 0.44996758158634104,
      "grad_norm": 0.8861469030380249,
      "learning_rate": 2.4182879770940692e-05,
      "loss": 0.8452,
      "step": 2082
    },
    {
      "epoch": 0.4501837043440674,
      "grad_norm": 1.0149720907211304,
      "learning_rate": 2.4169188391284735e-05,
      "loss": 0.758,
      "step": 2083
    },
    {
      "epoch": 0.4503998271017938,
      "grad_norm": 0.8799240589141846,
      "learning_rate": 2.4155494968736104e-05,
      "loss": 0.8982,
      "step": 2084
    },
    {
      "epoch": 0.4506159498595202,
      "grad_norm": 0.9708576202392578,
      "learning_rate": 2.4141799510004545e-05,
      "loss": 0.842,
      "step": 2085
    },
    {
      "epoch": 0.4508320726172466,
      "grad_norm": 0.8632113337516785,
      "learning_rate": 2.4128102021800794e-05,
      "loss": 0.8216,
      "step": 2086
    },
    {
      "epoch": 0.451048195374973,
      "grad_norm": 0.9555045366287231,
      "learning_rate": 2.4114402510836605e-05,
      "loss": 1.0627,
      "step": 2087
    },
    {
      "epoch": 0.45126431813269935,
      "grad_norm": 0.9048724174499512,
      "learning_rate": 2.4100700983824687e-05,
      "loss": 0.8379,
      "step": 2088
    },
    {
      "epoch": 0.45148044089042577,
      "grad_norm": 0.9210740923881531,
      "learning_rate": 2.408699744747877e-05,
      "loss": 0.9444,
      "step": 2089
    },
    {
      "epoch": 0.45169656364815214,
      "grad_norm": 0.9148637652397156,
      "learning_rate": 2.407329190851356e-05,
      "loss": 0.8682,
      "step": 2090
    },
    {
      "epoch": 0.45191268640587856,
      "grad_norm": 0.9044589996337891,
      "learning_rate": 2.4059584373644724e-05,
      "loss": 0.8847,
      "step": 2091
    },
    {
      "epoch": 0.45212880916360493,
      "grad_norm": 1.0271599292755127,
      "learning_rate": 2.4045874849588932e-05,
      "loss": 1.0158,
      "step": 2092
    },
    {
      "epoch": 0.4523449319213313,
      "grad_norm": 0.9533768892288208,
      "learning_rate": 2.403216334306381e-05,
      "loss": 0.8622,
      "step": 2093
    },
    {
      "epoch": 0.4525610546790577,
      "grad_norm": 0.9236588478088379,
      "learning_rate": 2.4018449860787977e-05,
      "loss": 0.9078,
      "step": 2094
    },
    {
      "epoch": 0.4527771774367841,
      "grad_norm": 0.992451012134552,
      "learning_rate": 2.4004734409480996e-05,
      "loss": 1.0081,
      "step": 2095
    },
    {
      "epoch": 0.4529933001945105,
      "grad_norm": 0.8581631779670715,
      "learning_rate": 2.3991016995863417e-05,
      "loss": 0.7346,
      "step": 2096
    },
    {
      "epoch": 0.4532094229522369,
      "grad_norm": 0.8542599081993103,
      "learning_rate": 2.3977297626656736e-05,
      "loss": 0.9192,
      "step": 2097
    },
    {
      "epoch": 0.45342554570996324,
      "grad_norm": 0.9894627332687378,
      "learning_rate": 2.396357630858341e-05,
      "loss": 1.1239,
      "step": 2098
    },
    {
      "epoch": 0.45364166846768966,
      "grad_norm": 0.9991193413734436,
      "learning_rate": 2.394985304836686e-05,
      "loss": 0.9764,
      "step": 2099
    },
    {
      "epoch": 0.45385779122541603,
      "grad_norm": 0.9391309022903442,
      "learning_rate": 2.3936127852731436e-05,
      "loss": 0.8762,
      "step": 2100
    },
    {
      "epoch": 0.45407391398314245,
      "grad_norm": 0.908093273639679,
      "learning_rate": 2.392240072840247e-05,
      "loss": 0.9232,
      "step": 2101
    },
    {
      "epoch": 0.4542900367408688,
      "grad_norm": 1.0131136178970337,
      "learning_rate": 2.3908671682106218e-05,
      "loss": 0.9918,
      "step": 2102
    },
    {
      "epoch": 0.4545061594985952,
      "grad_norm": 0.9139040112495422,
      "learning_rate": 2.389494072056987e-05,
      "loss": 0.7649,
      "step": 2103
    },
    {
      "epoch": 0.4547222822563216,
      "grad_norm": 0.9638808369636536,
      "learning_rate": 2.3881207850521578e-05,
      "loss": 1.1865,
      "step": 2104
    },
    {
      "epoch": 0.454938405014048,
      "grad_norm": 0.8026013970375061,
      "learning_rate": 2.3867473078690412e-05,
      "loss": 0.9392,
      "step": 2105
    },
    {
      "epoch": 0.45515452777177434,
      "grad_norm": 0.8221445083618164,
      "learning_rate": 2.3853736411806372e-05,
      "loss": 0.8729,
      "step": 2106
    },
    {
      "epoch": 0.45537065052950076,
      "grad_norm": 0.9557278752326965,
      "learning_rate": 2.3839997856600405e-05,
      "loss": 1.0695,
      "step": 2107
    },
    {
      "epoch": 0.45558677328722713,
      "grad_norm": 1.0303809642791748,
      "learning_rate": 2.3826257419804364e-05,
      "loss": 1.0914,
      "step": 2108
    },
    {
      "epoch": 0.45580289604495355,
      "grad_norm": 0.8700131773948669,
      "learning_rate": 2.3812515108151027e-05,
      "loss": 0.7936,
      "step": 2109
    },
    {
      "epoch": 0.4560190188026799,
      "grad_norm": 1.0393489599227905,
      "learning_rate": 2.379877092837411e-05,
      "loss": 0.9687,
      "step": 2110
    },
    {
      "epoch": 0.4562351415604063,
      "grad_norm": 1.025715947151184,
      "learning_rate": 2.3785024887208207e-05,
      "loss": 1.0204,
      "step": 2111
    },
    {
      "epoch": 0.4564512643181327,
      "grad_norm": 0.8350749611854553,
      "learning_rate": 2.3771276991388864e-05,
      "loss": 0.99,
      "step": 2112
    },
    {
      "epoch": 0.4566673870758591,
      "grad_norm": 0.9822539687156677,
      "learning_rate": 2.3757527247652513e-05,
      "loss": 1.0197,
      "step": 2113
    },
    {
      "epoch": 0.4568835098335855,
      "grad_norm": 0.986997663974762,
      "learning_rate": 2.3743775662736504e-05,
      "loss": 0.9264,
      "step": 2114
    },
    {
      "epoch": 0.45709963259131187,
      "grad_norm": 0.8423007130622864,
      "learning_rate": 2.3730022243379063e-05,
      "loss": 0.8811,
      "step": 2115
    },
    {
      "epoch": 0.45731575534903823,
      "grad_norm": 0.893768310546875,
      "learning_rate": 2.3716266996319356e-05,
      "loss": 0.9342,
      "step": 2116
    },
    {
      "epoch": 0.45753187810676466,
      "grad_norm": 0.8970029950141907,
      "learning_rate": 2.3702509928297407e-05,
      "loss": 0.9518,
      "step": 2117
    },
    {
      "epoch": 0.457748000864491,
      "grad_norm": 0.9548612833023071,
      "learning_rate": 2.368875104605415e-05,
      "loss": 0.9881,
      "step": 2118
    },
    {
      "epoch": 0.45796412362221744,
      "grad_norm": 0.9271209836006165,
      "learning_rate": 2.367499035633141e-05,
      "loss": 0.8797,
      "step": 2119
    },
    {
      "epoch": 0.4581802463799438,
      "grad_norm": 0.8605367541313171,
      "learning_rate": 2.366122786587189e-05,
      "loss": 0.8788,
      "step": 2120
    },
    {
      "epoch": 0.4583963691376702,
      "grad_norm": 0.9081845879554749,
      "learning_rate": 2.364746358141918e-05,
      "loss": 0.8302,
      "step": 2121
    },
    {
      "epoch": 0.4586124918953966,
      "grad_norm": 0.8858135342597961,
      "learning_rate": 2.3633697509717745e-05,
      "loss": 0.9653,
      "step": 2122
    },
    {
      "epoch": 0.45882861465312297,
      "grad_norm": 1.0024068355560303,
      "learning_rate": 2.3619929657512934e-05,
      "loss": 0.988,
      "step": 2123
    },
    {
      "epoch": 0.45904473741084934,
      "grad_norm": 0.8974485993385315,
      "learning_rate": 2.3606160031550952e-05,
      "loss": 0.8367,
      "step": 2124
    },
    {
      "epoch": 0.45926086016857576,
      "grad_norm": 0.9284573793411255,
      "learning_rate": 2.35923886385789e-05,
      "loss": 1.1474,
      "step": 2125
    },
    {
      "epoch": 0.4594769829263021,
      "grad_norm": 0.85592120885849,
      "learning_rate": 2.357861548534471e-05,
      "loss": 0.9362,
      "step": 2126
    },
    {
      "epoch": 0.45969310568402855,
      "grad_norm": 0.926304042339325,
      "learning_rate": 2.3564840578597213e-05,
      "loss": 0.9677,
      "step": 2127
    },
    {
      "epoch": 0.4599092284417549,
      "grad_norm": 0.8898852467536926,
      "learning_rate": 2.3551063925086072e-05,
      "loss": 0.9228,
      "step": 2128
    },
    {
      "epoch": 0.4601253511994813,
      "grad_norm": 0.9166799783706665,
      "learning_rate": 2.353728553156181e-05,
      "loss": 0.9467,
      "step": 2129
    },
    {
      "epoch": 0.4603414739572077,
      "grad_norm": 0.9641197323799133,
      "learning_rate": 2.3523505404775825e-05,
      "loss": 0.8508,
      "step": 2130
    },
    {
      "epoch": 0.46055759671493407,
      "grad_norm": 1.0209652185440063,
      "learning_rate": 2.3509723551480325e-05,
      "loss": 0.9728,
      "step": 2131
    },
    {
      "epoch": 0.4607737194726605,
      "grad_norm": 0.8495245575904846,
      "learning_rate": 2.34959399784284e-05,
      "loss": 0.9475,
      "step": 2132
    },
    {
      "epoch": 0.46098984223038686,
      "grad_norm": 0.7956224679946899,
      "learning_rate": 2.348215469237397e-05,
      "loss": 0.9316,
      "step": 2133
    },
    {
      "epoch": 0.4612059649881132,
      "grad_norm": 1.0309728384017944,
      "learning_rate": 2.3468367700071796e-05,
      "loss": 0.8616,
      "step": 2134
    },
    {
      "epoch": 0.46142208774583965,
      "grad_norm": 0.7615361213684082,
      "learning_rate": 2.3454579008277457e-05,
      "loss": 0.8523,
      "step": 2135
    },
    {
      "epoch": 0.461638210503566,
      "grad_norm": 1.004491925239563,
      "learning_rate": 2.34407886237474e-05,
      "loss": 0.7608,
      "step": 2136
    },
    {
      "epoch": 0.46185433326129244,
      "grad_norm": 1.046006441116333,
      "learning_rate": 2.3426996553238865e-05,
      "loss": 0.9058,
      "step": 2137
    },
    {
      "epoch": 0.4620704560190188,
      "grad_norm": 0.9255813360214233,
      "learning_rate": 2.3413202803509938e-05,
      "loss": 0.8566,
      "step": 2138
    },
    {
      "epoch": 0.46228657877674517,
      "grad_norm": 0.902448296546936,
      "learning_rate": 2.3399407381319536e-05,
      "loss": 1.1862,
      "step": 2139
    },
    {
      "epoch": 0.4625027015344716,
      "grad_norm": 1.046591877937317,
      "learning_rate": 2.3385610293427367e-05,
      "loss": 1.0268,
      "step": 2140
    },
    {
      "epoch": 0.46271882429219796,
      "grad_norm": 0.8518416881561279,
      "learning_rate": 2.3371811546593985e-05,
      "loss": 0.8228,
      "step": 2141
    },
    {
      "epoch": 0.4629349470499244,
      "grad_norm": 0.8757534623146057,
      "learning_rate": 2.3358011147580738e-05,
      "loss": 1.0293,
      "step": 2142
    },
    {
      "epoch": 0.46315106980765075,
      "grad_norm": 0.8403485417366028,
      "learning_rate": 2.3344209103149792e-05,
      "loss": 0.9059,
      "step": 2143
    },
    {
      "epoch": 0.4633671925653771,
      "grad_norm": 1.0074067115783691,
      "learning_rate": 2.3330405420064116e-05,
      "loss": 0.84,
      "step": 2144
    },
    {
      "epoch": 0.46358331532310354,
      "grad_norm": 1.0459541082382202,
      "learning_rate": 2.3316600105087484e-05,
      "loss": 0.9625,
      "step": 2145
    },
    {
      "epoch": 0.4637994380808299,
      "grad_norm": 0.9329015016555786,
      "learning_rate": 2.3302793164984472e-05,
      "loss": 0.828,
      "step": 2146
    },
    {
      "epoch": 0.4640155608385563,
      "grad_norm": 0.8942865133285522,
      "learning_rate": 2.3288984606520435e-05,
      "loss": 0.7879,
      "step": 2147
    },
    {
      "epoch": 0.4642316835962827,
      "grad_norm": 0.8742052316665649,
      "learning_rate": 2.327517443646155e-05,
      "loss": 1.0176,
      "step": 2148
    },
    {
      "epoch": 0.46444780635400906,
      "grad_norm": 0.9404723048210144,
      "learning_rate": 2.3261362661574758e-05,
      "loss": 1.0162,
      "step": 2149
    },
    {
      "epoch": 0.4646639291117355,
      "grad_norm": 0.8055806159973145,
      "learning_rate": 2.3247549288627806e-05,
      "loss": 0.7548,
      "step": 2150
    },
    {
      "epoch": 0.46488005186946185,
      "grad_norm": 0.7803989052772522,
      "learning_rate": 2.3233734324389204e-05,
      "loss": 0.9598,
      "step": 2151
    },
    {
      "epoch": 0.4650961746271882,
      "grad_norm": 0.8580408096313477,
      "learning_rate": 2.321991777562826e-05,
      "loss": 0.7791,
      "step": 2152
    },
    {
      "epoch": 0.46531229738491464,
      "grad_norm": 0.91729736328125,
      "learning_rate": 2.320609964911505e-05,
      "loss": 0.8086,
      "step": 2153
    },
    {
      "epoch": 0.465528420142641,
      "grad_norm": 0.9715539813041687,
      "learning_rate": 2.3192279951620426e-05,
      "loss": 0.8756,
      "step": 2154
    },
    {
      "epoch": 0.46574454290036743,
      "grad_norm": 0.8068549633026123,
      "learning_rate": 2.3178458689916e-05,
      "loss": 0.6827,
      "step": 2155
    },
    {
      "epoch": 0.4659606656580938,
      "grad_norm": 0.909864068031311,
      "learning_rate": 2.3164635870774167e-05,
      "loss": 1.0626,
      "step": 2156
    },
    {
      "epoch": 0.46617678841582016,
      "grad_norm": 0.8749873042106628,
      "learning_rate": 2.3150811500968077e-05,
      "loss": 0.8645,
      "step": 2157
    },
    {
      "epoch": 0.4663929111735466,
      "grad_norm": 0.9503135681152344,
      "learning_rate": 2.3136985587271627e-05,
      "loss": 0.9609,
      "step": 2158
    },
    {
      "epoch": 0.46660903393127295,
      "grad_norm": 1.0307978391647339,
      "learning_rate": 2.31231581364595e-05,
      "loss": 0.9689,
      "step": 2159
    },
    {
      "epoch": 0.4668251566889994,
      "grad_norm": 1.094694972038269,
      "learning_rate": 2.3109329155307094e-05,
      "loss": 1.0066,
      "step": 2160
    },
    {
      "epoch": 0.46704127944672574,
      "grad_norm": 0.9437207579612732,
      "learning_rate": 2.3095498650590598e-05,
      "loss": 0.9423,
      "step": 2161
    },
    {
      "epoch": 0.4672574022044521,
      "grad_norm": 0.907027006149292,
      "learning_rate": 2.3081666629086918e-05,
      "loss": 0.9202,
      "step": 2162
    },
    {
      "epoch": 0.46747352496217853,
      "grad_norm": 1.0126712322235107,
      "learning_rate": 2.3067833097573713e-05,
      "loss": 1.0868,
      "step": 2163
    },
    {
      "epoch": 0.4676896477199049,
      "grad_norm": 0.8758916258811951,
      "learning_rate": 2.3053998062829375e-05,
      "loss": 1.0776,
      "step": 2164
    },
    {
      "epoch": 0.4679057704776313,
      "grad_norm": 1.004266619682312,
      "learning_rate": 2.3040161531633057e-05,
      "loss": 1.0596,
      "step": 2165
    },
    {
      "epoch": 0.4681218932353577,
      "grad_norm": 0.9461581707000732,
      "learning_rate": 2.3026323510764614e-05,
      "loss": 0.8754,
      "step": 2166
    },
    {
      "epoch": 0.46833801599308406,
      "grad_norm": 0.9111448526382446,
      "learning_rate": 2.3012484007004644e-05,
      "loss": 0.8873,
      "step": 2167
    },
    {
      "epoch": 0.4685541387508105,
      "grad_norm": 1.1857701539993286,
      "learning_rate": 2.299864302713447e-05,
      "loss": 1.0155,
      "step": 2168
    },
    {
      "epoch": 0.46877026150853685,
      "grad_norm": 0.9034219980239868,
      "learning_rate": 2.298480057793615e-05,
      "loss": 1.0139,
      "step": 2169
    },
    {
      "epoch": 0.4689863842662632,
      "grad_norm": 0.9284543991088867,
      "learning_rate": 2.297095666619245e-05,
      "loss": 0.9375,
      "step": 2170
    },
    {
      "epoch": 0.46920250702398963,
      "grad_norm": 0.9861460328102112,
      "learning_rate": 2.2957111298686844e-05,
      "loss": 1.1214,
      "step": 2171
    },
    {
      "epoch": 0.469418629781716,
      "grad_norm": 0.997794508934021,
      "learning_rate": 2.2943264482203544e-05,
      "loss": 0.831,
      "step": 2172
    },
    {
      "epoch": 0.4696347525394424,
      "grad_norm": 0.9758365750312805,
      "learning_rate": 2.2929416223527446e-05,
      "loss": 0.9791,
      "step": 2173
    },
    {
      "epoch": 0.4698508752971688,
      "grad_norm": 0.8575986623764038,
      "learning_rate": 2.2915566529444173e-05,
      "loss": 0.8343,
      "step": 2174
    },
    {
      "epoch": 0.47006699805489516,
      "grad_norm": 0.8302111625671387,
      "learning_rate": 2.2901715406740043e-05,
      "loss": 0.7926,
      "step": 2175
    },
    {
      "epoch": 0.4702831208126216,
      "grad_norm": 0.9717157483100891,
      "learning_rate": 2.2887862862202068e-05,
      "loss": 0.7629,
      "step": 2176
    },
    {
      "epoch": 0.47049924357034795,
      "grad_norm": 0.8666814565658569,
      "learning_rate": 2.2874008902617967e-05,
      "loss": 0.8743,
      "step": 2177
    },
    {
      "epoch": 0.47071536632807437,
      "grad_norm": 0.8030889630317688,
      "learning_rate": 2.2860153534776145e-05,
      "loss": 0.8479,
      "step": 2178
    },
    {
      "epoch": 0.47093148908580074,
      "grad_norm": 0.9813153147697449,
      "learning_rate": 2.2846296765465708e-05,
      "loss": 0.8502,
      "step": 2179
    },
    {
      "epoch": 0.4711476118435271,
      "grad_norm": 0.9990540742874146,
      "learning_rate": 2.2832438601476428e-05,
      "loss": 0.9455,
      "step": 2180
    },
    {
      "epoch": 0.4713637346012535,
      "grad_norm": 1.0254837274551392,
      "learning_rate": 2.2818579049598788e-05,
      "loss": 1.0668,
      "step": 2181
    },
    {
      "epoch": 0.4715798573589799,
      "grad_norm": 0.9308380484580994,
      "learning_rate": 2.2804718116623926e-05,
      "loss": 0.9167,
      "step": 2182
    },
    {
      "epoch": 0.4717959801167063,
      "grad_norm": 0.8804174065589905,
      "learning_rate": 2.2790855809343673e-05,
      "loss": 0.8824,
      "step": 2183
    },
    {
      "epoch": 0.4720121028744327,
      "grad_norm": 0.847017228603363,
      "learning_rate": 2.2776992134550517e-05,
      "loss": 0.7257,
      "step": 2184
    },
    {
      "epoch": 0.47222822563215905,
      "grad_norm": 0.9161202907562256,
      "learning_rate": 2.2763127099037646e-05,
      "loss": 0.8147,
      "step": 2185
    },
    {
      "epoch": 0.47244434838988547,
      "grad_norm": 0.9066073298454285,
      "learning_rate": 2.274926070959888e-05,
      "loss": 0.8774,
      "step": 2186
    },
    {
      "epoch": 0.47266047114761184,
      "grad_norm": 0.9424638152122498,
      "learning_rate": 2.2735392973028723e-05,
      "loss": 0.9729,
      "step": 2187
    },
    {
      "epoch": 0.4728765939053382,
      "grad_norm": 0.8449385166168213,
      "learning_rate": 2.272152389612233e-05,
      "loss": 0.8649,
      "step": 2188
    },
    {
      "epoch": 0.4730927166630646,
      "grad_norm": 0.8816748261451721,
      "learning_rate": 2.2707653485675513e-05,
      "loss": 1.0513,
      "step": 2189
    },
    {
      "epoch": 0.473308839420791,
      "grad_norm": 1.061385154724121,
      "learning_rate": 2.269378174848476e-05,
      "loss": 0.8477,
      "step": 2190
    },
    {
      "epoch": 0.4735249621785174,
      "grad_norm": 0.9070470929145813,
      "learning_rate": 2.2679908691347165e-05,
      "loss": 0.7769,
      "step": 2191
    },
    {
      "epoch": 0.4737410849362438,
      "grad_norm": 0.8117925524711609,
      "learning_rate": 2.2666034321060516e-05,
      "loss": 0.8018,
      "step": 2192
    },
    {
      "epoch": 0.47395720769397015,
      "grad_norm": 0.9357739090919495,
      "learning_rate": 2.265215864442321e-05,
      "loss": 0.8859,
      "step": 2193
    },
    {
      "epoch": 0.4741733304516966,
      "grad_norm": 1.0191253423690796,
      "learning_rate": 2.2638281668234295e-05,
      "loss": 0.9105,
      "step": 2194
    },
    {
      "epoch": 0.47438945320942294,
      "grad_norm": 0.8923472166061401,
      "learning_rate": 2.2624403399293464e-05,
      "loss": 0.8753,
      "step": 2195
    },
    {
      "epoch": 0.47460557596714936,
      "grad_norm": 1.0272793769836426,
      "learning_rate": 2.261052384440104e-05,
      "loss": 0.885,
      "step": 2196
    },
    {
      "epoch": 0.47482169872487573,
      "grad_norm": 0.9659473299980164,
      "learning_rate": 2.259664301035796e-05,
      "loss": 0.9349,
      "step": 2197
    },
    {
      "epoch": 0.4750378214826021,
      "grad_norm": 0.9282272458076477,
      "learning_rate": 2.25827609039658e-05,
      "loss": 0.9397,
      "step": 2198
    },
    {
      "epoch": 0.4752539442403285,
      "grad_norm": 1.0531755685806274,
      "learning_rate": 2.2568877532026785e-05,
      "loss": 0.7518,
      "step": 2199
    },
    {
      "epoch": 0.4754700669980549,
      "grad_norm": 0.8328658938407898,
      "learning_rate": 2.2554992901343705e-05,
      "loss": 0.7891,
      "step": 2200
    },
    {
      "epoch": 0.4756861897557813,
      "grad_norm": 1.065454125404358,
      "learning_rate": 2.2541107018720013e-05,
      "loss": 0.9156,
      "step": 2201
    },
    {
      "epoch": 0.4759023125135077,
      "grad_norm": 0.9001337289810181,
      "learning_rate": 2.2527219890959756e-05,
      "loss": 0.8919,
      "step": 2202
    },
    {
      "epoch": 0.47611843527123404,
      "grad_norm": 1.0516306161880493,
      "learning_rate": 2.2513331524867596e-05,
      "loss": 1.1024,
      "step": 2203
    },
    {
      "epoch": 0.47633455802896046,
      "grad_norm": 0.952760636806488,
      "learning_rate": 2.24994419272488e-05,
      "loss": 0.8666,
      "step": 2204
    },
    {
      "epoch": 0.47655068078668683,
      "grad_norm": 1.0019255876541138,
      "learning_rate": 2.2485551104909235e-05,
      "loss": 1.0784,
      "step": 2205
    },
    {
      "epoch": 0.47676680354441325,
      "grad_norm": 1.068312168121338,
      "learning_rate": 2.2471659064655375e-05,
      "loss": 1.0797,
      "step": 2206
    },
    {
      "epoch": 0.4769829263021396,
      "grad_norm": 1.2210317850112915,
      "learning_rate": 2.2457765813294285e-05,
      "loss": 0.7697,
      "step": 2207
    },
    {
      "epoch": 0.477199049059866,
      "grad_norm": 1.0743895769119263,
      "learning_rate": 2.2443871357633632e-05,
      "loss": 1.1007,
      "step": 2208
    },
    {
      "epoch": 0.4774151718175924,
      "grad_norm": 0.9332402348518372,
      "learning_rate": 2.2429975704481658e-05,
      "loss": 0.9549,
      "step": 2209
    },
    {
      "epoch": 0.4776312945753188,
      "grad_norm": 0.833015501499176,
      "learning_rate": 2.2416078860647213e-05,
      "loss": 0.8848,
      "step": 2210
    },
    {
      "epoch": 0.47784741733304514,
      "grad_norm": 0.9843903183937073,
      "learning_rate": 2.2402180832939707e-05,
      "loss": 0.9991,
      "step": 2211
    },
    {
      "epoch": 0.47806354009077157,
      "grad_norm": 0.8275328278541565,
      "learning_rate": 2.2388281628169146e-05,
      "loss": 0.8761,
      "step": 2212
    },
    {
      "epoch": 0.47827966284849793,
      "grad_norm": 1.0255450010299683,
      "learning_rate": 2.2374381253146105e-05,
      "loss": 0.9425,
      "step": 2213
    },
    {
      "epoch": 0.47849578560622436,
      "grad_norm": 1.0531729459762573,
      "learning_rate": 2.2360479714681745e-05,
      "loss": 1.0212,
      "step": 2214
    },
    {
      "epoch": 0.4787119083639507,
      "grad_norm": 0.8640594482421875,
      "learning_rate": 2.234657701958778e-05,
      "loss": 0.8951,
      "step": 2215
    },
    {
      "epoch": 0.4789280311216771,
      "grad_norm": 1.0316178798675537,
      "learning_rate": 2.2332673174676496e-05,
      "loss": 0.9412,
      "step": 2216
    },
    {
      "epoch": 0.4791441538794035,
      "grad_norm": 0.8753515481948853,
      "learning_rate": 2.2318768186760753e-05,
      "loss": 0.9821,
      "step": 2217
    },
    {
      "epoch": 0.4793602766371299,
      "grad_norm": 0.998913049697876,
      "learning_rate": 2.2304862062653956e-05,
      "loss": 1.0409,
      "step": 2218
    },
    {
      "epoch": 0.4795763993948563,
      "grad_norm": 0.9236776232719421,
      "learning_rate": 2.229095480917008e-05,
      "loss": 0.9509,
      "step": 2219
    },
    {
      "epoch": 0.47979252215258267,
      "grad_norm": 0.9642339944839478,
      "learning_rate": 2.2277046433123636e-05,
      "loss": 1.0345,
      "step": 2220
    },
    {
      "epoch": 0.48000864491030903,
      "grad_norm": 0.9492659568786621,
      "learning_rate": 2.226313694132971e-05,
      "loss": 0.8834,
      "step": 2221
    },
    {
      "epoch": 0.48022476766803546,
      "grad_norm": 0.8292040228843689,
      "learning_rate": 2.2249226340603913e-05,
      "loss": 0.7468,
      "step": 2222
    },
    {
      "epoch": 0.4804408904257618,
      "grad_norm": 0.9183518886566162,
      "learning_rate": 2.223531463776241e-05,
      "loss": 0.914,
      "step": 2223
    },
    {
      "epoch": 0.48065701318348825,
      "grad_norm": 0.960827648639679,
      "learning_rate": 2.22214018396219e-05,
      "loss": 0.8415,
      "step": 2224
    },
    {
      "epoch": 0.4808731359412146,
      "grad_norm": 0.932241678237915,
      "learning_rate": 2.2207487952999623e-05,
      "loss": 0.9552,
      "step": 2225
    },
    {
      "epoch": 0.481089258698941,
      "grad_norm": 0.9515604376792908,
      "learning_rate": 2.2193572984713356e-05,
      "loss": 0.8896,
      "step": 2226
    },
    {
      "epoch": 0.4813053814566674,
      "grad_norm": 0.9919021129608154,
      "learning_rate": 2.2179656941581387e-05,
      "loss": 0.8612,
      "step": 2227
    },
    {
      "epoch": 0.48152150421439377,
      "grad_norm": 0.9585627913475037,
      "learning_rate": 2.216573983042257e-05,
      "loss": 0.9442,
      "step": 2228
    },
    {
      "epoch": 0.48173762697212014,
      "grad_norm": 0.9216515421867371,
      "learning_rate": 2.2151821658056232e-05,
      "loss": 0.9371,
      "step": 2229
    },
    {
      "epoch": 0.48195374972984656,
      "grad_norm": 0.9168282151222229,
      "learning_rate": 2.2137902431302264e-05,
      "loss": 0.9961,
      "step": 2230
    },
    {
      "epoch": 0.4821698724875729,
      "grad_norm": 0.9793004989624023,
      "learning_rate": 2.2123982156981034e-05,
      "loss": 1.1558,
      "step": 2231
    },
    {
      "epoch": 0.48238599524529935,
      "grad_norm": 0.8328722715377808,
      "learning_rate": 2.2110060841913467e-05,
      "loss": 0.7743,
      "step": 2232
    },
    {
      "epoch": 0.4826021180030257,
      "grad_norm": 1.0485424995422363,
      "learning_rate": 2.2096138492920965e-05,
      "loss": 0.8943,
      "step": 2233
    },
    {
      "epoch": 0.4828182407607521,
      "grad_norm": 1.0185878276824951,
      "learning_rate": 2.2082215116825445e-05,
      "loss": 0.9977,
      "step": 2234
    },
    {
      "epoch": 0.4830343635184785,
      "grad_norm": 1.0318353176116943,
      "learning_rate": 2.2068290720449328e-05,
      "loss": 0.9433,
      "step": 2235
    },
    {
      "epoch": 0.48325048627620487,
      "grad_norm": 0.9102119207382202,
      "learning_rate": 2.2054365310615548e-05,
      "loss": 1.0616,
      "step": 2236
    },
    {
      "epoch": 0.4834666090339313,
      "grad_norm": 0.7391582131385803,
      "learning_rate": 2.2040438894147505e-05,
      "loss": 0.8957,
      "step": 2237
    },
    {
      "epoch": 0.48368273179165766,
      "grad_norm": 1.055592656135559,
      "learning_rate": 2.202651147786912e-05,
      "loss": 0.8464,
      "step": 2238
    },
    {
      "epoch": 0.483898854549384,
      "grad_norm": 1.039684534072876,
      "learning_rate": 2.2012583068604803e-05,
      "loss": 1.0491,
      "step": 2239
    },
    {
      "epoch": 0.48411497730711045,
      "grad_norm": 1.0695092678070068,
      "learning_rate": 2.1998653673179433e-05,
      "loss": 0.8996,
      "step": 2240
    },
    {
      "epoch": 0.4843311000648368,
      "grad_norm": 0.9700279235839844,
      "learning_rate": 2.1984723298418384e-05,
      "loss": 0.7883,
      "step": 2241
    },
    {
      "epoch": 0.48454722282256324,
      "grad_norm": 0.8059743046760559,
      "learning_rate": 2.1970791951147505e-05,
      "loss": 0.7191,
      "step": 2242
    },
    {
      "epoch": 0.4847633455802896,
      "grad_norm": 0.8079337477684021,
      "learning_rate": 2.1956859638193126e-05,
      "loss": 0.929,
      "step": 2243
    },
    {
      "epoch": 0.484979468338016,
      "grad_norm": 1.0118950605392456,
      "learning_rate": 2.194292636638205e-05,
      "loss": 0.8756,
      "step": 2244
    },
    {
      "epoch": 0.4851955910957424,
      "grad_norm": 0.9371973872184753,
      "learning_rate": 2.1928992142541545e-05,
      "loss": 0.8836,
      "step": 2245
    },
    {
      "epoch": 0.48541171385346876,
      "grad_norm": 0.9567400217056274,
      "learning_rate": 2.1915056973499346e-05,
      "loss": 0.9433,
      "step": 2246
    },
    {
      "epoch": 0.4856278366111952,
      "grad_norm": 1.198793888092041,
      "learning_rate": 2.190112086608365e-05,
      "loss": 1.1154,
      "step": 2247
    },
    {
      "epoch": 0.48584395936892155,
      "grad_norm": 0.9048296809196472,
      "learning_rate": 2.1887183827123143e-05,
      "loss": 0.7672,
      "step": 2248
    },
    {
      "epoch": 0.4860600821266479,
      "grad_norm": 1.0196088552474976,
      "learning_rate": 2.187324586344691e-05,
      "loss": 0.9443,
      "step": 2249
    },
    {
      "epoch": 0.48627620488437434,
      "grad_norm": 1.0643374919891357,
      "learning_rate": 2.1859306981884542e-05,
      "loss": 0.8723,
      "step": 2250
    },
    {
      "epoch": 0.4864923276421007,
      "grad_norm": 0.9235920310020447,
      "learning_rate": 2.184536718926604e-05,
      "loss": 0.7271,
      "step": 2251
    },
    {
      "epoch": 0.4867084503998271,
      "grad_norm": 0.9010453820228577,
      "learning_rate": 2.1831426492421893e-05,
      "loss": 0.9279,
      "step": 2252
    },
    {
      "epoch": 0.4869245731575535,
      "grad_norm": 0.8894018530845642,
      "learning_rate": 2.1817484898182992e-05,
      "loss": 0.8781,
      "step": 2253
    },
    {
      "epoch": 0.48714069591527986,
      "grad_norm": 0.8632429838180542,
      "learning_rate": 2.180354241338069e-05,
      "loss": 0.8908,
      "step": 2254
    },
    {
      "epoch": 0.4873568186730063,
      "grad_norm": 1.0111632347106934,
      "learning_rate": 2.1789599044846782e-05,
      "loss": 1.0216,
      "step": 2255
    },
    {
      "epoch": 0.48757294143073265,
      "grad_norm": 0.933199942111969,
      "learning_rate": 2.1775654799413476e-05,
      "loss": 0.8884,
      "step": 2256
    },
    {
      "epoch": 0.487789064188459,
      "grad_norm": 0.8441728353500366,
      "learning_rate": 2.1761709683913423e-05,
      "loss": 0.7231,
      "step": 2257
    },
    {
      "epoch": 0.48800518694618544,
      "grad_norm": 1.016108751296997,
      "learning_rate": 2.174776370517969e-05,
      "loss": 0.7251,
      "step": 2258
    },
    {
      "epoch": 0.4882213097039118,
      "grad_norm": 0.8854086399078369,
      "learning_rate": 2.1733816870045798e-05,
      "loss": 0.9986,
      "step": 2259
    },
    {
      "epoch": 0.48843743246163823,
      "grad_norm": 0.9269996881484985,
      "learning_rate": 2.1719869185345632e-05,
      "loss": 0.9333,
      "step": 2260
    },
    {
      "epoch": 0.4886535552193646,
      "grad_norm": 0.8737806081771851,
      "learning_rate": 2.170592065791355e-05,
      "loss": 0.9868,
      "step": 2261
    },
    {
      "epoch": 0.48886967797709097,
      "grad_norm": 0.9864827394485474,
      "learning_rate": 2.1691971294584282e-05,
      "loss": 0.9379,
      "step": 2262
    },
    {
      "epoch": 0.4890858007348174,
      "grad_norm": 0.9524204134941101,
      "learning_rate": 2.1678021102192996e-05,
      "loss": 0.7459,
      "step": 2263
    },
    {
      "epoch": 0.48930192349254376,
      "grad_norm": 0.9791752099990845,
      "learning_rate": 2.166407008757525e-05,
      "loss": 0.9494,
      "step": 2264
    },
    {
      "epoch": 0.4895180462502702,
      "grad_norm": 1.0480341911315918,
      "learning_rate": 2.1650118257567002e-05,
      "loss": 0.9778,
      "step": 2265
    },
    {
      "epoch": 0.48973416900799654,
      "grad_norm": 0.9262263774871826,
      "learning_rate": 2.163616561900463e-05,
      "loss": 0.7492,
      "step": 2266
    },
    {
      "epoch": 0.4899502917657229,
      "grad_norm": 0.9226260185241699,
      "learning_rate": 2.162221217872488e-05,
      "loss": 0.9377,
      "step": 2267
    },
    {
      "epoch": 0.49016641452344933,
      "grad_norm": 1.1801583766937256,
      "learning_rate": 2.160825794356492e-05,
      "loss": 0.8532,
      "step": 2268
    },
    {
      "epoch": 0.4903825372811757,
      "grad_norm": 0.9868795871734619,
      "learning_rate": 2.1594302920362276e-05,
      "loss": 1.0342,
      "step": 2269
    },
    {
      "epoch": 0.4905986600389021,
      "grad_norm": 0.9075852632522583,
      "learning_rate": 2.1580347115954896e-05,
      "loss": 0.7872,
      "step": 2270
    },
    {
      "epoch": 0.4908147827966285,
      "grad_norm": 0.9382511973381042,
      "learning_rate": 2.1566390537181075e-05,
      "loss": 0.7565,
      "step": 2271
    },
    {
      "epoch": 0.49103090555435486,
      "grad_norm": 0.8777767419815063,
      "learning_rate": 2.1552433190879512e-05,
      "loss": 0.736,
      "step": 2272
    },
    {
      "epoch": 0.4912470283120813,
      "grad_norm": 0.7822272777557373,
      "learning_rate": 2.1538475083889278e-05,
      "loss": 0.6991,
      "step": 2273
    },
    {
      "epoch": 0.49146315106980765,
      "grad_norm": 0.8870785236358643,
      "learning_rate": 2.152451622304981e-05,
      "loss": 0.8687,
      "step": 2274
    },
    {
      "epoch": 0.491679273827534,
      "grad_norm": 1.043366551399231,
      "learning_rate": 2.1510556615200917e-05,
      "loss": 0.891,
      "step": 2275
    },
    {
      "epoch": 0.49189539658526044,
      "grad_norm": 0.7973032593727112,
      "learning_rate": 2.1496596267182773e-05,
      "loss": 0.8404,
      "step": 2276
    },
    {
      "epoch": 0.4921115193429868,
      "grad_norm": 0.9199157953262329,
      "learning_rate": 2.1482635185835917e-05,
      "loss": 1.0426,
      "step": 2277
    },
    {
      "epoch": 0.4923276421007132,
      "grad_norm": 1.0459051132202148,
      "learning_rate": 2.1468673378001242e-05,
      "loss": 0.8554,
      "step": 2278
    },
    {
      "epoch": 0.4925437648584396,
      "grad_norm": 0.9541643261909485,
      "learning_rate": 2.1454710850520016e-05,
      "loss": 0.9347,
      "step": 2279
    },
    {
      "epoch": 0.49275988761616596,
      "grad_norm": 0.9007477164268494,
      "learning_rate": 2.144074761023383e-05,
      "loss": 0.9255,
      "step": 2280
    },
    {
      "epoch": 0.4929760103738924,
      "grad_norm": 0.8977211117744446,
      "learning_rate": 2.1426783663984648e-05,
      "loss": 0.9632,
      "step": 2281
    },
    {
      "epoch": 0.49319213313161875,
      "grad_norm": 0.8943268656730652,
      "learning_rate": 2.141281901861477e-05,
      "loss": 0.8649,
      "step": 2282
    },
    {
      "epoch": 0.49340825588934517,
      "grad_norm": 0.9033158421516418,
      "learning_rate": 2.139885368096684e-05,
      "loss": 0.8529,
      "step": 2283
    },
    {
      "epoch": 0.49362437864707154,
      "grad_norm": 0.8825488686561584,
      "learning_rate": 2.1384887657883836e-05,
      "loss": 0.9252,
      "step": 2284
    },
    {
      "epoch": 0.4938405014047979,
      "grad_norm": 0.8469288349151611,
      "learning_rate": 2.137092095620908e-05,
      "loss": 0.8682,
      "step": 2285
    },
    {
      "epoch": 0.4940566241625243,
      "grad_norm": 0.9247711300849915,
      "learning_rate": 2.135695358278623e-05,
      "loss": 0.7958,
      "step": 2286
    },
    {
      "epoch": 0.4942727469202507,
      "grad_norm": 0.8183013796806335,
      "learning_rate": 2.1342985544459258e-05,
      "loss": 0.8834,
      "step": 2287
    },
    {
      "epoch": 0.4944888696779771,
      "grad_norm": 0.8992856740951538,
      "learning_rate": 2.132901684807248e-05,
      "loss": 1.0267,
      "step": 2288
    },
    {
      "epoch": 0.4947049924357035,
      "grad_norm": 0.9574567079544067,
      "learning_rate": 2.1315047500470505e-05,
      "loss": 0.9439,
      "step": 2289
    },
    {
      "epoch": 0.49492111519342985,
      "grad_norm": 0.9456168413162231,
      "learning_rate": 2.1301077508498305e-05,
      "loss": 0.9476,
      "step": 2290
    },
    {
      "epoch": 0.4951372379511563,
      "grad_norm": 0.8847987055778503,
      "learning_rate": 2.128710687900113e-05,
      "loss": 0.9578,
      "step": 2291
    },
    {
      "epoch": 0.49535336070888264,
      "grad_norm": 1.0706462860107422,
      "learning_rate": 2.1273135618824562e-05,
      "loss": 1.0467,
      "step": 2292
    },
    {
      "epoch": 0.495569483466609,
      "grad_norm": 0.9031651020050049,
      "learning_rate": 2.1259163734814482e-05,
      "loss": 0.7507,
      "step": 2293
    },
    {
      "epoch": 0.49578560622433543,
      "grad_norm": 1.0642560720443726,
      "learning_rate": 2.1245191233817085e-05,
      "loss": 0.9923,
      "step": 2294
    },
    {
      "epoch": 0.4960017289820618,
      "grad_norm": 1.0771523714065552,
      "learning_rate": 2.1231218122678862e-05,
      "loss": 1.0417,
      "step": 2295
    },
    {
      "epoch": 0.4962178517397882,
      "grad_norm": 0.9673963785171509,
      "learning_rate": 2.1217244408246605e-05,
      "loss": 0.8142,
      "step": 2296
    },
    {
      "epoch": 0.4964339744975146,
      "grad_norm": 0.8511964082717896,
      "learning_rate": 2.1203270097367398e-05,
      "loss": 0.9035,
      "step": 2297
    },
    {
      "epoch": 0.49665009725524095,
      "grad_norm": 1.0219144821166992,
      "learning_rate": 2.1189295196888624e-05,
      "loss": 0.9958,
      "step": 2298
    },
    {
      "epoch": 0.4968662200129674,
      "grad_norm": 0.8596683144569397,
      "learning_rate": 2.117531971365796e-05,
      "loss": 0.8375,
      "step": 2299
    },
    {
      "epoch": 0.49708234277069374,
      "grad_norm": 1.0385677814483643,
      "learning_rate": 2.1161343654523346e-05,
      "loss": 0.8623,
      "step": 2300
    },
    {
      "epoch": 0.49729846552842016,
      "grad_norm": 0.8668344020843506,
      "learning_rate": 2.114736702633303e-05,
      "loss": 0.905,
      "step": 2301
    },
    {
      "epoch": 0.49751458828614653,
      "grad_norm": 0.9759607315063477,
      "learning_rate": 2.113338983593552e-05,
      "loss": 0.8899,
      "step": 2302
    },
    {
      "epoch": 0.4977307110438729,
      "grad_norm": 0.9088861346244812,
      "learning_rate": 2.1119412090179616e-05,
      "loss": 1.0605,
      "step": 2303
    },
    {
      "epoch": 0.4979468338015993,
      "grad_norm": 0.8826245069503784,
      "learning_rate": 2.110543379591437e-05,
      "loss": 0.8327,
      "step": 2304
    },
    {
      "epoch": 0.4981629565593257,
      "grad_norm": 0.9717110395431519,
      "learning_rate": 2.109145495998912e-05,
      "loss": 0.9719,
      "step": 2305
    },
    {
      "epoch": 0.4983790793170521,
      "grad_norm": 0.9280117154121399,
      "learning_rate": 2.107747558925347e-05,
      "loss": 0.897,
      "step": 2306
    },
    {
      "epoch": 0.4985952020747785,
      "grad_norm": 0.9122903943061829,
      "learning_rate": 2.106349569055726e-05,
      "loss": 0.9383,
      "step": 2307
    },
    {
      "epoch": 0.49881132483250484,
      "grad_norm": 1.0483940839767456,
      "learning_rate": 2.1049515270750632e-05,
      "loss": 0.8508,
      "step": 2308
    },
    {
      "epoch": 0.49902744759023127,
      "grad_norm": 0.860085129737854,
      "learning_rate": 2.1035534336683936e-05,
      "loss": 0.9986,
      "step": 2309
    },
    {
      "epoch": 0.49924357034795763,
      "grad_norm": 0.9422728419303894,
      "learning_rate": 2.1021552895207815e-05,
      "loss": 0.9296,
      "step": 2310
    },
    {
      "epoch": 0.49945969310568405,
      "grad_norm": 0.8932642936706543,
      "learning_rate": 2.100757095317314e-05,
      "loss": 1.1715,
      "step": 2311
    },
    {
      "epoch": 0.4996758158634104,
      "grad_norm": 0.9482457637786865,
      "learning_rate": 2.0993588517431024e-05,
      "loss": 0.9304,
      "step": 2312
    },
    {
      "epoch": 0.4998919386211368,
      "grad_norm": 1.0164151191711426,
      "learning_rate": 2.097960559483283e-05,
      "loss": 1.05,
      "step": 2313
    },
    {
      "epoch": 0.5001080613788632,
      "grad_norm": 1.0282493829727173,
      "learning_rate": 2.0965622192230158e-05,
      "loss": 1.0176,
      "step": 2314
    },
    {
      "epoch": 0.5003241841365896,
      "grad_norm": 0.8830835223197937,
      "learning_rate": 2.095163831647485e-05,
      "loss": 0.773,
      "step": 2315
    },
    {
      "epoch": 0.500540306894316,
      "grad_norm": 0.9088915586471558,
      "learning_rate": 2.0937653974418963e-05,
      "loss": 0.9497,
      "step": 2316
    },
    {
      "epoch": 0.5007564296520424,
      "grad_norm": 0.9781036376953125,
      "learning_rate": 2.0923669172914796e-05,
      "loss": 1.0345,
      "step": 2317
    },
    {
      "epoch": 0.5009725524097688,
      "grad_norm": 0.8619513511657715,
      "learning_rate": 2.0909683918814867e-05,
      "loss": 0.8277,
      "step": 2318
    },
    {
      "epoch": 0.5011886751674951,
      "grad_norm": 0.9903712868690491,
      "learning_rate": 2.0895698218971927e-05,
      "loss": 0.9553,
      "step": 2319
    },
    {
      "epoch": 0.5014047979252215,
      "grad_norm": 0.8601084351539612,
      "learning_rate": 2.088171208023892e-05,
      "loss": 0.7103,
      "step": 2320
    },
    {
      "epoch": 0.501620920682948,
      "grad_norm": 1.0813382863998413,
      "learning_rate": 2.0867725509469042e-05,
      "loss": 0.8895,
      "step": 2321
    },
    {
      "epoch": 0.5018370434406743,
      "grad_norm": 0.9440907835960388,
      "learning_rate": 2.0853738513515663e-05,
      "loss": 0.8573,
      "step": 2322
    },
    {
      "epoch": 0.5020531661984007,
      "grad_norm": 1.0963239669799805,
      "learning_rate": 2.0839751099232392e-05,
      "loss": 0.9476,
      "step": 2323
    },
    {
      "epoch": 0.5022692889561271,
      "grad_norm": 1.0190849304199219,
      "learning_rate": 2.0825763273473022e-05,
      "loss": 0.9648,
      "step": 2324
    },
    {
      "epoch": 0.5024854117138534,
      "grad_norm": 0.9341250061988831,
      "learning_rate": 2.081177504309156e-05,
      "loss": 0.8598,
      "step": 2325
    },
    {
      "epoch": 0.5027015344715798,
      "grad_norm": 1.0032615661621094,
      "learning_rate": 2.0797786414942197e-05,
      "loss": 1.0656,
      "step": 2326
    },
    {
      "epoch": 0.5029176572293063,
      "grad_norm": 0.8984432816505432,
      "learning_rate": 2.078379739587933e-05,
      "loss": 0.7449,
      "step": 2327
    },
    {
      "epoch": 0.5031337799870327,
      "grad_norm": 0.8945673704147339,
      "learning_rate": 2.0769807992757568e-05,
      "loss": 0.834,
      "step": 2328
    },
    {
      "epoch": 0.503349902744759,
      "grad_norm": 0.8658612966537476,
      "learning_rate": 2.0755818212431653e-05,
      "loss": 0.8557,
      "step": 2329
    },
    {
      "epoch": 0.5035660255024854,
      "grad_norm": 0.9491671323776245,
      "learning_rate": 2.074182806175657e-05,
      "loss": 1.0323,
      "step": 2330
    },
    {
      "epoch": 0.5037821482602118,
      "grad_norm": 0.8965189456939697,
      "learning_rate": 2.0727837547587447e-05,
      "loss": 0.9245,
      "step": 2331
    },
    {
      "epoch": 0.5039982710179381,
      "grad_norm": 0.9609273076057434,
      "learning_rate": 2.0713846676779613e-05,
      "loss": 0.7758,
      "step": 2332
    },
    {
      "epoch": 0.5042143937756646,
      "grad_norm": 0.992252767086029,
      "learning_rate": 2.0699855456188555e-05,
      "loss": 1.0365,
      "step": 2333
    },
    {
      "epoch": 0.504430516533391,
      "grad_norm": 0.907981812953949,
      "learning_rate": 2.068586389266994e-05,
      "loss": 0.9384,
      "step": 2334
    },
    {
      "epoch": 0.5046466392911173,
      "grad_norm": 0.8060364723205566,
      "learning_rate": 2.0671871993079606e-05,
      "loss": 0.9055,
      "step": 2335
    },
    {
      "epoch": 0.5048627620488437,
      "grad_norm": 1.042852520942688,
      "learning_rate": 2.0657879764273546e-05,
      "loss": 1.0777,
      "step": 2336
    },
    {
      "epoch": 0.5050788848065701,
      "grad_norm": 0.8662554025650024,
      "learning_rate": 2.064388721310792e-05,
      "loss": 0.9275,
      "step": 2337
    },
    {
      "epoch": 0.5052950075642966,
      "grad_norm": 0.9479020833969116,
      "learning_rate": 2.062989434643905e-05,
      "loss": 0.9244,
      "step": 2338
    },
    {
      "epoch": 0.5055111303220229,
      "grad_norm": 0.978425920009613,
      "learning_rate": 2.061590117112341e-05,
      "loss": 0.8059,
      "step": 2339
    },
    {
      "epoch": 0.5057272530797493,
      "grad_norm": 0.919092059135437,
      "learning_rate": 2.0601907694017617e-05,
      "loss": 0.915,
      "step": 2340
    },
    {
      "epoch": 0.5059433758374757,
      "grad_norm": 1.0036684274673462,
      "learning_rate": 2.0587913921978445e-05,
      "loss": 0.8228,
      "step": 2341
    },
    {
      "epoch": 0.506159498595202,
      "grad_norm": 0.978500247001648,
      "learning_rate": 2.0573919861862812e-05,
      "loss": 0.9381,
      "step": 2342
    },
    {
      "epoch": 0.5063756213529285,
      "grad_norm": 0.9454809427261353,
      "learning_rate": 2.055992552052777e-05,
      "loss": 0.9983,
      "step": 2343
    },
    {
      "epoch": 0.5065917441106549,
      "grad_norm": 1.0295363664627075,
      "learning_rate": 2.054593090483052e-05,
      "loss": 1.0686,
      "step": 2344
    },
    {
      "epoch": 0.5068078668683812,
      "grad_norm": 0.8806283473968506,
      "learning_rate": 2.053193602162839e-05,
      "loss": 0.899,
      "step": 2345
    },
    {
      "epoch": 0.5070239896261076,
      "grad_norm": 0.9531514048576355,
      "learning_rate": 2.051794087777884e-05,
      "loss": 0.9833,
      "step": 2346
    },
    {
      "epoch": 0.507240112383834,
      "grad_norm": 0.9447773098945618,
      "learning_rate": 2.050394548013945e-05,
      "loss": 0.9393,
      "step": 2347
    },
    {
      "epoch": 0.5074562351415604,
      "grad_norm": 0.9605883955955505,
      "learning_rate": 2.048994983556795e-05,
      "loss": 1.005,
      "step": 2348
    },
    {
      "epoch": 0.5076723578992868,
      "grad_norm": 0.9409863948822021,
      "learning_rate": 2.0475953950922148e-05,
      "loss": 0.7372,
      "step": 2349
    },
    {
      "epoch": 0.5078884806570132,
      "grad_norm": 0.9915863871574402,
      "learning_rate": 2.0461957833060025e-05,
      "loss": 0.9475,
      "step": 2350
    },
    {
      "epoch": 0.5081046034147396,
      "grad_norm": 0.8699820041656494,
      "learning_rate": 2.0447961488839625e-05,
      "loss": 0.7303,
      "step": 2351
    },
    {
      "epoch": 0.5083207261724659,
      "grad_norm": 0.8779844641685486,
      "learning_rate": 2.0433964925119132e-05,
      "loss": 0.936,
      "step": 2352
    },
    {
      "epoch": 0.5085368489301924,
      "grad_norm": 1.072743535041809,
      "learning_rate": 2.041996814875683e-05,
      "loss": 1.0447,
      "step": 2353
    },
    {
      "epoch": 0.5087529716879188,
      "grad_norm": 1.0182307958602905,
      "learning_rate": 2.0405971166611108e-05,
      "loss": 0.8995,
      "step": 2354
    },
    {
      "epoch": 0.5089690944456451,
      "grad_norm": 0.9541364908218384,
      "learning_rate": 2.039197398554045e-05,
      "loss": 0.9084,
      "step": 2355
    },
    {
      "epoch": 0.5091852172033715,
      "grad_norm": 0.9599238038063049,
      "learning_rate": 2.0377976612403443e-05,
      "loss": 0.8955,
      "step": 2356
    },
    {
      "epoch": 0.5094013399610979,
      "grad_norm": 0.8445461988449097,
      "learning_rate": 2.0363979054058777e-05,
      "loss": 0.8298,
      "step": 2357
    },
    {
      "epoch": 0.5096174627188242,
      "grad_norm": 0.9038436412811279,
      "learning_rate": 2.0349981317365205e-05,
      "loss": 0.7916,
      "step": 2358
    },
    {
      "epoch": 0.5098335854765507,
      "grad_norm": 0.8970431089401245,
      "learning_rate": 2.0335983409181606e-05,
      "loss": 0.9355,
      "step": 2359
    },
    {
      "epoch": 0.5100497082342771,
      "grad_norm": 0.934702455997467,
      "learning_rate": 2.0321985336366906e-05,
      "loss": 0.8184,
      "step": 2360
    },
    {
      "epoch": 0.5102658309920035,
      "grad_norm": 0.8944525718688965,
      "learning_rate": 2.0307987105780138e-05,
      "loss": 0.9199,
      "step": 2361
    },
    {
      "epoch": 0.5104819537497298,
      "grad_norm": 0.9491285681724548,
      "learning_rate": 2.0293988724280404e-05,
      "loss": 0.9956,
      "step": 2362
    },
    {
      "epoch": 0.5106980765074562,
      "grad_norm": 0.8704890608787537,
      "learning_rate": 2.027999019872687e-05,
      "loss": 0.8784,
      "step": 2363
    },
    {
      "epoch": 0.5109141992651827,
      "grad_norm": 0.8883909583091736,
      "learning_rate": 2.026599153597879e-05,
      "loss": 0.8123,
      "step": 2364
    },
    {
      "epoch": 0.511130322022909,
      "grad_norm": 0.8475587964057922,
      "learning_rate": 2.025199274289547e-05,
      "loss": 0.89,
      "step": 2365
    },
    {
      "epoch": 0.5113464447806354,
      "grad_norm": 0.888871431350708,
      "learning_rate": 2.023799382633629e-05,
      "loss": 1.0221,
      "step": 2366
    },
    {
      "epoch": 0.5115625675383618,
      "grad_norm": 0.846250057220459,
      "learning_rate": 2.0223994793160678e-05,
      "loss": 0.8937,
      "step": 2367
    },
    {
      "epoch": 0.5117786902960881,
      "grad_norm": 0.9950271844863892,
      "learning_rate": 2.0209995650228146e-05,
      "loss": 0.8609,
      "step": 2368
    },
    {
      "epoch": 0.5119948130538146,
      "grad_norm": 0.9442114233970642,
      "learning_rate": 2.0195996404398222e-05,
      "loss": 0.9993,
      "step": 2369
    },
    {
      "epoch": 0.512210935811541,
      "grad_norm": 0.8403794169425964,
      "learning_rate": 2.0181997062530513e-05,
      "loss": 0.9092,
      "step": 2370
    },
    {
      "epoch": 0.5124270585692673,
      "grad_norm": 0.8744664192199707,
      "learning_rate": 2.016799763148467e-05,
      "loss": 0.9722,
      "step": 2371
    },
    {
      "epoch": 0.5126431813269937,
      "grad_norm": 0.9275670051574707,
      "learning_rate": 2.0153998118120376e-05,
      "loss": 0.9156,
      "step": 2372
    },
    {
      "epoch": 0.5128593040847201,
      "grad_norm": 0.8707650303840637,
      "learning_rate": 2.0139998529297355e-05,
      "loss": 0.921,
      "step": 2373
    },
    {
      "epoch": 0.5130754268424466,
      "grad_norm": 1.0175389051437378,
      "learning_rate": 2.0125998871875385e-05,
      "loss": 1.0841,
      "step": 2374
    },
    {
      "epoch": 0.5132915496001729,
      "grad_norm": 0.9202478528022766,
      "learning_rate": 2.0111999152714254e-05,
      "loss": 1.1347,
      "step": 2375
    },
    {
      "epoch": 0.5135076723578993,
      "grad_norm": 0.929257869720459,
      "learning_rate": 2.00979993786738e-05,
      "loss": 0.9207,
      "step": 2376
    },
    {
      "epoch": 0.5137237951156257,
      "grad_norm": 0.9317587614059448,
      "learning_rate": 2.0083999556613874e-05,
      "loss": 0.8966,
      "step": 2377
    },
    {
      "epoch": 0.513939917873352,
      "grad_norm": 1.0302447080612183,
      "learning_rate": 2.0069999693394354e-05,
      "loss": 0.808,
      "step": 2378
    },
    {
      "epoch": 0.5141560406310784,
      "grad_norm": 0.9321349859237671,
      "learning_rate": 2.005599979587516e-05,
      "loss": 0.8473,
      "step": 2379
    },
    {
      "epoch": 0.5143721633888049,
      "grad_norm": 1.0263248682022095,
      "learning_rate": 2.0041999870916186e-05,
      "loss": 1.0613,
      "step": 2380
    },
    {
      "epoch": 0.5145882861465312,
      "grad_norm": 0.9499816298484802,
      "learning_rate": 2.0027999925377375e-05,
      "loss": 0.9073,
      "step": 2381
    },
    {
      "epoch": 0.5148044089042576,
      "grad_norm": 1.3729811906814575,
      "learning_rate": 2.0013999966118664e-05,
      "loss": 0.8664,
      "step": 2382
    },
    {
      "epoch": 0.515020531661984,
      "grad_norm": 0.9456096887588501,
      "learning_rate": 2e-05,
      "loss": 0.9954,
      "step": 2383
    },
    {
      "epoch": 0.5152366544197104,
      "grad_norm": 1.0228707790374756,
      "learning_rate": 1.998600003388134e-05,
      "loss": 1.041,
      "step": 2384
    },
    {
      "epoch": 0.5154527771774368,
      "grad_norm": 0.8717309832572937,
      "learning_rate": 1.9972000074622628e-05,
      "loss": 0.9667,
      "step": 2385
    },
    {
      "epoch": 0.5156688999351632,
      "grad_norm": 0.8505531549453735,
      "learning_rate": 1.995800012908382e-05,
      "loss": 0.9104,
      "step": 2386
    },
    {
      "epoch": 0.5158850226928896,
      "grad_norm": 0.8359039425849915,
      "learning_rate": 1.9944000204124848e-05,
      "loss": 0.8077,
      "step": 2387
    },
    {
      "epoch": 0.5161011454506159,
      "grad_norm": 0.9694004058837891,
      "learning_rate": 1.993000030660565e-05,
      "loss": 1.0607,
      "step": 2388
    },
    {
      "epoch": 0.5163172682083423,
      "grad_norm": 0.8425354957580566,
      "learning_rate": 1.9916000443386132e-05,
      "loss": 0.7214,
      "step": 2389
    },
    {
      "epoch": 0.5165333909660688,
      "grad_norm": 1.0435991287231445,
      "learning_rate": 1.9902000621326206e-05,
      "loss": 0.9371,
      "step": 2390
    },
    {
      "epoch": 0.5167495137237951,
      "grad_norm": 1.007751703262329,
      "learning_rate": 1.9888000847285753e-05,
      "loss": 1.0005,
      "step": 2391
    },
    {
      "epoch": 0.5169656364815215,
      "grad_norm": 1.0336477756500244,
      "learning_rate": 1.987400112812463e-05,
      "loss": 0.9929,
      "step": 2392
    },
    {
      "epoch": 0.5171817592392479,
      "grad_norm": 0.9204444885253906,
      "learning_rate": 1.986000147070265e-05,
      "loss": 0.863,
      "step": 2393
    },
    {
      "epoch": 0.5173978819969742,
      "grad_norm": 1.0044440031051636,
      "learning_rate": 1.984600188187963e-05,
      "loss": 1.0805,
      "step": 2394
    },
    {
      "epoch": 0.5176140047547007,
      "grad_norm": 0.9487243890762329,
      "learning_rate": 1.9832002368515336e-05,
      "loss": 0.8167,
      "step": 2395
    },
    {
      "epoch": 0.5178301275124271,
      "grad_norm": 0.9148545265197754,
      "learning_rate": 1.9818002937469484e-05,
      "loss": 0.9394,
      "step": 2396
    },
    {
      "epoch": 0.5180462502701535,
      "grad_norm": 1.1300163269042969,
      "learning_rate": 1.9804003595601778e-05,
      "loss": 0.9321,
      "step": 2397
    },
    {
      "epoch": 0.5182623730278798,
      "grad_norm": 0.9817214012145996,
      "learning_rate": 1.9790004349771864e-05,
      "loss": 0.9786,
      "step": 2398
    },
    {
      "epoch": 0.5184784957856062,
      "grad_norm": 0.9644269943237305,
      "learning_rate": 1.977600520683933e-05,
      "loss": 1.0304,
      "step": 2399
    },
    {
      "epoch": 0.5186946185433327,
      "grad_norm": 0.9276800751686096,
      "learning_rate": 1.9762006173663717e-05,
      "loss": 1.1313,
      "step": 2400
    },
    {
      "epoch": 0.518910741301059,
      "grad_norm": 0.9133787155151367,
      "learning_rate": 1.974800725710454e-05,
      "loss": 0.7661,
      "step": 2401
    },
    {
      "epoch": 0.5191268640587854,
      "grad_norm": 0.8322534561157227,
      "learning_rate": 1.9734008464021216e-05,
      "loss": 0.8939,
      "step": 2402
    },
    {
      "epoch": 0.5193429868165118,
      "grad_norm": 0.8768430352210999,
      "learning_rate": 1.972000980127313e-05,
      "loss": 0.8087,
      "step": 2403
    },
    {
      "epoch": 0.5195591095742381,
      "grad_norm": 0.8987047076225281,
      "learning_rate": 1.9706011275719603e-05,
      "loss": 0.9341,
      "step": 2404
    },
    {
      "epoch": 0.5197752323319645,
      "grad_norm": 0.9694629311561584,
      "learning_rate": 1.969201289421987e-05,
      "loss": 0.9099,
      "step": 2405
    },
    {
      "epoch": 0.519991355089691,
      "grad_norm": 0.8979818224906921,
      "learning_rate": 1.96780146636331e-05,
      "loss": 0.8387,
      "step": 2406
    },
    {
      "epoch": 0.5202074778474174,
      "grad_norm": 1.0015891790390015,
      "learning_rate": 1.96640165908184e-05,
      "loss": 1.0356,
      "step": 2407
    },
    {
      "epoch": 0.5204236006051437,
      "grad_norm": 0.8676528930664062,
      "learning_rate": 1.96500186826348e-05,
      "loss": 0.9171,
      "step": 2408
    },
    {
      "epoch": 0.5206397233628701,
      "grad_norm": 1.0528929233551025,
      "learning_rate": 1.9636020945941236e-05,
      "loss": 1.0244,
      "step": 2409
    },
    {
      "epoch": 0.5208558461205965,
      "grad_norm": 0.8272160887718201,
      "learning_rate": 1.9622023387596563e-05,
      "loss": 0.7513,
      "step": 2410
    },
    {
      "epoch": 0.5210719688783229,
      "grad_norm": 1.0948705673217773,
      "learning_rate": 1.9608026014459554e-05,
      "loss": 0.9408,
      "step": 2411
    },
    {
      "epoch": 0.5212880916360493,
      "grad_norm": 0.9872722625732422,
      "learning_rate": 1.95940288333889e-05,
      "loss": 0.9502,
      "step": 2412
    },
    {
      "epoch": 0.5215042143937757,
      "grad_norm": 0.8445034623146057,
      "learning_rate": 1.9580031851243176e-05,
      "loss": 0.8021,
      "step": 2413
    },
    {
      "epoch": 0.521720337151502,
      "grad_norm": 0.8690013289451599,
      "learning_rate": 1.9566035074880868e-05,
      "loss": 0.7603,
      "step": 2414
    },
    {
      "epoch": 0.5219364599092284,
      "grad_norm": 1.050392746925354,
      "learning_rate": 1.955203851116038e-05,
      "loss": 0.9773,
      "step": 2415
    },
    {
      "epoch": 0.5221525826669549,
      "grad_norm": 0.9531746506690979,
      "learning_rate": 1.9538042166939982e-05,
      "loss": 0.9301,
      "step": 2416
    },
    {
      "epoch": 0.5223687054246812,
      "grad_norm": 0.8638320565223694,
      "learning_rate": 1.9524046049077855e-05,
      "loss": 0.8095,
      "step": 2417
    },
    {
      "epoch": 0.5225848281824076,
      "grad_norm": 1.070334553718567,
      "learning_rate": 1.9510050164432058e-05,
      "loss": 1.0291,
      "step": 2418
    },
    {
      "epoch": 0.522800950940134,
      "grad_norm": 0.9095038771629333,
      "learning_rate": 1.949605451986055e-05,
      "loss": 0.8391,
      "step": 2419
    },
    {
      "epoch": 0.5230170736978604,
      "grad_norm": 0.9874014258384705,
      "learning_rate": 1.9482059122221168e-05,
      "loss": 0.9386,
      "step": 2420
    },
    {
      "epoch": 0.5232331964555867,
      "grad_norm": 0.8531448841094971,
      "learning_rate": 1.946806397837162e-05,
      "loss": 0.819,
      "step": 2421
    },
    {
      "epoch": 0.5234493192133132,
      "grad_norm": 0.8765074610710144,
      "learning_rate": 1.9454069095169484e-05,
      "loss": 0.8008,
      "step": 2422
    },
    {
      "epoch": 0.5236654419710396,
      "grad_norm": 0.9033129811286926,
      "learning_rate": 1.9440074479472232e-05,
      "loss": 0.9036,
      "step": 2423
    },
    {
      "epoch": 0.5238815647287659,
      "grad_norm": 0.9528734087944031,
      "learning_rate": 1.9426080138137195e-05,
      "loss": 0.8323,
      "step": 2424
    },
    {
      "epoch": 0.5240976874864923,
      "grad_norm": 0.9053243398666382,
      "learning_rate": 1.941208607802156e-05,
      "loss": 0.9455,
      "step": 2425
    },
    {
      "epoch": 0.5243138102442187,
      "grad_norm": 0.8585258722305298,
      "learning_rate": 1.939809230598239e-05,
      "loss": 0.8961,
      "step": 2426
    },
    {
      "epoch": 0.5245299330019451,
      "grad_norm": 0.9704940319061279,
      "learning_rate": 1.9384098828876597e-05,
      "loss": 0.8393,
      "step": 2427
    },
    {
      "epoch": 0.5247460557596715,
      "grad_norm": 1.1031426191329956,
      "learning_rate": 1.9370105653560956e-05,
      "loss": 0.9268,
      "step": 2428
    },
    {
      "epoch": 0.5249621785173979,
      "grad_norm": 0.9932018518447876,
      "learning_rate": 1.9356112786892082e-05,
      "loss": 0.9444,
      "step": 2429
    },
    {
      "epoch": 0.5251783012751242,
      "grad_norm": 1.0802488327026367,
      "learning_rate": 1.934212023572646e-05,
      "loss": 0.9151,
      "step": 2430
    },
    {
      "epoch": 0.5253944240328506,
      "grad_norm": 0.8728967905044556,
      "learning_rate": 1.93281280069204e-05,
      "loss": 0.9823,
      "step": 2431
    },
    {
      "epoch": 0.5256105467905771,
      "grad_norm": 0.7961871027946472,
      "learning_rate": 1.931413610733007e-05,
      "loss": 0.934,
      "step": 2432
    },
    {
      "epoch": 0.5258266695483035,
      "grad_norm": 0.908875584602356,
      "learning_rate": 1.930014454381145e-05,
      "loss": 0.8754,
      "step": 2433
    },
    {
      "epoch": 0.5260427923060298,
      "grad_norm": 0.875497579574585,
      "learning_rate": 1.9286153323220393e-05,
      "loss": 0.6693,
      "step": 2434
    },
    {
      "epoch": 0.5262589150637562,
      "grad_norm": 0.9510184526443481,
      "learning_rate": 1.9272162452412556e-05,
      "loss": 0.8506,
      "step": 2435
    },
    {
      "epoch": 0.5264750378214826,
      "grad_norm": 1.0109052658081055,
      "learning_rate": 1.9258171938243432e-05,
      "loss": 0.8483,
      "step": 2436
    },
    {
      "epoch": 0.526691160579209,
      "grad_norm": 0.9058769941329956,
      "learning_rate": 1.9244181787568347e-05,
      "loss": 0.8904,
      "step": 2437
    },
    {
      "epoch": 0.5269072833369354,
      "grad_norm": 1.0631839036941528,
      "learning_rate": 1.9230192007242442e-05,
      "loss": 1.0426,
      "step": 2438
    },
    {
      "epoch": 0.5271234060946618,
      "grad_norm": 0.9550550580024719,
      "learning_rate": 1.9216202604120672e-05,
      "loss": 1.0229,
      "step": 2439
    },
    {
      "epoch": 0.5273395288523881,
      "grad_norm": 0.893438994884491,
      "learning_rate": 1.920221358505781e-05,
      "loss": 0.8342,
      "step": 2440
    },
    {
      "epoch": 0.5275556516101145,
      "grad_norm": 1.1339728832244873,
      "learning_rate": 1.9188224956908448e-05,
      "loss": 0.9101,
      "step": 2441
    },
    {
      "epoch": 0.527771774367841,
      "grad_norm": 0.9783705472946167,
      "learning_rate": 1.917423672652698e-05,
      "loss": 0.9414,
      "step": 2442
    },
    {
      "epoch": 0.5279878971255674,
      "grad_norm": 0.8473526239395142,
      "learning_rate": 1.9160248900767607e-05,
      "loss": 0.7932,
      "step": 2443
    },
    {
      "epoch": 0.5282040198832937,
      "grad_norm": 0.8786388635635376,
      "learning_rate": 1.914626148648434e-05,
      "loss": 0.8596,
      "step": 2444
    },
    {
      "epoch": 0.5284201426410201,
      "grad_norm": 0.9827716946601868,
      "learning_rate": 1.9132274490530964e-05,
      "loss": 1.0699,
      "step": 2445
    },
    {
      "epoch": 0.5286362653987465,
      "grad_norm": 1.1196885108947754,
      "learning_rate": 1.9118287919761084e-05,
      "loss": 1.0602,
      "step": 2446
    },
    {
      "epoch": 0.5288523881564728,
      "grad_norm": 0.8260840773582458,
      "learning_rate": 1.910430178102808e-05,
      "loss": 0.746,
      "step": 2447
    },
    {
      "epoch": 0.5290685109141993,
      "grad_norm": 0.8771698474884033,
      "learning_rate": 1.9090316081185136e-05,
      "loss": 0.7933,
      "step": 2448
    },
    {
      "epoch": 0.5292846336719257,
      "grad_norm": 0.9941705465316772,
      "learning_rate": 1.9076330827085214e-05,
      "loss": 0.95,
      "step": 2449
    },
    {
      "epoch": 0.529500756429652,
      "grad_norm": 0.9113670587539673,
      "learning_rate": 1.9062346025581047e-05,
      "loss": 0.9018,
      "step": 2450
    },
    {
      "epoch": 0.5297168791873784,
      "grad_norm": 1.045186161994934,
      "learning_rate": 1.9048361683525155e-05,
      "loss": 0.9235,
      "step": 2451
    },
    {
      "epoch": 0.5299330019451048,
      "grad_norm": 0.9754068851470947,
      "learning_rate": 1.9034377807769845e-05,
      "loss": 1.0684,
      "step": 2452
    },
    {
      "epoch": 0.5301491247028312,
      "grad_norm": 1.0298631191253662,
      "learning_rate": 1.9020394405167174e-05,
      "loss": 0.7132,
      "step": 2453
    },
    {
      "epoch": 0.5303652474605576,
      "grad_norm": 0.9308910965919495,
      "learning_rate": 1.900641148256898e-05,
      "loss": 0.9392,
      "step": 2454
    },
    {
      "epoch": 0.530581370218284,
      "grad_norm": 0.889383852481842,
      "learning_rate": 1.8992429046826865e-05,
      "loss": 0.765,
      "step": 2455
    },
    {
      "epoch": 0.5307974929760104,
      "grad_norm": 0.9352144598960876,
      "learning_rate": 1.897844710479219e-05,
      "loss": 0.9131,
      "step": 2456
    },
    {
      "epoch": 0.5310136157337367,
      "grad_norm": 1.1047238111495972,
      "learning_rate": 1.8964465663316067e-05,
      "loss": 0.9011,
      "step": 2457
    },
    {
      "epoch": 0.5312297384914632,
      "grad_norm": 0.8324387073516846,
      "learning_rate": 1.8950484729249374e-05,
      "loss": 0.9107,
      "step": 2458
    },
    {
      "epoch": 0.5314458612491896,
      "grad_norm": 0.9552721977233887,
      "learning_rate": 1.893650430944274e-05,
      "loss": 0.8853,
      "step": 2459
    },
    {
      "epoch": 0.5316619840069159,
      "grad_norm": 0.9641865491867065,
      "learning_rate": 1.892252441074654e-05,
      "loss": 0.8674,
      "step": 2460
    },
    {
      "epoch": 0.5318781067646423,
      "grad_norm": 0.9755154252052307,
      "learning_rate": 1.8908545040010885e-05,
      "loss": 0.9572,
      "step": 2461
    },
    {
      "epoch": 0.5320942295223687,
      "grad_norm": 0.8502157330513,
      "learning_rate": 1.8894566204085633e-05,
      "loss": 0.8698,
      "step": 2462
    },
    {
      "epoch": 0.532310352280095,
      "grad_norm": 0.8985865116119385,
      "learning_rate": 1.888058790982039e-05,
      "loss": 0.9292,
      "step": 2463
    },
    {
      "epoch": 0.5325264750378215,
      "grad_norm": 0.9788563847541809,
      "learning_rate": 1.8866610164064485e-05,
      "loss": 0.8234,
      "step": 2464
    },
    {
      "epoch": 0.5327425977955479,
      "grad_norm": 1.0623949766159058,
      "learning_rate": 1.8852632973666972e-05,
      "loss": 1.0194,
      "step": 2465
    },
    {
      "epoch": 0.5329587205532743,
      "grad_norm": 1.2494370937347412,
      "learning_rate": 1.8838656345476654e-05,
      "loss": 0.9452,
      "step": 2466
    },
    {
      "epoch": 0.5331748433110006,
      "grad_norm": 0.8787872791290283,
      "learning_rate": 1.882468028634205e-05,
      "loss": 0.8668,
      "step": 2467
    },
    {
      "epoch": 0.533390966068727,
      "grad_norm": 0.888615071773529,
      "learning_rate": 1.8810704803111382e-05,
      "loss": 0.9638,
      "step": 2468
    },
    {
      "epoch": 0.5336070888264535,
      "grad_norm": 1.047143578529358,
      "learning_rate": 1.8796729902632605e-05,
      "loss": 0.8589,
      "step": 2469
    },
    {
      "epoch": 0.5338232115841798,
      "grad_norm": 1.0401242971420288,
      "learning_rate": 1.8782755591753405e-05,
      "loss": 1.0722,
      "step": 2470
    },
    {
      "epoch": 0.5340393343419062,
      "grad_norm": 0.9091007709503174,
      "learning_rate": 1.8768781877321145e-05,
      "loss": 0.9763,
      "step": 2471
    },
    {
      "epoch": 0.5342554570996326,
      "grad_norm": 0.9124115109443665,
      "learning_rate": 1.8754808766182925e-05,
      "loss": 0.9341,
      "step": 2472
    },
    {
      "epoch": 0.5344715798573589,
      "grad_norm": 1.0849777460098267,
      "learning_rate": 1.874083626518552e-05,
      "loss": 0.9522,
      "step": 2473
    },
    {
      "epoch": 0.5346877026150854,
      "grad_norm": 0.9831454753875732,
      "learning_rate": 1.8726864381175445e-05,
      "loss": 0.7644,
      "step": 2474
    },
    {
      "epoch": 0.5349038253728118,
      "grad_norm": 0.8318514227867126,
      "learning_rate": 1.8712893120998873e-05,
      "loss": 0.7629,
      "step": 2475
    },
    {
      "epoch": 0.5351199481305381,
      "grad_norm": 0.8997523188591003,
      "learning_rate": 1.8698922491501698e-05,
      "loss": 0.8557,
      "step": 2476
    },
    {
      "epoch": 0.5353360708882645,
      "grad_norm": 0.8529967665672302,
      "learning_rate": 1.8684952499529495e-05,
      "loss": 0.8869,
      "step": 2477
    },
    {
      "epoch": 0.5355521936459909,
      "grad_norm": 0.938438892364502,
      "learning_rate": 1.8670983151927534e-05,
      "loss": 0.9609,
      "step": 2478
    },
    {
      "epoch": 0.5357683164037174,
      "grad_norm": 0.9284506440162659,
      "learning_rate": 1.865701445554075e-05,
      "loss": 1.0326,
      "step": 2479
    },
    {
      "epoch": 0.5359844391614437,
      "grad_norm": 0.9874833226203918,
      "learning_rate": 1.8643046417213776e-05,
      "loss": 0.7757,
      "step": 2480
    },
    {
      "epoch": 0.5362005619191701,
      "grad_norm": 0.9178028106689453,
      "learning_rate": 1.8629079043790922e-05,
      "loss": 0.982,
      "step": 2481
    },
    {
      "epoch": 0.5364166846768965,
      "grad_norm": 0.8410900831222534,
      "learning_rate": 1.861511234211617e-05,
      "loss": 0.8956,
      "step": 2482
    },
    {
      "epoch": 0.5366328074346228,
      "grad_norm": 0.9202355146408081,
      "learning_rate": 1.8601146319033164e-05,
      "loss": 0.8585,
      "step": 2483
    },
    {
      "epoch": 0.5368489301923492,
      "grad_norm": 0.9029659628868103,
      "learning_rate": 1.8587180981385237e-05,
      "loss": 1.0118,
      "step": 2484
    },
    {
      "epoch": 0.5370650529500757,
      "grad_norm": 0.8835629224777222,
      "learning_rate": 1.8573216336015355e-05,
      "loss": 0.8455,
      "step": 2485
    },
    {
      "epoch": 0.537281175707802,
      "grad_norm": 0.9127852320671082,
      "learning_rate": 1.8559252389766177e-05,
      "loss": 1.0092,
      "step": 2486
    },
    {
      "epoch": 0.5374972984655284,
      "grad_norm": 0.9556038975715637,
      "learning_rate": 1.8545289149479987e-05,
      "loss": 0.7549,
      "step": 2487
    },
    {
      "epoch": 0.5377134212232548,
      "grad_norm": 0.887857973575592,
      "learning_rate": 1.8531326621998758e-05,
      "loss": 0.9067,
      "step": 2488
    },
    {
      "epoch": 0.5379295439809813,
      "grad_norm": 0.8509790897369385,
      "learning_rate": 1.8517364814164093e-05,
      "loss": 0.7826,
      "step": 2489
    },
    {
      "epoch": 0.5381456667387076,
      "grad_norm": 0.956433892250061,
      "learning_rate": 1.8503403732817237e-05,
      "loss": 0.86,
      "step": 2490
    },
    {
      "epoch": 0.538361789496434,
      "grad_norm": 0.9578793048858643,
      "learning_rate": 1.848944338479909e-05,
      "loss": 0.9152,
      "step": 2491
    },
    {
      "epoch": 0.5385779122541604,
      "grad_norm": 1.0387738943099976,
      "learning_rate": 1.8475483776950196e-05,
      "loss": 0.8922,
      "step": 2492
    },
    {
      "epoch": 0.5387940350118867,
      "grad_norm": 1.0104392766952515,
      "learning_rate": 1.8461524916110725e-05,
      "loss": 0.7996,
      "step": 2493
    },
    {
      "epoch": 0.5390101577696131,
      "grad_norm": 0.9003995060920715,
      "learning_rate": 1.8447566809120487e-05,
      "loss": 0.7733,
      "step": 2494
    },
    {
      "epoch": 0.5392262805273396,
      "grad_norm": 1.0649616718292236,
      "learning_rate": 1.8433609462818935e-05,
      "loss": 1.0595,
      "step": 2495
    },
    {
      "epoch": 0.5394424032850659,
      "grad_norm": 0.906844437122345,
      "learning_rate": 1.8419652884045114e-05,
      "loss": 0.8297,
      "step": 2496
    },
    {
      "epoch": 0.5396585260427923,
      "grad_norm": 0.8947880268096924,
      "learning_rate": 1.840569707963773e-05,
      "loss": 1.0482,
      "step": 2497
    },
    {
      "epoch": 0.5398746488005187,
      "grad_norm": 0.9246777892112732,
      "learning_rate": 1.839174205643509e-05,
      "loss": 0.9083,
      "step": 2498
    },
    {
      "epoch": 0.540090771558245,
      "grad_norm": 0.8564475774765015,
      "learning_rate": 1.8377787821275122e-05,
      "loss": 0.8579,
      "step": 2499
    },
    {
      "epoch": 0.5403068943159715,
      "grad_norm": 0.9944950938224792,
      "learning_rate": 1.8363834380995377e-05,
      "loss": 1.0187,
      "step": 2500
    },
    {
      "epoch": 0.5405230170736979,
      "grad_norm": 1.0183297395706177,
      "learning_rate": 1.8349881742433004e-05,
      "loss": 1.0016,
      "step": 2501
    },
    {
      "epoch": 0.5407391398314243,
      "grad_norm": 0.8491703867912292,
      "learning_rate": 1.8335929912424756e-05,
      "loss": 0.7843,
      "step": 2502
    },
    {
      "epoch": 0.5409552625891506,
      "grad_norm": 1.0703781843185425,
      "learning_rate": 1.8321978897807007e-05,
      "loss": 1.1484,
      "step": 2503
    },
    {
      "epoch": 0.541171385346877,
      "grad_norm": 0.8054190874099731,
      "learning_rate": 1.8308028705415725e-05,
      "loss": 0.7427,
      "step": 2504
    },
    {
      "epoch": 0.5413875081046035,
      "grad_norm": 0.9089491367340088,
      "learning_rate": 1.8294079342086454e-05,
      "loss": 0.8596,
      "step": 2505
    },
    {
      "epoch": 0.5416036308623298,
      "grad_norm": 0.9109365344047546,
      "learning_rate": 1.828013081465437e-05,
      "loss": 0.9888,
      "step": 2506
    },
    {
      "epoch": 0.5418197536200562,
      "grad_norm": 1.0054105520248413,
      "learning_rate": 1.8266183129954215e-05,
      "loss": 1.2071,
      "step": 2507
    },
    {
      "epoch": 0.5420358763777826,
      "grad_norm": 0.8904284238815308,
      "learning_rate": 1.8252236294820313e-05,
      "loss": 0.9599,
      "step": 2508
    },
    {
      "epoch": 0.5422519991355089,
      "grad_norm": 0.9618232250213623,
      "learning_rate": 1.8238290316086584e-05,
      "loss": 0.9419,
      "step": 2509
    },
    {
      "epoch": 0.5424681218932353,
      "grad_norm": 1.0052188634872437,
      "learning_rate": 1.822434520058653e-05,
      "loss": 0.948,
      "step": 2510
    },
    {
      "epoch": 0.5426842446509618,
      "grad_norm": 0.9211570620536804,
      "learning_rate": 1.8210400955153224e-05,
      "loss": 1.1163,
      "step": 2511
    },
    {
      "epoch": 0.5429003674086882,
      "grad_norm": 1.0729602575302124,
      "learning_rate": 1.8196457586619315e-05,
      "loss": 0.9801,
      "step": 2512
    },
    {
      "epoch": 0.5431164901664145,
      "grad_norm": 0.934258759021759,
      "learning_rate": 1.8182515101817015e-05,
      "loss": 0.8857,
      "step": 2513
    },
    {
      "epoch": 0.5433326129241409,
      "grad_norm": 0.7439682483673096,
      "learning_rate": 1.8168573507578114e-05,
      "loss": 0.9079,
      "step": 2514
    },
    {
      "epoch": 0.5435487356818673,
      "grad_norm": 1.0175498723983765,
      "learning_rate": 1.815463281073396e-05,
      "loss": 0.8523,
      "step": 2515
    },
    {
      "epoch": 0.5437648584395937,
      "grad_norm": 0.838266134262085,
      "learning_rate": 1.8140693018115465e-05,
      "loss": 0.9479,
      "step": 2516
    },
    {
      "epoch": 0.5439809811973201,
      "grad_norm": 0.8469918966293335,
      "learning_rate": 1.8126754136553093e-05,
      "loss": 1.0116,
      "step": 2517
    },
    {
      "epoch": 0.5441971039550465,
      "grad_norm": 0.9128462076187134,
      "learning_rate": 1.8112816172876867e-05,
      "loss": 0.902,
      "step": 2518
    },
    {
      "epoch": 0.5444132267127728,
      "grad_norm": 0.8548224568367004,
      "learning_rate": 1.8098879133916352e-05,
      "loss": 0.9813,
      "step": 2519
    },
    {
      "epoch": 0.5446293494704992,
      "grad_norm": 0.8403018116950989,
      "learning_rate": 1.808494302650066e-05,
      "loss": 0.7716,
      "step": 2520
    },
    {
      "epoch": 0.5448454722282257,
      "grad_norm": 0.8741294741630554,
      "learning_rate": 1.8071007857458465e-05,
      "loss": 0.7792,
      "step": 2521
    },
    {
      "epoch": 0.545061594985952,
      "grad_norm": 1.0990644693374634,
      "learning_rate": 1.8057073633617958e-05,
      "loss": 0.8027,
      "step": 2522
    },
    {
      "epoch": 0.5452777177436784,
      "grad_norm": 0.840221643447876,
      "learning_rate": 1.8043140361806877e-05,
      "loss": 0.891,
      "step": 2523
    },
    {
      "epoch": 0.5454938405014048,
      "grad_norm": 0.8790461421012878,
      "learning_rate": 1.8029208048852505e-05,
      "loss": 0.8501,
      "step": 2524
    },
    {
      "epoch": 0.5457099632591312,
      "grad_norm": 0.8606840968132019,
      "learning_rate": 1.8015276701581623e-05,
      "loss": 0.8979,
      "step": 2525
    },
    {
      "epoch": 0.5459260860168575,
      "grad_norm": 0.9998458623886108,
      "learning_rate": 1.8001346326820574e-05,
      "loss": 0.9728,
      "step": 2526
    },
    {
      "epoch": 0.546142208774584,
      "grad_norm": 0.8386487364768982,
      "learning_rate": 1.79874169313952e-05,
      "loss": 0.7945,
      "step": 2527
    },
    {
      "epoch": 0.5463583315323104,
      "grad_norm": 0.9057452082633972,
      "learning_rate": 1.797348852213088e-05,
      "loss": 0.8812,
      "step": 2528
    },
    {
      "epoch": 0.5465744542900367,
      "grad_norm": 0.8506041765213013,
      "learning_rate": 1.7959561105852505e-05,
      "loss": 0.9357,
      "step": 2529
    },
    {
      "epoch": 0.5467905770477631,
      "grad_norm": 0.8173531293869019,
      "learning_rate": 1.7945634689384465e-05,
      "loss": 0.9853,
      "step": 2530
    },
    {
      "epoch": 0.5470066998054895,
      "grad_norm": 0.7886649966239929,
      "learning_rate": 1.7931709279550676e-05,
      "loss": 1.0335,
      "step": 2531
    },
    {
      "epoch": 0.5472228225632159,
      "grad_norm": 0.9975094199180603,
      "learning_rate": 1.7917784883174562e-05,
      "loss": 0.8146,
      "step": 2532
    },
    {
      "epoch": 0.5474389453209423,
      "grad_norm": 0.8904764652252197,
      "learning_rate": 1.7903861507079042e-05,
      "loss": 0.8679,
      "step": 2533
    },
    {
      "epoch": 0.5476550680786687,
      "grad_norm": 0.9284105896949768,
      "learning_rate": 1.7889939158086536e-05,
      "loss": 1.0873,
      "step": 2534
    },
    {
      "epoch": 0.5478711908363951,
      "grad_norm": 0.8726813793182373,
      "learning_rate": 1.7876017843018973e-05,
      "loss": 0.9619,
      "step": 2535
    },
    {
      "epoch": 0.5480873135941214,
      "grad_norm": 0.9748417735099792,
      "learning_rate": 1.786209756869775e-05,
      "loss": 0.9706,
      "step": 2536
    },
    {
      "epoch": 0.5483034363518479,
      "grad_norm": 0.959156334400177,
      "learning_rate": 1.7848178341943775e-05,
      "loss": 0.8527,
      "step": 2537
    },
    {
      "epoch": 0.5485195591095743,
      "grad_norm": 0.8951647281646729,
      "learning_rate": 1.7834260169577436e-05,
      "loss": 0.6911,
      "step": 2538
    },
    {
      "epoch": 0.5487356818673006,
      "grad_norm": 0.8986188173294067,
      "learning_rate": 1.7820343058418613e-05,
      "loss": 0.8569,
      "step": 2539
    },
    {
      "epoch": 0.548951804625027,
      "grad_norm": 0.9517485499382019,
      "learning_rate": 1.780642701528665e-05,
      "loss": 1.0089,
      "step": 2540
    },
    {
      "epoch": 0.5491679273827534,
      "grad_norm": 0.8486550450325012,
      "learning_rate": 1.7792512047000387e-05,
      "loss": 0.9291,
      "step": 2541
    },
    {
      "epoch": 0.5493840501404798,
      "grad_norm": 0.9620897173881531,
      "learning_rate": 1.7778598160378107e-05,
      "loss": 0.7867,
      "step": 2542
    },
    {
      "epoch": 0.5496001728982062,
      "grad_norm": 0.894357442855835,
      "learning_rate": 1.7764685362237596e-05,
      "loss": 0.7913,
      "step": 2543
    },
    {
      "epoch": 0.5498162956559326,
      "grad_norm": 0.897143542766571,
      "learning_rate": 1.7750773659396094e-05,
      "loss": 0.8315,
      "step": 2544
    },
    {
      "epoch": 0.5500324184136589,
      "grad_norm": 0.9111842513084412,
      "learning_rate": 1.773686305867029e-05,
      "loss": 0.8653,
      "step": 2545
    },
    {
      "epoch": 0.5502485411713853,
      "grad_norm": 1.007009744644165,
      "learning_rate": 1.7722953566876364e-05,
      "loss": 0.8701,
      "step": 2546
    },
    {
      "epoch": 0.5504646639291118,
      "grad_norm": 0.8903434872627258,
      "learning_rate": 1.770904519082993e-05,
      "loss": 0.7084,
      "step": 2547
    },
    {
      "epoch": 0.5506807866868382,
      "grad_norm": 1.0857263803482056,
      "learning_rate": 1.769513793734605e-05,
      "loss": 0.8966,
      "step": 2548
    },
    {
      "epoch": 0.5508969094445645,
      "grad_norm": 0.94488126039505,
      "learning_rate": 1.7681231813239254e-05,
      "loss": 1.0278,
      "step": 2549
    },
    {
      "epoch": 0.5511130322022909,
      "grad_norm": 1.0167680978775024,
      "learning_rate": 1.7667326825323507e-05,
      "loss": 0.8787,
      "step": 2550
    },
    {
      "epoch": 0.5513291549600173,
      "grad_norm": 0.9108545184135437,
      "learning_rate": 1.7653422980412227e-05,
      "loss": 0.8335,
      "step": 2551
    },
    {
      "epoch": 0.5515452777177436,
      "grad_norm": 0.9126055836677551,
      "learning_rate": 1.7639520285318265e-05,
      "loss": 0.7946,
      "step": 2552
    },
    {
      "epoch": 0.5517614004754701,
      "grad_norm": 0.9356114864349365,
      "learning_rate": 1.7625618746853902e-05,
      "loss": 1.0366,
      "step": 2553
    },
    {
      "epoch": 0.5519775232331965,
      "grad_norm": 0.9361844658851624,
      "learning_rate": 1.761171837183086e-05,
      "loss": 0.9991,
      "step": 2554
    },
    {
      "epoch": 0.5521936459909228,
      "grad_norm": 0.9151226282119751,
      "learning_rate": 1.7597819167060303e-05,
      "loss": 0.8677,
      "step": 2555
    },
    {
      "epoch": 0.5524097687486492,
      "grad_norm": 0.922074556350708,
      "learning_rate": 1.7583921139352793e-05,
      "loss": 0.9288,
      "step": 2556
    },
    {
      "epoch": 0.5526258915063756,
      "grad_norm": 0.8715337514877319,
      "learning_rate": 1.7570024295518345e-05,
      "loss": 0.7825,
      "step": 2557
    },
    {
      "epoch": 0.552842014264102,
      "grad_norm": 0.9341384172439575,
      "learning_rate": 1.7556128642366378e-05,
      "loss": 0.9295,
      "step": 2558
    },
    {
      "epoch": 0.5530581370218284,
      "grad_norm": 0.7741240859031677,
      "learning_rate": 1.7542234186705722e-05,
      "loss": 0.8583,
      "step": 2559
    },
    {
      "epoch": 0.5532742597795548,
      "grad_norm": 0.9513848423957825,
      "learning_rate": 1.752834093534463e-05,
      "loss": 1.0367,
      "step": 2560
    },
    {
      "epoch": 0.5534903825372812,
      "grad_norm": 0.9577759504318237,
      "learning_rate": 1.751444889509077e-05,
      "loss": 1.003,
      "step": 2561
    },
    {
      "epoch": 0.5537065052950075,
      "grad_norm": 0.8637582063674927,
      "learning_rate": 1.7500558072751207e-05,
      "loss": 0.8981,
      "step": 2562
    },
    {
      "epoch": 0.553922628052734,
      "grad_norm": 0.9732483625411987,
      "learning_rate": 1.7486668475132404e-05,
      "loss": 0.801,
      "step": 2563
    },
    {
      "epoch": 0.5541387508104604,
      "grad_norm": 1.0261329412460327,
      "learning_rate": 1.7472780109040254e-05,
      "loss": 1.0119,
      "step": 2564
    },
    {
      "epoch": 0.5543548735681867,
      "grad_norm": 1.0444742441177368,
      "learning_rate": 1.7458892981279993e-05,
      "loss": 0.926,
      "step": 2565
    },
    {
      "epoch": 0.5545709963259131,
      "grad_norm": 0.8797621130943298,
      "learning_rate": 1.74450070986563e-05,
      "loss": 0.9306,
      "step": 2566
    },
    {
      "epoch": 0.5547871190836395,
      "grad_norm": 1.0905117988586426,
      "learning_rate": 1.743112246797322e-05,
      "loss": 0.965,
      "step": 2567
    },
    {
      "epoch": 0.5550032418413658,
      "grad_norm": 0.7685717344284058,
      "learning_rate": 1.7417239096034197e-05,
      "loss": 0.7421,
      "step": 2568
    },
    {
      "epoch": 0.5552193645990923,
      "grad_norm": 0.8749861121177673,
      "learning_rate": 1.740335698964205e-05,
      "loss": 0.9124,
      "step": 2569
    },
    {
      "epoch": 0.5554354873568187,
      "grad_norm": 0.9034978151321411,
      "learning_rate": 1.7389476155598974e-05,
      "loss": 0.8177,
      "step": 2570
    },
    {
      "epoch": 0.5556516101145451,
      "grad_norm": 0.8407739996910095,
      "learning_rate": 1.737559660070654e-05,
      "loss": 0.9647,
      "step": 2571
    },
    {
      "epoch": 0.5558677328722714,
      "grad_norm": 1.0439980030059814,
      "learning_rate": 1.736171833176571e-05,
      "loss": 0.9058,
      "step": 2572
    },
    {
      "epoch": 0.5560838556299978,
      "grad_norm": 0.8621343374252319,
      "learning_rate": 1.7347841355576797e-05,
      "loss": 0.8257,
      "step": 2573
    },
    {
      "epoch": 0.5562999783877243,
      "grad_norm": 1.0059354305267334,
      "learning_rate": 1.7333965678939487e-05,
      "loss": 1.1693,
      "step": 2574
    },
    {
      "epoch": 0.5565161011454506,
      "grad_norm": 0.9958191514015198,
      "learning_rate": 1.7320091308652842e-05,
      "loss": 0.9896,
      "step": 2575
    },
    {
      "epoch": 0.556732223903177,
      "grad_norm": 0.9572399854660034,
      "learning_rate": 1.730621825151525e-05,
      "loss": 0.8653,
      "step": 2576
    },
    {
      "epoch": 0.5569483466609034,
      "grad_norm": 0.9230698943138123,
      "learning_rate": 1.729234651432449e-05,
      "loss": 0.6516,
      "step": 2577
    },
    {
      "epoch": 0.5571644694186297,
      "grad_norm": 0.8629150390625,
      "learning_rate": 1.7278476103877676e-05,
      "loss": 0.8205,
      "step": 2578
    },
    {
      "epoch": 0.5573805921763562,
      "grad_norm": 0.9017676711082458,
      "learning_rate": 1.7264607026971284e-05,
      "loss": 0.7964,
      "step": 2579
    },
    {
      "epoch": 0.5575967149340826,
      "grad_norm": 0.8994758725166321,
      "learning_rate": 1.7250739290401123e-05,
      "loss": 0.9763,
      "step": 2580
    },
    {
      "epoch": 0.5578128376918089,
      "grad_norm": 0.9548357129096985,
      "learning_rate": 1.7236872900962364e-05,
      "loss": 0.8762,
      "step": 2581
    },
    {
      "epoch": 0.5580289604495353,
      "grad_norm": 0.9338661432266235,
      "learning_rate": 1.7223007865449487e-05,
      "loss": 0.8987,
      "step": 2582
    },
    {
      "epoch": 0.5582450832072617,
      "grad_norm": 0.8923291563987732,
      "learning_rate": 1.7209144190656333e-05,
      "loss": 0.8751,
      "step": 2583
    },
    {
      "epoch": 0.5584612059649882,
      "grad_norm": 0.9313778877258301,
      "learning_rate": 1.7195281883376078e-05,
      "loss": 0.9678,
      "step": 2584
    },
    {
      "epoch": 0.5586773287227145,
      "grad_norm": 0.9116126298904419,
      "learning_rate": 1.7181420950401212e-05,
      "loss": 0.9974,
      "step": 2585
    },
    {
      "epoch": 0.5588934514804409,
      "grad_norm": 0.9923733472824097,
      "learning_rate": 1.7167561398523572e-05,
      "loss": 0.897,
      "step": 2586
    },
    {
      "epoch": 0.5591095742381673,
      "grad_norm": 0.843914270401001,
      "learning_rate": 1.7153703234534302e-05,
      "loss": 0.7292,
      "step": 2587
    },
    {
      "epoch": 0.5593256969958936,
      "grad_norm": 1.0005183219909668,
      "learning_rate": 1.713984646522386e-05,
      "loss": 0.8998,
      "step": 2588
    },
    {
      "epoch": 0.55954181975362,
      "grad_norm": 0.9981311559677124,
      "learning_rate": 1.712599109738204e-05,
      "loss": 1.0055,
      "step": 2589
    },
    {
      "epoch": 0.5597579425113465,
      "grad_norm": 0.8771159648895264,
      "learning_rate": 1.711213713779794e-05,
      "loss": 0.9573,
      "step": 2590
    },
    {
      "epoch": 0.5599740652690728,
      "grad_norm": 0.9199696779251099,
      "learning_rate": 1.7098284593259963e-05,
      "loss": 0.8015,
      "step": 2591
    },
    {
      "epoch": 0.5601901880267992,
      "grad_norm": 0.9282891750335693,
      "learning_rate": 1.7084433470555837e-05,
      "loss": 0.8832,
      "step": 2592
    },
    {
      "epoch": 0.5604063107845256,
      "grad_norm": 0.9497724175453186,
      "learning_rate": 1.7070583776472564e-05,
      "loss": 0.7778,
      "step": 2593
    },
    {
      "epoch": 0.560622433542252,
      "grad_norm": 1.0086259841918945,
      "learning_rate": 1.7056735517796463e-05,
      "loss": 0.9504,
      "step": 2594
    },
    {
      "epoch": 0.5608385562999784,
      "grad_norm": 0.954163134098053,
      "learning_rate": 1.704288870131316e-05,
      "loss": 0.9827,
      "step": 2595
    },
    {
      "epoch": 0.5610546790577048,
      "grad_norm": 0.9140467047691345,
      "learning_rate": 1.7029043333807556e-05,
      "loss": 0.8622,
      "step": 2596
    },
    {
      "epoch": 0.5612708018154312,
      "grad_norm": 0.8212366104125977,
      "learning_rate": 1.701519942206385e-05,
      "loss": 0.9051,
      "step": 2597
    },
    {
      "epoch": 0.5614869245731575,
      "grad_norm": 1.0883913040161133,
      "learning_rate": 1.7001356972865535e-05,
      "loss": 0.9649,
      "step": 2598
    },
    {
      "epoch": 0.5617030473308839,
      "grad_norm": 0.9906061887741089,
      "learning_rate": 1.6987515992995366e-05,
      "loss": 0.8934,
      "step": 2599
    },
    {
      "epoch": 0.5619191700886104,
      "grad_norm": 1.04061758518219,
      "learning_rate": 1.6973676489235393e-05,
      "loss": 0.9897,
      "step": 2600
    },
    {
      "epoch": 0.5621352928463367,
      "grad_norm": 0.956551194190979,
      "learning_rate": 1.6959838468366947e-05,
      "loss": 0.8,
      "step": 2601
    },
    {
      "epoch": 0.5623514156040631,
      "grad_norm": 1.0711612701416016,
      "learning_rate": 1.6946001937170625e-05,
      "loss": 0.9623,
      "step": 2602
    },
    {
      "epoch": 0.5625675383617895,
      "grad_norm": 0.9555321335792542,
      "learning_rate": 1.693216690242629e-05,
      "loss": 0.8434,
      "step": 2603
    },
    {
      "epoch": 0.5627836611195158,
      "grad_norm": 1.0556505918502808,
      "learning_rate": 1.6918333370913092e-05,
      "loss": 0.9153,
      "step": 2604
    },
    {
      "epoch": 0.5629997838772423,
      "grad_norm": 1.0087647438049316,
      "learning_rate": 1.690450134940941e-05,
      "loss": 0.9824,
      "step": 2605
    },
    {
      "epoch": 0.5632159066349687,
      "grad_norm": 0.9734079241752625,
      "learning_rate": 1.6890670844692912e-05,
      "loss": 0.9209,
      "step": 2606
    },
    {
      "epoch": 0.5634320293926951,
      "grad_norm": 1.0049638748168945,
      "learning_rate": 1.6876841863540508e-05,
      "loss": 0.822,
      "step": 2607
    },
    {
      "epoch": 0.5636481521504214,
      "grad_norm": 0.9042305946350098,
      "learning_rate": 1.6863014412728377e-05,
      "loss": 0.8646,
      "step": 2608
    },
    {
      "epoch": 0.5638642749081478,
      "grad_norm": 1.022499442100525,
      "learning_rate": 1.684918849903193e-05,
      "loss": 0.6836,
      "step": 2609
    },
    {
      "epoch": 0.5640803976658743,
      "grad_norm": 0.9768783450126648,
      "learning_rate": 1.683536412922584e-05,
      "loss": 0.9014,
      "step": 2610
    },
    {
      "epoch": 0.5642965204236006,
      "grad_norm": 0.957285463809967,
      "learning_rate": 1.6821541310084007e-05,
      "loss": 0.8889,
      "step": 2611
    },
    {
      "epoch": 0.564512643181327,
      "grad_norm": 1.1212570667266846,
      "learning_rate": 1.6807720048379577e-05,
      "loss": 0.9276,
      "step": 2612
    },
    {
      "epoch": 0.5647287659390534,
      "grad_norm": 0.9631536602973938,
      "learning_rate": 1.6793900350884956e-05,
      "loss": 0.9226,
      "step": 2613
    },
    {
      "epoch": 0.5649448886967797,
      "grad_norm": 1.0156223773956299,
      "learning_rate": 1.678008222437174e-05,
      "loss": 0.9919,
      "step": 2614
    },
    {
      "epoch": 0.5651610114545061,
      "grad_norm": 0.8217774629592896,
      "learning_rate": 1.6766265675610806e-05,
      "loss": 0.8019,
      "step": 2615
    },
    {
      "epoch": 0.5653771342122326,
      "grad_norm": 0.9502395391464233,
      "learning_rate": 1.6752450711372204e-05,
      "loss": 0.7396,
      "step": 2616
    },
    {
      "epoch": 0.565593256969959,
      "grad_norm": 0.866053581237793,
      "learning_rate": 1.673863733842525e-05,
      "loss": 0.9259,
      "step": 2617
    },
    {
      "epoch": 0.5658093797276853,
      "grad_norm": 0.9778143763542175,
      "learning_rate": 1.6724825563538455e-05,
      "loss": 0.926,
      "step": 2618
    },
    {
      "epoch": 0.5660255024854117,
      "grad_norm": 1.0290147066116333,
      "learning_rate": 1.6711015393479568e-05,
      "loss": 1.0948,
      "step": 2619
    },
    {
      "epoch": 0.5662416252431381,
      "grad_norm": 1.0352962017059326,
      "learning_rate": 1.6697206835015535e-05,
      "loss": 1.0491,
      "step": 2620
    },
    {
      "epoch": 0.5664577480008645,
      "grad_norm": 0.8152976036071777,
      "learning_rate": 1.6683399894912522e-05,
      "loss": 0.7761,
      "step": 2621
    },
    {
      "epoch": 0.5666738707585909,
      "grad_norm": 0.9332512617111206,
      "learning_rate": 1.666959457993589e-05,
      "loss": 0.9035,
      "step": 2622
    },
    {
      "epoch": 0.5668899935163173,
      "grad_norm": 0.8031812310218811,
      "learning_rate": 1.665579089685021e-05,
      "loss": 0.9277,
      "step": 2623
    },
    {
      "epoch": 0.5671061162740436,
      "grad_norm": 0.9118196964263916,
      "learning_rate": 1.6641988852419265e-05,
      "loss": 0.6753,
      "step": 2624
    },
    {
      "epoch": 0.56732223903177,
      "grad_norm": 0.883331835269928,
      "learning_rate": 1.6628188453406015e-05,
      "loss": 0.9446,
      "step": 2625
    },
    {
      "epoch": 0.5675383617894965,
      "grad_norm": 1.035176396369934,
      "learning_rate": 1.6614389706572633e-05,
      "loss": 0.9588,
      "step": 2626
    },
    {
      "epoch": 0.5677544845472228,
      "grad_norm": 0.9993638396263123,
      "learning_rate": 1.6600592618680474e-05,
      "loss": 0.911,
      "step": 2627
    },
    {
      "epoch": 0.5679706073049492,
      "grad_norm": 0.9066967368125916,
      "learning_rate": 1.658679719649007e-05,
      "loss": 1.0114,
      "step": 2628
    },
    {
      "epoch": 0.5681867300626756,
      "grad_norm": 1.0658564567565918,
      "learning_rate": 1.657300344676114e-05,
      "loss": 1.0594,
      "step": 2629
    },
    {
      "epoch": 0.568402852820402,
      "grad_norm": 1.0333069562911987,
      "learning_rate": 1.6559211376252607e-05,
      "loss": 0.8774,
      "step": 2630
    },
    {
      "epoch": 0.5686189755781283,
      "grad_norm": 0.9193286895751953,
      "learning_rate": 1.6545420991722543e-05,
      "loss": 0.8313,
      "step": 2631
    },
    {
      "epoch": 0.5688350983358548,
      "grad_norm": 1.1600608825683594,
      "learning_rate": 1.6531632299928207e-05,
      "loss": 0.8811,
      "step": 2632
    },
    {
      "epoch": 0.5690512210935812,
      "grad_norm": 0.9152405261993408,
      "learning_rate": 1.6517845307626035e-05,
      "loss": 0.9077,
      "step": 2633
    },
    {
      "epoch": 0.5692673438513075,
      "grad_norm": 0.9811593890190125,
      "learning_rate": 1.6504060021571602e-05,
      "loss": 1.1066,
      "step": 2634
    },
    {
      "epoch": 0.5694834666090339,
      "grad_norm": 0.9509673118591309,
      "learning_rate": 1.649027644851968e-05,
      "loss": 1.0162,
      "step": 2635
    },
    {
      "epoch": 0.5696995893667604,
      "grad_norm": 1.0188031196594238,
      "learning_rate": 1.6476494595224185e-05,
      "loss": 0.96,
      "step": 2636
    },
    {
      "epoch": 0.5699157121244867,
      "grad_norm": 0.9113590717315674,
      "learning_rate": 1.646271446843819e-05,
      "loss": 0.8967,
      "step": 2637
    },
    {
      "epoch": 0.5701318348822131,
      "grad_norm": 1.0003587007522583,
      "learning_rate": 1.6448936074913938e-05,
      "loss": 1.0857,
      "step": 2638
    },
    {
      "epoch": 0.5703479576399395,
      "grad_norm": 1.0504573583602905,
      "learning_rate": 1.6435159421402797e-05,
      "loss": 0.9088,
      "step": 2639
    },
    {
      "epoch": 0.5705640803976659,
      "grad_norm": 0.8997858166694641,
      "learning_rate": 1.6421384514655296e-05,
      "loss": 0.8765,
      "step": 2640
    },
    {
      "epoch": 0.5707802031553922,
      "grad_norm": 0.776848316192627,
      "learning_rate": 1.6407611361421107e-05,
      "loss": 0.8643,
      "step": 2641
    },
    {
      "epoch": 0.5709963259131187,
      "grad_norm": 0.9078662991523743,
      "learning_rate": 1.639383996844905e-05,
      "loss": 0.7983,
      "step": 2642
    },
    {
      "epoch": 0.5712124486708451,
      "grad_norm": 1.1863467693328857,
      "learning_rate": 1.638007034248707e-05,
      "loss": 0.9971,
      "step": 2643
    },
    {
      "epoch": 0.5714285714285714,
      "grad_norm": 0.9968357682228088,
      "learning_rate": 1.6366302490282265e-05,
      "loss": 0.9071,
      "step": 2644
    },
    {
      "epoch": 0.5716446941862978,
      "grad_norm": 1.0731886625289917,
      "learning_rate": 1.6352536418580828e-05,
      "loss": 0.9942,
      "step": 2645
    },
    {
      "epoch": 0.5718608169440242,
      "grad_norm": 0.9937326312065125,
      "learning_rate": 1.6338772134128115e-05,
      "loss": 0.8896,
      "step": 2646
    },
    {
      "epoch": 0.5720769397017506,
      "grad_norm": 1.042829155921936,
      "learning_rate": 1.6325009643668592e-05,
      "loss": 0.7685,
      "step": 2647
    },
    {
      "epoch": 0.572293062459477,
      "grad_norm": 0.8667680621147156,
      "learning_rate": 1.6311248953945854e-05,
      "loss": 0.9037,
      "step": 2648
    },
    {
      "epoch": 0.5725091852172034,
      "grad_norm": 0.873292088508606,
      "learning_rate": 1.62974900717026e-05,
      "loss": 0.8839,
      "step": 2649
    },
    {
      "epoch": 0.5727253079749297,
      "grad_norm": 0.9799197912216187,
      "learning_rate": 1.6283733003680655e-05,
      "loss": 0.9105,
      "step": 2650
    },
    {
      "epoch": 0.5729414307326561,
      "grad_norm": 0.9652045965194702,
      "learning_rate": 1.6269977756620944e-05,
      "loss": 0.8397,
      "step": 2651
    },
    {
      "epoch": 0.5731575534903826,
      "grad_norm": 0.9093625545501709,
      "learning_rate": 1.6256224337263503e-05,
      "loss": 0.7846,
      "step": 2652
    },
    {
      "epoch": 0.573373676248109,
      "grad_norm": 0.8416289687156677,
      "learning_rate": 1.624247275234749e-05,
      "loss": 0.8236,
      "step": 2653
    },
    {
      "epoch": 0.5735897990058353,
      "grad_norm": 0.8340027332305908,
      "learning_rate": 1.6228723008611136e-05,
      "loss": 0.7617,
      "step": 2654
    },
    {
      "epoch": 0.5738059217635617,
      "grad_norm": 0.9418075084686279,
      "learning_rate": 1.6214975112791803e-05,
      "loss": 1.0724,
      "step": 2655
    },
    {
      "epoch": 0.5740220445212881,
      "grad_norm": 0.9534636735916138,
      "learning_rate": 1.6201229071625905e-05,
      "loss": 0.8903,
      "step": 2656
    },
    {
      "epoch": 0.5742381672790144,
      "grad_norm": 0.9745770692825317,
      "learning_rate": 1.6187484891848983e-05,
      "loss": 0.8336,
      "step": 2657
    },
    {
      "epoch": 0.5744542900367409,
      "grad_norm": 0.9617936015129089,
      "learning_rate": 1.6173742580195643e-05,
      "loss": 0.982,
      "step": 2658
    },
    {
      "epoch": 0.5746704127944673,
      "grad_norm": 0.9466758966445923,
      "learning_rate": 1.61600021433996e-05,
      "loss": 0.8253,
      "step": 2659
    },
    {
      "epoch": 0.5748865355521936,
      "grad_norm": 0.9294735789299011,
      "learning_rate": 1.614626358819363e-05,
      "loss": 0.8607,
      "step": 2660
    },
    {
      "epoch": 0.57510265830992,
      "grad_norm": 0.8792514204978943,
      "learning_rate": 1.6132526921309598e-05,
      "loss": 0.9271,
      "step": 2661
    },
    {
      "epoch": 0.5753187810676464,
      "grad_norm": 0.9135647416114807,
      "learning_rate": 1.6118792149478432e-05,
      "loss": 0.8542,
      "step": 2662
    },
    {
      "epoch": 0.5755349038253728,
      "grad_norm": 0.7765149474143982,
      "learning_rate": 1.6105059279430132e-05,
      "loss": 0.902,
      "step": 2663
    },
    {
      "epoch": 0.5757510265830992,
      "grad_norm": 0.978321373462677,
      "learning_rate": 1.6091328317893792e-05,
      "loss": 1.0733,
      "step": 2664
    },
    {
      "epoch": 0.5759671493408256,
      "grad_norm": 0.9727914333343506,
      "learning_rate": 1.607759927159753e-05,
      "loss": 0.907,
      "step": 2665
    },
    {
      "epoch": 0.576183272098552,
      "grad_norm": 0.9169484376907349,
      "learning_rate": 1.6063872147268564e-05,
      "loss": 0.9775,
      "step": 2666
    },
    {
      "epoch": 0.5763993948562783,
      "grad_norm": 0.9502250552177429,
      "learning_rate": 1.605014695163315e-05,
      "loss": 0.8968,
      "step": 2667
    },
    {
      "epoch": 0.5766155176140048,
      "grad_norm": 1.0350382328033447,
      "learning_rate": 1.6036423691416597e-05,
      "loss": 0.9144,
      "step": 2668
    },
    {
      "epoch": 0.5768316403717312,
      "grad_norm": 0.9713992476463318,
      "learning_rate": 1.6022702373343274e-05,
      "loss": 0.9253,
      "step": 2669
    },
    {
      "epoch": 0.5770477631294575,
      "grad_norm": 0.9588881731033325,
      "learning_rate": 1.6008983004136586e-05,
      "loss": 0.8175,
      "step": 2670
    },
    {
      "epoch": 0.5772638858871839,
      "grad_norm": 0.878943145275116,
      "learning_rate": 1.5995265590519007e-05,
      "loss": 0.9627,
      "step": 2671
    },
    {
      "epoch": 0.5774800086449103,
      "grad_norm": 0.9545088410377502,
      "learning_rate": 1.5981550139212023e-05,
      "loss": 1.0463,
      "step": 2672
    },
    {
      "epoch": 0.5776961314026366,
      "grad_norm": 0.9589905738830566,
      "learning_rate": 1.5967836656936197e-05,
      "loss": 0.8573,
      "step": 2673
    },
    {
      "epoch": 0.5779122541603631,
      "grad_norm": 1.0416665077209473,
      "learning_rate": 1.5954125150411078e-05,
      "loss": 1.0135,
      "step": 2674
    },
    {
      "epoch": 0.5781283769180895,
      "grad_norm": 1.038084864616394,
      "learning_rate": 1.5940415626355282e-05,
      "loss": 0.9184,
      "step": 2675
    },
    {
      "epoch": 0.5783444996758159,
      "grad_norm": 0.9264064431190491,
      "learning_rate": 1.5926708091486443e-05,
      "loss": 0.9914,
      "step": 2676
    },
    {
      "epoch": 0.5785606224335422,
      "grad_norm": 0.9196184873580933,
      "learning_rate": 1.5913002552521225e-05,
      "loss": 0.7685,
      "step": 2677
    },
    {
      "epoch": 0.5787767451912686,
      "grad_norm": 0.8804293870925903,
      "learning_rate": 1.5899299016175317e-05,
      "loss": 0.8003,
      "step": 2678
    },
    {
      "epoch": 0.5789928679489951,
      "grad_norm": 0.8991097807884216,
      "learning_rate": 1.5885597489163405e-05,
      "loss": 0.923,
      "step": 2679
    },
    {
      "epoch": 0.5792089907067214,
      "grad_norm": 0.8430712223052979,
      "learning_rate": 1.5871897978199213e-05,
      "loss": 0.9133,
      "step": 2680
    },
    {
      "epoch": 0.5794251134644478,
      "grad_norm": 1.0993525981903076,
      "learning_rate": 1.585820048999546e-05,
      "loss": 0.9837,
      "step": 2681
    },
    {
      "epoch": 0.5796412362221742,
      "grad_norm": 0.8528681993484497,
      "learning_rate": 1.5844505031263902e-05,
      "loss": 0.8822,
      "step": 2682
    },
    {
      "epoch": 0.5798573589799005,
      "grad_norm": 0.8468205332756042,
      "learning_rate": 1.5830811608715265e-05,
      "loss": 0.7528,
      "step": 2683
    },
    {
      "epoch": 0.580073481737627,
      "grad_norm": 0.8934227824211121,
      "learning_rate": 1.5817120229059318e-05,
      "loss": 0.9948,
      "step": 2684
    },
    {
      "epoch": 0.5802896044953534,
      "grad_norm": 0.8171609044075012,
      "learning_rate": 1.5803430899004775e-05,
      "loss": 0.9341,
      "step": 2685
    },
    {
      "epoch": 0.5805057272530797,
      "grad_norm": 0.9663010239601135,
      "learning_rate": 1.5789743625259396e-05,
      "loss": 0.8579,
      "step": 2686
    },
    {
      "epoch": 0.5807218500108061,
      "grad_norm": 1.067415714263916,
      "learning_rate": 1.5776058414529903e-05,
      "loss": 0.9969,
      "step": 2687
    },
    {
      "epoch": 0.5809379727685325,
      "grad_norm": 1.0298633575439453,
      "learning_rate": 1.5762375273522024e-05,
      "loss": 1.0491,
      "step": 2688
    },
    {
      "epoch": 0.581154095526259,
      "grad_norm": 0.8125079274177551,
      "learning_rate": 1.5748694208940467e-05,
      "loss": 0.8395,
      "step": 2689
    },
    {
      "epoch": 0.5813702182839853,
      "grad_norm": 0.9669515490531921,
      "learning_rate": 1.5735015227488925e-05,
      "loss": 0.7882,
      "step": 2690
    },
    {
      "epoch": 0.5815863410417117,
      "grad_norm": 0.9027782082557678,
      "learning_rate": 1.5721338335870057e-05,
      "loss": 0.8252,
      "step": 2691
    },
    {
      "epoch": 0.5818024637994381,
      "grad_norm": 1.0070239305496216,
      "learning_rate": 1.570766354078551e-05,
      "loss": 0.9992,
      "step": 2692
    },
    {
      "epoch": 0.5820185865571644,
      "grad_norm": 0.8885738253593445,
      "learning_rate": 1.569399084893591e-05,
      "loss": 0.8881,
      "step": 2693
    },
    {
      "epoch": 0.5822347093148909,
      "grad_norm": 0.9686094522476196,
      "learning_rate": 1.5680320267020836e-05,
      "loss": 0.9656,
      "step": 2694
    },
    {
      "epoch": 0.5824508320726173,
      "grad_norm": 0.9403521418571472,
      "learning_rate": 1.5666651801738856e-05,
      "loss": 0.9953,
      "step": 2695
    },
    {
      "epoch": 0.5826669548303436,
      "grad_norm": 1.0152837038040161,
      "learning_rate": 1.5652985459787464e-05,
      "loss": 0.9059,
      "step": 2696
    },
    {
      "epoch": 0.58288307758807,
      "grad_norm": 0.8296104073524475,
      "learning_rate": 1.5639321247863154e-05,
      "loss": 0.8858,
      "step": 2697
    },
    {
      "epoch": 0.5830992003457964,
      "grad_norm": 1.0700321197509766,
      "learning_rate": 1.562565917266135e-05,
      "loss": 0.9993,
      "step": 2698
    },
    {
      "epoch": 0.5833153231035229,
      "grad_norm": 0.8812270164489746,
      "learning_rate": 1.5611999240876437e-05,
      "loss": 0.9261,
      "step": 2699
    },
    {
      "epoch": 0.5835314458612492,
      "grad_norm": 0.9802578687667847,
      "learning_rate": 1.5598341459201756e-05,
      "loss": 0.9699,
      "step": 2700
    },
    {
      "epoch": 0.5837475686189756,
      "grad_norm": 0.9958237409591675,
      "learning_rate": 1.558468583432959e-05,
      "loss": 1.0226,
      "step": 2701
    },
    {
      "epoch": 0.583963691376702,
      "grad_norm": 0.8533070087432861,
      "learning_rate": 1.5571032372951153e-05,
      "loss": 0.7393,
      "step": 2702
    },
    {
      "epoch": 0.5841798141344283,
      "grad_norm": 0.9260321855545044,
      "learning_rate": 1.555738108175661e-05,
      "loss": 0.7341,
      "step": 2703
    },
    {
      "epoch": 0.5843959368921547,
      "grad_norm": 1.0298815965652466,
      "learning_rate": 1.554373196743507e-05,
      "loss": 0.9243,
      "step": 2704
    },
    {
      "epoch": 0.5846120596498812,
      "grad_norm": 0.9426895976066589,
      "learning_rate": 1.5530085036674563e-05,
      "loss": 0.9307,
      "step": 2705
    },
    {
      "epoch": 0.5848281824076075,
      "grad_norm": 0.9806160926818848,
      "learning_rate": 1.551644029616206e-05,
      "loss": 1.0719,
      "step": 2706
    },
    {
      "epoch": 0.5850443051653339,
      "grad_norm": 1.058203101158142,
      "learning_rate": 1.550279775258345e-05,
      "loss": 0.9036,
      "step": 2707
    },
    {
      "epoch": 0.5852604279230603,
      "grad_norm": 0.9290843605995178,
      "learning_rate": 1.5489157412623538e-05,
      "loss": 0.8886,
      "step": 2708
    },
    {
      "epoch": 0.5854765506807866,
      "grad_norm": 1.087302803993225,
      "learning_rate": 1.547551928296607e-05,
      "loss": 1.0837,
      "step": 2709
    },
    {
      "epoch": 0.5856926734385131,
      "grad_norm": 1.1101616621017456,
      "learning_rate": 1.5461883370293692e-05,
      "loss": 1.0016,
      "step": 2710
    },
    {
      "epoch": 0.5859087961962395,
      "grad_norm": 0.8973838686943054,
      "learning_rate": 1.5448249681287972e-05,
      "loss": 1.0268,
      "step": 2711
    },
    {
      "epoch": 0.5861249189539659,
      "grad_norm": 0.9331724047660828,
      "learning_rate": 1.5434618222629382e-05,
      "loss": 0.8761,
      "step": 2712
    },
    {
      "epoch": 0.5863410417116922,
      "grad_norm": 1.032333254814148,
      "learning_rate": 1.5420989000997324e-05,
      "loss": 0.9178,
      "step": 2713
    },
    {
      "epoch": 0.5865571644694186,
      "grad_norm": 0.8464323282241821,
      "learning_rate": 1.5407362023070057e-05,
      "loss": 0.782,
      "step": 2714
    },
    {
      "epoch": 0.5867732872271451,
      "grad_norm": 0.9692414999008179,
      "learning_rate": 1.539373729552479e-05,
      "loss": 0.8567,
      "step": 2715
    },
    {
      "epoch": 0.5869894099848714,
      "grad_norm": 0.8548213243484497,
      "learning_rate": 1.538011482503759e-05,
      "loss": 0.8791,
      "step": 2716
    },
    {
      "epoch": 0.5872055327425978,
      "grad_norm": 1.0426594018936157,
      "learning_rate": 1.5366494618283453e-05,
      "loss": 0.8803,
      "step": 2717
    },
    {
      "epoch": 0.5874216555003242,
      "grad_norm": 0.8966501355171204,
      "learning_rate": 1.5352876681936245e-05,
      "loss": 0.8331,
      "step": 2718
    },
    {
      "epoch": 0.5876377782580505,
      "grad_norm": 0.9452791810035706,
      "learning_rate": 1.5339261022668717e-05,
      "loss": 0.7207,
      "step": 2719
    },
    {
      "epoch": 0.587853901015777,
      "grad_norm": 1.0118849277496338,
      "learning_rate": 1.5325647647152514e-05,
      "loss": 0.8975,
      "step": 2720
    },
    {
      "epoch": 0.5880700237735034,
      "grad_norm": 0.9325366616249084,
      "learning_rate": 1.5312036562058152e-05,
      "loss": 1.0635,
      "step": 2721
    },
    {
      "epoch": 0.5882861465312298,
      "grad_norm": 0.9577626585960388,
      "learning_rate": 1.5298427774055045e-05,
      "loss": 0.7438,
      "step": 2722
    },
    {
      "epoch": 0.5885022692889561,
      "grad_norm": 0.9435785412788391,
      "learning_rate": 1.5284821289811453e-05,
      "loss": 0.9906,
      "step": 2723
    },
    {
      "epoch": 0.5887183920466825,
      "grad_norm": 0.863259494304657,
      "learning_rate": 1.5271217115994542e-05,
      "loss": 0.8271,
      "step": 2724
    },
    {
      "epoch": 0.588934514804409,
      "grad_norm": 0.9923564791679382,
      "learning_rate": 1.5257615259270302e-05,
      "loss": 0.9293,
      "step": 2725
    },
    {
      "epoch": 0.5891506375621353,
      "grad_norm": 0.9756587743759155,
      "learning_rate": 1.5244015726303626e-05,
      "loss": 0.9125,
      "step": 2726
    },
    {
      "epoch": 0.5893667603198617,
      "grad_norm": 0.8018559813499451,
      "learning_rate": 1.523041852375825e-05,
      "loss": 0.8673,
      "step": 2727
    },
    {
      "epoch": 0.5895828830775881,
      "grad_norm": 1.043648600578308,
      "learning_rate": 1.5216823658296767e-05,
      "loss": 0.8729,
      "step": 2728
    },
    {
      "epoch": 0.5897990058353144,
      "grad_norm": 0.9324607253074646,
      "learning_rate": 1.520323113658064e-05,
      "loss": 0.8696,
      "step": 2729
    },
    {
      "epoch": 0.5900151285930408,
      "grad_norm": 0.9599418640136719,
      "learning_rate": 1.518964096527017e-05,
      "loss": 0.8883,
      "step": 2730
    },
    {
      "epoch": 0.5902312513507673,
      "grad_norm": 0.919252336025238,
      "learning_rate": 1.51760531510245e-05,
      "loss": 0.8387,
      "step": 2731
    },
    {
      "epoch": 0.5904473741084936,
      "grad_norm": 0.9479200839996338,
      "learning_rate": 1.5162467700501635e-05,
      "loss": 0.9632,
      "step": 2732
    },
    {
      "epoch": 0.59066349686622,
      "grad_norm": 1.010833501815796,
      "learning_rate": 1.5148884620358417e-05,
      "loss": 0.7722,
      "step": 2733
    },
    {
      "epoch": 0.5908796196239464,
      "grad_norm": 0.9122352004051208,
      "learning_rate": 1.5135303917250517e-05,
      "loss": 0.8343,
      "step": 2734
    },
    {
      "epoch": 0.5910957423816728,
      "grad_norm": 1.0183535814285278,
      "learning_rate": 1.5121725597832457e-05,
      "loss": 0.772,
      "step": 2735
    },
    {
      "epoch": 0.5913118651393992,
      "grad_norm": 1.1082161664962769,
      "learning_rate": 1.5108149668757578e-05,
      "loss": 1.0553,
      "step": 2736
    },
    {
      "epoch": 0.5915279878971256,
      "grad_norm": 0.9422979950904846,
      "learning_rate": 1.5094576136678052e-05,
      "loss": 0.986,
      "step": 2737
    },
    {
      "epoch": 0.591744110654852,
      "grad_norm": 0.9901473522186279,
      "learning_rate": 1.5081005008244879e-05,
      "loss": 1.1901,
      "step": 2738
    },
    {
      "epoch": 0.5919602334125783,
      "grad_norm": 1.0788806676864624,
      "learning_rate": 1.5067436290107876e-05,
      "loss": 0.9779,
      "step": 2739
    },
    {
      "epoch": 0.5921763561703047,
      "grad_norm": 0.9207198619842529,
      "learning_rate": 1.5053869988915691e-05,
      "loss": 0.8134,
      "step": 2740
    },
    {
      "epoch": 0.5923924789280312,
      "grad_norm": 1.2359508275985718,
      "learning_rate": 1.5040306111315783e-05,
      "loss": 1.0705,
      "step": 2741
    },
    {
      "epoch": 0.5926086016857575,
      "grad_norm": 0.9359343647956848,
      "learning_rate": 1.5026744663954411e-05,
      "loss": 0.8531,
      "step": 2742
    },
    {
      "epoch": 0.5928247244434839,
      "grad_norm": 1.0426597595214844,
      "learning_rate": 1.5013185653476653e-05,
      "loss": 0.8466,
      "step": 2743
    },
    {
      "epoch": 0.5930408472012103,
      "grad_norm": 0.9316443204879761,
      "learning_rate": 1.4999629086526403e-05,
      "loss": 0.9299,
      "step": 2744
    },
    {
      "epoch": 0.5932569699589367,
      "grad_norm": 0.8866399526596069,
      "learning_rate": 1.4986074969746333e-05,
      "loss": 0.8621,
      "step": 2745
    },
    {
      "epoch": 0.593473092716663,
      "grad_norm": 0.9012348651885986,
      "learning_rate": 1.4972523309777947e-05,
      "loss": 0.9353,
      "step": 2746
    },
    {
      "epoch": 0.5936892154743895,
      "grad_norm": 0.8727730512619019,
      "learning_rate": 1.4958974113261518e-05,
      "loss": 0.709,
      "step": 2747
    },
    {
      "epoch": 0.5939053382321159,
      "grad_norm": 0.9378992915153503,
      "learning_rate": 1.4945427386836118e-05,
      "loss": 0.8303,
      "step": 2748
    },
    {
      "epoch": 0.5941214609898422,
      "grad_norm": 0.9986936450004578,
      "learning_rate": 1.4931883137139612e-05,
      "loss": 0.9444,
      "step": 2749
    },
    {
      "epoch": 0.5943375837475686,
      "grad_norm": 1.0609228610992432,
      "learning_rate": 1.4918341370808649e-05,
      "loss": 1.0354,
      "step": 2750
    },
    {
      "epoch": 0.594553706505295,
      "grad_norm": 0.9177163243293762,
      "learning_rate": 1.4904802094478672e-05,
      "loss": 0.8406,
      "step": 2751
    },
    {
      "epoch": 0.5947698292630214,
      "grad_norm": 0.8395483493804932,
      "learning_rate": 1.4891265314783888e-05,
      "loss": 0.8217,
      "step": 2752
    },
    {
      "epoch": 0.5949859520207478,
      "grad_norm": 0.9909489154815674,
      "learning_rate": 1.4877731038357299e-05,
      "loss": 0.9599,
      "step": 2753
    },
    {
      "epoch": 0.5952020747784742,
      "grad_norm": 0.9902933239936829,
      "learning_rate": 1.4864199271830648e-05,
      "loss": 1.0263,
      "step": 2754
    },
    {
      "epoch": 0.5954181975362005,
      "grad_norm": 1.1740868091583252,
      "learning_rate": 1.4850670021834488e-05,
      "loss": 1.0437,
      "step": 2755
    },
    {
      "epoch": 0.5956343202939269,
      "grad_norm": 0.9129568934440613,
      "learning_rate": 1.4837143294998113e-05,
      "loss": 0.8702,
      "step": 2756
    },
    {
      "epoch": 0.5958504430516534,
      "grad_norm": 1.0828653573989868,
      "learning_rate": 1.4823619097949584e-05,
      "loss": 0.7669,
      "step": 2757
    },
    {
      "epoch": 0.5960665658093798,
      "grad_norm": 0.9232279062271118,
      "learning_rate": 1.481009743731574e-05,
      "loss": 0.7958,
      "step": 2758
    },
    {
      "epoch": 0.5962826885671061,
      "grad_norm": 0.910819947719574,
      "learning_rate": 1.479657831972215e-05,
      "loss": 0.9502,
      "step": 2759
    },
    {
      "epoch": 0.5964988113248325,
      "grad_norm": 0.8901724815368652,
      "learning_rate": 1.4783061751793155e-05,
      "loss": 0.8451,
      "step": 2760
    },
    {
      "epoch": 0.5967149340825589,
      "grad_norm": 0.8953905701637268,
      "learning_rate": 1.4769547740151838e-05,
      "loss": 0.9198,
      "step": 2761
    },
    {
      "epoch": 0.5969310568402852,
      "grad_norm": 0.9130937457084656,
      "learning_rate": 1.475603629142004e-05,
      "loss": 0.7836,
      "step": 2762
    },
    {
      "epoch": 0.5971471795980117,
      "grad_norm": 0.9950690865516663,
      "learning_rate": 1.4742527412218332e-05,
      "loss": 0.9808,
      "step": 2763
    },
    {
      "epoch": 0.5973633023557381,
      "grad_norm": 1.0196300745010376,
      "learning_rate": 1.4729021109166053e-05,
      "loss": 0.7786,
      "step": 2764
    },
    {
      "epoch": 0.5975794251134644,
      "grad_norm": 0.91399085521698,
      "learning_rate": 1.4715517388881234e-05,
      "loss": 0.8409,
      "step": 2765
    },
    {
      "epoch": 0.5977955478711908,
      "grad_norm": 1.1146055459976196,
      "learning_rate": 1.470201625798068e-05,
      "loss": 0.7907,
      "step": 2766
    },
    {
      "epoch": 0.5980116706289172,
      "grad_norm": 0.9721848368644714,
      "learning_rate": 1.4688517723079914e-05,
      "loss": 0.8861,
      "step": 2767
    },
    {
      "epoch": 0.5982277933866436,
      "grad_norm": 0.876146674156189,
      "learning_rate": 1.4675021790793182e-05,
      "loss": 0.9064,
      "step": 2768
    },
    {
      "epoch": 0.59844391614437,
      "grad_norm": 0.8468629121780396,
      "learning_rate": 1.4661528467733465e-05,
      "loss": 1.0736,
      "step": 2769
    },
    {
      "epoch": 0.5986600389020964,
      "grad_norm": 1.004036545753479,
      "learning_rate": 1.4648037760512464e-05,
      "loss": 0.8996,
      "step": 2770
    },
    {
      "epoch": 0.5988761616598228,
      "grad_norm": 1.0619404315948486,
      "learning_rate": 1.4634549675740584e-05,
      "loss": 1.1611,
      "step": 2771
    },
    {
      "epoch": 0.5990922844175491,
      "grad_norm": 0.9154078960418701,
      "learning_rate": 1.4621064220026955e-05,
      "loss": 0.8513,
      "step": 2772
    },
    {
      "epoch": 0.5993084071752756,
      "grad_norm": 0.9970297813415527,
      "learning_rate": 1.4607581399979427e-05,
      "loss": 0.8516,
      "step": 2773
    },
    {
      "epoch": 0.599524529933002,
      "grad_norm": 1.0168700218200684,
      "learning_rate": 1.4594101222204544e-05,
      "loss": 0.8145,
      "step": 2774
    },
    {
      "epoch": 0.5997406526907283,
      "grad_norm": 0.8243957161903381,
      "learning_rate": 1.4580623693307572e-05,
      "loss": 0.7947,
      "step": 2775
    },
    {
      "epoch": 0.5999567754484547,
      "grad_norm": 0.9932655096054077,
      "learning_rate": 1.4567148819892464e-05,
      "loss": 0.8704,
      "step": 2776
    },
    {
      "epoch": 0.6001728982061811,
      "grad_norm": 1.0828499794006348,
      "learning_rate": 1.4553676608561872e-05,
      "loss": 1.0422,
      "step": 2777
    },
    {
      "epoch": 0.6003890209639074,
      "grad_norm": 1.019808292388916,
      "learning_rate": 1.4540207065917152e-05,
      "loss": 0.9274,
      "step": 2778
    },
    {
      "epoch": 0.6006051437216339,
      "grad_norm": 0.9077752828598022,
      "learning_rate": 1.4526740198558345e-05,
      "loss": 0.9341,
      "step": 2779
    },
    {
      "epoch": 0.6008212664793603,
      "grad_norm": 0.9558945894241333,
      "learning_rate": 1.451327601308419e-05,
      "loss": 0.937,
      "step": 2780
    },
    {
      "epoch": 0.6010373892370867,
      "grad_norm": 1.0341992378234863,
      "learning_rate": 1.4499814516092113e-05,
      "loss": 0.8255,
      "step": 2781
    },
    {
      "epoch": 0.601253511994813,
      "grad_norm": 0.9370862245559692,
      "learning_rate": 1.44863557141782e-05,
      "loss": 1.0031,
      "step": 2782
    },
    {
      "epoch": 0.6014696347525395,
      "grad_norm": 0.9899297952651978,
      "learning_rate": 1.447289961393724e-05,
      "loss": 0.9807,
      "step": 2783
    },
    {
      "epoch": 0.6016857575102659,
      "grad_norm": 1.0725080966949463,
      "learning_rate": 1.4459446221962697e-05,
      "loss": 1.0988,
      "step": 2784
    },
    {
      "epoch": 0.6019018802679922,
      "grad_norm": 0.9283778071403503,
      "learning_rate": 1.4445995544846694e-05,
      "loss": 0.9264,
      "step": 2785
    },
    {
      "epoch": 0.6021180030257186,
      "grad_norm": 0.9730119109153748,
      "learning_rate": 1.4432547589180033e-05,
      "loss": 0.7145,
      "step": 2786
    },
    {
      "epoch": 0.602334125783445,
      "grad_norm": 0.9832450747489929,
      "learning_rate": 1.4419102361552191e-05,
      "loss": 0.8735,
      "step": 2787
    },
    {
      "epoch": 0.6025502485411713,
      "grad_norm": 0.8619163632392883,
      "learning_rate": 1.4405659868551287e-05,
      "loss": 0.7491,
      "step": 2788
    },
    {
      "epoch": 0.6027663712988978,
      "grad_norm": 0.9204447269439697,
      "learning_rate": 1.4392220116764116e-05,
      "loss": 0.8984,
      "step": 2789
    },
    {
      "epoch": 0.6029824940566242,
      "grad_norm": 1.0223321914672852,
      "learning_rate": 1.4378783112776119e-05,
      "loss": 0.9145,
      "step": 2790
    },
    {
      "epoch": 0.6031986168143505,
      "grad_norm": 0.9550166130065918,
      "learning_rate": 1.4365348863171406e-05,
      "loss": 0.8271,
      "step": 2791
    },
    {
      "epoch": 0.6034147395720769,
      "grad_norm": 0.9865041971206665,
      "learning_rate": 1.4351917374532725e-05,
      "loss": 0.9244,
      "step": 2792
    },
    {
      "epoch": 0.6036308623298033,
      "grad_norm": 0.9395430088043213,
      "learning_rate": 1.4338488653441482e-05,
      "loss": 0.8319,
      "step": 2793
    },
    {
      "epoch": 0.6038469850875298,
      "grad_norm": 0.9528705477714539,
      "learning_rate": 1.43250627064777e-05,
      "loss": 0.9107,
      "step": 2794
    },
    {
      "epoch": 0.6040631078452561,
      "grad_norm": 0.9629172682762146,
      "learning_rate": 1.4311639540220075e-05,
      "loss": 0.8418,
      "step": 2795
    },
    {
      "epoch": 0.6042792306029825,
      "grad_norm": 0.8943168520927429,
      "learning_rate": 1.4298219161245927e-05,
      "loss": 0.8928,
      "step": 2796
    },
    {
      "epoch": 0.6044953533607089,
      "grad_norm": 0.8976085186004639,
      "learning_rate": 1.428480157613121e-05,
      "loss": 1.0248,
      "step": 2797
    },
    {
      "epoch": 0.6047114761184352,
      "grad_norm": 0.9356058239936829,
      "learning_rate": 1.4271386791450508e-05,
      "loss": 1.0223,
      "step": 2798
    },
    {
      "epoch": 0.6049275988761617,
      "grad_norm": 0.9700066447257996,
      "learning_rate": 1.425797481377704e-05,
      "loss": 0.8128,
      "step": 2799
    },
    {
      "epoch": 0.6051437216338881,
      "grad_norm": 1.0174139738082886,
      "learning_rate": 1.4244565649682636e-05,
      "loss": 0.9016,
      "step": 2800
    },
    {
      "epoch": 0.6053598443916144,
      "grad_norm": 1.014001727104187,
      "learning_rate": 1.4231159305737757e-05,
      "loss": 0.9467,
      "step": 2801
    },
    {
      "epoch": 0.6055759671493408,
      "grad_norm": 0.8730211853981018,
      "learning_rate": 1.4217755788511485e-05,
      "loss": 0.8795,
      "step": 2802
    },
    {
      "epoch": 0.6057920899070672,
      "grad_norm": 0.8965271711349487,
      "learning_rate": 1.4204355104571506e-05,
      "loss": 0.8219,
      "step": 2803
    },
    {
      "epoch": 0.6060082126647937,
      "grad_norm": 0.9582975506782532,
      "learning_rate": 1.419095726048414e-05,
      "loss": 0.8053,
      "step": 2804
    },
    {
      "epoch": 0.60622433542252,
      "grad_norm": 1.272420883178711,
      "learning_rate": 1.4177562262814277e-05,
      "loss": 0.8486,
      "step": 2805
    },
    {
      "epoch": 0.6064404581802464,
      "grad_norm": 1.0263395309448242,
      "learning_rate": 1.4164170118125448e-05,
      "loss": 0.9882,
      "step": 2806
    },
    {
      "epoch": 0.6066565809379728,
      "grad_norm": 1.087254524230957,
      "learning_rate": 1.415078083297977e-05,
      "loss": 1.0561,
      "step": 2807
    },
    {
      "epoch": 0.6068727036956991,
      "grad_norm": 0.9583911299705505,
      "learning_rate": 1.4137394413937959e-05,
      "loss": 1.1197,
      "step": 2808
    },
    {
      "epoch": 0.6070888264534255,
      "grad_norm": 0.8234987854957581,
      "learning_rate": 1.4124010867559339e-05,
      "loss": 0.8728,
      "step": 2809
    },
    {
      "epoch": 0.607304949211152,
      "grad_norm": 0.9210377931594849,
      "learning_rate": 1.411063020040181e-05,
      "loss": 0.8823,
      "step": 2810
    },
    {
      "epoch": 0.6075210719688783,
      "grad_norm": 0.9682777523994446,
      "learning_rate": 1.4097252419021871e-05,
      "loss": 0.8733,
      "step": 2811
    },
    {
      "epoch": 0.6077371947266047,
      "grad_norm": 0.9390709400177002,
      "learning_rate": 1.4083877529974594e-05,
      "loss": 1.0549,
      "step": 2812
    },
    {
      "epoch": 0.6079533174843311,
      "grad_norm": 0.9637075662612915,
      "learning_rate": 1.4070505539813654e-05,
      "loss": 0.8418,
      "step": 2813
    },
    {
      "epoch": 0.6081694402420574,
      "grad_norm": 1.1636171340942383,
      "learning_rate": 1.4057136455091293e-05,
      "loss": 0.901,
      "step": 2814
    },
    {
      "epoch": 0.6083855629997839,
      "grad_norm": 0.9594619870185852,
      "learning_rate": 1.4043770282358332e-05,
      "loss": 1.0105,
      "step": 2815
    },
    {
      "epoch": 0.6086016857575103,
      "grad_norm": 0.9706513285636902,
      "learning_rate": 1.4030407028164165e-05,
      "loss": 0.9269,
      "step": 2816
    },
    {
      "epoch": 0.6088178085152367,
      "grad_norm": 1.107113242149353,
      "learning_rate": 1.4017046699056753e-05,
      "loss": 1.0875,
      "step": 2817
    },
    {
      "epoch": 0.609033931272963,
      "grad_norm": 0.9278103113174438,
      "learning_rate": 1.4003689301582628e-05,
      "loss": 0.9864,
      "step": 2818
    },
    {
      "epoch": 0.6092500540306894,
      "grad_norm": 0.8669817447662354,
      "learning_rate": 1.3990334842286881e-05,
      "loss": 0.9355,
      "step": 2819
    },
    {
      "epoch": 0.6094661767884159,
      "grad_norm": 1.051826000213623,
      "learning_rate": 1.3976983327713172e-05,
      "loss": 1.1648,
      "step": 2820
    },
    {
      "epoch": 0.6096822995461422,
      "grad_norm": 1.1233115196228027,
      "learning_rate": 1.3963634764403714e-05,
      "loss": 1.042,
      "step": 2821
    },
    {
      "epoch": 0.6098984223038686,
      "grad_norm": 0.9370507597923279,
      "learning_rate": 1.3950289158899262e-05,
      "loss": 0.7103,
      "step": 2822
    },
    {
      "epoch": 0.610114545061595,
      "grad_norm": 1.1144932508468628,
      "learning_rate": 1.3936946517739132e-05,
      "loss": 0.911,
      "step": 2823
    },
    {
      "epoch": 0.6103306678193213,
      "grad_norm": 1.068408727645874,
      "learning_rate": 1.39236068474612e-05,
      "loss": 0.9533,
      "step": 2824
    },
    {
      "epoch": 0.6105467905770477,
      "grad_norm": 1.0667728185653687,
      "learning_rate": 1.3910270154601864e-05,
      "loss": 0.9732,
      "step": 2825
    },
    {
      "epoch": 0.6107629133347742,
      "grad_norm": 1.15886390209198,
      "learning_rate": 1.389693644569607e-05,
      "loss": 1.1132,
      "step": 2826
    },
    {
      "epoch": 0.6109790360925006,
      "grad_norm": 0.9730468392372131,
      "learning_rate": 1.3883605727277319e-05,
      "loss": 0.893,
      "step": 2827
    },
    {
      "epoch": 0.6111951588502269,
      "grad_norm": 0.945916473865509,
      "learning_rate": 1.3870278005877617e-05,
      "loss": 0.8653,
      "step": 2828
    },
    {
      "epoch": 0.6114112816079533,
      "grad_norm": 1.081801414489746,
      "learning_rate": 1.3856953288027524e-05,
      "loss": 1.001,
      "step": 2829
    },
    {
      "epoch": 0.6116274043656797,
      "grad_norm": 0.8034911155700684,
      "learning_rate": 1.3843631580256114e-05,
      "loss": 0.7614,
      "step": 2830
    },
    {
      "epoch": 0.6118435271234061,
      "grad_norm": 0.9153169393539429,
      "learning_rate": 1.3830312889091003e-05,
      "loss": 0.908,
      "step": 2831
    },
    {
      "epoch": 0.6120596498811325,
      "grad_norm": 0.9824167490005493,
      "learning_rate": 1.381699722105831e-05,
      "loss": 0.9988,
      "step": 2832
    },
    {
      "epoch": 0.6122757726388589,
      "grad_norm": 1.022257685661316,
      "learning_rate": 1.3803684582682697e-05,
      "loss": 0.988,
      "step": 2833
    },
    {
      "epoch": 0.6124918953965852,
      "grad_norm": 0.7998851537704468,
      "learning_rate": 1.3790374980487303e-05,
      "loss": 0.7413,
      "step": 2834
    },
    {
      "epoch": 0.6127080181543116,
      "grad_norm": 0.877083420753479,
      "learning_rate": 1.3777068420993818e-05,
      "loss": 0.912,
      "step": 2835
    },
    {
      "epoch": 0.6129241409120381,
      "grad_norm": 0.9918305277824402,
      "learning_rate": 1.3763764910722422e-05,
      "loss": 0.9743,
      "step": 2836
    },
    {
      "epoch": 0.6131402636697644,
      "grad_norm": 0.837921142578125,
      "learning_rate": 1.37504644561918e-05,
      "loss": 0.8085,
      "step": 2837
    },
    {
      "epoch": 0.6133563864274908,
      "grad_norm": 0.8603920936584473,
      "learning_rate": 1.3737167063919148e-05,
      "loss": 1.0102,
      "step": 2838
    },
    {
      "epoch": 0.6135725091852172,
      "grad_norm": 1.0279122591018677,
      "learning_rate": 1.3723872740420165e-05,
      "loss": 1.1155,
      "step": 2839
    },
    {
      "epoch": 0.6137886319429436,
      "grad_norm": 0.9947484135627747,
      "learning_rate": 1.3710581492209025e-05,
      "loss": 0.8671,
      "step": 2840
    },
    {
      "epoch": 0.61400475470067,
      "grad_norm": 0.9215602874755859,
      "learning_rate": 1.369729332579841e-05,
      "loss": 0.9884,
      "step": 2841
    },
    {
      "epoch": 0.6142208774583964,
      "grad_norm": 0.9822049140930176,
      "learning_rate": 1.3684008247699505e-05,
      "loss": 0.9937,
      "step": 2842
    },
    {
      "epoch": 0.6144370002161228,
      "grad_norm": 0.9134843945503235,
      "learning_rate": 1.3670726264421954e-05,
      "loss": 1.0323,
      "step": 2843
    },
    {
      "epoch": 0.6146531229738491,
      "grad_norm": 1.0407973527908325,
      "learning_rate": 1.3657447382473905e-05,
      "loss": 0.9765,
      "step": 2844
    },
    {
      "epoch": 0.6148692457315755,
      "grad_norm": 0.8549898266792297,
      "learning_rate": 1.364417160836197e-05,
      "loss": 0.8894,
      "step": 2845
    },
    {
      "epoch": 0.615085368489302,
      "grad_norm": 1.0176432132720947,
      "learning_rate": 1.363089894859126e-05,
      "loss": 0.8621,
      "step": 2846
    },
    {
      "epoch": 0.6153014912470283,
      "grad_norm": 0.8429177403450012,
      "learning_rate": 1.3617629409665338e-05,
      "loss": 0.7707,
      "step": 2847
    },
    {
      "epoch": 0.6155176140047547,
      "grad_norm": 1.0289604663848877,
      "learning_rate": 1.3604362998086251e-05,
      "loss": 0.9811,
      "step": 2848
    },
    {
      "epoch": 0.6157337367624811,
      "grad_norm": 1.0498583316802979,
      "learning_rate": 1.3591099720354515e-05,
      "loss": 0.8922,
      "step": 2849
    },
    {
      "epoch": 0.6159498595202075,
      "grad_norm": 0.9106667637825012,
      "learning_rate": 1.3577839582969104e-05,
      "loss": 0.9103,
      "step": 2850
    },
    {
      "epoch": 0.6161659822779338,
      "grad_norm": 1.018824815750122,
      "learning_rate": 1.3564582592427445e-05,
      "loss": 0.8685,
      "step": 2851
    },
    {
      "epoch": 0.6163821050356603,
      "grad_norm": 0.886332094669342,
      "learning_rate": 1.3551328755225437e-05,
      "loss": 0.9994,
      "step": 2852
    },
    {
      "epoch": 0.6165982277933867,
      "grad_norm": 1.0133635997772217,
      "learning_rate": 1.3538078077857435e-05,
      "loss": 0.8734,
      "step": 2853
    },
    {
      "epoch": 0.616814350551113,
      "grad_norm": 0.9232343435287476,
      "learning_rate": 1.3524830566816239e-05,
      "loss": 0.8883,
      "step": 2854
    },
    {
      "epoch": 0.6170304733088394,
      "grad_norm": 0.888059675693512,
      "learning_rate": 1.3511586228593089e-05,
      "loss": 1.0347,
      "step": 2855
    },
    {
      "epoch": 0.6172465960665658,
      "grad_norm": 1.0011212825775146,
      "learning_rate": 1.3498345069677698e-05,
      "loss": 1.0151,
      "step": 2856
    },
    {
      "epoch": 0.6174627188242922,
      "grad_norm": 1.1768680810928345,
      "learning_rate": 1.348510709655819e-05,
      "loss": 0.9633,
      "step": 2857
    },
    {
      "epoch": 0.6176788415820186,
      "grad_norm": 0.9705842137336731,
      "learning_rate": 1.3471872315721144e-05,
      "loss": 0.8847,
      "step": 2858
    },
    {
      "epoch": 0.617894964339745,
      "grad_norm": 0.9357125163078308,
      "learning_rate": 1.345864073365157e-05,
      "loss": 0.8738,
      "step": 2859
    },
    {
      "epoch": 0.6181110870974713,
      "grad_norm": 0.8916847705841064,
      "learning_rate": 1.3445412356832917e-05,
      "loss": 1.0725,
      "step": 2860
    },
    {
      "epoch": 0.6183272098551977,
      "grad_norm": 1.191519856452942,
      "learning_rate": 1.3432187191747059e-05,
      "loss": 1.0197,
      "step": 2861
    },
    {
      "epoch": 0.6185433326129242,
      "grad_norm": 1.018497347831726,
      "learning_rate": 1.3418965244874293e-05,
      "loss": 0.8523,
      "step": 2862
    },
    {
      "epoch": 0.6187594553706506,
      "grad_norm": 0.9003714323043823,
      "learning_rate": 1.3405746522693339e-05,
      "loss": 0.8863,
      "step": 2863
    },
    {
      "epoch": 0.6189755781283769,
      "grad_norm": 0.9653944373130798,
      "learning_rate": 1.3392531031681352e-05,
      "loss": 0.8744,
      "step": 2864
    },
    {
      "epoch": 0.6191917008861033,
      "grad_norm": 1.1506441831588745,
      "learning_rate": 1.3379318778313883e-05,
      "loss": 0.9678,
      "step": 2865
    },
    {
      "epoch": 0.6194078236438297,
      "grad_norm": 0.8557504415512085,
      "learning_rate": 1.3366109769064903e-05,
      "loss": 0.7498,
      "step": 2866
    },
    {
      "epoch": 0.619623946401556,
      "grad_norm": 0.9204830527305603,
      "learning_rate": 1.3352904010406811e-05,
      "loss": 0.9042,
      "step": 2867
    },
    {
      "epoch": 0.6198400691592825,
      "grad_norm": 0.9908314943313599,
      "learning_rate": 1.3339701508810384e-05,
      "loss": 1.0972,
      "step": 2868
    },
    {
      "epoch": 0.6200561919170089,
      "grad_norm": 0.9415073394775391,
      "learning_rate": 1.3326502270744819e-05,
      "loss": 0.9935,
      "step": 2869
    },
    {
      "epoch": 0.6202723146747352,
      "grad_norm": 1.0607409477233887,
      "learning_rate": 1.3313306302677711e-05,
      "loss": 0.9559,
      "step": 2870
    },
    {
      "epoch": 0.6204884374324616,
      "grad_norm": 0.9476536512374878,
      "learning_rate": 1.3300113611075061e-05,
      "loss": 0.8337,
      "step": 2871
    },
    {
      "epoch": 0.620704560190188,
      "grad_norm": 0.937411367893219,
      "learning_rate": 1.3286924202401257e-05,
      "loss": 0.8395,
      "step": 2872
    },
    {
      "epoch": 0.6209206829479145,
      "grad_norm": 0.8920255899429321,
      "learning_rate": 1.3273738083119074e-05,
      "loss": 0.7921,
      "step": 2873
    },
    {
      "epoch": 0.6211368057056408,
      "grad_norm": 1.0879623889923096,
      "learning_rate": 1.3260555259689678e-05,
      "loss": 1.0492,
      "step": 2874
    },
    {
      "epoch": 0.6213529284633672,
      "grad_norm": 0.9902235269546509,
      "learning_rate": 1.3247375738572628e-05,
      "loss": 0.9859,
      "step": 2875
    },
    {
      "epoch": 0.6215690512210936,
      "grad_norm": 0.8703365921974182,
      "learning_rate": 1.3234199526225858e-05,
      "loss": 0.9271,
      "step": 2876
    },
    {
      "epoch": 0.6217851739788199,
      "grad_norm": 0.943744957447052,
      "learning_rate": 1.3221026629105672e-05,
      "loss": 0.9108,
      "step": 2877
    },
    {
      "epoch": 0.6220012967365464,
      "grad_norm": 1.1115494966506958,
      "learning_rate": 1.3207857053666773e-05,
      "loss": 0.9466,
      "step": 2878
    },
    {
      "epoch": 0.6222174194942728,
      "grad_norm": 0.9848674535751343,
      "learning_rate": 1.319469080636222e-05,
      "loss": 1.085,
      "step": 2879
    },
    {
      "epoch": 0.6224335422519991,
      "grad_norm": 1.033340334892273,
      "learning_rate": 1.3181527893643437e-05,
      "loss": 0.9287,
      "step": 2880
    },
    {
      "epoch": 0.6226496650097255,
      "grad_norm": 0.8988987803459167,
      "learning_rate": 1.3168368321960218e-05,
      "loss": 0.9669,
      "step": 2881
    },
    {
      "epoch": 0.6228657877674519,
      "grad_norm": 0.9278745055198669,
      "learning_rate": 1.3155212097760736e-05,
      "loss": 0.8758,
      "step": 2882
    },
    {
      "epoch": 0.6230819105251783,
      "grad_norm": 0.976412296295166,
      "learning_rate": 1.3142059227491501e-05,
      "loss": 0.9732,
      "step": 2883
    },
    {
      "epoch": 0.6232980332829047,
      "grad_norm": 1.006489872932434,
      "learning_rate": 1.3128909717597397e-05,
      "loss": 0.9468,
      "step": 2884
    },
    {
      "epoch": 0.6235141560406311,
      "grad_norm": 1.134055256843567,
      "learning_rate": 1.3115763574521641e-05,
      "loss": 0.9543,
      "step": 2885
    },
    {
      "epoch": 0.6237302787983575,
      "grad_norm": 0.9546724557876587,
      "learning_rate": 1.3102620804705818e-05,
      "loss": 0.7967,
      "step": 2886
    },
    {
      "epoch": 0.6239464015560838,
      "grad_norm": 1.0579935312271118,
      "learning_rate": 1.308948141458986e-05,
      "loss": 1.0239,
      "step": 2887
    },
    {
      "epoch": 0.6241625243138103,
      "grad_norm": 1.0004938840866089,
      "learning_rate": 1.307634541061203e-05,
      "loss": 0.831,
      "step": 2888
    },
    {
      "epoch": 0.6243786470715367,
      "grad_norm": 1.0822181701660156,
      "learning_rate": 1.3063212799208947e-05,
      "loss": 0.9237,
      "step": 2889
    },
    {
      "epoch": 0.624594769829263,
      "grad_norm": 0.9382985234260559,
      "learning_rate": 1.305008358681556e-05,
      "loss": 0.8923,
      "step": 2890
    },
    {
      "epoch": 0.6248108925869894,
      "grad_norm": 0.9579905271530151,
      "learning_rate": 1.3036957779865147e-05,
      "loss": 0.9521,
      "step": 2891
    },
    {
      "epoch": 0.6250270153447158,
      "grad_norm": 1.0470249652862549,
      "learning_rate": 1.302383538478932e-05,
      "loss": 0.9409,
      "step": 2892
    },
    {
      "epoch": 0.6252431381024421,
      "grad_norm": 1.0197440385818481,
      "learning_rate": 1.3010716408018037e-05,
      "loss": 0.946,
      "step": 2893
    },
    {
      "epoch": 0.6254592608601686,
      "grad_norm": 0.8235803246498108,
      "learning_rate": 1.299760085597955e-05,
      "loss": 0.8161,
      "step": 2894
    },
    {
      "epoch": 0.625675383617895,
      "grad_norm": 1.1165820360183716,
      "learning_rate": 1.2984488735100458e-05,
      "loss": 0.8393,
      "step": 2895
    },
    {
      "epoch": 0.6258915063756213,
      "grad_norm": 1.1988672018051147,
      "learning_rate": 1.2971380051805673e-05,
      "loss": 1.1965,
      "step": 2896
    },
    {
      "epoch": 0.6261076291333477,
      "grad_norm": 0.9154154658317566,
      "learning_rate": 1.2958274812518413e-05,
      "loss": 0.9277,
      "step": 2897
    },
    {
      "epoch": 0.6263237518910741,
      "grad_norm": 0.9022080302238464,
      "learning_rate": 1.2945173023660216e-05,
      "loss": 0.8837,
      "step": 2898
    },
    {
      "epoch": 0.6265398746488006,
      "grad_norm": 0.946685791015625,
      "learning_rate": 1.2932074691650925e-05,
      "loss": 0.8566,
      "step": 2899
    },
    {
      "epoch": 0.6267559974065269,
      "grad_norm": 1.0265495777130127,
      "learning_rate": 1.2918979822908697e-05,
      "loss": 0.8907,
      "step": 2900
    },
    {
      "epoch": 0.6269721201642533,
      "grad_norm": 0.9504611492156982,
      "learning_rate": 1.2905888423849991e-05,
      "loss": 0.8348,
      "step": 2901
    },
    {
      "epoch": 0.6271882429219797,
      "grad_norm": 0.9449243545532227,
      "learning_rate": 1.289280050088956e-05,
      "loss": 0.7915,
      "step": 2902
    },
    {
      "epoch": 0.627404365679706,
      "grad_norm": 1.0838656425476074,
      "learning_rate": 1.2879716060440446e-05,
      "loss": 0.9741,
      "step": 2903
    },
    {
      "epoch": 0.6276204884374325,
      "grad_norm": 0.9667279124259949,
      "learning_rate": 1.2866635108914007e-05,
      "loss": 0.8818,
      "step": 2904
    },
    {
      "epoch": 0.6278366111951589,
      "grad_norm": 0.9566159248352051,
      "learning_rate": 1.2853557652719877e-05,
      "loss": 0.876,
      "step": 2905
    },
    {
      "epoch": 0.6280527339528852,
      "grad_norm": 0.9707115888595581,
      "learning_rate": 1.2840483698265971e-05,
      "loss": 0.8774,
      "step": 2906
    },
    {
      "epoch": 0.6282688567106116,
      "grad_norm": 0.8697360157966614,
      "learning_rate": 1.282741325195851e-05,
      "loss": 0.9508,
      "step": 2907
    },
    {
      "epoch": 0.628484979468338,
      "grad_norm": 0.8760444521903992,
      "learning_rate": 1.281434632020197e-05,
      "loss": 0.8965,
      "step": 2908
    },
    {
      "epoch": 0.6287011022260645,
      "grad_norm": 0.8903259038925171,
      "learning_rate": 1.2801282909399126e-05,
      "loss": 0.9143,
      "step": 2909
    },
    {
      "epoch": 0.6289172249837908,
      "grad_norm": 1.049411654472351,
      "learning_rate": 1.278822302595101e-05,
      "loss": 0.9625,
      "step": 2910
    },
    {
      "epoch": 0.6291333477415172,
      "grad_norm": 0.9313656091690063,
      "learning_rate": 1.2775166676256942e-05,
      "loss": 0.8914,
      "step": 2911
    },
    {
      "epoch": 0.6293494704992436,
      "grad_norm": 0.9373122453689575,
      "learning_rate": 1.2762113866714503e-05,
      "loss": 0.9475,
      "step": 2912
    },
    {
      "epoch": 0.6295655932569699,
      "grad_norm": 1.0107008218765259,
      "learning_rate": 1.2749064603719541e-05,
      "loss": 0.9687,
      "step": 2913
    },
    {
      "epoch": 0.6297817160146963,
      "grad_norm": 1.0947474241256714,
      "learning_rate": 1.2736018893666154e-05,
      "loss": 1.0293,
      "step": 2914
    },
    {
      "epoch": 0.6299978387724228,
      "grad_norm": 1.0009697675704956,
      "learning_rate": 1.2722976742946719e-05,
      "loss": 0.8728,
      "step": 2915
    },
    {
      "epoch": 0.6302139615301491,
      "grad_norm": 0.8761113286018372,
      "learning_rate": 1.270993815795186e-05,
      "loss": 0.8043,
      "step": 2916
    },
    {
      "epoch": 0.6304300842878755,
      "grad_norm": 1.0249576568603516,
      "learning_rate": 1.2696903145070447e-05,
      "loss": 0.9708,
      "step": 2917
    },
    {
      "epoch": 0.6306462070456019,
      "grad_norm": 1.007078766822815,
      "learning_rate": 1.2683871710689614e-05,
      "loss": 1.0561,
      "step": 2918
    },
    {
      "epoch": 0.6308623298033282,
      "grad_norm": 0.9375736117362976,
      "learning_rate": 1.2670843861194737e-05,
      "loss": 0.8983,
      "step": 2919
    },
    {
      "epoch": 0.6310784525610547,
      "grad_norm": 0.883455753326416,
      "learning_rate": 1.265781960296942e-05,
      "loss": 0.8459,
      "step": 2920
    },
    {
      "epoch": 0.6312945753187811,
      "grad_norm": 1.0403642654418945,
      "learning_rate": 1.264479894239552e-05,
      "loss": 0.9753,
      "step": 2921
    },
    {
      "epoch": 0.6315106980765075,
      "grad_norm": 0.8808861970901489,
      "learning_rate": 1.2631781885853141e-05,
      "loss": 0.7907,
      "step": 2922
    },
    {
      "epoch": 0.6317268208342338,
      "grad_norm": 1.1256625652313232,
      "learning_rate": 1.2618768439720603e-05,
      "loss": 0.8048,
      "step": 2923
    },
    {
      "epoch": 0.6319429435919602,
      "grad_norm": 0.9758326411247253,
      "learning_rate": 1.260575861037447e-05,
      "loss": 1.0097,
      "step": 2924
    },
    {
      "epoch": 0.6321590663496867,
      "grad_norm": 1.0359525680541992,
      "learning_rate": 1.259275240418952e-05,
      "loss": 0.9651,
      "step": 2925
    },
    {
      "epoch": 0.632375189107413,
      "grad_norm": 0.8884275555610657,
      "learning_rate": 1.257974982753877e-05,
      "loss": 1.0232,
      "step": 2926
    },
    {
      "epoch": 0.6325913118651394,
      "grad_norm": 1.0615814924240112,
      "learning_rate": 1.2566750886793453e-05,
      "loss": 0.7354,
      "step": 2927
    },
    {
      "epoch": 0.6328074346228658,
      "grad_norm": 0.9971369504928589,
      "learning_rate": 1.2553755588323014e-05,
      "loss": 0.9903,
      "step": 2928
    },
    {
      "epoch": 0.6330235573805921,
      "grad_norm": 1.0728267431259155,
      "learning_rate": 1.2540763938495127e-05,
      "loss": 0.7252,
      "step": 2929
    },
    {
      "epoch": 0.6332396801383186,
      "grad_norm": 0.9396435618400574,
      "learning_rate": 1.2527775943675673e-05,
      "loss": 0.8694,
      "step": 2930
    },
    {
      "epoch": 0.633455802896045,
      "grad_norm": 0.9887653589248657,
      "learning_rate": 1.2514791610228727e-05,
      "loss": 0.93,
      "step": 2931
    },
    {
      "epoch": 0.6336719256537714,
      "grad_norm": 0.9077363014221191,
      "learning_rate": 1.2501810944516585e-05,
      "loss": 0.7585,
      "step": 2932
    },
    {
      "epoch": 0.6338880484114977,
      "grad_norm": 0.8319936394691467,
      "learning_rate": 1.248883395289975e-05,
      "loss": 0.9588,
      "step": 2933
    },
    {
      "epoch": 0.6341041711692241,
      "grad_norm": 0.9702330827713013,
      "learning_rate": 1.2475860641736917e-05,
      "loss": 0.8698,
      "step": 2934
    },
    {
      "epoch": 0.6343202939269506,
      "grad_norm": 1.0369808673858643,
      "learning_rate": 1.2462891017384971e-05,
      "loss": 0.9861,
      "step": 2935
    },
    {
      "epoch": 0.6345364166846769,
      "grad_norm": 1.0273332595825195,
      "learning_rate": 1.244992508619901e-05,
      "loss": 0.9162,
      "step": 2936
    },
    {
      "epoch": 0.6347525394424033,
      "grad_norm": 0.8920016884803772,
      "learning_rate": 1.2436962854532302e-05,
      "loss": 0.8125,
      "step": 2937
    },
    {
      "epoch": 0.6349686622001297,
      "grad_norm": 1.032292127609253,
      "learning_rate": 1.2424004328736312e-05,
      "loss": 0.9432,
      "step": 2938
    },
    {
      "epoch": 0.635184784957856,
      "grad_norm": 0.9539034962654114,
      "learning_rate": 1.2411049515160683e-05,
      "loss": 0.7774,
      "step": 2939
    },
    {
      "epoch": 0.6354009077155824,
      "grad_norm": 0.8693695664405823,
      "learning_rate": 1.239809842015325e-05,
      "loss": 0.8773,
      "step": 2940
    },
    {
      "epoch": 0.6356170304733089,
      "grad_norm": 0.8640608191490173,
      "learning_rate": 1.238515105006002e-05,
      "loss": 0.8453,
      "step": 2941
    },
    {
      "epoch": 0.6358331532310352,
      "grad_norm": 0.8216714262962341,
      "learning_rate": 1.237220741122518e-05,
      "loss": 0.6976,
      "step": 2942
    },
    {
      "epoch": 0.6360492759887616,
      "grad_norm": 0.9706547856330872,
      "learning_rate": 1.2359267509991062e-05,
      "loss": 0.8826,
      "step": 2943
    },
    {
      "epoch": 0.636265398746488,
      "grad_norm": 0.8941812515258789,
      "learning_rate": 1.2346331352698206e-05,
      "loss": 0.8344,
      "step": 2944
    },
    {
      "epoch": 0.6364815215042144,
      "grad_norm": 0.9289961457252502,
      "learning_rate": 1.2333398945685295e-05,
      "loss": 0.9049,
      "step": 2945
    },
    {
      "epoch": 0.6366976442619408,
      "grad_norm": 0.9785312414169312,
      "learning_rate": 1.2320470295289178e-05,
      "loss": 0.957,
      "step": 2946
    },
    {
      "epoch": 0.6369137670196672,
      "grad_norm": 0.9480656981468201,
      "learning_rate": 1.2307545407844868e-05,
      "loss": 0.7372,
      "step": 2947
    },
    {
      "epoch": 0.6371298897773936,
      "grad_norm": 1.0860769748687744,
      "learning_rate": 1.2294624289685522e-05,
      "loss": 1.123,
      "step": 2948
    },
    {
      "epoch": 0.6373460125351199,
      "grad_norm": 1.026252269744873,
      "learning_rate": 1.2281706947142463e-05,
      "loss": 0.7511,
      "step": 2949
    },
    {
      "epoch": 0.6375621352928463,
      "grad_norm": 0.9047048091888428,
      "learning_rate": 1.2268793386545152e-05,
      "loss": 0.9597,
      "step": 2950
    },
    {
      "epoch": 0.6377782580505728,
      "grad_norm": 0.9991381168365479,
      "learning_rate": 1.2255883614221216e-05,
      "loss": 1.0385,
      "step": 2951
    },
    {
      "epoch": 0.6379943808082991,
      "grad_norm": 1.060020089149475,
      "learning_rate": 1.2242977636496405e-05,
      "loss": 0.9657,
      "step": 2952
    },
    {
      "epoch": 0.6382105035660255,
      "grad_norm": 0.9928672313690186,
      "learning_rate": 1.2230075459694626e-05,
      "loss": 0.8669,
      "step": 2953
    },
    {
      "epoch": 0.6384266263237519,
      "grad_norm": 0.9651292562484741,
      "learning_rate": 1.2217177090137901e-05,
      "loss": 1.0384,
      "step": 2954
    },
    {
      "epoch": 0.6386427490814783,
      "grad_norm": 0.927743673324585,
      "learning_rate": 1.2204282534146414e-05,
      "loss": 0.9027,
      "step": 2955
    },
    {
      "epoch": 0.6388588718392046,
      "grad_norm": 0.9630751013755798,
      "learning_rate": 1.2191391798038468e-05,
      "loss": 0.7777,
      "step": 2956
    },
    {
      "epoch": 0.6390749945969311,
      "grad_norm": 0.9790912866592407,
      "learning_rate": 1.2178504888130482e-05,
      "loss": 0.9915,
      "step": 2957
    },
    {
      "epoch": 0.6392911173546575,
      "grad_norm": 1.0396443605422974,
      "learning_rate": 1.2165621810737025e-05,
      "loss": 0.8656,
      "step": 2958
    },
    {
      "epoch": 0.6395072401123838,
      "grad_norm": 0.8791309595108032,
      "learning_rate": 1.2152742572170774e-05,
      "loss": 0.8693,
      "step": 2959
    },
    {
      "epoch": 0.6397233628701102,
      "grad_norm": 0.9552299976348877,
      "learning_rate": 1.2139867178742519e-05,
      "loss": 0.736,
      "step": 2960
    },
    {
      "epoch": 0.6399394856278366,
      "grad_norm": 0.9659188985824585,
      "learning_rate": 1.2126995636761174e-05,
      "loss": 0.8197,
      "step": 2961
    },
    {
      "epoch": 0.640155608385563,
      "grad_norm": 1.0355355739593506,
      "learning_rate": 1.2114127952533773e-05,
      "loss": 0.9412,
      "step": 2962
    },
    {
      "epoch": 0.6403717311432894,
      "grad_norm": 0.8427955508232117,
      "learning_rate": 1.210126413236544e-05,
      "loss": 0.8794,
      "step": 2963
    },
    {
      "epoch": 0.6405878539010158,
      "grad_norm": 0.9869213104248047,
      "learning_rate": 1.208840418255942e-05,
      "loss": 0.8764,
      "step": 2964
    },
    {
      "epoch": 0.6408039766587421,
      "grad_norm": 0.8903014063835144,
      "learning_rate": 1.2075548109417073e-05,
      "loss": 0.9439,
      "step": 2965
    },
    {
      "epoch": 0.6410200994164685,
      "grad_norm": 1.016274333000183,
      "learning_rate": 1.2062695919237827e-05,
      "loss": 0.9497,
      "step": 2966
    },
    {
      "epoch": 0.641236222174195,
      "grad_norm": 1.1049562692642212,
      "learning_rate": 1.2049847618319235e-05,
      "loss": 1.0377,
      "step": 2967
    },
    {
      "epoch": 0.6414523449319214,
      "grad_norm": 0.9826555848121643,
      "learning_rate": 1.2037003212956924e-05,
      "loss": 0.9585,
      "step": 2968
    },
    {
      "epoch": 0.6416684676896477,
      "grad_norm": 1.0991896390914917,
      "learning_rate": 1.2024162709444637e-05,
      "loss": 0.9034,
      "step": 2969
    },
    {
      "epoch": 0.6418845904473741,
      "grad_norm": 0.9281312227249146,
      "learning_rate": 1.2011326114074188e-05,
      "loss": 0.9162,
      "step": 2970
    },
    {
      "epoch": 0.6421007132051005,
      "grad_norm": 1.021680474281311,
      "learning_rate": 1.1998493433135474e-05,
      "loss": 0.9187,
      "step": 2971
    },
    {
      "epoch": 0.6423168359628268,
      "grad_norm": 0.9557349681854248,
      "learning_rate": 1.1985664672916474e-05,
      "loss": 0.8563,
      "step": 2972
    },
    {
      "epoch": 0.6425329587205533,
      "grad_norm": 0.985569417476654,
      "learning_rate": 1.1972839839703263e-05,
      "loss": 0.7767,
      "step": 2973
    },
    {
      "epoch": 0.6427490814782797,
      "grad_norm": 1.2562131881713867,
      "learning_rate": 1.1960018939779977e-05,
      "loss": 0.9193,
      "step": 2974
    },
    {
      "epoch": 0.642965204236006,
      "grad_norm": 1.118052363395691,
      "learning_rate": 1.1947201979428817e-05,
      "loss": 0.9389,
      "step": 2975
    },
    {
      "epoch": 0.6431813269937324,
      "grad_norm": 1.0856068134307861,
      "learning_rate": 1.1934388964930082e-05,
      "loss": 0.9635,
      "step": 2976
    },
    {
      "epoch": 0.6433974497514588,
      "grad_norm": 1.1075384616851807,
      "learning_rate": 1.1921579902562103e-05,
      "loss": 0.9912,
      "step": 2977
    },
    {
      "epoch": 0.6436135725091853,
      "grad_norm": 0.9479909539222717,
      "learning_rate": 1.19087747986013e-05,
      "loss": 0.9073,
      "step": 2978
    },
    {
      "epoch": 0.6438296952669116,
      "grad_norm": 0.8583290576934814,
      "learning_rate": 1.1895973659322135e-05,
      "loss": 0.9195,
      "step": 2979
    },
    {
      "epoch": 0.644045818024638,
      "grad_norm": 1.0640686750411987,
      "learning_rate": 1.188317649099715e-05,
      "loss": 0.8857,
      "step": 2980
    },
    {
      "epoch": 0.6442619407823644,
      "grad_norm": 0.8737252354621887,
      "learning_rate": 1.1870383299896918e-05,
      "loss": 0.6835,
      "step": 2981
    },
    {
      "epoch": 0.6444780635400907,
      "grad_norm": 1.0016423463821411,
      "learning_rate": 1.185759409229008e-05,
      "loss": 0.8296,
      "step": 2982
    },
    {
      "epoch": 0.6446941862978172,
      "grad_norm": 0.8675923347473145,
      "learning_rate": 1.1844808874443307e-05,
      "loss": 0.911,
      "step": 2983
    },
    {
      "epoch": 0.6449103090555436,
      "grad_norm": 0.9906182885169983,
      "learning_rate": 1.1832027652621339e-05,
      "loss": 0.964,
      "step": 2984
    },
    {
      "epoch": 0.6451264318132699,
      "grad_norm": 0.9699721932411194,
      "learning_rate": 1.181925043308694e-05,
      "loss": 0.9571,
      "step": 2985
    },
    {
      "epoch": 0.6453425545709963,
      "grad_norm": 0.9955711364746094,
      "learning_rate": 1.1806477222100912e-05,
      "loss": 0.9385,
      "step": 2986
    },
    {
      "epoch": 0.6455586773287227,
      "grad_norm": 1.0205777883529663,
      "learning_rate": 1.1793708025922112e-05,
      "loss": 1.1295,
      "step": 2987
    },
    {
      "epoch": 0.645774800086449,
      "grad_norm": 0.8897711038589478,
      "learning_rate": 1.1780942850807407e-05,
      "loss": 0.8313,
      "step": 2988
    },
    {
      "epoch": 0.6459909228441755,
      "grad_norm": 0.9280810356140137,
      "learning_rate": 1.1768181703011703e-05,
      "loss": 0.8511,
      "step": 2989
    },
    {
      "epoch": 0.6462070456019019,
      "grad_norm": 1.0160632133483887,
      "learning_rate": 1.1755424588787933e-05,
      "loss": 0.972,
      "step": 2990
    },
    {
      "epoch": 0.6464231683596283,
      "grad_norm": 1.013733148574829,
      "learning_rate": 1.1742671514387059e-05,
      "loss": 0.9266,
      "step": 2991
    },
    {
      "epoch": 0.6466392911173546,
      "grad_norm": 0.9809374213218689,
      "learning_rate": 1.1729922486058057e-05,
      "loss": 0.9901,
      "step": 2992
    },
    {
      "epoch": 0.646855413875081,
      "grad_norm": 0.9819334745407104,
      "learning_rate": 1.1717177510047919e-05,
      "loss": 0.9503,
      "step": 2993
    },
    {
      "epoch": 0.6470715366328075,
      "grad_norm": 0.953656017780304,
      "learning_rate": 1.1704436592601649e-05,
      "loss": 0.7493,
      "step": 2994
    },
    {
      "epoch": 0.6472876593905338,
      "grad_norm": 0.9014970064163208,
      "learning_rate": 1.1691699739962275e-05,
      "loss": 0.8103,
      "step": 2995
    },
    {
      "epoch": 0.6475037821482602,
      "grad_norm": 0.9725258350372314,
      "learning_rate": 1.167896695837082e-05,
      "loss": 0.934,
      "step": 2996
    },
    {
      "epoch": 0.6477199049059866,
      "grad_norm": 1.0287680625915527,
      "learning_rate": 1.166623825406632e-05,
      "loss": 0.9125,
      "step": 2997
    },
    {
      "epoch": 0.6479360276637129,
      "grad_norm": 1.0294086933135986,
      "learning_rate": 1.1653513633285813e-05,
      "loss": 0.9198,
      "step": 2998
    },
    {
      "epoch": 0.6481521504214394,
      "grad_norm": 0.9742729067802429,
      "learning_rate": 1.164079310226434e-05,
      "loss": 0.8445,
      "step": 2999
    },
    {
      "epoch": 0.6483682731791658,
      "grad_norm": 0.9297633767127991,
      "learning_rate": 1.1628076667234906e-05,
      "loss": 0.9093,
      "step": 3000
    },
    {
      "epoch": 0.6485843959368921,
      "grad_norm": 0.9483668208122253,
      "learning_rate": 1.1615364334428562e-05,
      "loss": 0.9307,
      "step": 3001
    },
    {
      "epoch": 0.6488005186946185,
      "grad_norm": 1.0058062076568604,
      "learning_rate": 1.1602656110074308e-05,
      "loss": 0.8993,
      "step": 3002
    },
    {
      "epoch": 0.649016641452345,
      "grad_norm": 0.9775843024253845,
      "learning_rate": 1.1589952000399152e-05,
      "loss": 0.9641,
      "step": 3003
    },
    {
      "epoch": 0.6492327642100714,
      "grad_norm": 0.821497917175293,
      "learning_rate": 1.1577252011628072e-05,
      "loss": 0.8256,
      "step": 3004
    },
    {
      "epoch": 0.6494488869677977,
      "grad_norm": 0.9848234057426453,
      "learning_rate": 1.1564556149984038e-05,
      "loss": 0.9869,
      "step": 3005
    },
    {
      "epoch": 0.6496650097255241,
      "grad_norm": 1.0490641593933105,
      "learning_rate": 1.155186442168799e-05,
      "loss": 0.8785,
      "step": 3006
    },
    {
      "epoch": 0.6498811324832505,
      "grad_norm": 1.098029613494873,
      "learning_rate": 1.1539176832958845e-05,
      "loss": 1.0068,
      "step": 3007
    },
    {
      "epoch": 0.6500972552409768,
      "grad_norm": 1.0362261533737183,
      "learning_rate": 1.1526493390013493e-05,
      "loss": 1.0468,
      "step": 3008
    },
    {
      "epoch": 0.6503133779987033,
      "grad_norm": 0.9353405833244324,
      "learning_rate": 1.1513814099066786e-05,
      "loss": 0.8203,
      "step": 3009
    },
    {
      "epoch": 0.6505295007564297,
      "grad_norm": 0.9363531470298767,
      "learning_rate": 1.150113896633157e-05,
      "loss": 1.0759,
      "step": 3010
    },
    {
      "epoch": 0.650745623514156,
      "grad_norm": 0.9360188841819763,
      "learning_rate": 1.1488467998018601e-05,
      "loss": 0.9603,
      "step": 3011
    },
    {
      "epoch": 0.6509617462718824,
      "grad_norm": 0.9600111842155457,
      "learning_rate": 1.147580120033664e-05,
      "loss": 0.8148,
      "step": 3012
    },
    {
      "epoch": 0.6511778690296088,
      "grad_norm": 0.9954214096069336,
      "learning_rate": 1.1463138579492386e-05,
      "loss": 0.8941,
      "step": 3013
    },
    {
      "epoch": 0.6513939917873353,
      "grad_norm": 0.8630295991897583,
      "learning_rate": 1.1450480141690486e-05,
      "loss": 0.8974,
      "step": 3014
    },
    {
      "epoch": 0.6516101145450616,
      "grad_norm": 1.057152509689331,
      "learning_rate": 1.143782589313356e-05,
      "loss": 0.9268,
      "step": 3015
    },
    {
      "epoch": 0.651826237302788,
      "grad_norm": 0.884134829044342,
      "learning_rate": 1.1425175840022163e-05,
      "loss": 0.8361,
      "step": 3016
    },
    {
      "epoch": 0.6520423600605144,
      "grad_norm": 1.0940039157867432,
      "learning_rate": 1.1412529988554772e-05,
      "loss": 1.1047,
      "step": 3017
    },
    {
      "epoch": 0.6522584828182407,
      "grad_norm": 0.986821711063385,
      "learning_rate": 1.1399888344927828e-05,
      "loss": 0.8267,
      "step": 3018
    },
    {
      "epoch": 0.6524746055759671,
      "grad_norm": 0.9668971300125122,
      "learning_rate": 1.1387250915335724e-05,
      "loss": 0.8693,
      "step": 3019
    },
    {
      "epoch": 0.6526907283336936,
      "grad_norm": 0.9331496357917786,
      "learning_rate": 1.1374617705970761e-05,
      "loss": 0.9027,
      "step": 3020
    },
    {
      "epoch": 0.6529068510914199,
      "grad_norm": 0.9307202696800232,
      "learning_rate": 1.1361988723023183e-05,
      "loss": 1.0028,
      "step": 3021
    },
    {
      "epoch": 0.6531229738491463,
      "grad_norm": 0.880175769329071,
      "learning_rate": 1.134936397268116e-05,
      "loss": 0.8451,
      "step": 3022
    },
    {
      "epoch": 0.6533390966068727,
      "grad_norm": 0.9314516186714172,
      "learning_rate": 1.1336743461130797e-05,
      "loss": 0.7698,
      "step": 3023
    },
    {
      "epoch": 0.653555219364599,
      "grad_norm": 0.9444621801376343,
      "learning_rate": 1.1324127194556107e-05,
      "loss": 0.9248,
      "step": 3024
    },
    {
      "epoch": 0.6537713421223255,
      "grad_norm": 0.9031175971031189,
      "learning_rate": 1.1311515179139038e-05,
      "loss": 0.9376,
      "step": 3025
    },
    {
      "epoch": 0.6539874648800519,
      "grad_norm": 0.9749978184700012,
      "learning_rate": 1.1298907421059448e-05,
      "loss": 0.9033,
      "step": 3026
    },
    {
      "epoch": 0.6542035876377783,
      "grad_norm": 1.1861553192138672,
      "learning_rate": 1.1286303926495099e-05,
      "loss": 1.048,
      "step": 3027
    },
    {
      "epoch": 0.6544197103955046,
      "grad_norm": 1.0161523818969727,
      "learning_rate": 1.1273704701621698e-05,
      "loss": 0.9086,
      "step": 3028
    },
    {
      "epoch": 0.654635833153231,
      "grad_norm": 0.8376269936561584,
      "learning_rate": 1.1261109752612813e-05,
      "loss": 0.9415,
      "step": 3029
    },
    {
      "epoch": 0.6548519559109575,
      "grad_norm": 0.9243308305740356,
      "learning_rate": 1.1248519085639948e-05,
      "loss": 0.8454,
      "step": 3030
    },
    {
      "epoch": 0.6550680786686838,
      "grad_norm": 1.1032763719558716,
      "learning_rate": 1.12359327068725e-05,
      "loss": 0.8026,
      "step": 3031
    },
    {
      "epoch": 0.6552842014264102,
      "grad_norm": 0.9946315884590149,
      "learning_rate": 1.122335062247776e-05,
      "loss": 0.9173,
      "step": 3032
    },
    {
      "epoch": 0.6555003241841366,
      "grad_norm": 0.904829204082489,
      "learning_rate": 1.121077283862094e-05,
      "loss": 0.8143,
      "step": 3033
    },
    {
      "epoch": 0.6557164469418629,
      "grad_norm": 0.9684969782829285,
      "learning_rate": 1.1198199361465104e-05,
      "loss": 0.9296,
      "step": 3034
    },
    {
      "epoch": 0.6559325696995894,
      "grad_norm": 1.0400232076644897,
      "learning_rate": 1.1185630197171236e-05,
      "loss": 0.8541,
      "step": 3035
    },
    {
      "epoch": 0.6561486924573158,
      "grad_norm": 1.0041389465332031,
      "learning_rate": 1.1173065351898185e-05,
      "loss": 0.8815,
      "step": 3036
    },
    {
      "epoch": 0.6563648152150422,
      "grad_norm": 0.9918075799942017,
      "learning_rate": 1.1160504831802714e-05,
      "loss": 0.9075,
      "step": 3037
    },
    {
      "epoch": 0.6565809379727685,
      "grad_norm": 0.9889822006225586,
      "learning_rate": 1.1147948643039443e-05,
      "loss": 0.9699,
      "step": 3038
    },
    {
      "epoch": 0.6567970607304949,
      "grad_norm": 1.0850324630737305,
      "learning_rate": 1.1135396791760882e-05,
      "loss": 0.9191,
      "step": 3039
    },
    {
      "epoch": 0.6570131834882214,
      "grad_norm": 1.0572766065597534,
      "learning_rate": 1.1122849284117385e-05,
      "loss": 0.8661,
      "step": 3040
    },
    {
      "epoch": 0.6572293062459477,
      "grad_norm": 1.0197628736495972,
      "learning_rate": 1.1110306126257226e-05,
      "loss": 0.919,
      "step": 3041
    },
    {
      "epoch": 0.6574454290036741,
      "grad_norm": 0.9910076856613159,
      "learning_rate": 1.1097767324326515e-05,
      "loss": 0.7211,
      "step": 3042
    },
    {
      "epoch": 0.6576615517614005,
      "grad_norm": 0.9274858236312866,
      "learning_rate": 1.1085232884469236e-05,
      "loss": 0.9885,
      "step": 3043
    },
    {
      "epoch": 0.6578776745191268,
      "grad_norm": 1.080104112625122,
      "learning_rate": 1.1072702812827236e-05,
      "loss": 0.971,
      "step": 3044
    },
    {
      "epoch": 0.6580937972768532,
      "grad_norm": 1.3358137607574463,
      "learning_rate": 1.106017711554022e-05,
      "loss": 0.8378,
      "step": 3045
    },
    {
      "epoch": 0.6583099200345797,
      "grad_norm": 0.8777062296867371,
      "learning_rate": 1.1047655798745752e-05,
      "loss": 0.8373,
      "step": 3046
    },
    {
      "epoch": 0.658526042792306,
      "grad_norm": 0.9593110084533691,
      "learning_rate": 1.1035138868579247e-05,
      "loss": 0.9809,
      "step": 3047
    },
    {
      "epoch": 0.6587421655500324,
      "grad_norm": 1.1162147521972656,
      "learning_rate": 1.1022626331173973e-05,
      "loss": 0.9226,
      "step": 3048
    },
    {
      "epoch": 0.6589582883077588,
      "grad_norm": 0.9021515846252441,
      "learning_rate": 1.1010118192661036e-05,
      "loss": 0.9045,
      "step": 3049
    },
    {
      "epoch": 0.6591744110654852,
      "grad_norm": 1.0183483362197876,
      "learning_rate": 1.0997614459169421e-05,
      "loss": 0.763,
      "step": 3050
    },
    {
      "epoch": 0.6593905338232116,
      "grad_norm": 0.9093033671379089,
      "learning_rate": 1.0985115136825901e-05,
      "loss": 0.8944,
      "step": 3051
    },
    {
      "epoch": 0.659606656580938,
      "grad_norm": 1.030536413192749,
      "learning_rate": 1.0972620231755125e-05,
      "loss": 0.9605,
      "step": 3052
    },
    {
      "epoch": 0.6598227793386644,
      "grad_norm": 0.8911164402961731,
      "learning_rate": 1.0960129750079565e-05,
      "loss": 0.923,
      "step": 3053
    },
    {
      "epoch": 0.6600389020963907,
      "grad_norm": 0.9700839519500732,
      "learning_rate": 1.0947643697919523e-05,
      "loss": 0.9794,
      "step": 3054
    },
    {
      "epoch": 0.6602550248541171,
      "grad_norm": 0.9478440880775452,
      "learning_rate": 1.0935162081393154e-05,
      "loss": 0.8314,
      "step": 3055
    },
    {
      "epoch": 0.6604711476118436,
      "grad_norm": 0.8433210253715515,
      "learning_rate": 1.0922684906616414e-05,
      "loss": 0.7496,
      "step": 3056
    },
    {
      "epoch": 0.6606872703695699,
      "grad_norm": 0.9891439080238342,
      "learning_rate": 1.0910212179703082e-05,
      "loss": 0.7754,
      "step": 3057
    },
    {
      "epoch": 0.6609033931272963,
      "grad_norm": 1.016565203666687,
      "learning_rate": 1.0897743906764757e-05,
      "loss": 0.8037,
      "step": 3058
    },
    {
      "epoch": 0.6611195158850227,
      "grad_norm": 0.9328057765960693,
      "learning_rate": 1.0885280093910886e-05,
      "loss": 0.95,
      "step": 3059
    },
    {
      "epoch": 0.6613356386427491,
      "grad_norm": 0.9315574169158936,
      "learning_rate": 1.08728207472487e-05,
      "loss": 0.964,
      "step": 3060
    },
    {
      "epoch": 0.6615517614004754,
      "grad_norm": 0.9109113216400146,
      "learning_rate": 1.0860365872883247e-05,
      "loss": 0.9243,
      "step": 3061
    },
    {
      "epoch": 0.6617678841582019,
      "grad_norm": 1.1577420234680176,
      "learning_rate": 1.0847915476917392e-05,
      "loss": 0.8427,
      "step": 3062
    },
    {
      "epoch": 0.6619840069159283,
      "grad_norm": 0.9538229703903198,
      "learning_rate": 1.0835469565451792e-05,
      "loss": 0.9386,
      "step": 3063
    },
    {
      "epoch": 0.6622001296736546,
      "grad_norm": 1.0019724369049072,
      "learning_rate": 1.0823028144584924e-05,
      "loss": 0.732,
      "step": 3064
    },
    {
      "epoch": 0.662416252431381,
      "grad_norm": 0.9666872620582581,
      "learning_rate": 1.081059122041305e-05,
      "loss": 0.9524,
      "step": 3065
    },
    {
      "epoch": 0.6626323751891074,
      "grad_norm": 0.9982711672782898,
      "learning_rate": 1.0798158799030234e-05,
      "loss": 0.9872,
      "step": 3066
    },
    {
      "epoch": 0.6628484979468338,
      "grad_norm": 1.018813133239746,
      "learning_rate": 1.0785730886528328e-05,
      "loss": 0.7601,
      "step": 3067
    },
    {
      "epoch": 0.6630646207045602,
      "grad_norm": 1.0204263925552368,
      "learning_rate": 1.0773307488997001e-05,
      "loss": 1.0132,
      "step": 3068
    },
    {
      "epoch": 0.6632807434622866,
      "grad_norm": 0.9229834675788879,
      "learning_rate": 1.0760888612523667e-05,
      "loss": 0.8415,
      "step": 3069
    },
    {
      "epoch": 0.6634968662200129,
      "grad_norm": 1.0011403560638428,
      "learning_rate": 1.074847426319356e-05,
      "loss": 0.7339,
      "step": 3070
    },
    {
      "epoch": 0.6637129889777393,
      "grad_norm": 0.9895837903022766,
      "learning_rate": 1.0736064447089674e-05,
      "loss": 1.0072,
      "step": 3071
    },
    {
      "epoch": 0.6639291117354658,
      "grad_norm": 0.929438054561615,
      "learning_rate": 1.0723659170292786e-05,
      "loss": 0.6744,
      "step": 3072
    },
    {
      "epoch": 0.6641452344931922,
      "grad_norm": 0.9434496760368347,
      "learning_rate": 1.0711258438881477e-05,
      "loss": 0.9305,
      "step": 3073
    },
    {
      "epoch": 0.6643613572509185,
      "grad_norm": 1.071992039680481,
      "learning_rate": 1.0698862258932052e-05,
      "loss": 1.0707,
      "step": 3074
    },
    {
      "epoch": 0.6645774800086449,
      "grad_norm": 0.8919010162353516,
      "learning_rate": 1.068647063651862e-05,
      "loss": 0.7848,
      "step": 3075
    },
    {
      "epoch": 0.6647936027663713,
      "grad_norm": 1.0251803398132324,
      "learning_rate": 1.0674083577713037e-05,
      "loss": 0.972,
      "step": 3076
    },
    {
      "epoch": 0.6650097255240977,
      "grad_norm": 0.9181733131408691,
      "learning_rate": 1.0661701088584953e-05,
      "loss": 0.9924,
      "step": 3077
    },
    {
      "epoch": 0.6652258482818241,
      "grad_norm": 1.0462700128555298,
      "learning_rate": 1.0649323175201746e-05,
      "loss": 1.1303,
      "step": 3078
    },
    {
      "epoch": 0.6654419710395505,
      "grad_norm": 1.027639627456665,
      "learning_rate": 1.0636949843628578e-05,
      "loss": 0.9405,
      "step": 3079
    },
    {
      "epoch": 0.6656580937972768,
      "grad_norm": 1.0701429843902588,
      "learning_rate": 1.0624581099928324e-05,
      "loss": 0.8144,
      "step": 3080
    },
    {
      "epoch": 0.6658742165550032,
      "grad_norm": 0.9261655807495117,
      "learning_rate": 1.0612216950161667e-05,
      "loss": 0.86,
      "step": 3081
    },
    {
      "epoch": 0.6660903393127297,
      "grad_norm": 1.076094388961792,
      "learning_rate": 1.0599857400387003e-05,
      "loss": 0.8355,
      "step": 3082
    },
    {
      "epoch": 0.6663064620704561,
      "grad_norm": 1.1415523290634155,
      "learning_rate": 1.0587502456660484e-05,
      "loss": 1.2092,
      "step": 3083
    },
    {
      "epoch": 0.6665225848281824,
      "grad_norm": 0.7931217551231384,
      "learning_rate": 1.0575152125036e-05,
      "loss": 0.8419,
      "step": 3084
    },
    {
      "epoch": 0.6667387075859088,
      "grad_norm": 0.9748810529708862,
      "learning_rate": 1.0562806411565192e-05,
      "loss": 1.0033,
      "step": 3085
    },
    {
      "epoch": 0.6669548303436352,
      "grad_norm": 0.9649502635002136,
      "learning_rate": 1.0550465322297421e-05,
      "loss": 0.7608,
      "step": 3086
    },
    {
      "epoch": 0.6671709531013615,
      "grad_norm": 1.041642427444458,
      "learning_rate": 1.0538128863279801e-05,
      "loss": 0.8764,
      "step": 3087
    },
    {
      "epoch": 0.667387075859088,
      "grad_norm": 0.9173684120178223,
      "learning_rate": 1.0525797040557166e-05,
      "loss": 0.828,
      "step": 3088
    },
    {
      "epoch": 0.6676031986168144,
      "grad_norm": 0.9731480479240417,
      "learning_rate": 1.0513469860172079e-05,
      "loss": 1.0716,
      "step": 3089
    },
    {
      "epoch": 0.6678193213745407,
      "grad_norm": 0.8859195113182068,
      "learning_rate": 1.0501147328164832e-05,
      "loss": 0.8304,
      "step": 3090
    },
    {
      "epoch": 0.6680354441322671,
      "grad_norm": 0.9966490864753723,
      "learning_rate": 1.0488829450573435e-05,
      "loss": 1.1198,
      "step": 3091
    },
    {
      "epoch": 0.6682515668899935,
      "grad_norm": 0.9166406393051147,
      "learning_rate": 1.047651623343362e-05,
      "loss": 0.7968,
      "step": 3092
    },
    {
      "epoch": 0.6684676896477199,
      "grad_norm": 0.9286608695983887,
      "learning_rate": 1.0464207682778835e-05,
      "loss": 0.8948,
      "step": 3093
    },
    {
      "epoch": 0.6686838124054463,
      "grad_norm": 0.8471441864967346,
      "learning_rate": 1.0451903804640236e-05,
      "loss": 0.8495,
      "step": 3094
    },
    {
      "epoch": 0.6688999351631727,
      "grad_norm": 0.9391002058982849,
      "learning_rate": 1.0439604605046707e-05,
      "loss": 0.8925,
      "step": 3095
    },
    {
      "epoch": 0.6691160579208991,
      "grad_norm": 0.9239630103111267,
      "learning_rate": 1.042731009002483e-05,
      "loss": 0.8672,
      "step": 3096
    },
    {
      "epoch": 0.6693321806786254,
      "grad_norm": 1.1243126392364502,
      "learning_rate": 1.0415020265598872e-05,
      "loss": 1.0357,
      "step": 3097
    },
    {
      "epoch": 0.6695483034363519,
      "grad_norm": 0.9715553522109985,
      "learning_rate": 1.0402735137790816e-05,
      "loss": 0.9174,
      "step": 3098
    },
    {
      "epoch": 0.6697644261940783,
      "grad_norm": 0.9210078716278076,
      "learning_rate": 1.0390454712620368e-05,
      "loss": 1.0687,
      "step": 3099
    },
    {
      "epoch": 0.6699805489518046,
      "grad_norm": 1.0733075141906738,
      "learning_rate": 1.0378178996104898e-05,
      "loss": 0.9525,
      "step": 3100
    },
    {
      "epoch": 0.670196671709531,
      "grad_norm": 0.9286667108535767,
      "learning_rate": 1.0365907994259472e-05,
      "loss": 0.9281,
      "step": 3101
    },
    {
      "epoch": 0.6704127944672574,
      "grad_norm": 1.105976939201355,
      "learning_rate": 1.0353641713096863e-05,
      "loss": 1.0309,
      "step": 3102
    },
    {
      "epoch": 0.6706289172249837,
      "grad_norm": 1.0517795085906982,
      "learning_rate": 1.0341380158627512e-05,
      "loss": 0.9342,
      "step": 3103
    },
    {
      "epoch": 0.6708450399827102,
      "grad_norm": 1.0220019817352295,
      "learning_rate": 1.0329123336859559e-05,
      "loss": 0.8418,
      "step": 3104
    },
    {
      "epoch": 0.6710611627404366,
      "grad_norm": 0.9805824756622314,
      "learning_rate": 1.0316871253798813e-05,
      "loss": 0.9325,
      "step": 3105
    },
    {
      "epoch": 0.6712772854981629,
      "grad_norm": 0.8305776119232178,
      "learning_rate": 1.030462391544877e-05,
      "loss": 0.6997,
      "step": 3106
    },
    {
      "epoch": 0.6714934082558893,
      "grad_norm": 0.9174684286117554,
      "learning_rate": 1.0292381327810585e-05,
      "loss": 0.7971,
      "step": 3107
    },
    {
      "epoch": 0.6717095310136157,
      "grad_norm": 0.9358829855918884,
      "learning_rate": 1.0280143496883128e-05,
      "loss": 0.992,
      "step": 3108
    },
    {
      "epoch": 0.6719256537713422,
      "grad_norm": 0.946599543094635,
      "learning_rate": 1.0267910428662878e-05,
      "loss": 0.8471,
      "step": 3109
    },
    {
      "epoch": 0.6721417765290685,
      "grad_norm": 1.030730962753296,
      "learning_rate": 1.0255682129144022e-05,
      "loss": 0.994,
      "step": 3110
    },
    {
      "epoch": 0.6723578992867949,
      "grad_norm": 0.9013956189155579,
      "learning_rate": 1.0243458604318397e-05,
      "loss": 0.9702,
      "step": 3111
    },
    {
      "epoch": 0.6725740220445213,
      "grad_norm": 0.9366409778594971,
      "learning_rate": 1.0231239860175495e-05,
      "loss": 0.8885,
      "step": 3112
    },
    {
      "epoch": 0.6727901448022476,
      "grad_norm": 1.0941097736358643,
      "learning_rate": 1.0219025902702494e-05,
      "loss": 0.9037,
      "step": 3113
    },
    {
      "epoch": 0.6730062675599741,
      "grad_norm": 0.9904168248176575,
      "learning_rate": 1.0206816737884182e-05,
      "loss": 0.8922,
      "step": 3114
    },
    {
      "epoch": 0.6732223903177005,
      "grad_norm": 0.9461530447006226,
      "learning_rate": 1.019461237170303e-05,
      "loss": 1.0165,
      "step": 3115
    },
    {
      "epoch": 0.6734385130754268,
      "grad_norm": 0.9304680824279785,
      "learning_rate": 1.0182412810139142e-05,
      "loss": 0.7641,
      "step": 3116
    },
    {
      "epoch": 0.6736546358331532,
      "grad_norm": 0.9654682874679565,
      "learning_rate": 1.017021805917029e-05,
      "loss": 0.9698,
      "step": 3117
    },
    {
      "epoch": 0.6738707585908796,
      "grad_norm": 0.8547962307929993,
      "learning_rate": 1.0158028124771863e-05,
      "loss": 0.803,
      "step": 3118
    },
    {
      "epoch": 0.6740868813486061,
      "grad_norm": 0.9742785692214966,
      "learning_rate": 1.0145843012916913e-05,
      "loss": 0.8176,
      "step": 3119
    },
    {
      "epoch": 0.6743030041063324,
      "grad_norm": 0.9695746302604675,
      "learning_rate": 1.0133662729576095e-05,
      "loss": 0.8249,
      "step": 3120
    },
    {
      "epoch": 0.6745191268640588,
      "grad_norm": 0.9653365612030029,
      "learning_rate": 1.0121487280717734e-05,
      "loss": 0.9193,
      "step": 3121
    },
    {
      "epoch": 0.6747352496217852,
      "grad_norm": 1.0053045749664307,
      "learning_rate": 1.0109316672307774e-05,
      "loss": 0.8366,
      "step": 3122
    },
    {
      "epoch": 0.6749513723795115,
      "grad_norm": 0.9816423654556274,
      "learning_rate": 1.0097150910309778e-05,
      "loss": 0.7376,
      "step": 3123
    },
    {
      "epoch": 0.675167495137238,
      "grad_norm": 0.9633327126502991,
      "learning_rate": 1.0084990000684947e-05,
      "loss": 0.9669,
      "step": 3124
    },
    {
      "epoch": 0.6753836178949644,
      "grad_norm": 0.9444330930709839,
      "learning_rate": 1.0072833949392091e-05,
      "loss": 0.8994,
      "step": 3125
    },
    {
      "epoch": 0.6755997406526907,
      "grad_norm": 0.8973102569580078,
      "learning_rate": 1.0060682762387655e-05,
      "loss": 0.9545,
      "step": 3126
    },
    {
      "epoch": 0.6758158634104171,
      "grad_norm": 0.9260913729667664,
      "learning_rate": 1.0048536445625688e-05,
      "loss": 0.9796,
      "step": 3127
    },
    {
      "epoch": 0.6760319861681435,
      "grad_norm": 1.016711711883545,
      "learning_rate": 1.0036395005057858e-05,
      "loss": 0.968,
      "step": 3128
    },
    {
      "epoch": 0.6762481089258698,
      "grad_norm": 1.183542251586914,
      "learning_rate": 1.0024258446633444e-05,
      "loss": 1.1579,
      "step": 3129
    },
    {
      "epoch": 0.6764642316835963,
      "grad_norm": 0.9324146509170532,
      "learning_rate": 1.001212677629932e-05,
      "loss": 0.9397,
      "step": 3130
    },
    {
      "epoch": 0.6766803544413227,
      "grad_norm": 0.9586891531944275,
      "learning_rate": 1.0000000000000006e-05,
      "loss": 0.8797,
      "step": 3131
    },
    {
      "epoch": 0.6768964771990491,
      "grad_norm": 1.0296969413757324,
      "learning_rate": 9.987878123677565e-06,
      "loss": 0.8891,
      "step": 3132
    },
    {
      "epoch": 0.6771125999567754,
      "grad_norm": 1.126132845878601,
      "learning_rate": 9.9757611532717e-06,
      "loss": 0.8492,
      "step": 3133
    },
    {
      "epoch": 0.6773287227145018,
      "grad_norm": 0.8979195356369019,
      "learning_rate": 9.96364909471969e-06,
      "loss": 0.7549,
      "step": 3134
    },
    {
      "epoch": 0.6775448454722283,
      "grad_norm": 0.852216899394989,
      "learning_rate": 9.951541953956427e-06,
      "loss": 0.9574,
      "step": 3135
    },
    {
      "epoch": 0.6777609682299546,
      "grad_norm": 0.9262779355049133,
      "learning_rate": 9.939439736914388e-06,
      "loss": 0.8306,
      "step": 3136
    },
    {
      "epoch": 0.677977090987681,
      "grad_norm": 0.9397847652435303,
      "learning_rate": 9.927342449523616e-06,
      "loss": 0.9153,
      "step": 3137
    },
    {
      "epoch": 0.6781932137454074,
      "grad_norm": 0.9414774775505066,
      "learning_rate": 9.915250097711749e-06,
      "loss": 0.6423,
      "step": 3138
    },
    {
      "epoch": 0.6784093365031337,
      "grad_norm": 1.0208468437194824,
      "learning_rate": 9.903162687404028e-06,
      "loss": 0.9731,
      "step": 3139
    },
    {
      "epoch": 0.6786254592608602,
      "grad_norm": 0.9117647409439087,
      "learning_rate": 9.891080224523253e-06,
      "loss": 0.8163,
      "step": 3140
    },
    {
      "epoch": 0.6788415820185866,
      "grad_norm": 0.9898706674575806,
      "learning_rate": 9.879002714989796e-06,
      "loss": 0.955,
      "step": 3141
    },
    {
      "epoch": 0.679057704776313,
      "grad_norm": 0.9839960932731628,
      "learning_rate": 9.866930164721615e-06,
      "loss": 1.0999,
      "step": 3142
    },
    {
      "epoch": 0.6792738275340393,
      "grad_norm": 0.9270585179328918,
      "learning_rate": 9.854862579634228e-06,
      "loss": 1.1173,
      "step": 3143
    },
    {
      "epoch": 0.6794899502917657,
      "grad_norm": 1.0187488794326782,
      "learning_rate": 9.842799965640725e-06,
      "loss": 0.968,
      "step": 3144
    },
    {
      "epoch": 0.6797060730494922,
      "grad_norm": 1.1590285301208496,
      "learning_rate": 9.830742328651759e-06,
      "loss": 1.1053,
      "step": 3145
    },
    {
      "epoch": 0.6799221958072185,
      "grad_norm": 1.092870831489563,
      "learning_rate": 9.818689674575543e-06,
      "loss": 1.0063,
      "step": 3146
    },
    {
      "epoch": 0.6801383185649449,
      "grad_norm": 0.9513214230537415,
      "learning_rate": 9.80664200931785e-06,
      "loss": 0.9578,
      "step": 3147
    },
    {
      "epoch": 0.6803544413226713,
      "grad_norm": 0.8436422348022461,
      "learning_rate": 9.794599338782011e-06,
      "loss": 0.8834,
      "step": 3148
    },
    {
      "epoch": 0.6805705640803976,
      "grad_norm": 0.8941531181335449,
      "learning_rate": 9.782561668868905e-06,
      "loss": 0.8751,
      "step": 3149
    },
    {
      "epoch": 0.680786686838124,
      "grad_norm": 1.0576249361038208,
      "learning_rate": 9.770529005476959e-06,
      "loss": 1.0757,
      "step": 3150
    },
    {
      "epoch": 0.6810028095958505,
      "grad_norm": 0.9331166744232178,
      "learning_rate": 9.758501354502154e-06,
      "loss": 1.1021,
      "step": 3151
    },
    {
      "epoch": 0.6812189323535768,
      "grad_norm": 0.8988887667655945,
      "learning_rate": 9.746478721838004e-06,
      "loss": 0.7193,
      "step": 3152
    },
    {
      "epoch": 0.6814350551113032,
      "grad_norm": 1.0191924571990967,
      "learning_rate": 9.734461113375593e-06,
      "loss": 0.8743,
      "step": 3153
    },
    {
      "epoch": 0.6816511778690296,
      "grad_norm": 0.9970690608024597,
      "learning_rate": 9.722448535003497e-06,
      "loss": 0.989,
      "step": 3154
    },
    {
      "epoch": 0.681867300626756,
      "grad_norm": 0.9373263716697693,
      "learning_rate": 9.710440992607862e-06,
      "loss": 0.9117,
      "step": 3155
    },
    {
      "epoch": 0.6820834233844824,
      "grad_norm": 1.0130620002746582,
      "learning_rate": 9.698438492072346e-06,
      "loss": 1.0492,
      "step": 3156
    },
    {
      "epoch": 0.6822995461422088,
      "grad_norm": 0.9624810814857483,
      "learning_rate": 9.686441039278163e-06,
      "loss": 0.818,
      "step": 3157
    },
    {
      "epoch": 0.6825156688999352,
      "grad_norm": 1.0124694108963013,
      "learning_rate": 9.674448640104028e-06,
      "loss": 1.0394,
      "step": 3158
    },
    {
      "epoch": 0.6827317916576615,
      "grad_norm": 1.0002669095993042,
      "learning_rate": 9.6624613004262e-06,
      "loss": 1.0167,
      "step": 3159
    },
    {
      "epoch": 0.6829479144153879,
      "grad_norm": 0.9762430191040039,
      "learning_rate": 9.650479026118423e-06,
      "loss": 0.9059,
      "step": 3160
    },
    {
      "epoch": 0.6831640371731144,
      "grad_norm": 0.8910196423530579,
      "learning_rate": 9.638501823052002e-06,
      "loss": 0.8185,
      "step": 3161
    },
    {
      "epoch": 0.6833801599308407,
      "grad_norm": 1.0277290344238281,
      "learning_rate": 9.626529697095738e-06,
      "loss": 0.8963,
      "step": 3162
    },
    {
      "epoch": 0.6835962826885671,
      "grad_norm": 0.9845807552337646,
      "learning_rate": 9.614562654115944e-06,
      "loss": 0.8445,
      "step": 3163
    },
    {
      "epoch": 0.6838124054462935,
      "grad_norm": 0.9002982974052429,
      "learning_rate": 9.602600699976439e-06,
      "loss": 0.9235,
      "step": 3164
    },
    {
      "epoch": 0.6840285282040199,
      "grad_norm": 0.7742257118225098,
      "learning_rate": 9.590643840538558e-06,
      "loss": 0.7285,
      "step": 3165
    },
    {
      "epoch": 0.6842446509617462,
      "grad_norm": 0.8876941204071045,
      "learning_rate": 9.578692081661132e-06,
      "loss": 0.9619,
      "step": 3166
    },
    {
      "epoch": 0.6844607737194727,
      "grad_norm": 1.004382848739624,
      "learning_rate": 9.566745429200492e-06,
      "loss": 1.0215,
      "step": 3167
    },
    {
      "epoch": 0.6846768964771991,
      "grad_norm": 0.9555290937423706,
      "learning_rate": 9.554803889010477e-06,
      "loss": 0.721,
      "step": 3168
    },
    {
      "epoch": 0.6848930192349254,
      "grad_norm": 0.9498472213745117,
      "learning_rate": 9.542867466942409e-06,
      "loss": 1.0724,
      "step": 3169
    },
    {
      "epoch": 0.6851091419926518,
      "grad_norm": 0.9486294984817505,
      "learning_rate": 9.530936168845102e-06,
      "loss": 0.9296,
      "step": 3170
    },
    {
      "epoch": 0.6853252647503782,
      "grad_norm": 0.8646090030670166,
      "learning_rate": 9.519010000564888e-06,
      "loss": 0.7584,
      "step": 3171
    },
    {
      "epoch": 0.6855413875081046,
      "grad_norm": 0.9679620265960693,
      "learning_rate": 9.507088967945535e-06,
      "loss": 0.8904,
      "step": 3172
    },
    {
      "epoch": 0.685757510265831,
      "grad_norm": 0.877466082572937,
      "learning_rate": 9.495173076828332e-06,
      "loss": 0.861,
      "step": 3173
    },
    {
      "epoch": 0.6859736330235574,
      "grad_norm": 0.8935673832893372,
      "learning_rate": 9.48326233305203e-06,
      "loss": 0.8915,
      "step": 3174
    },
    {
      "epoch": 0.6861897557812837,
      "grad_norm": 0.8707239031791687,
      "learning_rate": 9.471356742452881e-06,
      "loss": 0.7875,
      "step": 3175
    },
    {
      "epoch": 0.6864058785390101,
      "grad_norm": 0.9756590723991394,
      "learning_rate": 9.4594563108646e-06,
      "loss": 0.8454,
      "step": 3176
    },
    {
      "epoch": 0.6866220012967366,
      "grad_norm": 0.7923886179924011,
      "learning_rate": 9.447561044118349e-06,
      "loss": 0.7555,
      "step": 3177
    },
    {
      "epoch": 0.686838124054463,
      "grad_norm": 0.9808708429336548,
      "learning_rate": 9.435670948042788e-06,
      "loss": 0.9774,
      "step": 3178
    },
    {
      "epoch": 0.6870542468121893,
      "grad_norm": 0.9539241194725037,
      "learning_rate": 9.423786028464049e-06,
      "loss": 0.8864,
      "step": 3179
    },
    {
      "epoch": 0.6872703695699157,
      "grad_norm": 1.0560336112976074,
      "learning_rate": 9.411906291205704e-06,
      "loss": 0.9401,
      "step": 3180
    },
    {
      "epoch": 0.6874864923276421,
      "grad_norm": 1.0322731733322144,
      "learning_rate": 9.400031742088802e-06,
      "loss": 0.7739,
      "step": 3181
    },
    {
      "epoch": 0.6877026150853685,
      "grad_norm": 1.0997140407562256,
      "learning_rate": 9.388162386931842e-06,
      "loss": 1.0653,
      "step": 3182
    },
    {
      "epoch": 0.6879187378430949,
      "grad_norm": 0.8863367438316345,
      "learning_rate": 9.376298231550784e-06,
      "loss": 0.7619,
      "step": 3183
    },
    {
      "epoch": 0.6881348606008213,
      "grad_norm": 1.0070819854736328,
      "learning_rate": 9.364439281759033e-06,
      "loss": 0.9854,
      "step": 3184
    },
    {
      "epoch": 0.6883509833585476,
      "grad_norm": 1.0507131814956665,
      "learning_rate": 9.352585543367448e-06,
      "loss": 0.9161,
      "step": 3185
    },
    {
      "epoch": 0.688567106116274,
      "grad_norm": 1.0086380243301392,
      "learning_rate": 9.340737022184331e-06,
      "loss": 0.9242,
      "step": 3186
    },
    {
      "epoch": 0.6887832288740005,
      "grad_norm": 1.051830530166626,
      "learning_rate": 9.328893724015436e-06,
      "loss": 1.0695,
      "step": 3187
    },
    {
      "epoch": 0.6889993516317269,
      "grad_norm": 0.9411728382110596,
      "learning_rate": 9.317055654663946e-06,
      "loss": 0.9433,
      "step": 3188
    },
    {
      "epoch": 0.6892154743894532,
      "grad_norm": 0.9798381328582764,
      "learning_rate": 9.30522281993049e-06,
      "loss": 1.023,
      "step": 3189
    },
    {
      "epoch": 0.6894315971471796,
      "grad_norm": 0.9365191459655762,
      "learning_rate": 9.29339522561313e-06,
      "loss": 0.8442,
      "step": 3190
    },
    {
      "epoch": 0.689647719904906,
      "grad_norm": 0.9904110431671143,
      "learning_rate": 9.281572877507359e-06,
      "loss": 0.9847,
      "step": 3191
    },
    {
      "epoch": 0.6898638426626323,
      "grad_norm": 0.9646345376968384,
      "learning_rate": 9.269755781406094e-06,
      "loss": 0.8717,
      "step": 3192
    },
    {
      "epoch": 0.6900799654203588,
      "grad_norm": 1.0621178150177002,
      "learning_rate": 9.257943943099698e-06,
      "loss": 0.6718,
      "step": 3193
    },
    {
      "epoch": 0.6902960881780852,
      "grad_norm": 0.9507277607917786,
      "learning_rate": 9.246137368375944e-06,
      "loss": 0.8953,
      "step": 3194
    },
    {
      "epoch": 0.6905122109358115,
      "grad_norm": 0.9311648607254028,
      "learning_rate": 9.234336063020014e-06,
      "loss": 0.8478,
      "step": 3195
    },
    {
      "epoch": 0.6907283336935379,
      "grad_norm": 1.0660985708236694,
      "learning_rate": 9.222540032814522e-06,
      "loss": 0.9248,
      "step": 3196
    },
    {
      "epoch": 0.6909444564512643,
      "grad_norm": 0.9388880133628845,
      "learning_rate": 9.210749283539504e-06,
      "loss": 0.753,
      "step": 3197
    },
    {
      "epoch": 0.6911605792089907,
      "grad_norm": 1.06510329246521,
      "learning_rate": 9.198963820972398e-06,
      "loss": 0.9045,
      "step": 3198
    },
    {
      "epoch": 0.6913767019667171,
      "grad_norm": 1.0905274152755737,
      "learning_rate": 9.187183650888056e-06,
      "loss": 0.8409,
      "step": 3199
    },
    {
      "epoch": 0.6915928247244435,
      "grad_norm": 0.9568233489990234,
      "learning_rate": 9.175408779058716e-06,
      "loss": 0.815,
      "step": 3200
    },
    {
      "epoch": 0.6918089474821699,
      "grad_norm": 1.0407795906066895,
      "learning_rate": 9.163639211254059e-06,
      "loss": 0.7762,
      "step": 3201
    },
    {
      "epoch": 0.6920250702398962,
      "grad_norm": 1.0074913501739502,
      "learning_rate": 9.151874953241138e-06,
      "loss": 0.9301,
      "step": 3202
    },
    {
      "epoch": 0.6922411929976227,
      "grad_norm": 0.9492015242576599,
      "learning_rate": 9.140116010784413e-06,
      "loss": 1.0045,
      "step": 3203
    },
    {
      "epoch": 0.6924573157553491,
      "grad_norm": 0.9555363059043884,
      "learning_rate": 9.128362389645737e-06,
      "loss": 0.7526,
      "step": 3204
    },
    {
      "epoch": 0.6926734385130754,
      "grad_norm": 1.1192456483840942,
      "learning_rate": 9.116614095584361e-06,
      "loss": 1.0528,
      "step": 3205
    },
    {
      "epoch": 0.6928895612708018,
      "grad_norm": 0.9081218242645264,
      "learning_rate": 9.104871134356919e-06,
      "loss": 0.9308,
      "step": 3206
    },
    {
      "epoch": 0.6931056840285282,
      "grad_norm": 1.0473381280899048,
      "learning_rate": 9.093133511717433e-06,
      "loss": 0.9199,
      "step": 3207
    },
    {
      "epoch": 0.6933218067862545,
      "grad_norm": 0.8416535258293152,
      "learning_rate": 9.081401233417315e-06,
      "loss": 0.7143,
      "step": 3208
    },
    {
      "epoch": 0.693537929543981,
      "grad_norm": 1.107761263847351,
      "learning_rate": 9.069674305205352e-06,
      "loss": 0.8804,
      "step": 3209
    },
    {
      "epoch": 0.6937540523017074,
      "grad_norm": 1.1008070707321167,
      "learning_rate": 9.057952732827704e-06,
      "loss": 0.9937,
      "step": 3210
    },
    {
      "epoch": 0.6939701750594338,
      "grad_norm": 1.0305111408233643,
      "learning_rate": 9.046236522027939e-06,
      "loss": 0.8992,
      "step": 3211
    },
    {
      "epoch": 0.6941862978171601,
      "grad_norm": 1.0368010997772217,
      "learning_rate": 9.034525678546948e-06,
      "loss": 0.938,
      "step": 3212
    },
    {
      "epoch": 0.6944024205748865,
      "grad_norm": 0.9238491058349609,
      "learning_rate": 9.022820208123026e-06,
      "loss": 1.0078,
      "step": 3213
    },
    {
      "epoch": 0.694618543332613,
      "grad_norm": 0.9420394897460938,
      "learning_rate": 9.01112011649182e-06,
      "loss": 0.9367,
      "step": 3214
    },
    {
      "epoch": 0.6948346660903393,
      "grad_norm": 0.8957251310348511,
      "learning_rate": 8.999425409386362e-06,
      "loss": 0.8335,
      "step": 3215
    },
    {
      "epoch": 0.6950507888480657,
      "grad_norm": 0.9629054069519043,
      "learning_rate": 8.987736092537029e-06,
      "loss": 0.9271,
      "step": 3216
    },
    {
      "epoch": 0.6952669116057921,
      "grad_norm": 1.0926717519760132,
      "learning_rate": 8.97605217167155e-06,
      "loss": 1.0634,
      "step": 3217
    },
    {
      "epoch": 0.6954830343635184,
      "grad_norm": 0.8469776511192322,
      "learning_rate": 8.964373652515012e-06,
      "loss": 0.8309,
      "step": 3218
    },
    {
      "epoch": 0.6956991571212449,
      "grad_norm": 1.1549241542816162,
      "learning_rate": 8.952700540789884e-06,
      "loss": 0.977,
      "step": 3219
    },
    {
      "epoch": 0.6959152798789713,
      "grad_norm": 0.9077586531639099,
      "learning_rate": 8.941032842215951e-06,
      "loss": 1.0237,
      "step": 3220
    },
    {
      "epoch": 0.6961314026366976,
      "grad_norm": 0.9283832311630249,
      "learning_rate": 8.929370562510363e-06,
      "loss": 0.9388,
      "step": 3221
    },
    {
      "epoch": 0.696347525394424,
      "grad_norm": 0.8420954346656799,
      "learning_rate": 8.917713707387606e-06,
      "loss": 0.8526,
      "step": 3222
    },
    {
      "epoch": 0.6965636481521504,
      "grad_norm": 1.092646598815918,
      "learning_rate": 8.906062282559516e-06,
      "loss": 0.956,
      "step": 3223
    },
    {
      "epoch": 0.6967797709098769,
      "grad_norm": 0.9578081965446472,
      "learning_rate": 8.894416293735259e-06,
      "loss": 0.8625,
      "step": 3224
    },
    {
      "epoch": 0.6969958936676032,
      "grad_norm": 0.9161734580993652,
      "learning_rate": 8.882775746621348e-06,
      "loss": 0.9652,
      "step": 3225
    },
    {
      "epoch": 0.6972120164253296,
      "grad_norm": 0.9222764372825623,
      "learning_rate": 8.871140646921622e-06,
      "loss": 0.9847,
      "step": 3226
    },
    {
      "epoch": 0.697428139183056,
      "grad_norm": 1.0095137357711792,
      "learning_rate": 8.85951100033725e-06,
      "loss": 0.9466,
      "step": 3227
    },
    {
      "epoch": 0.6976442619407823,
      "grad_norm": 0.8722392320632935,
      "learning_rate": 8.847886812566737e-06,
      "loss": 0.8155,
      "step": 3228
    },
    {
      "epoch": 0.6978603846985088,
      "grad_norm": 1.0187437534332275,
      "learning_rate": 8.836268089305904e-06,
      "loss": 0.9925,
      "step": 3229
    },
    {
      "epoch": 0.6980765074562352,
      "grad_norm": 0.9919723868370056,
      "learning_rate": 8.824654836247903e-06,
      "loss": 0.9173,
      "step": 3230
    },
    {
      "epoch": 0.6982926302139615,
      "grad_norm": 0.9587224721908569,
      "learning_rate": 8.813047059083198e-06,
      "loss": 0.8911,
      "step": 3231
    },
    {
      "epoch": 0.6985087529716879,
      "grad_norm": 0.9450093507766724,
      "learning_rate": 8.801444763499565e-06,
      "loss": 0.9868,
      "step": 3232
    },
    {
      "epoch": 0.6987248757294143,
      "grad_norm": 1.0732356309890747,
      "learning_rate": 8.789847955182118e-06,
      "loss": 0.8334,
      "step": 3233
    },
    {
      "epoch": 0.6989409984871406,
      "grad_norm": 0.9450024366378784,
      "learning_rate": 8.778256639813267e-06,
      "loss": 0.8401,
      "step": 3234
    },
    {
      "epoch": 0.6991571212448671,
      "grad_norm": 1.0916708707809448,
      "learning_rate": 8.766670823072714e-06,
      "loss": 0.851,
      "step": 3235
    },
    {
      "epoch": 0.6993732440025935,
      "grad_norm": 0.9243761301040649,
      "learning_rate": 8.755090510637483e-06,
      "loss": 0.8658,
      "step": 3236
    },
    {
      "epoch": 0.6995893667603199,
      "grad_norm": 1.0502249002456665,
      "learning_rate": 8.743515708181914e-06,
      "loss": 0.8741,
      "step": 3237
    },
    {
      "epoch": 0.6998054895180462,
      "grad_norm": 1.1587893962860107,
      "learning_rate": 8.731946421377627e-06,
      "loss": 0.9818,
      "step": 3238
    },
    {
      "epoch": 0.7000216122757726,
      "grad_norm": 0.8979175686836243,
      "learning_rate": 8.720382655893552e-06,
      "loss": 1.0243,
      "step": 3239
    },
    {
      "epoch": 0.7002377350334991,
      "grad_norm": 1.0077136754989624,
      "learning_rate": 8.708824417395887e-06,
      "loss": 1.0221,
      "step": 3240
    },
    {
      "epoch": 0.7004538577912254,
      "grad_norm": 0.9645074009895325,
      "learning_rate": 8.697271711548163e-06,
      "loss": 1.0476,
      "step": 3241
    },
    {
      "epoch": 0.7006699805489518,
      "grad_norm": 0.9618235230445862,
      "learning_rate": 8.685724544011174e-06,
      "loss": 0.9815,
      "step": 3242
    },
    {
      "epoch": 0.7008861033066782,
      "grad_norm": 1.1321656703948975,
      "learning_rate": 8.674182920443002e-06,
      "loss": 0.8899,
      "step": 3243
    },
    {
      "epoch": 0.7011022260644045,
      "grad_norm": 0.9900889992713928,
      "learning_rate": 8.662646846499017e-06,
      "loss": 0.8192,
      "step": 3244
    },
    {
      "epoch": 0.701318348822131,
      "grad_norm": 0.9528653621673584,
      "learning_rate": 8.65111632783187e-06,
      "loss": 0.6818,
      "step": 3245
    },
    {
      "epoch": 0.7015344715798574,
      "grad_norm": 0.956042468547821,
      "learning_rate": 8.639591370091486e-06,
      "loss": 0.6874,
      "step": 3246
    },
    {
      "epoch": 0.7017505943375838,
      "grad_norm": 1.0208436250686646,
      "learning_rate": 8.62807197892507e-06,
      "loss": 0.8192,
      "step": 3247
    },
    {
      "epoch": 0.7019667170953101,
      "grad_norm": 0.9951738119125366,
      "learning_rate": 8.616558159977097e-06,
      "loss": 0.953,
      "step": 3248
    },
    {
      "epoch": 0.7021828398530365,
      "grad_norm": 0.9474645853042603,
      "learning_rate": 8.60504991888931e-06,
      "loss": 0.9003,
      "step": 3249
    },
    {
      "epoch": 0.702398962610763,
      "grad_norm": 0.9384509921073914,
      "learning_rate": 8.593547261300716e-06,
      "loss": 0.9431,
      "step": 3250
    },
    {
      "epoch": 0.7026150853684893,
      "grad_norm": 0.9716159701347351,
      "learning_rate": 8.582050192847608e-06,
      "loss": 0.8827,
      "step": 3251
    },
    {
      "epoch": 0.7028312081262157,
      "grad_norm": 0.8509849309921265,
      "learning_rate": 8.570558719163506e-06,
      "loss": 0.7682,
      "step": 3252
    },
    {
      "epoch": 0.7030473308839421,
      "grad_norm": 1.0275623798370361,
      "learning_rate": 8.559072845879211e-06,
      "loss": 1.0044,
      "step": 3253
    },
    {
      "epoch": 0.7032634536416684,
      "grad_norm": 1.024664282798767,
      "learning_rate": 8.547592578622762e-06,
      "loss": 0.9481,
      "step": 3254
    },
    {
      "epoch": 0.7034795763993948,
      "grad_norm": 0.9387202262878418,
      "learning_rate": 8.536117923019486e-06,
      "loss": 0.983,
      "step": 3255
    },
    {
      "epoch": 0.7036956991571213,
      "grad_norm": 0.9498915672302246,
      "learning_rate": 8.524648884691935e-06,
      "loss": 0.9352,
      "step": 3256
    },
    {
      "epoch": 0.7039118219148476,
      "grad_norm": 0.988545835018158,
      "learning_rate": 8.513185469259894e-06,
      "loss": 0.7864,
      "step": 3257
    },
    {
      "epoch": 0.704127944672574,
      "grad_norm": 0.8617672920227051,
      "learning_rate": 8.501727682340415e-06,
      "loss": 0.7823,
      "step": 3258
    },
    {
      "epoch": 0.7043440674303004,
      "grad_norm": 0.9407501220703125,
      "learning_rate": 8.490275529547798e-06,
      "loss": 0.8565,
      "step": 3259
    },
    {
      "epoch": 0.7045601901880268,
      "grad_norm": 0.9361156821250916,
      "learning_rate": 8.478829016493565e-06,
      "loss": 0.9448,
      "step": 3260
    },
    {
      "epoch": 0.7047763129457532,
      "grad_norm": 0.9246730208396912,
      "learning_rate": 8.467388148786477e-06,
      "loss": 0.8766,
      "step": 3261
    },
    {
      "epoch": 0.7049924357034796,
      "grad_norm": 1.061888575553894,
      "learning_rate": 8.455952932032541e-06,
      "loss": 0.9644,
      "step": 3262
    },
    {
      "epoch": 0.705208558461206,
      "grad_norm": 1.057067632675171,
      "learning_rate": 8.444523371834978e-06,
      "loss": 0.9544,
      "step": 3263
    },
    {
      "epoch": 0.7054246812189323,
      "grad_norm": 0.9874102473258972,
      "learning_rate": 8.433099473794255e-06,
      "loss": 0.9106,
      "step": 3264
    },
    {
      "epoch": 0.7056408039766587,
      "grad_norm": 0.8923742771148682,
      "learning_rate": 8.421681243508048e-06,
      "loss": 0.764,
      "step": 3265
    },
    {
      "epoch": 0.7058569267343852,
      "grad_norm": 1.0538125038146973,
      "learning_rate": 8.410268686571269e-06,
      "loss": 1.0664,
      "step": 3266
    },
    {
      "epoch": 0.7060730494921115,
      "grad_norm": 0.9824836254119873,
      "learning_rate": 8.39886180857604e-06,
      "loss": 0.7895,
      "step": 3267
    },
    {
      "epoch": 0.7062891722498379,
      "grad_norm": 0.962027907371521,
      "learning_rate": 8.387460615111707e-06,
      "loss": 0.6859,
      "step": 3268
    },
    {
      "epoch": 0.7065052950075643,
      "grad_norm": 0.8959424495697021,
      "learning_rate": 8.376065111764829e-06,
      "loss": 0.9898,
      "step": 3269
    },
    {
      "epoch": 0.7067214177652907,
      "grad_norm": 0.9475058913230896,
      "learning_rate": 8.364675304119175e-06,
      "loss": 0.8121,
      "step": 3270
    },
    {
      "epoch": 0.706937540523017,
      "grad_norm": 1.3032982349395752,
      "learning_rate": 8.353291197755724e-06,
      "loss": 0.998,
      "step": 3271
    },
    {
      "epoch": 0.7071536632807435,
      "grad_norm": 1.0476473569869995,
      "learning_rate": 8.341912798252659e-06,
      "loss": 0.9759,
      "step": 3272
    },
    {
      "epoch": 0.7073697860384699,
      "grad_norm": 1.0302788019180298,
      "learning_rate": 8.330540111185377e-06,
      "loss": 0.9669,
      "step": 3273
    },
    {
      "epoch": 0.7075859087961962,
      "grad_norm": 1.0285735130310059,
      "learning_rate": 8.319173142126473e-06,
      "loss": 0.8233,
      "step": 3274
    },
    {
      "epoch": 0.7078020315539226,
      "grad_norm": 0.9094539284706116,
      "learning_rate": 8.307811896645719e-06,
      "loss": 0.8072,
      "step": 3275
    },
    {
      "epoch": 0.708018154311649,
      "grad_norm": 0.8531277179718018,
      "learning_rate": 8.296456380310101e-06,
      "loss": 0.8927,
      "step": 3276
    },
    {
      "epoch": 0.7082342770693754,
      "grad_norm": 1.093274712562561,
      "learning_rate": 8.28510659868381e-06,
      "loss": 0.8381,
      "step": 3277
    },
    {
      "epoch": 0.7084503998271018,
      "grad_norm": 1.050874948501587,
      "learning_rate": 8.273762557328204e-06,
      "loss": 0.919,
      "step": 3278
    },
    {
      "epoch": 0.7086665225848282,
      "grad_norm": 0.8907845616340637,
      "learning_rate": 8.262424261801844e-06,
      "loss": 0.8325,
      "step": 3279
    },
    {
      "epoch": 0.7088826453425545,
      "grad_norm": 1.0920202732086182,
      "learning_rate": 8.251091717660449e-06,
      "loss": 1.036,
      "step": 3280
    },
    {
      "epoch": 0.7090987681002809,
      "grad_norm": 1.0165354013442993,
      "learning_rate": 8.23976493045696e-06,
      "loss": 0.6763,
      "step": 3281
    },
    {
      "epoch": 0.7093148908580074,
      "grad_norm": 0.9226462244987488,
      "learning_rate": 8.22844390574147e-06,
      "loss": 0.7246,
      "step": 3282
    },
    {
      "epoch": 0.7095310136157338,
      "grad_norm": 1.0060843229293823,
      "learning_rate": 8.217128649061252e-06,
      "loss": 0.8275,
      "step": 3283
    },
    {
      "epoch": 0.7097471363734601,
      "grad_norm": 0.9231213927268982,
      "learning_rate": 8.20581916596076e-06,
      "loss": 1.0298,
      "step": 3284
    },
    {
      "epoch": 0.7099632591311865,
      "grad_norm": 0.8861151933670044,
      "learning_rate": 8.194515461981612e-06,
      "loss": 0.9043,
      "step": 3285
    },
    {
      "epoch": 0.7101793818889129,
      "grad_norm": 1.0948805809020996,
      "learning_rate": 8.183217542662596e-06,
      "loss": 1.028,
      "step": 3286
    },
    {
      "epoch": 0.7103955046466393,
      "grad_norm": 1.1120010614395142,
      "learning_rate": 8.17192541353967e-06,
      "loss": 0.9463,
      "step": 3287
    },
    {
      "epoch": 0.7106116274043657,
      "grad_norm": 0.9352031350135803,
      "learning_rate": 8.160639080145947e-06,
      "loss": 0.8321,
      "step": 3288
    },
    {
      "epoch": 0.7108277501620921,
      "grad_norm": 1.0009642839431763,
      "learning_rate": 8.149358548011706e-06,
      "loss": 0.876,
      "step": 3289
    },
    {
      "epoch": 0.7110438729198184,
      "grad_norm": 0.907788097858429,
      "learning_rate": 8.138083822664376e-06,
      "loss": 0.9031,
      "step": 3290
    },
    {
      "epoch": 0.7112599956775448,
      "grad_norm": 1.021148681640625,
      "learning_rate": 8.126814909628565e-06,
      "loss": 0.9595,
      "step": 3291
    },
    {
      "epoch": 0.7114761184352713,
      "grad_norm": 1.090847373008728,
      "learning_rate": 8.115551814425995e-06,
      "loss": 1.0886,
      "step": 3292
    },
    {
      "epoch": 0.7116922411929977,
      "grad_norm": 0.9658523201942444,
      "learning_rate": 8.104294542575562e-06,
      "loss": 0.9741,
      "step": 3293
    },
    {
      "epoch": 0.711908363950724,
      "grad_norm": 1.063903570175171,
      "learning_rate": 8.093043099593298e-06,
      "loss": 0.986,
      "step": 3294
    },
    {
      "epoch": 0.7121244867084504,
      "grad_norm": 0.9700491428375244,
      "learning_rate": 8.081797490992398e-06,
      "loss": 0.8083,
      "step": 3295
    },
    {
      "epoch": 0.7123406094661768,
      "grad_norm": 0.9563601613044739,
      "learning_rate": 8.070557722283176e-06,
      "loss": 1.1012,
      "step": 3296
    },
    {
      "epoch": 0.7125567322239031,
      "grad_norm": 1.0904157161712646,
      "learning_rate": 8.0593237989731e-06,
      "loss": 0.9117,
      "step": 3297
    },
    {
      "epoch": 0.7127728549816296,
      "grad_norm": 0.9020714163780212,
      "learning_rate": 8.048095726566746e-06,
      "loss": 0.7514,
      "step": 3298
    },
    {
      "epoch": 0.712988977739356,
      "grad_norm": 0.9047414660453796,
      "learning_rate": 8.036873510565864e-06,
      "loss": 0.8639,
      "step": 3299
    },
    {
      "epoch": 0.7132051004970823,
      "grad_norm": 1.0338889360427856,
      "learning_rate": 8.025657156469307e-06,
      "loss": 0.8126,
      "step": 3300
    },
    {
      "epoch": 0.7134212232548087,
      "grad_norm": 0.9189358949661255,
      "learning_rate": 8.014446669773061e-06,
      "loss": 0.7255,
      "step": 3301
    },
    {
      "epoch": 0.7136373460125351,
      "grad_norm": 0.9422216415405273,
      "learning_rate": 8.003242055970245e-06,
      "loss": 0.7908,
      "step": 3302
    },
    {
      "epoch": 0.7138534687702615,
      "grad_norm": 1.0897268056869507,
      "learning_rate": 7.992043320551084e-06,
      "loss": 0.9166,
      "step": 3303
    },
    {
      "epoch": 0.7140695915279879,
      "grad_norm": 0.9540897011756897,
      "learning_rate": 7.980850469002939e-06,
      "loss": 0.8191,
      "step": 3304
    },
    {
      "epoch": 0.7142857142857143,
      "grad_norm": 0.9342294931411743,
      "learning_rate": 7.969663506810282e-06,
      "loss": 0.7946,
      "step": 3305
    },
    {
      "epoch": 0.7145018370434407,
      "grad_norm": 1.0292030572891235,
      "learning_rate": 7.958482439454694e-06,
      "loss": 0.8582,
      "step": 3306
    },
    {
      "epoch": 0.714717959801167,
      "grad_norm": 0.899174153804779,
      "learning_rate": 7.947307272414874e-06,
      "loss": 1.0184,
      "step": 3307
    },
    {
      "epoch": 0.7149340825588935,
      "grad_norm": 1.0515456199645996,
      "learning_rate": 7.936138011166633e-06,
      "loss": 0.9218,
      "step": 3308
    },
    {
      "epoch": 0.7151502053166199,
      "grad_norm": 1.0137747526168823,
      "learning_rate": 7.924974661182873e-06,
      "loss": 1.0679,
      "step": 3309
    },
    {
      "epoch": 0.7153663280743462,
      "grad_norm": 1.2173030376434326,
      "learning_rate": 7.91381722793362e-06,
      "loss": 1.0549,
      "step": 3310
    },
    {
      "epoch": 0.7155824508320726,
      "grad_norm": 1.0457470417022705,
      "learning_rate": 7.902665716885985e-06,
      "loss": 0.7535,
      "step": 3311
    },
    {
      "epoch": 0.715798573589799,
      "grad_norm": 1.2232424020767212,
      "learning_rate": 7.891520133504175e-06,
      "loss": 1.2076,
      "step": 3312
    },
    {
      "epoch": 0.7160146963475253,
      "grad_norm": 1.0062086582183838,
      "learning_rate": 7.880380483249519e-06,
      "loss": 0.92,
      "step": 3313
    },
    {
      "epoch": 0.7162308191052518,
      "grad_norm": 1.0115803480148315,
      "learning_rate": 7.869246771580414e-06,
      "loss": 1.0616,
      "step": 3314
    },
    {
      "epoch": 0.7164469418629782,
      "grad_norm": 0.9694705009460449,
      "learning_rate": 7.858119003952344e-06,
      "loss": 0.9655,
      "step": 3315
    },
    {
      "epoch": 0.7166630646207046,
      "grad_norm": 0.9992493987083435,
      "learning_rate": 7.846997185817886e-06,
      "loss": 0.9105,
      "step": 3316
    },
    {
      "epoch": 0.7168791873784309,
      "grad_norm": 1.0341936349868774,
      "learning_rate": 7.83588132262672e-06,
      "loss": 0.9677,
      "step": 3317
    },
    {
      "epoch": 0.7170953101361573,
      "grad_norm": 1.082875370979309,
      "learning_rate": 7.824771419825588e-06,
      "loss": 1.0106,
      "step": 3318
    },
    {
      "epoch": 0.7173114328938838,
      "grad_norm": 0.9768566489219666,
      "learning_rate": 7.81366748285832e-06,
      "loss": 0.8919,
      "step": 3319
    },
    {
      "epoch": 0.7175275556516101,
      "grad_norm": 1.0182429552078247,
      "learning_rate": 7.8025695171658e-06,
      "loss": 0.9841,
      "step": 3320
    },
    {
      "epoch": 0.7177436784093365,
      "grad_norm": 1.0493568181991577,
      "learning_rate": 7.791477528186031e-06,
      "loss": 1.1323,
      "step": 3321
    },
    {
      "epoch": 0.7179598011670629,
      "grad_norm": 0.9830143451690674,
      "learning_rate": 7.780391521354047e-06,
      "loss": 0.791,
      "step": 3322
    },
    {
      "epoch": 0.7181759239247892,
      "grad_norm": 1.1327837705612183,
      "learning_rate": 7.769311502101973e-06,
      "loss": 0.8785,
      "step": 3323
    },
    {
      "epoch": 0.7183920466825157,
      "grad_norm": 0.8882994651794434,
      "learning_rate": 7.758237475858987e-06,
      "loss": 0.912,
      "step": 3324
    },
    {
      "epoch": 0.7186081694402421,
      "grad_norm": 1.1242026090621948,
      "learning_rate": 7.747169448051341e-06,
      "loss": 0.9498,
      "step": 3325
    },
    {
      "epoch": 0.7188242921979684,
      "grad_norm": 1.0362653732299805,
      "learning_rate": 7.736107424102342e-06,
      "loss": 0.8962,
      "step": 3326
    },
    {
      "epoch": 0.7190404149556948,
      "grad_norm": 1.0527663230895996,
      "learning_rate": 7.725051409432353e-06,
      "loss": 1.0429,
      "step": 3327
    },
    {
      "epoch": 0.7192565377134212,
      "grad_norm": 0.9332002401351929,
      "learning_rate": 7.714001409458798e-06,
      "loss": 0.8325,
      "step": 3328
    },
    {
      "epoch": 0.7194726604711477,
      "grad_norm": 1.1222304105758667,
      "learning_rate": 7.702957429596152e-06,
      "loss": 0.964,
      "step": 3329
    },
    {
      "epoch": 0.719688783228874,
      "grad_norm": 1.1005209684371948,
      "learning_rate": 7.691919475255931e-06,
      "loss": 1.0872,
      "step": 3330
    },
    {
      "epoch": 0.7199049059866004,
      "grad_norm": 0.9944490194320679,
      "learning_rate": 7.68088755184673e-06,
      "loss": 0.858,
      "step": 3331
    },
    {
      "epoch": 0.7201210287443268,
      "grad_norm": 0.990530788898468,
      "learning_rate": 7.669861664774143e-06,
      "loss": 0.7804,
      "step": 3332
    },
    {
      "epoch": 0.7203371515020531,
      "grad_norm": 1.1296415328979492,
      "learning_rate": 7.658841819440836e-06,
      "loss": 0.8683,
      "step": 3333
    },
    {
      "epoch": 0.7205532742597796,
      "grad_norm": 1.0111063718795776,
      "learning_rate": 7.647828021246503e-06,
      "loss": 0.7653,
      "step": 3334
    },
    {
      "epoch": 0.720769397017506,
      "grad_norm": 0.9293777942657471,
      "learning_rate": 7.636820275587894e-06,
      "loss": 1.0058,
      "step": 3335
    },
    {
      "epoch": 0.7209855197752323,
      "grad_norm": 1.007898211479187,
      "learning_rate": 7.625818587858769e-06,
      "loss": 0.9946,
      "step": 3336
    },
    {
      "epoch": 0.7212016425329587,
      "grad_norm": 1.0067023038864136,
      "learning_rate": 7.6148229634499396e-06,
      "loss": 0.9263,
      "step": 3337
    },
    {
      "epoch": 0.7214177652906851,
      "grad_norm": 0.9708625078201294,
      "learning_rate": 7.6038334077492105e-06,
      "loss": 0.8998,
      "step": 3338
    },
    {
      "epoch": 0.7216338880484114,
      "grad_norm": 0.9538854360580444,
      "learning_rate": 7.592849926141466e-06,
      "loss": 0.8437,
      "step": 3339
    },
    {
      "epoch": 0.7218500108061379,
      "grad_norm": 1.0082589387893677,
      "learning_rate": 7.581872524008574e-06,
      "loss": 1.076,
      "step": 3340
    },
    {
      "epoch": 0.7220661335638643,
      "grad_norm": 1.0913642644882202,
      "learning_rate": 7.5709012067294395e-06,
      "loss": 1.1092,
      "step": 3341
    },
    {
      "epoch": 0.7222822563215907,
      "grad_norm": 0.9131114482879639,
      "learning_rate": 7.559935979679988e-06,
      "loss": 0.926,
      "step": 3342
    },
    {
      "epoch": 0.722498379079317,
      "grad_norm": 1.0407235622406006,
      "learning_rate": 7.548976848233138e-06,
      "loss": 0.9838,
      "step": 3343
    },
    {
      "epoch": 0.7227145018370434,
      "grad_norm": 0.9489783644676208,
      "learning_rate": 7.538023817758855e-06,
      "loss": 0.7608,
      "step": 3344
    },
    {
      "epoch": 0.7229306245947699,
      "grad_norm": 0.939863383769989,
      "learning_rate": 7.5270768936240924e-06,
      "loss": 1.0709,
      "step": 3345
    },
    {
      "epoch": 0.7231467473524962,
      "grad_norm": 1.0594513416290283,
      "learning_rate": 7.516136081192819e-06,
      "loss": 1.008,
      "step": 3346
    },
    {
      "epoch": 0.7233628701102226,
      "grad_norm": 0.9714732766151428,
      "learning_rate": 7.505201385826009e-06,
      "loss": 0.8528,
      "step": 3347
    },
    {
      "epoch": 0.723578992867949,
      "grad_norm": 0.9454349875450134,
      "learning_rate": 7.4942728128816355e-06,
      "loss": 0.8362,
      "step": 3348
    },
    {
      "epoch": 0.7237951156256753,
      "grad_norm": 0.9525579214096069,
      "learning_rate": 7.4833503677146725e-06,
      "loss": 0.8455,
      "step": 3349
    },
    {
      "epoch": 0.7240112383834018,
      "grad_norm": 1.1196388006210327,
      "learning_rate": 7.472434055677098e-06,
      "loss": 1.0083,
      "step": 3350
    },
    {
      "epoch": 0.7242273611411282,
      "grad_norm": 0.9199864864349365,
      "learning_rate": 7.461523882117876e-06,
      "loss": 0.967,
      "step": 3351
    },
    {
      "epoch": 0.7244434838988546,
      "grad_norm": 0.9865084886550903,
      "learning_rate": 7.450619852382959e-06,
      "loss": 0.9323,
      "step": 3352
    },
    {
      "epoch": 0.7246596066565809,
      "grad_norm": 0.8805970549583435,
      "learning_rate": 7.43972197181531e-06,
      "loss": 0.8617,
      "step": 3353
    },
    {
      "epoch": 0.7248757294143073,
      "grad_norm": 0.9994069337844849,
      "learning_rate": 7.42883024575487e-06,
      "loss": 0.8431,
      "step": 3354
    },
    {
      "epoch": 0.7250918521720338,
      "grad_norm": 1.1997559070587158,
      "learning_rate": 7.41794467953854e-06,
      "loss": 0.8347,
      "step": 3355
    },
    {
      "epoch": 0.7253079749297601,
      "grad_norm": 0.9434436559677124,
      "learning_rate": 7.407065278500225e-06,
      "loss": 0.9283,
      "step": 3356
    },
    {
      "epoch": 0.7255240976874865,
      "grad_norm": 1.0402320623397827,
      "learning_rate": 7.39619204797082e-06,
      "loss": 1.0099,
      "step": 3357
    },
    {
      "epoch": 0.7257402204452129,
      "grad_norm": 1.0664135217666626,
      "learning_rate": 7.3853249932781755e-06,
      "loss": 0.8768,
      "step": 3358
    },
    {
      "epoch": 0.7259563432029392,
      "grad_norm": 0.9630206227302551,
      "learning_rate": 7.374464119747122e-06,
      "loss": 0.9166,
      "step": 3359
    },
    {
      "epoch": 0.7261724659606656,
      "grad_norm": 1.0093247890472412,
      "learning_rate": 7.363609432699466e-06,
      "loss": 0.9244,
      "step": 3360
    },
    {
      "epoch": 0.7263885887183921,
      "grad_norm": 0.9457176923751831,
      "learning_rate": 7.352760937453975e-06,
      "loss": 0.786,
      "step": 3361
    },
    {
      "epoch": 0.7266047114761184,
      "grad_norm": 0.9402831792831421,
      "learning_rate": 7.341918639326391e-06,
      "loss": 0.8649,
      "step": 3362
    },
    {
      "epoch": 0.7268208342338448,
      "grad_norm": 0.9374096393585205,
      "learning_rate": 7.331082543629411e-06,
      "loss": 0.8783,
      "step": 3363
    },
    {
      "epoch": 0.7270369569915712,
      "grad_norm": 1.0298699140548706,
      "learning_rate": 7.320252655672697e-06,
      "loss": 0.8972,
      "step": 3364
    },
    {
      "epoch": 0.7272530797492976,
      "grad_norm": 0.945776641368866,
      "learning_rate": 7.309428980762874e-06,
      "loss": 1.0332,
      "step": 3365
    },
    {
      "epoch": 0.727469202507024,
      "grad_norm": 1.0960521697998047,
      "learning_rate": 7.29861152420351e-06,
      "loss": 1.0236,
      "step": 3366
    },
    {
      "epoch": 0.7276853252647504,
      "grad_norm": 0.9372932314872742,
      "learning_rate": 7.2878002912951395e-06,
      "loss": 0.9436,
      "step": 3367
    },
    {
      "epoch": 0.7279014480224768,
      "grad_norm": 0.9429520964622498,
      "learning_rate": 7.27699528733524e-06,
      "loss": 0.9814,
      "step": 3368
    },
    {
      "epoch": 0.7281175707802031,
      "grad_norm": 1.0094513893127441,
      "learning_rate": 7.266196517618238e-06,
      "loss": 0.9912,
      "step": 3369
    },
    {
      "epoch": 0.7283336935379295,
      "grad_norm": 0.9140549898147583,
      "learning_rate": 7.2554039874355005e-06,
      "loss": 0.6879,
      "step": 3370
    },
    {
      "epoch": 0.728549816295656,
      "grad_norm": 1.0616697072982788,
      "learning_rate": 7.244617702075361e-06,
      "loss": 0.9242,
      "step": 3371
    },
    {
      "epoch": 0.7287659390533823,
      "grad_norm": 0.9983554482460022,
      "learning_rate": 7.233837666823054e-06,
      "loss": 0.9,
      "step": 3372
    },
    {
      "epoch": 0.7289820618111087,
      "grad_norm": 0.9800330400466919,
      "learning_rate": 7.223063886960779e-06,
      "loss": 0.8686,
      "step": 3373
    },
    {
      "epoch": 0.7291981845688351,
      "grad_norm": 0.8646646738052368,
      "learning_rate": 7.212296367767657e-06,
      "loss": 0.7352,
      "step": 3374
    },
    {
      "epoch": 0.7294143073265615,
      "grad_norm": 0.8267316818237305,
      "learning_rate": 7.2015351145197594e-06,
      "loss": 0.9062,
      "step": 3375
    },
    {
      "epoch": 0.7296304300842879,
      "grad_norm": 0.9159088730812073,
      "learning_rate": 7.190780132490071e-06,
      "loss": 0.9845,
      "step": 3376
    },
    {
      "epoch": 0.7298465528420143,
      "grad_norm": 1.0310955047607422,
      "learning_rate": 7.180031426948515e-06,
      "loss": 0.8794,
      "step": 3377
    },
    {
      "epoch": 0.7300626755997407,
      "grad_norm": 0.956666111946106,
      "learning_rate": 7.169289003161908e-06,
      "loss": 0.8247,
      "step": 3378
    },
    {
      "epoch": 0.730278798357467,
      "grad_norm": 1.0660452842712402,
      "learning_rate": 7.1585528663940375e-06,
      "loss": 0.9081,
      "step": 3379
    },
    {
      "epoch": 0.7304949211151934,
      "grad_norm": 1.1198481321334839,
      "learning_rate": 7.147823021905578e-06,
      "loss": 0.9272,
      "step": 3380
    },
    {
      "epoch": 0.7307110438729199,
      "grad_norm": 1.1018749475479126,
      "learning_rate": 7.137099474954125e-06,
      "loss": 1.1237,
      "step": 3381
    },
    {
      "epoch": 0.7309271666306462,
      "grad_norm": 1.0701727867126465,
      "learning_rate": 7.1263822307942045e-06,
      "loss": 0.9933,
      "step": 3382
    },
    {
      "epoch": 0.7311432893883726,
      "grad_norm": 0.8856919407844543,
      "learning_rate": 7.115671294677218e-06,
      "loss": 0.8479,
      "step": 3383
    },
    {
      "epoch": 0.731359412146099,
      "grad_norm": 1.0185657739639282,
      "learning_rate": 7.104966671851517e-06,
      "loss": 0.8627,
      "step": 3384
    },
    {
      "epoch": 0.7315755349038253,
      "grad_norm": 1.0855196714401245,
      "learning_rate": 7.09426836756234e-06,
      "loss": 0.8953,
      "step": 3385
    },
    {
      "epoch": 0.7317916576615517,
      "grad_norm": 0.9401381611824036,
      "learning_rate": 7.083576387051827e-06,
      "loss": 0.8269,
      "step": 3386
    },
    {
      "epoch": 0.7320077804192782,
      "grad_norm": 0.9077387452125549,
      "learning_rate": 7.072890735559028e-06,
      "loss": 0.7239,
      "step": 3387
    },
    {
      "epoch": 0.7322239031770046,
      "grad_norm": 1.0916656255722046,
      "learning_rate": 7.062211418319884e-06,
      "loss": 0.9714,
      "step": 3388
    },
    {
      "epoch": 0.7324400259347309,
      "grad_norm": 1.1207377910614014,
      "learning_rate": 7.051538440567238e-06,
      "loss": 1.0157,
      "step": 3389
    },
    {
      "epoch": 0.7326561486924573,
      "grad_norm": 1.0763168334960938,
      "learning_rate": 7.040871807530825e-06,
      "loss": 0.8366,
      "step": 3390
    },
    {
      "epoch": 0.7328722714501837,
      "grad_norm": 1.031316876411438,
      "learning_rate": 7.030211524437267e-06,
      "loss": 1.0263,
      "step": 3391
    },
    {
      "epoch": 0.73308839420791,
      "grad_norm": 1.0733709335327148,
      "learning_rate": 7.0195575965100735e-06,
      "loss": 0.6845,
      "step": 3392
    },
    {
      "epoch": 0.7333045169656365,
      "grad_norm": 1.0250283479690552,
      "learning_rate": 7.008910028969657e-06,
      "loss": 0.9607,
      "step": 3393
    },
    {
      "epoch": 0.7335206397233629,
      "grad_norm": 0.9315289258956909,
      "learning_rate": 6.998268827033303e-06,
      "loss": 0.8932,
      "step": 3394
    },
    {
      "epoch": 0.7337367624810892,
      "grad_norm": 1.0561474561691284,
      "learning_rate": 6.987633995915164e-06,
      "loss": 0.8489,
      "step": 3395
    },
    {
      "epoch": 0.7339528852388156,
      "grad_norm": 1.0175994634628296,
      "learning_rate": 6.977005540826276e-06,
      "loss": 1.1645,
      "step": 3396
    },
    {
      "epoch": 0.7341690079965421,
      "grad_norm": 1.0059309005737305,
      "learning_rate": 6.966383466974578e-06,
      "loss": 0.7149,
      "step": 3397
    },
    {
      "epoch": 0.7343851307542685,
      "grad_norm": 1.010170578956604,
      "learning_rate": 6.95576777956485e-06,
      "loss": 0.7357,
      "step": 3398
    },
    {
      "epoch": 0.7346012535119948,
      "grad_norm": 0.9982577562332153,
      "learning_rate": 6.9451584837987574e-06,
      "loss": 0.9607,
      "step": 3399
    },
    {
      "epoch": 0.7348173762697212,
      "grad_norm": 1.0086729526519775,
      "learning_rate": 6.934555584874834e-06,
      "loss": 0.9249,
      "step": 3400
    },
    {
      "epoch": 0.7350334990274476,
      "grad_norm": 0.9194556474685669,
      "learning_rate": 6.923959087988459e-06,
      "loss": 1.0919,
      "step": 3401
    },
    {
      "epoch": 0.735249621785174,
      "grad_norm": 0.9621269106864929,
      "learning_rate": 6.913368998331911e-06,
      "loss": 0.8922,
      "step": 3402
    },
    {
      "epoch": 0.7354657445429004,
      "grad_norm": 0.914582371711731,
      "learning_rate": 6.902785321094301e-06,
      "loss": 0.7621,
      "step": 3403
    },
    {
      "epoch": 0.7356818673006268,
      "grad_norm": 1.0676108598709106,
      "learning_rate": 6.892208061461607e-06,
      "loss": 1.0045,
      "step": 3404
    },
    {
      "epoch": 0.7358979900583531,
      "grad_norm": 1.0403320789337158,
      "learning_rate": 6.881637224616662e-06,
      "loss": 0.9555,
      "step": 3405
    },
    {
      "epoch": 0.7361141128160795,
      "grad_norm": 0.9418090581893921,
      "learning_rate": 6.87107281573915e-06,
      "loss": 0.8053,
      "step": 3406
    },
    {
      "epoch": 0.736330235573806,
      "grad_norm": 0.8581088185310364,
      "learning_rate": 6.860514840005612e-06,
      "loss": 0.7958,
      "step": 3407
    },
    {
      "epoch": 0.7365463583315323,
      "grad_norm": 0.9900988340377808,
      "learning_rate": 6.849963302589426e-06,
      "loss": 0.9443,
      "step": 3408
    },
    {
      "epoch": 0.7367624810892587,
      "grad_norm": 0.8412300944328308,
      "learning_rate": 6.839418208660824e-06,
      "loss": 0.7845,
      "step": 3409
    },
    {
      "epoch": 0.7369786038469851,
      "grad_norm": 0.9322758913040161,
      "learning_rate": 6.82887956338687e-06,
      "loss": 0.948,
      "step": 3410
    },
    {
      "epoch": 0.7371947266047115,
      "grad_norm": 0.9318099021911621,
      "learning_rate": 6.818347371931498e-06,
      "loss": 1.0697,
      "step": 3411
    },
    {
      "epoch": 0.7374108493624378,
      "grad_norm": 1.023543119430542,
      "learning_rate": 6.807821639455432e-06,
      "loss": 1.0122,
      "step": 3412
    },
    {
      "epoch": 0.7376269721201643,
      "grad_norm": 0.9158769249916077,
      "learning_rate": 6.7973023711162675e-06,
      "loss": 0.8321,
      "step": 3413
    },
    {
      "epoch": 0.7378430948778907,
      "grad_norm": 1.0362530946731567,
      "learning_rate": 6.786789572068417e-06,
      "loss": 0.8262,
      "step": 3414
    },
    {
      "epoch": 0.738059217635617,
      "grad_norm": 0.9433619379997253,
      "learning_rate": 6.776283247463135e-06,
      "loss": 0.8907,
      "step": 3415
    },
    {
      "epoch": 0.7382753403933434,
      "grad_norm": 0.8728352785110474,
      "learning_rate": 6.765783402448496e-06,
      "loss": 0.7827,
      "step": 3416
    },
    {
      "epoch": 0.7384914631510698,
      "grad_norm": 0.9512519836425781,
      "learning_rate": 6.755290042169402e-06,
      "loss": 1.0115,
      "step": 3417
    },
    {
      "epoch": 0.7387075859087961,
      "grad_norm": 0.9114593863487244,
      "learning_rate": 6.744803171767556e-06,
      "loss": 0.6862,
      "step": 3418
    },
    {
      "epoch": 0.7389237086665226,
      "grad_norm": 1.0527325868606567,
      "learning_rate": 6.734322796381521e-06,
      "loss": 0.9852,
      "step": 3419
    },
    {
      "epoch": 0.739139831424249,
      "grad_norm": 0.9113556742668152,
      "learning_rate": 6.723848921146649e-06,
      "loss": 0.9002,
      "step": 3420
    },
    {
      "epoch": 0.7393559541819754,
      "grad_norm": 0.9874826669692993,
      "learning_rate": 6.71338155119512e-06,
      "loss": 0.8813,
      "step": 3421
    },
    {
      "epoch": 0.7395720769397017,
      "grad_norm": 1.0439640283584595,
      "learning_rate": 6.702920691655919e-06,
      "loss": 1.0515,
      "step": 3422
    },
    {
      "epoch": 0.7397881996974282,
      "grad_norm": 0.8799842596054077,
      "learning_rate": 6.692466347654829e-06,
      "loss": 0.7147,
      "step": 3423
    },
    {
      "epoch": 0.7400043224551546,
      "grad_norm": 1.0796020030975342,
      "learning_rate": 6.682018524314471e-06,
      "loss": 1.081,
      "step": 3424
    },
    {
      "epoch": 0.7402204452128809,
      "grad_norm": 0.8543500900268555,
      "learning_rate": 6.6715772267542515e-06,
      "loss": 0.8152,
      "step": 3425
    },
    {
      "epoch": 0.7404365679706073,
      "grad_norm": 0.9580144882202148,
      "learning_rate": 6.661142460090379e-06,
      "loss": 0.8699,
      "step": 3426
    },
    {
      "epoch": 0.7406526907283337,
      "grad_norm": 0.9547476172447205,
      "learning_rate": 6.650714229435867e-06,
      "loss": 0.9847,
      "step": 3427
    },
    {
      "epoch": 0.74086881348606,
      "grad_norm": 0.995082437992096,
      "learning_rate": 6.640292539900521e-06,
      "loss": 0.9451,
      "step": 3428
    },
    {
      "epoch": 0.7410849362437865,
      "grad_norm": 0.9206424951553345,
      "learning_rate": 6.629877396590952e-06,
      "loss": 0.8789,
      "step": 3429
    },
    {
      "epoch": 0.7413010590015129,
      "grad_norm": 0.9735265374183655,
      "learning_rate": 6.619468804610547e-06,
      "loss": 0.9124,
      "step": 3430
    },
    {
      "epoch": 0.7415171817592392,
      "grad_norm": 0.9054409861564636,
      "learning_rate": 6.609066769059498e-06,
      "loss": 0.6867,
      "step": 3431
    },
    {
      "epoch": 0.7417333045169656,
      "grad_norm": 0.9464341402053833,
      "learning_rate": 6.5986712950347705e-06,
      "loss": 0.7371,
      "step": 3432
    },
    {
      "epoch": 0.741949427274692,
      "grad_norm": 1.1124294996261597,
      "learning_rate": 6.588282387630134e-06,
      "loss": 0.9704,
      "step": 3433
    },
    {
      "epoch": 0.7421655500324185,
      "grad_norm": 1.0384552478790283,
      "learning_rate": 6.577900051936133e-06,
      "loss": 0.8598,
      "step": 3434
    },
    {
      "epoch": 0.7423816727901448,
      "grad_norm": 0.8957676291465759,
      "learning_rate": 6.567524293040071e-06,
      "loss": 0.8189,
      "step": 3435
    },
    {
      "epoch": 0.7425977955478712,
      "grad_norm": 0.9393583536148071,
      "learning_rate": 6.557155116026048e-06,
      "loss": 0.8454,
      "step": 3436
    },
    {
      "epoch": 0.7428139183055976,
      "grad_norm": 1.0651594400405884,
      "learning_rate": 6.54679252597495e-06,
      "loss": 0.9852,
      "step": 3437
    },
    {
      "epoch": 0.7430300410633239,
      "grad_norm": 1.1220266819000244,
      "learning_rate": 6.536436527964414e-06,
      "loss": 1.0354,
      "step": 3438
    },
    {
      "epoch": 0.7432461638210504,
      "grad_norm": 0.9720316529273987,
      "learning_rate": 6.526087127068857e-06,
      "loss": 0.8795,
      "step": 3439
    },
    {
      "epoch": 0.7434622865787768,
      "grad_norm": 1.1413424015045166,
      "learning_rate": 6.5157443283594655e-06,
      "loss": 1.0138,
      "step": 3440
    },
    {
      "epoch": 0.7436784093365031,
      "grad_norm": 0.939396858215332,
      "learning_rate": 6.50540813690417e-06,
      "loss": 0.8591,
      "step": 3441
    },
    {
      "epoch": 0.7438945320942295,
      "grad_norm": 1.1626993417739868,
      "learning_rate": 6.495078557767698e-06,
      "loss": 0.883,
      "step": 3442
    },
    {
      "epoch": 0.7441106548519559,
      "grad_norm": 0.9208924174308777,
      "learning_rate": 6.484755596011514e-06,
      "loss": 0.7905,
      "step": 3443
    },
    {
      "epoch": 0.7443267776096822,
      "grad_norm": 0.9631216526031494,
      "learning_rate": 6.474439256693845e-06,
      "loss": 0.953,
      "step": 3444
    },
    {
      "epoch": 0.7445429003674087,
      "grad_norm": 0.9523822665214539,
      "learning_rate": 6.464129544869675e-06,
      "loss": 0.8924,
      "step": 3445
    },
    {
      "epoch": 0.7447590231251351,
      "grad_norm": 1.01498281955719,
      "learning_rate": 6.453826465590738e-06,
      "loss": 0.9344,
      "step": 3446
    },
    {
      "epoch": 0.7449751458828615,
      "grad_norm": 0.7808797359466553,
      "learning_rate": 6.443530023905518e-06,
      "loss": 0.7739,
      "step": 3447
    },
    {
      "epoch": 0.7451912686405878,
      "grad_norm": 1.0282634496688843,
      "learning_rate": 6.433240224859247e-06,
      "loss": 0.8307,
      "step": 3448
    },
    {
      "epoch": 0.7454073913983142,
      "grad_norm": 1.138948917388916,
      "learning_rate": 6.422957073493905e-06,
      "loss": 1.1069,
      "step": 3449
    },
    {
      "epoch": 0.7456235141560407,
      "grad_norm": 1.1635663509368896,
      "learning_rate": 6.412680574848205e-06,
      "loss": 0.9551,
      "step": 3450
    },
    {
      "epoch": 0.745839636913767,
      "grad_norm": 1.016524076461792,
      "learning_rate": 6.402410733957627e-06,
      "loss": 1.0311,
      "step": 3451
    },
    {
      "epoch": 0.7460557596714934,
      "grad_norm": 0.8808252215385437,
      "learning_rate": 6.392147555854349e-06,
      "loss": 0.8383,
      "step": 3452
    },
    {
      "epoch": 0.7462718824292198,
      "grad_norm": 1.1563820838928223,
      "learning_rate": 6.3818910455673125e-06,
      "loss": 0.8775,
      "step": 3453
    },
    {
      "epoch": 0.7464880051869461,
      "grad_norm": 1.0173801183700562,
      "learning_rate": 6.3716412081221766e-06,
      "loss": 0.8934,
      "step": 3454
    },
    {
      "epoch": 0.7467041279446726,
      "grad_norm": 0.996764063835144,
      "learning_rate": 6.361398048541349e-06,
      "loss": 0.9363,
      "step": 3455
    },
    {
      "epoch": 0.746920250702399,
      "grad_norm": 1.1051582098007202,
      "learning_rate": 6.351161571843953e-06,
      "loss": 1.063,
      "step": 3456
    },
    {
      "epoch": 0.7471363734601254,
      "grad_norm": 1.0325502157211304,
      "learning_rate": 6.340931783045841e-06,
      "loss": 0.9407,
      "step": 3457
    },
    {
      "epoch": 0.7473524962178517,
      "grad_norm": 1.139733910560608,
      "learning_rate": 6.330708687159573e-06,
      "loss": 1.0407,
      "step": 3458
    },
    {
      "epoch": 0.7475686189755781,
      "grad_norm": 1.0138351917266846,
      "learning_rate": 6.320492289194442e-06,
      "loss": 1.0362,
      "step": 3459
    },
    {
      "epoch": 0.7477847417333046,
      "grad_norm": 0.9171978235244751,
      "learning_rate": 6.310282594156474e-06,
      "loss": 0.7239,
      "step": 3460
    },
    {
      "epoch": 0.7480008644910309,
      "grad_norm": 1.101423740386963,
      "learning_rate": 6.300079607048388e-06,
      "loss": 1.0078,
      "step": 3461
    },
    {
      "epoch": 0.7482169872487573,
      "grad_norm": 1.0272020101547241,
      "learning_rate": 6.2898833328696265e-06,
      "loss": 0.9119,
      "step": 3462
    },
    {
      "epoch": 0.7484331100064837,
      "grad_norm": 1.0556352138519287,
      "learning_rate": 6.279693776616338e-06,
      "loss": 1.0261,
      "step": 3463
    },
    {
      "epoch": 0.74864923276421,
      "grad_norm": 0.9500029683113098,
      "learning_rate": 6.269510943281383e-06,
      "loss": 0.9667,
      "step": 3464
    },
    {
      "epoch": 0.7488653555219364,
      "grad_norm": 0.8981829881668091,
      "learning_rate": 6.2593348378543255e-06,
      "loss": 1.0014,
      "step": 3465
    },
    {
      "epoch": 0.7490814782796629,
      "grad_norm": 0.9949851036071777,
      "learning_rate": 6.249165465321432e-06,
      "loss": 0.8555,
      "step": 3466
    },
    {
      "epoch": 0.7492976010373892,
      "grad_norm": 0.9537209868431091,
      "learning_rate": 6.239002830665675e-06,
      "loss": 0.726,
      "step": 3467
    },
    {
      "epoch": 0.7495137237951156,
      "grad_norm": 0.8678178191184998,
      "learning_rate": 6.228846938866717e-06,
      "loss": 0.9033,
      "step": 3468
    },
    {
      "epoch": 0.749729846552842,
      "grad_norm": 0.9645637273788452,
      "learning_rate": 6.218697794900928e-06,
      "loss": 0.793,
      "step": 3469
    },
    {
      "epoch": 0.7499459693105684,
      "grad_norm": 0.9632251262664795,
      "learning_rate": 6.208555403741361e-06,
      "loss": 0.809,
      "step": 3470
    },
    {
      "epoch": 0.7501620920682948,
      "grad_norm": 1.032147765159607,
      "learning_rate": 6.198419770357764e-06,
      "loss": 0.9394,
      "step": 3471
    },
    {
      "epoch": 0.7503782148260212,
      "grad_norm": 1.0285770893096924,
      "learning_rate": 6.188290899716569e-06,
      "loss": 0.962,
      "step": 3472
    },
    {
      "epoch": 0.7505943375837476,
      "grad_norm": 0.8958332538604736,
      "learning_rate": 6.178168796780912e-06,
      "loss": 0.9542,
      "step": 3473
    },
    {
      "epoch": 0.7508104603414739,
      "grad_norm": 0.9863196611404419,
      "learning_rate": 6.168053466510597e-06,
      "loss": 0.8857,
      "step": 3474
    },
    {
      "epoch": 0.7510265830992003,
      "grad_norm": 1.0095715522766113,
      "learning_rate": 6.1579449138621065e-06,
      "loss": 0.8864,
      "step": 3475
    },
    {
      "epoch": 0.7512427058569268,
      "grad_norm": 1.0227198600769043,
      "learning_rate": 6.147843143788601e-06,
      "loss": 0.8899,
      "step": 3476
    },
    {
      "epoch": 0.7514588286146531,
      "grad_norm": 0.9959652423858643,
      "learning_rate": 6.137748161239938e-06,
      "loss": 0.8684,
      "step": 3477
    },
    {
      "epoch": 0.7516749513723795,
      "grad_norm": 1.0028258562088013,
      "learning_rate": 6.127659971162634e-06,
      "loss": 1.0545,
      "step": 3478
    },
    {
      "epoch": 0.7518910741301059,
      "grad_norm": 1.0746790170669556,
      "learning_rate": 6.1175785784998745e-06,
      "loss": 0.8086,
      "step": 3479
    },
    {
      "epoch": 0.7521071968878323,
      "grad_norm": 1.0223948955535889,
      "learning_rate": 6.107503988191528e-06,
      "loss": 0.7856,
      "step": 3480
    },
    {
      "epoch": 0.7523233196455587,
      "grad_norm": 0.9098303318023682,
      "learning_rate": 6.0974362051740985e-06,
      "loss": 0.7511,
      "step": 3481
    },
    {
      "epoch": 0.7525394424032851,
      "grad_norm": 1.2489588260650635,
      "learning_rate": 6.0873752343807965e-06,
      "loss": 0.9436,
      "step": 3482
    },
    {
      "epoch": 0.7527555651610115,
      "grad_norm": 0.947459876537323,
      "learning_rate": 6.077321080741469e-06,
      "loss": 0.823,
      "step": 3483
    },
    {
      "epoch": 0.7529716879187378,
      "grad_norm": 1.0971026420593262,
      "learning_rate": 6.067273749182627e-06,
      "loss": 0.9406,
      "step": 3484
    },
    {
      "epoch": 0.7531878106764642,
      "grad_norm": 0.9637070298194885,
      "learning_rate": 6.057233244627441e-06,
      "loss": 0.7159,
      "step": 3485
    },
    {
      "epoch": 0.7534039334341907,
      "grad_norm": 0.9341323971748352,
      "learning_rate": 6.047199571995732e-06,
      "loss": 0.9081,
      "step": 3486
    },
    {
      "epoch": 0.753620056191917,
      "grad_norm": 0.9842247366905212,
      "learning_rate": 6.03717273620398e-06,
      "loss": 0.9413,
      "step": 3487
    },
    {
      "epoch": 0.7538361789496434,
      "grad_norm": 0.920375406742096,
      "learning_rate": 6.02715274216531e-06,
      "loss": 0.9189,
      "step": 3488
    },
    {
      "epoch": 0.7540523017073698,
      "grad_norm": 1.0057052373886108,
      "learning_rate": 6.017139594789496e-06,
      "loss": 0.8734,
      "step": 3489
    },
    {
      "epoch": 0.7542684244650961,
      "grad_norm": 1.1185704469680786,
      "learning_rate": 6.00713329898295e-06,
      "loss": 0.9477,
      "step": 3490
    },
    {
      "epoch": 0.7544845472228225,
      "grad_norm": 0.9550908207893372,
      "learning_rate": 5.997133859648752e-06,
      "loss": 0.7867,
      "step": 3491
    },
    {
      "epoch": 0.754700669980549,
      "grad_norm": 0.9330325722694397,
      "learning_rate": 5.987141281686588e-06,
      "loss": 0.8126,
      "step": 3492
    },
    {
      "epoch": 0.7549167927382754,
      "grad_norm": 0.9264267683029175,
      "learning_rate": 5.977155569992803e-06,
      "loss": 0.8604,
      "step": 3493
    },
    {
      "epoch": 0.7551329154960017,
      "grad_norm": 1.3186193704605103,
      "learning_rate": 5.967176729460367e-06,
      "loss": 1.0712,
      "step": 3494
    },
    {
      "epoch": 0.7553490382537281,
      "grad_norm": 0.9311937689781189,
      "learning_rate": 5.957204764978899e-06,
      "loss": 0.736,
      "step": 3495
    },
    {
      "epoch": 0.7555651610114545,
      "grad_norm": 1.113035798072815,
      "learning_rate": 5.947239681434634e-06,
      "loss": 0.8888,
      "step": 3496
    },
    {
      "epoch": 0.7557812837691809,
      "grad_norm": 1.006145715713501,
      "learning_rate": 5.937281483710446e-06,
      "loss": 0.9843,
      "step": 3497
    },
    {
      "epoch": 0.7559974065269073,
      "grad_norm": 0.9831714034080505,
      "learning_rate": 5.927330176685817e-06,
      "loss": 0.9059,
      "step": 3498
    },
    {
      "epoch": 0.7562135292846337,
      "grad_norm": 0.9994760155677795,
      "learning_rate": 5.9173857652368645e-06,
      "loss": 0.909,
      "step": 3499
    },
    {
      "epoch": 0.75642965204236,
      "grad_norm": 1.0729668140411377,
      "learning_rate": 5.907448254236339e-06,
      "loss": 1.023,
      "step": 3500
    },
    {
      "epoch": 0.7566457748000864,
      "grad_norm": 0.9878201484680176,
      "learning_rate": 5.89751764855359e-06,
      "loss": 0.9754,
      "step": 3501
    },
    {
      "epoch": 0.7568618975578129,
      "grad_norm": 0.9825910329818726,
      "learning_rate": 5.8875939530545936e-06,
      "loss": 0.6884,
      "step": 3502
    },
    {
      "epoch": 0.7570780203155393,
      "grad_norm": 0.946611225605011,
      "learning_rate": 5.877677172601937e-06,
      "loss": 0.902,
      "step": 3503
    },
    {
      "epoch": 0.7572941430732656,
      "grad_norm": 1.0394933223724365,
      "learning_rate": 5.86776731205482e-06,
      "loss": 1.0897,
      "step": 3504
    },
    {
      "epoch": 0.757510265830992,
      "grad_norm": 0.9216761589050293,
      "learning_rate": 5.857864376269051e-06,
      "loss": 0.7417,
      "step": 3505
    },
    {
      "epoch": 0.7577263885887184,
      "grad_norm": 0.8604562878608704,
      "learning_rate": 5.847968370097045e-06,
      "loss": 0.9106,
      "step": 3506
    },
    {
      "epoch": 0.7579425113464447,
      "grad_norm": 1.0855454206466675,
      "learning_rate": 5.838079298387824e-06,
      "loss": 0.7717,
      "step": 3507
    },
    {
      "epoch": 0.7581586341041712,
      "grad_norm": 0.9160546064376831,
      "learning_rate": 5.82819716598701e-06,
      "loss": 0.7919,
      "step": 3508
    },
    {
      "epoch": 0.7583747568618976,
      "grad_norm": 1.1170223951339722,
      "learning_rate": 5.818321977736822e-06,
      "loss": 0.7261,
      "step": 3509
    },
    {
      "epoch": 0.7585908796196239,
      "grad_norm": 1.0788850784301758,
      "learning_rate": 5.808453738476083e-06,
      "loss": 1.0544,
      "step": 3510
    },
    {
      "epoch": 0.7588070023773503,
      "grad_norm": 1.0905154943466187,
      "learning_rate": 5.7985924530402064e-06,
      "loss": 0.9653,
      "step": 3511
    },
    {
      "epoch": 0.7590231251350767,
      "grad_norm": 1.0005922317504883,
      "learning_rate": 5.788738126261191e-06,
      "loss": 0.9405,
      "step": 3512
    },
    {
      "epoch": 0.7592392478928031,
      "grad_norm": 1.1160231828689575,
      "learning_rate": 5.7788907629676504e-06,
      "loss": 1.0449,
      "step": 3513
    },
    {
      "epoch": 0.7594553706505295,
      "grad_norm": 1.0808011293411255,
      "learning_rate": 5.769050367984765e-06,
      "loss": 1.06,
      "step": 3514
    },
    {
      "epoch": 0.7596714934082559,
      "grad_norm": 1.0940054655075073,
      "learning_rate": 5.759216946134298e-06,
      "loss": 0.7752,
      "step": 3515
    },
    {
      "epoch": 0.7598876161659823,
      "grad_norm": 0.9768939018249512,
      "learning_rate": 5.749390502234606e-06,
      "loss": 0.8032,
      "step": 3516
    },
    {
      "epoch": 0.7601037389237086,
      "grad_norm": 0.9841901659965515,
      "learning_rate": 5.739571041100622e-06,
      "loss": 0.8549,
      "step": 3517
    },
    {
      "epoch": 0.7603198616814351,
      "grad_norm": 1.0701329708099365,
      "learning_rate": 5.729758567543866e-06,
      "loss": 0.9749,
      "step": 3518
    },
    {
      "epoch": 0.7605359844391615,
      "grad_norm": 1.1490933895111084,
      "learning_rate": 5.719953086372425e-06,
      "loss": 0.9564,
      "step": 3519
    },
    {
      "epoch": 0.7607521071968878,
      "grad_norm": 0.9661714434623718,
      "learning_rate": 5.710154602390965e-06,
      "loss": 0.9353,
      "step": 3520
    },
    {
      "epoch": 0.7609682299546142,
      "grad_norm": 0.97609943151474,
      "learning_rate": 5.700363120400707e-06,
      "loss": 0.9127,
      "step": 3521
    },
    {
      "epoch": 0.7611843527123406,
      "grad_norm": 1.0656062364578247,
      "learning_rate": 5.690578645199469e-06,
      "loss": 0.8924,
      "step": 3522
    },
    {
      "epoch": 0.761400475470067,
      "grad_norm": 0.9709311723709106,
      "learning_rate": 5.680801181581617e-06,
      "loss": 0.8216,
      "step": 3523
    },
    {
      "epoch": 0.7616165982277934,
      "grad_norm": 1.0572892427444458,
      "learning_rate": 5.671030734338083e-06,
      "loss": 0.9399,
      "step": 3524
    },
    {
      "epoch": 0.7618327209855198,
      "grad_norm": 0.9256997108459473,
      "learning_rate": 5.661267308256366e-06,
      "loss": 0.8269,
      "step": 3525
    },
    {
      "epoch": 0.7620488437432462,
      "grad_norm": 1.0007261037826538,
      "learning_rate": 5.651510908120521e-06,
      "loss": 0.8812,
      "step": 3526
    },
    {
      "epoch": 0.7622649665009725,
      "grad_norm": 1.1655714511871338,
      "learning_rate": 5.641761538711164e-06,
      "loss": 0.7871,
      "step": 3527
    },
    {
      "epoch": 0.762481089258699,
      "grad_norm": 0.970575213432312,
      "learning_rate": 5.632019204805461e-06,
      "loss": 1.1446,
      "step": 3528
    },
    {
      "epoch": 0.7626972120164254,
      "grad_norm": 1.0391998291015625,
      "learning_rate": 5.622283911177133e-06,
      "loss": 0.9843,
      "step": 3529
    },
    {
      "epoch": 0.7629133347741517,
      "grad_norm": 0.9886474013328552,
      "learning_rate": 5.6125556625964465e-06,
      "loss": 0.9048,
      "step": 3530
    },
    {
      "epoch": 0.7631294575318781,
      "grad_norm": 0.9747617244720459,
      "learning_rate": 5.602834463830238e-06,
      "loss": 0.8056,
      "step": 3531
    },
    {
      "epoch": 0.7633455802896045,
      "grad_norm": 1.0145761966705322,
      "learning_rate": 5.593120319641854e-06,
      "loss": 0.8992,
      "step": 3532
    },
    {
      "epoch": 0.7635617030473308,
      "grad_norm": 0.8937737345695496,
      "learning_rate": 5.583413234791211e-06,
      "loss": 0.8653,
      "step": 3533
    },
    {
      "epoch": 0.7637778258050573,
      "grad_norm": 0.9845710396766663,
      "learning_rate": 5.5737132140347575e-06,
      "loss": 0.8091,
      "step": 3534
    },
    {
      "epoch": 0.7639939485627837,
      "grad_norm": 0.993019700050354,
      "learning_rate": 5.5640202621254714e-06,
      "loss": 0.8797,
      "step": 3535
    },
    {
      "epoch": 0.76421007132051,
      "grad_norm": 0.986335813999176,
      "learning_rate": 5.5543343838128935e-06,
      "loss": 1.0194,
      "step": 3536
    },
    {
      "epoch": 0.7644261940782364,
      "grad_norm": 1.0384854078292847,
      "learning_rate": 5.544655583843079e-06,
      "loss": 0.9606,
      "step": 3537
    },
    {
      "epoch": 0.7646423168359628,
      "grad_norm": 0.9218137860298157,
      "learning_rate": 5.534983866958608e-06,
      "loss": 0.8564,
      "step": 3538
    },
    {
      "epoch": 0.7648584395936893,
      "grad_norm": 1.080175518989563,
      "learning_rate": 5.5253192378985966e-06,
      "loss": 1.0654,
      "step": 3539
    },
    {
      "epoch": 0.7650745623514156,
      "grad_norm": 0.9595901966094971,
      "learning_rate": 5.515661701398705e-06,
      "loss": 0.8546,
      "step": 3540
    },
    {
      "epoch": 0.765290685109142,
      "grad_norm": 1.0158528089523315,
      "learning_rate": 5.506011262191096e-06,
      "loss": 0.9655,
      "step": 3541
    },
    {
      "epoch": 0.7655068078668684,
      "grad_norm": 1.0765477418899536,
      "learning_rate": 5.496367925004462e-06,
      "loss": 1.0154,
      "step": 3542
    },
    {
      "epoch": 0.7657229306245947,
      "grad_norm": 1.0074890851974487,
      "learning_rate": 5.48673169456402e-06,
      "loss": 0.8622,
      "step": 3543
    },
    {
      "epoch": 0.7659390533823212,
      "grad_norm": 1.063214898109436,
      "learning_rate": 5.477102575591495e-06,
      "loss": 0.984,
      "step": 3544
    },
    {
      "epoch": 0.7661551761400476,
      "grad_norm": 1.00334894657135,
      "learning_rate": 5.4674805728051395e-06,
      "loss": 0.9945,
      "step": 3545
    },
    {
      "epoch": 0.7663712988977739,
      "grad_norm": 0.9830182194709778,
      "learning_rate": 5.4578656909197055e-06,
      "loss": 0.9223,
      "step": 3546
    },
    {
      "epoch": 0.7665874216555003,
      "grad_norm": 1.033290982246399,
      "learning_rate": 5.448257934646468e-06,
      "loss": 0.8841,
      "step": 3547
    },
    {
      "epoch": 0.7668035444132267,
      "grad_norm": 1.0941208600997925,
      "learning_rate": 5.438657308693202e-06,
      "loss": 1.073,
      "step": 3548
    },
    {
      "epoch": 0.7670196671709532,
      "grad_norm": 0.8555561900138855,
      "learning_rate": 5.429063817764197e-06,
      "loss": 0.9096,
      "step": 3549
    },
    {
      "epoch": 0.7672357899286795,
      "grad_norm": 0.8595163226127625,
      "learning_rate": 5.419477466560237e-06,
      "loss": 0.7998,
      "step": 3550
    },
    {
      "epoch": 0.7674519126864059,
      "grad_norm": 0.8784067630767822,
      "learning_rate": 5.409898259778612e-06,
      "loss": 0.822,
      "step": 3551
    },
    {
      "epoch": 0.7676680354441323,
      "grad_norm": 1.0079318284988403,
      "learning_rate": 5.400326202113107e-06,
      "loss": 0.8605,
      "step": 3552
    },
    {
      "epoch": 0.7678841582018586,
      "grad_norm": 1.1107678413391113,
      "learning_rate": 5.390761298254019e-06,
      "loss": 0.9658,
      "step": 3553
    },
    {
      "epoch": 0.768100280959585,
      "grad_norm": 0.952477753162384,
      "learning_rate": 5.381203552888128e-06,
      "loss": 0.851,
      "step": 3554
    },
    {
      "epoch": 0.7683164037173115,
      "grad_norm": 1.0495015382766724,
      "learning_rate": 5.371652970698697e-06,
      "loss": 1.0494,
      "step": 3555
    },
    {
      "epoch": 0.7685325264750378,
      "grad_norm": 1.0040615797042847,
      "learning_rate": 5.362109556365496e-06,
      "loss": 0.951,
      "step": 3556
    },
    {
      "epoch": 0.7687486492327642,
      "grad_norm": 1.0023738145828247,
      "learning_rate": 5.352573314564768e-06,
      "loss": 0.8326,
      "step": 3557
    },
    {
      "epoch": 0.7689647719904906,
      "grad_norm": 0.9427552819252014,
      "learning_rate": 5.343044249969263e-06,
      "loss": 0.9219,
      "step": 3558
    },
    {
      "epoch": 0.7691808947482169,
      "grad_norm": 1.0693159103393555,
      "learning_rate": 5.333522367248189e-06,
      "loss": 0.8595,
      "step": 3559
    },
    {
      "epoch": 0.7693970175059434,
      "grad_norm": 0.8929608464241028,
      "learning_rate": 5.324007671067262e-06,
      "loss": 0.8378,
      "step": 3560
    },
    {
      "epoch": 0.7696131402636698,
      "grad_norm": 1.0509757995605469,
      "learning_rate": 5.3145001660886366e-06,
      "loss": 0.8896,
      "step": 3561
    },
    {
      "epoch": 0.7698292630213962,
      "grad_norm": 1.0412158966064453,
      "learning_rate": 5.304999856970987e-06,
      "loss": 0.8417,
      "step": 3562
    },
    {
      "epoch": 0.7700453857791225,
      "grad_norm": 1.0653715133666992,
      "learning_rate": 5.295506748369437e-06,
      "loss": 0.9056,
      "step": 3563
    },
    {
      "epoch": 0.7702615085368489,
      "grad_norm": 0.8758518695831299,
      "learning_rate": 5.286020844935591e-06,
      "loss": 0.9891,
      "step": 3564
    },
    {
      "epoch": 0.7704776312945754,
      "grad_norm": 1.0536704063415527,
      "learning_rate": 5.276542151317514e-06,
      "loss": 0.9429,
      "step": 3565
    },
    {
      "epoch": 0.7706937540523017,
      "grad_norm": 0.942868709564209,
      "learning_rate": 5.267070672159749e-06,
      "loss": 0.9326,
      "step": 3566
    },
    {
      "epoch": 0.7709098768100281,
      "grad_norm": 0.9325775504112244,
      "learning_rate": 5.257606412103298e-06,
      "loss": 0.8711,
      "step": 3567
    },
    {
      "epoch": 0.7711259995677545,
      "grad_norm": 0.8976432085037231,
      "learning_rate": 5.248149375785623e-06,
      "loss": 0.7371,
      "step": 3568
    },
    {
      "epoch": 0.7713421223254808,
      "grad_norm": 0.881286084651947,
      "learning_rate": 5.238699567840655e-06,
      "loss": 0.947,
      "step": 3569
    },
    {
      "epoch": 0.7715582450832073,
      "grad_norm": 0.9576743245124817,
      "learning_rate": 5.229256992898768e-06,
      "loss": 0.9322,
      "step": 3570
    },
    {
      "epoch": 0.7717743678409337,
      "grad_norm": 0.9686759114265442,
      "learning_rate": 5.219821655586821e-06,
      "loss": 0.9093,
      "step": 3571
    },
    {
      "epoch": 0.77199049059866,
      "grad_norm": 1.0285251140594482,
      "learning_rate": 5.210393560528091e-06,
      "loss": 0.9755,
      "step": 3572
    },
    {
      "epoch": 0.7722066133563864,
      "grad_norm": 0.9954254627227783,
      "learning_rate": 5.200972712342327e-06,
      "loss": 0.9602,
      "step": 3573
    },
    {
      "epoch": 0.7724227361141128,
      "grad_norm": 0.9787831902503967,
      "learning_rate": 5.191559115645723e-06,
      "loss": 0.8698,
      "step": 3574
    },
    {
      "epoch": 0.7726388588718393,
      "grad_norm": 0.911556601524353,
      "learning_rate": 5.182152775050917e-06,
      "loss": 0.8634,
      "step": 3575
    },
    {
      "epoch": 0.7728549816295656,
      "grad_norm": 0.9864009022712708,
      "learning_rate": 5.172753695167001e-06,
      "loss": 0.8141,
      "step": 3576
    },
    {
      "epoch": 0.773071104387292,
      "grad_norm": 0.9081274271011353,
      "learning_rate": 5.163361880599505e-06,
      "loss": 0.9335,
      "step": 3577
    },
    {
      "epoch": 0.7732872271450184,
      "grad_norm": 0.9300902485847473,
      "learning_rate": 5.153977335950384e-06,
      "loss": 0.989,
      "step": 3578
    },
    {
      "epoch": 0.7735033499027447,
      "grad_norm": 1.0700031518936157,
      "learning_rate": 5.144600065818044e-06,
      "loss": 1.0278,
      "step": 3579
    },
    {
      "epoch": 0.7737194726604711,
      "grad_norm": 1.109338402748108,
      "learning_rate": 5.1352300747973375e-06,
      "loss": 0.8681,
      "step": 3580
    },
    {
      "epoch": 0.7739355954181976,
      "grad_norm": 1.0578832626342773,
      "learning_rate": 5.125867367479531e-06,
      "loss": 1.0099,
      "step": 3581
    },
    {
      "epoch": 0.7741517181759239,
      "grad_norm": 0.8836191296577454,
      "learning_rate": 5.11651194845233e-06,
      "loss": 0.9081,
      "step": 3582
    },
    {
      "epoch": 0.7743678409336503,
      "grad_norm": 0.9299150109291077,
      "learning_rate": 5.10716382229987e-06,
      "loss": 0.9009,
      "step": 3583
    },
    {
      "epoch": 0.7745839636913767,
      "grad_norm": 1.2958821058273315,
      "learning_rate": 5.0978229936027076e-06,
      "loss": 1.021,
      "step": 3584
    },
    {
      "epoch": 0.7748000864491031,
      "grad_norm": 0.9406410455703735,
      "learning_rate": 5.088489466937832e-06,
      "loss": 0.8438,
      "step": 3585
    },
    {
      "epoch": 0.7750162092068295,
      "grad_norm": 1.2616820335388184,
      "learning_rate": 5.0791632468786445e-06,
      "loss": 0.9295,
      "step": 3586
    },
    {
      "epoch": 0.7752323319645559,
      "grad_norm": 0.8569998741149902,
      "learning_rate": 5.069844337994976e-06,
      "loss": 0.7257,
      "step": 3587
    },
    {
      "epoch": 0.7754484547222823,
      "grad_norm": 1.0046062469482422,
      "learning_rate": 5.0605327448530616e-06,
      "loss": 0.9712,
      "step": 3588
    },
    {
      "epoch": 0.7756645774800086,
      "grad_norm": 1.2030833959579468,
      "learning_rate": 5.0512284720155794e-06,
      "loss": 0.9328,
      "step": 3589
    },
    {
      "epoch": 0.775880700237735,
      "grad_norm": 0.9053451418876648,
      "learning_rate": 5.041931524041584e-06,
      "loss": 0.7339,
      "step": 3590
    },
    {
      "epoch": 0.7760968229954615,
      "grad_norm": 1.0405101776123047,
      "learning_rate": 5.032641905486562e-06,
      "loss": 0.9354,
      "step": 3591
    },
    {
      "epoch": 0.7763129457531878,
      "grad_norm": 1.0439729690551758,
      "learning_rate": 5.023359620902408e-06,
      "loss": 0.9671,
      "step": 3592
    },
    {
      "epoch": 0.7765290685109142,
      "grad_norm": 0.9951745867729187,
      "learning_rate": 5.014084674837414e-06,
      "loss": 0.7272,
      "step": 3593
    },
    {
      "epoch": 0.7767451912686406,
      "grad_norm": 0.9379323720932007,
      "learning_rate": 5.0048170718362965e-06,
      "loss": 0.8559,
      "step": 3594
    },
    {
      "epoch": 0.7769613140263669,
      "grad_norm": 0.9604951739311218,
      "learning_rate": 4.9955568164401456e-06,
      "loss": 0.7323,
      "step": 3595
    },
    {
      "epoch": 0.7771774367840933,
      "grad_norm": 0.9695637822151184,
      "learning_rate": 4.986303913186468e-06,
      "loss": 0.906,
      "step": 3596
    },
    {
      "epoch": 0.7773935595418198,
      "grad_norm": 1.0083304643630981,
      "learning_rate": 4.9770583666091625e-06,
      "loss": 0.967,
      "step": 3597
    },
    {
      "epoch": 0.7776096822995462,
      "grad_norm": 1.0379058122634888,
      "learning_rate": 4.967820181238532e-06,
      "loss": 0.9315,
      "step": 3598
    },
    {
      "epoch": 0.7778258050572725,
      "grad_norm": 1.0539214611053467,
      "learning_rate": 4.958589361601265e-06,
      "loss": 0.9961,
      "step": 3599
    },
    {
      "epoch": 0.7780419278149989,
      "grad_norm": 0.9680747985839844,
      "learning_rate": 4.9493659122204475e-06,
      "loss": 0.7478,
      "step": 3600
    },
    {
      "epoch": 0.7782580505727253,
      "grad_norm": 1.0678519010543823,
      "learning_rate": 4.940149837615527e-06,
      "loss": 0.9473,
      "step": 3601
    },
    {
      "epoch": 0.7784741733304517,
      "grad_norm": 1.0017139911651611,
      "learning_rate": 4.930941142302379e-06,
      "loss": 0.8501,
      "step": 3602
    },
    {
      "epoch": 0.7786902960881781,
      "grad_norm": 0.981833815574646,
      "learning_rate": 4.9217398307932376e-06,
      "loss": 0.7998,
      "step": 3603
    },
    {
      "epoch": 0.7789064188459045,
      "grad_norm": 1.018774151802063,
      "learning_rate": 4.912545907596722e-06,
      "loss": 0.7805,
      "step": 3604
    },
    {
      "epoch": 0.7791225416036308,
      "grad_norm": 1.044105887413025,
      "learning_rate": 4.9033593772178355e-06,
      "loss": 0.9452,
      "step": 3605
    },
    {
      "epoch": 0.7793386643613572,
      "grad_norm": 0.9681096076965332,
      "learning_rate": 4.894180244157956e-06,
      "loss": 0.7941,
      "step": 3606
    },
    {
      "epoch": 0.7795547871190837,
      "grad_norm": 1.0327109098434448,
      "learning_rate": 4.885008512914837e-06,
      "loss": 0.9058,
      "step": 3607
    },
    {
      "epoch": 0.7797709098768101,
      "grad_norm": 1.0944807529449463,
      "learning_rate": 4.875844187982606e-06,
      "loss": 1.0559,
      "step": 3608
    },
    {
      "epoch": 0.7799870326345364,
      "grad_norm": 1.0139321088790894,
      "learning_rate": 4.8666872738517605e-06,
      "loss": 0.9443,
      "step": 3609
    },
    {
      "epoch": 0.7802031553922628,
      "grad_norm": 0.9465548992156982,
      "learning_rate": 4.85753777500916e-06,
      "loss": 1.0223,
      "step": 3610
    },
    {
      "epoch": 0.7804192781499892,
      "grad_norm": 0.9195845127105713,
      "learning_rate": 4.8483956959380595e-06,
      "loss": 0.7036,
      "step": 3611
    },
    {
      "epoch": 0.7806354009077155,
      "grad_norm": 1.0407829284667969,
      "learning_rate": 4.839261041118035e-06,
      "loss": 0.8927,
      "step": 3612
    },
    {
      "epoch": 0.780851523665442,
      "grad_norm": 0.9261695146560669,
      "learning_rate": 4.830133815025055e-06,
      "loss": 0.7521,
      "step": 3613
    },
    {
      "epoch": 0.7810676464231684,
      "grad_norm": 0.9677841067314148,
      "learning_rate": 4.821014022131439e-06,
      "loss": 0.9074,
      "step": 3614
    },
    {
      "epoch": 0.7812837691808947,
      "grad_norm": 0.9663761854171753,
      "learning_rate": 4.811901666905856e-06,
      "loss": 0.9172,
      "step": 3615
    },
    {
      "epoch": 0.7814998919386211,
      "grad_norm": 0.9527883529663086,
      "learning_rate": 4.802796753813353e-06,
      "loss": 0.9255,
      "step": 3616
    },
    {
      "epoch": 0.7817160146963475,
      "grad_norm": 1.0191152095794678,
      "learning_rate": 4.793699287315314e-06,
      "loss": 0.8335,
      "step": 3617
    },
    {
      "epoch": 0.7819321374540739,
      "grad_norm": 1.1564364433288574,
      "learning_rate": 4.784609271869469e-06,
      "loss": 0.8629,
      "step": 3618
    },
    {
      "epoch": 0.7821482602118003,
      "grad_norm": 1.0203686952590942,
      "learning_rate": 4.775526711929901e-06,
      "loss": 0.8993,
      "step": 3619
    },
    {
      "epoch": 0.7823643829695267,
      "grad_norm": 1.1355894804000854,
      "learning_rate": 4.7664516119470565e-06,
      "loss": 0.8992,
      "step": 3620
    },
    {
      "epoch": 0.7825805057272531,
      "grad_norm": 0.9424318671226501,
      "learning_rate": 4.7573839763677045e-06,
      "loss": 0.976,
      "step": 3621
    },
    {
      "epoch": 0.7827966284849794,
      "grad_norm": 1.0141879320144653,
      "learning_rate": 4.748323809634972e-06,
      "loss": 1.0126,
      "step": 3622
    },
    {
      "epoch": 0.7830127512427059,
      "grad_norm": 0.9914786219596863,
      "learning_rate": 4.7392711161883136e-06,
      "loss": 0.968,
      "step": 3623
    },
    {
      "epoch": 0.7832288740004323,
      "grad_norm": 1.0773526430130005,
      "learning_rate": 4.73022590046353e-06,
      "loss": 1.0232,
      "step": 3624
    },
    {
      "epoch": 0.7834449967581586,
      "grad_norm": 1.0062161684036255,
      "learning_rate": 4.721188166892759e-06,
      "loss": 0.931,
      "step": 3625
    },
    {
      "epoch": 0.783661119515885,
      "grad_norm": 1.0620781183242798,
      "learning_rate": 4.712157919904465e-06,
      "loss": 0.8005,
      "step": 3626
    },
    {
      "epoch": 0.7838772422736114,
      "grad_norm": 1.038500189781189,
      "learning_rate": 4.703135163923451e-06,
      "loss": 0.825,
      "step": 3627
    },
    {
      "epoch": 0.7840933650313378,
      "grad_norm": 0.9993885159492493,
      "learning_rate": 4.694119903370837e-06,
      "loss": 0.803,
      "step": 3628
    },
    {
      "epoch": 0.7843094877890642,
      "grad_norm": 1.0093708038330078,
      "learning_rate": 4.685112142664103e-06,
      "loss": 1.0174,
      "step": 3629
    },
    {
      "epoch": 0.7845256105467906,
      "grad_norm": 0.9287478923797607,
      "learning_rate": 4.67611188621701e-06,
      "loss": 0.9034,
      "step": 3630
    },
    {
      "epoch": 0.784741733304517,
      "grad_norm": 0.9693188667297363,
      "learning_rate": 4.667119138439669e-06,
      "loss": 0.906,
      "step": 3631
    },
    {
      "epoch": 0.7849578560622433,
      "grad_norm": 0.9690222144126892,
      "learning_rate": 4.6581339037385045e-06,
      "loss": 0.7174,
      "step": 3632
    },
    {
      "epoch": 0.7851739788199698,
      "grad_norm": 1.038748025894165,
      "learning_rate": 4.649156186516255e-06,
      "loss": 0.8444,
      "step": 3633
    },
    {
      "epoch": 0.7853901015776962,
      "grad_norm": 1.1169486045837402,
      "learning_rate": 4.640185991172002e-06,
      "loss": 0.9354,
      "step": 3634
    },
    {
      "epoch": 0.7856062243354225,
      "grad_norm": 0.9531453251838684,
      "learning_rate": 4.6312233221011e-06,
      "loss": 0.8381,
      "step": 3635
    },
    {
      "epoch": 0.7858223470931489,
      "grad_norm": 0.9114736318588257,
      "learning_rate": 4.622268183695242e-06,
      "loss": 0.9621,
      "step": 3636
    },
    {
      "epoch": 0.7860384698508753,
      "grad_norm": 0.9040265083312988,
      "learning_rate": 4.613320580342422e-06,
      "loss": 0.8006,
      "step": 3637
    },
    {
      "epoch": 0.7862545926086016,
      "grad_norm": 0.9460227489471436,
      "learning_rate": 4.6043805164269516e-06,
      "loss": 0.8082,
      "step": 3638
    },
    {
      "epoch": 0.7864707153663281,
      "grad_norm": 1.059830665588379,
      "learning_rate": 4.595447996329441e-06,
      "loss": 0.8074,
      "step": 3639
    },
    {
      "epoch": 0.7866868381240545,
      "grad_norm": 0.9420005679130554,
      "learning_rate": 4.586523024426808e-06,
      "loss": 0.8067,
      "step": 3640
    },
    {
      "epoch": 0.7869029608817808,
      "grad_norm": 0.9644083976745605,
      "learning_rate": 4.577605605092248e-06,
      "loss": 0.8934,
      "step": 3641
    },
    {
      "epoch": 0.7871190836395072,
      "grad_norm": 1.048953652381897,
      "learning_rate": 4.568695742695297e-06,
      "loss": 1.0489,
      "step": 3642
    },
    {
      "epoch": 0.7873352063972336,
      "grad_norm": 1.144123911857605,
      "learning_rate": 4.559793441601761e-06,
      "loss": 1.1039,
      "step": 3643
    },
    {
      "epoch": 0.7875513291549601,
      "grad_norm": 0.9493755102157593,
      "learning_rate": 4.550898706173745e-06,
      "loss": 0.9291,
      "step": 3644
    },
    {
      "epoch": 0.7877674519126864,
      "grad_norm": 1.1302835941314697,
      "learning_rate": 4.54201154076965e-06,
      "loss": 0.8519,
      "step": 3645
    },
    {
      "epoch": 0.7879835746704128,
      "grad_norm": 1.1162554025650024,
      "learning_rate": 4.533131949744167e-06,
      "loss": 0.795,
      "step": 3646
    },
    {
      "epoch": 0.7881996974281392,
      "grad_norm": 0.9301594495773315,
      "learning_rate": 4.524259937448274e-06,
      "loss": 0.9605,
      "step": 3647
    },
    {
      "epoch": 0.7884158201858655,
      "grad_norm": 1.05987548828125,
      "learning_rate": 4.515395508229239e-06,
      "loss": 1.0865,
      "step": 3648
    },
    {
      "epoch": 0.788631942943592,
      "grad_norm": 0.9806153178215027,
      "learning_rate": 4.506538666430606e-06,
      "loss": 0.9356,
      "step": 3649
    },
    {
      "epoch": 0.7888480657013184,
      "grad_norm": 1.012948751449585,
      "learning_rate": 4.4976894163922126e-06,
      "loss": 0.9,
      "step": 3650
    },
    {
      "epoch": 0.7890641884590447,
      "grad_norm": 0.9189282059669495,
      "learning_rate": 4.4888477624501704e-06,
      "loss": 0.9075,
      "step": 3651
    },
    {
      "epoch": 0.7892803112167711,
      "grad_norm": 0.9508541822433472,
      "learning_rate": 4.4800137089368655e-06,
      "loss": 0.9728,
      "step": 3652
    },
    {
      "epoch": 0.7894964339744975,
      "grad_norm": 1.013681411743164,
      "learning_rate": 4.471187260180967e-06,
      "loss": 0.9852,
      "step": 3653
    },
    {
      "epoch": 0.789712556732224,
      "grad_norm": 1.008089303970337,
      "learning_rate": 4.462368420507414e-06,
      "loss": 0.8611,
      "step": 3654
    },
    {
      "epoch": 0.7899286794899503,
      "grad_norm": 0.8755045533180237,
      "learning_rate": 4.453557194237413e-06,
      "loss": 0.7341,
      "step": 3655
    },
    {
      "epoch": 0.7901448022476767,
      "grad_norm": 0.9966463446617126,
      "learning_rate": 4.4447535856884505e-06,
      "loss": 0.8665,
      "step": 3656
    },
    {
      "epoch": 0.7903609250054031,
      "grad_norm": 0.9538276791572571,
      "learning_rate": 4.435957599174281e-06,
      "loss": 0.8991,
      "step": 3657
    },
    {
      "epoch": 0.7905770477631294,
      "grad_norm": 1.0445584058761597,
      "learning_rate": 4.427169239004902e-06,
      "loss": 0.9756,
      "step": 3658
    },
    {
      "epoch": 0.7907931705208558,
      "grad_norm": 0.9147434830665588,
      "learning_rate": 4.41838850948659e-06,
      "loss": 0.9259,
      "step": 3659
    },
    {
      "epoch": 0.7910092932785823,
      "grad_norm": 1.0846123695373535,
      "learning_rate": 4.4096154149218974e-06,
      "loss": 0.9764,
      "step": 3660
    },
    {
      "epoch": 0.7912254160363086,
      "grad_norm": 1.015160083770752,
      "learning_rate": 4.4008499596096095e-06,
      "loss": 1.1893,
      "step": 3661
    },
    {
      "epoch": 0.791441538794035,
      "grad_norm": 0.9260875582695007,
      "learning_rate": 4.392092147844782e-06,
      "loss": 0.9678,
      "step": 3662
    },
    {
      "epoch": 0.7916576615517614,
      "grad_norm": 0.9385184049606323,
      "learning_rate": 4.383341983918723e-06,
      "loss": 0.8045,
      "step": 3663
    },
    {
      "epoch": 0.7918737843094877,
      "grad_norm": 0.9274015426635742,
      "learning_rate": 4.37459947211899e-06,
      "loss": 0.9444,
      "step": 3664
    },
    {
      "epoch": 0.7920899070672142,
      "grad_norm": 0.9285722374916077,
      "learning_rate": 4.365864616729396e-06,
      "loss": 0.892,
      "step": 3665
    },
    {
      "epoch": 0.7923060298249406,
      "grad_norm": 0.9584118723869324,
      "learning_rate": 4.3571374220299974e-06,
      "loss": 0.9126,
      "step": 3666
    },
    {
      "epoch": 0.792522152582667,
      "grad_norm": 0.9379822611808777,
      "learning_rate": 4.348417892297101e-06,
      "loss": 0.887,
      "step": 3667
    },
    {
      "epoch": 0.7927382753403933,
      "grad_norm": 1.0396922826766968,
      "learning_rate": 4.339706031803252e-06,
      "loss": 0.9026,
      "step": 3668
    },
    {
      "epoch": 0.7929543980981197,
      "grad_norm": 1.0380027294158936,
      "learning_rate": 4.331001844817257e-06,
      "loss": 0.9497,
      "step": 3669
    },
    {
      "epoch": 0.7931705208558462,
      "grad_norm": 1.0886414051055908,
      "learning_rate": 4.3223053356041315e-06,
      "loss": 0.918,
      "step": 3670
    },
    {
      "epoch": 0.7933866436135725,
      "grad_norm": 1.024379849433899,
      "learning_rate": 4.313616508425147e-06,
      "loss": 1.1016,
      "step": 3671
    },
    {
      "epoch": 0.7936027663712989,
      "grad_norm": 1.0421767234802246,
      "learning_rate": 4.304935367537814e-06,
      "loss": 1.086,
      "step": 3672
    },
    {
      "epoch": 0.7938188891290253,
      "grad_norm": 0.8832529783248901,
      "learning_rate": 4.296261917195863e-06,
      "loss": 0.7619,
      "step": 3673
    },
    {
      "epoch": 0.7940350118867516,
      "grad_norm": 0.9886100888252258,
      "learning_rate": 4.287596161649283e-06,
      "loss": 0.8841,
      "step": 3674
    },
    {
      "epoch": 0.794251134644478,
      "grad_norm": 1.0766829252243042,
      "learning_rate": 4.278938105144255e-06,
      "loss": 1.0098,
      "step": 3675
    },
    {
      "epoch": 0.7944672574022045,
      "grad_norm": 1.0706645250320435,
      "learning_rate": 4.270287751923215e-06,
      "loss": 0.9871,
      "step": 3676
    },
    {
      "epoch": 0.7946833801599308,
      "grad_norm": 0.9385272264480591,
      "learning_rate": 4.2616451062248075e-06,
      "loss": 0.8735,
      "step": 3677
    },
    {
      "epoch": 0.7948995029176572,
      "grad_norm": 1.014313817024231,
      "learning_rate": 4.253010172283923e-06,
      "loss": 1.1954,
      "step": 3678
    },
    {
      "epoch": 0.7951156256753836,
      "grad_norm": 0.9842823147773743,
      "learning_rate": 4.244382954331652e-06,
      "loss": 0.9118,
      "step": 3679
    },
    {
      "epoch": 0.79533174843311,
      "grad_norm": 1.0659067630767822,
      "learning_rate": 4.2357634565953165e-06,
      "loss": 0.821,
      "step": 3680
    },
    {
      "epoch": 0.7955478711908364,
      "grad_norm": 1.0056623220443726,
      "learning_rate": 4.2271516832984335e-06,
      "loss": 0.9094,
      "step": 3681
    },
    {
      "epoch": 0.7957639939485628,
      "grad_norm": 1.0319385528564453,
      "learning_rate": 4.218547638660773e-06,
      "loss": 0.9341,
      "step": 3682
    },
    {
      "epoch": 0.7959801167062892,
      "grad_norm": 0.9862573146820068,
      "learning_rate": 4.209951326898285e-06,
      "loss": 0.968,
      "step": 3683
    },
    {
      "epoch": 0.7961962394640155,
      "grad_norm": 0.9928536415100098,
      "learning_rate": 4.201362752223146e-06,
      "loss": 0.8657,
      "step": 3684
    },
    {
      "epoch": 0.7964123622217419,
      "grad_norm": 1.0648728609085083,
      "learning_rate": 4.192781918843738e-06,
      "loss": 0.8685,
      "step": 3685
    },
    {
      "epoch": 0.7966284849794684,
      "grad_norm": 1.1425668001174927,
      "learning_rate": 4.184208830964649e-06,
      "loss": 1.0023,
      "step": 3686
    },
    {
      "epoch": 0.7968446077371947,
      "grad_norm": 1.02164626121521,
      "learning_rate": 4.175643492786672e-06,
      "loss": 0.8831,
      "step": 3687
    },
    {
      "epoch": 0.7970607304949211,
      "grad_norm": 1.0058878660202026,
      "learning_rate": 4.167085908506803e-06,
      "loss": 0.982,
      "step": 3688
    },
    {
      "epoch": 0.7972768532526475,
      "grad_norm": 0.8938169479370117,
      "learning_rate": 4.1585360823182365e-06,
      "loss": 0.8402,
      "step": 3689
    },
    {
      "epoch": 0.7974929760103739,
      "grad_norm": 1.107102394104004,
      "learning_rate": 4.149994018410372e-06,
      "loss": 0.9722,
      "step": 3690
    },
    {
      "epoch": 0.7977090987681003,
      "grad_norm": 1.0572689771652222,
      "learning_rate": 4.141459720968793e-06,
      "loss": 1.1449,
      "step": 3691
    },
    {
      "epoch": 0.7979252215258267,
      "grad_norm": 0.9329217672348022,
      "learning_rate": 4.132933194175299e-06,
      "loss": 0.871,
      "step": 3692
    },
    {
      "epoch": 0.7981413442835531,
      "grad_norm": 1.038081169128418,
      "learning_rate": 4.124414442207858e-06,
      "loss": 0.8993,
      "step": 3693
    },
    {
      "epoch": 0.7983574670412794,
      "grad_norm": 1.0002672672271729,
      "learning_rate": 4.115903469240641e-06,
      "loss": 0.885,
      "step": 3694
    },
    {
      "epoch": 0.7985735897990058,
      "grad_norm": 1.032814860343933,
      "learning_rate": 4.107400279443998e-06,
      "loss": 0.8953,
      "step": 3695
    },
    {
      "epoch": 0.7987897125567323,
      "grad_norm": 0.975195050239563,
      "learning_rate": 4.098904876984486e-06,
      "loss": 0.7751,
      "step": 3696
    },
    {
      "epoch": 0.7990058353144586,
      "grad_norm": 1.104385256767273,
      "learning_rate": 4.090417266024833e-06,
      "loss": 0.9905,
      "step": 3697
    },
    {
      "epoch": 0.799221958072185,
      "grad_norm": 1.1246614456176758,
      "learning_rate": 4.081937450723936e-06,
      "loss": 0.9624,
      "step": 3698
    },
    {
      "epoch": 0.7994380808299114,
      "grad_norm": 1.0041158199310303,
      "learning_rate": 4.073465435236886e-06,
      "loss": 0.9519,
      "step": 3699
    },
    {
      "epoch": 0.7996542035876377,
      "grad_norm": 1.0940678119659424,
      "learning_rate": 4.065001223714959e-06,
      "loss": 0.8947,
      "step": 3700
    },
    {
      "epoch": 0.7998703263453641,
      "grad_norm": 1.1004393100738525,
      "learning_rate": 4.056544820305597e-06,
      "loss": 0.9583,
      "step": 3701
    },
    {
      "epoch": 0.8000864491030906,
      "grad_norm": 1.0078845024108887,
      "learning_rate": 4.0480962291524185e-06,
      "loss": 0.9829,
      "step": 3702
    },
    {
      "epoch": 0.800302571860817,
      "grad_norm": 1.1241837739944458,
      "learning_rate": 4.03965545439521e-06,
      "loss": 0.9297,
      "step": 3703
    },
    {
      "epoch": 0.8005186946185433,
      "grad_norm": 0.9753207564353943,
      "learning_rate": 4.0312225001699355e-06,
      "loss": 0.8199,
      "step": 3704
    },
    {
      "epoch": 0.8007348173762697,
      "grad_norm": 0.9606614112854004,
      "learning_rate": 4.022797370608722e-06,
      "loss": 0.8788,
      "step": 3705
    },
    {
      "epoch": 0.8009509401339961,
      "grad_norm": 1.0398199558258057,
      "learning_rate": 4.014380069839861e-06,
      "loss": 0.8983,
      "step": 3706
    },
    {
      "epoch": 0.8011670628917225,
      "grad_norm": 1.0209386348724365,
      "learning_rate": 4.005970601987814e-06,
      "loss": 0.7962,
      "step": 3707
    },
    {
      "epoch": 0.8013831856494489,
      "grad_norm": 0.9006760716438293,
      "learning_rate": 3.997568971173198e-06,
      "loss": 0.8473,
      "step": 3708
    },
    {
      "epoch": 0.8015993084071753,
      "grad_norm": 0.9175861477851868,
      "learning_rate": 3.989175181512794e-06,
      "loss": 0.9721,
      "step": 3709
    },
    {
      "epoch": 0.8018154311649016,
      "grad_norm": 0.9091984033584595,
      "learning_rate": 3.98078923711954e-06,
      "loss": 0.9827,
      "step": 3710
    },
    {
      "epoch": 0.802031553922628,
      "grad_norm": 1.1166619062423706,
      "learning_rate": 3.972411142102528e-06,
      "loss": 0.9531,
      "step": 3711
    },
    {
      "epoch": 0.8022476766803545,
      "grad_norm": 1.0179290771484375,
      "learning_rate": 3.9640409005670075e-06,
      "loss": 1.0223,
      "step": 3712
    },
    {
      "epoch": 0.8024637994380809,
      "grad_norm": 1.015807032585144,
      "learning_rate": 3.95567851661437e-06,
      "loss": 0.8241,
      "step": 3713
    },
    {
      "epoch": 0.8026799221958072,
      "grad_norm": 0.9998385906219482,
      "learning_rate": 3.947323994342178e-06,
      "loss": 0.6707,
      "step": 3714
    },
    {
      "epoch": 0.8028960449535336,
      "grad_norm": 1.1686913967132568,
      "learning_rate": 3.9389773378441185e-06,
      "loss": 1.0828,
      "step": 3715
    },
    {
      "epoch": 0.80311216771126,
      "grad_norm": 1.1071698665618896,
      "learning_rate": 3.930638551210035e-06,
      "loss": 0.7809,
      "step": 3716
    },
    {
      "epoch": 0.8033282904689864,
      "grad_norm": 0.9545592069625854,
      "learning_rate": 3.922307638525909e-06,
      "loss": 0.7958,
      "step": 3717
    },
    {
      "epoch": 0.8035444132267128,
      "grad_norm": 1.0194050073623657,
      "learning_rate": 3.913984603873877e-06,
      "loss": 0.9727,
      "step": 3718
    },
    {
      "epoch": 0.8037605359844392,
      "grad_norm": 0.9267217516899109,
      "learning_rate": 3.9056694513322054e-06,
      "loss": 1.0364,
      "step": 3719
    },
    {
      "epoch": 0.8039766587421655,
      "grad_norm": 0.9400733709335327,
      "learning_rate": 3.8973621849753044e-06,
      "loss": 0.7974,
      "step": 3720
    },
    {
      "epoch": 0.8041927814998919,
      "grad_norm": 1.0166336297988892,
      "learning_rate": 3.889062808873698e-06,
      "loss": 0.8838,
      "step": 3721
    },
    {
      "epoch": 0.8044089042576184,
      "grad_norm": 0.8093613982200623,
      "learning_rate": 3.880771327094075e-06,
      "loss": 0.9021,
      "step": 3722
    },
    {
      "epoch": 0.8046250270153447,
      "grad_norm": 0.9725896120071411,
      "learning_rate": 3.8724877436992425e-06,
      "loss": 1.0241,
      "step": 3723
    },
    {
      "epoch": 0.8048411497730711,
      "grad_norm": 0.9414122700691223,
      "learning_rate": 3.864212062748132e-06,
      "loss": 0.8416,
      "step": 3724
    },
    {
      "epoch": 0.8050572725307975,
      "grad_norm": 1.0157009363174438,
      "learning_rate": 3.85594428829581e-06,
      "loss": 0.9229,
      "step": 3725
    },
    {
      "epoch": 0.8052733952885239,
      "grad_norm": 1.0341070890426636,
      "learning_rate": 3.8476844243934695e-06,
      "loss": 0.8497,
      "step": 3726
    },
    {
      "epoch": 0.8054895180462502,
      "grad_norm": 0.9143247604370117,
      "learning_rate": 3.83943247508842e-06,
      "loss": 0.9056,
      "step": 3727
    },
    {
      "epoch": 0.8057056408039767,
      "grad_norm": 1.0246120691299438,
      "learning_rate": 3.831188444424101e-06,
      "loss": 0.7757,
      "step": 3728
    },
    {
      "epoch": 0.8059217635617031,
      "grad_norm": 0.9455540180206299,
      "learning_rate": 3.822952336440067e-06,
      "loss": 0.719,
      "step": 3729
    },
    {
      "epoch": 0.8061378863194294,
      "grad_norm": 1.1429113149642944,
      "learning_rate": 3.8147241551719915e-06,
      "loss": 0.9857,
      "step": 3730
    },
    {
      "epoch": 0.8063540090771558,
      "grad_norm": 1.106581687927246,
      "learning_rate": 3.8065039046516594e-06,
      "loss": 1.0205,
      "step": 3731
    },
    {
      "epoch": 0.8065701318348822,
      "grad_norm": 0.9925294518470764,
      "learning_rate": 3.798291588906993e-06,
      "loss": 0.9074,
      "step": 3732
    },
    {
      "epoch": 0.8067862545926086,
      "grad_norm": 1.0743608474731445,
      "learning_rate": 3.790087211961988e-06,
      "loss": 0.9284,
      "step": 3733
    },
    {
      "epoch": 0.807002377350335,
      "grad_norm": 1.1088292598724365,
      "learning_rate": 3.7818907778367763e-06,
      "loss": 0.798,
      "step": 3734
    },
    {
      "epoch": 0.8072185001080614,
      "grad_norm": 1.0600242614746094,
      "learning_rate": 3.7737022905475895e-06,
      "loss": 0.9915,
      "step": 3735
    },
    {
      "epoch": 0.8074346228657878,
      "grad_norm": 1.0275952816009521,
      "learning_rate": 3.765521754106776e-06,
      "loss": 0.9083,
      "step": 3736
    },
    {
      "epoch": 0.8076507456235141,
      "grad_norm": 1.030941367149353,
      "learning_rate": 3.7573491725227774e-06,
      "loss": 0.7311,
      "step": 3737
    },
    {
      "epoch": 0.8078668683812406,
      "grad_norm": 1.1321561336517334,
      "learning_rate": 3.7491845498001334e-06,
      "loss": 0.849,
      "step": 3738
    },
    {
      "epoch": 0.808082991138967,
      "grad_norm": 1.012608289718628,
      "learning_rate": 3.741027889939486e-06,
      "loss": 1.1026,
      "step": 3739
    },
    {
      "epoch": 0.8082991138966933,
      "grad_norm": 0.9792577028274536,
      "learning_rate": 3.7328791969375954e-06,
      "loss": 0.7981,
      "step": 3740
    },
    {
      "epoch": 0.8085152366544197,
      "grad_norm": 0.9109253883361816,
      "learning_rate": 3.7247384747872927e-06,
      "loss": 0.9318,
      "step": 3741
    },
    {
      "epoch": 0.8087313594121461,
      "grad_norm": 0.9116306900978088,
      "learning_rate": 3.7166057274775134e-06,
      "loss": 0.8796,
      "step": 3742
    },
    {
      "epoch": 0.8089474821698724,
      "grad_norm": 0.9492202401161194,
      "learning_rate": 3.708480958993286e-06,
      "loss": 0.7395,
      "step": 3743
    },
    {
      "epoch": 0.8091636049275989,
      "grad_norm": 1.0096417665481567,
      "learning_rate": 3.700364173315729e-06,
      "loss": 0.9314,
      "step": 3744
    },
    {
      "epoch": 0.8093797276853253,
      "grad_norm": 1.0050616264343262,
      "learning_rate": 3.692255374422049e-06,
      "loss": 1.0423,
      "step": 3745
    },
    {
      "epoch": 0.8095958504430516,
      "grad_norm": 1.1089311838150024,
      "learning_rate": 3.684154566285536e-06,
      "loss": 0.9018,
      "step": 3746
    },
    {
      "epoch": 0.809811973200778,
      "grad_norm": 1.2634425163269043,
      "learning_rate": 3.6760617528755682e-06,
      "loss": 0.9003,
      "step": 3747
    },
    {
      "epoch": 0.8100280959585044,
      "grad_norm": 0.9780515432357788,
      "learning_rate": 3.667976938157607e-06,
      "loss": 1.0411,
      "step": 3748
    },
    {
      "epoch": 0.8102442187162309,
      "grad_norm": 1.1175509691238403,
      "learning_rate": 3.65990012609319e-06,
      "loss": 1.0888,
      "step": 3749
    },
    {
      "epoch": 0.8104603414739572,
      "grad_norm": 0.9692811369895935,
      "learning_rate": 3.65183132063994e-06,
      "loss": 0.9398,
      "step": 3750
    },
    {
      "epoch": 0.8106764642316836,
      "grad_norm": 0.9459337592124939,
      "learning_rate": 3.643770525751551e-06,
      "loss": 0.8167,
      "step": 3751
    },
    {
      "epoch": 0.81089258698941,
      "grad_norm": 0.9116824865341187,
      "learning_rate": 3.635717745377796e-06,
      "loss": 0.7984,
      "step": 3752
    },
    {
      "epoch": 0.8111087097471363,
      "grad_norm": 0.8943130970001221,
      "learning_rate": 3.62767298346451e-06,
      "loss": 0.8053,
      "step": 3753
    },
    {
      "epoch": 0.8113248325048628,
      "grad_norm": 1.1531084775924683,
      "learning_rate": 3.6196362439536192e-06,
      "loss": 0.8493,
      "step": 3754
    },
    {
      "epoch": 0.8115409552625892,
      "grad_norm": 1.037761926651001,
      "learning_rate": 3.61160753078311e-06,
      "loss": 0.9401,
      "step": 3755
    },
    {
      "epoch": 0.8117570780203155,
      "grad_norm": 1.0017485618591309,
      "learning_rate": 3.6035868478870196e-06,
      "loss": 0.822,
      "step": 3756
    },
    {
      "epoch": 0.8119732007780419,
      "grad_norm": 1.1072742938995361,
      "learning_rate": 3.5955741991954664e-06,
      "loss": 0.9453,
      "step": 3757
    },
    {
      "epoch": 0.8121893235357683,
      "grad_norm": 0.9541094899177551,
      "learning_rate": 3.5875695886346386e-06,
      "loss": 1.0183,
      "step": 3758
    },
    {
      "epoch": 0.8124054462934948,
      "grad_norm": 0.9097086787223816,
      "learning_rate": 3.579573020126774e-06,
      "loss": 0.8697,
      "step": 3759
    },
    {
      "epoch": 0.8126215690512211,
      "grad_norm": 0.9351658225059509,
      "learning_rate": 3.5715844975901747e-06,
      "loss": 0.7667,
      "step": 3760
    },
    {
      "epoch": 0.8128376918089475,
      "grad_norm": 0.9204522967338562,
      "learning_rate": 3.5636040249391845e-06,
      "loss": 0.7639,
      "step": 3761
    },
    {
      "epoch": 0.8130538145666739,
      "grad_norm": 0.9401305913925171,
      "learning_rate": 3.555631606084231e-06,
      "loss": 0.9774,
      "step": 3762
    },
    {
      "epoch": 0.8132699373244002,
      "grad_norm": 1.004342794418335,
      "learning_rate": 3.547667244931776e-06,
      "loss": 0.8118,
      "step": 3763
    },
    {
      "epoch": 0.8134860600821266,
      "grad_norm": 0.8848515748977661,
      "learning_rate": 3.5397109453843403e-06,
      "loss": 0.8712,
      "step": 3764
    },
    {
      "epoch": 0.8137021828398531,
      "grad_norm": 1.2648967504501343,
      "learning_rate": 3.5317627113404917e-06,
      "loss": 0.9541,
      "step": 3765
    },
    {
      "epoch": 0.8139183055975794,
      "grad_norm": 1.0908479690551758,
      "learning_rate": 3.523822546694844e-06,
      "loss": 0.9072,
      "step": 3766
    },
    {
      "epoch": 0.8141344283553058,
      "grad_norm": 0.9648228287696838,
      "learning_rate": 3.51589045533806e-06,
      "loss": 0.8983,
      "step": 3767
    },
    {
      "epoch": 0.8143505511130322,
      "grad_norm": 1.0807435512542725,
      "learning_rate": 3.507966441156847e-06,
      "loss": 0.8812,
      "step": 3768
    },
    {
      "epoch": 0.8145666738707585,
      "grad_norm": 1.1944962739944458,
      "learning_rate": 3.5000505080339565e-06,
      "loss": 0.9897,
      "step": 3769
    },
    {
      "epoch": 0.814782796628485,
      "grad_norm": 0.981046199798584,
      "learning_rate": 3.492142659848172e-06,
      "loss": 0.9178,
      "step": 3770
    },
    {
      "epoch": 0.8149989193862114,
      "grad_norm": 1.0109437704086304,
      "learning_rate": 3.4842429004743196e-06,
      "loss": 0.9643,
      "step": 3771
    },
    {
      "epoch": 0.8152150421439378,
      "grad_norm": 1.007354497909546,
      "learning_rate": 3.476351233783277e-06,
      "loss": 0.96,
      "step": 3772
    },
    {
      "epoch": 0.8154311649016641,
      "grad_norm": 1.2974567413330078,
      "learning_rate": 3.4684676636419278e-06,
      "loss": 0.8178,
      "step": 3773
    },
    {
      "epoch": 0.8156472876593905,
      "grad_norm": 1.0700843334197998,
      "learning_rate": 3.460592193913208e-06,
      "loss": 1.0717,
      "step": 3774
    },
    {
      "epoch": 0.815863410417117,
      "grad_norm": 0.9423474073410034,
      "learning_rate": 3.4527248284560754e-06,
      "loss": 0.862,
      "step": 3775
    },
    {
      "epoch": 0.8160795331748433,
      "grad_norm": 1.1047825813293457,
      "learning_rate": 3.4448655711255286e-06,
      "loss": 0.8822,
      "step": 3776
    },
    {
      "epoch": 0.8162956559325697,
      "grad_norm": 0.9060317873954773,
      "learning_rate": 3.437014425772587e-06,
      "loss": 0.8137,
      "step": 3777
    },
    {
      "epoch": 0.8165117786902961,
      "grad_norm": 0.947235643863678,
      "learning_rate": 3.429171396244284e-06,
      "loss": 0.9042,
      "step": 3778
    },
    {
      "epoch": 0.8167279014480224,
      "grad_norm": 1.0349210500717163,
      "learning_rate": 3.421336486383686e-06,
      "loss": 0.9579,
      "step": 3779
    },
    {
      "epoch": 0.8169440242057489,
      "grad_norm": 1.0050138235092163,
      "learning_rate": 3.413509700029891e-06,
      "loss": 0.9266,
      "step": 3780
    },
    {
      "epoch": 0.8171601469634753,
      "grad_norm": 0.9082103371620178,
      "learning_rate": 3.405691041018e-06,
      "loss": 0.8102,
      "step": 3781
    },
    {
      "epoch": 0.8173762697212016,
      "grad_norm": 0.9560586810112,
      "learning_rate": 3.397880513179137e-06,
      "loss": 0.9151,
      "step": 3782
    },
    {
      "epoch": 0.817592392478928,
      "grad_norm": 1.0189166069030762,
      "learning_rate": 3.390078120340445e-06,
      "loss": 0.8613,
      "step": 3783
    },
    {
      "epoch": 0.8178085152366544,
      "grad_norm": 1.002669095993042,
      "learning_rate": 3.382283866325078e-06,
      "loss": 0.8865,
      "step": 3784
    },
    {
      "epoch": 0.8180246379943809,
      "grad_norm": 0.9827417731285095,
      "learning_rate": 3.374497754952202e-06,
      "loss": 1.0344,
      "step": 3785
    },
    {
      "epoch": 0.8182407607521072,
      "grad_norm": 1.049056887626648,
      "learning_rate": 3.366719790036994e-06,
      "loss": 0.9794,
      "step": 3786
    },
    {
      "epoch": 0.8184568835098336,
      "grad_norm": 1.1441189050674438,
      "learning_rate": 3.3589499753906375e-06,
      "loss": 1.0127,
      "step": 3787
    },
    {
      "epoch": 0.81867300626756,
      "grad_norm": 1.0008662939071655,
      "learning_rate": 3.351188314820324e-06,
      "loss": 0.932,
      "step": 3788
    },
    {
      "epoch": 0.8188891290252863,
      "grad_norm": 1.0514990091323853,
      "learning_rate": 3.3434348121292493e-06,
      "loss": 0.9164,
      "step": 3789
    },
    {
      "epoch": 0.8191052517830127,
      "grad_norm": 1.1653443574905396,
      "learning_rate": 3.335689471116612e-06,
      "loss": 1.0593,
      "step": 3790
    },
    {
      "epoch": 0.8193213745407392,
      "grad_norm": 0.9866675734519958,
      "learning_rate": 3.327952295577612e-06,
      "loss": 0.8743,
      "step": 3791
    },
    {
      "epoch": 0.8195374972984655,
      "grad_norm": 0.9063333868980408,
      "learning_rate": 3.320223289303448e-06,
      "loss": 0.7064,
      "step": 3792
    },
    {
      "epoch": 0.8197536200561919,
      "grad_norm": 0.9756568670272827,
      "learning_rate": 3.312502456081308e-06,
      "loss": 0.8558,
      "step": 3793
    },
    {
      "epoch": 0.8199697428139183,
      "grad_norm": 1.0017309188842773,
      "learning_rate": 3.3047897996943947e-06,
      "loss": 0.9177,
      "step": 3794
    },
    {
      "epoch": 0.8201858655716447,
      "grad_norm": 0.8668156862258911,
      "learning_rate": 3.2970853239218916e-06,
      "loss": 0.88,
      "step": 3795
    },
    {
      "epoch": 0.8204019883293711,
      "grad_norm": 0.9589985013008118,
      "learning_rate": 3.289389032538961e-06,
      "loss": 0.9202,
      "step": 3796
    },
    {
      "epoch": 0.8206181110870975,
      "grad_norm": 0.9581915140151978,
      "learning_rate": 3.281700929316771e-06,
      "loss": 0.8171,
      "step": 3797
    },
    {
      "epoch": 0.8208342338448239,
      "grad_norm": 1.1750407218933105,
      "learning_rate": 3.274021018022484e-06,
      "loss": 0.8521,
      "step": 3798
    },
    {
      "epoch": 0.8210503566025502,
      "grad_norm": 1.0545321702957153,
      "learning_rate": 3.2663493024192316e-06,
      "loss": 0.8311,
      "step": 3799
    },
    {
      "epoch": 0.8212664793602766,
      "grad_norm": 0.9771540760993958,
      "learning_rate": 3.2586857862661447e-06,
      "loss": 0.8776,
      "step": 3800
    },
    {
      "epoch": 0.8214826021180031,
      "grad_norm": 0.9694487452507019,
      "learning_rate": 3.251030473318313e-06,
      "loss": 0.8964,
      "step": 3801
    },
    {
      "epoch": 0.8216987248757294,
      "grad_norm": 0.9411128759384155,
      "learning_rate": 3.2433833673268358e-06,
      "loss": 0.9964,
      "step": 3802
    },
    {
      "epoch": 0.8219148476334558,
      "grad_norm": 1.0032674074172974,
      "learning_rate": 3.235744472038771e-06,
      "loss": 1.0625,
      "step": 3803
    },
    {
      "epoch": 0.8221309703911822,
      "grad_norm": 1.0997395515441895,
      "learning_rate": 3.228113791197163e-06,
      "loss": 0.9355,
      "step": 3804
    },
    {
      "epoch": 0.8223470931489085,
      "grad_norm": 0.9010671377182007,
      "learning_rate": 3.220491328541027e-06,
      "loss": 0.8208,
      "step": 3805
    },
    {
      "epoch": 0.822563215906635,
      "grad_norm": 1.134998083114624,
      "learning_rate": 3.2128770878053506e-06,
      "loss": 0.9348,
      "step": 3806
    },
    {
      "epoch": 0.8227793386643614,
      "grad_norm": 0.963811993598938,
      "learning_rate": 3.2052710727210945e-06,
      "loss": 0.9721,
      "step": 3807
    },
    {
      "epoch": 0.8229954614220878,
      "grad_norm": 1.1139354705810547,
      "learning_rate": 3.1976732870151903e-06,
      "loss": 0.8551,
      "step": 3808
    },
    {
      "epoch": 0.8232115841798141,
      "grad_norm": 0.9749976396560669,
      "learning_rate": 3.1900837344105317e-06,
      "loss": 0.8782,
      "step": 3809
    },
    {
      "epoch": 0.8234277069375405,
      "grad_norm": 1.0936123132705688,
      "learning_rate": 3.182502418625986e-06,
      "loss": 0.9987,
      "step": 3810
    },
    {
      "epoch": 0.823643829695267,
      "grad_norm": 0.9570392966270447,
      "learning_rate": 3.174929343376374e-06,
      "loss": 1.0546,
      "step": 3811
    },
    {
      "epoch": 0.8238599524529933,
      "grad_norm": 0.9953511953353882,
      "learning_rate": 3.1673645123724992e-06,
      "loss": 0.8854,
      "step": 3812
    },
    {
      "epoch": 0.8240760752107197,
      "grad_norm": 1.0623141527175903,
      "learning_rate": 3.159807929321097e-06,
      "loss": 0.8543,
      "step": 3813
    },
    {
      "epoch": 0.8242921979684461,
      "grad_norm": 0.9737940430641174,
      "learning_rate": 3.1522595979248805e-06,
      "loss": 0.8359,
      "step": 3814
    },
    {
      "epoch": 0.8245083207261724,
      "grad_norm": 0.9180198311805725,
      "learning_rate": 3.144719521882511e-06,
      "loss": 1.0532,
      "step": 3815
    },
    {
      "epoch": 0.8247244434838988,
      "grad_norm": 1.090312123298645,
      "learning_rate": 3.1371877048886156e-06,
      "loss": 0.9969,
      "step": 3816
    },
    {
      "epoch": 0.8249405662416253,
      "grad_norm": 0.9804825782775879,
      "learning_rate": 3.1296641506337687e-06,
      "loss": 0.9602,
      "step": 3817
    },
    {
      "epoch": 0.8251566889993517,
      "grad_norm": 0.9461047649383545,
      "learning_rate": 3.1221488628044837e-06,
      "loss": 0.7752,
      "step": 3818
    },
    {
      "epoch": 0.825372811757078,
      "grad_norm": 0.9010140895843506,
      "learning_rate": 3.1146418450832374e-06,
      "loss": 0.8004,
      "step": 3819
    },
    {
      "epoch": 0.8255889345148044,
      "grad_norm": 1.0007201433181763,
      "learning_rate": 3.1071431011484555e-06,
      "loss": 0.9878,
      "step": 3820
    },
    {
      "epoch": 0.8258050572725308,
      "grad_norm": 0.9827144742012024,
      "learning_rate": 3.099652634674506e-06,
      "loss": 1.0481,
      "step": 3821
    },
    {
      "epoch": 0.8260211800302572,
      "grad_norm": 1.028521180152893,
      "learning_rate": 3.0921704493316973e-06,
      "loss": 1.04,
      "step": 3822
    },
    {
      "epoch": 0.8262373027879836,
      "grad_norm": 0.9806821346282959,
      "learning_rate": 3.0846965487862813e-06,
      "loss": 0.9917,
      "step": 3823
    },
    {
      "epoch": 0.82645342554571,
      "grad_norm": 0.9888787865638733,
      "learning_rate": 3.077230936700455e-06,
      "loss": 0.8864,
      "step": 3824
    },
    {
      "epoch": 0.8266695483034363,
      "grad_norm": 0.9758038520812988,
      "learning_rate": 3.06977361673235e-06,
      "loss": 0.881,
      "step": 3825
    },
    {
      "epoch": 0.8268856710611627,
      "grad_norm": 0.9353499412536621,
      "learning_rate": 3.0623245925360344e-06,
      "loss": 0.8202,
      "step": 3826
    },
    {
      "epoch": 0.8271017938188892,
      "grad_norm": 0.9721300005912781,
      "learning_rate": 3.0548838677615154e-06,
      "loss": 1.1367,
      "step": 3827
    },
    {
      "epoch": 0.8273179165766155,
      "grad_norm": 1.3146761655807495,
      "learning_rate": 3.04745144605473e-06,
      "loss": 0.9314,
      "step": 3828
    },
    {
      "epoch": 0.8275340393343419,
      "grad_norm": 1.1623433828353882,
      "learning_rate": 3.040027331057547e-06,
      "loss": 1.0629,
      "step": 3829
    },
    {
      "epoch": 0.8277501620920683,
      "grad_norm": 0.8271481990814209,
      "learning_rate": 3.0326115264077672e-06,
      "loss": 0.768,
      "step": 3830
    },
    {
      "epoch": 0.8279662848497947,
      "grad_norm": 0.9230291247367859,
      "learning_rate": 3.0252040357391156e-06,
      "loss": 0.9952,
      "step": 3831
    },
    {
      "epoch": 0.828182407607521,
      "grad_norm": 0.9363948702812195,
      "learning_rate": 3.0178048626812464e-06,
      "loss": 0.8299,
      "step": 3832
    },
    {
      "epoch": 0.8283985303652475,
      "grad_norm": 0.939526379108429,
      "learning_rate": 3.0104140108597323e-06,
      "loss": 0.8855,
      "step": 3833
    },
    {
      "epoch": 0.8286146531229739,
      "grad_norm": 0.9929744601249695,
      "learning_rate": 3.003031483896084e-06,
      "loss": 0.8704,
      "step": 3834
    },
    {
      "epoch": 0.8288307758807002,
      "grad_norm": 1.0379951000213623,
      "learning_rate": 2.9956572854077205e-06,
      "loss": 0.9567,
      "step": 3835
    },
    {
      "epoch": 0.8290468986384266,
      "grad_norm": 0.9431433081626892,
      "learning_rate": 2.988291419007976e-06,
      "loss": 0.8092,
      "step": 3836
    },
    {
      "epoch": 0.829263021396153,
      "grad_norm": 0.9728435277938843,
      "learning_rate": 2.9809338883061035e-06,
      "loss": 1.0015,
      "step": 3837
    },
    {
      "epoch": 0.8294791441538794,
      "grad_norm": 1.0361050367355347,
      "learning_rate": 2.97358469690729e-06,
      "loss": 0.9186,
      "step": 3838
    },
    {
      "epoch": 0.8296952669116058,
      "grad_norm": 0.9760729074478149,
      "learning_rate": 2.9662438484126155e-06,
      "loss": 0.8529,
      "step": 3839
    },
    {
      "epoch": 0.8299113896693322,
      "grad_norm": 0.965825617313385,
      "learning_rate": 2.9589113464190844e-06,
      "loss": 0.8245,
      "step": 3840
    },
    {
      "epoch": 0.8301275124270586,
      "grad_norm": 1.0055328607559204,
      "learning_rate": 2.9515871945195897e-06,
      "loss": 0.8166,
      "step": 3841
    },
    {
      "epoch": 0.8303436351847849,
      "grad_norm": 1.0321468114852905,
      "learning_rate": 2.9442713963029645e-06,
      "loss": 0.9099,
      "step": 3842
    },
    {
      "epoch": 0.8305597579425114,
      "grad_norm": 1.145390510559082,
      "learning_rate": 2.9369639553539266e-06,
      "loss": 1.1025,
      "step": 3843
    },
    {
      "epoch": 0.8307758807002378,
      "grad_norm": 1.0296893119812012,
      "learning_rate": 2.9296648752531085e-06,
      "loss": 1.1091,
      "step": 3844
    },
    {
      "epoch": 0.8309920034579641,
      "grad_norm": 0.9882704615592957,
      "learning_rate": 2.9223741595770392e-06,
      "loss": 1.1372,
      "step": 3845
    },
    {
      "epoch": 0.8312081262156905,
      "grad_norm": 0.9929832816123962,
      "learning_rate": 2.9150918118981542e-06,
      "loss": 0.8435,
      "step": 3846
    },
    {
      "epoch": 0.8314242489734169,
      "grad_norm": 0.913496196269989,
      "learning_rate": 2.9078178357847874e-06,
      "loss": 0.7169,
      "step": 3847
    },
    {
      "epoch": 0.8316403717311432,
      "grad_norm": 1.0215977430343628,
      "learning_rate": 2.900552234801168e-06,
      "loss": 0.9791,
      "step": 3848
    },
    {
      "epoch": 0.8318564944888697,
      "grad_norm": 0.9647204279899597,
      "learning_rate": 2.8932950125074287e-06,
      "loss": 0.9165,
      "step": 3849
    },
    {
      "epoch": 0.8320726172465961,
      "grad_norm": 0.969436764717102,
      "learning_rate": 2.8860461724595846e-06,
      "loss": 0.8661,
      "step": 3850
    },
    {
      "epoch": 0.8322887400043224,
      "grad_norm": 0.8607776165008545,
      "learning_rate": 2.8788057182095518e-06,
      "loss": 0.7945,
      "step": 3851
    },
    {
      "epoch": 0.8325048627620488,
      "grad_norm": 0.977681577205658,
      "learning_rate": 2.8715736533051485e-06,
      "loss": 0.8304,
      "step": 3852
    },
    {
      "epoch": 0.8327209855197752,
      "grad_norm": 0.9332045912742615,
      "learning_rate": 2.864349981290053e-06,
      "loss": 0.8605,
      "step": 3853
    },
    {
      "epoch": 0.8329371082775017,
      "grad_norm": 0.9848089814186096,
      "learning_rate": 2.8571347057038566e-06,
      "loss": 0.9698,
      "step": 3854
    },
    {
      "epoch": 0.833153231035228,
      "grad_norm": 1.0630346536636353,
      "learning_rate": 2.84992783008202e-06,
      "loss": 0.9879,
      "step": 3855
    },
    {
      "epoch": 0.8333693537929544,
      "grad_norm": 1.0251802206039429,
      "learning_rate": 2.8427293579559067e-06,
      "loss": 0.8216,
      "step": 3856
    },
    {
      "epoch": 0.8335854765506808,
      "grad_norm": 0.9811074733734131,
      "learning_rate": 2.835539292852745e-06,
      "loss": 0.8215,
      "step": 3857
    },
    {
      "epoch": 0.8338015993084071,
      "grad_norm": 1.0241789817810059,
      "learning_rate": 2.828357638295658e-06,
      "loss": 1.0037,
      "step": 3858
    },
    {
      "epoch": 0.8340177220661336,
      "grad_norm": 1.0992343425750732,
      "learning_rate": 2.8211843978036223e-06,
      "loss": 0.8555,
      "step": 3859
    },
    {
      "epoch": 0.83423384482386,
      "grad_norm": 1.0655280351638794,
      "learning_rate": 2.8140195748915243e-06,
      "loss": 0.8525,
      "step": 3860
    },
    {
      "epoch": 0.8344499675815863,
      "grad_norm": 1.097280740737915,
      "learning_rate": 2.8068631730701067e-06,
      "loss": 0.8614,
      "step": 3861
    },
    {
      "epoch": 0.8346660903393127,
      "grad_norm": 1.0304309129714966,
      "learning_rate": 2.7997151958459888e-06,
      "loss": 0.9039,
      "step": 3862
    },
    {
      "epoch": 0.8348822130970391,
      "grad_norm": 0.9885969758033752,
      "learning_rate": 2.7925756467216646e-06,
      "loss": 0.9911,
      "step": 3863
    },
    {
      "epoch": 0.8350983358547656,
      "grad_norm": 0.8733798265457153,
      "learning_rate": 2.785444529195498e-06,
      "loss": 0.8581,
      "step": 3864
    },
    {
      "epoch": 0.8353144586124919,
      "grad_norm": 1.0050156116485596,
      "learning_rate": 2.7783218467617134e-06,
      "loss": 0.802,
      "step": 3865
    },
    {
      "epoch": 0.8355305813702183,
      "grad_norm": 1.1616407632827759,
      "learning_rate": 2.7712076029104152e-06,
      "loss": 0.9401,
      "step": 3866
    },
    {
      "epoch": 0.8357467041279447,
      "grad_norm": 0.8896188139915466,
      "learning_rate": 2.7641018011275657e-06,
      "loss": 0.9754,
      "step": 3867
    },
    {
      "epoch": 0.835962826885671,
      "grad_norm": 0.9471938610076904,
      "learning_rate": 2.7570044448949886e-06,
      "loss": 0.8151,
      "step": 3868
    },
    {
      "epoch": 0.8361789496433975,
      "grad_norm": 0.9115317463874817,
      "learning_rate": 2.749915537690373e-06,
      "loss": 0.9089,
      "step": 3869
    },
    {
      "epoch": 0.8363950724011239,
      "grad_norm": 0.8532413244247437,
      "learning_rate": 2.7428350829872675e-06,
      "loss": 0.815,
      "step": 3870
    },
    {
      "epoch": 0.8366111951588502,
      "grad_norm": 1.0686910152435303,
      "learning_rate": 2.7357630842550785e-06,
      "loss": 0.8564,
      "step": 3871
    },
    {
      "epoch": 0.8368273179165766,
      "grad_norm": 0.8713759183883667,
      "learning_rate": 2.7286995449590703e-06,
      "loss": 1.0198,
      "step": 3872
    },
    {
      "epoch": 0.837043440674303,
      "grad_norm": 1.0089508295059204,
      "learning_rate": 2.7216444685603537e-06,
      "loss": 0.9313,
      "step": 3873
    },
    {
      "epoch": 0.8372595634320293,
      "grad_norm": 0.9502614140510559,
      "learning_rate": 2.714597858515913e-06,
      "loss": 0.8456,
      "step": 3874
    },
    {
      "epoch": 0.8374756861897558,
      "grad_norm": 1.1013680696487427,
      "learning_rate": 2.7075597182785653e-06,
      "loss": 0.8482,
      "step": 3875
    },
    {
      "epoch": 0.8376918089474822,
      "grad_norm": 0.9405859112739563,
      "learning_rate": 2.7005300512969766e-06,
      "loss": 0.8086,
      "step": 3876
    },
    {
      "epoch": 0.8379079317052086,
      "grad_norm": 1.0673246383666992,
      "learning_rate": 2.693508861015668e-06,
      "loss": 0.8902,
      "step": 3877
    },
    {
      "epoch": 0.8381240544629349,
      "grad_norm": 0.8461023569107056,
      "learning_rate": 2.6864961508750154e-06,
      "loss": 0.9145,
      "step": 3878
    },
    {
      "epoch": 0.8383401772206613,
      "grad_norm": 1.0020347833633423,
      "learning_rate": 2.679491924311226e-06,
      "loss": 0.8721,
      "step": 3879
    },
    {
      "epoch": 0.8385562999783878,
      "grad_norm": 1.1831213235855103,
      "learning_rate": 2.672496184756359e-06,
      "loss": 0.8828,
      "step": 3880
    },
    {
      "epoch": 0.8387724227361141,
      "grad_norm": 0.860724925994873,
      "learning_rate": 2.665508935638297e-06,
      "loss": 0.8307,
      "step": 3881
    },
    {
      "epoch": 0.8389885454938405,
      "grad_norm": 1.030531883239746,
      "learning_rate": 2.6585301803807894e-06,
      "loss": 0.7885,
      "step": 3882
    },
    {
      "epoch": 0.8392046682515669,
      "grad_norm": 1.1311169862747192,
      "learning_rate": 2.651559922403406e-06,
      "loss": 0.8929,
      "step": 3883
    },
    {
      "epoch": 0.8394207910092932,
      "grad_norm": 1.1814595460891724,
      "learning_rate": 2.644598165121557e-06,
      "loss": 1.0145,
      "step": 3884
    },
    {
      "epoch": 0.8396369137670197,
      "grad_norm": 0.9401963949203491,
      "learning_rate": 2.6376449119464864e-06,
      "loss": 1.0197,
      "step": 3885
    },
    {
      "epoch": 0.8398530365247461,
      "grad_norm": 0.8778254389762878,
      "learning_rate": 2.630700166285274e-06,
      "loss": 0.8003,
      "step": 3886
    },
    {
      "epoch": 0.8400691592824725,
      "grad_norm": 0.935158908367157,
      "learning_rate": 2.623763931540828e-06,
      "loss": 1.071,
      "step": 3887
    },
    {
      "epoch": 0.8402852820401988,
      "grad_norm": 0.9393670558929443,
      "learning_rate": 2.616836211111886e-06,
      "loss": 0.8125,
      "step": 3888
    },
    {
      "epoch": 0.8405014047979252,
      "grad_norm": 0.9210618734359741,
      "learning_rate": 2.609917008393019e-06,
      "loss": 0.8869,
      "step": 3889
    },
    {
      "epoch": 0.8407175275556517,
      "grad_norm": 0.917171061038971,
      "learning_rate": 2.603006326774615e-06,
      "loss": 0.958,
      "step": 3890
    },
    {
      "epoch": 0.840933650313378,
      "grad_norm": 0.9822640419006348,
      "learning_rate": 2.5961041696428923e-06,
      "loss": 0.7881,
      "step": 3891
    },
    {
      "epoch": 0.8411497730711044,
      "grad_norm": 0.9905669093132019,
      "learning_rate": 2.589210540379903e-06,
      "loss": 0.8828,
      "step": 3892
    },
    {
      "epoch": 0.8413658958288308,
      "grad_norm": 1.043694019317627,
      "learning_rate": 2.5823254423634957e-06,
      "loss": 0.8347,
      "step": 3893
    },
    {
      "epoch": 0.8415820185865571,
      "grad_norm": 0.9720776677131653,
      "learning_rate": 2.5754488789673595e-06,
      "loss": 0.8232,
      "step": 3894
    },
    {
      "epoch": 0.8417981413442835,
      "grad_norm": 1.063287615776062,
      "learning_rate": 2.5685808535609867e-06,
      "loss": 0.9028,
      "step": 3895
    },
    {
      "epoch": 0.84201426410201,
      "grad_norm": 0.9968577027320862,
      "learning_rate": 2.5617213695097045e-06,
      "loss": 0.8873,
      "step": 3896
    },
    {
      "epoch": 0.8422303868597363,
      "grad_norm": 0.8752455711364746,
      "learning_rate": 2.554870430174641e-06,
      "loss": 0.8159,
      "step": 3897
    },
    {
      "epoch": 0.8424465096174627,
      "grad_norm": 1.0653284788131714,
      "learning_rate": 2.5480280389127422e-06,
      "loss": 0.9913,
      "step": 3898
    },
    {
      "epoch": 0.8426626323751891,
      "grad_norm": 0.8732882738113403,
      "learning_rate": 2.541194199076753e-06,
      "loss": 0.8493,
      "step": 3899
    },
    {
      "epoch": 0.8428787551329155,
      "grad_norm": 0.9469903707504272,
      "learning_rate": 2.534368914015253e-06,
      "loss": 0.8376,
      "step": 3900
    },
    {
      "epoch": 0.8430948778906419,
      "grad_norm": 0.9434764981269836,
      "learning_rate": 2.5275521870726107e-06,
      "loss": 0.7804,
      "step": 3901
    },
    {
      "epoch": 0.8433110006483683,
      "grad_norm": 1.004648208618164,
      "learning_rate": 2.520744021589003e-06,
      "loss": 0.9445,
      "step": 3902
    },
    {
      "epoch": 0.8435271234060947,
      "grad_norm": 0.8740761280059814,
      "learning_rate": 2.5139444209004248e-06,
      "loss": 0.9413,
      "step": 3903
    },
    {
      "epoch": 0.843743246163821,
      "grad_norm": 1.0177165269851685,
      "learning_rate": 2.50715338833865e-06,
      "loss": 0.9606,
      "step": 3904
    },
    {
      "epoch": 0.8439593689215474,
      "grad_norm": 0.9700515270233154,
      "learning_rate": 2.5003709272312814e-06,
      "loss": 0.7797,
      "step": 3905
    },
    {
      "epoch": 0.8441754916792739,
      "grad_norm": 1.0586341619491577,
      "learning_rate": 2.493597040901705e-06,
      "loss": 1.1064,
      "step": 3906
    },
    {
      "epoch": 0.8443916144370002,
      "grad_norm": 0.9335659742355347,
      "learning_rate": 2.4868317326691107e-06,
      "loss": 0.8268,
      "step": 3907
    },
    {
      "epoch": 0.8446077371947266,
      "grad_norm": 1.0961978435516357,
      "learning_rate": 2.4800750058484814e-06,
      "loss": 0.809,
      "step": 3908
    },
    {
      "epoch": 0.844823859952453,
      "grad_norm": 0.8870816826820374,
      "learning_rate": 2.4733268637506e-06,
      "loss": 0.8485,
      "step": 3909
    },
    {
      "epoch": 0.8450399827101793,
      "grad_norm": 0.8989795446395874,
      "learning_rate": 2.466587309682038e-06,
      "loss": 0.8478,
      "step": 3910
    },
    {
      "epoch": 0.8452561054679057,
      "grad_norm": 1.1302543878555298,
      "learning_rate": 2.459856346945164e-06,
      "loss": 0.9803,
      "step": 3911
    },
    {
      "epoch": 0.8454722282256322,
      "grad_norm": 0.9858680367469788,
      "learning_rate": 2.4531339788381337e-06,
      "loss": 0.8998,
      "step": 3912
    },
    {
      "epoch": 0.8456883509833586,
      "grad_norm": 0.9979439973831177,
      "learning_rate": 2.4464202086548874e-06,
      "loss": 0.9993,
      "step": 3913
    },
    {
      "epoch": 0.8459044737410849,
      "grad_norm": 0.9646304249763489,
      "learning_rate": 2.439715039685162e-06,
      "loss": 0.7718,
      "step": 3914
    },
    {
      "epoch": 0.8461205964988113,
      "grad_norm": 0.9694345593452454,
      "learning_rate": 2.4330184752144815e-06,
      "loss": 0.925,
      "step": 3915
    },
    {
      "epoch": 0.8463367192565378,
      "grad_norm": 0.9699779152870178,
      "learning_rate": 2.4263305185241316e-06,
      "loss": 0.7816,
      "step": 3916
    },
    {
      "epoch": 0.8465528420142641,
      "grad_norm": 1.027593731880188,
      "learning_rate": 2.419651172891202e-06,
      "loss": 0.9126,
      "step": 3917
    },
    {
      "epoch": 0.8467689647719905,
      "grad_norm": 0.9932441711425781,
      "learning_rate": 2.4129804415885593e-06,
      "loss": 0.9482,
      "step": 3918
    },
    {
      "epoch": 0.8469850875297169,
      "grad_norm": 1.0544880628585815,
      "learning_rate": 2.406318327884847e-06,
      "loss": 0.9531,
      "step": 3919
    },
    {
      "epoch": 0.8472012102874432,
      "grad_norm": 1.0541728734970093,
      "learning_rate": 2.3996648350444816e-06,
      "loss": 0.9116,
      "step": 3920
    },
    {
      "epoch": 0.8474173330451696,
      "grad_norm": 1.0878851413726807,
      "learning_rate": 2.3930199663276633e-06,
      "loss": 1.0167,
      "step": 3921
    },
    {
      "epoch": 0.8476334558028961,
      "grad_norm": 1.073660135269165,
      "learning_rate": 2.3863837249903577e-06,
      "loss": 0.8671,
      "step": 3922
    },
    {
      "epoch": 0.8478495785606225,
      "grad_norm": 0.906342089176178,
      "learning_rate": 2.3797561142843107e-06,
      "loss": 0.8903,
      "step": 3923
    },
    {
      "epoch": 0.8480657013183488,
      "grad_norm": 0.9477362632751465,
      "learning_rate": 2.3731371374570354e-06,
      "loss": 0.9786,
      "step": 3924
    },
    {
      "epoch": 0.8482818240760752,
      "grad_norm": 1.012372612953186,
      "learning_rate": 2.3665267977518157e-06,
      "loss": 1.0066,
      "step": 3925
    },
    {
      "epoch": 0.8484979468338016,
      "grad_norm": 0.9933478236198425,
      "learning_rate": 2.3599250984077026e-06,
      "loss": 0.8428,
      "step": 3926
    },
    {
      "epoch": 0.848714069591528,
      "grad_norm": 1.0077451467514038,
      "learning_rate": 2.353332042659513e-06,
      "loss": 0.9651,
      "step": 3927
    },
    {
      "epoch": 0.8489301923492544,
      "grad_norm": 0.9091978669166565,
      "learning_rate": 2.346747633737829e-06,
      "loss": 0.751,
      "step": 3928
    },
    {
      "epoch": 0.8491463151069808,
      "grad_norm": 0.9324750900268555,
      "learning_rate": 2.3401718748689972e-06,
      "loss": 1.1466,
      "step": 3929
    },
    {
      "epoch": 0.8493624378647071,
      "grad_norm": 1.0648280382156372,
      "learning_rate": 2.3336047692751216e-06,
      "loss": 0.9892,
      "step": 3930
    },
    {
      "epoch": 0.8495785606224335,
      "grad_norm": 0.967571496963501,
      "learning_rate": 2.3270463201740668e-06,
      "loss": 0.959,
      "step": 3931
    },
    {
      "epoch": 0.84979468338016,
      "grad_norm": 0.9903391599655151,
      "learning_rate": 2.32049653077947e-06,
      "loss": 0.7614,
      "step": 3932
    },
    {
      "epoch": 0.8500108061378863,
      "grad_norm": 0.9767742156982422,
      "learning_rate": 2.3139554043006986e-06,
      "loss": 0.8769,
      "step": 3933
    },
    {
      "epoch": 0.8502269288956127,
      "grad_norm": 0.9995903968811035,
      "learning_rate": 2.3074229439428964e-06,
      "loss": 0.8753,
      "step": 3934
    },
    {
      "epoch": 0.8504430516533391,
      "grad_norm": 1.0542783737182617,
      "learning_rate": 2.300899152906946e-06,
      "loss": 0.9123,
      "step": 3935
    },
    {
      "epoch": 0.8506591744110655,
      "grad_norm": 1.0569159984588623,
      "learning_rate": 2.294384034389503e-06,
      "loss": 0.7211,
      "step": 3936
    },
    {
      "epoch": 0.8508752971687918,
      "grad_norm": 0.9834873080253601,
      "learning_rate": 2.287877591582952e-06,
      "loss": 0.8814,
      "step": 3937
    },
    {
      "epoch": 0.8510914199265183,
      "grad_norm": 0.9262287616729736,
      "learning_rate": 2.2813798276754407e-06,
      "loss": 0.8303,
      "step": 3938
    },
    {
      "epoch": 0.8513075426842447,
      "grad_norm": 0.9619258642196655,
      "learning_rate": 2.274890745850846e-06,
      "loss": 0.9651,
      "step": 3939
    },
    {
      "epoch": 0.851523665441971,
      "grad_norm": 0.9438610076904297,
      "learning_rate": 2.2684103492888165e-06,
      "loss": 0.8412,
      "step": 3940
    },
    {
      "epoch": 0.8517397881996974,
      "grad_norm": 1.086382269859314,
      "learning_rate": 2.261938641164725e-06,
      "loss": 0.915,
      "step": 3941
    },
    {
      "epoch": 0.8519559109574238,
      "grad_norm": 1.0183038711547852,
      "learning_rate": 2.2554756246496966e-06,
      "loss": 0.9559,
      "step": 3942
    },
    {
      "epoch": 0.8521720337151502,
      "grad_norm": 1.060952067375183,
      "learning_rate": 2.2490213029105947e-06,
      "loss": 1.0643,
      "step": 3943
    },
    {
      "epoch": 0.8523881564728766,
      "grad_norm": 1.0353541374206543,
      "learning_rate": 2.242575679110013e-06,
      "loss": 0.9,
      "step": 3944
    },
    {
      "epoch": 0.852604279230603,
      "grad_norm": 1.009286642074585,
      "learning_rate": 2.236138756406303e-06,
      "loss": 0.8826,
      "step": 3945
    },
    {
      "epoch": 0.8528204019883294,
      "grad_norm": 0.941284716129303,
      "learning_rate": 2.22971053795354e-06,
      "loss": 0.9171,
      "step": 3946
    },
    {
      "epoch": 0.8530365247460557,
      "grad_norm": 1.0223666429519653,
      "learning_rate": 2.223291026901533e-06,
      "loss": 0.7593,
      "step": 3947
    },
    {
      "epoch": 0.8532526475037822,
      "grad_norm": 0.9023488759994507,
      "learning_rate": 2.2168802263958278e-06,
      "loss": 0.9817,
      "step": 3948
    },
    {
      "epoch": 0.8534687702615086,
      "grad_norm": 0.985598623752594,
      "learning_rate": 2.210478139577705e-06,
      "loss": 1.0036,
      "step": 3949
    },
    {
      "epoch": 0.8536848930192349,
      "grad_norm": 1.0244380235671997,
      "learning_rate": 2.2040847695841693e-06,
      "loss": 0.9144,
      "step": 3950
    },
    {
      "epoch": 0.8539010157769613,
      "grad_norm": 1.0165053606033325,
      "learning_rate": 2.1977001195479586e-06,
      "loss": 0.8182,
      "step": 3951
    },
    {
      "epoch": 0.8541171385346877,
      "grad_norm": 0.9381210803985596,
      "learning_rate": 2.191324192597535e-06,
      "loss": 0.9025,
      "step": 3952
    },
    {
      "epoch": 0.854333261292414,
      "grad_norm": 0.9398542642593384,
      "learning_rate": 2.184956991857088e-06,
      "loss": 0.7237,
      "step": 3953
    },
    {
      "epoch": 0.8545493840501405,
      "grad_norm": 1.001290202140808,
      "learning_rate": 2.1785985204465354e-06,
      "loss": 0.9462,
      "step": 3954
    },
    {
      "epoch": 0.8547655068078669,
      "grad_norm": 1.0010464191436768,
      "learning_rate": 2.172248781481514e-06,
      "loss": 0.8731,
      "step": 3955
    },
    {
      "epoch": 0.8549816295655932,
      "grad_norm": 0.9304476380348206,
      "learning_rate": 2.165907778073373e-06,
      "loss": 0.7721,
      "step": 3956
    },
    {
      "epoch": 0.8551977523233196,
      "grad_norm": 0.8804721832275391,
      "learning_rate": 2.159575513329193e-06,
      "loss": 0.8879,
      "step": 3957
    },
    {
      "epoch": 0.855413875081046,
      "grad_norm": 0.8830518126487732,
      "learning_rate": 2.1532519903517723e-06,
      "loss": 0.9191,
      "step": 3958
    },
    {
      "epoch": 0.8556299978387725,
      "grad_norm": 0.9027577638626099,
      "learning_rate": 2.1469372122396214e-06,
      "loss": 1.0066,
      "step": 3959
    },
    {
      "epoch": 0.8558461205964988,
      "grad_norm": 0.9849170446395874,
      "learning_rate": 2.140631182086965e-06,
      "loss": 0.8284,
      "step": 3960
    },
    {
      "epoch": 0.8560622433542252,
      "grad_norm": 0.9777927994728088,
      "learning_rate": 2.1343339029837496e-06,
      "loss": 0.8547,
      "step": 3961
    },
    {
      "epoch": 0.8562783661119516,
      "grad_norm": 1.0231190919876099,
      "learning_rate": 2.1280453780156153e-06,
      "loss": 0.9239,
      "step": 3962
    },
    {
      "epoch": 0.8564944888696779,
      "grad_norm": 1.0713601112365723,
      "learning_rate": 2.1217656102639326e-06,
      "loss": 0.8877,
      "step": 3963
    },
    {
      "epoch": 0.8567106116274044,
      "grad_norm": 1.1315653324127197,
      "learning_rate": 2.1154946028057744e-06,
      "loss": 0.7842,
      "step": 3964
    },
    {
      "epoch": 0.8569267343851308,
      "grad_norm": 0.9086594581604004,
      "learning_rate": 2.1092323587139174e-06,
      "loss": 0.8541,
      "step": 3965
    },
    {
      "epoch": 0.8571428571428571,
      "grad_norm": 1.12448251247406,
      "learning_rate": 2.1029788810568473e-06,
      "loss": 1.0357,
      "step": 3966
    },
    {
      "epoch": 0.8573589799005835,
      "grad_norm": 0.9281867742538452,
      "learning_rate": 2.0967341728987554e-06,
      "loss": 0.8266,
      "step": 3967
    },
    {
      "epoch": 0.8575751026583099,
      "grad_norm": 1.0272259712219238,
      "learning_rate": 2.09049823729953e-06,
      "loss": 0.9871,
      "step": 3968
    },
    {
      "epoch": 0.8577912254160364,
      "grad_norm": 0.963164210319519,
      "learning_rate": 2.0842710773147677e-06,
      "loss": 0.8631,
      "step": 3969
    },
    {
      "epoch": 0.8580073481737627,
      "grad_norm": 1.1444388628005981,
      "learning_rate": 2.0780526959957627e-06,
      "loss": 0.9162,
      "step": 3970
    },
    {
      "epoch": 0.8582234709314891,
      "grad_norm": 1.088512897491455,
      "learning_rate": 2.0718430963895054e-06,
      "loss": 0.9834,
      "step": 3971
    },
    {
      "epoch": 0.8584395936892155,
      "grad_norm": 0.9993893504142761,
      "learning_rate": 2.065642281538691e-06,
      "loss": 0.7611,
      "step": 3972
    },
    {
      "epoch": 0.8586557164469418,
      "grad_norm": 1.0248545408248901,
      "learning_rate": 2.0594502544816984e-06,
      "loss": 0.8907,
      "step": 3973
    },
    {
      "epoch": 0.8588718392046683,
      "grad_norm": 0.980186402797699,
      "learning_rate": 2.0532670182526093e-06,
      "loss": 0.8991,
      "step": 3974
    },
    {
      "epoch": 0.8590879619623947,
      "grad_norm": 1.2061337232589722,
      "learning_rate": 2.047092575881189e-06,
      "loss": 1.0024,
      "step": 3975
    },
    {
      "epoch": 0.859304084720121,
      "grad_norm": 1.0617130994796753,
      "learning_rate": 2.04092693039291e-06,
      "loss": 0.9011,
      "step": 3976
    },
    {
      "epoch": 0.8595202074778474,
      "grad_norm": 1.0404224395751953,
      "learning_rate": 2.0347700848089193e-06,
      "loss": 0.7154,
      "step": 3977
    },
    {
      "epoch": 0.8597363302355738,
      "grad_norm": 0.974334716796875,
      "learning_rate": 2.0286220421460624e-06,
      "loss": 0.8879,
      "step": 3978
    },
    {
      "epoch": 0.8599524529933001,
      "grad_norm": 1.0852996110916138,
      "learning_rate": 2.0224828054168523e-06,
      "loss": 0.9256,
      "step": 3979
    },
    {
      "epoch": 0.8601685757510266,
      "grad_norm": 1.0426374673843384,
      "learning_rate": 2.0163523776295134e-06,
      "loss": 0.7925,
      "step": 3980
    },
    {
      "epoch": 0.860384698508753,
      "grad_norm": 1.0513733625411987,
      "learning_rate": 2.0102307617879367e-06,
      "loss": 1.0972,
      "step": 3981
    },
    {
      "epoch": 0.8606008212664794,
      "grad_norm": 0.9410885572433472,
      "learning_rate": 2.0041179608917003e-06,
      "loss": 0.9346,
      "step": 3982
    },
    {
      "epoch": 0.8608169440242057,
      "grad_norm": 0.9574518799781799,
      "learning_rate": 1.9980139779360683e-06,
      "loss": 0.9286,
      "step": 3983
    },
    {
      "epoch": 0.8610330667819321,
      "grad_norm": 1.0688837766647339,
      "learning_rate": 1.991918815911964e-06,
      "loss": 0.8646,
      "step": 3984
    },
    {
      "epoch": 0.8612491895396586,
      "grad_norm": 0.8987706899642944,
      "learning_rate": 1.9858324778060133e-06,
      "loss": 0.7938,
      "step": 3985
    },
    {
      "epoch": 0.8614653122973849,
      "grad_norm": 0.9745295643806458,
      "learning_rate": 1.979754966600509e-06,
      "loss": 0.9665,
      "step": 3986
    },
    {
      "epoch": 0.8616814350551113,
      "grad_norm": 1.0833499431610107,
      "learning_rate": 1.9736862852734108e-06,
      "loss": 0.8568,
      "step": 3987
    },
    {
      "epoch": 0.8618975578128377,
      "grad_norm": 1.0643657445907593,
      "learning_rate": 1.9676264367983643e-06,
      "loss": 0.8574,
      "step": 3988
    },
    {
      "epoch": 0.862113680570564,
      "grad_norm": 0.8867236375808716,
      "learning_rate": 1.9615754241446794e-06,
      "loss": 0.7652,
      "step": 3989
    },
    {
      "epoch": 0.8623298033282905,
      "grad_norm": 1.0987955331802368,
      "learning_rate": 1.955533250277335e-06,
      "loss": 0.9481,
      "step": 3990
    },
    {
      "epoch": 0.8625459260860169,
      "grad_norm": 1.0885558128356934,
      "learning_rate": 1.9494999181569874e-06,
      "loss": 0.9157,
      "step": 3991
    },
    {
      "epoch": 0.8627620488437433,
      "grad_norm": 1.0694215297698975,
      "learning_rate": 1.9434754307399537e-06,
      "loss": 0.9242,
      "step": 3992
    },
    {
      "epoch": 0.8629781716014696,
      "grad_norm": 1.1088707447052002,
      "learning_rate": 1.9374597909782135e-06,
      "loss": 0.8787,
      "step": 3993
    },
    {
      "epoch": 0.863194294359196,
      "grad_norm": 1.2541041374206543,
      "learning_rate": 1.9314530018194253e-06,
      "loss": 1.0074,
      "step": 3994
    },
    {
      "epoch": 0.8634104171169225,
      "grad_norm": 1.0666176080703735,
      "learning_rate": 1.9254550662069004e-06,
      "loss": 0.9751,
      "step": 3995
    },
    {
      "epoch": 0.8636265398746488,
      "grad_norm": 1.0079668760299683,
      "learning_rate": 1.919465987079607e-06,
      "loss": 1.0891,
      "step": 3996
    },
    {
      "epoch": 0.8638426626323752,
      "grad_norm": 0.880274772644043,
      "learning_rate": 1.913485767372181e-06,
      "loss": 0.9269,
      "step": 3997
    },
    {
      "epoch": 0.8640587853901016,
      "grad_norm": 1.0225210189819336,
      "learning_rate": 1.9075144100149234e-06,
      "loss": 0.9084,
      "step": 3998
    },
    {
      "epoch": 0.8642749081478279,
      "grad_norm": 1.0274851322174072,
      "learning_rate": 1.9015519179337794e-06,
      "loss": 0.8577,
      "step": 3999
    },
    {
      "epoch": 0.8644910309055543,
      "grad_norm": 1.1693341732025146,
      "learning_rate": 1.895598294050358e-06,
      "loss": 1.0272,
      "step": 4000
    },
    {
      "epoch": 0.8647071536632808,
      "grad_norm": 0.8606007099151611,
      "learning_rate": 1.889653541281926e-06,
      "loss": 0.9262,
      "step": 4001
    },
    {
      "epoch": 0.8649232764210071,
      "grad_norm": 0.9811006784439087,
      "learning_rate": 1.8837176625413866e-06,
      "loss": 0.8264,
      "step": 4002
    },
    {
      "epoch": 0.8651393991787335,
      "grad_norm": 1.1836777925491333,
      "learning_rate": 1.8777906607373175e-06,
      "loss": 0.8992,
      "step": 4003
    },
    {
      "epoch": 0.8653555219364599,
      "grad_norm": 1.0306787490844727,
      "learning_rate": 1.8718725387739312e-06,
      "loss": 0.9253,
      "step": 4004
    },
    {
      "epoch": 0.8655716446941863,
      "grad_norm": 0.8847578763961792,
      "learning_rate": 1.8659632995510946e-06,
      "loss": 0.8711,
      "step": 4005
    },
    {
      "epoch": 0.8657877674519127,
      "grad_norm": 1.0953636169433594,
      "learning_rate": 1.8600629459643226e-06,
      "loss": 0.938,
      "step": 4006
    },
    {
      "epoch": 0.8660038902096391,
      "grad_norm": 0.9728406071662903,
      "learning_rate": 1.8541714809047716e-06,
      "loss": 0.7935,
      "step": 4007
    },
    {
      "epoch": 0.8662200129673655,
      "grad_norm": 1.0419201850891113,
      "learning_rate": 1.8482889072592502e-06,
      "loss": 1.0054,
      "step": 4008
    },
    {
      "epoch": 0.8664361357250918,
      "grad_norm": 0.9814909100532532,
      "learning_rate": 1.8424152279102015e-06,
      "loss": 1.0094,
      "step": 4009
    },
    {
      "epoch": 0.8666522584828182,
      "grad_norm": 0.9687902331352234,
      "learning_rate": 1.8365504457357187e-06,
      "loss": 0.9497,
      "step": 4010
    },
    {
      "epoch": 0.8668683812405447,
      "grad_norm": 1.1203945875167847,
      "learning_rate": 1.8306945636095253e-06,
      "loss": 0.725,
      "step": 4011
    },
    {
      "epoch": 0.867084503998271,
      "grad_norm": 1.0863019227981567,
      "learning_rate": 1.8248475844010016e-06,
      "loss": 0.885,
      "step": 4012
    },
    {
      "epoch": 0.8673006267559974,
      "grad_norm": 1.0180869102478027,
      "learning_rate": 1.8190095109751427e-06,
      "loss": 0.9969,
      "step": 4013
    },
    {
      "epoch": 0.8675167495137238,
      "grad_norm": 1.0216892957687378,
      "learning_rate": 1.8131803461925934e-06,
      "loss": 0.872,
      "step": 4014
    },
    {
      "epoch": 0.8677328722714501,
      "grad_norm": 1.0102025270462036,
      "learning_rate": 1.8073600929096314e-06,
      "loss": 0.9727,
      "step": 4015
    },
    {
      "epoch": 0.8679489950291766,
      "grad_norm": 0.9080718755722046,
      "learning_rate": 1.8015487539781705e-06,
      "loss": 0.8032,
      "step": 4016
    },
    {
      "epoch": 0.868165117786903,
      "grad_norm": 1.008196234703064,
      "learning_rate": 1.7957463322457536e-06,
      "loss": 0.7756,
      "step": 4017
    },
    {
      "epoch": 0.8683812405446294,
      "grad_norm": 1.1037870645523071,
      "learning_rate": 1.7899528305555547e-06,
      "loss": 0.9485,
      "step": 4018
    },
    {
      "epoch": 0.8685973633023557,
      "grad_norm": 1.0407118797302246,
      "learning_rate": 1.7841682517463677e-06,
      "loss": 0.9199,
      "step": 4019
    },
    {
      "epoch": 0.8688134860600821,
      "grad_norm": 0.9282059669494629,
      "learning_rate": 1.7783925986526273e-06,
      "loss": 0.92,
      "step": 4020
    },
    {
      "epoch": 0.8690296088178086,
      "grad_norm": 0.840758740901947,
      "learning_rate": 1.7726258741043945e-06,
      "loss": 0.95,
      "step": 4021
    },
    {
      "epoch": 0.8692457315755349,
      "grad_norm": 0.9742379784584045,
      "learning_rate": 1.7668680809273465e-06,
      "loss": 0.8779,
      "step": 4022
    },
    {
      "epoch": 0.8694618543332613,
      "grad_norm": 0.9899144768714905,
      "learning_rate": 1.7611192219427908e-06,
      "loss": 1.01,
      "step": 4023
    },
    {
      "epoch": 0.8696779770909877,
      "grad_norm": 1.0181303024291992,
      "learning_rate": 1.7553792999676523e-06,
      "loss": 0.831,
      "step": 4024
    },
    {
      "epoch": 0.869894099848714,
      "grad_norm": 1.0468906164169312,
      "learning_rate": 1.749648317814483e-06,
      "loss": 0.7739,
      "step": 4025
    },
    {
      "epoch": 0.8701102226064404,
      "grad_norm": 1.0993393659591675,
      "learning_rate": 1.74392627829145e-06,
      "loss": 1.0472,
      "step": 4026
    },
    {
      "epoch": 0.8703263453641669,
      "grad_norm": 1.4291259050369263,
      "learning_rate": 1.7382131842023374e-06,
      "loss": 0.9302,
      "step": 4027
    },
    {
      "epoch": 0.8705424681218933,
      "grad_norm": 1.1961036920547485,
      "learning_rate": 1.7325090383465503e-06,
      "loss": 0.8646,
      "step": 4028
    },
    {
      "epoch": 0.8707585908796196,
      "grad_norm": 1.1001901626586914,
      "learning_rate": 1.7268138435191061e-06,
      "loss": 0.9163,
      "step": 4029
    },
    {
      "epoch": 0.870974713637346,
      "grad_norm": 1.0502665042877197,
      "learning_rate": 1.721127602510635e-06,
      "loss": 0.9426,
      "step": 4030
    },
    {
      "epoch": 0.8711908363950724,
      "grad_norm": 0.9333060383796692,
      "learning_rate": 1.7154503181073857e-06,
      "loss": 0.768,
      "step": 4031
    },
    {
      "epoch": 0.8714069591527988,
      "grad_norm": 0.9372854828834534,
      "learning_rate": 1.7097819930912129e-06,
      "loss": 0.9407,
      "step": 4032
    },
    {
      "epoch": 0.8716230819105252,
      "grad_norm": 1.0821665525436401,
      "learning_rate": 1.7041226302395797e-06,
      "loss": 0.9718,
      "step": 4033
    },
    {
      "epoch": 0.8718392046682516,
      "grad_norm": 1.0281955003738403,
      "learning_rate": 1.6984722323255654e-06,
      "loss": 0.9438,
      "step": 4034
    },
    {
      "epoch": 0.8720553274259779,
      "grad_norm": 1.062590479850769,
      "learning_rate": 1.6928308021178552e-06,
      "loss": 0.9395,
      "step": 4035
    },
    {
      "epoch": 0.8722714501837043,
      "grad_norm": 0.8797858953475952,
      "learning_rate": 1.687198342380727e-06,
      "loss": 0.7857,
      "step": 4036
    },
    {
      "epoch": 0.8724875729414308,
      "grad_norm": 1.0413355827331543,
      "learning_rate": 1.6815748558740752e-06,
      "loss": 0.9426,
      "step": 4037
    },
    {
      "epoch": 0.8727036956991571,
      "grad_norm": 0.9694939851760864,
      "learning_rate": 1.6759603453534024e-06,
      "loss": 0.7913,
      "step": 4038
    },
    {
      "epoch": 0.8729198184568835,
      "grad_norm": 0.9185709357261658,
      "learning_rate": 1.6703548135698012e-06,
      "loss": 1.0559,
      "step": 4039
    },
    {
      "epoch": 0.8731359412146099,
      "grad_norm": 1.0040276050567627,
      "learning_rate": 1.6647582632699676e-06,
      "loss": 0.9708,
      "step": 4040
    },
    {
      "epoch": 0.8733520639723363,
      "grad_norm": 1.1234097480773926,
      "learning_rate": 1.659170697196204e-06,
      "loss": 0.9381,
      "step": 4041
    },
    {
      "epoch": 0.8735681867300626,
      "grad_norm": 0.9272008538246155,
      "learning_rate": 1.6535921180863956e-06,
      "loss": 0.9944,
      "step": 4042
    },
    {
      "epoch": 0.8737843094877891,
      "grad_norm": 1.124240517616272,
      "learning_rate": 1.6480225286740404e-06,
      "loss": 1.0021,
      "step": 4043
    },
    {
      "epoch": 0.8740004322455155,
      "grad_norm": 0.9661961793899536,
      "learning_rate": 1.642461931688224e-06,
      "loss": 0.7872,
      "step": 4044
    },
    {
      "epoch": 0.8742165550032418,
      "grad_norm": 0.999000608921051,
      "learning_rate": 1.6369103298536227e-06,
      "loss": 0.7445,
      "step": 4045
    },
    {
      "epoch": 0.8744326777609682,
      "grad_norm": 0.9599443674087524,
      "learning_rate": 1.6313677258905114e-06,
      "loss": 0.8118,
      "step": 4046
    },
    {
      "epoch": 0.8746488005186946,
      "grad_norm": 1.1033684015274048,
      "learning_rate": 1.625834122514751e-06,
      "loss": 1.0608,
      "step": 4047
    },
    {
      "epoch": 0.874864923276421,
      "grad_norm": 1.0001951456069946,
      "learning_rate": 1.6203095224377974e-06,
      "loss": 0.8486,
      "step": 4048
    },
    {
      "epoch": 0.8750810460341474,
      "grad_norm": 0.9805539846420288,
      "learning_rate": 1.6147939283666892e-06,
      "loss": 0.7877,
      "step": 4049
    },
    {
      "epoch": 0.8752971687918738,
      "grad_norm": 0.9951595664024353,
      "learning_rate": 1.6092873430040557e-06,
      "loss": 1.0561,
      "step": 4050
    },
    {
      "epoch": 0.8755132915496002,
      "grad_norm": 0.8902543187141418,
      "learning_rate": 1.6037897690481075e-06,
      "loss": 0.8228,
      "step": 4051
    },
    {
      "epoch": 0.8757294143073265,
      "grad_norm": 1.0002317428588867,
      "learning_rate": 1.598301209192654e-06,
      "loss": 0.9239,
      "step": 4052
    },
    {
      "epoch": 0.875945537065053,
      "grad_norm": 0.9699692130088806,
      "learning_rate": 1.5928216661270669e-06,
      "loss": 1.1387,
      "step": 4053
    },
    {
      "epoch": 0.8761616598227794,
      "grad_norm": 1.1492412090301514,
      "learning_rate": 1.5873511425363108e-06,
      "loss": 0.8409,
      "step": 4054
    },
    {
      "epoch": 0.8763777825805057,
      "grad_norm": 1.0058752298355103,
      "learning_rate": 1.5818896411009266e-06,
      "loss": 0.8294,
      "step": 4055
    },
    {
      "epoch": 0.8765939053382321,
      "grad_norm": 1.0559006929397583,
      "learning_rate": 1.5764371644970468e-06,
      "loss": 0.7307,
      "step": 4056
    },
    {
      "epoch": 0.8768100280959585,
      "grad_norm": 0.8250787854194641,
      "learning_rate": 1.570993715396365e-06,
      "loss": 0.8297,
      "step": 4057
    },
    {
      "epoch": 0.8770261508536848,
      "grad_norm": 1.0399080514907837,
      "learning_rate": 1.5655592964661659e-06,
      "loss": 0.8245,
      "step": 4058
    },
    {
      "epoch": 0.8772422736114113,
      "grad_norm": 1.0151152610778809,
      "learning_rate": 1.5601339103692948e-06,
      "loss": 0.9051,
      "step": 4059
    },
    {
      "epoch": 0.8774583963691377,
      "grad_norm": 1.1445362567901611,
      "learning_rate": 1.5547175597641762e-06,
      "loss": 0.9454,
      "step": 4060
    },
    {
      "epoch": 0.877674519126864,
      "grad_norm": 1.040764570236206,
      "learning_rate": 1.5493102473048183e-06,
      "loss": 0.8549,
      "step": 4061
    },
    {
      "epoch": 0.8778906418845904,
      "grad_norm": 0.9080301523208618,
      "learning_rate": 1.543911975640786e-06,
      "loss": 0.996,
      "step": 4062
    },
    {
      "epoch": 0.8781067646423169,
      "grad_norm": 1.0131959915161133,
      "learning_rate": 1.5385227474172215e-06,
      "loss": 0.9449,
      "step": 4063
    },
    {
      "epoch": 0.8783228874000433,
      "grad_norm": 1.0617555379867554,
      "learning_rate": 1.5331425652748344e-06,
      "loss": 1.1335,
      "step": 4064
    },
    {
      "epoch": 0.8785390101577696,
      "grad_norm": 0.9740786552429199,
      "learning_rate": 1.5277714318499025e-06,
      "loss": 0.9065,
      "step": 4065
    },
    {
      "epoch": 0.878755132915496,
      "grad_norm": 1.1823405027389526,
      "learning_rate": 1.5224093497742654e-06,
      "loss": 1.1069,
      "step": 4066
    },
    {
      "epoch": 0.8789712556732224,
      "grad_norm": 0.9506311416625977,
      "learning_rate": 1.5170563216753342e-06,
      "loss": 0.8977,
      "step": 4067
    },
    {
      "epoch": 0.8791873784309487,
      "grad_norm": 0.9873430132865906,
      "learning_rate": 1.5117123501760778e-06,
      "loss": 0.9684,
      "step": 4068
    },
    {
      "epoch": 0.8794035011886752,
      "grad_norm": 1.0437781810760498,
      "learning_rate": 1.5063774378950325e-06,
      "loss": 1.042,
      "step": 4069
    },
    {
      "epoch": 0.8796196239464016,
      "grad_norm": 1.0704689025878906,
      "learning_rate": 1.5010515874462893e-06,
      "loss": 0.9161,
      "step": 4070
    },
    {
      "epoch": 0.8798357467041279,
      "grad_norm": 0.8986729979515076,
      "learning_rate": 1.4957348014395079e-06,
      "loss": 0.8105,
      "step": 4071
    },
    {
      "epoch": 0.8800518694618543,
      "grad_norm": 1.09950590133667,
      "learning_rate": 1.490427082479895e-06,
      "loss": 0.8366,
      "step": 4072
    },
    {
      "epoch": 0.8802679922195807,
      "grad_norm": 0.9348450899124146,
      "learning_rate": 1.4851284331682215e-06,
      "loss": 0.9173,
      "step": 4073
    },
    {
      "epoch": 0.8804841149773072,
      "grad_norm": 0.9867619276046753,
      "learning_rate": 1.4798388561008193e-06,
      "loss": 0.8978,
      "step": 4074
    },
    {
      "epoch": 0.8807002377350335,
      "grad_norm": 1.164351463317871,
      "learning_rate": 1.4745583538695685e-06,
      "loss": 1.2014,
      "step": 4075
    },
    {
      "epoch": 0.8809163604927599,
      "grad_norm": 0.9201247692108154,
      "learning_rate": 1.4692869290618971e-06,
      "loss": 0.9247,
      "step": 4076
    },
    {
      "epoch": 0.8811324832504863,
      "grad_norm": 0.9837038516998291,
      "learning_rate": 1.4640245842607925e-06,
      "loss": 0.8844,
      "step": 4077
    },
    {
      "epoch": 0.8813486060082126,
      "grad_norm": 0.9408811926841736,
      "learning_rate": 1.4587713220447897e-06,
      "loss": 0.8393,
      "step": 4078
    },
    {
      "epoch": 0.881564728765939,
      "grad_norm": 0.9224588871002197,
      "learning_rate": 1.4535271449879806e-06,
      "loss": 0.8589,
      "step": 4079
    },
    {
      "epoch": 0.8817808515236655,
      "grad_norm": 0.8685857653617859,
      "learning_rate": 1.4482920556599988e-06,
      "loss": 0.8432,
      "step": 4080
    },
    {
      "epoch": 0.8819969742813918,
      "grad_norm": 0.8095546960830688,
      "learning_rate": 1.4430660566260256e-06,
      "loss": 0.7959,
      "step": 4081
    },
    {
      "epoch": 0.8822130970391182,
      "grad_norm": 0.887802243232727,
      "learning_rate": 1.4378491504467817e-06,
      "loss": 0.8161,
      "step": 4082
    },
    {
      "epoch": 0.8824292197968446,
      "grad_norm": 0.8776811957359314,
      "learning_rate": 1.4326413396785488e-06,
      "loss": 0.788,
      "step": 4083
    },
    {
      "epoch": 0.8826453425545709,
      "grad_norm": 1.1636507511138916,
      "learning_rate": 1.4274426268731367e-06,
      "loss": 0.9023,
      "step": 4084
    },
    {
      "epoch": 0.8828614653122974,
      "grad_norm": 0.9223209023475647,
      "learning_rate": 1.4222530145779034e-06,
      "loss": 0.8426,
      "step": 4085
    },
    {
      "epoch": 0.8830775880700238,
      "grad_norm": 1.0244272947311401,
      "learning_rate": 1.417072505335748e-06,
      "loss": 0.9004,
      "step": 4086
    },
    {
      "epoch": 0.8832937108277502,
      "grad_norm": 0.8953300714492798,
      "learning_rate": 1.411901101685107e-06,
      "loss": 0.8038,
      "step": 4087
    },
    {
      "epoch": 0.8835098335854765,
      "grad_norm": 0.9177679419517517,
      "learning_rate": 1.4067388061599575e-06,
      "loss": 0.8871,
      "step": 4088
    },
    {
      "epoch": 0.883725956343203,
      "grad_norm": 1.039570927619934,
      "learning_rate": 1.4015856212898116e-06,
      "loss": 0.7814,
      "step": 4089
    },
    {
      "epoch": 0.8839420791009294,
      "grad_norm": 1.0864183902740479,
      "learning_rate": 1.3964415495997185e-06,
      "loss": 0.8857,
      "step": 4090
    },
    {
      "epoch": 0.8841582018586557,
      "grad_norm": 1.0120965242385864,
      "learning_rate": 1.3913065936102555e-06,
      "loss": 0.9873,
      "step": 4091
    },
    {
      "epoch": 0.8843743246163821,
      "grad_norm": 1.0110678672790527,
      "learning_rate": 1.386180755837554e-06,
      "loss": 0.9451,
      "step": 4092
    },
    {
      "epoch": 0.8845904473741085,
      "grad_norm": 1.2350870370864868,
      "learning_rate": 1.3810640387932472e-06,
      "loss": 1.1773,
      "step": 4093
    },
    {
      "epoch": 0.8848065701318348,
      "grad_norm": 0.9564366936683655,
      "learning_rate": 1.3759564449845208e-06,
      "loss": 0.926,
      "step": 4094
    },
    {
      "epoch": 0.8850226928895613,
      "grad_norm": 1.0063345432281494,
      "learning_rate": 1.370857976914086e-06,
      "loss": 1.0404,
      "step": 4095
    },
    {
      "epoch": 0.8852388156472877,
      "grad_norm": 1.1416981220245361,
      "learning_rate": 1.3657686370801737e-06,
      "loss": 1.0394,
      "step": 4096
    },
    {
      "epoch": 0.8854549384050141,
      "grad_norm": 0.9854344725608826,
      "learning_rate": 1.3606884279765553e-06,
      "loss": 0.8007,
      "step": 4097
    },
    {
      "epoch": 0.8856710611627404,
      "grad_norm": 1.0236746072769165,
      "learning_rate": 1.3556173520925242e-06,
      "loss": 0.9153,
      "step": 4098
    },
    {
      "epoch": 0.8858871839204668,
      "grad_norm": 1.0137629508972168,
      "learning_rate": 1.3505554119128861e-06,
      "loss": 0.9402,
      "step": 4099
    },
    {
      "epoch": 0.8861033066781933,
      "grad_norm": 0.9242549538612366,
      "learning_rate": 1.3455026099179835e-06,
      "loss": 1.0568,
      "step": 4100
    },
    {
      "epoch": 0.8863194294359196,
      "grad_norm": 0.9783289432525635,
      "learning_rate": 1.3404589485836805e-06,
      "loss": 0.8188,
      "step": 4101
    },
    {
      "epoch": 0.886535552193646,
      "grad_norm": 0.9931995272636414,
      "learning_rate": 1.3354244303813601e-06,
      "loss": 0.7379,
      "step": 4102
    },
    {
      "epoch": 0.8867516749513724,
      "grad_norm": 1.0986474752426147,
      "learning_rate": 1.3303990577779202e-06,
      "loss": 0.8815,
      "step": 4103
    },
    {
      "epoch": 0.8869677977090987,
      "grad_norm": 0.9438388347625732,
      "learning_rate": 1.3253828332357866e-06,
      "loss": 0.9092,
      "step": 4104
    },
    {
      "epoch": 0.8871839204668251,
      "grad_norm": 1.1600340604782104,
      "learning_rate": 1.320375759212893e-06,
      "loss": 1.1459,
      "step": 4105
    },
    {
      "epoch": 0.8874000432245516,
      "grad_norm": 1.036247968673706,
      "learning_rate": 1.3153778381626968e-06,
      "loss": 0.9705,
      "step": 4106
    },
    {
      "epoch": 0.8876161659822779,
      "grad_norm": 1.0610734224319458,
      "learning_rate": 1.3103890725341683e-06,
      "loss": 0.9458,
      "step": 4107
    },
    {
      "epoch": 0.8878322887400043,
      "grad_norm": 0.9418146014213562,
      "learning_rate": 1.3054094647717896e-06,
      "loss": 0.8307,
      "step": 4108
    },
    {
      "epoch": 0.8880484114977307,
      "grad_norm": 0.9183703064918518,
      "learning_rate": 1.300439017315558e-06,
      "loss": 1.0513,
      "step": 4109
    },
    {
      "epoch": 0.8882645342554571,
      "grad_norm": 0.9850232601165771,
      "learning_rate": 1.2954777326009805e-06,
      "loss": 0.7928,
      "step": 4110
    },
    {
      "epoch": 0.8884806570131835,
      "grad_norm": 1.0256174802780151,
      "learning_rate": 1.2905256130590749e-06,
      "loss": 1.0977,
      "step": 4111
    },
    {
      "epoch": 0.8886967797709099,
      "grad_norm": 1.2523407936096191,
      "learning_rate": 1.2855826611163691e-06,
      "loss": 1.1814,
      "step": 4112
    },
    {
      "epoch": 0.8889129025286363,
      "grad_norm": 1.0399409532546997,
      "learning_rate": 1.2806488791948945e-06,
      "loss": 0.9109,
      "step": 4113
    },
    {
      "epoch": 0.8891290252863626,
      "grad_norm": 0.8949527740478516,
      "learning_rate": 1.2757242697121997e-06,
      "loss": 0.9874,
      "step": 4114
    },
    {
      "epoch": 0.889345148044089,
      "grad_norm": 0.9688258767127991,
      "learning_rate": 1.2708088350813297e-06,
      "loss": 0.873,
      "step": 4115
    },
    {
      "epoch": 0.8895612708018155,
      "grad_norm": 0.8582277894020081,
      "learning_rate": 1.2659025777108336e-06,
      "loss": 0.9761,
      "step": 4116
    },
    {
      "epoch": 0.8897773935595418,
      "grad_norm": 1.06842839717865,
      "learning_rate": 1.2610055000047683e-06,
      "loss": 0.8791,
      "step": 4117
    },
    {
      "epoch": 0.8899935163172682,
      "grad_norm": 0.9435558319091797,
      "learning_rate": 1.2561176043626856e-06,
      "loss": 0.7926,
      "step": 4118
    },
    {
      "epoch": 0.8902096390749946,
      "grad_norm": 1.0161453485488892,
      "learning_rate": 1.2512388931796493e-06,
      "loss": 0.9486,
      "step": 4119
    },
    {
      "epoch": 0.8904257618327209,
      "grad_norm": 1.0344659090042114,
      "learning_rate": 1.2463693688462163e-06,
      "loss": 0.9384,
      "step": 4120
    },
    {
      "epoch": 0.8906418845904474,
      "grad_norm": 0.998457670211792,
      "learning_rate": 1.2415090337484425e-06,
      "loss": 1.0499,
      "step": 4121
    },
    {
      "epoch": 0.8908580073481738,
      "grad_norm": 0.9651675224304199,
      "learning_rate": 1.2366578902678717e-06,
      "loss": 1.0828,
      "step": 4122
    },
    {
      "epoch": 0.8910741301059002,
      "grad_norm": 0.9161550998687744,
      "learning_rate": 1.2318159407815645e-06,
      "loss": 0.8627,
      "step": 4123
    },
    {
      "epoch": 0.8912902528636265,
      "grad_norm": 0.9125650525093079,
      "learning_rate": 1.2269831876620608e-06,
      "loss": 0.9055,
      "step": 4124
    },
    {
      "epoch": 0.8915063756213529,
      "grad_norm": 0.7748423218727112,
      "learning_rate": 1.2221596332773977e-06,
      "loss": 0.8033,
      "step": 4125
    },
    {
      "epoch": 0.8917224983790794,
      "grad_norm": 0.8713288903236389,
      "learning_rate": 1.2173452799911046e-06,
      "loss": 0.981,
      "step": 4126
    },
    {
      "epoch": 0.8919386211368057,
      "grad_norm": 1.0444568395614624,
      "learning_rate": 1.2125401301622076e-06,
      "loss": 0.868,
      "step": 4127
    },
    {
      "epoch": 0.8921547438945321,
      "grad_norm": 0.8919979929924011,
      "learning_rate": 1.2077441861452144e-06,
      "loss": 0.933,
      "step": 4128
    },
    {
      "epoch": 0.8923708666522585,
      "grad_norm": 0.9409472346305847,
      "learning_rate": 1.2029574502901297e-06,
      "loss": 0.8335,
      "step": 4129
    },
    {
      "epoch": 0.8925869894099848,
      "grad_norm": 0.9649437069892883,
      "learning_rate": 1.198179924942442e-06,
      "loss": 0.8632,
      "step": 4130
    },
    {
      "epoch": 0.8928031121677112,
      "grad_norm": 1.0272592306137085,
      "learning_rate": 1.193411612443125e-06,
      "loss": 0.8637,
      "step": 4131
    },
    {
      "epoch": 0.8930192349254377,
      "grad_norm": 0.9774860143661499,
      "learning_rate": 1.1886525151286477e-06,
      "loss": 0.8743,
      "step": 4132
    },
    {
      "epoch": 0.8932353576831641,
      "grad_norm": 1.0608245134353638,
      "learning_rate": 1.1839026353309514e-06,
      "loss": 0.7975,
      "step": 4133
    },
    {
      "epoch": 0.8934514804408904,
      "grad_norm": 1.0434435606002808,
      "learning_rate": 1.1791619753774653e-06,
      "loss": 0.9528,
      "step": 4134
    },
    {
      "epoch": 0.8936676031986168,
      "grad_norm": 1.160058617591858,
      "learning_rate": 1.1744305375911048e-06,
      "loss": 0.9413,
      "step": 4135
    },
    {
      "epoch": 0.8938837259563432,
      "grad_norm": 1.0943877696990967,
      "learning_rate": 1.1697083242902597e-06,
      "loss": 0.968,
      "step": 4136
    },
    {
      "epoch": 0.8940998487140696,
      "grad_norm": 0.9067952632904053,
      "learning_rate": 1.1649953377888102e-06,
      "loss": 1.0125,
      "step": 4137
    },
    {
      "epoch": 0.894315971471796,
      "grad_norm": 0.8918052315711975,
      "learning_rate": 1.1602915803961068e-06,
      "loss": 0.9804,
      "step": 4138
    },
    {
      "epoch": 0.8945320942295224,
      "grad_norm": 0.900425374507904,
      "learning_rate": 1.1555970544169747e-06,
      "loss": 0.832,
      "step": 4139
    },
    {
      "epoch": 0.8947482169872487,
      "grad_norm": 0.986312747001648,
      "learning_rate": 1.1509117621517207e-06,
      "loss": 0.8684,
      "step": 4140
    },
    {
      "epoch": 0.8949643397449751,
      "grad_norm": 1.1736079454421997,
      "learning_rate": 1.1462357058961327e-06,
      "loss": 0.7799,
      "step": 4141
    },
    {
      "epoch": 0.8951804625027016,
      "grad_norm": 1.3107961416244507,
      "learning_rate": 1.1415688879414666e-06,
      "loss": 1.0719,
      "step": 4142
    },
    {
      "epoch": 0.8953965852604279,
      "grad_norm": 0.8984982967376709,
      "learning_rate": 1.1369113105744512e-06,
      "loss": 0.7859,
      "step": 4143
    },
    {
      "epoch": 0.8956127080181543,
      "grad_norm": 1.043787956237793,
      "learning_rate": 1.1322629760772874e-06,
      "loss": 1.031,
      "step": 4144
    },
    {
      "epoch": 0.8958288307758807,
      "grad_norm": 0.9398433566093445,
      "learning_rate": 1.1276238867276511e-06,
      "loss": 0.8668,
      "step": 4145
    },
    {
      "epoch": 0.8960449535336071,
      "grad_norm": 0.9927624464035034,
      "learning_rate": 1.122994044798682e-06,
      "loss": 0.8491,
      "step": 4146
    },
    {
      "epoch": 0.8962610762913334,
      "grad_norm": 0.9938647747039795,
      "learning_rate": 1.1183734525589939e-06,
      "loss": 0.8402,
      "step": 4147
    },
    {
      "epoch": 0.8964771990490599,
      "grad_norm": 0.8894004821777344,
      "learning_rate": 1.1137621122726671e-06,
      "loss": 0.7474,
      "step": 4148
    },
    {
      "epoch": 0.8966933218067863,
      "grad_norm": 1.0167287588119507,
      "learning_rate": 1.109160026199243e-06,
      "loss": 0.9,
      "step": 4149
    },
    {
      "epoch": 0.8969094445645126,
      "grad_norm": 1.0129833221435547,
      "learning_rate": 1.1045671965937421e-06,
      "loss": 0.7466,
      "step": 4150
    },
    {
      "epoch": 0.897125567322239,
      "grad_norm": 0.9298940896987915,
      "learning_rate": 1.099983625706631e-06,
      "loss": 0.8004,
      "step": 4151
    },
    {
      "epoch": 0.8973416900799654,
      "grad_norm": 1.0372710227966309,
      "learning_rate": 1.0954093157838552e-06,
      "loss": 0.9976,
      "step": 4152
    },
    {
      "epoch": 0.8975578128376918,
      "grad_norm": 0.9522298574447632,
      "learning_rate": 1.090844269066813e-06,
      "loss": 0.7235,
      "step": 4153
    },
    {
      "epoch": 0.8977739355954182,
      "grad_norm": 0.9699406027793884,
      "learning_rate": 1.086288487792364e-06,
      "loss": 0.7414,
      "step": 4154
    },
    {
      "epoch": 0.8979900583531446,
      "grad_norm": 1.175062894821167,
      "learning_rate": 1.081741974192838e-06,
      "loss": 0.8125,
      "step": 4155
    },
    {
      "epoch": 0.898206181110871,
      "grad_norm": 1.0469586849212646,
      "learning_rate": 1.0772047304960109e-06,
      "loss": 0.878,
      "step": 4156
    },
    {
      "epoch": 0.8984223038685973,
      "grad_norm": 0.9139113426208496,
      "learning_rate": 1.0726767589251219e-06,
      "loss": 0.8979,
      "step": 4157
    },
    {
      "epoch": 0.8986384266263238,
      "grad_norm": 0.9332925081253052,
      "learning_rate": 1.068158061698865e-06,
      "loss": 0.8307,
      "step": 4158
    },
    {
      "epoch": 0.8988545493840502,
      "grad_norm": 0.9882146120071411,
      "learning_rate": 1.0636486410313961e-06,
      "loss": 0.8748,
      "step": 4159
    },
    {
      "epoch": 0.8990706721417765,
      "grad_norm": 0.920577883720398,
      "learning_rate": 1.0591484991323186e-06,
      "loss": 0.8209,
      "step": 4160
    },
    {
      "epoch": 0.8992867948995029,
      "grad_norm": 1.1150248050689697,
      "learning_rate": 1.0546576382066952e-06,
      "loss": 0.9629,
      "step": 4161
    },
    {
      "epoch": 0.8995029176572293,
      "grad_norm": 0.9610633850097656,
      "learning_rate": 1.0501760604550281e-06,
      "loss": 0.7465,
      "step": 4162
    },
    {
      "epoch": 0.8997190404149557,
      "grad_norm": 1.076218843460083,
      "learning_rate": 1.0457037680732873e-06,
      "loss": 1.0762,
      "step": 4163
    },
    {
      "epoch": 0.8999351631726821,
      "grad_norm": 1.0198490619659424,
      "learning_rate": 1.0412407632528864e-06,
      "loss": 0.8838,
      "step": 4164
    },
    {
      "epoch": 0.9001512859304085,
      "grad_norm": 1.087411880493164,
      "learning_rate": 1.036787048180683e-06,
      "loss": 0.7398,
      "step": 4165
    },
    {
      "epoch": 0.9003674086881348,
      "grad_norm": 1.007017970085144,
      "learning_rate": 1.0323426250389912e-06,
      "loss": 0.8635,
      "step": 4166
    },
    {
      "epoch": 0.9005835314458612,
      "grad_norm": 1.0128799676895142,
      "learning_rate": 1.0279074960055646e-06,
      "loss": 1.0326,
      "step": 4167
    },
    {
      "epoch": 0.9007996542035877,
      "grad_norm": 0.8858432769775391,
      "learning_rate": 1.0234816632536094e-06,
      "loss": 0.8224,
      "step": 4168
    },
    {
      "epoch": 0.9010157769613141,
      "grad_norm": 1.1053907871246338,
      "learning_rate": 1.0190651289517705e-06,
      "loss": 0.9375,
      "step": 4169
    },
    {
      "epoch": 0.9012318997190404,
      "grad_norm": 1.0449488162994385,
      "learning_rate": 1.0146578952641394e-06,
      "loss": 0.935,
      "step": 4170
    },
    {
      "epoch": 0.9014480224767668,
      "grad_norm": 1.0494916439056396,
      "learning_rate": 1.0102599643502508e-06,
      "loss": 0.9382,
      "step": 4171
    },
    {
      "epoch": 0.9016641452344932,
      "grad_norm": 1.1499552726745605,
      "learning_rate": 1.0058713383650875e-06,
      "loss": 0.844,
      "step": 4172
    },
    {
      "epoch": 0.9018802679922195,
      "grad_norm": 1.020134449005127,
      "learning_rate": 1.0014920194590582e-06,
      "loss": 0.7838,
      "step": 4173
    },
    {
      "epoch": 0.902096390749946,
      "grad_norm": 1.2185606956481934,
      "learning_rate": 9.9712200977802e-07,
      "loss": 0.9149,
      "step": 4174
    },
    {
      "epoch": 0.9023125135076724,
      "grad_norm": 1.0377721786499023,
      "learning_rate": 9.927613114632684e-07,
      "loss": 0.9493,
      "step": 4175
    },
    {
      "epoch": 0.9025286362653987,
      "grad_norm": 1.054484486579895,
      "learning_rate": 9.884099266515345e-07,
      "loss": 0.821,
      "step": 4176
    },
    {
      "epoch": 0.9027447590231251,
      "grad_norm": 1.0430999994277954,
      "learning_rate": 9.840678574749886e-07,
      "loss": 0.9175,
      "step": 4177
    },
    {
      "epoch": 0.9029608817808515,
      "grad_norm": 0.9499197602272034,
      "learning_rate": 9.797351060612392e-07,
      "loss": 0.9419,
      "step": 4178
    },
    {
      "epoch": 0.903177004538578,
      "grad_norm": 0.9823135733604431,
      "learning_rate": 9.754116745333152e-07,
      "loss": 0.8397,
      "step": 4179
    },
    {
      "epoch": 0.9033931272963043,
      "grad_norm": 0.8455004692077637,
      "learning_rate": 9.710975650096889e-07,
      "loss": 0.7876,
      "step": 4180
    },
    {
      "epoch": 0.9036092500540307,
      "grad_norm": 0.9631431698799133,
      "learning_rate": 9.66792779604271e-07,
      "loss": 0.8957,
      "step": 4181
    },
    {
      "epoch": 0.9038253728117571,
      "grad_norm": 1.0619665384292603,
      "learning_rate": 9.62497320426392e-07,
      "loss": 1.0105,
      "step": 4182
    },
    {
      "epoch": 0.9040414955694834,
      "grad_norm": 1.1879591941833496,
      "learning_rate": 9.582111895808198e-07,
      "loss": 0.9524,
      "step": 4183
    },
    {
      "epoch": 0.9042576183272099,
      "grad_norm": 1.102536678314209,
      "learning_rate": 9.539343891677432e-07,
      "loss": 0.8738,
      "step": 4184
    },
    {
      "epoch": 0.9044737410849363,
      "grad_norm": 0.9416372179985046,
      "learning_rate": 9.496669212827903e-07,
      "loss": 1.0261,
      "step": 4185
    },
    {
      "epoch": 0.9046898638426626,
      "grad_norm": 1.0179506540298462,
      "learning_rate": 9.45408788017006e-07,
      "loss": 0.9296,
      "step": 4186
    },
    {
      "epoch": 0.904905986600389,
      "grad_norm": 0.9441346526145935,
      "learning_rate": 9.411599914568703e-07,
      "loss": 0.7562,
      "step": 4187
    },
    {
      "epoch": 0.9051221093581154,
      "grad_norm": 1.112977385520935,
      "learning_rate": 9.369205336842779e-07,
      "loss": 0.9034,
      "step": 4188
    },
    {
      "epoch": 0.9053382321158417,
      "grad_norm": 1.003321886062622,
      "learning_rate": 9.32690416776556e-07,
      "loss": 0.9544,
      "step": 4189
    },
    {
      "epoch": 0.9055543548735682,
      "grad_norm": 1.2458070516586304,
      "learning_rate": 9.284696428064577e-07,
      "loss": 0.9224,
      "step": 4190
    },
    {
      "epoch": 0.9057704776312946,
      "grad_norm": 1.2427130937576294,
      "learning_rate": 9.242582138421441e-07,
      "loss": 0.8655,
      "step": 4191
    },
    {
      "epoch": 0.905986600389021,
      "grad_norm": 0.9118661880493164,
      "learning_rate": 9.20056131947209e-07,
      "loss": 0.9911,
      "step": 4192
    },
    {
      "epoch": 0.9062027231467473,
      "grad_norm": 1.0034087896347046,
      "learning_rate": 9.158633991806631e-07,
      "loss": 0.8121,
      "step": 4193
    },
    {
      "epoch": 0.9064188459044737,
      "grad_norm": 0.9934155941009521,
      "learning_rate": 9.116800175969342e-07,
      "loss": 0.957,
      "step": 4194
    },
    {
      "epoch": 0.9066349686622002,
      "grad_norm": 1.0172679424285889,
      "learning_rate": 9.075059892458738e-07,
      "loss": 0.9426,
      "step": 4195
    },
    {
      "epoch": 0.9068510914199265,
      "grad_norm": 1.0507500171661377,
      "learning_rate": 9.033413161727411e-07,
      "loss": 0.8351,
      "step": 4196
    },
    {
      "epoch": 0.9070672141776529,
      "grad_norm": 0.9022180438041687,
      "learning_rate": 8.991860004182196e-07,
      "loss": 0.7201,
      "step": 4197
    },
    {
      "epoch": 0.9072833369353793,
      "grad_norm": 0.9793972373008728,
      "learning_rate": 8.950400440184004e-07,
      "loss": 0.8626,
      "step": 4198
    },
    {
      "epoch": 0.9074994596931056,
      "grad_norm": 0.952980637550354,
      "learning_rate": 8.909034490047964e-07,
      "loss": 0.7976,
      "step": 4199
    },
    {
      "epoch": 0.9077155824508321,
      "grad_norm": 0.993766188621521,
      "learning_rate": 8.867762174043304e-07,
      "loss": 0.9592,
      "step": 4200
    },
    {
      "epoch": 0.9079317052085585,
      "grad_norm": 0.9308351278305054,
      "learning_rate": 8.826583512393361e-07,
      "loss": 0.8082,
      "step": 4201
    },
    {
      "epoch": 0.9081478279662849,
      "grad_norm": 1.0772156715393066,
      "learning_rate": 8.785498525275505e-07,
      "loss": 0.9905,
      "step": 4202
    },
    {
      "epoch": 0.9083639507240112,
      "grad_norm": 1.0217748880386353,
      "learning_rate": 8.744507232821387e-07,
      "loss": 0.8301,
      "step": 4203
    },
    {
      "epoch": 0.9085800734817376,
      "grad_norm": 0.9618154764175415,
      "learning_rate": 8.703609655116608e-07,
      "loss": 0.8667,
      "step": 4204
    },
    {
      "epoch": 0.9087961962394641,
      "grad_norm": 0.9852338433265686,
      "learning_rate": 8.662805812200869e-07,
      "loss": 0.7885,
      "step": 4205
    },
    {
      "epoch": 0.9090123189971904,
      "grad_norm": 0.9149219989776611,
      "learning_rate": 8.62209572406798e-07,
      "loss": 0.7684,
      "step": 4206
    },
    {
      "epoch": 0.9092284417549168,
      "grad_norm": 1.0153368711471558,
      "learning_rate": 8.581479410665805e-07,
      "loss": 1.0269,
      "step": 4207
    },
    {
      "epoch": 0.9094445645126432,
      "grad_norm": 0.863723874092102,
      "learning_rate": 8.540956891896201e-07,
      "loss": 0.7367,
      "step": 4208
    },
    {
      "epoch": 0.9096606872703695,
      "grad_norm": 0.9872366189956665,
      "learning_rate": 8.500528187615131e-07,
      "loss": 0.7964,
      "step": 4209
    },
    {
      "epoch": 0.909876810028096,
      "grad_norm": 0.9526989459991455,
      "learning_rate": 8.46019331763257e-07,
      "loss": 0.9288,
      "step": 4210
    },
    {
      "epoch": 0.9100929327858224,
      "grad_norm": 0.9501885175704956,
      "learning_rate": 8.419952301712508e-07,
      "loss": 0.9276,
      "step": 4211
    },
    {
      "epoch": 0.9103090555435487,
      "grad_norm": 1.0456392765045166,
      "learning_rate": 8.379805159572951e-07,
      "loss": 0.9932,
      "step": 4212
    },
    {
      "epoch": 0.9105251783012751,
      "grad_norm": 1.0203437805175781,
      "learning_rate": 8.339751910885918e-07,
      "loss": 0.872,
      "step": 4213
    },
    {
      "epoch": 0.9107413010590015,
      "grad_norm": 0.8790463209152222,
      "learning_rate": 8.299792575277377e-07,
      "loss": 0.7509,
      "step": 4214
    },
    {
      "epoch": 0.910957423816728,
      "grad_norm": 0.8575922846794128,
      "learning_rate": 8.259927172327331e-07,
      "loss": 0.9013,
      "step": 4215
    },
    {
      "epoch": 0.9111735465744543,
      "grad_norm": 0.9011969566345215,
      "learning_rate": 8.220155721569689e-07,
      "loss": 0.8245,
      "step": 4216
    },
    {
      "epoch": 0.9113896693321807,
      "grad_norm": 1.3565723896026611,
      "learning_rate": 8.180478242492462e-07,
      "loss": 0.7886,
      "step": 4217
    },
    {
      "epoch": 0.9116057920899071,
      "grad_norm": 1.2235686779022217,
      "learning_rate": 8.140894754537476e-07,
      "loss": 0.9486,
      "step": 4218
    },
    {
      "epoch": 0.9118219148476334,
      "grad_norm": 1.0132081508636475,
      "learning_rate": 8.101405277100549e-07,
      "loss": 0.8002,
      "step": 4219
    },
    {
      "epoch": 0.9120380376053598,
      "grad_norm": 0.9781072735786438,
      "learning_rate": 8.062009829531381e-07,
      "loss": 0.9461,
      "step": 4220
    },
    {
      "epoch": 0.9122541603630863,
      "grad_norm": 1.0441522598266602,
      "learning_rate": 8.022708431133752e-07,
      "loss": 0.8362,
      "step": 4221
    },
    {
      "epoch": 0.9124702831208126,
      "grad_norm": 0.8778496980667114,
      "learning_rate": 7.983501101165169e-07,
      "loss": 0.9303,
      "step": 4222
    },
    {
      "epoch": 0.912686405878539,
      "grad_norm": 1.0359258651733398,
      "learning_rate": 7.944387858837199e-07,
      "loss": 1.0722,
      "step": 4223
    },
    {
      "epoch": 0.9129025286362654,
      "grad_norm": 0.9095891118049622,
      "learning_rate": 7.905368723315199e-07,
      "loss": 0.8379,
      "step": 4224
    },
    {
      "epoch": 0.9131186513939918,
      "grad_norm": 1.0899566411972046,
      "learning_rate": 7.866443713718453e-07,
      "loss": 0.977,
      "step": 4225
    },
    {
      "epoch": 0.9133347741517182,
      "grad_norm": 1.255446434020996,
      "learning_rate": 7.827612849120148e-07,
      "loss": 0.8697,
      "step": 4226
    },
    {
      "epoch": 0.9135508969094446,
      "grad_norm": 1.0579952001571655,
      "learning_rate": 7.788876148547308e-07,
      "loss": 0.9245,
      "step": 4227
    },
    {
      "epoch": 0.913767019667171,
      "grad_norm": 1.039961576461792,
      "learning_rate": 7.750233630980841e-07,
      "loss": 0.716,
      "step": 4228
    },
    {
      "epoch": 0.9139831424248973,
      "grad_norm": 0.9159168601036072,
      "learning_rate": 7.711685315355444e-07,
      "loss": 0.8762,
      "step": 4229
    },
    {
      "epoch": 0.9141992651826237,
      "grad_norm": 1.0014218091964722,
      "learning_rate": 7.673231220559785e-07,
      "loss": 0.9218,
      "step": 4230
    },
    {
      "epoch": 0.9144153879403502,
      "grad_norm": 1.0748810768127441,
      "learning_rate": 7.634871365436192e-07,
      "loss": 0.9407,
      "step": 4231
    },
    {
      "epoch": 0.9146315106980765,
      "grad_norm": 1.0846792459487915,
      "learning_rate": 7.596605768780962e-07,
      "loss": 0.8723,
      "step": 4232
    },
    {
      "epoch": 0.9148476334558029,
      "grad_norm": 0.8718065023422241,
      "learning_rate": 7.558434449344143e-07,
      "loss": 0.753,
      "step": 4233
    },
    {
      "epoch": 0.9150637562135293,
      "grad_norm": 0.9065682291984558,
      "learning_rate": 7.520357425829528e-07,
      "loss": 0.8227,
      "step": 4234
    },
    {
      "epoch": 0.9152798789712556,
      "grad_norm": 1.1097488403320312,
      "learning_rate": 7.482374716894902e-07,
      "loss": 0.9782,
      "step": 4235
    },
    {
      "epoch": 0.915496001728982,
      "grad_norm": 1.0259422063827515,
      "learning_rate": 7.444486341151602e-07,
      "loss": 1.0459,
      "step": 4236
    },
    {
      "epoch": 0.9157121244867085,
      "grad_norm": 0.9476332664489746,
      "learning_rate": 7.406692317164865e-07,
      "loss": 0.9825,
      "step": 4237
    },
    {
      "epoch": 0.9159282472444349,
      "grad_norm": 0.9656356573104858,
      "learning_rate": 7.368992663453656e-07,
      "loss": 0.9509,
      "step": 4238
    },
    {
      "epoch": 0.9161443700021612,
      "grad_norm": 0.938168466091156,
      "learning_rate": 7.331387398490753e-07,
      "loss": 0.9848,
      "step": 4239
    },
    {
      "epoch": 0.9163604927598876,
      "grad_norm": 1.0702391862869263,
      "learning_rate": 7.29387654070266e-07,
      "loss": 0.8139,
      "step": 4240
    },
    {
      "epoch": 0.916576615517614,
      "grad_norm": 0.9183998703956604,
      "learning_rate": 7.25646010846961e-07,
      "loss": 0.885,
      "step": 4241
    },
    {
      "epoch": 0.9167927382753404,
      "grad_norm": 0.9607285261154175,
      "learning_rate": 7.21913812012549e-07,
      "loss": 0.9994,
      "step": 4242
    },
    {
      "epoch": 0.9170088610330668,
      "grad_norm": 0.8861908316612244,
      "learning_rate": 7.181910593958075e-07,
      "loss": 0.8015,
      "step": 4243
    },
    {
      "epoch": 0.9172249837907932,
      "grad_norm": 0.9602293372154236,
      "learning_rate": 7.144777548208748e-07,
      "loss": 0.943,
      "step": 4244
    },
    {
      "epoch": 0.9174411065485195,
      "grad_norm": 1.086655616760254,
      "learning_rate": 7.107739001072578e-07,
      "loss": 0.8883,
      "step": 4245
    },
    {
      "epoch": 0.9176572293062459,
      "grad_norm": 1.0744946002960205,
      "learning_rate": 7.070794970698425e-07,
      "loss": 1.0598,
      "step": 4246
    },
    {
      "epoch": 0.9178733520639724,
      "grad_norm": 0.9552499055862427,
      "learning_rate": 7.033945475188741e-07,
      "loss": 0.9892,
      "step": 4247
    },
    {
      "epoch": 0.9180894748216987,
      "grad_norm": 1.0425517559051514,
      "learning_rate": 6.997190532599685e-07,
      "loss": 0.8853,
      "step": 4248
    },
    {
      "epoch": 0.9183055975794251,
      "grad_norm": 1.0558618307113647,
      "learning_rate": 6.960530160941136e-07,
      "loss": 0.9576,
      "step": 4249
    },
    {
      "epoch": 0.9185217203371515,
      "grad_norm": 0.9843370318412781,
      "learning_rate": 6.923964378176551e-07,
      "loss": 1.045,
      "step": 4250
    },
    {
      "epoch": 0.9187378430948779,
      "grad_norm": 1.0195176601409912,
      "learning_rate": 6.887493202223083e-07,
      "loss": 0.8838,
      "step": 4251
    },
    {
      "epoch": 0.9189539658526042,
      "grad_norm": 1.0274927616119385,
      "learning_rate": 6.851116650951528e-07,
      "loss": 0.858,
      "step": 4252
    },
    {
      "epoch": 0.9191700886103307,
      "grad_norm": 0.9396346211433411,
      "learning_rate": 6.814834742186361e-07,
      "loss": 0.8896,
      "step": 4253
    },
    {
      "epoch": 0.9193862113680571,
      "grad_norm": 0.944214403629303,
      "learning_rate": 6.778647493705559e-07,
      "loss": 0.7939,
      "step": 4254
    },
    {
      "epoch": 0.9196023341257834,
      "grad_norm": 1.110487461090088,
      "learning_rate": 6.742554923240829e-07,
      "loss": 1.0676,
      "step": 4255
    },
    {
      "epoch": 0.9198184568835098,
      "grad_norm": 1.0984482765197754,
      "learning_rate": 6.706557048477425e-07,
      "loss": 0.9112,
      "step": 4256
    },
    {
      "epoch": 0.9200345796412362,
      "grad_norm": 0.9708102941513062,
      "learning_rate": 6.670653887054235e-07,
      "loss": 0.8405,
      "step": 4257
    },
    {
      "epoch": 0.9202507023989626,
      "grad_norm": 1.0560801029205322,
      "learning_rate": 6.634845456563766e-07,
      "loss": 1.0735,
      "step": 4258
    },
    {
      "epoch": 0.920466825156689,
      "grad_norm": 1.0283024311065674,
      "learning_rate": 6.599131774552003e-07,
      "loss": 0.7695,
      "step": 4259
    },
    {
      "epoch": 0.9206829479144154,
      "grad_norm": 1.0239213705062866,
      "learning_rate": 6.563512858518573e-07,
      "loss": 0.9256,
      "step": 4260
    },
    {
      "epoch": 0.9208990706721418,
      "grad_norm": 1.0854101181030273,
      "learning_rate": 6.527988725916712e-07,
      "loss": 0.9957,
      "step": 4261
    },
    {
      "epoch": 0.9211151934298681,
      "grad_norm": 0.8721448183059692,
      "learning_rate": 6.492559394153119e-07,
      "loss": 0.6199,
      "step": 4262
    },
    {
      "epoch": 0.9213313161875946,
      "grad_norm": 0.9405100345611572,
      "learning_rate": 6.457224880588108e-07,
      "loss": 0.7663,
      "step": 4263
    },
    {
      "epoch": 0.921547438945321,
      "grad_norm": 1.0795950889587402,
      "learning_rate": 6.421985202535497e-07,
      "loss": 1.0405,
      "step": 4264
    },
    {
      "epoch": 0.9217635617030473,
      "grad_norm": 0.8960533142089844,
      "learning_rate": 6.386840377262626e-07,
      "loss": 0.8689,
      "step": 4265
    },
    {
      "epoch": 0.9219796844607737,
      "grad_norm": 1.0131511688232422,
      "learning_rate": 6.351790421990434e-07,
      "loss": 0.8257,
      "step": 4266
    },
    {
      "epoch": 0.9221958072185001,
      "grad_norm": 0.9130016565322876,
      "learning_rate": 6.31683535389327e-07,
      "loss": 0.9669,
      "step": 4267
    },
    {
      "epoch": 0.9224119299762265,
      "grad_norm": 1.104579210281372,
      "learning_rate": 6.281975190099055e-07,
      "loss": 1.077,
      "step": 4268
    },
    {
      "epoch": 0.9226280527339529,
      "grad_norm": 0.9432905912399292,
      "learning_rate": 6.247209947689192e-07,
      "loss": 0.9165,
      "step": 4269
    },
    {
      "epoch": 0.9228441754916793,
      "grad_norm": 1.0555541515350342,
      "learning_rate": 6.212539643698546e-07,
      "loss": 1.0482,
      "step": 4270
    },
    {
      "epoch": 0.9230602982494056,
      "grad_norm": 0.9618312120437622,
      "learning_rate": 6.177964295115502e-07,
      "loss": 0.856,
      "step": 4271
    },
    {
      "epoch": 0.923276421007132,
      "grad_norm": 0.9917889833450317,
      "learning_rate": 6.14348391888191e-07,
      "loss": 0.8548,
      "step": 4272
    },
    {
      "epoch": 0.9234925437648585,
      "grad_norm": 0.9678471088409424,
      "learning_rate": 6.109098531893076e-07,
      "loss": 0.8315,
      "step": 4273
    },
    {
      "epoch": 0.9237086665225849,
      "grad_norm": 1.0311787128448486,
      "learning_rate": 6.074808150997724e-07,
      "loss": 0.9404,
      "step": 4274
    },
    {
      "epoch": 0.9239247892803112,
      "grad_norm": 0.880062460899353,
      "learning_rate": 6.040612792998124e-07,
      "loss": 0.7252,
      "step": 4275
    },
    {
      "epoch": 0.9241409120380376,
      "grad_norm": 0.978182315826416,
      "learning_rate": 6.006512474649873e-07,
      "loss": 0.7556,
      "step": 4276
    },
    {
      "epoch": 0.924357034795764,
      "grad_norm": 1.063117504119873,
      "learning_rate": 5.972507212662048e-07,
      "loss": 1.0127,
      "step": 4277
    },
    {
      "epoch": 0.9245731575534903,
      "grad_norm": 1.0114563703536987,
      "learning_rate": 5.938597023697146e-07,
      "loss": 0.8317,
      "step": 4278
    },
    {
      "epoch": 0.9247892803112168,
      "grad_norm": 0.9993071556091309,
      "learning_rate": 5.904781924371117e-07,
      "loss": 0.9688,
      "step": 4279
    },
    {
      "epoch": 0.9250054030689432,
      "grad_norm": 0.8388939499855042,
      "learning_rate": 5.871061931253263e-07,
      "loss": 0.885,
      "step": 4280
    },
    {
      "epoch": 0.9252215258266695,
      "grad_norm": 1.0512640476226807,
      "learning_rate": 5.837437060866325e-07,
      "loss": 0.9746,
      "step": 4281
    },
    {
      "epoch": 0.9254376485843959,
      "grad_norm": 1.0189282894134521,
      "learning_rate": 5.803907329686342e-07,
      "loss": 0.837,
      "step": 4282
    },
    {
      "epoch": 0.9256537713421223,
      "grad_norm": 0.9870699048042297,
      "learning_rate": 5.770472754142886e-07,
      "loss": 0.9099,
      "step": 4283
    },
    {
      "epoch": 0.9258698940998488,
      "grad_norm": 1.0658068656921387,
      "learning_rate": 5.737133350618762e-07,
      "loss": 0.9256,
      "step": 4284
    },
    {
      "epoch": 0.9260860168575751,
      "grad_norm": 0.9333972334861755,
      "learning_rate": 5.703889135450258e-07,
      "loss": 0.7999,
      "step": 4285
    },
    {
      "epoch": 0.9263021396153015,
      "grad_norm": 1.1418362855911255,
      "learning_rate": 5.670740124926898e-07,
      "loss": 0.7904,
      "step": 4286
    },
    {
      "epoch": 0.9265182623730279,
      "grad_norm": 1.0817952156066895,
      "learning_rate": 5.63768633529167e-07,
      "loss": 1.0475,
      "step": 4287
    },
    {
      "epoch": 0.9267343851307542,
      "grad_norm": 0.9698407053947449,
      "learning_rate": 5.604727782740838e-07,
      "loss": 0.7099,
      "step": 4288
    },
    {
      "epoch": 0.9269505078884807,
      "grad_norm": 0.8985784649848938,
      "learning_rate": 5.571864483423994e-07,
      "loss": 0.8405,
      "step": 4289
    },
    {
      "epoch": 0.9271666306462071,
      "grad_norm": 1.0659029483795166,
      "learning_rate": 5.539096453444126e-07,
      "loss": 0.9841,
      "step": 4290
    },
    {
      "epoch": 0.9273827534039334,
      "grad_norm": 1.0063865184783936,
      "learning_rate": 5.506423708857456e-07,
      "loss": 1.1218,
      "step": 4291
    },
    {
      "epoch": 0.9275988761616598,
      "grad_norm": 1.0510445833206177,
      "learning_rate": 5.473846265673532e-07,
      "loss": 0.9045,
      "step": 4292
    },
    {
      "epoch": 0.9278149989193862,
      "grad_norm": 1.0389801263809204,
      "learning_rate": 5.441364139855321e-07,
      "loss": 0.6456,
      "step": 4293
    },
    {
      "epoch": 0.9280311216771125,
      "grad_norm": 0.9817002415657043,
      "learning_rate": 5.408977347318889e-07,
      "loss": 0.9881,
      "step": 4294
    },
    {
      "epoch": 0.928247244434839,
      "grad_norm": 0.9527703523635864,
      "learning_rate": 5.376685903933743e-07,
      "loss": 0.849,
      "step": 4295
    },
    {
      "epoch": 0.9284633671925654,
      "grad_norm": 1.1319552659988403,
      "learning_rate": 5.344489825522581e-07,
      "loss": 0.9348,
      "step": 4296
    },
    {
      "epoch": 0.9286794899502918,
      "grad_norm": 1.188833475112915,
      "learning_rate": 5.312389127861428e-07,
      "loss": 0.9795,
      "step": 4297
    },
    {
      "epoch": 0.9288956127080181,
      "grad_norm": 1.1184444427490234,
      "learning_rate": 5.280383826679591e-07,
      "loss": 0.9105,
      "step": 4298
    },
    {
      "epoch": 0.9291117354657445,
      "grad_norm": 1.1532635688781738,
      "learning_rate": 5.248473937659504e-07,
      "loss": 0.7492,
      "step": 4299
    },
    {
      "epoch": 0.929327858223471,
      "grad_norm": 0.9561310410499573,
      "learning_rate": 5.216659476436991e-07,
      "loss": 0.9266,
      "step": 4300
    },
    {
      "epoch": 0.9295439809811973,
      "grad_norm": 1.0445328950881958,
      "learning_rate": 5.184940458601073e-07,
      "loss": 0.8585,
      "step": 4301
    },
    {
      "epoch": 0.9297601037389237,
      "grad_norm": 0.8858436346054077,
      "learning_rate": 5.153316899693983e-07,
      "loss": 0.7677,
      "step": 4302
    },
    {
      "epoch": 0.9299762264966501,
      "grad_norm": 0.9820959568023682,
      "learning_rate": 5.121788815211193e-07,
      "loss": 1.0154,
      "step": 4303
    },
    {
      "epoch": 0.9301923492543764,
      "grad_norm": 0.9774004817008972,
      "learning_rate": 5.090356220601389e-07,
      "loss": 0.898,
      "step": 4304
    },
    {
      "epoch": 0.9304084720121029,
      "grad_norm": 0.9356990456581116,
      "learning_rate": 5.059019131266474e-07,
      "loss": 0.7913,
      "step": 4305
    },
    {
      "epoch": 0.9306245947698293,
      "grad_norm": 0.8759821653366089,
      "learning_rate": 5.027777562561542e-07,
      "loss": 0.8859,
      "step": 4306
    },
    {
      "epoch": 0.9308407175275557,
      "grad_norm": 1.1266238689422607,
      "learning_rate": 4.996631529794882e-07,
      "loss": 0.9109,
      "step": 4307
    },
    {
      "epoch": 0.931056840285282,
      "grad_norm": 0.9832292795181274,
      "learning_rate": 4.965581048227997e-07,
      "loss": 0.8228,
      "step": 4308
    },
    {
      "epoch": 0.9312729630430084,
      "grad_norm": 0.9165593385696411,
      "learning_rate": 4.93462613307556e-07,
      "loss": 0.7686,
      "step": 4309
    },
    {
      "epoch": 0.9314890858007349,
      "grad_norm": 1.1463444232940674,
      "learning_rate": 4.903766799505372e-07,
      "loss": 0.9074,
      "step": 4310
    },
    {
      "epoch": 0.9317052085584612,
      "grad_norm": 1.0148438215255737,
      "learning_rate": 4.873003062638471e-07,
      "loss": 1.0103,
      "step": 4311
    },
    {
      "epoch": 0.9319213313161876,
      "grad_norm": 0.9984873533248901,
      "learning_rate": 4.842334937548976e-07,
      "loss": 0.8455,
      "step": 4312
    },
    {
      "epoch": 0.932137454073914,
      "grad_norm": 1.06214439868927,
      "learning_rate": 4.811762439264244e-07,
      "loss": 0.8178,
      "step": 4313
    },
    {
      "epoch": 0.9323535768316403,
      "grad_norm": 1.0038710832595825,
      "learning_rate": 4.781285582764694e-07,
      "loss": 1.0596,
      "step": 4314
    },
    {
      "epoch": 0.9325696995893668,
      "grad_norm": 0.8882947564125061,
      "learning_rate": 4.750904382983934e-07,
      "loss": 0.8929,
      "step": 4315
    },
    {
      "epoch": 0.9327858223470932,
      "grad_norm": 0.9856219291687012,
      "learning_rate": 4.720618854808678e-07,
      "loss": 0.8802,
      "step": 4316
    },
    {
      "epoch": 0.9330019451048195,
      "grad_norm": 1.1882649660110474,
      "learning_rate": 4.690429013078768e-07,
      "loss": 1.1672,
      "step": 4317
    },
    {
      "epoch": 0.9332180678625459,
      "grad_norm": 0.9748342633247375,
      "learning_rate": 4.6603348725871244e-07,
      "loss": 1.0518,
      "step": 4318
    },
    {
      "epoch": 0.9334341906202723,
      "grad_norm": 0.9472171664237976,
      "learning_rate": 4.630336448079864e-07,
      "loss": 0.8179,
      "step": 4319
    },
    {
      "epoch": 0.9336503133779988,
      "grad_norm": 0.896943986415863,
      "learning_rate": 4.600433754256095e-07,
      "loss": 0.7086,
      "step": 4320
    },
    {
      "epoch": 0.9338664361357251,
      "grad_norm": 1.007301688194275,
      "learning_rate": 4.570626805768119e-07,
      "loss": 1.0587,
      "step": 4321
    },
    {
      "epoch": 0.9340825588934515,
      "grad_norm": 0.9737399816513062,
      "learning_rate": 4.540915617221187e-07,
      "loss": 0.6853,
      "step": 4322
    },
    {
      "epoch": 0.9342986816511779,
      "grad_norm": 1.0090069770812988,
      "learning_rate": 4.511300203173807e-07,
      "loss": 0.948,
      "step": 4323
    },
    {
      "epoch": 0.9345148044089042,
      "grad_norm": 0.9337702393531799,
      "learning_rate": 4.4817805781374177e-07,
      "loss": 0.808,
      "step": 4324
    },
    {
      "epoch": 0.9347309271666306,
      "grad_norm": 0.9156675934791565,
      "learning_rate": 4.4523567565765593e-07,
      "loss": 0.76,
      "step": 4325
    },
    {
      "epoch": 0.9349470499243571,
      "grad_norm": 1.023437261581421,
      "learning_rate": 4.4230287529088534e-07,
      "loss": 0.9403,
      "step": 4326
    },
    {
      "epoch": 0.9351631726820834,
      "grad_norm": 1.1438939571380615,
      "learning_rate": 4.393796581504961e-07,
      "loss": 0.8609,
      "step": 4327
    },
    {
      "epoch": 0.9353792954398098,
      "grad_norm": 0.9754688739776611,
      "learning_rate": 4.364660256688558e-07,
      "loss": 0.8773,
      "step": 4328
    },
    {
      "epoch": 0.9355954181975362,
      "grad_norm": 0.9838240146636963,
      "learning_rate": 4.3356197927363786e-07,
      "loss": 1.0625,
      "step": 4329
    },
    {
      "epoch": 0.9358115409552626,
      "grad_norm": 1.1422899961471558,
      "learning_rate": 4.306675203878219e-07,
      "loss": 0.8681,
      "step": 4330
    },
    {
      "epoch": 0.936027663712989,
      "grad_norm": 0.9374284744262695,
      "learning_rate": 4.2778265042968003e-07,
      "loss": 0.8327,
      "step": 4331
    },
    {
      "epoch": 0.9362437864707154,
      "grad_norm": 1.1640714406967163,
      "learning_rate": 4.2490737081279487e-07,
      "loss": 1.0628,
      "step": 4332
    },
    {
      "epoch": 0.9364599092284418,
      "grad_norm": 1.0389443635940552,
      "learning_rate": 4.220416829460505e-07,
      "loss": 1.0365,
      "step": 4333
    },
    {
      "epoch": 0.9366760319861681,
      "grad_norm": 0.9568149447441101,
      "learning_rate": 4.1918558823362155e-07,
      "loss": 0.8681,
      "step": 4334
    },
    {
      "epoch": 0.9368921547438945,
      "grad_norm": 0.9431428909301758,
      "learning_rate": 4.1633908807498847e-07,
      "loss": 0.9613,
      "step": 4335
    },
    {
      "epoch": 0.937108277501621,
      "grad_norm": 1.0188632011413574,
      "learning_rate": 4.1350218386493115e-07,
      "loss": 0.8431,
      "step": 4336
    },
    {
      "epoch": 0.9373244002593473,
      "grad_norm": 0.9922558665275574,
      "learning_rate": 4.106748769935287e-07,
      "loss": 0.8872,
      "step": 4337
    },
    {
      "epoch": 0.9375405230170737,
      "grad_norm": 0.856784999370575,
      "learning_rate": 4.07857168846153e-07,
      "loss": 0.9267,
      "step": 4338
    },
    {
      "epoch": 0.9377566457748001,
      "grad_norm": 0.8459962606430054,
      "learning_rate": 4.050490608034729e-07,
      "loss": 0.7388,
      "step": 4339
    },
    {
      "epoch": 0.9379727685325264,
      "grad_norm": 1.016809344291687,
      "learning_rate": 4.022505542414545e-07,
      "loss": 1.0432,
      "step": 4340
    },
    {
      "epoch": 0.9381888912902528,
      "grad_norm": 0.947068989276886,
      "learning_rate": 3.994616505313631e-07,
      "loss": 0.7964,
      "step": 4341
    },
    {
      "epoch": 0.9384050140479793,
      "grad_norm": 0.9337932467460632,
      "learning_rate": 3.966823510397522e-07,
      "loss": 0.8459,
      "step": 4342
    },
    {
      "epoch": 0.9386211368057057,
      "grad_norm": 1.0331088304519653,
      "learning_rate": 3.9391265712847236e-07,
      "loss": 0.8917,
      "step": 4343
    },
    {
      "epoch": 0.938837259563432,
      "grad_norm": 1.0333054065704346,
      "learning_rate": 3.9115257015466923e-07,
      "loss": 0.719,
      "step": 4344
    },
    {
      "epoch": 0.9390533823211584,
      "grad_norm": 1.0131577253341675,
      "learning_rate": 3.8840209147077866e-07,
      "loss": 0.7622,
      "step": 4345
    },
    {
      "epoch": 0.9392695050788848,
      "grad_norm": 0.9322049021720886,
      "learning_rate": 3.856612224245249e-07,
      "loss": 0.7806,
      "step": 4346
    },
    {
      "epoch": 0.9394856278366112,
      "grad_norm": 0.9983024001121521,
      "learning_rate": 3.8292996435893125e-07,
      "loss": 0.8483,
      "step": 4347
    },
    {
      "epoch": 0.9397017505943376,
      "grad_norm": 0.9335387945175171,
      "learning_rate": 3.8020831861230733e-07,
      "loss": 0.7184,
      "step": 4348
    },
    {
      "epoch": 0.939917873352064,
      "grad_norm": 1.0600119829177856,
      "learning_rate": 3.77496286518253e-07,
      "loss": 1.0294,
      "step": 4349
    },
    {
      "epoch": 0.9401339961097903,
      "grad_norm": 0.9346319437026978,
      "learning_rate": 3.747938694056585e-07,
      "loss": 0.9171,
      "step": 4350
    },
    {
      "epoch": 0.9403501188675167,
      "grad_norm": 1.1037440299987793,
      "learning_rate": 3.721010685987003e-07,
      "loss": 0.925,
      "step": 4351
    },
    {
      "epoch": 0.9405662416252432,
      "grad_norm": 0.9268629550933838,
      "learning_rate": 3.6941788541684507e-07,
      "loss": 0.8545,
      "step": 4352
    },
    {
      "epoch": 0.9407823643829695,
      "grad_norm": 1.0709277391433716,
      "learning_rate": 3.667443211748456e-07,
      "loss": 0.9047,
      "step": 4353
    },
    {
      "epoch": 0.9409984871406959,
      "grad_norm": 0.9865515232086182,
      "learning_rate": 3.640803771827428e-07,
      "loss": 0.9506,
      "step": 4354
    },
    {
      "epoch": 0.9412146098984223,
      "grad_norm": 1.1726608276367188,
      "learning_rate": 3.614260547458659e-07,
      "loss": 0.9874,
      "step": 4355
    },
    {
      "epoch": 0.9414307326561487,
      "grad_norm": 0.9988002777099609,
      "learning_rate": 3.587813551648256e-07,
      "loss": 1.0136,
      "step": 4356
    },
    {
      "epoch": 0.941646855413875,
      "grad_norm": 0.9838830232620239,
      "learning_rate": 3.561462797355142e-07,
      "loss": 0.9081,
      "step": 4357
    },
    {
      "epoch": 0.9418629781716015,
      "grad_norm": 0.8589175343513489,
      "learning_rate": 3.535208297491144e-07,
      "loss": 0.6842,
      "step": 4358
    },
    {
      "epoch": 0.9420791009293279,
      "grad_norm": 1.0426251888275146,
      "learning_rate": 3.509050064920949e-07,
      "loss": 1.096,
      "step": 4359
    },
    {
      "epoch": 0.9422952236870542,
      "grad_norm": 0.873805820941925,
      "learning_rate": 3.4829881124619933e-07,
      "loss": 0.8171,
      "step": 4360
    },
    {
      "epoch": 0.9425113464447806,
      "grad_norm": 0.9295645356178284,
      "learning_rate": 3.4570224528845953e-07,
      "loss": 0.891,
      "step": 4361
    },
    {
      "epoch": 0.942727469202507,
      "grad_norm": 0.9706746935844421,
      "learning_rate": 3.4311530989118215e-07,
      "loss": 0.7913,
      "step": 4362
    },
    {
      "epoch": 0.9429435919602334,
      "grad_norm": 0.9419608116149902,
      "learning_rate": 3.4053800632196434e-07,
      "loss": 0.8183,
      "step": 4363
    },
    {
      "epoch": 0.9431597147179598,
      "grad_norm": 0.983264684677124,
      "learning_rate": 3.379703358436781e-07,
      "loss": 0.9577,
      "step": 4364
    },
    {
      "epoch": 0.9433758374756862,
      "grad_norm": 0.9642937183380127,
      "learning_rate": 3.3541229971447487e-07,
      "loss": 0.8284,
      "step": 4365
    },
    {
      "epoch": 0.9435919602334126,
      "grad_norm": 0.954237163066864,
      "learning_rate": 3.328638991877853e-07,
      "loss": 1.0989,
      "step": 4366
    },
    {
      "epoch": 0.9438080829911389,
      "grad_norm": 1.041129469871521,
      "learning_rate": 3.303251355123238e-07,
      "loss": 0.7665,
      "step": 4367
    },
    {
      "epoch": 0.9440242057488654,
      "grad_norm": 0.9410345554351807,
      "learning_rate": 3.277960099320732e-07,
      "loss": 0.831,
      "step": 4368
    },
    {
      "epoch": 0.9442403285065918,
      "grad_norm": 1.1055059432983398,
      "learning_rate": 3.2527652368630426e-07,
      "loss": 0.8237,
      "step": 4369
    },
    {
      "epoch": 0.9444564512643181,
      "grad_norm": 0.9659721851348877,
      "learning_rate": 3.227666780095584e-07,
      "loss": 0.9486,
      "step": 4370
    },
    {
      "epoch": 0.9446725740220445,
      "grad_norm": 1.0501004457473755,
      "learning_rate": 3.202664741316519e-07,
      "loss": 0.922,
      "step": 4371
    },
    {
      "epoch": 0.9448886967797709,
      "grad_norm": 1.0579583644866943,
      "learning_rate": 3.177759132776781e-07,
      "loss": 0.9856,
      "step": 4372
    },
    {
      "epoch": 0.9451048195374973,
      "grad_norm": 0.9522593021392822,
      "learning_rate": 3.1529499666800965e-07,
      "loss": 0.8259,
      "step": 4373
    },
    {
      "epoch": 0.9453209422952237,
      "grad_norm": 0.997139036655426,
      "learning_rate": 3.1282372551828975e-07,
      "loss": 0.8049,
      "step": 4374
    },
    {
      "epoch": 0.9455370650529501,
      "grad_norm": 1.0258209705352783,
      "learning_rate": 3.10362101039432e-07,
      "loss": 0.8842,
      "step": 4375
    },
    {
      "epoch": 0.9457531878106764,
      "grad_norm": 1.07490873336792,
      "learning_rate": 3.0791012443762924e-07,
      "loss": 0.9115,
      "step": 4376
    },
    {
      "epoch": 0.9459693105684028,
      "grad_norm": 1.0216132402420044,
      "learning_rate": 3.054677969143449e-07,
      "loss": 0.8965,
      "step": 4377
    },
    {
      "epoch": 0.9461854333261293,
      "grad_norm": 0.8703557252883911,
      "learning_rate": 3.030351196663128e-07,
      "loss": 0.7417,
      "step": 4378
    },
    {
      "epoch": 0.9464015560838557,
      "grad_norm": 0.983020544052124,
      "learning_rate": 3.0061209388553945e-07,
      "loss": 0.9063,
      "step": 4379
    },
    {
      "epoch": 0.946617678841582,
      "grad_norm": 0.9806068539619446,
      "learning_rate": 2.981987207592996e-07,
      "loss": 0.8552,
      "step": 4380
    },
    {
      "epoch": 0.9468338015993084,
      "grad_norm": 0.9600115418434143,
      "learning_rate": 2.9579500147014496e-07,
      "loss": 0.8382,
      "step": 4381
    },
    {
      "epoch": 0.9470499243570348,
      "grad_norm": 0.8612217307090759,
      "learning_rate": 2.93400937195889e-07,
      "loss": 0.8051,
      "step": 4382
    },
    {
      "epoch": 0.9472660471147611,
      "grad_norm": 1.0909043550491333,
      "learning_rate": 2.9101652910961785e-07,
      "loss": 0.847,
      "step": 4383
    },
    {
      "epoch": 0.9474821698724876,
      "grad_norm": 0.9872725605964661,
      "learning_rate": 2.88641778379688e-07,
      "loss": 0.8161,
      "step": 4384
    },
    {
      "epoch": 0.947698292630214,
      "grad_norm": 0.8272393941879272,
      "learning_rate": 2.8627668616972194e-07,
      "loss": 0.865,
      "step": 4385
    },
    {
      "epoch": 0.9479144153879403,
      "grad_norm": 1.1792147159576416,
      "learning_rate": 2.83921253638606e-07,
      "loss": 0.978,
      "step": 4386
    },
    {
      "epoch": 0.9481305381456667,
      "grad_norm": 1.021734595298767,
      "learning_rate": 2.815754819404992e-07,
      "loss": 0.9533,
      "step": 4387
    },
    {
      "epoch": 0.9483466609033931,
      "grad_norm": 0.8528704643249512,
      "learning_rate": 2.7923937222482436e-07,
      "loss": 0.8307,
      "step": 4388
    },
    {
      "epoch": 0.9485627836611196,
      "grad_norm": 0.9459760189056396,
      "learning_rate": 2.7691292563627016e-07,
      "loss": 0.8365,
      "step": 4389
    },
    {
      "epoch": 0.9487789064188459,
      "grad_norm": 0.9810954332351685,
      "learning_rate": 2.74596143314787e-07,
      "loss": 0.8873,
      "step": 4390
    },
    {
      "epoch": 0.9489950291765723,
      "grad_norm": 0.9510730504989624,
      "learning_rate": 2.7228902639559575e-07,
      "loss": 0.7129,
      "step": 4391
    },
    {
      "epoch": 0.9492111519342987,
      "grad_norm": 1.1085160970687866,
      "learning_rate": 2.699915760091787e-07,
      "loss": 0.8964,
      "step": 4392
    },
    {
      "epoch": 0.949427274692025,
      "grad_norm": 1.085143804550171,
      "learning_rate": 2.6770379328127983e-07,
      "loss": 0.7079,
      "step": 4393
    },
    {
      "epoch": 0.9496433974497515,
      "grad_norm": 0.9611392617225647,
      "learning_rate": 2.654256793329069e-07,
      "loss": 0.9965,
      "step": 4394
    },
    {
      "epoch": 0.9498595202074779,
      "grad_norm": 1.0482733249664307,
      "learning_rate": 2.6315723528033133e-07,
      "loss": 1.0366,
      "step": 4395
    },
    {
      "epoch": 0.9500756429652042,
      "grad_norm": 0.9436819553375244,
      "learning_rate": 2.6089846223508853e-07,
      "loss": 1.0193,
      "step": 4396
    },
    {
      "epoch": 0.9502917657229306,
      "grad_norm": 1.0251249074935913,
      "learning_rate": 2.5864936130396647e-07,
      "loss": 0.8849,
      "step": 4397
    },
    {
      "epoch": 0.950507888480657,
      "grad_norm": 1.0731300115585327,
      "learning_rate": 2.564099335890191e-07,
      "loss": 0.9058,
      "step": 4398
    },
    {
      "epoch": 0.9507240112383833,
      "grad_norm": 1.1696685552597046,
      "learning_rate": 2.5418018018756876e-07,
      "loss": 0.8267,
      "step": 4399
    },
    {
      "epoch": 0.9509401339961098,
      "grad_norm": 1.0380321741104126,
      "learning_rate": 2.519601021921814e-07,
      "loss": 0.9227,
      "step": 4400
    },
    {
      "epoch": 0.9511562567538362,
      "grad_norm": 1.0092154741287231,
      "learning_rate": 2.497497006906957e-07,
      "loss": 0.9168,
      "step": 4401
    },
    {
      "epoch": 0.9513723795115626,
      "grad_norm": 1.201551079750061,
      "learning_rate": 2.4754897676619647e-07,
      "loss": 0.8225,
      "step": 4402
    },
    {
      "epoch": 0.9515885022692889,
      "grad_norm": 0.9235658645629883,
      "learning_rate": 2.4535793149704114e-07,
      "loss": 0.8592,
      "step": 4403
    },
    {
      "epoch": 0.9518046250270153,
      "grad_norm": 1.0101706981658936,
      "learning_rate": 2.4317656595683305e-07,
      "loss": 0.9089,
      "step": 4404
    },
    {
      "epoch": 0.9520207477847418,
      "grad_norm": 0.9426020979881287,
      "learning_rate": 2.41004881214435e-07,
      "loss": 0.8982,
      "step": 4405
    },
    {
      "epoch": 0.9522368705424681,
      "grad_norm": 0.9659827947616577,
      "learning_rate": 2.3884287833396915e-07,
      "loss": 0.8039,
      "step": 4406
    },
    {
      "epoch": 0.9524529933001945,
      "grad_norm": 0.9621565937995911,
      "learning_rate": 2.3669055837481247e-07,
      "loss": 0.9724,
      "step": 4407
    },
    {
      "epoch": 0.9526691160579209,
      "grad_norm": 0.9790732860565186,
      "learning_rate": 2.3454792239159474e-07,
      "loss": 0.8141,
      "step": 4408
    },
    {
      "epoch": 0.9528852388156472,
      "grad_norm": 0.9651133418083191,
      "learning_rate": 2.32414971434205e-07,
      "loss": 0.8466,
      "step": 4409
    },
    {
      "epoch": 0.9531013615733737,
      "grad_norm": 0.9662875533103943,
      "learning_rate": 2.3029170654778277e-07,
      "loss": 1.0337,
      "step": 4410
    },
    {
      "epoch": 0.9533174843311001,
      "grad_norm": 1.0735738277435303,
      "learning_rate": 2.2817812877272471e-07,
      "loss": 0.9644,
      "step": 4411
    },
    {
      "epoch": 0.9535336070888265,
      "grad_norm": 1.1095219850540161,
      "learning_rate": 2.2607423914467575e-07,
      "loss": 0.7762,
      "step": 4412
    },
    {
      "epoch": 0.9537497298465528,
      "grad_norm": 1.1492083072662354,
      "learning_rate": 2.239800386945401e-07,
      "loss": 1.0877,
      "step": 4413
    },
    {
      "epoch": 0.9539658526042792,
      "grad_norm": 1.0608298778533936,
      "learning_rate": 2.2189552844847027e-07,
      "loss": 0.8082,
      "step": 4414
    },
    {
      "epoch": 0.9541819753620057,
      "grad_norm": 1.0046215057373047,
      "learning_rate": 2.1982070942786927e-07,
      "loss": 1.0174,
      "step": 4415
    },
    {
      "epoch": 0.954398098119732,
      "grad_norm": 1.0800167322158813,
      "learning_rate": 2.1775558264939488e-07,
      "loss": 0.997,
      "step": 4416
    },
    {
      "epoch": 0.9546142208774584,
      "grad_norm": 1.068735122680664,
      "learning_rate": 2.1570014912495773e-07,
      "loss": 0.902,
      "step": 4417
    },
    {
      "epoch": 0.9548303436351848,
      "grad_norm": 0.9986612796783447,
      "learning_rate": 2.1365440986171215e-07,
      "loss": 0.9912,
      "step": 4418
    },
    {
      "epoch": 0.9550464663929111,
      "grad_norm": 1.009142518043518,
      "learning_rate": 2.1161836586206742e-07,
      "loss": 0.7025,
      "step": 4419
    },
    {
      "epoch": 0.9552625891506376,
      "grad_norm": 0.9367358088493347,
      "learning_rate": 2.0959201812367658e-07,
      "loss": 0.8448,
      "step": 4420
    },
    {
      "epoch": 0.955478711908364,
      "grad_norm": 0.9901167154312134,
      "learning_rate": 2.0757536763944985e-07,
      "loss": 0.9174,
      "step": 4421
    },
    {
      "epoch": 0.9556948346660903,
      "grad_norm": 1.069323182106018,
      "learning_rate": 2.0556841539753903e-07,
      "loss": 0.8929,
      "step": 4422
    },
    {
      "epoch": 0.9559109574238167,
      "grad_norm": 0.8847269415855408,
      "learning_rate": 2.0357116238134633e-07,
      "loss": 0.8808,
      "step": 4423
    },
    {
      "epoch": 0.9561270801815431,
      "grad_norm": 0.9327659606933594,
      "learning_rate": 2.0158360956952004e-07,
      "loss": 0.959,
      "step": 4424
    },
    {
      "epoch": 0.9563432029392696,
      "grad_norm": 1.008876085281372,
      "learning_rate": 1.9960575793595893e-07,
      "loss": 0.8631,
      "step": 4425
    },
    {
      "epoch": 0.9565593256969959,
      "grad_norm": 0.979472815990448,
      "learning_rate": 1.976376084498055e-07,
      "loss": 0.7011,
      "step": 4426
    },
    {
      "epoch": 0.9567754484547223,
      "grad_norm": 0.9732769727706909,
      "learning_rate": 1.9567916207544612e-07,
      "loss": 0.7911,
      "step": 4427
    },
    {
      "epoch": 0.9569915712124487,
      "grad_norm": 1.0720874071121216,
      "learning_rate": 1.9373041977251762e-07,
      "loss": 0.8465,
      "step": 4428
    },
    {
      "epoch": 0.957207693970175,
      "grad_norm": 1.0365686416625977,
      "learning_rate": 1.9179138249589836e-07,
      "loss": 1.0038,
      "step": 4429
    },
    {
      "epoch": 0.9574238167279014,
      "grad_norm": 1.026694655418396,
      "learning_rate": 1.8986205119571055e-07,
      "loss": 0.7158,
      "step": 4430
    },
    {
      "epoch": 0.9576399394856279,
      "grad_norm": 1.1193565130233765,
      "learning_rate": 1.8794242681732243e-07,
      "loss": 0.9759,
      "step": 4431
    },
    {
      "epoch": 0.9578560622433542,
      "grad_norm": 0.900276243686676,
      "learning_rate": 1.8603251030134606e-07,
      "loss": 0.7723,
      "step": 4432
    },
    {
      "epoch": 0.9580721850010806,
      "grad_norm": 0.9539914727210999,
      "learning_rate": 1.8413230258363946e-07,
      "loss": 0.8152,
      "step": 4433
    },
    {
      "epoch": 0.958288307758807,
      "grad_norm": 0.9081903696060181,
      "learning_rate": 1.8224180459529338e-07,
      "loss": 0.8354,
      "step": 4434
    },
    {
      "epoch": 0.9585044305165334,
      "grad_norm": 0.9369721412658691,
      "learning_rate": 1.8036101726265133e-07,
      "loss": 0.7079,
      "step": 4435
    },
    {
      "epoch": 0.9587205532742598,
      "grad_norm": 1.1081403493881226,
      "learning_rate": 1.7848994150729825e-07,
      "loss": 0.8549,
      "step": 4436
    },
    {
      "epoch": 0.9589366760319862,
      "grad_norm": 0.9961804747581482,
      "learning_rate": 1.7662857824604972e-07,
      "loss": 0.897,
      "step": 4437
    },
    {
      "epoch": 0.9591527987897126,
      "grad_norm": 0.8851146697998047,
      "learning_rate": 1.747769283909717e-07,
      "loss": 0.8173,
      "step": 4438
    },
    {
      "epoch": 0.9593689215474389,
      "grad_norm": 1.0285462141036987,
      "learning_rate": 1.7293499284937177e-07,
      "loss": 0.8903,
      "step": 4439
    },
    {
      "epoch": 0.9595850443051653,
      "grad_norm": 1.2063196897506714,
      "learning_rate": 1.7110277252379238e-07,
      "loss": 0.8745,
      "step": 4440
    },
    {
      "epoch": 0.9598011670628918,
      "grad_norm": 0.9206846952438354,
      "learning_rate": 1.692802683120154e-07,
      "loss": 0.9445,
      "step": 4441
    },
    {
      "epoch": 0.9600172898206181,
      "grad_norm": 1.0168124437332153,
      "learning_rate": 1.6746748110706422e-07,
      "loss": 0.672,
      "step": 4442
    },
    {
      "epoch": 0.9602334125783445,
      "grad_norm": 1.0631967782974243,
      "learning_rate": 1.656644117972017e-07,
      "loss": 0.8407,
      "step": 4443
    },
    {
      "epoch": 0.9604495353360709,
      "grad_norm": 1.1555129289627075,
      "learning_rate": 1.6387106126592778e-07,
      "loss": 0.8799,
      "step": 4444
    },
    {
      "epoch": 0.9606656580937972,
      "grad_norm": 0.9522283673286438,
      "learning_rate": 1.620874303919795e-07,
      "loss": 0.6942,
      "step": 4445
    },
    {
      "epoch": 0.9608817808515236,
      "grad_norm": 1.115195870399475,
      "learning_rate": 1.603135200493311e-07,
      "loss": 0.8846,
      "step": 4446
    },
    {
      "epoch": 0.9610979036092501,
      "grad_norm": 0.9391319155693054,
      "learning_rate": 1.5854933110719616e-07,
      "loss": 0.9165,
      "step": 4447
    },
    {
      "epoch": 0.9613140263669765,
      "grad_norm": 0.9205000996589661,
      "learning_rate": 1.56794864430021e-07,
      "loss": 0.9234,
      "step": 4448
    },
    {
      "epoch": 0.9615301491247028,
      "grad_norm": 1.1516467332839966,
      "learning_rate": 1.5505012087749126e-07,
      "loss": 1.0175,
      "step": 4449
    },
    {
      "epoch": 0.9617462718824292,
      "grad_norm": 1.1859296560287476,
      "learning_rate": 1.5331510130452752e-07,
      "loss": 1.2111,
      "step": 4450
    },
    {
      "epoch": 0.9619623946401556,
      "grad_norm": 1.063227653503418,
      "learning_rate": 1.515898065612853e-07,
      "loss": 0.9176,
      "step": 4451
    },
    {
      "epoch": 0.962178517397882,
      "grad_norm": 0.9186780452728271,
      "learning_rate": 1.49874237493155e-07,
      "loss": 0.759,
      "step": 4452
    },
    {
      "epoch": 0.9623946401556084,
      "grad_norm": 0.9617912769317627,
      "learning_rate": 1.4816839494076197e-07,
      "loss": 0.785,
      "step": 4453
    },
    {
      "epoch": 0.9626107629133348,
      "grad_norm": 1.1196167469024658,
      "learning_rate": 1.4647227973996425e-07,
      "loss": 0.8965,
      "step": 4454
    },
    {
      "epoch": 0.9628268856710611,
      "grad_norm": 0.9806433916091919,
      "learning_rate": 1.4478589272185483e-07,
      "loss": 0.8639,
      "step": 4455
    },
    {
      "epoch": 0.9630430084287875,
      "grad_norm": 0.9332010746002197,
      "learning_rate": 1.4310923471275717e-07,
      "loss": 0.9386,
      "step": 4456
    },
    {
      "epoch": 0.963259131186514,
      "grad_norm": 1.1104164123535156,
      "learning_rate": 1.4144230653423408e-07,
      "loss": 0.903,
      "step": 4457
    },
    {
      "epoch": 0.9634752539442403,
      "grad_norm": 0.959007203578949,
      "learning_rate": 1.3978510900307441e-07,
      "loss": 1.0107,
      "step": 4458
    },
    {
      "epoch": 0.9636913767019667,
      "grad_norm": 1.015206217765808,
      "learning_rate": 1.3813764293130194e-07,
      "loss": 0.8919,
      "step": 4459
    },
    {
      "epoch": 0.9639074994596931,
      "grad_norm": 0.9340243935585022,
      "learning_rate": 1.3649990912616873e-07,
      "loss": 0.7884,
      "step": 4460
    },
    {
      "epoch": 0.9641236222174195,
      "grad_norm": 0.8546795845031738,
      "learning_rate": 1.3487190839016394e-07,
      "loss": 0.7857,
      "step": 4461
    },
    {
      "epoch": 0.9643397449751459,
      "grad_norm": 1.1241455078125,
      "learning_rate": 1.3325364152100063e-07,
      "loss": 0.9149,
      "step": 4462
    },
    {
      "epoch": 0.9645558677328723,
      "grad_norm": 1.0332034826278687,
      "learning_rate": 1.3164510931162888e-07,
      "loss": 0.9935,
      "step": 4463
    },
    {
      "epoch": 0.9647719904905987,
      "grad_norm": 0.8962178826332092,
      "learning_rate": 1.3004631255022492e-07,
      "loss": 0.8437,
      "step": 4464
    },
    {
      "epoch": 0.964988113248325,
      "grad_norm": 0.8388503193855286,
      "learning_rate": 1.2845725202019322e-07,
      "loss": 0.7117,
      "step": 4465
    },
    {
      "epoch": 0.9652042360060514,
      "grad_norm": 1.0348695516586304,
      "learning_rate": 1.268779285001731e-07,
      "loss": 0.9797,
      "step": 4466
    },
    {
      "epoch": 0.9654203587637779,
      "grad_norm": 0.9365739822387695,
      "learning_rate": 1.2530834276402782e-07,
      "loss": 0.8525,
      "step": 4467
    },
    {
      "epoch": 0.9656364815215042,
      "grad_norm": 0.996942937374115,
      "learning_rate": 1.23748495580851e-07,
      "loss": 0.997,
      "step": 4468
    },
    {
      "epoch": 0.9658526042792306,
      "grad_norm": 1.0092296600341797,
      "learning_rate": 1.2219838771496462e-07,
      "loss": 0.8307,
      "step": 4469
    },
    {
      "epoch": 0.966068727036957,
      "grad_norm": 1.114109992980957,
      "learning_rate": 1.2065801992591663e-07,
      "loss": 0.9976,
      "step": 4470
    },
    {
      "epoch": 0.9662848497946834,
      "grad_norm": 1.0005512237548828,
      "learning_rate": 1.1912739296848552e-07,
      "loss": 0.99,
      "step": 4471
    },
    {
      "epoch": 0.9665009725524097,
      "grad_norm": 0.9434089064598083,
      "learning_rate": 1.1760650759267356e-07,
      "loss": 0.797,
      "step": 4472
    },
    {
      "epoch": 0.9667170953101362,
      "grad_norm": 0.9649157524108887,
      "learning_rate": 1.1609536454371129e-07,
      "loss": 0.97,
      "step": 4473
    },
    {
      "epoch": 0.9669332180678626,
      "grad_norm": 0.8413819670677185,
      "learning_rate": 1.1459396456205307e-07,
      "loss": 0.8336,
      "step": 4474
    },
    {
      "epoch": 0.9671493408255889,
      "grad_norm": 1.0013097524642944,
      "learning_rate": 1.1310230838338598e-07,
      "loss": 0.9761,
      "step": 4475
    },
    {
      "epoch": 0.9673654635833153,
      "grad_norm": 0.9233847260475159,
      "learning_rate": 1.1162039673861646e-07,
      "loss": 0.7964,
      "step": 4476
    },
    {
      "epoch": 0.9675815863410417,
      "grad_norm": 1.0927408933639526,
      "learning_rate": 1.10148230353877e-07,
      "loss": 0.9503,
      "step": 4477
    },
    {
      "epoch": 0.967797709098768,
      "grad_norm": 1.0073485374450684,
      "learning_rate": 1.0868580995052392e-07,
      "loss": 0.8874,
      "step": 4478
    },
    {
      "epoch": 0.9680138318564945,
      "grad_norm": 0.8882655501365662,
      "learning_rate": 1.0723313624514398e-07,
      "loss": 0.8,
      "step": 4479
    },
    {
      "epoch": 0.9682299546142209,
      "grad_norm": 1.0216253995895386,
      "learning_rate": 1.0579020994954114e-07,
      "loss": 0.956,
      "step": 4480
    },
    {
      "epoch": 0.9684460773719472,
      "grad_norm": 1.0553555488586426,
      "learning_rate": 1.0435703177074763e-07,
      "loss": 0.797,
      "step": 4481
    },
    {
      "epoch": 0.9686622001296736,
      "grad_norm": 1.1059043407440186,
      "learning_rate": 1.0293360241101502e-07,
      "loss": 0.955,
      "step": 4482
    },
    {
      "epoch": 0.9688783228874001,
      "grad_norm": 0.9645715951919556,
      "learning_rate": 1.0151992256782317e-07,
      "loss": 0.8712,
      "step": 4483
    },
    {
      "epoch": 0.9690944456451265,
      "grad_norm": 0.9705997109413147,
      "learning_rate": 1.0011599293386909e-07,
      "loss": 0.7353,
      "step": 4484
    },
    {
      "epoch": 0.9693105684028528,
      "grad_norm": 0.9964491128921509,
      "learning_rate": 9.872181419707805e-08,
      "loss": 0.8744,
      "step": 4485
    },
    {
      "epoch": 0.9695266911605792,
      "grad_norm": 0.9052468538284302,
      "learning_rate": 9.733738704059247e-08,
      "loss": 0.8135,
      "step": 4486
    },
    {
      "epoch": 0.9697428139183056,
      "grad_norm": 1.006423830986023,
      "learning_rate": 9.596271214277864e-08,
      "loss": 0.9931,
      "step": 4487
    },
    {
      "epoch": 0.969958936676032,
      "grad_norm": 1.1249778270721436,
      "learning_rate": 9.459779017722436e-08,
      "loss": 1.0073,
      "step": 4488
    },
    {
      "epoch": 0.9701750594337584,
      "grad_norm": 0.9946759343147278,
      "learning_rate": 9.324262181273691e-08,
      "loss": 0.9562,
      "step": 4489
    },
    {
      "epoch": 0.9703911821914848,
      "grad_norm": 0.898918867111206,
      "learning_rate": 9.189720771334954e-08,
      "loss": 0.9048,
      "step": 4490
    },
    {
      "epoch": 0.9706073049492111,
      "grad_norm": 0.940396249294281,
      "learning_rate": 9.056154853830823e-08,
      "loss": 0.9321,
      "step": 4491
    },
    {
      "epoch": 0.9708234277069375,
      "grad_norm": 0.8641870021820068,
      "learning_rate": 8.923564494208281e-08,
      "loss": 0.8102,
      "step": 4492
    },
    {
      "epoch": 0.971039550464664,
      "grad_norm": 0.9652795791625977,
      "learning_rate": 8.791949757436691e-08,
      "loss": 0.6587,
      "step": 4493
    },
    {
      "epoch": 0.9712556732223904,
      "grad_norm": 0.9301003217697144,
      "learning_rate": 8.661310708006688e-08,
      "loss": 0.9088,
      "step": 4494
    },
    {
      "epoch": 0.9714717959801167,
      "grad_norm": 1.0240460634231567,
      "learning_rate": 8.531647409931065e-08,
      "loss": 0.892,
      "step": 4495
    },
    {
      "epoch": 0.9716879187378431,
      "grad_norm": 1.031960129737854,
      "learning_rate": 8.402959926744337e-08,
      "loss": 0.8466,
      "step": 4496
    },
    {
      "epoch": 0.9719040414955695,
      "grad_norm": 0.915630578994751,
      "learning_rate": 8.275248321503615e-08,
      "loss": 0.9407,
      "step": 4497
    },
    {
      "epoch": 0.9721201642532958,
      "grad_norm": 0.9745843410491943,
      "learning_rate": 8.148512656787066e-08,
      "loss": 0.8825,
      "step": 4498
    },
    {
      "epoch": 0.9723362870110223,
      "grad_norm": 1.051472783088684,
      "learning_rate": 8.022752994694793e-08,
      "loss": 0.7516,
      "step": 4499
    },
    {
      "epoch": 0.9725524097687487,
      "grad_norm": 1.0768318176269531,
      "learning_rate": 7.897969396848615e-08,
      "loss": 0.93,
      "step": 4500
    },
    {
      "epoch": 0.972768532526475,
      "grad_norm": 1.0499401092529297,
      "learning_rate": 7.77416192439251e-08,
      "loss": 0.9373,
      "step": 4501
    },
    {
      "epoch": 0.9729846552842014,
      "grad_norm": 0.964851975440979,
      "learning_rate": 7.651330637991506e-08,
      "loss": 0.7312,
      "step": 4502
    },
    {
      "epoch": 0.9732007780419278,
      "grad_norm": 0.9900359511375427,
      "learning_rate": 7.529475597833013e-08,
      "loss": 0.8702,
      "step": 4503
    },
    {
      "epoch": 0.9734169007996542,
      "grad_norm": 0.9830164313316345,
      "learning_rate": 7.408596863625717e-08,
      "loss": 0.9838,
      "step": 4504
    },
    {
      "epoch": 0.9736330235573806,
      "grad_norm": 1.0824828147888184,
      "learning_rate": 7.288694494599347e-08,
      "loss": 1.0791,
      "step": 4505
    },
    {
      "epoch": 0.973849146315107,
      "grad_norm": 1.0393846035003662,
      "learning_rate": 7.169768549506461e-08,
      "loss": 0.847,
      "step": 4506
    },
    {
      "epoch": 0.9740652690728334,
      "grad_norm": 1.1033649444580078,
      "learning_rate": 7.051819086620004e-08,
      "loss": 0.9154,
      "step": 4507
    },
    {
      "epoch": 0.9742813918305597,
      "grad_norm": 0.9521198868751526,
      "learning_rate": 6.934846163735298e-08,
      "loss": 0.8702,
      "step": 4508
    },
    {
      "epoch": 0.9744975145882862,
      "grad_norm": 1.0471328496932983,
      "learning_rate": 6.818849838168718e-08,
      "loss": 0.7826,
      "step": 4509
    },
    {
      "epoch": 0.9747136373460126,
      "grad_norm": 0.9011777639389038,
      "learning_rate": 6.703830166758129e-08,
      "loss": 0.8816,
      "step": 4510
    },
    {
      "epoch": 0.9749297601037389,
      "grad_norm": 0.955214262008667,
      "learning_rate": 6.589787205862896e-08,
      "loss": 1.0,
      "step": 4511
    },
    {
      "epoch": 0.9751458828614653,
      "grad_norm": 0.938822865486145,
      "learning_rate": 6.476721011363873e-08,
      "loss": 0.9094,
      "step": 4512
    },
    {
      "epoch": 0.9753620056191917,
      "grad_norm": 1.0690059661865234,
      "learning_rate": 6.364631638663188e-08,
      "loss": 0.9433,
      "step": 4513
    },
    {
      "epoch": 0.975578128376918,
      "grad_norm": 0.9920691847801208,
      "learning_rate": 6.253519142684239e-08,
      "loss": 0.9111,
      "step": 4514
    },
    {
      "epoch": 0.9757942511346445,
      "grad_norm": 0.9803416132926941,
      "learning_rate": 6.143383577872142e-08,
      "loss": 0.8739,
      "step": 4515
    },
    {
      "epoch": 0.9760103738923709,
      "grad_norm": 0.9745467305183411,
      "learning_rate": 6.034224998193061e-08,
      "loss": 0.8991,
      "step": 4516
    },
    {
      "epoch": 0.9762264966500973,
      "grad_norm": 0.9180235266685486,
      "learning_rate": 5.926043457134212e-08,
      "loss": 0.8453,
      "step": 4517
    },
    {
      "epoch": 0.9764426194078236,
      "grad_norm": 0.8540080189704895,
      "learning_rate": 5.818839007704524e-08,
      "loss": 0.9471,
      "step": 4518
    },
    {
      "epoch": 0.97665874216555,
      "grad_norm": 0.9433836340904236,
      "learning_rate": 5.712611702433757e-08,
      "loss": 0.8529,
      "step": 4519
    },
    {
      "epoch": 0.9768748649232765,
      "grad_norm": 1.2008123397827148,
      "learning_rate": 5.6073615933731616e-08,
      "loss": 1.0432,
      "step": 4520
    },
    {
      "epoch": 0.9770909876810028,
      "grad_norm": 1.162864327430725,
      "learning_rate": 5.503088732095041e-08,
      "loss": 1.0488,
      "step": 4521
    },
    {
      "epoch": 0.9773071104387292,
      "grad_norm": 0.9588613510131836,
      "learning_rate": 5.399793169692968e-08,
      "loss": 0.8168,
      "step": 4522
    },
    {
      "epoch": 0.9775232331964556,
      "grad_norm": 1.075270652770996,
      "learning_rate": 5.2974749567811235e-08,
      "loss": 0.9058,
      "step": 4523
    },
    {
      "epoch": 0.9777393559541819,
      "grad_norm": 1.1643694639205933,
      "learning_rate": 5.1961341434956233e-08,
      "loss": 1.0175,
      "step": 4524
    },
    {
      "epoch": 0.9779554787119084,
      "grad_norm": 1.2145328521728516,
      "learning_rate": 5.09577077949297e-08,
      "loss": 0.9433,
      "step": 4525
    },
    {
      "epoch": 0.9781716014696348,
      "grad_norm": 1.0082900524139404,
      "learning_rate": 4.996384913951158e-08,
      "loss": 0.859,
      "step": 4526
    },
    {
      "epoch": 0.9783877242273611,
      "grad_norm": 0.8812754154205322,
      "learning_rate": 4.897976595568787e-08,
      "loss": 1.0174,
      "step": 4527
    },
    {
      "epoch": 0.9786038469850875,
      "grad_norm": 0.8338526487350464,
      "learning_rate": 4.800545872566176e-08,
      "loss": 0.9185,
      "step": 4528
    },
    {
      "epoch": 0.9788199697428139,
      "grad_norm": 0.9946125745773315,
      "learning_rate": 4.704092792683579e-08,
      "loss": 0.8096,
      "step": 4529
    },
    {
      "epoch": 0.9790360925005404,
      "grad_norm": 1.0745145082473755,
      "learning_rate": 4.608617403183191e-08,
      "loss": 0.7382,
      "step": 4530
    },
    {
      "epoch": 0.9792522152582667,
      "grad_norm": 1.0827641487121582,
      "learning_rate": 4.51411975084759e-08,
      "loss": 0.8046,
      "step": 4531
    },
    {
      "epoch": 0.9794683380159931,
      "grad_norm": 0.9236524701118469,
      "learning_rate": 4.420599881980403e-08,
      "loss": 0.9964,
      "step": 4532
    },
    {
      "epoch": 0.9796844607737195,
      "grad_norm": 0.957859218120575,
      "learning_rate": 4.328057842406086e-08,
      "loss": 0.9137,
      "step": 4533
    },
    {
      "epoch": 0.9799005835314458,
      "grad_norm": 0.9804733991622925,
      "learning_rate": 4.236493677470144e-08,
      "loss": 0.8111,
      "step": 4534
    },
    {
      "epoch": 0.9801167062891722,
      "grad_norm": 0.9219115972518921,
      "learning_rate": 4.145907432038909e-08,
      "loss": 0.7578,
      "step": 4535
    },
    {
      "epoch": 0.9803328290468987,
      "grad_norm": 0.9382902383804321,
      "learning_rate": 4.056299150499099e-08,
      "loss": 0.9013,
      "step": 4536
    },
    {
      "epoch": 0.980548951804625,
      "grad_norm": 1.0483931303024292,
      "learning_rate": 3.967668876758701e-08,
      "loss": 1.0505,
      "step": 4537
    },
    {
      "epoch": 0.9807650745623514,
      "grad_norm": 0.9215896725654602,
      "learning_rate": 3.880016654246532e-08,
      "loss": 0.7008,
      "step": 4538
    },
    {
      "epoch": 0.9809811973200778,
      "grad_norm": 0.9627508521080017,
      "learning_rate": 3.793342525911792e-08,
      "loss": 0.9846,
      "step": 4539
    },
    {
      "epoch": 0.9811973200778042,
      "grad_norm": 1.0096752643585205,
      "learning_rate": 3.7076465342247295e-08,
      "loss": 1.0354,
      "step": 4540
    },
    {
      "epoch": 0.9814134428355306,
      "grad_norm": 0.8894267678260803,
      "learning_rate": 3.622928721175978e-08,
      "loss": 0.9714,
      "step": 4541
    },
    {
      "epoch": 0.981629565593257,
      "grad_norm": 1.1433912515640259,
      "learning_rate": 3.539189128277221e-08,
      "loss": 0.9653,
      "step": 4542
    },
    {
      "epoch": 0.9818456883509834,
      "grad_norm": 0.9559594392776489,
      "learning_rate": 3.4564277965607465e-08,
      "loss": 0.9179,
      "step": 4543
    },
    {
      "epoch": 0.9820618111087097,
      "grad_norm": 1.0474815368652344,
      "learning_rate": 3.374644766579227e-08,
      "loss": 0.9376,
      "step": 4544
    },
    {
      "epoch": 0.9822779338664361,
      "grad_norm": 0.8644964694976807,
      "learning_rate": 3.293840078406163e-08,
      "loss": 0.8446,
      "step": 4545
    },
    {
      "epoch": 0.9824940566241626,
      "grad_norm": 0.8857249021530151,
      "learning_rate": 3.214013771635882e-08,
      "loss": 0.7733,
      "step": 4546
    },
    {
      "epoch": 0.9827101793818889,
      "grad_norm": 1.0087493658065796,
      "learning_rate": 3.135165885382874e-08,
      "loss": 0.9894,
      "step": 4547
    },
    {
      "epoch": 0.9829263021396153,
      "grad_norm": 1.016663670539856,
      "learning_rate": 3.057296458282677e-08,
      "loss": 1.0015,
      "step": 4548
    },
    {
      "epoch": 0.9831424248973417,
      "grad_norm": 1.002447485923767,
      "learning_rate": 2.9804055284907705e-08,
      "loss": 0.9862,
      "step": 4549
    },
    {
      "epoch": 0.983358547655068,
      "grad_norm": 1.0246262550354004,
      "learning_rate": 2.9044931336836834e-08,
      "loss": 1.0012,
      "step": 4550
    },
    {
      "epoch": 0.9835746704127944,
      "grad_norm": 1.0020081996917725,
      "learning_rate": 2.8295593110583275e-08,
      "loss": 0.9286,
      "step": 4551
    },
    {
      "epoch": 0.9837907931705209,
      "grad_norm": 1.1388698816299438,
      "learning_rate": 2.7556040973322206e-08,
      "loss": 0.9182,
      "step": 4552
    },
    {
      "epoch": 0.9840069159282473,
      "grad_norm": 1.0084162950515747,
      "learning_rate": 2.6826275287430426e-08,
      "loss": 0.9106,
      "step": 4553
    },
    {
      "epoch": 0.9842230386859736,
      "grad_norm": 0.9532014727592468,
      "learning_rate": 2.6106296410493005e-08,
      "loss": 0.9627,
      "step": 4554
    },
    {
      "epoch": 0.9844391614437,
      "grad_norm": 0.9719927310943604,
      "learning_rate": 2.539610469529885e-08,
      "loss": 0.8488,
      "step": 4555
    },
    {
      "epoch": 0.9846552842014265,
      "grad_norm": 1.0258097648620605,
      "learning_rate": 2.4695700489836273e-08,
      "loss": 0.8759,
      "step": 4556
    },
    {
      "epoch": 0.9848714069591528,
      "grad_norm": 0.9381598234176636,
      "learning_rate": 2.400508413730629e-08,
      "loss": 0.8614,
      "step": 4557
    },
    {
      "epoch": 0.9850875297168792,
      "grad_norm": 1.0903583765029907,
      "learning_rate": 2.3324255976104883e-08,
      "loss": 0.9688,
      "step": 4558
    },
    {
      "epoch": 0.9853036524746056,
      "grad_norm": 1.0507482290267944,
      "learning_rate": 2.2653216339840746e-08,
      "loss": 0.9657,
      "step": 4559
    },
    {
      "epoch": 0.9855197752323319,
      "grad_norm": 1.034676194190979,
      "learning_rate": 2.1991965557317528e-08,
      "loss": 0.918,
      "step": 4560
    },
    {
      "epoch": 0.9857358979900583,
      "grad_norm": 0.9329960346221924,
      "learning_rate": 2.1340503952551606e-08,
      "loss": 0.8797,
      "step": 4561
    },
    {
      "epoch": 0.9859520207477848,
      "grad_norm": 1.1639207601547241,
      "learning_rate": 2.0698831844752077e-08,
      "loss": 0.9585,
      "step": 4562
    },
    {
      "epoch": 0.9861681435055112,
      "grad_norm": 1.0641400814056396,
      "learning_rate": 2.0066949548340765e-08,
      "loss": 0.8128,
      "step": 4563
    },
    {
      "epoch": 0.9863842662632375,
      "grad_norm": 1.0153011083602905,
      "learning_rate": 1.9444857372936666e-08,
      "loss": 0.9178,
      "step": 4564
    },
    {
      "epoch": 0.9866003890209639,
      "grad_norm": 0.8482317924499512,
      "learning_rate": 1.8832555623364836e-08,
      "loss": 0.6754,
      "step": 4565
    },
    {
      "epoch": 0.9868165117786903,
      "grad_norm": 0.8743635416030884,
      "learning_rate": 1.8230044599651942e-08,
      "loss": 0.8353,
      "step": 4566
    },
    {
      "epoch": 0.9870326345364167,
      "grad_norm": 0.9658658504486084,
      "learning_rate": 1.763732459702405e-08,
      "loss": 0.9213,
      "step": 4567
    },
    {
      "epoch": 0.9872487572941431,
      "grad_norm": 0.8978521823883057,
      "learning_rate": 1.705439590591551e-08,
      "loss": 0.8257,
      "step": 4568
    },
    {
      "epoch": 0.9874648800518695,
      "grad_norm": 1.0447510480880737,
      "learning_rate": 1.6481258811957836e-08,
      "loss": 1.0086,
      "step": 4569
    },
    {
      "epoch": 0.9876810028095958,
      "grad_norm": 1.058210015296936,
      "learning_rate": 1.59179135959886e-08,
      "loss": 1.0297,
      "step": 4570
    },
    {
      "epoch": 0.9878971255673222,
      "grad_norm": 1.0691936016082764,
      "learning_rate": 1.5364360534046997e-08,
      "loss": 1.0244,
      "step": 4571
    },
    {
      "epoch": 0.9881132483250487,
      "grad_norm": 1.0271050930023193,
      "learning_rate": 1.4820599897369393e-08,
      "loss": 0.9619,
      "step": 4572
    },
    {
      "epoch": 0.988329371082775,
      "grad_norm": 1.0300852060317993,
      "learning_rate": 1.4286631952398212e-08,
      "loss": 0.8862,
      "step": 4573
    },
    {
      "epoch": 0.9885454938405014,
      "grad_norm": 1.0740405321121216,
      "learning_rate": 1.3762456960777492e-08,
      "loss": 0.9585,
      "step": 4574
    },
    {
      "epoch": 0.9887616165982278,
      "grad_norm": 1.0555627346038818,
      "learning_rate": 1.3248075179352893e-08,
      "loss": 1.0198,
      "step": 4575
    },
    {
      "epoch": 0.9889777393559542,
      "grad_norm": 1.0466147661209106,
      "learning_rate": 1.2743486860165022e-08,
      "loss": 0.9095,
      "step": 4576
    },
    {
      "epoch": 0.9891938621136805,
      "grad_norm": 0.9894844889640808,
      "learning_rate": 1.224869225046721e-08,
      "loss": 0.8757,
      "step": 4577
    },
    {
      "epoch": 0.989409984871407,
      "grad_norm": 0.9214833974838257,
      "learning_rate": 1.1763691592705517e-08,
      "loss": 0.8196,
      "step": 4578
    },
    {
      "epoch": 0.9896261076291334,
      "grad_norm": 0.9439073801040649,
      "learning_rate": 1.1288485124529847e-08,
      "loss": 0.804,
      "step": 4579
    },
    {
      "epoch": 0.9898422303868597,
      "grad_norm": 1.0329656600952148,
      "learning_rate": 1.0823073078787271e-08,
      "loss": 0.8119,
      "step": 4580
    },
    {
      "epoch": 0.9900583531445861,
      "grad_norm": 1.309882402420044,
      "learning_rate": 1.0367455683530924e-08,
      "loss": 0.9939,
      "step": 4581
    },
    {
      "epoch": 0.9902744759023125,
      "grad_norm": 0.9725418090820312,
      "learning_rate": 9.921633162011113e-09,
      "loss": 0.8914,
      "step": 4582
    },
    {
      "epoch": 0.9904905986600389,
      "grad_norm": 0.8949552774429321,
      "learning_rate": 9.48560573268198e-09,
      "loss": 0.9486,
      "step": 4583
    },
    {
      "epoch": 0.9907067214177653,
      "grad_norm": 0.8219007253646851,
      "learning_rate": 9.059373609194844e-09,
      "loss": 0.6703,
      "step": 4584
    },
    {
      "epoch": 0.9909228441754917,
      "grad_norm": 0.9638029336929321,
      "learning_rate": 8.64293700040264e-09,
      "loss": 0.9538,
      "step": 4585
    },
    {
      "epoch": 0.991138966933218,
      "grad_norm": 0.9765580296516418,
      "learning_rate": 8.23629611035548e-09,
      "loss": 0.736,
      "step": 4586
    },
    {
      "epoch": 0.9913550896909444,
      "grad_norm": 1.000381350517273,
      "learning_rate": 7.839451138311748e-09,
      "loss": 0.8744,
      "step": 4587
    },
    {
      "epoch": 0.9915712124486709,
      "grad_norm": 0.983529269695282,
      "learning_rate": 7.45240227872035e-09,
      "loss": 0.9069,
      "step": 4588
    },
    {
      "epoch": 0.9917873352063973,
      "grad_norm": 1.205679178237915,
      "learning_rate": 7.075149721236241e-09,
      "loss": 0.9098,
      "step": 4589
    },
    {
      "epoch": 0.9920034579641236,
      "grad_norm": 1.0421596765518188,
      "learning_rate": 6.707693650711555e-09,
      "loss": 0.8997,
      "step": 4590
    },
    {
      "epoch": 0.99221958072185,
      "grad_norm": 0.8940469622612,
      "learning_rate": 6.350034247197823e-09,
      "loss": 0.9408,
      "step": 4591
    },
    {
      "epoch": 0.9924357034795764,
      "grad_norm": 1.1142728328704834,
      "learning_rate": 6.002171685950408e-09,
      "loss": 0.9996,
      "step": 4592
    },
    {
      "epoch": 0.9926518262373027,
      "grad_norm": 0.8785488605499268,
      "learning_rate": 5.664106137419634e-09,
      "loss": 0.9027,
      "step": 4593
    },
    {
      "epoch": 0.9928679489950292,
      "grad_norm": 0.8924729228019714,
      "learning_rate": 5.335837767255214e-09,
      "loss": 0.7117,
      "step": 4594
    },
    {
      "epoch": 0.9930840717527556,
      "grad_norm": 1.0885940790176392,
      "learning_rate": 5.017366736308482e-09,
      "loss": 0.9792,
      "step": 4595
    },
    {
      "epoch": 0.9933001945104819,
      "grad_norm": 0.9672080278396606,
      "learning_rate": 4.708693200632386e-09,
      "loss": 0.8026,
      "step": 4596
    },
    {
      "epoch": 0.9935163172682083,
      "grad_norm": 0.9822454452514648,
      "learning_rate": 4.409817311474829e-09,
      "loss": 0.9591,
      "step": 4597
    },
    {
      "epoch": 0.9937324400259347,
      "grad_norm": 0.9059059619903564,
      "learning_rate": 4.120739215280889e-09,
      "loss": 0.9964,
      "step": 4598
    },
    {
      "epoch": 0.9939485627836612,
      "grad_norm": 1.1094274520874023,
      "learning_rate": 3.841459053703922e-09,
      "loss": 0.8858,
      "step": 4599
    },
    {
      "epoch": 0.9941646855413875,
      "grad_norm": 1.0185402631759644,
      "learning_rate": 3.5719769635855773e-09,
      "loss": 0.8135,
      "step": 4600
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 4627,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 200,
  "total_flos": 1.5423137193824092e+21,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}